mrt-ingest 0.0.6 → 0.0.10

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c46977a557350af94d24aea5e4d53ed2252aa13bdbb059289ac48b6c36c9a05c
4
- data.tar.gz: bb12de3c86fe0a45f9b7f4fbea5f0a9478d13ed5e5dd1f6dea09da655cd08306
3
+ metadata.gz: 45bcaa8222c9328220d0a78f058217d7d75f74bbfa3665d2b4575f6331871795
4
+ data.tar.gz: e14e16b3101143fff600664d7178069b48f1a0a2c77582eb11942c0124486630
5
5
  SHA512:
6
- metadata.gz: 5391b059eb9bcfca59bc4af506b4c8cddf5a923e53b23e35f12f255893c5e88e178e2fdff150555a37477a0c4e64b45f9d3cf71a61855a03b7730a1a0084cac5
7
- data.tar.gz: 28cef0d3f22950f7127e766a2e1de0ebd78bffd326ee3a85e0a66beeb790b7417860155b61221dce09e932352c3411368294a467026f0e60e425ef0351588147
6
+ metadata.gz: cf9e18c9d0d6cc47f7d238d5726714991f3ff2a8201ca9026516843f43d964fa8f9bb4099a668a21b20da0d4661ed5a0a893707648ce0b67650f4288d3a044d3
7
+ data.tar.gz: ee160153eb825804c5a53080fc4b09562bc6dcd0bdc4527705efaf1706c3cd7ced461e0edb8f22c8a7ea47c47e3e271d83685430c2c93272610dcb6e7e859ecd
@@ -6,10 +6,9 @@ module Mrt
6
6
  # #File.
7
7
  class Component # :nodoc:
8
8
 
9
- attr_reader :server, :uri
9
+ attr_reader :uri
10
10
 
11
- def initialize(server, location, options)
12
- @server = server
11
+ def initialize(location, options)
13
12
  @name = options[:name]
14
13
  @digest = options[:digest]
15
14
  @mime_type = options[:mime_type]
@@ -18,25 +17,6 @@ module Mrt
18
17
  init_uri(location)
19
18
  end
20
19
 
21
- class << self
22
- def from_erc(server, erc)
23
- return Component.new(server, erc, name: 'mrt-erc.txt') if erc.is_a?(URI) || erc.is_a?(File)
24
- return from_hash(server, erc) if erc.is_a?(Hash)
25
-
26
- raise ArgumentError, 'Bad ERC supplied: must be a URI, File, or Hash'
27
- end
28
-
29
- def from_hash(server, erc_h)
30
- uri_str, path = server.add_file do |f|
31
- f.write("erc:\n")
32
- erc_h.each_pair { |k, v| f.write("#{k}: #{v}\n") }
33
- end
34
-
35
- digest = Mrt::Ingest::MessageDigest::MD5.from_file(File.new(path))
36
- Component.new(server, URI.parse(uri_str), name: 'mrt-erc.txt', digest: digest)
37
- end
38
- end
39
-
40
20
  def to_manifest_entry
41
21
  "#{@uri} | #{digest_type} | #{digest_value} | #{@size} | | #{@name} | #{@mime_type}\n"
42
22
  end
@@ -64,7 +44,7 @@ module Mrt
64
44
 
65
45
  def init_from_file(file)
66
46
  @name = File.basename(file.path) if @name.nil?
67
- @uri = server.add_file(file)[0]
47
+ # @uri = server.add_file(file)[0]
68
48
  @digest = Mrt::Ingest::MessageDigest::MD5.from_file(file) if @digest.nil?
69
49
  @size = File.size(file.path) if @size.nil?
70
50
  end
@@ -12,18 +12,16 @@ module Mrt
12
12
  # An object prepared for ingest into Merritt.
13
13
  class IObject
14
14
 
15
- attr_accessor :primary_identifier, :local_identifier, :erc
16
- attr_reader :server
15
+ attr_accessor :primary_identifier, :local_identifier, :erc, :what, :who, :when
17
16
 
18
17
  # Options can have the keys :primary_identifier,
19
- # :local_identifier, :server, or :erc. :erc can be a #File, #Uri
20
- # or a #Hash of metadata. :server is a #OneTimeServer.
18
+ # :local_identifier, or :erc. :erc can be a #File, #Uri
19
+ # or a #Hash of metadata.
21
20
  def initialize(options = {})
22
21
  @primary_identifier = options[:primary_identifier]
23
22
  @local_identifier = options[:local_identifier]
24
23
  @erc = options[:erc] || {}
25
24
  @components = []
26
- @server = options[:server] || Mrt::Ingest::OneTimeServer.new
27
25
  end
28
26
 
29
27
  # Add a component to the object. where can be either a #URI or a
@@ -32,33 +30,20 @@ module Mrt
32
30
  # subclass of Mrt::Ingest::MessageDigest::Base. If where is a
33
31
  # #File, it will be hosted on an embedded web server.
34
32
  def add_component(where, options = {})
35
- @components.push(Component.new(@server, where, options))
33
+ @components.push(Component.new(where, options))
36
34
  end
37
35
 
38
36
  # Make a Mrt::Ingest::Request object for this mrt-object
39
37
  def mk_request(profile, user_agent)
40
38
  manifest_file = Tempfile.new('mrt-ingest')
41
- erc_component = Component.from_erc(@server, @erc)
42
- mk_manifest(manifest_file, erc_component)
39
+ mk_manifest(manifest_file)
43
40
  # reset to beginning
44
41
  manifest_file.open
45
42
  new_request(manifest_file, profile, user_agent)
46
43
  end
47
44
 
48
- def start_server # :nodoc:
49
- @server.start_server
50
- end
51
-
52
- def join_server # :nodoc:
53
- @server.join_server
54
- end
55
-
56
- def stop_server # :nodoc:
57
- @server.stop_server
58
- end
59
-
60
45
  # rubocop:disable Metrics/LineLength
61
- def mk_manifest(manifest, erc_component) # :nodoc:
46
+ def mk_manifest(manifest) # :nodoc:
62
47
  manifest.write("#%checkm_0.7\n")
63
48
  manifest.write("#%profile http://uc3.cdlib.org/registry/ingest/manifest/mrt-ingest-manifest\n")
64
49
  manifest.write("#%prefix | mrt: | http://uc3.cdlib.org/ontology/mom#\n")
@@ -67,7 +52,6 @@ module Mrt
67
52
  @components.each do |c|
68
53
  manifest.write(c.to_manifest_entry)
69
54
  end
70
- manifest.write(erc_component.to_manifest_entry)
71
55
  manifest.write("#%EOF\n")
72
56
  end
73
57
  # rubocop:enable Metrics/LineLength
@@ -76,7 +60,6 @@ module Mrt
76
60
  # submitter.
77
61
  def start_ingest(client, profile, submitter)
78
62
  request = mk_request(profile, submitter)
79
- start_server
80
63
  @response = client.ingest(request)
81
64
  end
82
65
 
@@ -84,7 +67,6 @@ module Mrt
84
67
  def finish_ingest
85
68
  # XXX Right now we only join the hosting server; in the future
86
69
  # we will check the status via the ingest server.
87
- join_server
88
70
  end
89
71
 
90
72
  private
@@ -96,6 +78,9 @@ module Mrt
96
78
  type: 'object-manifest',
97
79
  submitter: user_agent,
98
80
  profile: profile,
81
+ title: @erc['what'],
82
+ creator: @erc['who'],
83
+ date: @erc['when'],
99
84
  local_identifier: @local_identifier,
100
85
  primary_identifier: @primary_identifier
101
86
  )
data/mrt-ingest.gemspec CHANGED
@@ -3,7 +3,7 @@ $LOAD_PATH.push File.expand_path('lib', __dir__)
3
3
  Gem::Specification.new do |s|
4
4
  s.required_ruby_version = '>= 2.4.0'
5
5
  s.name = 'mrt-ingest'
6
- s.version = '0.0.6'
6
+ s.version = '0.0.10'
7
7
  s.platform = Gem::Platform::RUBY
8
8
  s.authors = ['Mark Reyes', 'David Moles']
9
9
  s.email = ['mark.reyes@ucop.edu', 'david.moles@ucop.edu']
@@ -12,10 +12,11 @@ Gem::Specification.new do |s|
12
12
  s.description = 'A client for the Merritt ingest system. More details available from https://github.com/CDLUC3/mrt-doc/wiki'
13
13
  s.license = 'BSD-3-Clause'
14
14
 
15
- s.add_dependency 'json', '~> 2.0'
16
- s.add_dependency 'rest-client', '~> 2.0'
15
+ # s.add_dependency 'json', '~> 2.1'
16
+ s.add_dependency 'rest-client', '~> 2.1'
17
17
 
18
- s.add_development_dependency 'bundler', '>= 2.2.10'
18
+
19
+ # s.add_development_dependency 'bundler', '>= 2.2.10'
19
20
  s.add_development_dependency 'checkm', '0.0.6'
20
21
  s.add_development_dependency 'mocha', '~> 1.7'
21
22
  s.add_development_dependency 'rake', '~> 12.0'
@@ -4,8 +4,6 @@ module Mrt::Ingest
4
4
  describe Component do
5
5
  describe :from_erc do
6
6
  it 'rejects string ERCs' do
7
- server = instance_double(OneTimeServer)
8
- expect { Component.from_erc(server, 'I am not an ERC') }.to raise_error(ArgumentError)
9
7
  end
10
8
  end
11
9
  end
@@ -81,11 +81,9 @@ module Mrt::Ingest
81
81
  def check_erc_content(iobject, asserted_erc)
82
82
  erc_entry = get_uri_for_name(iobject, 'mrt-erc.txt')
83
83
  expect(erc_entry).not_to be_nil
84
- iobject.start_server
85
84
  begin
86
85
  expect(parse_erc_entry(erc_entry)).to eq(asserted_erc)
87
86
  ensure
88
- iobject.stop_server
89
87
  end
90
88
  end
91
89
 
@@ -137,11 +135,9 @@ module Mrt::Ingest
137
135
 
138
136
  it 'should serve a valid mrt-erc.txt entry' do
139
137
  expect(@erc_entry).not_to be_nil
140
- @iobject.start_server
141
138
  begin
142
139
  open(@erc_entry.values[0]).read.lines.to_a
143
140
  ensure
144
- @iobject.stop_server
145
141
  end
146
142
  end
147
143
 
@@ -186,11 +182,9 @@ module Mrt::Ingest
186
182
  manifest = parse_object_manifest(iobject)
187
183
  expect(manifest).not_to(be_nil)
188
184
  expect(uri_entry).not_to be_nil
189
- iobject.start_server
190
185
  begin
191
186
  expect(open(uri_entry.values[0]).read).to eq(FILE_CONTENT)
192
187
  ensure
193
- iobject.stop_server
194
188
  end
195
189
  end
196
190
  end
@@ -221,9 +215,8 @@ module Mrt::Ingest
221
215
  @iobject.start_ingest(@client, 'example_profile', 'Atom processor/Example collection')
222
216
 
223
217
  # TODO: just mock the server
224
- server = @iobject.server
225
- files = Dir.entries(server.dir).reject { |e| %w[. ..].include?(e) }
226
- urls = files.map { |f| "http://#{Socket.gethostname}:#{server.port}/#{f}" }
218
+ # files = Dir.entries(server.dir).reject { |e| %w[. ..].include?(e) }
219
+ # urls = files.map { |f| "http://#{Socket.gethostname}:#{server.port}/#{f}" }
227
220
 
228
221
  client_process_id = fork do
229
222
  begin
metadata CHANGED
@@ -1,58 +1,30 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mrt-ingest
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.6
4
+ version: 0.0.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mark Reyes
8
8
  - David Moles
9
- autorequire:
9
+ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2021-06-08 00:00:00.000000000 Z
12
+ date: 2022-07-12 00:00:00.000000000 Z
13
13
  dependencies:
14
- - !ruby/object:Gem::Dependency
15
- name: json
16
- requirement: !ruby/object:Gem::Requirement
17
- requirements:
18
- - - "~>"
19
- - !ruby/object:Gem::Version
20
- version: '2.0'
21
- type: :runtime
22
- prerelease: false
23
- version_requirements: !ruby/object:Gem::Requirement
24
- requirements:
25
- - - "~>"
26
- - !ruby/object:Gem::Version
27
- version: '2.0'
28
14
  - !ruby/object:Gem::Dependency
29
15
  name: rest-client
30
16
  requirement: !ruby/object:Gem::Requirement
31
17
  requirements:
32
18
  - - "~>"
33
19
  - !ruby/object:Gem::Version
34
- version: '2.0'
20
+ version: '2.1'
35
21
  type: :runtime
36
22
  prerelease: false
37
23
  version_requirements: !ruby/object:Gem::Requirement
38
24
  requirements:
39
25
  - - "~>"
40
26
  - !ruby/object:Gem::Version
41
- version: '2.0'
42
- - !ruby/object:Gem::Dependency
43
- name: bundler
44
- requirement: !ruby/object:Gem::Requirement
45
- requirements:
46
- - - ">="
47
- - !ruby/object:Gem::Version
48
- version: 2.2.10
49
- type: :development
50
- prerelease: false
51
- version_requirements: !ruby/object:Gem::Requirement
52
- requirements:
53
- - - ">="
54
- - !ruby/object:Gem::Version
55
- version: 2.2.10
27
+ version: '2.1'
56
28
  - !ruby/object:Gem::Dependency
57
29
  name: checkm
58
30
  requirement: !ruby/object:Gem::Requirement
@@ -209,7 +181,6 @@ files:
209
181
  - lib/mrt/ingest/ingest_exception.rb
210
182
  - lib/mrt/ingest/iobject.rb
211
183
  - lib/mrt/ingest/message_digest.rb
212
- - lib/mrt/ingest/one_time_server.rb
213
184
  - lib/mrt/ingest/request.rb
214
185
  - lib/mrt/ingest/response.rb
215
186
  - mrt-ingest-ruby.iml
@@ -221,14 +192,13 @@ files:
221
192
  - spec/unit/mrt/ingest/component_spec.rb
222
193
  - spec/unit/mrt/ingest/iobject_spec.rb
223
194
  - spec/unit/mrt/ingest/message_digest_spec.rb
224
- - spec/unit/mrt/ingest/one_time_server_spec.rb
225
195
  - spec/unit/mrt/ingest/request_spec.rb
226
196
  - spec/unit/mrt/ingest/response_spec.rb
227
197
  homepage: https://github.com/CDLUC3/mrt-ingest-ruby
228
198
  licenses:
229
199
  - BSD-3-Clause
230
200
  metadata: {}
231
- post_install_message:
201
+ post_install_message:
232
202
  rdoc_options: []
233
203
  require_paths:
234
204
  - lib
@@ -244,7 +214,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
244
214
  version: '0'
245
215
  requirements: []
246
216
  rubygems_version: 3.0.3.1
247
- signing_key:
217
+ signing_key:
248
218
  specification_version: 4
249
219
  summary: A client for Merritt ingest.
250
220
  test_files:
@@ -255,6 +225,5 @@ test_files:
255
225
  - spec/unit/mrt/ingest/component_spec.rb
256
226
  - spec/unit/mrt/ingest/iobject_spec.rb
257
227
  - spec/unit/mrt/ingest/message_digest_spec.rb
258
- - spec/unit/mrt/ingest/one_time_server_spec.rb
259
228
  - spec/unit/mrt/ingest/request_spec.rb
260
229
  - spec/unit/mrt/ingest/response_spec.rb
@@ -1,111 +0,0 @@
1
- # Author:: Erik Hetzner (mailto:erik.hetzner@ucop.edu)
2
- # Copyright:: Copyright (c) 2011, Regents of the University of California
3
-
4
- require 'webrick'
5
-
6
- # An HTTP server that will serve each file ONCE before shutting down.
7
- module Mrt
8
- module Ingest
9
- class OneTimeServer
10
-
11
- attr_reader :dir, :port
12
-
13
- # Find an open port, starting with start and adding one until we get
14
- # an open port
15
- def get_open_port(start = 8081)
16
- try_port = start
17
- loop do
18
- begin
19
- s = TCPServer.open(try_port)
20
- s.close
21
- return try_port
22
- rescue Errno::EADDRINUSE
23
- try_port += 1
24
- end
25
- end
26
- end
27
-
28
- def initialize
29
- @dir = Dir.mktmpdir
30
- @mutex = Mutex.new
31
- @known_paths = {}
32
- @requested = {}
33
- @port = get_open_port
34
- @file_callback = ->(req, _res) { @requested[req.path] ||= true }
35
- @server = WEBrick::HTTPServer.new(Port: @port)
36
- @server.mount('/', WEBrick::HTTPServlet::FileHandler, @dir, FileCallback: @file_callback)
37
- end
38
-
39
- # Return true if each file has been served.
40
- def finished?
41
- Dir.entries(@dir).each do |entry|
42
- next if %w[. ..].include?(entry)
43
- return false if @requested["/#{entry}"].nil?
44
- end
45
- true
46
- end
47
-
48
- def temppath
49
- tmpfile = Tempfile.new('tmp', @dir)
50
- tmppath = tmpfile.path
51
- tmpfile.close!
52
- @mutex.synchronize do
53
- unless @known_paths.key?(tmppath)
54
- # no collision
55
- @known_paths[tmppath] = true
56
- return tmppath
57
- end
58
- end
59
- # need to retry, there was a collision
60
- temppath
61
- end
62
-
63
- # Add a file to this server. Returns the URL to use
64
- # to fetch the file & the file path
65
- def add_file(sourcefile = nil)
66
- fullpath = temppath
67
- path = File.basename(fullpath)
68
-
69
- if sourcefile
70
- @server.mount("/#{path}", WEBrick::HTTPServlet::FileHandler, sourcefile.path, FileCallback: @file_callback)
71
- else
72
- File.open(fullpath, 'w+') { |f| yield f }
73
- end
74
- ["http://#{Socket.gethostname}:#{@port}/#{path}", fullpath]
75
- end
76
-
77
- def start_server
78
- if @thread.nil?
79
- @thread = Thread.new do
80
- @server.start
81
- end
82
- end
83
- sleep(0.1) while @server.status != :Running
84
- @thread
85
- end
86
-
87
- # Stop server unconditionally.
88
- def stop_server
89
- @server.shutdown
90
- @thread.join
91
- end
92
-
93
- # Wait for server to finish serving all files.
94
- def join_server
95
- # ensure that each file is requested once before shutting down
96
- sleep(1) until finished?
97
- @server.shutdown
98
- @thread.join
99
- end
100
-
101
- # Run the server and wait until each file has been served once.
102
- # Cleans up files before it returns.
103
- def run
104
- start_server
105
- join_server
106
- # FileUtils.rm_rf(@dir)
107
- nil
108
- end
109
- end
110
- end
111
- end
@@ -1,113 +0,0 @@
1
- require 'spec_helper'
2
- require 'English'
3
-
4
- module Mrt::Ingest
5
- describe OneTimeServer do
6
- attr_reader :server
7
-
8
- before(:each) do
9
- @server = OneTimeServer.new
10
- server.start_server
11
- end
12
-
13
- after(:each) do
14
- server.stop_server
15
- end
16
-
17
- describe :finished? do
18
- it 'returns true when all files have been served, false otherwise' do
19
- urls = (0..3).map do |i|
20
- url_str, = server.add_file { |f| f.puts("I am file #{i}") }
21
- url_str
22
- end
23
-
24
- urls.each do |url|
25
- expect(server.finished?).to be_falsey
26
- Net::HTTP.get(URI.parse(url))
27
- end
28
-
29
- expect(server.finished?).to be_truthy
30
- end
31
- end
32
-
33
- describe :temppath do
34
- it 'avoids collisions' do
35
- tmpfiles = []
36
- allow(Tempfile).to receive(:new).and_wrap_original do |m, *args|
37
- tmpfile = m.call(*args)
38
- if tmpfiles.empty?
39
- known_paths = server.instance_variable_get(:@known_paths)
40
- known_paths[tmpfile.path] = true
41
- end
42
- tmpfiles << tmpfile.path
43
- tmpfile
44
- end
45
-
46
- temppath = server.temppath
47
- expect(tmpfiles.size).to eq(2)
48
- expect(temppath).to eq(tmpfiles[1])
49
- end
50
- end
51
-
52
- describe :join_server do
53
- it 'blocks till all files have been served' do
54
- urls = (0..3).map do |i|
55
- url_str, = server.add_file { |f| f.puts("I am file #{i}") }
56
- url_str
57
- end
58
-
59
- joining_thread = Thread.new { server.join_server }
60
- expect(joining_thread.status).not_to be_falsey
61
-
62
- client_process_id = fork do
63
- begin
64
- urls.each do |url|
65
- resp = Net::HTTP.get_response(URI.parse(url))
66
- status = resp.code.to_i
67
- exit(status) if status != 200
68
- end
69
- rescue StandardError => e
70
- warn(e)
71
- exit(1)
72
- end
73
- end
74
- Process.wait(client_process_id)
75
- expect($CHILD_STATUS.exitstatus).to eq(0) # just to be sure
76
-
77
- Timeout.timeout(5) { joining_thread.join }
78
- expect(joining_thread.status).to eq(false)
79
- end
80
- end
81
-
82
- describe :run do
83
- it 'starts, serves, and stops' do
84
- server2 = OneTimeServer.new
85
- urls = (0..3).map do |i|
86
- url_str, = server2.add_file { |f| f.puts("I am file #{i}") }
87
- url_str
88
- end
89
-
90
- running_thread = Thread.new { server2.run }
91
- expect(running_thread.status).not_to be_falsey
92
-
93
- client_process_id = fork do
94
- begin
95
- urls.each do |url|
96
- resp = Net::HTTP.get_response(URI.parse(url))
97
- status = resp.code.to_i
98
- exit(status) if status != 200
99
- end
100
- rescue StandardError => e
101
- warn(e)
102
- exit(1)
103
- end
104
- end
105
- Process.wait(client_process_id)
106
- expect($CHILD_STATUS.exitstatus).to eq(0) # just to be sure
107
-
108
- Timeout.timeout(5) { running_thread.join }
109
- expect(running_thread.status).to eq(false)
110
- end
111
- end
112
- end
113
- end