mrt-ingest 0.0.6 → 0.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c46977a557350af94d24aea5e4d53ed2252aa13bdbb059289ac48b6c36c9a05c
4
- data.tar.gz: bb12de3c86fe0a45f9b7f4fbea5f0a9478d13ed5e5dd1f6dea09da655cd08306
3
+ metadata.gz: 45bcaa8222c9328220d0a78f058217d7d75f74bbfa3665d2b4575f6331871795
4
+ data.tar.gz: e14e16b3101143fff600664d7178069b48f1a0a2c77582eb11942c0124486630
5
5
  SHA512:
6
- metadata.gz: 5391b059eb9bcfca59bc4af506b4c8cddf5a923e53b23e35f12f255893c5e88e178e2fdff150555a37477a0c4e64b45f9d3cf71a61855a03b7730a1a0084cac5
7
- data.tar.gz: 28cef0d3f22950f7127e766a2e1de0ebd78bffd326ee3a85e0a66beeb790b7417860155b61221dce09e932352c3411368294a467026f0e60e425ef0351588147
6
+ metadata.gz: cf9e18c9d0d6cc47f7d238d5726714991f3ff2a8201ca9026516843f43d964fa8f9bb4099a668a21b20da0d4661ed5a0a893707648ce0b67650f4288d3a044d3
7
+ data.tar.gz: ee160153eb825804c5a53080fc4b09562bc6dcd0bdc4527705efaf1706c3cd7ced461e0edb8f22c8a7ea47c47e3e271d83685430c2c93272610dcb6e7e859ecd
@@ -6,10 +6,9 @@ module Mrt
6
6
  # #File.
7
7
  class Component # :nodoc:
8
8
 
9
- attr_reader :server, :uri
9
+ attr_reader :uri
10
10
 
11
- def initialize(server, location, options)
12
- @server = server
11
+ def initialize(location, options)
13
12
  @name = options[:name]
14
13
  @digest = options[:digest]
15
14
  @mime_type = options[:mime_type]
@@ -18,25 +17,6 @@ module Mrt
18
17
  init_uri(location)
19
18
  end
20
19
 
21
- class << self
22
- def from_erc(server, erc)
23
- return Component.new(server, erc, name: 'mrt-erc.txt') if erc.is_a?(URI) || erc.is_a?(File)
24
- return from_hash(server, erc) if erc.is_a?(Hash)
25
-
26
- raise ArgumentError, 'Bad ERC supplied: must be a URI, File, or Hash'
27
- end
28
-
29
- def from_hash(server, erc_h)
30
- uri_str, path = server.add_file do |f|
31
- f.write("erc:\n")
32
- erc_h.each_pair { |k, v| f.write("#{k}: #{v}\n") }
33
- end
34
-
35
- digest = Mrt::Ingest::MessageDigest::MD5.from_file(File.new(path))
36
- Component.new(server, URI.parse(uri_str), name: 'mrt-erc.txt', digest: digest)
37
- end
38
- end
39
-
40
20
  def to_manifest_entry
41
21
  "#{@uri} | #{digest_type} | #{digest_value} | #{@size} | | #{@name} | #{@mime_type}\n"
42
22
  end
@@ -64,7 +44,7 @@ module Mrt
64
44
 
65
45
  def init_from_file(file)
66
46
  @name = File.basename(file.path) if @name.nil?
67
- @uri = server.add_file(file)[0]
47
+ # @uri = server.add_file(file)[0]
68
48
  @digest = Mrt::Ingest::MessageDigest::MD5.from_file(file) if @digest.nil?
69
49
  @size = File.size(file.path) if @size.nil?
70
50
  end
@@ -12,18 +12,16 @@ module Mrt
12
12
  # An object prepared for ingest into Merritt.
13
13
  class IObject
14
14
 
15
- attr_accessor :primary_identifier, :local_identifier, :erc
16
- attr_reader :server
15
+ attr_accessor :primary_identifier, :local_identifier, :erc, :what, :who, :when
17
16
 
18
17
  # Options can have the keys :primary_identifier,
19
- # :local_identifier, :server, or :erc. :erc can be a #File, #Uri
20
- # or a #Hash of metadata. :server is a #OneTimeServer.
18
+ # :local_identifier, or :erc. :erc can be a #File, #Uri
19
+ # or a #Hash of metadata.
21
20
  def initialize(options = {})
22
21
  @primary_identifier = options[:primary_identifier]
23
22
  @local_identifier = options[:local_identifier]
24
23
  @erc = options[:erc] || {}
25
24
  @components = []
26
- @server = options[:server] || Mrt::Ingest::OneTimeServer.new
27
25
  end
28
26
 
29
27
  # Add a component to the object. where can be either a #URI or a
@@ -32,33 +30,20 @@ module Mrt
32
30
  # subclass of Mrt::Ingest::MessageDigest::Base. If where is a
33
31
  # #File, it will be hosted on an embedded web server.
34
32
  def add_component(where, options = {})
35
- @components.push(Component.new(@server, where, options))
33
+ @components.push(Component.new(where, options))
36
34
  end
37
35
 
38
36
  # Make a Mrt::Ingest::Request object for this mrt-object
39
37
  def mk_request(profile, user_agent)
40
38
  manifest_file = Tempfile.new('mrt-ingest')
41
- erc_component = Component.from_erc(@server, @erc)
42
- mk_manifest(manifest_file, erc_component)
39
+ mk_manifest(manifest_file)
43
40
  # reset to beginning
44
41
  manifest_file.open
45
42
  new_request(manifest_file, profile, user_agent)
46
43
  end
47
44
 
48
- def start_server # :nodoc:
49
- @server.start_server
50
- end
51
-
52
- def join_server # :nodoc:
53
- @server.join_server
54
- end
55
-
56
- def stop_server # :nodoc:
57
- @server.stop_server
58
- end
59
-
60
45
  # rubocop:disable Metrics/LineLength
61
- def mk_manifest(manifest, erc_component) # :nodoc:
46
+ def mk_manifest(manifest) # :nodoc:
62
47
  manifest.write("#%checkm_0.7\n")
63
48
  manifest.write("#%profile http://uc3.cdlib.org/registry/ingest/manifest/mrt-ingest-manifest\n")
64
49
  manifest.write("#%prefix | mrt: | http://uc3.cdlib.org/ontology/mom#\n")
@@ -67,7 +52,6 @@ module Mrt
67
52
  @components.each do |c|
68
53
  manifest.write(c.to_manifest_entry)
69
54
  end
70
- manifest.write(erc_component.to_manifest_entry)
71
55
  manifest.write("#%EOF\n")
72
56
  end
73
57
  # rubocop:enable Metrics/LineLength
@@ -76,7 +60,6 @@ module Mrt
76
60
  # submitter.
77
61
  def start_ingest(client, profile, submitter)
78
62
  request = mk_request(profile, submitter)
79
- start_server
80
63
  @response = client.ingest(request)
81
64
  end
82
65
 
@@ -84,7 +67,6 @@ module Mrt
84
67
  def finish_ingest
85
68
  # XXX Right now we only join the hosting server; in the future
86
69
  # we will check the status via the ingest server.
87
- join_server
88
70
  end
89
71
 
90
72
  private
@@ -96,6 +78,9 @@ module Mrt
96
78
  type: 'object-manifest',
97
79
  submitter: user_agent,
98
80
  profile: profile,
81
+ title: @erc['what'],
82
+ creator: @erc['who'],
83
+ date: @erc['when'],
99
84
  local_identifier: @local_identifier,
100
85
  primary_identifier: @primary_identifier
101
86
  )
data/mrt-ingest.gemspec CHANGED
@@ -3,7 +3,7 @@ $LOAD_PATH.push File.expand_path('lib', __dir__)
3
3
  Gem::Specification.new do |s|
4
4
  s.required_ruby_version = '>= 2.4.0'
5
5
  s.name = 'mrt-ingest'
6
- s.version = '0.0.6'
6
+ s.version = '0.0.10'
7
7
  s.platform = Gem::Platform::RUBY
8
8
  s.authors = ['Mark Reyes', 'David Moles']
9
9
  s.email = ['mark.reyes@ucop.edu', 'david.moles@ucop.edu']
@@ -12,10 +12,11 @@ Gem::Specification.new do |s|
12
12
  s.description = 'A client for the Merritt ingest system. More details available from https://github.com/CDLUC3/mrt-doc/wiki'
13
13
  s.license = 'BSD-3-Clause'
14
14
 
15
- s.add_dependency 'json', '~> 2.0'
16
- s.add_dependency 'rest-client', '~> 2.0'
15
+ # s.add_dependency 'json', '~> 2.1'
16
+ s.add_dependency 'rest-client', '~> 2.1'
17
17
 
18
- s.add_development_dependency 'bundler', '>= 2.2.10'
18
+
19
+ # s.add_development_dependency 'bundler', '>= 2.2.10'
19
20
  s.add_development_dependency 'checkm', '0.0.6'
20
21
  s.add_development_dependency 'mocha', '~> 1.7'
21
22
  s.add_development_dependency 'rake', '~> 12.0'
@@ -4,8 +4,6 @@ module Mrt::Ingest
4
4
  describe Component do
5
5
  describe :from_erc do
6
6
  it 'rejects string ERCs' do
7
- server = instance_double(OneTimeServer)
8
- expect { Component.from_erc(server, 'I am not an ERC') }.to raise_error(ArgumentError)
9
7
  end
10
8
  end
11
9
  end
@@ -81,11 +81,9 @@ module Mrt::Ingest
81
81
  def check_erc_content(iobject, asserted_erc)
82
82
  erc_entry = get_uri_for_name(iobject, 'mrt-erc.txt')
83
83
  expect(erc_entry).not_to be_nil
84
- iobject.start_server
85
84
  begin
86
85
  expect(parse_erc_entry(erc_entry)).to eq(asserted_erc)
87
86
  ensure
88
- iobject.stop_server
89
87
  end
90
88
  end
91
89
 
@@ -137,11 +135,9 @@ module Mrt::Ingest
137
135
 
138
136
  it 'should serve a valid mrt-erc.txt entry' do
139
137
  expect(@erc_entry).not_to be_nil
140
- @iobject.start_server
141
138
  begin
142
139
  open(@erc_entry.values[0]).read.lines.to_a
143
140
  ensure
144
- @iobject.stop_server
145
141
  end
146
142
  end
147
143
 
@@ -186,11 +182,9 @@ module Mrt::Ingest
186
182
  manifest = parse_object_manifest(iobject)
187
183
  expect(manifest).not_to(be_nil)
188
184
  expect(uri_entry).not_to be_nil
189
- iobject.start_server
190
185
  begin
191
186
  expect(open(uri_entry.values[0]).read).to eq(FILE_CONTENT)
192
187
  ensure
193
- iobject.stop_server
194
188
  end
195
189
  end
196
190
  end
@@ -221,9 +215,8 @@ module Mrt::Ingest
221
215
  @iobject.start_ingest(@client, 'example_profile', 'Atom processor/Example collection')
222
216
 
223
217
  # TODO: just mock the server
224
- server = @iobject.server
225
- files = Dir.entries(server.dir).reject { |e| %w[. ..].include?(e) }
226
- urls = files.map { |f| "http://#{Socket.gethostname}:#{server.port}/#{f}" }
218
+ # files = Dir.entries(server.dir).reject { |e| %w[. ..].include?(e) }
219
+ # urls = files.map { |f| "http://#{Socket.gethostname}:#{server.port}/#{f}" }
227
220
 
228
221
  client_process_id = fork do
229
222
  begin
metadata CHANGED
@@ -1,58 +1,30 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mrt-ingest
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.6
4
+ version: 0.0.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mark Reyes
8
8
  - David Moles
9
- autorequire:
9
+ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2021-06-08 00:00:00.000000000 Z
12
+ date: 2022-07-12 00:00:00.000000000 Z
13
13
  dependencies:
14
- - !ruby/object:Gem::Dependency
15
- name: json
16
- requirement: !ruby/object:Gem::Requirement
17
- requirements:
18
- - - "~>"
19
- - !ruby/object:Gem::Version
20
- version: '2.0'
21
- type: :runtime
22
- prerelease: false
23
- version_requirements: !ruby/object:Gem::Requirement
24
- requirements:
25
- - - "~>"
26
- - !ruby/object:Gem::Version
27
- version: '2.0'
28
14
  - !ruby/object:Gem::Dependency
29
15
  name: rest-client
30
16
  requirement: !ruby/object:Gem::Requirement
31
17
  requirements:
32
18
  - - "~>"
33
19
  - !ruby/object:Gem::Version
34
- version: '2.0'
20
+ version: '2.1'
35
21
  type: :runtime
36
22
  prerelease: false
37
23
  version_requirements: !ruby/object:Gem::Requirement
38
24
  requirements:
39
25
  - - "~>"
40
26
  - !ruby/object:Gem::Version
41
- version: '2.0'
42
- - !ruby/object:Gem::Dependency
43
- name: bundler
44
- requirement: !ruby/object:Gem::Requirement
45
- requirements:
46
- - - ">="
47
- - !ruby/object:Gem::Version
48
- version: 2.2.10
49
- type: :development
50
- prerelease: false
51
- version_requirements: !ruby/object:Gem::Requirement
52
- requirements:
53
- - - ">="
54
- - !ruby/object:Gem::Version
55
- version: 2.2.10
27
+ version: '2.1'
56
28
  - !ruby/object:Gem::Dependency
57
29
  name: checkm
58
30
  requirement: !ruby/object:Gem::Requirement
@@ -209,7 +181,6 @@ files:
209
181
  - lib/mrt/ingest/ingest_exception.rb
210
182
  - lib/mrt/ingest/iobject.rb
211
183
  - lib/mrt/ingest/message_digest.rb
212
- - lib/mrt/ingest/one_time_server.rb
213
184
  - lib/mrt/ingest/request.rb
214
185
  - lib/mrt/ingest/response.rb
215
186
  - mrt-ingest-ruby.iml
@@ -221,14 +192,13 @@ files:
221
192
  - spec/unit/mrt/ingest/component_spec.rb
222
193
  - spec/unit/mrt/ingest/iobject_spec.rb
223
194
  - spec/unit/mrt/ingest/message_digest_spec.rb
224
- - spec/unit/mrt/ingest/one_time_server_spec.rb
225
195
  - spec/unit/mrt/ingest/request_spec.rb
226
196
  - spec/unit/mrt/ingest/response_spec.rb
227
197
  homepage: https://github.com/CDLUC3/mrt-ingest-ruby
228
198
  licenses:
229
199
  - BSD-3-Clause
230
200
  metadata: {}
231
- post_install_message:
201
+ post_install_message:
232
202
  rdoc_options: []
233
203
  require_paths:
234
204
  - lib
@@ -244,7 +214,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
244
214
  version: '0'
245
215
  requirements: []
246
216
  rubygems_version: 3.0.3.1
247
- signing_key:
217
+ signing_key:
248
218
  specification_version: 4
249
219
  summary: A client for Merritt ingest.
250
220
  test_files:
@@ -255,6 +225,5 @@ test_files:
255
225
  - spec/unit/mrt/ingest/component_spec.rb
256
226
  - spec/unit/mrt/ingest/iobject_spec.rb
257
227
  - spec/unit/mrt/ingest/message_digest_spec.rb
258
- - spec/unit/mrt/ingest/one_time_server_spec.rb
259
228
  - spec/unit/mrt/ingest/request_spec.rb
260
229
  - spec/unit/mrt/ingest/response_spec.rb
@@ -1,111 +0,0 @@
1
- # Author:: Erik Hetzner (mailto:erik.hetzner@ucop.edu)
2
- # Copyright:: Copyright (c) 2011, Regents of the University of California
3
-
4
- require 'webrick'
5
-
6
- # An HTTP server that will serve each file ONCE before shutting down.
7
- module Mrt
8
- module Ingest
9
- class OneTimeServer
10
-
11
- attr_reader :dir, :port
12
-
13
- # Find an open port, starting with start and adding one until we get
14
- # an open port
15
- def get_open_port(start = 8081)
16
- try_port = start
17
- loop do
18
- begin
19
- s = TCPServer.open(try_port)
20
- s.close
21
- return try_port
22
- rescue Errno::EADDRINUSE
23
- try_port += 1
24
- end
25
- end
26
- end
27
-
28
- def initialize
29
- @dir = Dir.mktmpdir
30
- @mutex = Mutex.new
31
- @known_paths = {}
32
- @requested = {}
33
- @port = get_open_port
34
- @file_callback = ->(req, _res) { @requested[req.path] ||= true }
35
- @server = WEBrick::HTTPServer.new(Port: @port)
36
- @server.mount('/', WEBrick::HTTPServlet::FileHandler, @dir, FileCallback: @file_callback)
37
- end
38
-
39
- # Return true if each file has been served.
40
- def finished?
41
- Dir.entries(@dir).each do |entry|
42
- next if %w[. ..].include?(entry)
43
- return false if @requested["/#{entry}"].nil?
44
- end
45
- true
46
- end
47
-
48
- def temppath
49
- tmpfile = Tempfile.new('tmp', @dir)
50
- tmppath = tmpfile.path
51
- tmpfile.close!
52
- @mutex.synchronize do
53
- unless @known_paths.key?(tmppath)
54
- # no collision
55
- @known_paths[tmppath] = true
56
- return tmppath
57
- end
58
- end
59
- # need to retry, there was a collision
60
- temppath
61
- end
62
-
63
- # Add a file to this server. Returns the URL to use
64
- # to fetch the file & the file path
65
- def add_file(sourcefile = nil)
66
- fullpath = temppath
67
- path = File.basename(fullpath)
68
-
69
- if sourcefile
70
- @server.mount("/#{path}", WEBrick::HTTPServlet::FileHandler, sourcefile.path, FileCallback: @file_callback)
71
- else
72
- File.open(fullpath, 'w+') { |f| yield f }
73
- end
74
- ["http://#{Socket.gethostname}:#{@port}/#{path}", fullpath]
75
- end
76
-
77
- def start_server
78
- if @thread.nil?
79
- @thread = Thread.new do
80
- @server.start
81
- end
82
- end
83
- sleep(0.1) while @server.status != :Running
84
- @thread
85
- end
86
-
87
- # Stop server unconditionally.
88
- def stop_server
89
- @server.shutdown
90
- @thread.join
91
- end
92
-
93
- # Wait for server to finish serving all files.
94
- def join_server
95
- # ensure that each file is requested once before shutting down
96
- sleep(1) until finished?
97
- @server.shutdown
98
- @thread.join
99
- end
100
-
101
- # Run the server and wait until each file has been served once.
102
- # Cleans up files before it returns.
103
- def run
104
- start_server
105
- join_server
106
- # FileUtils.rm_rf(@dir)
107
- nil
108
- end
109
- end
110
- end
111
- end
@@ -1,113 +0,0 @@
1
- require 'spec_helper'
2
- require 'English'
3
-
4
- module Mrt::Ingest
5
- describe OneTimeServer do
6
- attr_reader :server
7
-
8
- before(:each) do
9
- @server = OneTimeServer.new
10
- server.start_server
11
- end
12
-
13
- after(:each) do
14
- server.stop_server
15
- end
16
-
17
- describe :finished? do
18
- it 'returns true when all files have been served, false otherwise' do
19
- urls = (0..3).map do |i|
20
- url_str, = server.add_file { |f| f.puts("I am file #{i}") }
21
- url_str
22
- end
23
-
24
- urls.each do |url|
25
- expect(server.finished?).to be_falsey
26
- Net::HTTP.get(URI.parse(url))
27
- end
28
-
29
- expect(server.finished?).to be_truthy
30
- end
31
- end
32
-
33
- describe :temppath do
34
- it 'avoids collisions' do
35
- tmpfiles = []
36
- allow(Tempfile).to receive(:new).and_wrap_original do |m, *args|
37
- tmpfile = m.call(*args)
38
- if tmpfiles.empty?
39
- known_paths = server.instance_variable_get(:@known_paths)
40
- known_paths[tmpfile.path] = true
41
- end
42
- tmpfiles << tmpfile.path
43
- tmpfile
44
- end
45
-
46
- temppath = server.temppath
47
- expect(tmpfiles.size).to eq(2)
48
- expect(temppath).to eq(tmpfiles[1])
49
- end
50
- end
51
-
52
- describe :join_server do
53
- it 'blocks till all files have been served' do
54
- urls = (0..3).map do |i|
55
- url_str, = server.add_file { |f| f.puts("I am file #{i}") }
56
- url_str
57
- end
58
-
59
- joining_thread = Thread.new { server.join_server }
60
- expect(joining_thread.status).not_to be_falsey
61
-
62
- client_process_id = fork do
63
- begin
64
- urls.each do |url|
65
- resp = Net::HTTP.get_response(URI.parse(url))
66
- status = resp.code.to_i
67
- exit(status) if status != 200
68
- end
69
- rescue StandardError => e
70
- warn(e)
71
- exit(1)
72
- end
73
- end
74
- Process.wait(client_process_id)
75
- expect($CHILD_STATUS.exitstatus).to eq(0) # just to be sure
76
-
77
- Timeout.timeout(5) { joining_thread.join }
78
- expect(joining_thread.status).to eq(false)
79
- end
80
- end
81
-
82
- describe :run do
83
- it 'starts, serves, and stops' do
84
- server2 = OneTimeServer.new
85
- urls = (0..3).map do |i|
86
- url_str, = server2.add_file { |f| f.puts("I am file #{i}") }
87
- url_str
88
- end
89
-
90
- running_thread = Thread.new { server2.run }
91
- expect(running_thread.status).not_to be_falsey
92
-
93
- client_process_id = fork do
94
- begin
95
- urls.each do |url|
96
- resp = Net::HTTP.get_response(URI.parse(url))
97
- status = resp.code.to_i
98
- exit(status) if status != 200
99
- end
100
- rescue StandardError => e
101
- warn(e)
102
- exit(1)
103
- end
104
- end
105
- Process.wait(client_process_id)
106
- expect($CHILD_STATUS.exitstatus).to eq(0) # just to be sure
107
-
108
- Timeout.timeout(5) { running_thread.join }
109
- expect(running_thread.status).to eq(false)
110
- end
111
- end
112
- end
113
- end