mrt-ingest 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.gitignore +5 -1
- data/.idea/.rakeTasks +7 -0
- data/.idea/encodings.xml +4 -0
- data/.idea/inspectionProfiles/Project_Default.xml +18 -0
- data/.idea/misc.xml +32 -0
- data/.idea/modules.xml +8 -0
- data/.idea/vcs.xml +6 -0
- data/.rubocop.yml +87 -0
- data/.ruby-version +2 -0
- data/CHANGES.md +25 -0
- data/Gemfile +1 -4
- data/README.md +56 -0
- data/Rakefile +36 -17
- data/lib/mrt/ingest.rb +1 -16
- data/lib/mrt/ingest/client.rb +11 -10
- data/lib/mrt/ingest/component.rb +78 -0
- data/lib/mrt/ingest/ingest_exception.rb +6 -0
- data/lib/mrt/ingest/iobject.rb +36 -93
- data/lib/mrt/ingest/message_digest.rb +8 -10
- data/lib/mrt/ingest/one_time_server.rb +34 -44
- data/lib/mrt/ingest/request.rb +67 -41
- data/lib/mrt/ingest/response.rb +6 -6
- data/mrt-ingest-ruby.iml +65 -0
- data/mrt-ingest.gemspec +22 -20
- data/spec/.rubocop.yml +16 -0
- data/spec/spec_helper.rb +36 -0
- data/spec/unit/data/file.txt +1 -0
- data/spec/unit/mrt/ingest/client_spec.rb +31 -0
- data/spec/unit/mrt/ingest/component_spec.rb +12 -0
- data/spec/unit/mrt/ingest/iobject_spec.rb +248 -0
- data/spec/unit/mrt/ingest/message_digest_spec.rb +38 -0
- data/spec/unit/mrt/ingest/one_time_server_spec.rb +113 -0
- data/spec/unit/mrt/ingest/request_spec.rb +25 -0
- data/spec/unit/mrt/ingest/response_spec.rb +56 -0
- metadata +95 -15
- data/README +0 -29
- data/test/test_client.rb +0 -40
- data/test/test_iobject.rb +0 -174
- data/test/test_request.rb +0 -37
- data/test/test_response.rb +0 -63
data/lib/mrt/ingest/iobject.rb
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
# Author:: Erik Hetzner (mailto:erik.hetzner@ucop.edu)
|
2
2
|
# Copyright:: Copyright (c) 2011, Regents of the University of California
|
3
3
|
|
4
|
-
require 'mrt/ingest'
|
5
4
|
require 'tempfile'
|
6
5
|
require 'uri'
|
7
6
|
require 'open-uri'
|
@@ -9,147 +8,76 @@ require 'digest/md5'
|
|
9
8
|
|
10
9
|
module Mrt
|
11
10
|
module Ingest
|
12
|
-
|
13
|
-
# #File.
|
14
|
-
class Component # :nodoc:
|
15
|
-
def initialize(server, where, options)
|
16
|
-
@name = options[:name]
|
17
|
-
@digest = options[:digest]
|
18
|
-
@mime_type = options[:mime_type]
|
19
|
-
@size = options[:size]
|
20
|
-
# @prefetch = options[:prefetch] || false
|
21
|
-
@prefetch = false # TODO: remove prefetch code
|
22
|
-
|
23
|
-
case where
|
24
|
-
when File, Tempfile
|
25
|
-
@name = File.basename(where.path) if @name.nil?
|
26
|
-
@uri = server.add_file(where)[0]
|
27
|
-
if @digest.nil? then
|
28
|
-
@digest = Mrt::Ingest::MessageDigest::MD5.from_file(where)
|
29
|
-
end
|
30
|
-
@size = File.size(where.path) if @size.nil?
|
31
|
-
when URI
|
32
|
-
@name = File.basename(where.to_s) if @name.nil?
|
33
|
-
if @prefetch then
|
34
|
-
digest = Digest::MD5.new()
|
35
|
-
@uri, ignore = server.add_file do |f|
|
36
|
-
open(where, (options[:prefetch_options] || {})) do |u|
|
37
|
-
while (buff = u.read(1024)) do
|
38
|
-
f << buff
|
39
|
-
digest << buff
|
40
|
-
end
|
41
|
-
end
|
42
|
-
end
|
43
|
-
@digest = Mrt::Ingest::MessageDigest::MD5.new(digest.hexdigest)
|
44
|
-
else
|
45
|
-
@uri = where
|
46
|
-
end
|
47
|
-
else
|
48
|
-
raise IngestException.new("Trying to add a component that is not a File or URI")
|
49
|
-
end
|
50
|
-
|
51
|
-
end
|
52
|
-
|
53
|
-
def to_manifest_entry
|
54
|
-
(digest_alg, digest_value) = if @digest.nil? then
|
55
|
-
['', '']
|
56
|
-
else
|
57
|
-
[@digest.type, @digest.value]
|
58
|
-
end
|
59
|
-
return "#{@uri} | #{digest_alg} | #{digest_value} | #{@size || ''} | | #{@name} | #{@mime_type || '' }\n"
|
60
|
-
end
|
61
|
-
end
|
62
|
-
|
11
|
+
|
63
12
|
# An object prepared for ingest into Merritt.
|
64
13
|
class IObject
|
65
|
-
|
14
|
+
|
66
15
|
attr_accessor :primary_identifier, :local_identifier, :erc
|
16
|
+
attr_reader :server
|
67
17
|
|
68
18
|
# Options can have the keys :primary_identifier,
|
69
19
|
# :local_identifier, :server, or :erc. :erc can be a #File, #URI
|
70
20
|
# or a #Hash of metadata. :server is a #OneTimeServer.
|
71
|
-
def initialize(options={})
|
21
|
+
def initialize(options = {})
|
72
22
|
@primary_identifier = options[:primary_identifier]
|
73
23
|
@local_identifier = options[:local_identifier]
|
74
|
-
@erc = options[:erc] ||
|
24
|
+
@erc = options[:erc] || {}
|
75
25
|
@components = []
|
76
26
|
@server = options[:server] || Mrt::Ingest::OneTimeServer.new
|
77
27
|
end
|
78
|
-
|
28
|
+
|
79
29
|
# Add a component to the object. where can be either a #URI or a
|
80
30
|
# #File. Options is a hash whose keys may be :name, :digest,
|
81
31
|
# :mime_type, or :size. If :digest is supplied, it must be a
|
82
32
|
# subclass of Mrt::Ingest::MessageDigest::Base. If where is a
|
83
33
|
# #File, it will be hosted on an embedded web server.
|
84
|
-
def add_component(where, options={})
|
34
|
+
def add_component(where, options = {})
|
85
35
|
@components.push(Component.new(@server, where, options))
|
86
36
|
end
|
87
|
-
|
37
|
+
|
88
38
|
# Make a Mrt::Ingest::Request object for this mrt-object
|
89
|
-
def mk_request(profile,
|
90
|
-
|
91
|
-
|
92
|
-
Component.new(@server, @erc, :name => 'mrt-erc.txt')
|
93
|
-
when Hash
|
94
|
-
uri_str, path = @server.add_file do |f|
|
95
|
-
f.write("erc:\n")
|
96
|
-
@erc.each_pair do |k, v|
|
97
|
-
f.write("#{k}: #{v}\n")
|
98
|
-
end
|
99
|
-
end
|
100
|
-
Component.new(@server,
|
101
|
-
URI.parse(uri_str),
|
102
|
-
:name => 'mrt-erc.txt',
|
103
|
-
:digest => Mrt::Ingest::MessageDigest::MD5.from_file(File.new(path)))
|
104
|
-
else
|
105
|
-
raise IngestException.new("Bad ERC supplied: must be a URI, File, or Hash")
|
106
|
-
end
|
107
|
-
manifest_file = Tempfile.new("mrt-ingest")
|
39
|
+
def mk_request(profile, user_agent)
|
40
|
+
manifest_file = Tempfile.new('mrt-ingest')
|
41
|
+
erc_component = Component.from_erc(@server, @erc)
|
108
42
|
mk_manifest(manifest_file, erc_component)
|
109
43
|
# reset to beginning
|
110
44
|
manifest_file.open
|
111
|
-
|
112
|
-
new(:file => manifest_file,
|
113
|
-
:filename => manifest_file.path.split(/\//).last,
|
114
|
-
:type => "object-manifest",
|
115
|
-
:submitter => submitter,
|
116
|
-
:profile => profile,
|
117
|
-
:local_identifier => @local_identifier,
|
118
|
-
:primary_identifier => @primary_identifier)
|
45
|
+
new_request(manifest_file, profile, user_agent)
|
119
46
|
end
|
120
47
|
|
121
48
|
def start_server # :nodoc:
|
122
|
-
|
49
|
+
@server.start_server
|
123
50
|
end
|
124
51
|
|
125
52
|
def join_server # :nodoc:
|
126
|
-
|
53
|
+
@server.join_server
|
127
54
|
end
|
128
55
|
|
129
56
|
def stop_server # :nodoc:
|
130
|
-
|
57
|
+
@server.stop_server
|
131
58
|
end
|
132
|
-
|
59
|
+
|
60
|
+
# rubocop:disable Metrics/LineLength
|
133
61
|
def mk_manifest(manifest, erc_component) # :nodoc:
|
134
62
|
manifest.write("#%checkm_0.7\n")
|
135
63
|
manifest.write("#%profile http://uc3.cdlib.org/registry/ingest/manifest/mrt-ingest-manifest\n")
|
136
64
|
manifest.write("#%prefix | mrt: | http://uc3.cdlib.org/ontology/mom#\n")
|
137
65
|
manifest.write("#%prefix | nfo: | http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#\n")
|
138
66
|
manifest.write("#%fields | nfo:fileUrl | nfo:hashAlgorithm | nfo:hashValue | nfo:fileSize | nfo:fileLastModified | nfo:fileName | mrt:mimeType\n")
|
139
|
-
@components.each
|
67
|
+
@components.each do |c|
|
140
68
|
manifest.write(c.to_manifest_entry)
|
141
|
-
|
69
|
+
end
|
142
70
|
manifest.write(erc_component.to_manifest_entry)
|
143
71
|
manifest.write("#%EOF\n")
|
144
72
|
end
|
145
|
-
|
73
|
+
# rubocop:enable Metrics/LineLength
|
74
|
+
|
146
75
|
# Begin an ingest on the given client, with a profile and
|
147
76
|
# submitter.
|
148
77
|
def start_ingest(client, profile, submitter)
|
149
78
|
request = mk_request(profile, submitter)
|
150
79
|
start_server
|
151
80
|
@response = client.ingest(request)
|
152
|
-
return @response
|
153
81
|
end
|
154
82
|
|
155
83
|
# Wait for the ingest of this object to finish.
|
@@ -158,6 +86,21 @@ module Mrt
|
|
158
86
|
# we will check the status via the ingest server.
|
159
87
|
join_server
|
160
88
|
end
|
89
|
+
|
90
|
+
private
|
91
|
+
|
92
|
+
def new_request(manifest_file, profile, user_agent)
|
93
|
+
Mrt::Ingest::Request.new(
|
94
|
+
file: manifest_file,
|
95
|
+
filename: manifest_file.path.split(%r{/}).last,
|
96
|
+
type: 'object-manifest',
|
97
|
+
submitter: user_agent,
|
98
|
+
profile: profile,
|
99
|
+
local_identifier: @local_identifier,
|
100
|
+
primary_identifier: @primary_identifier
|
101
|
+
)
|
102
|
+
end
|
103
|
+
|
161
104
|
end
|
162
105
|
end
|
163
106
|
end
|
@@ -13,37 +13,35 @@ module Mrt
|
|
13
13
|
@type = type
|
14
14
|
end
|
15
15
|
end
|
16
|
-
|
16
|
+
|
17
17
|
# Represents a SHA256 digest suitable for a Checkm manifest.
|
18
18
|
class SHA256 < Base
|
19
19
|
def initialize(value)
|
20
|
-
super(value,
|
20
|
+
super(value, 'sha-256')
|
21
21
|
end
|
22
22
|
end
|
23
23
|
|
24
24
|
# Represents an MD5 digest suitable for a Checkm manifest.
|
25
25
|
class MD5 < Base
|
26
26
|
def initialize(value)
|
27
|
-
super(value,
|
27
|
+
super(value, 'md5')
|
28
28
|
end
|
29
|
-
|
29
|
+
|
30
30
|
# Generate a digest from a file.
|
31
31
|
def self.from_file(file)
|
32
32
|
digest = Digest::MD5.new
|
33
33
|
File.open(file.path, 'r') do |f|
|
34
|
-
buff =
|
35
|
-
|
36
|
-
digest << buff
|
37
|
-
end
|
34
|
+
buff = ''
|
35
|
+
digest << buff until f.read(1024, buff).nil?
|
38
36
|
end
|
39
|
-
|
37
|
+
Mrt::Ingest::MessageDigest::MD5.new(digest.hexdigest)
|
40
38
|
end
|
41
39
|
end
|
42
40
|
|
43
41
|
# Represents a SHA1 digest suitable for a Checkm manifest.
|
44
42
|
class SHA1 < Base
|
45
43
|
def initialize(value)
|
46
|
-
super(value,
|
44
|
+
super(value, 'sha1')
|
47
45
|
end
|
48
46
|
end
|
49
47
|
end
|
@@ -7,17 +7,20 @@ require 'webrick'
|
|
7
7
|
module Mrt
|
8
8
|
module Ingest
|
9
9
|
class OneTimeServer
|
10
|
+
|
11
|
+
attr_reader :dir, :port
|
12
|
+
|
10
13
|
# Find an open port, starting with start and adding one until we get
|
11
14
|
# an open port
|
12
|
-
def get_open_port(start=8081)
|
15
|
+
def get_open_port(start = 8081)
|
13
16
|
try_port = start
|
14
|
-
|
17
|
+
loop do
|
15
18
|
begin
|
16
19
|
s = TCPServer.open(try_port)
|
17
20
|
s.close
|
18
21
|
return try_port
|
19
22
|
rescue Errno::EADDRINUSE
|
20
|
-
try_port
|
23
|
+
try_port += 1
|
21
24
|
end
|
22
25
|
end
|
23
26
|
end
|
@@ -27,71 +30,58 @@ module Mrt
|
|
27
30
|
@mutex = Mutex.new
|
28
31
|
@known_paths = {}
|
29
32
|
@requested = {}
|
30
|
-
@port = get_open_port
|
31
|
-
@file_callback =
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
config = { :Port => @port }
|
36
|
-
@server = WEBrick::HTTPServer.new(config)
|
37
|
-
@server.mount("/", WEBrick::HTTPServlet::FileHandler, @dir,
|
38
|
-
{ :FileCallback=>@file_callback })
|
33
|
+
@port = get_open_port
|
34
|
+
@file_callback = ->(req, _res) { @requested[req.path] ||= true }
|
35
|
+
@server = WEBrick::HTTPServer.new(Port: @port)
|
36
|
+
@server.mount('/', WEBrick::HTTPServlet::FileHandler, @dir, FileCallback: @file_callback)
|
39
37
|
end
|
40
38
|
|
41
39
|
# Return true if each file has been served.
|
42
40
|
def finished?
|
43
41
|
Dir.entries(@dir).each do |entry|
|
44
|
-
next if
|
45
|
-
if @requested["/#{entry}"].nil?
|
46
|
-
return false
|
47
|
-
end
|
42
|
+
next if %w[. ..].include?(entry)
|
43
|
+
return false if @requested["/#{entry}"].nil?
|
48
44
|
end
|
49
|
-
|
45
|
+
true
|
50
46
|
end
|
51
47
|
|
52
|
-
def
|
53
|
-
tmpfile = Tempfile.new(
|
48
|
+
def temppath
|
49
|
+
tmpfile = Tempfile.new('tmp', @dir)
|
54
50
|
tmppath = tmpfile.path
|
55
51
|
tmpfile.close!
|
56
52
|
@mutex.synchronize do
|
57
|
-
|
53
|
+
unless @known_paths.key?(tmppath)
|
58
54
|
# no collision
|
59
55
|
@known_paths[tmppath] = true
|
60
56
|
return tmppath
|
61
57
|
end
|
62
58
|
end
|
63
59
|
# need to retry, there was a collision
|
64
|
-
|
60
|
+
temppath
|
65
61
|
end
|
66
62
|
|
67
63
|
# Add a file to this server. Returns the URL to use
|
68
64
|
# to fetch the file & the file path
|
69
|
-
def add_file(sourcefile=nil)
|
70
|
-
fullpath =
|
65
|
+
def add_file(sourcefile = nil)
|
66
|
+
fullpath = temppath
|
71
67
|
path = File.basename(fullpath)
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
sourcefile.path,
|
76
|
-
{ :FileCallback=>@file_callback })
|
68
|
+
|
69
|
+
if sourcefile
|
70
|
+
@server.mount("/#{path}", WEBrick::HTTPServlet::FileHandler, sourcefile.path, FileCallback: @file_callback)
|
77
71
|
else
|
78
|
-
File.open(fullpath, 'w+')
|
79
|
-
yield f
|
80
|
-
end
|
72
|
+
File.open(fullpath, 'w+') { |f| yield f }
|
81
73
|
end
|
82
|
-
|
74
|
+
["http://#{Socket.gethostname}:#{@port}/#{path}", fullpath]
|
83
75
|
end
|
84
|
-
|
76
|
+
|
85
77
|
def start_server
|
86
|
-
if @thread.nil?
|
78
|
+
if @thread.nil?
|
87
79
|
@thread = Thread.new do
|
88
80
|
@server.start
|
89
81
|
end
|
90
82
|
end
|
91
|
-
while
|
92
|
-
|
93
|
-
end
|
94
|
-
return @thread
|
83
|
+
sleep(0.1) while @server.status != :Running
|
84
|
+
@thread
|
95
85
|
end
|
96
86
|
|
97
87
|
# Stop server unconditionally.
|
@@ -103,18 +93,18 @@ module Mrt
|
|
103
93
|
# Wait for server to finish serving all files.
|
104
94
|
def join_server
|
105
95
|
# ensure that each file is requested once before shutting down
|
106
|
-
|
107
|
-
@server.shutdown
|
96
|
+
sleep(1) until finished?
|
97
|
+
@server.shutdown
|
108
98
|
@thread.join
|
109
99
|
end
|
110
|
-
|
100
|
+
|
111
101
|
# Run the server and wait until each file has been served once.
|
112
102
|
# Cleans up files before it returns.
|
113
103
|
def run
|
114
|
-
start_server
|
115
|
-
join_server
|
104
|
+
start_server
|
105
|
+
join_server
|
116
106
|
# FileUtils.rm_rf(@dir)
|
117
|
-
|
107
|
+
nil
|
118
108
|
end
|
119
109
|
end
|
120
110
|
end
|
data/lib/mrt/ingest/request.rb
CHANGED
@@ -3,54 +3,80 @@
|
|
3
3
|
|
4
4
|
module Mrt
|
5
5
|
module Ingest
|
6
|
-
class RequestException <
|
6
|
+
class RequestException < RuntimeError
|
7
7
|
end
|
8
8
|
|
9
9
|
# Represents a request to be sent to an ingest server.
|
10
10
|
class Request
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
11
|
+
|
12
|
+
attr_accessor :creator
|
13
|
+
attr_accessor :date
|
14
|
+
attr_accessor :digest
|
15
|
+
attr_accessor :file
|
16
|
+
attr_accessor :filename
|
17
|
+
attr_accessor :local_identifier
|
18
|
+
attr_accessor :note
|
19
|
+
attr_accessor :primary_identifier
|
20
|
+
attr_accessor :profile
|
21
|
+
attr_accessor :submitter
|
22
|
+
attr_accessor :title
|
23
|
+
attr_accessor :type
|
24
|
+
|
25
|
+
# rubocop:disable Metrics/MethodLength, Metrics/ParameterLists
|
26
|
+
def initialize(
|
27
|
+
profile:, submitter:, type:,
|
28
|
+
creator: nil, date: nil, digest: nil, file: nil, filename: nil,
|
29
|
+
local_identifier: nil, primary_identifier: nil, note: nil, title: nil
|
30
|
+
)
|
31
|
+
raise ArgumentError, 'profile cannot be nil' unless profile
|
32
|
+
raise ArgumentError, 'submitter cannot be nil' unless submitter
|
33
|
+
raise ArgumentError, 'type cannot be nil' unless type
|
34
|
+
|
35
|
+
@creator = creator
|
36
|
+
@date = date
|
37
|
+
@digest = digest
|
38
|
+
@file = file
|
39
|
+
@filename = filename
|
40
|
+
@local_identifier = local_identifier
|
41
|
+
@primary_identifier = primary_identifier
|
42
|
+
@profile = profile
|
43
|
+
@note = note
|
44
|
+
@submitter = submitter
|
45
|
+
@title = title
|
46
|
+
@type = type
|
34
47
|
end
|
35
|
-
|
48
|
+
# rubocop:enable Metrics/MethodLength, Metrics/ParameterLists
|
49
|
+
|
36
50
|
# Returns a hash of arguments suitable for sending to a server.
|
51
|
+
# rubocop: disable Metrics/MethodLength, Metrics/AbcSize
|
37
52
|
def mk_args
|
38
|
-
|
39
|
-
'creator'
|
40
|
-
'date'
|
41
|
-
'digestType'
|
42
|
-
'digestValue'
|
43
|
-
'file'
|
44
|
-
'filename'
|
45
|
-
'localIdentifier'
|
46
|
-
'primaryIdentifier' =>
|
47
|
-
'profile'
|
48
|
-
'note'
|
49
|
-
'responseForm'
|
50
|
-
'submitter'
|
51
|
-
'title'
|
52
|
-
'type'
|
53
|
-
}.reject{|
|
53
|
+
{
|
54
|
+
'creator' => creator,
|
55
|
+
'date' => date,
|
56
|
+
'digestType' => digest_type,
|
57
|
+
'digestValue' => digest_value,
|
58
|
+
'file' => file,
|
59
|
+
'filename' => filename,
|
60
|
+
'localIdentifier' => local_identifier,
|
61
|
+
'primaryIdentifier' => primary_identifier,
|
62
|
+
'profile' => profile,
|
63
|
+
'note' => note,
|
64
|
+
'responseForm' => 'json',
|
65
|
+
'submitter' => submitter,
|
66
|
+
'title' => title,
|
67
|
+
'type' => type
|
68
|
+
}.reject { |_k, v| v.nil? || (v == '') }
|
69
|
+
end
|
70
|
+
# rubocop: enable Metrics/MethodLength, Metrics/AbcSize
|
71
|
+
|
72
|
+
private
|
73
|
+
|
74
|
+
def digest_value
|
75
|
+
digest && digest.value
|
76
|
+
end
|
77
|
+
|
78
|
+
def digest_type
|
79
|
+
digest && digest.type
|
54
80
|
end
|
55
81
|
end
|
56
82
|
end
|