mrt-ingest 0.0.4 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.gitignore +5 -1
- data/.idea/.rakeTasks +7 -0
- data/.idea/encodings.xml +4 -0
- data/.idea/inspectionProfiles/Project_Default.xml +18 -0
- data/.idea/misc.xml +32 -0
- data/.idea/modules.xml +8 -0
- data/.idea/vcs.xml +6 -0
- data/.rubocop.yml +87 -0
- data/.ruby-version +2 -0
- data/CHANGES.md +25 -0
- data/Gemfile +1 -4
- data/README.md +56 -0
- data/Rakefile +36 -17
- data/lib/mrt/ingest.rb +1 -16
- data/lib/mrt/ingest/client.rb +11 -10
- data/lib/mrt/ingest/component.rb +78 -0
- data/lib/mrt/ingest/ingest_exception.rb +6 -0
- data/lib/mrt/ingest/iobject.rb +36 -93
- data/lib/mrt/ingest/message_digest.rb +8 -10
- data/lib/mrt/ingest/one_time_server.rb +34 -44
- data/lib/mrt/ingest/request.rb +67 -41
- data/lib/mrt/ingest/response.rb +6 -6
- data/mrt-ingest-ruby.iml +65 -0
- data/mrt-ingest.gemspec +22 -20
- data/spec/.rubocop.yml +16 -0
- data/spec/spec_helper.rb +36 -0
- data/spec/unit/data/file.txt +1 -0
- data/spec/unit/mrt/ingest/client_spec.rb +31 -0
- data/spec/unit/mrt/ingest/component_spec.rb +12 -0
- data/spec/unit/mrt/ingest/iobject_spec.rb +248 -0
- data/spec/unit/mrt/ingest/message_digest_spec.rb +38 -0
- data/spec/unit/mrt/ingest/one_time_server_spec.rb +113 -0
- data/spec/unit/mrt/ingest/request_spec.rb +25 -0
- data/spec/unit/mrt/ingest/response_spec.rb +56 -0
- metadata +95 -15
- data/README +0 -29
- data/test/test_client.rb +0 -40
- data/test/test_iobject.rb +0 -174
- data/test/test_request.rb +0 -37
- data/test/test_response.rb +0 -63
data/lib/mrt/ingest/iobject.rb
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
# Author:: Erik Hetzner (mailto:erik.hetzner@ucop.edu)
|
2
2
|
# Copyright:: Copyright (c) 2011, Regents of the University of California
|
3
3
|
|
4
|
-
require 'mrt/ingest'
|
5
4
|
require 'tempfile'
|
6
5
|
require 'uri'
|
7
6
|
require 'open-uri'
|
@@ -9,147 +8,76 @@ require 'digest/md5'
|
|
9
8
|
|
10
9
|
module Mrt
|
11
10
|
module Ingest
|
12
|
-
|
13
|
-
# #File.
|
14
|
-
class Component # :nodoc:
|
15
|
-
def initialize(server, where, options)
|
16
|
-
@name = options[:name]
|
17
|
-
@digest = options[:digest]
|
18
|
-
@mime_type = options[:mime_type]
|
19
|
-
@size = options[:size]
|
20
|
-
# @prefetch = options[:prefetch] || false
|
21
|
-
@prefetch = false # TODO: remove prefetch code
|
22
|
-
|
23
|
-
case where
|
24
|
-
when File, Tempfile
|
25
|
-
@name = File.basename(where.path) if @name.nil?
|
26
|
-
@uri = server.add_file(where)[0]
|
27
|
-
if @digest.nil? then
|
28
|
-
@digest = Mrt::Ingest::MessageDigest::MD5.from_file(where)
|
29
|
-
end
|
30
|
-
@size = File.size(where.path) if @size.nil?
|
31
|
-
when URI
|
32
|
-
@name = File.basename(where.to_s) if @name.nil?
|
33
|
-
if @prefetch then
|
34
|
-
digest = Digest::MD5.new()
|
35
|
-
@uri, ignore = server.add_file do |f|
|
36
|
-
open(where, (options[:prefetch_options] || {})) do |u|
|
37
|
-
while (buff = u.read(1024)) do
|
38
|
-
f << buff
|
39
|
-
digest << buff
|
40
|
-
end
|
41
|
-
end
|
42
|
-
end
|
43
|
-
@digest = Mrt::Ingest::MessageDigest::MD5.new(digest.hexdigest)
|
44
|
-
else
|
45
|
-
@uri = where
|
46
|
-
end
|
47
|
-
else
|
48
|
-
raise IngestException.new("Trying to add a component that is not a File or URI")
|
49
|
-
end
|
50
|
-
|
51
|
-
end
|
52
|
-
|
53
|
-
def to_manifest_entry
|
54
|
-
(digest_alg, digest_value) = if @digest.nil? then
|
55
|
-
['', '']
|
56
|
-
else
|
57
|
-
[@digest.type, @digest.value]
|
58
|
-
end
|
59
|
-
return "#{@uri} | #{digest_alg} | #{digest_value} | #{@size || ''} | | #{@name} | #{@mime_type || '' }\n"
|
60
|
-
end
|
61
|
-
end
|
62
|
-
|
11
|
+
|
63
12
|
# An object prepared for ingest into Merritt.
|
64
13
|
class IObject
|
65
|
-
|
14
|
+
|
66
15
|
attr_accessor :primary_identifier, :local_identifier, :erc
|
16
|
+
attr_reader :server
|
67
17
|
|
68
18
|
# Options can have the keys :primary_identifier,
|
69
19
|
# :local_identifier, :server, or :erc. :erc can be a #File, #Uri
|
70
20
|
# or a #Hash of metadata. :server is a #OneTimeServer.
|
71
|
-
def initialize(options={})
|
21
|
+
def initialize(options = {})
|
72
22
|
@primary_identifier = options[:primary_identifier]
|
73
23
|
@local_identifier = options[:local_identifier]
|
74
|
-
@erc = options[:erc] ||
|
24
|
+
@erc = options[:erc] || {}
|
75
25
|
@components = []
|
76
26
|
@server = options[:server] || Mrt::Ingest::OneTimeServer.new
|
77
27
|
end
|
78
|
-
|
28
|
+
|
79
29
|
# Add a component to the object. where can be either a #URI or a
|
80
30
|
# #File. Options is a hash whose keys may be :name, :digest,
|
81
31
|
# :mime_type, or :size. If :digest is supplied, it must be a
|
82
32
|
# subclass of Mrt::Ingest::MessageDigest::Base. If where is a
|
83
33
|
# #File, it will be hosted on an embedded web server.
|
84
|
-
def add_component(where, options={})
|
34
|
+
def add_component(where, options = {})
|
85
35
|
@components.push(Component.new(@server, where, options))
|
86
36
|
end
|
87
|
-
|
37
|
+
|
88
38
|
# Make a Mrt::Ingest::Request object for this mrt-object
|
89
|
-
def mk_request(profile,
|
90
|
-
|
91
|
-
|
92
|
-
Component.new(@server, @erc, :name => 'mrt-erc.txt')
|
93
|
-
when Hash
|
94
|
-
uri_str, path = @server.add_file do |f|
|
95
|
-
f.write("erc:\n")
|
96
|
-
@erc.each_pair do |k, v|
|
97
|
-
f.write("#{k}: #{v}\n")
|
98
|
-
end
|
99
|
-
end
|
100
|
-
Component.new(@server,
|
101
|
-
URI.parse(uri_str),
|
102
|
-
:name => 'mrt-erc.txt',
|
103
|
-
:digest => Mrt::Ingest::MessageDigest::MD5.from_file(File.new(path)))
|
104
|
-
else
|
105
|
-
raise IngestException.new("Bad ERC supplied: must be a URI, File, or Hash")
|
106
|
-
end
|
107
|
-
manifest_file = Tempfile.new("mrt-ingest")
|
39
|
+
def mk_request(profile, user_agent)
|
40
|
+
manifest_file = Tempfile.new('mrt-ingest')
|
41
|
+
erc_component = Component.from_erc(@server, @erc)
|
108
42
|
mk_manifest(manifest_file, erc_component)
|
109
43
|
# reset to beginning
|
110
44
|
manifest_file.open
|
111
|
-
|
112
|
-
new(:file => manifest_file,
|
113
|
-
:filename => manifest_file.path.split(/\//).last,
|
114
|
-
:type => "object-manifest",
|
115
|
-
:submitter => submitter,
|
116
|
-
:profile => profile,
|
117
|
-
:local_identifier => @local_identifier,
|
118
|
-
:primary_identifier => @primary_identifier)
|
45
|
+
new_request(manifest_file, profile, user_agent)
|
119
46
|
end
|
120
47
|
|
121
48
|
def start_server # :nodoc:
|
122
|
-
|
49
|
+
@server.start_server
|
123
50
|
end
|
124
51
|
|
125
52
|
def join_server # :nodoc:
|
126
|
-
|
53
|
+
@server.join_server
|
127
54
|
end
|
128
55
|
|
129
56
|
def stop_server # :nodoc:
|
130
|
-
|
57
|
+
@server.stop_server
|
131
58
|
end
|
132
|
-
|
59
|
+
|
60
|
+
# rubocop:disable Metrics/LineLength
|
133
61
|
def mk_manifest(manifest, erc_component) # :nodoc:
|
134
62
|
manifest.write("#%checkm_0.7\n")
|
135
63
|
manifest.write("#%profile http://uc3.cdlib.org/registry/ingest/manifest/mrt-ingest-manifest\n")
|
136
64
|
manifest.write("#%prefix | mrt: | http://uc3.cdlib.org/ontology/mom#\n")
|
137
65
|
manifest.write("#%prefix | nfo: | http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#\n")
|
138
66
|
manifest.write("#%fields | nfo:fileUrl | nfo:hashAlgorithm | nfo:hashValue | nfo:fileSize | nfo:fileLastModified | nfo:fileName | mrt:mimeType\n")
|
139
|
-
@components.each
|
67
|
+
@components.each do |c|
|
140
68
|
manifest.write(c.to_manifest_entry)
|
141
|
-
|
69
|
+
end
|
142
70
|
manifest.write(erc_component.to_manifest_entry)
|
143
71
|
manifest.write("#%EOF\n")
|
144
72
|
end
|
145
|
-
|
73
|
+
# rubocop:enable Metrics/LineLength
|
74
|
+
|
146
75
|
# Begin an ingest on the given client, with a profile and
|
147
76
|
# submitter.
|
148
77
|
def start_ingest(client, profile, submitter)
|
149
78
|
request = mk_request(profile, submitter)
|
150
79
|
start_server
|
151
80
|
@response = client.ingest(request)
|
152
|
-
return @response
|
153
81
|
end
|
154
82
|
|
155
83
|
# Wait for the ingest of this object to finish.
|
@@ -158,6 +86,21 @@ module Mrt
|
|
158
86
|
# we will check the status via the ingest server.
|
159
87
|
join_server
|
160
88
|
end
|
89
|
+
|
90
|
+
private
|
91
|
+
|
92
|
+
def new_request(manifest_file, profile, user_agent)
|
93
|
+
Mrt::Ingest::Request.new(
|
94
|
+
file: manifest_file,
|
95
|
+
filename: manifest_file.path.split(%r{/}).last,
|
96
|
+
type: 'object-manifest',
|
97
|
+
submitter: user_agent,
|
98
|
+
profile: profile,
|
99
|
+
local_identifier: @local_identifier,
|
100
|
+
primary_identifier: @primary_identifier
|
101
|
+
)
|
102
|
+
end
|
103
|
+
|
161
104
|
end
|
162
105
|
end
|
163
106
|
end
|
@@ -13,37 +13,35 @@ module Mrt
|
|
13
13
|
@type = type
|
14
14
|
end
|
15
15
|
end
|
16
|
-
|
16
|
+
|
17
17
|
# Represents a SHA256 digest suitable for a Checkm manifest.
|
18
18
|
class SHA256 < Base
|
19
19
|
def initialize(value)
|
20
|
-
super(value,
|
20
|
+
super(value, 'sha-256')
|
21
21
|
end
|
22
22
|
end
|
23
23
|
|
24
24
|
# Represents an MD5 digest suitable for a Checkm manifest.
|
25
25
|
class MD5 < Base
|
26
26
|
def initialize(value)
|
27
|
-
super(value,
|
27
|
+
super(value, 'md5')
|
28
28
|
end
|
29
|
-
|
29
|
+
|
30
30
|
# Generate a digest from a file.
|
31
31
|
def self.from_file(file)
|
32
32
|
digest = Digest::MD5.new
|
33
33
|
File.open(file.path, 'r') do |f|
|
34
|
-
buff =
|
35
|
-
|
36
|
-
digest << buff
|
37
|
-
end
|
34
|
+
buff = ''
|
35
|
+
digest << buff until f.read(1024, buff).nil?
|
38
36
|
end
|
39
|
-
|
37
|
+
Mrt::Ingest::MessageDigest::MD5.new(digest.hexdigest)
|
40
38
|
end
|
41
39
|
end
|
42
40
|
|
43
41
|
# Represents a SHA1 digest suitable for a Checkm manifest.
|
44
42
|
class SHA1 < Base
|
45
43
|
def initialize(value)
|
46
|
-
super(value,
|
44
|
+
super(value, 'sha1')
|
47
45
|
end
|
48
46
|
end
|
49
47
|
end
|
@@ -7,17 +7,20 @@ require 'webrick'
|
|
7
7
|
module Mrt
|
8
8
|
module Ingest
|
9
9
|
class OneTimeServer
|
10
|
+
|
11
|
+
attr_reader :dir, :port
|
12
|
+
|
10
13
|
# Find an open port, starting with start and adding one until we get
|
11
14
|
# an open port
|
12
|
-
def get_open_port(start=8081)
|
15
|
+
def get_open_port(start = 8081)
|
13
16
|
try_port = start
|
14
|
-
|
17
|
+
loop do
|
15
18
|
begin
|
16
19
|
s = TCPServer.open(try_port)
|
17
20
|
s.close
|
18
21
|
return try_port
|
19
22
|
rescue Errno::EADDRINUSE
|
20
|
-
try_port
|
23
|
+
try_port += 1
|
21
24
|
end
|
22
25
|
end
|
23
26
|
end
|
@@ -27,71 +30,58 @@ module Mrt
|
|
27
30
|
@mutex = Mutex.new
|
28
31
|
@known_paths = {}
|
29
32
|
@requested = {}
|
30
|
-
@port = get_open_port
|
31
|
-
@file_callback =
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
config = { :Port => @port }
|
36
|
-
@server = WEBrick::HTTPServer.new(config)
|
37
|
-
@server.mount("/", WEBrick::HTTPServlet::FileHandler, @dir,
|
38
|
-
{ :FileCallback=>@file_callback })
|
33
|
+
@port = get_open_port
|
34
|
+
@file_callback = ->(req, _res) { @requested[req.path] ||= true }
|
35
|
+
@server = WEBrick::HTTPServer.new(Port: @port)
|
36
|
+
@server.mount('/', WEBrick::HTTPServlet::FileHandler, @dir, FileCallback: @file_callback)
|
39
37
|
end
|
40
38
|
|
41
39
|
# Return true if each file has been served.
|
42
40
|
def finished?
|
43
41
|
Dir.entries(@dir).each do |entry|
|
44
|
-
next if
|
45
|
-
if @requested["/#{entry}"].nil?
|
46
|
-
return false
|
47
|
-
end
|
42
|
+
next if %w[. ..].include?(entry)
|
43
|
+
return false if @requested["/#{entry}"].nil?
|
48
44
|
end
|
49
|
-
|
45
|
+
true
|
50
46
|
end
|
51
47
|
|
52
|
-
def
|
53
|
-
tmpfile = Tempfile.new(
|
48
|
+
def temppath
|
49
|
+
tmpfile = Tempfile.new('tmp', @dir)
|
54
50
|
tmppath = tmpfile.path
|
55
51
|
tmpfile.close!
|
56
52
|
@mutex.synchronize do
|
57
|
-
|
53
|
+
unless @known_paths.key?(tmppath)
|
58
54
|
# no collision
|
59
55
|
@known_paths[tmppath] = true
|
60
56
|
return tmppath
|
61
57
|
end
|
62
58
|
end
|
63
59
|
# need to retry, there was a collision
|
64
|
-
|
60
|
+
temppath
|
65
61
|
end
|
66
62
|
|
67
63
|
# Add a file to this server. Returns the URL to use
|
68
64
|
# to fetch the file & the file path
|
69
|
-
def add_file(sourcefile=nil)
|
70
|
-
fullpath =
|
65
|
+
def add_file(sourcefile = nil)
|
66
|
+
fullpath = temppath
|
71
67
|
path = File.basename(fullpath)
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
sourcefile.path,
|
76
|
-
{ :FileCallback=>@file_callback })
|
68
|
+
|
69
|
+
if sourcefile
|
70
|
+
@server.mount("/#{path}", WEBrick::HTTPServlet::FileHandler, sourcefile.path, FileCallback: @file_callback)
|
77
71
|
else
|
78
|
-
File.open(fullpath, 'w+')
|
79
|
-
yield f
|
80
|
-
end
|
72
|
+
File.open(fullpath, 'w+') { |f| yield f }
|
81
73
|
end
|
82
|
-
|
74
|
+
["http://#{Socket.gethostname}:#{@port}/#{path}", fullpath]
|
83
75
|
end
|
84
|
-
|
76
|
+
|
85
77
|
def start_server
|
86
|
-
if @thread.nil?
|
78
|
+
if @thread.nil?
|
87
79
|
@thread = Thread.new do
|
88
80
|
@server.start
|
89
81
|
end
|
90
82
|
end
|
91
|
-
while
|
92
|
-
|
93
|
-
end
|
94
|
-
return @thread
|
83
|
+
sleep(0.1) while @server.status != :Running
|
84
|
+
@thread
|
95
85
|
end
|
96
86
|
|
97
87
|
# Stop server unconditionally.
|
@@ -103,18 +93,18 @@ module Mrt
|
|
103
93
|
# Wait for server to finish serving all files.
|
104
94
|
def join_server
|
105
95
|
# ensure that each file is requested once before shutting down
|
106
|
-
|
107
|
-
@server.shutdown
|
96
|
+
sleep(1) until finished?
|
97
|
+
@server.shutdown
|
108
98
|
@thread.join
|
109
99
|
end
|
110
|
-
|
100
|
+
|
111
101
|
# Run the server and wait until each file has been served once.
|
112
102
|
# Cleans up files before it returns.
|
113
103
|
def run
|
114
|
-
start_server
|
115
|
-
join_server
|
104
|
+
start_server
|
105
|
+
join_server
|
116
106
|
# FileUtils.rm_rf(@dir)
|
117
|
-
|
107
|
+
nil
|
118
108
|
end
|
119
109
|
end
|
120
110
|
end
|
data/lib/mrt/ingest/request.rb
CHANGED
@@ -3,54 +3,80 @@
|
|
3
3
|
|
4
4
|
module Mrt
|
5
5
|
module Ingest
|
6
|
-
class RequestException <
|
6
|
+
class RequestException < RuntimeError
|
7
7
|
end
|
8
8
|
|
9
9
|
# Represents a request to be sent to an ingest server.
|
10
10
|
class Request
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
11
|
+
|
12
|
+
attr_accessor :creator
|
13
|
+
attr_accessor :date
|
14
|
+
attr_accessor :digest
|
15
|
+
attr_accessor :file
|
16
|
+
attr_accessor :filename
|
17
|
+
attr_accessor :local_identifier
|
18
|
+
attr_accessor :note
|
19
|
+
attr_accessor :primary_identifier
|
20
|
+
attr_accessor :profile
|
21
|
+
attr_accessor :submitter
|
22
|
+
attr_accessor :title
|
23
|
+
attr_accessor :type
|
24
|
+
|
25
|
+
# rubocop:disable Metrics/MethodLength, Metrics/ParameterLists
|
26
|
+
def initialize(
|
27
|
+
profile:, submitter:, type:,
|
28
|
+
creator: nil, date: nil, digest: nil, file: nil, filename: nil,
|
29
|
+
local_identifier: nil, primary_identifier: nil, note: nil, title: nil
|
30
|
+
)
|
31
|
+
raise ArgumentError, 'profile cannot be nil' unless profile
|
32
|
+
raise ArgumentError, 'profile cannot be submitter' unless submitter
|
33
|
+
raise ArgumentError, 'profile cannot be type' unless type
|
34
|
+
|
35
|
+
@creator = creator
|
36
|
+
@date = date
|
37
|
+
@digest = digest
|
38
|
+
@file = file
|
39
|
+
@filename = filename
|
40
|
+
@local_identifier = local_identifier
|
41
|
+
@primary_identifier = primary_identifier
|
42
|
+
@profile = profile
|
43
|
+
@note = note
|
44
|
+
@submitter = submitter
|
45
|
+
@title = title
|
46
|
+
@type = type
|
34
47
|
end
|
35
|
-
|
48
|
+
# rubocop:enable Metrics/MethodLength, Metrics/ParameterLists
|
49
|
+
|
36
50
|
# Returns a hash of arguments suitable for sending to a server.
|
51
|
+
# rubocop: disable Metrics/MethodLength, Metrics/AbcSize
|
37
52
|
def mk_args
|
38
|
-
|
39
|
-
'creator'
|
40
|
-
'date'
|
41
|
-
'digestType'
|
42
|
-
'digestValue'
|
43
|
-
'file'
|
44
|
-
'filename'
|
45
|
-
'localIdentifier'
|
46
|
-
'primaryIdentifier' =>
|
47
|
-
'profile'
|
48
|
-
'note'
|
49
|
-
'responseForm'
|
50
|
-
'submitter'
|
51
|
-
'title'
|
52
|
-
'type'
|
53
|
-
}.reject{|
|
53
|
+
{
|
54
|
+
'creator' => creator,
|
55
|
+
'date' => date,
|
56
|
+
'digestType' => digest_type,
|
57
|
+
'digestValue' => digest_value,
|
58
|
+
'file' => file,
|
59
|
+
'filename' => filename,
|
60
|
+
'localIdentifier' => local_identifier,
|
61
|
+
'primaryIdentifier' => primary_identifier,
|
62
|
+
'profile' => profile,
|
63
|
+
'note' => note,
|
64
|
+
'responseForm' => 'json',
|
65
|
+
'submitter' => submitter,
|
66
|
+
'title' => title,
|
67
|
+
'type' => type
|
68
|
+
}.reject { |_k, v| v.nil? || (v == '') }
|
69
|
+
end
|
70
|
+
# rubocop: enable Metrics/MethodLength, Metrics/AbcSize
|
71
|
+
|
72
|
+
private
|
73
|
+
|
74
|
+
def digest_value
|
75
|
+
digest && digest.value
|
76
|
+
end
|
77
|
+
|
78
|
+
def digest_type
|
79
|
+
digest && digest.type
|
54
80
|
end
|
55
81
|
end
|
56
82
|
end
|