mrt-ingest 0.0.4 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,6 @@
1
+ module Mrt
2
+ module Ingest
3
+ class IngestException < RuntimeError
4
+ end
5
+ end
6
+ end
@@ -1,7 +1,6 @@
1
1
  # Author:: Erik Hetzner (mailto:erik.hetzner@ucop.edu)
2
2
  # Copyright:: Copyright (c) 2011, Regents of the University of California
3
3
 
4
- require 'mrt/ingest'
5
4
  require 'tempfile'
6
5
  require 'uri'
7
6
  require 'open-uri'
@@ -9,147 +8,76 @@ require 'digest/md5'
9
8
 
10
9
  module Mrt
11
10
  module Ingest
12
- # Represents a component of an object to ingest. Either a #URI or a
13
- # #File.
14
- class Component # :nodoc:
15
- def initialize(server, where, options)
16
- @name = options[:name]
17
- @digest = options[:digest]
18
- @mime_type = options[:mime_type]
19
- @size = options[:size]
20
- # @prefetch = options[:prefetch] || false
21
- @prefetch = false # TODO: remove prefetch code
22
-
23
- case where
24
- when File, Tempfile
25
- @name = File.basename(where.path) if @name.nil?
26
- @uri = server.add_file(where)[0]
27
- if @digest.nil? then
28
- @digest = Mrt::Ingest::MessageDigest::MD5.from_file(where)
29
- end
30
- @size = File.size(where.path) if @size.nil?
31
- when URI
32
- @name = File.basename(where.to_s) if @name.nil?
33
- if @prefetch then
34
- digest = Digest::MD5.new()
35
- @uri, ignore = server.add_file do |f|
36
- open(where, (options[:prefetch_options] || {})) do |u|
37
- while (buff = u.read(1024)) do
38
- f << buff
39
- digest << buff
40
- end
41
- end
42
- end
43
- @digest = Mrt::Ingest::MessageDigest::MD5.new(digest.hexdigest)
44
- else
45
- @uri = where
46
- end
47
- else
48
- raise IngestException.new("Trying to add a component that is not a File or URI")
49
- end
50
-
51
- end
52
-
53
- def to_manifest_entry
54
- (digest_alg, digest_value) = if @digest.nil? then
55
- ['', '']
56
- else
57
- [@digest.type, @digest.value]
58
- end
59
- return "#{@uri} | #{digest_alg} | #{digest_value} | #{@size || ''} | | #{@name} | #{@mime_type || '' }\n"
60
- end
61
- end
62
-
11
+
63
12
  # An object prepared for ingest into Merritt.
64
13
  class IObject
65
-
14
+
66
15
  attr_accessor :primary_identifier, :local_identifier, :erc
16
+ attr_reader :server
67
17
 
68
18
  # Options can have the keys :primary_identifier,
69
19
  # :local_identifier, :server, or :erc. :erc can be a #File, #URI
70
20
  # or a #Hash of metadata. :server is a #OneTimeServer.
71
- def initialize(options={})
21
+ def initialize(options = {})
72
22
  @primary_identifier = options[:primary_identifier]
73
23
  @local_identifier = options[:local_identifier]
74
- @erc = options[:erc] || Hash.new
24
+ @erc = options[:erc] || {}
75
25
  @components = []
76
26
  @server = options[:server] || Mrt::Ingest::OneTimeServer.new
77
27
  end
78
-
28
+
79
29
  # Add a component to the object. where can be either a #URI or a
80
30
  # #File. Options is a hash whose keys may be :name, :digest,
81
31
  # :mime_type, or :size. If :digest is supplied, it must be a
82
32
  # subclass of Mrt::Ingest::MessageDigest::Base. If where is a
83
33
  # #File, it will be hosted on an embedded web server.
84
- def add_component(where, options={})
34
+ def add_component(where, options = {})
85
35
  @components.push(Component.new(@server, where, options))
86
36
  end
87
-
37
+
88
38
  # Make a Mrt::Ingest::Request object for this mrt-object
89
- def mk_request(profile, submitter)
90
- erc_component = case @erc
91
- when URI, File, Tempfile
92
- Component.new(@server, @erc, :name => 'mrt-erc.txt')
93
- when Hash
94
- uri_str, path = @server.add_file do |f|
95
- f.write("erc:\n")
96
- @erc.each_pair do |k, v|
97
- f.write("#{k}: #{v}\n")
98
- end
99
- end
100
- Component.new(@server,
101
- URI.parse(uri_str),
102
- :name => 'mrt-erc.txt',
103
- :digest => Mrt::Ingest::MessageDigest::MD5.from_file(File.new(path)))
104
- else
105
- raise IngestException.new("Bad ERC supplied: must be a URI, File, or Hash")
106
- end
107
- manifest_file = Tempfile.new("mrt-ingest")
39
+ def mk_request(profile, user_agent)
40
+ manifest_file = Tempfile.new('mrt-ingest')
41
+ erc_component = Component.from_erc(@server, @erc)
108
42
  mk_manifest(manifest_file, erc_component)
109
43
  # reset to beginning
110
44
  manifest_file.open
111
- return Mrt::Ingest::Request.
112
- new(:file => manifest_file,
113
- :filename => manifest_file.path.split(/\//).last,
114
- :type => "object-manifest",
115
- :submitter => submitter,
116
- :profile => profile,
117
- :local_identifier => @local_identifier,
118
- :primary_identifier => @primary_identifier)
45
+ new_request(manifest_file, profile, user_agent)
119
46
  end
120
47
 
121
48
  def start_server # :nodoc:
122
- return @server.start_server()
49
+ @server.start_server
123
50
  end
124
51
 
125
52
  def join_server # :nodoc:
126
- return @server.join_server()
53
+ @server.join_server
127
54
  end
128
55
 
129
56
  def stop_server # :nodoc:
130
- return @server.stop_server()
57
+ @server.stop_server
131
58
  end
132
-
59
+
60
+ # rubocop:disable Metrics/LineLength
133
61
  def mk_manifest(manifest, erc_component) # :nodoc:
134
62
  manifest.write("#%checkm_0.7\n")
135
63
  manifest.write("#%profile http://uc3.cdlib.org/registry/ingest/manifest/mrt-ingest-manifest\n")
136
64
  manifest.write("#%prefix | mrt: | http://uc3.cdlib.org/ontology/mom#\n")
137
65
  manifest.write("#%prefix | nfo: | http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#\n")
138
66
  manifest.write("#%fields | nfo:fileUrl | nfo:hashAlgorithm | nfo:hashValue | nfo:fileSize | nfo:fileLastModified | nfo:fileName | mrt:mimeType\n")
139
- @components.each { |c|
67
+ @components.each do |c|
140
68
  manifest.write(c.to_manifest_entry)
141
- }
69
+ end
142
70
  manifest.write(erc_component.to_manifest_entry)
143
71
  manifest.write("#%EOF\n")
144
72
  end
145
-
73
+ # rubocop:enable Metrics/LineLength
74
+
146
75
  # Begin an ingest on the given client, with a profile and
147
76
  # submitter.
148
77
  def start_ingest(client, profile, submitter)
149
78
  request = mk_request(profile, submitter)
150
79
  start_server
151
80
  @response = client.ingest(request)
152
- return @response
153
81
  end
154
82
 
155
83
  # Wait for the ingest of this object to finish.
@@ -158,6 +86,21 @@ module Mrt
158
86
  # we will check the status via the ingest server.
159
87
  join_server
160
88
  end
89
+
90
+ private
91
+
92
+ def new_request(manifest_file, profile, user_agent)
93
+ Mrt::Ingest::Request.new(
94
+ file: manifest_file,
95
+ filename: manifest_file.path.split(%r{/}).last,
96
+ type: 'object-manifest',
97
+ submitter: user_agent,
98
+ profile: profile,
99
+ local_identifier: @local_identifier,
100
+ primary_identifier: @primary_identifier
101
+ )
102
+ end
103
+
161
104
  end
162
105
  end
163
106
  end
@@ -13,37 +13,35 @@ module Mrt
13
13
  @type = type
14
14
  end
15
15
  end
16
-
16
+
17
17
  # Represents a SHA256 digest suitable for a Checkm manifest.
18
18
  class SHA256 < Base
19
19
  def initialize(value)
20
- super(value, "sha-256")
20
+ super(value, 'sha-256')
21
21
  end
22
22
  end
23
23
 
24
24
  # Represents an MD5 digest suitable for a Checkm manifest.
25
25
  class MD5 < Base
26
26
  def initialize(value)
27
- super(value, "md5")
27
+ super(value, 'md5')
28
28
  end
29
-
29
+
30
30
  # Generate a digest from a file.
31
31
  def self.from_file(file)
32
32
  digest = Digest::MD5.new
33
33
  File.open(file.path, 'r') do |f|
34
- buff = ""
35
- while (f.read(1024, buff) != nil)
36
- digest << buff
37
- end
34
+ buff = ''
35
+ digest << buff until f.read(1024, buff).nil?
38
36
  end
39
- return Mrt::Ingest::MessageDigest::MD5.new(digest.hexdigest)
37
+ Mrt::Ingest::MessageDigest::MD5.new(digest.hexdigest)
40
38
  end
41
39
  end
42
40
 
43
41
  # Represents a SHA1 digest suitable for a Checkm manifest.
44
42
  class SHA1 < Base
45
43
  def initialize(value)
46
- super(value, "sha1")
44
+ super(value, 'sha1')
47
45
  end
48
46
  end
49
47
  end
@@ -7,17 +7,20 @@ require 'webrick'
7
7
  module Mrt
8
8
  module Ingest
9
9
  class OneTimeServer
10
+
11
+ attr_reader :dir, :port
12
+
10
13
  # Find an open port, starting with start and adding one until we get
11
14
  # an open port
12
- def get_open_port(start=8081)
15
+ def get_open_port(start = 8081)
13
16
  try_port = start
14
- while (true)
17
+ loop do
15
18
  begin
16
19
  s = TCPServer.open(try_port)
17
20
  s.close
18
21
  return try_port
19
22
  rescue Errno::EADDRINUSE
20
- try_port = try_port + 1
23
+ try_port += 1
21
24
  end
22
25
  end
23
26
  end
@@ -27,71 +30,58 @@ module Mrt
27
30
  @mutex = Mutex.new
28
31
  @known_paths = {}
29
32
  @requested = {}
30
- @port = get_open_port()
31
- @file_callback = lambda do |req, res|
32
- @requested[req.path] ||= true
33
- end
34
-
35
- config = { :Port => @port }
36
- @server = WEBrick::HTTPServer.new(config)
37
- @server.mount("/", WEBrick::HTTPServlet::FileHandler, @dir,
38
- { :FileCallback=>@file_callback })
33
+ @port = get_open_port
34
+ @file_callback = ->(req, _res) { @requested[req.path] ||= true }
35
+ @server = WEBrick::HTTPServer.new(Port: @port)
36
+ @server.mount('/', WEBrick::HTTPServlet::FileHandler, @dir, FileCallback: @file_callback)
39
37
  end
40
38
 
41
39
  # Return true if each file has been served.
42
40
  def finished?
43
41
  Dir.entries(@dir).each do |entry|
44
- next if (entry == "." || entry == "..")
45
- if @requested["/#{entry}"].nil? then
46
- return false
47
- end
42
+ next if %w[. ..].include?(entry)
43
+ return false if @requested["/#{entry}"].nil?
48
44
  end
49
- return true
45
+ true
50
46
  end
51
47
 
52
- def get_temppath
53
- tmpfile = Tempfile.new("tmp", @dir)
48
+ def temppath
49
+ tmpfile = Tempfile.new('tmp', @dir)
54
50
  tmppath = tmpfile.path
55
51
  tmpfile.close!
56
52
  @mutex.synchronize do
57
- if !@known_paths.has_key?(tmppath) then
53
+ unless @known_paths.key?(tmppath)
58
54
  # no collision
59
55
  @known_paths[tmppath] = true
60
56
  return tmppath
61
57
  end
62
58
  end
63
59
  # need to retry, there was a collision
64
- return get_temppath
60
+ temppath
65
61
  end
66
62
 
67
63
  # Add a file to this server. Returns the URL to use
68
64
  # to fetch the file & the file path
69
- def add_file(sourcefile=nil)
70
- fullpath = get_temppath()
65
+ def add_file(sourcefile = nil)
66
+ fullpath = temppath
71
67
  path = File.basename(fullpath)
72
- if !sourcefile.nil? then
73
- @server.mount("/#{path}",
74
- WEBrick::HTTPServlet::FileHandler,
75
- sourcefile.path,
76
- { :FileCallback=>@file_callback })
68
+
69
+ if sourcefile
70
+ @server.mount("/#{path}", WEBrick::HTTPServlet::FileHandler, sourcefile.path, FileCallback: @file_callback)
77
71
  else
78
- File.open(fullpath, 'w+') do |f|
79
- yield f
80
- end
72
+ File.open(fullpath, 'w+') { |f| yield f }
81
73
  end
82
- return "http://#{Socket.gethostname}:#{@port}/#{path}", fullpath
74
+ ["http://#{Socket.gethostname}:#{@port}/#{path}", fullpath]
83
75
  end
84
-
76
+
85
77
  def start_server
86
- if @thread.nil? then
78
+ if @thread.nil?
87
79
  @thread = Thread.new do
88
80
  @server.start
89
81
  end
90
82
  end
91
- while (@server.status != :Running) do
92
- sleep(0.1)
93
- end
94
- return @thread
83
+ sleep(0.1) while @server.status != :Running
84
+ @thread
95
85
  end
96
86
 
97
87
  # Stop server unconditionally.
@@ -103,18 +93,18 @@ module Mrt
103
93
  # Wait for server to finish serving all files.
104
94
  def join_server
105
95
  # ensure that each file is requested once before shutting down
106
- while (!self.finished?) do sleep(1) end
107
- @server.shutdown
96
+ sleep(1) until finished?
97
+ @server.shutdown
108
98
  @thread.join
109
99
  end
110
-
100
+
111
101
  # Run the server and wait until each file has been served once.
112
102
  # Cleans up files before it returns.
113
103
  def run
114
- start_server()
115
- join_server()
104
+ start_server
105
+ join_server
116
106
  # FileUtils.rm_rf(@dir)
117
- return
107
+ nil
118
108
  end
119
109
  end
120
110
  end
@@ -3,54 +3,80 @@
3
3
 
4
4
  module Mrt
5
5
  module Ingest
6
- class RequestException < Exception
6
+ class RequestException < RuntimeError
7
7
  end
8
8
 
9
9
  # Represents a request to be sent to an ingest server.
10
10
  class Request
11
- attr_accessor :creator, :date, :local_identifier,
12
- :primary_identifier, :profile, :note, :submitter,
13
- :title, :type
14
-
15
- # Options is a hash; required are :profile, :submitter, :type.
16
- # May also include :creator, :date, :digest, :file, :filename,
17
- # :local_identifier, :primary_identifier, :note, :title.
18
- def initialize(options)
19
- @creator = options[:creator]
20
- @date = options[:date]
21
- @digest = options[:digest]
22
- @file = options[:file]
23
- @filename = options[:filename]
24
- @local_identifier = options[:local_identifier]
25
- @primary_identifier = options[:primary_identifier]
26
- @profile = options[:profile]
27
- @note = options[:note]
28
- @submitter = options[:submitter]
29
- @title = options[:title]
30
- @type = options[:type]
31
- [:profile, :submitter, :type].each do |arg|
32
- raise RequestException.new("#{arg} is required.") if options[arg].nil?
33
- end
11
+
12
+ attr_accessor :creator
13
+ attr_accessor :date
14
+ attr_accessor :digest
15
+ attr_accessor :file
16
+ attr_accessor :filename
17
+ attr_accessor :local_identifier
18
+ attr_accessor :note
19
+ attr_accessor :primary_identifier
20
+ attr_accessor :profile
21
+ attr_accessor :submitter
22
+ attr_accessor :title
23
+ attr_accessor :type
24
+
25
+ # rubocop:disable Metrics/MethodLength, Metrics/ParameterLists
26
+ def initialize(
27
+ profile:, submitter:, type:,
28
+ creator: nil, date: nil, digest: nil, file: nil, filename: nil,
29
+ local_identifier: nil, primary_identifier: nil, note: nil, title: nil
30
+ )
31
+ raise ArgumentError, 'profile cannot be nil' unless profile
32
+ raise ArgumentError, 'submitter cannot be nil' unless submitter
33
+ raise ArgumentError, 'type cannot be nil' unless type
34
+
35
+ @creator = creator
36
+ @date = date
37
+ @digest = digest
38
+ @file = file
39
+ @filename = filename
40
+ @local_identifier = local_identifier
41
+ @primary_identifier = primary_identifier
42
+ @profile = profile
43
+ @note = note
44
+ @submitter = submitter
45
+ @title = title
46
+ @type = type
34
47
  end
35
-
48
+ # rubocop:enable Metrics/MethodLength, Metrics/ParameterLists
49
+
36
50
  # Returns a hash of arguments suitable for sending to a server.
51
+ # rubocop: disable Metrics/MethodLength, Metrics/AbcSize
37
52
  def mk_args
38
- return {
39
- 'creator' => @creator,
40
- 'date' => @date,
41
- 'digestType' => ((!@digest.nil? && @digest.type) || nil),
42
- 'digestValue' => ((!@digest.nil? && @digest.value) || nil),
43
- 'file' => @file,
44
- 'filename' => @filename,
45
- 'localIdentifier' => @local_identifier,
46
- 'primaryIdentifier' => @primary_identifier,
47
- 'profile' => @profile,
48
- 'note' => @note,
49
- 'responseForm' => 'json',
50
- 'submitter' => @submitter,
51
- 'title' => @title,
52
- 'type' => @type
53
- }.reject{|k, v| v.nil? || (v == '')}
53
+ {
54
+ 'creator' => creator,
55
+ 'date' => date,
56
+ 'digestType' => digest_type,
57
+ 'digestValue' => digest_value,
58
+ 'file' => file,
59
+ 'filename' => filename,
60
+ 'localIdentifier' => local_identifier,
61
+ 'primaryIdentifier' => primary_identifier,
62
+ 'profile' => profile,
63
+ 'note' => note,
64
+ 'responseForm' => 'json',
65
+ 'submitter' => submitter,
66
+ 'title' => title,
67
+ 'type' => type
68
+ }.reject { |_k, v| v.nil? || (v == '') }
69
+ end
70
+ # rubocop: enable Metrics/MethodLength, Metrics/AbcSize
71
+
72
+ private
73
+
74
+ def digest_value
75
+ digest && digest.value
76
+ end
77
+
78
+ def digest_type
79
+ digest && digest.type
54
80
  end
55
81
  end
56
82
  end