mrt-ingest 0.0.4 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,6 @@
1
+ module Mrt
2
+ module Ingest
3
+ class IngestException < RuntimeError
4
+ end
5
+ end
6
+ end
@@ -1,7 +1,6 @@
1
1
  # Author:: Erik Hetzner (mailto:erik.hetzner@ucop.edu)
2
2
  # Copyright:: Copyright (c) 2011, Regents of the University of California
3
3
 
4
- require 'mrt/ingest'
5
4
  require 'tempfile'
6
5
  require 'uri'
7
6
  require 'open-uri'
@@ -9,147 +8,76 @@ require 'digest/md5'
9
8
 
10
9
  module Mrt
11
10
  module Ingest
12
- # Represents a component of an object to ingest. Either a #URI or a
13
- # #File.
14
- class Component # :nodoc:
15
- def initialize(server, where, options)
16
- @name = options[:name]
17
- @digest = options[:digest]
18
- @mime_type = options[:mime_type]
19
- @size = options[:size]
20
- # @prefetch = options[:prefetch] || false
21
- @prefetch = false # TODO: remove prefetch code
22
-
23
- case where
24
- when File, Tempfile
25
- @name = File.basename(where.path) if @name.nil?
26
- @uri = server.add_file(where)[0]
27
- if @digest.nil? then
28
- @digest = Mrt::Ingest::MessageDigest::MD5.from_file(where)
29
- end
30
- @size = File.size(where.path) if @size.nil?
31
- when URI
32
- @name = File.basename(where.to_s) if @name.nil?
33
- if @prefetch then
34
- digest = Digest::MD5.new()
35
- @uri, ignore = server.add_file do |f|
36
- open(where, (options[:prefetch_options] || {})) do |u|
37
- while (buff = u.read(1024)) do
38
- f << buff
39
- digest << buff
40
- end
41
- end
42
- end
43
- @digest = Mrt::Ingest::MessageDigest::MD5.new(digest.hexdigest)
44
- else
45
- @uri = where
46
- end
47
- else
48
- raise IngestException.new("Trying to add a component that is not a File or URI")
49
- end
50
-
51
- end
52
-
53
- def to_manifest_entry
54
- (digest_alg, digest_value) = if @digest.nil? then
55
- ['', '']
56
- else
57
- [@digest.type, @digest.value]
58
- end
59
- return "#{@uri} | #{digest_alg} | #{digest_value} | #{@size || ''} | | #{@name} | #{@mime_type || '' }\n"
60
- end
61
- end
62
-
11
+
63
12
  # An object prepared for ingest into Merritt.
64
13
  class IObject
65
-
14
+
66
15
  attr_accessor :primary_identifier, :local_identifier, :erc
16
+ attr_reader :server
67
17
 
68
18
  # Options can have the keys :primary_identifier,
69
19
  # :local_identifier, :server, or :erc. :erc can be a #File, #URI
70
20
  # or a #Hash of metadata. :server is a #OneTimeServer.
71
- def initialize(options={})
21
+ def initialize(options = {})
72
22
  @primary_identifier = options[:primary_identifier]
73
23
  @local_identifier = options[:local_identifier]
74
- @erc = options[:erc] || Hash.new
24
+ @erc = options[:erc] || {}
75
25
  @components = []
76
26
  @server = options[:server] || Mrt::Ingest::OneTimeServer.new
77
27
  end
78
-
28
+
79
29
  # Add a component to the object. where can be either a #URI or a
80
30
  # #File. Options is a hash whose keys may be :name, :digest,
81
31
  # :mime_type, or :size. If :digest is supplied, it must be an
82
32
  # instance of a subclass of Mrt::Ingest::MessageDigest::Base. If where is a
83
33
  # #File, it will be hosted on an embedded web server.
84
- def add_component(where, options={})
34
+ def add_component(where, options = {})
85
35
  @components.push(Component.new(@server, where, options))
86
36
  end
87
-
37
+
88
38
  # Make a Mrt::Ingest::Request object for this mrt-object
89
- def mk_request(profile, submitter)
90
- erc_component = case @erc
91
- when URI, File, Tempfile
92
- Component.new(@server, @erc, :name => 'mrt-erc.txt')
93
- when Hash
94
- uri_str, path = @server.add_file do |f|
95
- f.write("erc:\n")
96
- @erc.each_pair do |k, v|
97
- f.write("#{k}: #{v}\n")
98
- end
99
- end
100
- Component.new(@server,
101
- URI.parse(uri_str),
102
- :name => 'mrt-erc.txt',
103
- :digest => Mrt::Ingest::MessageDigest::MD5.from_file(File.new(path)))
104
- else
105
- raise IngestException.new("Bad ERC supplied: must be a URI, File, or Hash")
106
- end
107
- manifest_file = Tempfile.new("mrt-ingest")
39
+ def mk_request(profile, user_agent)
40
+ manifest_file = Tempfile.new('mrt-ingest')
41
+ erc_component = Component.from_erc(@server, @erc)
108
42
  mk_manifest(manifest_file, erc_component)
109
43
  # reset to beginning
110
44
  manifest_file.open
111
- return Mrt::Ingest::Request.
112
- new(:file => manifest_file,
113
- :filename => manifest_file.path.split(/\//).last,
114
- :type => "object-manifest",
115
- :submitter => submitter,
116
- :profile => profile,
117
- :local_identifier => @local_identifier,
118
- :primary_identifier => @primary_identifier)
45
+ new_request(manifest_file, profile, user_agent)
119
46
  end
120
47
 
121
48
  def start_server # :nodoc:
122
- return @server.start_server()
49
+ @server.start_server
123
50
  end
124
51
 
125
52
  def join_server # :nodoc:
126
- return @server.join_server()
53
+ @server.join_server
127
54
  end
128
55
 
129
56
  def stop_server # :nodoc:
130
- return @server.stop_server()
57
+ @server.stop_server
131
58
  end
132
-
59
+
60
+ # rubocop:disable Metrics/LineLength
133
61
  def mk_manifest(manifest, erc_component) # :nodoc:
134
62
  manifest.write("#%checkm_0.7\n")
135
63
  manifest.write("#%profile http://uc3.cdlib.org/registry/ingest/manifest/mrt-ingest-manifest\n")
136
64
  manifest.write("#%prefix | mrt: | http://uc3.cdlib.org/ontology/mom#\n")
137
65
  manifest.write("#%prefix | nfo: | http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#\n")
138
66
  manifest.write("#%fields | nfo:fileUrl | nfo:hashAlgorithm | nfo:hashValue | nfo:fileSize | nfo:fileLastModified | nfo:fileName | mrt:mimeType\n")
139
- @components.each { |c|
67
+ @components.each do |c|
140
68
  manifest.write(c.to_manifest_entry)
141
- }
69
+ end
142
70
  manifest.write(erc_component.to_manifest_entry)
143
71
  manifest.write("#%EOF\n")
144
72
  end
145
-
73
+ # rubocop:enable Metrics/LineLength
74
+
146
75
  # Begin an ingest on the given client, with a profile and
147
76
  # submitter.
148
77
  def start_ingest(client, profile, submitter)
149
78
  request = mk_request(profile, submitter)
150
79
  start_server
151
80
  @response = client.ingest(request)
152
- return @response
153
81
  end
154
82
 
155
83
  # Wait for the ingest of this object to finish.
@@ -158,6 +86,21 @@ module Mrt
158
86
  # we will check the status via the ingest server.
159
87
  join_server
160
88
  end
89
+
90
+ private
91
+
92
+ def new_request(manifest_file, profile, user_agent)
93
+ Mrt::Ingest::Request.new(
94
+ file: manifest_file,
95
+ filename: manifest_file.path.split(%r{/}).last,
96
+ type: 'object-manifest',
97
+ submitter: user_agent,
98
+ profile: profile,
99
+ local_identifier: @local_identifier,
100
+ primary_identifier: @primary_identifier
101
+ )
102
+ end
103
+
161
104
  end
162
105
  end
163
106
  end
@@ -13,37 +13,35 @@ module Mrt
13
13
  @type = type
14
14
  end
15
15
  end
16
-
16
+
17
17
  # Represents a SHA256 digest suitable for a Checkm manifest.
18
18
  class SHA256 < Base
19
19
  def initialize(value)
20
- super(value, "sha-256")
20
+ super(value, 'sha-256')
21
21
  end
22
22
  end
23
23
 
24
24
  # Represents an MD5 digest suitable for a Checkm manifest.
25
25
  class MD5 < Base
26
26
  def initialize(value)
27
- super(value, "md5")
27
+ super(value, 'md5')
28
28
  end
29
-
29
+
30
30
  # Generate a digest from a file.
31
31
  def self.from_file(file)
32
32
  digest = Digest::MD5.new
33
33
  File.open(file.path, 'r') do |f|
34
- buff = ""
35
- while (f.read(1024, buff) != nil)
36
- digest << buff
37
- end
34
+ buff = ''
35
+ digest << buff until f.read(1024, buff).nil?
38
36
  end
39
- return Mrt::Ingest::MessageDigest::MD5.new(digest.hexdigest)
37
+ Mrt::Ingest::MessageDigest::MD5.new(digest.hexdigest)
40
38
  end
41
39
  end
42
40
 
43
41
  # Represents a SHA1 digest suitable for a Checkm manifest.
44
42
  class SHA1 < Base
45
43
  def initialize(value)
46
- super(value, "sha1")
44
+ super(value, 'sha1')
47
45
  end
48
46
  end
49
47
  end
@@ -7,17 +7,20 @@ require 'webrick'
7
7
  module Mrt
8
8
  module Ingest
9
9
  class OneTimeServer
10
+
11
+ attr_reader :dir, :port
12
+
10
13
  # Find an open port, starting with start and adding one until we get
11
14
  # an open port
12
- def get_open_port(start=8081)
15
+ def get_open_port(start = 8081)
13
16
  try_port = start
14
- while (true)
17
+ loop do
15
18
  begin
16
19
  s = TCPServer.open(try_port)
17
20
  s.close
18
21
  return try_port
19
22
  rescue Errno::EADDRINUSE
20
- try_port = try_port + 1
23
+ try_port += 1
21
24
  end
22
25
  end
23
26
  end
@@ -27,71 +30,58 @@ module Mrt
27
30
  @mutex = Mutex.new
28
31
  @known_paths = {}
29
32
  @requested = {}
30
- @port = get_open_port()
31
- @file_callback = lambda do |req, res|
32
- @requested[req.path] ||= true
33
- end
34
-
35
- config = { :Port => @port }
36
- @server = WEBrick::HTTPServer.new(config)
37
- @server.mount("/", WEBrick::HTTPServlet::FileHandler, @dir,
38
- { :FileCallback=>@file_callback })
33
+ @port = get_open_port
34
+ @file_callback = ->(req, _res) { @requested[req.path] ||= true }
35
+ @server = WEBrick::HTTPServer.new(Port: @port)
36
+ @server.mount('/', WEBrick::HTTPServlet::FileHandler, @dir, FileCallback: @file_callback)
39
37
  end
40
38
 
41
39
  # Return true if each file has been served.
42
40
  def finished?
43
41
  Dir.entries(@dir).each do |entry|
44
- next if (entry == "." || entry == "..")
45
- if @requested["/#{entry}"].nil? then
46
- return false
47
- end
42
+ next if %w[. ..].include?(entry)
43
+ return false if @requested["/#{entry}"].nil?
48
44
  end
49
- return true
45
+ true
50
46
  end
51
47
 
52
- def get_temppath
53
- tmpfile = Tempfile.new("tmp", @dir)
48
+ def temppath
49
+ tmpfile = Tempfile.new('tmp', @dir)
54
50
  tmppath = tmpfile.path
55
51
  tmpfile.close!
56
52
  @mutex.synchronize do
57
- if !@known_paths.has_key?(tmppath) then
53
+ unless @known_paths.key?(tmppath)
58
54
  # no collision
59
55
  @known_paths[tmppath] = true
60
56
  return tmppath
61
57
  end
62
58
  end
63
59
  # need to retry, there was a collision
64
- return get_temppath
60
+ temppath
65
61
  end
66
62
 
67
63
  # Add a file to this server. Returns the URL to use
68
64
  # to fetch the file & the file path
69
- def add_file(sourcefile=nil)
70
- fullpath = get_temppath()
65
+ def add_file(sourcefile = nil)
66
+ fullpath = temppath
71
67
  path = File.basename(fullpath)
72
- if !sourcefile.nil? then
73
- @server.mount("/#{path}",
74
- WEBrick::HTTPServlet::FileHandler,
75
- sourcefile.path,
76
- { :FileCallback=>@file_callback })
68
+
69
+ if sourcefile
70
+ @server.mount("/#{path}", WEBrick::HTTPServlet::FileHandler, sourcefile.path, FileCallback: @file_callback)
77
71
  else
78
- File.open(fullpath, 'w+') do |f|
79
- yield f
80
- end
72
+ File.open(fullpath, 'w+') { |f| yield f }
81
73
  end
82
- return "http://#{Socket.gethostname}:#{@port}/#{path}", fullpath
74
+ ["http://#{Socket.gethostname}:#{@port}/#{path}", fullpath]
83
75
  end
84
-
76
+
85
77
  def start_server
86
- if @thread.nil? then
78
+ if @thread.nil?
87
79
  @thread = Thread.new do
88
80
  @server.start
89
81
  end
90
82
  end
91
- while (@server.status != :Running) do
92
- sleep(0.1)
93
- end
94
- return @thread
83
+ sleep(0.1) while @server.status != :Running
84
+ @thread
95
85
  end
96
86
 
97
87
  # Stop server unconditionally.
@@ -103,18 +93,18 @@ module Mrt
103
93
  # Wait for server to finish serving all files.
104
94
  def join_server
105
95
  # ensure that each file is requested once before shutting down
106
- while (!self.finished?) do sleep(1) end
107
- @server.shutdown
96
+ sleep(1) until finished?
97
+ @server.shutdown
108
98
  @thread.join
109
99
  end
110
-
100
+
111
101
  # Run the server and wait until each file has been served once.
112
102
  # Note: files are not cleaned up; the rm_rf call below is commented out.
113
103
  def run
114
- start_server()
115
- join_server()
104
+ start_server
105
+ join_server
116
106
  # FileUtils.rm_rf(@dir)
117
- return
107
+ nil
118
108
  end
119
109
  end
120
110
  end
@@ -3,54 +3,80 @@
3
3
 
4
4
  module Mrt
5
5
  module Ingest
6
- class RequestException < Exception
6
+ class RequestException < RuntimeError
7
7
  end
8
8
 
9
9
  # Represents a request to be sent to an ingest server.
10
10
  class Request
11
- attr_accessor :creator, :date, :local_identifier,
12
- :primary_identifier, :profile, :note, :submitter,
13
- :title, :type
14
-
15
- # Options is a hash; required are :profile, :submitter, :type.
16
- # May also include :creator, :date, :digest, :file, :filename,
17
- # :local_identifier, :primary_identifier, :note, :title.
18
- def initialize(options)
19
- @creator = options[:creator]
20
- @date = options[:date]
21
- @digest = options[:digest]
22
- @file = options[:file]
23
- @filename = options[:filename]
24
- @local_identifier = options[:local_identifier]
25
- @primary_identifier = options[:primary_identifier]
26
- @profile = options[:profile]
27
- @note = options[:note]
28
- @submitter = options[:submitter]
29
- @title = options[:title]
30
- @type = options[:type]
31
- [:profile, :submitter, :type].each do |arg|
32
- raise RequestException.new("#{arg} is required.") if options[arg].nil?
33
- end
11
+
12
+ attr_accessor :creator
13
+ attr_accessor :date
14
+ attr_accessor :digest
15
+ attr_accessor :file
16
+ attr_accessor :filename
17
+ attr_accessor :local_identifier
18
+ attr_accessor :note
19
+ attr_accessor :primary_identifier
20
+ attr_accessor :profile
21
+ attr_accessor :submitter
22
+ attr_accessor :title
23
+ attr_accessor :type
24
+
25
+ # rubocop:disable Metrics/MethodLength, Metrics/ParameterLists
26
+ def initialize(
27
+ profile:, submitter:, type:,
28
+ creator: nil, date: nil, digest: nil, file: nil, filename: nil,
29
+ local_identifier: nil, primary_identifier: nil, note: nil, title: nil
30
+ )
31
+ raise ArgumentError, 'profile cannot be nil' unless profile
32
+ raise ArgumentError, 'submitter cannot be nil' unless submitter
33
+ raise ArgumentError, 'type cannot be nil' unless type
34
+
35
+ @creator = creator
36
+ @date = date
37
+ @digest = digest
38
+ @file = file
39
+ @filename = filename
40
+ @local_identifier = local_identifier
41
+ @primary_identifier = primary_identifier
42
+ @profile = profile
43
+ @note = note
44
+ @submitter = submitter
45
+ @title = title
46
+ @type = type
34
47
  end
35
-
48
+ # rubocop:enable Metrics/MethodLength, Metrics/ParameterLists
49
+
36
50
  # Returns a hash of arguments suitable for sending to a server.
51
+ # rubocop: disable Metrics/MethodLength, Metrics/AbcSize
37
52
  def mk_args
38
- return {
39
- 'creator' => @creator,
40
- 'date' => @date,
41
- 'digestType' => ((!@digest.nil? && @digest.type) || nil),
42
- 'digestValue' => ((!@digest.nil? && @digest.value) || nil),
43
- 'file' => @file,
44
- 'filename' => @filename,
45
- 'localIdentifier' => @local_identifier,
46
- 'primaryIdentifier' => @primary_identifier,
47
- 'profile' => @profile,
48
- 'note' => @note,
49
- 'responseForm' => 'json',
50
- 'submitter' => @submitter,
51
- 'title' => @title,
52
- 'type' => @type
53
- }.reject{|k, v| v.nil? || (v == '')}
53
+ {
54
+ 'creator' => creator,
55
+ 'date' => date,
56
+ 'digestType' => digest_type,
57
+ 'digestValue' => digest_value,
58
+ 'file' => file,
59
+ 'filename' => filename,
60
+ 'localIdentifier' => local_identifier,
61
+ 'primaryIdentifier' => primary_identifier,
62
+ 'profile' => profile,
63
+ 'note' => note,
64
+ 'responseForm' => 'json',
65
+ 'submitter' => submitter,
66
+ 'title' => title,
67
+ 'type' => type
68
+ }.reject { |_k, v| v.nil? || (v == '') }
69
+ end
70
+ # rubocop: enable Metrics/MethodLength, Metrics/AbcSize
71
+
72
+ private
73
+
74
+ def digest_value
75
+ digest && digest.value
76
+ end
77
+
78
+ def digest_type
79
+ digest && digest.type
54
80
  end
55
81
  end
56
82
  end