mrt-ingest 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.hgignore +4 -0
- data/Gemfile +10 -0
- data/LICENSE +26 -0
- data/README +29 -0
- data/Rakefile +21 -0
- data/lib/mrt/ingest.rb +16 -0
- data/lib/mrt/ingest/client.rb +38 -0
- data/lib/mrt/ingest/iobject.rb +144 -0
- data/lib/mrt/ingest/message_digest.rb +51 -0
- data/lib/mrt/ingest/one_time_server.rb +116 -0
- data/lib/mrt/ingest/request.rb +57 -0
- data/lib/mrt/ingest/response.rb +29 -0
- data/mrt-ingest.gemspec +24 -0
- data/test/test_client.rb +39 -0
- data/test/test_iobject.rb +143 -0
- data/test/test_request.rb +36 -0
- data/test/test_response.rb +62 -0
- metadata +113 -0
data/.hgignore
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
Copyright (c) 2011, Regents of the University of California
|
2
|
+
All rights reserved.
|
3
|
+
|
4
|
+
Redistribution and use in source and binary forms, with or without
|
5
|
+
modification, are permitted provided that the following conditions are met:
|
6
|
+
|
7
|
+
- Redistributions of source code must retain the above copyright notice,
|
8
|
+
this list of conditions and the following disclaimer.
|
9
|
+
- Redistributions in binary form must reproduce the above copyright notice,
|
10
|
+
this list of conditions and the following disclaimer in the documentation
|
11
|
+
and/or other materials provided with the distribution.
|
12
|
+
- Neither the name of the University of California nor the names of its
|
13
|
+
contributors may be used to endorse or promote products derived from this
|
14
|
+
software without specific prior written permission.
|
15
|
+
|
16
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
17
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
18
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
19
|
+
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
20
|
+
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
21
|
+
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
22
|
+
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
23
|
+
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
24
|
+
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
25
|
+
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
26
|
+
POSSIBILITY OF SUCH DAMAGE.
|
data/README
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
= mrt-ingest (ruby)
|
2
|
+
Date:: 6 Sept. 2011
|
3
|
+
Author:: Erik Hetzner (mailto:erik.hetzner@ucop.edu)
|
4
|
+
|
5
|
+
== What?
|
6
|
+
|
7
|
+
A Ruby ingest client for Merritt[http://merritt.cdlib.org/].
|
8
|
+
|
9
|
+
== Install
|
10
|
+
|
11
|
+
$ gem build mrt-ingest.gemspec
|
12
|
+
$ sudo gem install mrt-ingest-0.0.2.gem
|
13
|
+
|
14
|
+
== How?
|
15
|
+
|
16
|
+
require 'rubygems'
|
17
|
+
require 'mrt/ingest'
|
18
|
+
client = Mrt::Ingest::Client.new("http://merritt.cdlib.org/object/ingest", USERNAME, PASSWORD)
|
19
|
+
obj = Mrt::Ingest::IObject.new(:erc => {
|
20
|
+
"who" => "Doe, John",
|
21
|
+
"what" => "Hello, world",
|
22
|
+
"when/created" => "2011" })
|
23
|
+
obj.add_component(File.new("/tmp/helloworld_a"))
|
24
|
+
obj.add_component(File.new("/tmp/helloworld_b"))
|
25
|
+
obj.add_component(URI.parse("http://example.org/xxx"),
|
26
|
+
:name => "helloworld_c",
|
27
|
+
:digest => Mrt::Ingest::MessageDigest::MD5.new("6f5902ac237024bdd0c176cb93063dc4"))
|
28
|
+
obj.start_ingest(client, "demo_merritt_content", "me/My Name")
|
29
|
+
obj.finish_ingest()
|
data/Rakefile
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
# -*- ruby -*-
# Build tasks for the gem: Bundler's packaging helpers, the unit-test task
# (also the default task) and RDoc generation.
require 'rake/testtask'
require 'rdoc/task'

require 'bundler'
include Rake::DSL
Bundler::GemHelper.install_tasks

# Runs every test/**/test_*.rb file with lib/ and test/ on the load path.
Rake::TestTask.new(:test) do |test_task|
  test_task.libs.push('lib', 'test')
  test_task.pattern = 'test/**/test_*.rb'
  test_task.verbose = true
end

task :default => [:test]

# Generates API documentation from the README and the library sources.
RDoc::Task.new do |rdoc_task|
  rdoc_task.title = "Merritt Ingest Client"
  rdoc_task.rdoc_files.include("README", "lib/**/*.rb")
  rdoc_task.options = rdoc_task.options + ['-f', 'darkfish']
end
|
data/lib/mrt/ingest.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
# Author:: Erik Hetzner (mailto:erik.hetzner@ucop.edu)
|
2
|
+
# Copyright:: Copyright (c) 2011, Regents of the University of California
|
3
|
+
|
4
|
+
# Namespace for the Merritt client libraries.
module Mrt
  # Ingest client namespace. The classes below are autoloaded the first
  # time their constant is referenced.
  module Ingest
    autoload :Client, "mrt/ingest/client"
    autoload :IObject, "mrt/ingest/iobject"
    autoload :MessageDigest, "mrt/ingest/message_digest"
    autoload :OneTimeServer, "mrt/ingest/one_time_server"
    autoload :Request, "mrt/ingest/request"
    autoload :Response, "mrt/ingest/response"

    # Raised when an object cannot be prepared for ingest.
    #
    # Derives from StandardError (not Exception) so that a plain +rescue+
    # or <tt>rescue StandardError</tt> catches it; rescuing
    # IngestException or Exception keeps working as before.
    class IngestException < StandardError
    end
  end
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# Author:: Erik Hetzner (mailto:erik.hetzner@ucop.edu)
|
2
|
+
# Copyright:: Copyright (c) 2011, Regents of the University of California
|
3
|
+
|
4
|
+
require 'rubygems'
|
5
|
+
|
6
|
+
require 'rest-client'
|
7
|
+
|
8
|
+
module Mrt
  module Ingest

    # A client that submits ingest requests to a Merritt ingest endpoint.
    class Client
      # base_uri is the URL requests are POSTed to; username and password
      # are optional credentials.
      def initialize(base_uri, username=nil, password=nil)
        @base_uri, @username, @password = base_uri, username, password
      end

      # Execute the given ingest request against the server and wrap the
      # raw reply in a Response.
      def ingest(ingest_req)
        rest_request = mk_rest_request(ingest_req)
        Response.new(rest_request.execute)
      end

      # Build the RestClient::Request for ingest_req. Settings whose value
      # is nil or the empty string (e.g. absent credentials) are left out.
      def mk_rest_request(ingest_req) # :nodoc:
        settings = {
          :method   => :post,
          :url      => @base_uri,
          :user     => @username,
          :password => @password,
          :payload  => ingest_req.mk_args,
          :headers  => { :multipart => true }
        }
        settings.delete_if { |_name, value| value.nil? || value == "" }
        RestClient::Request.new(settings)
      end

    end
  end
end
|
@@ -0,0 +1,144 @@
|
|
1
|
+
# Author:: Erik Hetzner (mailto:erik.hetzner@ucop.edu)
|
2
|
+
# Copyright:: Copyright (c) 2011, Regents of the University of California
|
3
|
+
|
4
|
+
require 'mrt/ingest'
|
5
|
+
require 'tempfile'
|
6
|
+
require 'uri'
|
7
|
+
|
8
|
+
module Mrt
  module Ingest
    # Represents a component of an object to ingest. Either a #URI or a
    # #File.
    class Component # :nodoc:
      # server is the file-hosting server (it must respond to add_file),
      # where is a File, Tempfile or URI, and options may carry :name,
      # :digest, :mime_type and :size.
      #
      # Raises IngestException if where is neither a file nor a URI.
      def initialize(server, where, options)
        @name = options[:name]
        @digest = options[:digest]
        @mime_type = options[:mime_type]
        @size = options[:size]

        case where
        when File, Tempfile
          # Local files are registered with the server; name, digest and
          # size are derived from the file unless supplied by the caller.
          @name = File.basename(where.path) if @name.nil?
          @uri = server.add_file(where)[0]
          if @digest.nil? then
            @digest = Mrt::Ingest::MessageDigest::MD5.from_file(where)
          end
          @size = File.size(where.path) if @size.nil?
        when URI
          @name = File.basename(where.to_s) if @name.nil?
          @uri = where
        else
          raise IngestException.new("Trying to add a component that is not a File or URI")
        end
      end

      # Returns this component as one line of the ingest manifest. Missing
      # digest, size and mime type are written as empty fields; the
      # last-modified field is always left empty.
      def to_manifest_entry
        (digest_alg, digest_value) = if @digest.nil? then
                                       ['', '']
                                     else
                                       [@digest.type, @digest.value]
                                     end
        return "#{@uri} | #{digest_alg} | #{digest_value} | #{@size || ''} | | #{@name} | #{@mime_type || '' }\n"
      end
    end

    # An object prepared for ingest into Merritt.
    class IObject

      attr_accessor :primary_identifier, :local_identifier, :erc

      # Options can have the keys :primary_identifier,
      # :local_identifier, :server, or :erc. :erc can be a #File, #Uri
      # or a #Hash of metadata. :server is a #OneTimeServer.
      def initialize(options={})
        @primary_identifier = options[:primary_identifier]
        @local_identifier = options[:local_identifier]
        @erc = options[:erc] || Hash.new
        @components = []
        @server = options[:server] || Mrt::Ingest::OneTimeServer.new
      end

      # Add a component to the object. where can be either a #URI or a
      # #File. Options is a hash whose keys may be :name, :digest,
      # :mime_type, or :size. If :digest is supplied, it must be a
      # subclass of Mrt::Ingest::MessageDigest::Base. If where is a
      # #File, it will be hosted on an embedded web server.
      def add_component(where, options={})
        @components.push(Component.new(@server, where, options))
      end

      # Make a Mrt::Ingest::Request object for this mrt-object. The
      # request carries a manifest file listing every component plus the
      # ERC metadata (as mrt-erc.txt), along with the profile, submitter
      # and both the primary and the local identifier of this object.
      #
      # Raises IngestException if the ERC is not a URI, File or Hash.
      def mk_request(profile, submitter)
        erc_component = mk_erc_component
        manifest_file = Tempfile.new("mrt-ingest")
        mk_manifest(manifest_file, erc_component)
        # reset to beginning
        manifest_file.open
        return Mrt::Ingest::Request.
          new(:file => manifest_file,
              :filename => manifest_file.path.split(/\//).last,
              :type => "object-manifest",
              :submitter => submitter,
              :profile => profile,
              :local_identifier => @local_identifier,
              :primary_identifier => @primary_identifier)
      end

      def start_server # :nodoc:
        return @server.start_server()
      end

      def join_server # :nodoc:
        return @server.join_server()
      end

      def stop_server # :nodoc:
        return @server.stop_server()
      end

      # Write the manifest header, one entry per component, the ERC entry
      # and the end-of-file marker to manifest.
      def mk_manifest(manifest, erc_component) # :nodoc:
        manifest.write("#%checkm_0.7\n")
        manifest.write("#%profile http://uc3.cdlib.org/registry/ingest/manifest/mrt-ingest-manifest\n")
        manifest.write("#%prefix | mrt: | http://uc3.cdlib.org/ontology/mom#\n")
        manifest.write("#%prefix | nfo: | http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#\n")
        manifest.write("#%fields | nfo:fileUrl | nfo:hashAlgorithm | nfo:hashValue | nfo:fileSize | nfo:fileLastModified | nfo:fileName | mrt:mimeType\n")
        @components.each { |c|
          manifest.write(c.to_manifest_entry)
        }
        manifest.write(erc_component.to_manifest_entry)
        manifest.write("#%EOF\n")
      end

      # Begin an ingest on the given client, with a profile and
      # submitter.
      def start_ingest(client, profile, submitter)
        request = mk_request(profile, submitter)
        start_server
        @response = client.ingest(request)
        return @response
      end

      # Wait for the ingest of this object to finish.
      def finish_ingest
        # XXX Right now we only join the hosting server; in the future
        # we will check the status via the ingest server.
        join_server
      end

      private

      # Build the mrt-erc.txt component from @erc. A Hash is written out
      # as "key: value" lines to a file hosted by the server.
      def mk_erc_component # :nodoc:
        case @erc
        when URI, File, Tempfile
          Component.new(@server, @erc, :name => 'mrt-erc.txt')
        when Hash
          uri_str, path = @server.add_file do |f|
            @erc.each_pair do |k, v|
              f.write("#{k}: #{v}\n")
            end
          end
          # Block form so the handle opened for digesting is closed again.
          digest = File.open(path) do |erc_file|
            Mrt::Ingest::MessageDigest::MD5.from_file(erc_file)
          end
          Component.new(@server,
                        URI.parse(uri_str),
                        :name => 'mrt-erc.txt',
                        :digest => digest)
        else
          raise IngestException.new("Bad ERC supplied: must be a URI, File, or Hash")
        end
      end
    end
  end
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
# Author:: Erik Hetzner (mailto:erik.hetzner@ucop.edu)
|
2
|
+
# Copyright:: Copyright (c) 2011, Regents of the University of California
|
3
|
+
|
4
|
+
require 'digest/md5'
|
5
|
+
|
6
|
+
module Mrt
  module Ingest
    # Digest value objects for Checkm manifests: each pairs a digest
    # value with the name of its algorithm.
    module MessageDigest
      class Base # :nodoc:
        attr_reader :value, :type

        # value is the digest value, type is the algorithm name.
        def initialize(value, type)
          @value = value
          @type = type
        end
      end

      # Represents a SHA256 digest suitable for a Checkm manifest.
      class SHA256 < Base
        def initialize(value)
          super(value, "sha-256")
        end
      end

      # Represents an MD5 digest suitable for a Checkm manifest.
      class MD5 < Base
        def initialize(value)
          super(value, "md5")
        end

        # Generate a digest from a file. file must respond to +path+; the
        # file at that path is read from the start, whatever the current
        # position of the handle passed in.
        #
        # Digest::MD5.file is used rather than a hand-rolled read loop so
        # the bytes are digested in binary mode, without newline
        # translation.
        def self.from_file(file)
          hexdigest = Digest::MD5.file(file.path).hexdigest
          return Mrt::Ingest::MessageDigest::MD5.new(hexdigest)
        end
      end

      # Represents a SHA1 digest suitable for a Checkm manifest.
      class SHA1 < Base
        def initialize(value)
          super(value, "sha1")
        end
      end
    end
  end
end
|
@@ -0,0 +1,116 @@
|
|
1
|
+
# Author:: Erik Hetzner (mailto:erik.hetzner@ucop.edu)
|
2
|
+
# Copyright:: Copyright (c) 2011, Regents of the University of California
|
3
|
+
|
4
|
+
require 'webrick'
|
5
|
+
|
6
|
+
# An HTTP server that will serve each file ONCE before shutting down.
|
7
|
+
module Mrt
  module Ingest
    # A WEBrick-based HTTP server that hosts files added through add_file
    # and can wait until every one of them has been requested.
    class OneTimeServer
      # Find an open port, starting with start and adding one until we get
      # an open port.
      #
      # NOTE(review): the probe socket is closed before the port number is
      # returned, so another process could still claim the port before the
      # HTTP server binds to it.
      def get_open_port(start=8080)
        try_port = start
        loop do
          begin
            s = TCPServer.open(try_port)
            s.close
            return try_port
          rescue Errno::EADDRINUSE
            try_port = try_port + 1
          end
        end
      end

      # Creates the temporary directory that is served at "/", picks a
      # port, and prepares (but does not start) the HTTP server.
      def initialize
        @dir = Dir.mktmpdir
        @mutex = Mutex.new
        @known_paths = {}
        @requested = {}
        @port = get_open_port()
        # Records the URL path of every file request handled.
        @file_callback = lambda do |req, res|
          @requested[req.path] ||= true
        end

        config = { :Port => @port }
        @server = WEBrick::HTTPServer.new(config)
        @server.mount("/", WEBrick::HTTPServlet::FileHandler, @dir,
                      { :FileCallback=>@file_callback })
      end

      # Return true if each file has been served.
      #
      # Every path handed out by get_temppath is checked, which covers
      # both files written into the served directory and files mounted
      # from elsewhere by add_file. Looking only at the directory
      # contents would miss the mounted ones, because nothing is created
      # in the directory for them.
      def finished?
        url_paths = @mutex.synchronize do
          @known_paths.keys.map { |fullpath| "/#{File.basename(fullpath)}" }
        end
        return url_paths.all? { |url_path| !@requested[url_path].nil? }
      end

      # Returns a fresh, currently unused path inside the served
      # directory and remembers it so it is never handed out twice.
      def get_temppath
        tmpfile = Tempfile.new("tmp", @dir)
        tmppath = tmpfile.path
        # Only the generated name is needed; the placeholder is removed.
        tmpfile.close!
        @mutex.synchronize do
          if !@known_paths.has_key?(tmppath) then
            # no collision
            @known_paths[tmppath] = true
            return tmppath
          end
        end
        # need to retry, there was a collision
        return get_temppath
      end

      # Add a file to this server. Returns the URL to use
      # to fetch the file & the file path.
      #
      # With a sourcefile, that file is mounted under a generated name.
      # Without one, a new file is created in the served directory and
      # yielded to the block so the caller can write its content.
      def add_file(sourcefile=nil)
        fullpath = get_temppath()
        path = File.basename(fullpath)
        if !sourcefile.nil? then
          @server.mount("/#{path}",
                        WEBrick::HTTPServlet::FileHandler,
                        sourcefile.path,
                        { :FileCallback=>@file_callback })
        else
          File.open(fullpath, 'w+') do |f|
            yield f
          end
        end
        return "http://#{Socket.gethostname}:#{@port}/#{path}", fullpath
      end

      # Start serving in a background thread and return that thread.
      def start_server
        @thread = Thread.new do
          @server.start
        end
        return @thread
      end

      # Stop server unconditionally.
      def stop_server
        @server.shutdown
        @thread.join
      end

      # Wait for server to finish serving all files.
      def join_server
        # ensure that each file is requested once before shutting down
        while (!self.finished?) do sleep(1) end
        @server.shutdown
        @thread.join
      end

      # Run the server and wait until each file has been served once.
      # The temporary directory is currently left in place: the cleanup
      # below is disabled.
      def run
        start_server()
        join_server()
        # FileUtils.rm_rf(@dir)
        return
      end
    end
  end
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
# Author:: Erik Hetzner (mailto:erik.hetzner@ucop.edu)
|
2
|
+
# Copyright:: Copyright (c) 2011, Regents of the University of California
|
3
|
+
|
4
|
+
module Mrt
  module Ingest
    # Raised when a Request is created without a required option.
    #
    # Derives from StandardError (not Exception) so that a plain +rescue+
    # catches it; rescuing RequestException or Exception keeps working.
    class RequestException < StandardError
    end

    # Represents a request to be sent to an ingest server.
    class Request
      attr_accessor :creator, :date, :local_identifier,
                    :primary_identifier, :profile, :note, :submitter,
                    :title, :type

      # Options is a hash; required are :profile, :submitter, :type.
      # May also include :creator, :date, :digest, :file, :filename,
      # :local_identifier, :primary_identifier, :note, :title.
      #
      # Raises RequestException if a required option is missing.
      def initialize(options)
        @creator = options[:creator]
        @date = options[:date]
        @digest = options[:digest]
        @file = options[:file]
        @filename = options[:filename]
        @local_identifier = options[:local_identifier]
        @primary_identifier = options[:primary_identifier]
        @profile = options[:profile]
        @note = options[:note]
        @submitter = options[:submitter]
        @title = options[:title]
        @type = options[:type]
        [:profile, :submitter, :type].each do |arg|
          raise RequestException.new("#{arg} is required.") if options[arg].nil?
        end
      end

      # Returns a hash of arguments suitable for sending to a server.
      # Entries whose value is nil or the empty string are omitted, and
      # 'responseForm' is always 'json'.
      def mk_args
        return {
          'creator'           => @creator,
          'date'              => @date,
          'digestType'        => ((!@digest.nil? && @digest.type) || nil),
          'digestValue'       => ((!@digest.nil? && @digest.value) || nil),
          'file'              => @file,
          'filename'          => @filename,
          'localIdentifier'   => @local_identifier,
          'primaryIdentifier' => @primary_identifier,
          'profile'           => @profile,
          'note'              => @note,
          'responseForm'      => 'json',
          'submitter'         => @submitter,
          'title'             => @title,
          'type'              => @type
        }.reject{|k, v| v.nil? || (v == '')}
      end
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# Author:: Erik Hetzner (mailto:erik.hetzner@ucop.edu)
|
2
|
+
# Copyright:: Copyright (c) 2011, Regents of the University of California
|
3
|
+
|
4
|
+
require 'rubygems'
|
5
|
+
|
6
|
+
require 'json'
|
7
|
+
require 'time'
|
8
|
+
|
9
|
+
module Mrt
  module Ingest
    # The reply to an ingest request: wraps the 'batchState' section of
    # the JSON document returned by the server.
    class Response
      # data is the JSON text of the reply.
      def initialize(data)
        document = JSON.parse(data)
        @parsed = document['batchState']
      end

      # The 'batchID' value of the batch state.
      def batch_id
        @parsed['batchID']
      end

      # The 'userAgent' value of the batch state.
      def user_agent
        @parsed['userAgent']
      end

      # The 'submissionDate' value of the batch state, parsed into a Time.
      def submission_date
        timestamp = @parsed['submissionDate']
        Time.parse(timestamp)
      end
    end
  end
end
|
data/mrt-ingest.gemspec
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
# Gem specification for mrt-ingest. File lists are taken from the
# Mercurial working copy via `hg locate`.
$:.push File.expand_path("../lib", __FILE__)
# NOTE(review): lib/mrt/ingest/version.rb is not among the files listed for
# this package, and the version below is hard-coded -- confirm that this
# require can still be satisfied when the gemspec is evaluated.
require "mrt/ingest/version"

Gem::Specification.new do |s|
  s.name        = "mrt-ingest"
  s.version     = "0.0.2"
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["Erik Hetzner"]
  s.email       = ["erik.hetzner@ucop.edu"]
  s.homepage    = "http://bitbucket.org/merritt/mrt-ingest-ruby"
  s.summary     = %q{A client for Merritt ingest.}
  s.description = %q{A client for the Merritt ingest system. More details available from http://wiki.ucop.edu/display/curation.}

  s.add_dependency "json", ">=1.5.0"
  s.add_dependency "rest-client", ">=1.6.0"

  s.rubyforge_project = "mrt-ingest"

  s.files         = `hg locate`.split("\n")
  # The tests of this package live under test/, so that directory has to
  # be part of the pattern for test_files to be populated.
  s.test_files    = `hg locate --include '{test,spec,features}'`.split("\n")
  s.executables   = `hg locate --include bin`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]
end
|
data/test/test_client.rb
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
# Author:: Erik Hetzner (mailto:erik.hetzner@ucop.edu)
|
2
|
+
# Copyright:: Copyright (c) 2011, Regents of the University of California
|
3
|
+
|
4
|
+
require 'rubygems'
|
5
|
+
|
6
|
+
require 'checkm'
|
7
|
+
require 'fakeweb'
|
8
|
+
require 'mocha'
|
9
|
+
require 'mrt/ingest'
|
10
|
+
require 'shoulda'
|
11
|
+
require 'open-uri'
|
12
|
+
|
13
|
+
# Tests for Mrt::Ingest::Client: construction with and without
# credentials, and the REST request it builds for an ingest request.
class TestClient < Test::Unit::TestCase
  context "creating a client" do
    should "be able to create an ingest client" do
      assert_instance_of(Mrt::Ingest::Client,
                         Mrt::Ingest::Client.new("http://example.org/ingest"))
    end

    should "be able to create an ingest client with login credentials" do
      assert_instance_of(Mrt::Ingest::Client,
                         Mrt::Ingest::Client.new("http://example.org/ingest", "me", "secret"))
    end
  end

  context "ingest clients" do
    setup do
      @iobject = Mrt::Ingest::IObject.new
      @ingest_req = @iobject.mk_request("profile", "submitter")
      @client = Mrt::Ingest::Client.new("http://example.org/ingest", "me", "secret")
    end

    should "should create a good rest client request" do
      built = @client.mk_rest_request(@ingest_req)
      # The credentials given to the client must reach the REST request.
      assert_equal(["me", "secret"], [built.user, built.password])
    end
  end
end
|
@@ -0,0 +1,143 @@
|
|
1
|
+
# Author:: Erik Hetzner (mailto:erik.hetzner@ucop.edu)
|
2
|
+
# Copyright:: Copyright (c) 2011, Regents of the University of California
|
3
|
+
|
4
|
+
require 'rubygems'
|
5
|
+
|
6
|
+
require 'checkm'
|
7
|
+
require 'fakeweb'
|
8
|
+
require 'mocha'
|
9
|
+
require 'mrt/ingest'
|
10
|
+
require 'shoulda'
|
11
|
+
require 'open-uri'
|
12
|
+
|
13
|
+
# Tests for Mrt::Ingest::IObject: adding components, building requests,
# and fetching the hosted ERC and component files back over HTTP.
class TestIObject < Test::Unit::TestCase
  # Builds a request for iobject and parses the manifest it carries.
  def parse_object_manifest(iobject)
    req = iobject.mk_request("profile", "submitter")
    args = req.mk_args
    return Checkm::Manifest.new(args['file'].read())
  end

  # Writes content to a new Tempfile and reopens it so that it can be
  # read from the start.
  def write_to_tempfile(content)
    tempfile = Tempfile.new('test_iobject')
    tempfile << content
    tempfile.open
    return tempfile
  end

  # Returns the manifest entry whose file name is name, or nil.
  def get_uri_for_name(iobject, name)
    manifest = parse_object_manifest(iobject)
    return manifest.entries.find { |entry|
      entry.values[-2] == name
    }
  end

  # Parses "key: value" lines into a Hash. String#lines is used because
  # String itself has no #map on Ruby 1.9 and later.
  def parse_erc(erc)
    return Hash[erc.lines.map { |l| l.chomp.split(/:\s+/) }]
  end

  # Fetches the content behind a URL through open-uri. Kernel#open no
  # longer accepts URL strings on Ruby 3, so the URI object is read.
  def fetch_url(url)
    return URI.parse(url.to_s).read
  end

  # Runs the block while the object's file server is up; the server is
  # stopped again even when an assertion inside the block fails.
  def with_running_server(iobject)
    iobject.start_server()
    begin
      yield
    ensure
      iobject.stop_server()
    end
  end

  def parse_erc_entry(erc_entry)
    return parse_erc(fetch_url(erc_entry.values[0]))
  end

  def check_erc_content(iobject, asserted_erc)
    erc_entry = get_uri_for_name(iobject, "mrt-erc.txt")
    assert(!erc_entry.nil?, "Could not find mrt-erc.txt file!")
    with_running_server(iobject) do
      assert_equal(asserted_erc, parse_erc_entry(erc_entry))
    end
  end

  context "an iobject" do
    setup do
      @iobject = Mrt::Ingest::IObject.new
    end

    should "be able to add a URI component" do
      @iobject.add_component(URI.parse("http://example.org/file"))
    end

    should "not be able to add a non-URI component" do
      assert_raise(Mrt::Ingest::IngestException) do
        @iobject.add_component("http://example.org/file")
      end
    end

    should "be able to make a request" do
      req = @iobject.mk_request("profile", "submitter")
      assert_equal("profile", req.profile)
      assert_equal("submitter", req.submitter)
    end
  end

  context "the created request" do
    setup do
      @iobject = Mrt::Ingest::IObject.new
      @manifest = parse_object_manifest(@iobject)
      @erc_entry = get_uri_for_name(@iobject, "mrt-erc.txt")
    end

    should "generate a valid manifest file with more than one line" do
      assert(@manifest.entries.length > 0, "Empty manifest?")
    end

    should "have a mrt-erc.txt entry, and it should be fetchable" do
      assert(!@erc_entry.nil?, "Could not find mrt-erc.txt file!")
      # Fetching must succeed; the content itself is not checked here.
      with_running_server(@iobject) do
        fetch_url(@erc_entry.values[0])
      end
    end
  end

  ERC_CONTENT = <<EOS
who: John Doe
what: Something
when: now
EOS

  context "an iobject" do
    should "be able to specify a file for ERC" do
      erc_tempfile = write_to_tempfile(ERC_CONTENT)
      iobject = Mrt::Ingest::IObject.new(:erc=>File.new(erc_tempfile.path))
      check_erc_content(iobject, parse_erc(ERC_CONTENT))
    end

    should "be able to use a hash for ERC" do
      erc = {
        "who" => "John Doe",
        "what" => "Something",
        "when" => "now" }
      iobject = Mrt::Ingest::IObject.new(:erc=>erc)
      check_erc_content(iobject, erc)
    end
  end

  FILE_CONTENT = <<EOS
Hello, world!
EOS

  FILE_CONTENT_MD5 = "746308829575e17c3331bbcb00c0898b"

  context "serving local files" do
    should "be able to add a local file component" do
      iobject = Mrt::Ingest::IObject.new
      tempfile = write_to_tempfile(FILE_CONTENT)
      iobject.add_component(tempfile, {:name => "helloworld" })
      uri_entry = get_uri_for_name(iobject, "helloworld")
      assert(!uri_entry.nil?, "Could not find hosted file URI!")
      with_running_server(iobject) do
        assert_equal(FILE_CONTENT, fetch_url(uri_entry.values[0]))
      end
    end
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# Author:: Erik Hetzner (mailto:erik.hetzner@ucop.edu)
|
2
|
+
# Copyright:: Copyright (c) 2011, Regents of the University of California
|
3
|
+
|
4
|
+
require 'rubygems'
|
5
|
+
|
6
|
+
require 'fakeweb'
|
7
|
+
require 'mocha'
|
8
|
+
require 'mrt/ingest'
|
9
|
+
require 'shoulda'
|
10
|
+
|
11
|
+
# Tests for Mrt::Ingest::Request: each of :profile, :submitter and :type
# must be supplied.
class TestRequest < Test::Unit::TestCase
  context "when creating a request" do
    setup do
    end

    should "not supplying a required parameter should raise an exception" do
      # One option set per missing parameter: profile, submitter, type.
      incomplete_option_sets = [
        { :submitter => "jd/John Doe", :type => "file" },
        { :profile => "demo_merritt", :type => "file" },
        { :profile => "demo_merritt", :submitter => "jd/John Doe" }
      ]

      incomplete_option_sets.each do |options|
        assert_raise(Mrt::Ingest::RequestException) do
          Mrt::Ingest::Request.new(options)
        end
      end
    end
  end
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
# Author:: Erik Hetzner (mailto:erik.hetzner@ucop.edu)
|
2
|
+
# Copyright:: Copyright (c) 2011, Regents of the University of California
|
3
|
+
|
4
|
+
require 'rubygems'
|
5
|
+
|
6
|
+
require 'fakeweb'
|
7
|
+
require 'mocha'
|
8
|
+
require 'mrt/ingest'
|
9
|
+
require 'shoulda'
|
10
|
+
|
11
|
+
# Tests for Mrt::Ingest::Response, using a canned reply from the ingest
# service.
class TestResponse < Test::Unit::TestCase
  RESPONSE_JSON = <<EOS
{

"batchState": {
"batchID":"bid-8c0fa0c2-f3d7-4deb-bd49-f953f6752b59",
"updateFlag":false,
"targetQueue":"example.org:2181",
"batchStatus":"QUEUED",
"userAgent":"egh/Erik Hetzner",
"submissionDate":"2011-08-31T15:40:26-07:00",
"targetQueueNode":"/ingest.example.1",
"batchProfile": {
"owner":"ark:/99999/fk4tt4wsh",
"creationDate":"2010-01-19T13:28:14-08:00",
"targetStorage": {
"storageLink":"http://example.org:35121",
"nodeID":10
},
"objectType":"MRT-curatorial",
"modificationDate":"2010-01-26T23:28:14-08:00",
"aggregateType":"",
"objectMinterURL":"https://example.org/ezid/shoulder/ark:/99999/fk4",
"collection": {
},
"profileID":"merritt_content",
"profileDescription":"Merritt demo content",
"fixityURL":"http://example.org:33143",
"contactsEmail": {
"notification": {
"contactEmail":"erik.hetzner@example.org"
}
},
"identifierScheme":"ARK",
"identifierNamespace":"99999",
"objectRole":"MRT-content"
}
}
}
EOS

  context "when creating a response" do
    setup do
      @response = Mrt::Ingest::Response.new(RESPONSE_JSON)
    end

    should "have the right properties" do
      expected_batch_id = "bid-8c0fa0c2-f3d7-4deb-bd49-f953f6752b59"
      # 1314830426 is 2011-08-31T15:40:26-07:00 as seconds since the epoch.
      expected_submission = Time.at(1314830426)

      assert_equal(expected_batch_id, @response.batch_id)
      assert_equal(expected_submission, @response.submission_date)
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,113 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: mrt-ingest
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 27
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 0
|
9
|
+
- 2
|
10
|
+
version: 0.0.2
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Erik Hetzner
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2011-12-20 00:00:00 Z
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
name: json
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
none: false
|
25
|
+
requirements:
|
26
|
+
- - ">="
|
27
|
+
- !ruby/object:Gem::Version
|
28
|
+
hash: 3
|
29
|
+
segments:
|
30
|
+
- 1
|
31
|
+
- 5
|
32
|
+
- 0
|
33
|
+
version: 1.5.0
|
34
|
+
type: :runtime
|
35
|
+
version_requirements: *id001
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: rest-client
|
38
|
+
prerelease: false
|
39
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
40
|
+
none: false
|
41
|
+
requirements:
|
42
|
+
- - ">="
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
hash: 15
|
45
|
+
segments:
|
46
|
+
- 1
|
47
|
+
- 6
|
48
|
+
- 0
|
49
|
+
version: 1.6.0
|
50
|
+
type: :runtime
|
51
|
+
version_requirements: *id002
|
52
|
+
description: A client for the Merritt ingest system. More details available from http://wiki.ucop.edu/display/curation.
|
53
|
+
email:
|
54
|
+
- erik.hetzner@ucop.edu
|
55
|
+
executables: []
|
56
|
+
|
57
|
+
extensions: []
|
58
|
+
|
59
|
+
extra_rdoc_files: []
|
60
|
+
|
61
|
+
files:
|
62
|
+
- .hgignore
|
63
|
+
- Gemfile
|
64
|
+
- LICENSE
|
65
|
+
- README
|
66
|
+
- Rakefile
|
67
|
+
- lib/mrt/ingest.rb
|
68
|
+
- lib/mrt/ingest/client.rb
|
69
|
+
- lib/mrt/ingest/iobject.rb
|
70
|
+
- lib/mrt/ingest/message_digest.rb
|
71
|
+
- lib/mrt/ingest/one_time_server.rb
|
72
|
+
- lib/mrt/ingest/request.rb
|
73
|
+
- lib/mrt/ingest/response.rb
|
74
|
+
- mrt-ingest.gemspec
|
75
|
+
- test/test_client.rb
|
76
|
+
- test/test_iobject.rb
|
77
|
+
- test/test_request.rb
|
78
|
+
- test/test_response.rb
|
79
|
+
homepage: http://bitbucket.org/merritt/mrt-ingest-ruby
|
80
|
+
licenses: []
|
81
|
+
|
82
|
+
post_install_message:
|
83
|
+
rdoc_options: []
|
84
|
+
|
85
|
+
require_paths:
|
86
|
+
- lib
|
87
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
88
|
+
none: false
|
89
|
+
requirements:
|
90
|
+
- - ">="
|
91
|
+
- !ruby/object:Gem::Version
|
92
|
+
hash: 3
|
93
|
+
segments:
|
94
|
+
- 0
|
95
|
+
version: "0"
|
96
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
97
|
+
none: false
|
98
|
+
requirements:
|
99
|
+
- - ">="
|
100
|
+
- !ruby/object:Gem::Version
|
101
|
+
hash: 3
|
102
|
+
segments:
|
103
|
+
- 0
|
104
|
+
version: "0"
|
105
|
+
requirements: []
|
106
|
+
|
107
|
+
rubyforge_project: mrt-ingest
|
108
|
+
rubygems_version: 1.8.12
|
109
|
+
signing_key:
|
110
|
+
specification_version: 3
|
111
|
+
summary: A client for Merritt ingest.
|
112
|
+
test_files: []
|
113
|
+
|