mrt-ingest 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/.hgignore +4 -0
- data/Gemfile +10 -0
- data/LICENSE +26 -0
- data/README +29 -0
- data/Rakefile +21 -0
- data/lib/mrt/ingest.rb +16 -0
- data/lib/mrt/ingest/client.rb +38 -0
- data/lib/mrt/ingest/iobject.rb +144 -0
- data/lib/mrt/ingest/message_digest.rb +51 -0
- data/lib/mrt/ingest/one_time_server.rb +116 -0
- data/lib/mrt/ingest/request.rb +57 -0
- data/lib/mrt/ingest/response.rb +29 -0
- data/mrt-ingest.gemspec +24 -0
- data/test/test_client.rb +39 -0
- data/test/test_iobject.rb +143 -0
- data/test/test_request.rb +36 -0
- data/test/test_response.rb +62 -0
- metadata +113 -0
data/.hgignore
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
Copyright (c) 2011, Regents of the University of California
|
2
|
+
All rights reserved.
|
3
|
+
|
4
|
+
Redistribution and use in source and binary forms, with or without
|
5
|
+
modification, are permitted provided that the following conditions are met:
|
6
|
+
|
7
|
+
- Redistributions of source code must retain the above copyright notice,
|
8
|
+
this list of conditions and the following disclaimer.
|
9
|
+
- Redistributions in binary form must reproduce the above copyright notice,
|
10
|
+
this list of conditions and the following disclaimer in the documentation
|
11
|
+
and/or other materials provided with the distribution.
|
12
|
+
- Neither the name of the University of California nor the names of its
|
13
|
+
contributors may be used to endorse or promote products derived from this
|
14
|
+
software without specific prior written permission.
|
15
|
+
|
16
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
17
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
18
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
19
|
+
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
20
|
+
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
21
|
+
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
22
|
+
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
23
|
+
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
24
|
+
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
25
|
+
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
26
|
+
POSSIBILITY OF SUCH DAMAGE.
|
data/README
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
= mrt-ingest (ruby)
|
2
|
+
Date:: 6 Sept. 2011
|
3
|
+
Author:: Erik Hetzner (mailto:erik.hetzner@ucop.edu)
|
4
|
+
|
5
|
+
== What?
|
6
|
+
|
7
|
+
A Ruby ingest client for Merritt[http://merritt.cdlib.org/].
|
8
|
+
|
9
|
+
== Install
|
10
|
+
|
11
|
+
$ gem build mrt-ingest.gemspec
|
12
|
+
$ sudo gem install mrt-ingest-0.0.2.gem
|
13
|
+
|
14
|
+
== How?
|
15
|
+
|
16
|
+
require 'rubygems'
|
17
|
+
require 'mrt/ingest'
|
18
|
+
client = Mrt::Ingest::Client.new("http://merritt.cdlib.org/object/ingest", USERNAME, PASSWORD)
|
19
|
+
obj = Mrt::Ingest::IObject.new(:erc => {
|
20
|
+
"who" => "Doe, John",
|
21
|
+
"what" => "Hello, world",
|
22
|
+
"when/created" => "2011" })
|
23
|
+
obj.add_component(File.new("/tmp/helloworld_a"))
|
24
|
+
obj.add_component(File.new("/tmp/helloworld_b"))
|
25
|
+
obj.add_component(URI.parse("http://example.org/xxx"),
|
26
|
+
:name => "helloworld_c",
|
27
|
+
:digest => Mrt::Ingest::MessageDigest::MD5.new("6f5902ac237024bdd0c176cb93063dc4"))
|
28
|
+
obj.start_ingest(client, "demo_merritt_content", "me/My Name")
|
29
|
+
obj.finish_ingest()
|
data/Rakefile
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
# -*- ruby -*-
|
2
|
+
require 'rake/testtask'
|
3
|
+
require 'rdoc/task'
|
4
|
+
|
5
|
+
require 'bundler'
|
6
|
+
include Rake::DSL
|
7
|
+
Bundler::GemHelper.install_tasks
|
8
|
+
|
9
|
+
# Plain `rake` runs the test suite.
task :default => [:test]

# `rake test`: run every test/**/test_*.rb with lib/ and test/ on the load path.
Rake::TestTask.new(:test) do |test_task|
  test_task.libs.push('lib', 'test')
  test_task.pattern = 'test/**/test_*.rb'
  test_task.verbose = true
end

# `rake rdoc`: API documentation for the README and the library sources,
# rendered with the darkfish formatter.
RDoc::Task.new do |rdoc|
  rdoc.title = "Merritt Ingest Client"
  rdoc.rdoc_files.include("README", "lib/**/*.rb")

  rdoc.options += ['-f', 'darkfish']
end
|
data/lib/mrt/ingest.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
# Author:: Erik Hetzner (mailto:erik.hetzner@ucop.edu)
|
2
|
+
# Copyright:: Copyright (c) 2011, Regents of the University of California
|
3
|
+
|
4
|
+
# Namespace for the Merritt client libraries.
module Mrt
  # Client-side support for submitting objects to a Merritt ingest service.
  # The classes are registered with autoload, so each file is only read the
  # first time its constant is referenced.
  module Ingest
    autoload :Client, "mrt/ingest/client"
    autoload :IObject, "mrt/ingest/iobject"
    autoload :MessageDigest, "mrt/ingest/message_digest"
    autoload :OneTimeServer, "mrt/ingest/one_time_server"
    autoload :Request, "mrt/ingest/request"
    autoload :Response, "mrt/ingest/response"

    # Raised when an object cannot be prepared for ingest (for example a
    # component or ERC of an unsupported type).
    #
    # Derives from StandardError rather than Exception so that an ordinary
    # +rescue+ / <tt>rescue => e</tt> in calling code catches it; code that
    # rescues IngestException or Exception explicitly is unaffected.
    class IngestException < StandardError
    end
  end
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# Author:: Erik Hetzner (mailto:erik.hetzner@ucop.edu)
|
2
|
+
# Copyright:: Copyright (c) 2011, Regents of the University of California
|
3
|
+
|
4
|
+
require 'rubygems'
|
5
|
+
|
6
|
+
require 'rest-client'
|
7
|
+
|
8
|
+
module Mrt
  module Ingest

    # Submits ingest requests to a Merritt ingest service over HTTP.
    class Client
      # base_uri is the URL requests are POSTed to; username and password
      # are optional credentials handed to the REST client.
      def initialize(base_uri, username=nil, password=nil)
        @base_uri, @username, @password = base_uri, username, password
      end

      # POST ingest_req to the ingest service and wrap the reply in a
      # Response.
      def ingest(ingest_req)
        rest_request = mk_rest_request(ingest_req)
        Response.new(rest_request.execute())
      end

      # Build (but do not send) the multipart POST for ingest_req. Options
      # whose value is nil or the empty string are left out entirely.
      def mk_rest_request(ingest_req) # :nodoc:
        args = {
          :method   => :post,
          :url      => @base_uri,
          :user     => @username,
          :password => @password,
          :payload  => ingest_req.mk_args(),
          :headers  => { :multipart => true } }
        args.delete_if { |_key, value| value.nil? || value == "" }
        RestClient::Request.new(args)
      end

    end
  end
end
|
@@ -0,0 +1,144 @@
|
|
1
|
+
# Author:: Erik Hetzner (mailto:erik.hetzner@ucop.edu)
|
2
|
+
# Copyright:: Copyright (c) 2011, Regents of the University of California
|
3
|
+
|
4
|
+
require 'mrt/ingest'
|
5
|
+
require 'tempfile'
|
6
|
+
require 'uri'
|
7
|
+
|
8
|
+
module Mrt
|
9
|
+
module Ingest
|
10
|
+
# Represents a component of an object to ingest. Either a #URI or a
|
11
|
+
# #File.
|
12
|
+
class Component # :nodoc:
  # server:: object whose add_file(file) returns [url, path]; only used
  #          when +where+ is a File or Tempfile.
  # where::  a File/Tempfile (hosted through +server+) or a URI (referenced
  #          as is).
  # options:: hash with optional :name, :digest, :mime_type and :size.
  #           Missing values are derived for files (basename, MD5 digest,
  #           size on disk); for URIs only the name is derived.
  #
  # Raises IngestException if +where+ is neither a file nor a URI.
  def initialize(server, where, options)
    @name = options[:name]
    @digest = options[:digest]
    @mime_type = options[:mime_type]
    @size = options[:size]

    case where
    when File, Tempfile
      @name = File.basename(where.path) if @name.nil?
      @uri = server.add_file(where)[0]
      if @digest.nil? then
        @digest = Mrt::Ingest::MessageDigest::MD5.from_file(where)
      end
      @size = File.size(where.path) if @size.nil?
    when URI
      @name = default_name_for_uri(where) if @name.nil?
      @uri = where
    else
      raise IngestException.new("Trying to add a component that is not a File or URI")
    end

  end

  # One line of the ingest manifest for this component, in the column order
  # url | hash algorithm | hash value | size | last modified | name | mime type.
  # Unknown values are written as empty fields; last modified is always empty.
  def to_manifest_entry
    (digest_alg, digest_value) = if @digest.nil? then
                                   ['', '']
                                 else
                                   [@digest.type, @digest.value]
                                 end
    return "#{@uri} | #{digest_alg} | #{digest_value} | #{@size || ''} | | #{@name} | #{@mime_type || '' }\n"
  end

  private

  # File name to use for a URI component when the caller gave none: the
  # last segment of the URI *path*, so that a query string or fragment does
  # not leak into the name. URIs without a path (e.g. "http://example.org")
  # fall back to the basename of the whole URI.
  def default_name_for_uri(uri)
    path = uri.path
    source = (path.nil? || path.empty? || path == "/") ? uri.to_s : path
    return File.basename(source)
  end
end
|
45
|
+
|
46
|
+
# An object prepared for ingest into Merritt.
|
47
|
+
class IObject

  attr_accessor :primary_identifier, :local_identifier, :erc

  # Options can have the keys :primary_identifier, :local_identifier,
  # :server, or :erc. :erc can be a File, a URI or a Hash of metadata
  # (default: an empty Hash). :server is the object that hosts local
  # files; a new OneTimeServer is created when none is given.
  def initialize(options={})
    @primary_identifier = options[:primary_identifier]
    @local_identifier = options[:local_identifier]
    @erc = options[:erc] || Hash.new
    @components = []
    @server = options[:server] || Mrt::Ingest::OneTimeServer.new
  end

  # Add a component to the object. where can be either a URI or a File.
  # Options is a hash whose keys may be :name, :digest, :mime_type, or
  # :size. If :digest is supplied, it must be a subclass of
  # Mrt::Ingest::MessageDigest::Base. If where is a File, it will be
  # hosted on the embedded web server.
  def add_component(where, options={})
    @components.push(Component.new(@server, where, options))
  end

  # Make a Mrt::Ingest::Request for this object: writes a manifest listing
  # every component plus the ERC into a temporary file and wraps it in an
  # "object-manifest" request for the given profile and submitter.
  #
  # Raises IngestException if the ERC is not a URI, File or Hash.
  def mk_request(profile, submitter)
    erc_component = mk_erc_component
    manifest_file = Tempfile.new("mrt-ingest")
    mk_manifest(manifest_file, erc_component)
    # Tempfile#open closes and reopens the file, which flushes the
    # manifest to disk and leaves the handle positioned at the beginning.
    manifest_file.open
    return Mrt::Ingest::Request.
      new(:file               => manifest_file,
          :filename           => File.basename(manifest_file.path),
          :type               => "object-manifest",
          :submitter          => submitter,
          :profile            => profile,
          # Previously dropped: the identifier was stored on the object
          # but never reached the request.
          :local_identifier   => @local_identifier,
          :primary_identifier => @primary_identifier)
  end

  def start_server # :nodoc:
    return @server.start_server()
  end

  def join_server # :nodoc:
    return @server.join_server()
  end

  def stop_server # :nodoc:
    return @server.stop_server()
  end

  # Write the manifest to the IO-like +manifest+: header lines, one entry
  # per component, the ERC entry, then the EOF marker.
  def mk_manifest(manifest, erc_component) # :nodoc:
    manifest.write("#%checkm_0.7\n")
    manifest.write("#%profile http://uc3.cdlib.org/registry/ingest/manifest/mrt-ingest-manifest\n")
    manifest.write("#%prefix | mrt: | http://uc3.cdlib.org/ontology/mom#\n")
    manifest.write("#%prefix | nfo: | http://www.semanticdesktop.org/ontologies/2007/03/22/nfo#\n")
    manifest.write("#%fields | nfo:fileUrl | nfo:hashAlgorithm | nfo:hashValue | nfo:fileSize | nfo:fileLastModified | nfo:fileName | mrt:mimeType\n")
    @components.each { |c|
      manifest.write(c.to_manifest_entry)
    }
    manifest.write(erc_component.to_manifest_entry)
    manifest.write("#%EOF\n")
  end

  # Begin an ingest on the given client, with a profile and submitter.
  # Starts the hosting server, sends the request, and returns the
  # client's response.
  def start_ingest(client, profile, submitter)
    request = mk_request(profile, submitter)
    start_server
    @response = client.ingest(request)
    return @response
  end

  # Wait for the ingest of this object to finish.
  def finish_ingest
    # XXX Right now we only join the hosting server; in the future
    # we will check the status via the ingest server.
    join_server
  end

  private

  # Build the 'mrt-erc.txt' component from @erc. A URI or file is used
  # directly; a Hash is written out as "key: value" lines to a file hosted
  # by the server.
  def mk_erc_component
    case @erc
    when URI, File, Tempfile
      Component.new(@server, @erc, :name => 'mrt-erc.txt')
    when Hash
      uri_str, path = @server.add_file do |f|
        @erc.each_pair do |k, v|
          f.write("#{k}: #{v}\n")
        end
      end
      # Block form so the handle opened for digesting is closed again.
      digest = File.open(path) do |f|
        Mrt::Ingest::MessageDigest::MD5.from_file(f)
      end
      Component.new(@server,
                    URI.parse(uri_str),
                    :name => 'mrt-erc.txt',
                    :digest => digest)
    else
      raise IngestException.new("Bad ERC supplied: must be a URI, File, or Hash")
    end
  end
end
|
143
|
+
end
|
144
|
+
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
# Author:: Erik Hetzner (mailto:erik.hetzner@ucop.edu)
|
2
|
+
# Copyright:: Copyright (c) 2011, Regents of the University of California
|
3
|
+
|
4
|
+
require 'digest/md5'
|
5
|
+
|
6
|
+
module Mrt
  module Ingest
    # Value objects pairing a hex digest with the algorithm name written
    # into a Checkm manifest.
    module MessageDigest
      # Common holder: +value+ is the digest string, +type+ the algorithm
      # label.
      class Base # :nodoc:
        attr_reader :value, :type
        def initialize(value, type)
          @value = value
          @type = type
        end
      end

      # Represents a SHA256 digest suitable for a Checkm manifest.
      class SHA256 < Base
        def initialize(value)
          super(value, "sha-256")
        end
      end

      # Represents an MD5 digest suitable for a Checkm manifest.
      class MD5 < Base
        def initialize(value)
          super(value, "md5")
        end

        # Generate a digest from a file. +file+ only needs to respond to
        # +path+; the file at that path is read from disk, so content still
        # buffered in an open handle is not included.
        #
        # Digest::MD5.file reads the file in binary mode, so the result is
        # the digest of the exact bytes on every platform (the previous
        # text-mode read could translate line endings on Windows).
        def self.from_file(file)
          hexdigest = Digest::MD5.file(file.path).hexdigest
          return Mrt::Ingest::MessageDigest::MD5.new(hexdigest)
        end
      end

      # Represents a SHA1 digest suitable for a Checkm manifest.
      class SHA1 < Base
        def initialize(value)
          super(value, "sha1")
        end
      end
    end
  end
end
|
@@ -0,0 +1,116 @@
|
|
1
|
+
# Author:: Erik Hetzner (mailto:erik.hetzner@ucop.edu)
|
2
|
+
# Copyright:: Copyright (c) 2011, Regents of the University of California
|
3
|
+
|
4
|
+
require 'socket'
require 'tempfile'
require 'tmpdir'
require 'webrick'
|
5
|
+
|
6
|
+
# An HTTP server that will serve each file ONCE before shutting down.
|
7
|
+
module Mrt
  module Ingest
    # A small WEBrick server used to hand local files to the ingest
    # service. Files are either written into a private temporary directory
    # or mounted from their existing location; the server records which
    # paths have been requested so callers can wait until every file has
    # been fetched at least once.
    class OneTimeServer
      # Find an open port, starting with start and adding one until we get
      # an open port. The port is probed and released again, so another
      # process can still grab it before the server binds.
      def get_open_port(start=8080)
        try_port = start
        loop do
          begin
            s = TCPServer.open(try_port)
            s.close
            return try_port
          rescue Errno::EADDRINUSE
            try_port = try_port + 1
          end
        end
      end

      def initialize
        @dir = Dir.mktmpdir
        @mutex = Mutex.new
        @known_paths = {}
        @requested = {}
        # URL paths of files mounted from outside @dir (see add_file).
        @mounted = []
        @port = get_open_port()
        # Called by the file handler for each file request; remembers the
        # request path.
        @file_callback = lambda do |req, res|
          @requested[req.path] ||= true
        end

        config = { :Port => @port }
        @server = WEBrick::HTTPServer.new(config)
        @server.mount("/", WEBrick::HTTPServlet::FileHandler, @dir,
                      { :FileCallback=>@file_callback })
      end

      # Return true if each file has been served: every entry of the
      # temporary directory and every file mounted through add_file must
      # have been requested. (Mounted files live outside the directory and
      # were previously not checked, so join_server could shut the server
      # down before they had been fetched.)
      def finished?
        dir_served = Dir.entries(@dir).all? do |entry|
          entry == "." || entry == ".." || !@requested["/#{entry}"].nil?
        end
        return dir_served && @mounted.all? { |url_path| !@requested[url_path].nil? }
      end

      # Reserve a unique file path inside the temporary directory. The
      # placeholder file is removed again; only the name is kept.
      def get_temppath
        tmpfile = Tempfile.new("tmp", @dir)
        tmppath = tmpfile.path
        tmpfile.close!
        @mutex.synchronize do
          if !@known_paths.has_key?(tmppath) then
            # no collision
            @known_paths[tmppath] = true
            return tmppath
          end
        end
        # need to retry, there was a collision
        return get_temppath
      end

      # Add a file to this server. Returns the URL to use
      # to fetch the file & the file path.
      #
      # With +sourcefile+ (anything responding to +path+) the existing file
      # is mounted under a generated name. Without it, a block is required;
      # it receives a writable File inside the temporary directory.
      #
      # Raises ArgumentError when neither a source file nor a block is
      # given.
      def add_file(sourcefile=nil)
        if sourcefile.nil? && !block_given?
          raise ArgumentError, "add_file needs a source file or a block that writes the content"
        end
        fullpath = get_temppath()
        path = File.basename(fullpath)
        if !sourcefile.nil? then
          @server.mount("/#{path}",
                        WEBrick::HTTPServlet::FileHandler,
                        sourcefile.path,
                        { :FileCallback=>@file_callback })
          @mounted << "/#{path}"
        else
          File.open(fullpath, 'w+') do |f|
            yield f
          end
        end
        return "http://#{Socket.gethostname}:#{@port}/#{path}", fullpath
      end

      # Start serving in a background thread and return that thread.
      def start_server
        @thread = Thread.new do
          @server.start
        end
        return @thread
      end

      # Stop server unconditionally.
      def stop_server
        @server.shutdown
        # Nothing to wait for if the server was never started.
        @thread.join unless @thread.nil?
      end

      # Wait for server to finish serving all files.
      def join_server
        # ensure that each file is requested once before shutting down
        while (!self.finished?) do sleep(1) end
        @server.shutdown
        @thread.join unless @thread.nil?
      end

      # Run the server and wait until each file has been served once.
      # The temporary directory is NOT removed: the cleanup below is
      # currently disabled.
      def run
        start_server()
        join_server()
        # FileUtils.rm_rf(@dir)
        return
      end
    end
  end
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
# Author:: Erik Hetzner (mailto:erik.hetzner@ucop.edu)
|
2
|
+
# Copyright:: Copyright (c) 2011, Regents of the University of California
|
3
|
+
|
4
|
+
module Mrt
  module Ingest
    # Raised when a Request is built without one of its required options.
    #
    # Derives from StandardError rather than Exception so that an ordinary
    # +rescue+ catches it; rescuing RequestException explicitly works as
    # before.
    class RequestException < StandardError
    end

    # Represents a request to be sent to an ingest server.
    class Request
      attr_accessor :creator, :date, :local_identifier,
                    :primary_identifier, :profile, :note, :submitter,
                    :title, :type

      # Options is a hash; required are :profile, :submitter, :type.
      # May also include :creator, :date, :digest, :file, :filename,
      # :local_identifier, :primary_identifier, :note, :title.
      # :digest, when given, must respond to +type+ and +value+.
      #
      # Raises RequestException if a required option is missing.
      def initialize(options)
        @creator = options[:creator]
        @date = options[:date]
        @digest = options[:digest]
        @file = options[:file]
        @filename = options[:filename]
        @local_identifier = options[:local_identifier]
        @primary_identifier = options[:primary_identifier]
        @profile = options[:profile]
        @note = options[:note]
        @submitter = options[:submitter]
        @title = options[:title]
        @type = options[:type]
        [:profile, :submitter, :type].each do |arg|
          raise RequestException.new("#{arg} is required.") if options[arg].nil?
        end
      end

      # Returns a hash of arguments suitable for sending to a server:
      # string keys in the service's camelCase spelling, with nil and
      # empty-string values left out. 'responseForm' is always 'json'.
      def mk_args
        return {
          'creator'           => @creator,
          'date'              => @date,
          'digestType'        => ((!@digest.nil? && @digest.type) || nil),
          'digestValue'       => ((!@digest.nil? && @digest.value) || nil),
          'file'              => @file,
          'filename'          => @filename,
          'localIdentifier'   => @local_identifier,
          'primaryIdentifier' => @primary_identifier,
          'profile'           => @profile,
          'note'              => @note,
          'responseForm'      => 'json',
          'submitter'         => @submitter,
          'title'             => @title,
          'type'              => @type
        }.reject{|k, v| v.nil? || (v == '')}
      end
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# Author:: Erik Hetzner (mailto:erik.hetzner@ucop.edu)
|
2
|
+
# Copyright:: Copyright (c) 2011, Regents of the University of California
|
3
|
+
|
4
|
+
require 'rubygems'
|
5
|
+
|
6
|
+
require 'json'
|
7
|
+
require 'time'
|
8
|
+
|
9
|
+
module Mrt
  module Ingest
    # The ingest service's JSON reply to a submission. Only the
    # 'batchState' member of the document is kept.
    class Response
      # data is the JSON text returned by the service.
      def initialize(data)
        document = JSON.parse(data)
        @parsed = document['batchState']
      end

      # The 'batchID' value of the batch state.
      def batch_id
        @parsed['batchID']
      end

      # The 'userAgent' value of the batch state.
      def user_agent
        @parsed['userAgent']
      end

      # The 'submissionDate' value of the batch state, parsed into a Time.
      def submission_date
        raw_date = @parsed['submissionDate']
        Time.parse(raw_date)
      end
    end
  end
end
|
data/mrt-ingest.gemspec
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
require "mrt/ingest/version"
|
4
|
+
|
5
|
+
# Gem definition for mrt-ingest. File lists are taken from the Mercurial
# working copy, so the gem has to be built from an hg checkout.
Gem::Specification.new do |s|
  s.name        = "mrt-ingest"
  s.version     = "0.0.2"
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["Erik Hetzner"]
  s.email       = ["erik.hetzner@ucop.edu"]
  s.homepage    = "http://bitbucket.org/merritt/mrt-ingest-ruby"
  s.summary     = %q{A client for Merritt ingest.}
  s.description = %q{A client for the Merritt ingest system. More details available from http://wiki.ucop.edu/display/curation.}

  s.add_dependency "json", ">=1.5.0"
  s.add_dependency "rest-client", ">=1.6.0"

  s.rubyforge_project = "mrt-ingest"

  s.files         = `hg locate`.split("\n")
  # The test suite lives in test/ (there are no spec/ or features/
  # directories), so that is the directory to list here; the previous
  # '{spec,features}' pattern produced an empty test_files list.
  s.test_files    = `hg locate --include test`.split("\n")
  s.executables   = `hg locate --include bin`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]
end
|
data/test/test_client.rb
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
# Author:: Erik Hetzner (mailto:erik.hetzner@ucop.edu)
|
2
|
+
# Copyright:: Copyright (c) 2011, Regents of the University of California
|
3
|
+
|
4
|
+
require 'rubygems'
|
5
|
+
|
6
|
+
require 'checkm'
|
7
|
+
require 'fakeweb'
|
8
|
+
require 'mocha'
|
9
|
+
require 'mrt/ingest'
|
10
|
+
require 'shoulda'
|
11
|
+
require 'open-uri'
|
12
|
+
|
13
|
+
# Tests for Mrt::Ingest::Client: construction and the REST request it
# builds. No request is actually sent.
class TestClient < Test::Unit::TestCase
  context "creating a client" do
    should "be able to create an ingest client" do
      anonymous = Mrt::Ingest::Client.new("http://example.org/ingest")
      assert_instance_of(Mrt::Ingest::Client, anonymous)
    end

    should "be able to create an ingest client with login credentials" do
      authenticated = Mrt::Ingest::Client.new("http://example.org/ingest", "me", "secret")
      assert_instance_of(Mrt::Ingest::Client, authenticated)
    end
  end

  context "ingest clients" do
    setup do
      @client = Mrt::Ingest::Client.new("http://example.org/ingest", "me", "secret")
      @iobject = Mrt::Ingest::IObject.new
      @ingest_req = @iobject.mk_request("profile", "submitter")
    end

    # The credentials given to the client must end up on the REST request.
    should "should create a good rest client request" do
      built = @client.mk_rest_request(@ingest_req)
      assert_equal("me", built.user)
      assert_equal("secret", built.password)
    end
  end
end
|
@@ -0,0 +1,143 @@
|
|
1
|
+
# Author:: Erik Hetzner (mailto:erik.hetzner@ucop.edu)
|
2
|
+
# Copyright:: Copyright (c) 2011, Regents of the University of California
|
3
|
+
|
4
|
+
require 'rubygems'
|
5
|
+
|
6
|
+
require 'checkm'
|
7
|
+
require 'fakeweb'
|
8
|
+
require 'mocha'
|
9
|
+
require 'mrt/ingest'
|
10
|
+
require 'shoulda'
|
11
|
+
require 'open-uri'
|
12
|
+
|
13
|
+
# Tests for Mrt::Ingest::IObject: manifest generation, ERC handling and
# hosting of local files on the embedded server.
class TestIObject < Test::Unit::TestCase
  # Build a request for iobject and parse the manifest it carries.
  def parse_object_manifest(iobject)
    req = iobject.mk_request("profile", "submitter")
    args = req.mk_args
    return Checkm::Manifest.new(args['file'].read())
  end

  # Write content to a fresh Tempfile and return it reopened, so the
  # content is on disk and the handle is at the start.
  def write_to_tempfile(content)
    tempfile = Tempfile.new('test_iobject')
    tempfile << content
    tempfile.open
    return tempfile
  end

  # Return the manifest entry (not just the URI) whose file name column is
  # name, or nil if there is none.
  def get_uri_for_name(iobject, name)
    manifest = parse_object_manifest(iobject)
    return manifest.entries.find { |entry|
      entry.values[-2] == name
    }
  end

  # Parse "key: value" lines into a Hash. Goes through String#lines because
  # String itself is only enumerable on Ruby 1.8.
  def parse_erc(erc)
    return Hash[erc.lines.map { |l| l.chomp.split(/:\s+/, 2) }]
  end

  # Fetch url and return the body. Uses URI#open from open-uri, which
  # works on both old and current Rubies (Kernel#open no longer accepts
  # URLs on Ruby 3).
  def fetch_url(url)
    return URI.parse(url.to_s).open { |io| io.read }
  end

  # Run the block with iobject's server started, and stop the server
  # again even when an assertion inside the block fails.
  def with_server(iobject)
    iobject.start_server()
    begin
      return yield
    ensure
      iobject.stop_server()
    end
  end

  def parse_erc_entry(erc_entry)
    return parse_erc(fetch_url(erc_entry.values[0]))
  end

  def check_erc_content(iobject, asserted_erc)
    erc_entry = get_uri_for_name(iobject, "mrt-erc.txt")
    if erc_entry.nil?
      assert(false, "Could not find mrt-erc.txt file!")
    else
      with_server(iobject) do
        assert_equal(asserted_erc, parse_erc_entry(erc_entry))
      end
    end
  end

  context "an iobject" do
    setup do
      @iobject = Mrt::Ingest::IObject.new
    end

    should "be able to add a URI component" do
      @iobject.add_component(URI.parse("http://example.org/file"))
    end

    should "not be able to add a non-URI component" do
      assert_raise(Mrt::Ingest::IngestException) do
        @iobject.add_component("http://example.org/file")
      end
    end

    should "be able to make a request" do
      req = @iobject.mk_request("profile", "submitter")
      assert_equal("profile", req.profile)
      assert_equal("submitter", req.submitter)
    end
  end

  context "the created request" do
    setup do
      @iobject = Mrt::Ingest::IObject.new
      @manifest = parse_object_manifest(@iobject)
      @erc_entry = get_uri_for_name(@iobject, "mrt-erc.txt")
    end

    should "generate a valid manifest file with more than one line" do
      assert(@manifest.entries.length > 0, "Empty manifest?")
    end

    should "have a mrt-erc.txt entry, and it should be fetchable" do
      if @erc_entry.nil?
        assert(false, "Could not find mrt-erc.txt file!")
      else
        erc_body = with_server(@iobject) do
          fetch_url(@erc_entry.values[0])
        end
        # The default ERC is empty, so only the fetch itself is checked.
        assert_not_nil(erc_body)
      end
    end
  end

  ERC_CONTENT = <<EOS
who: John Doe
what: Something
when: now
EOS

  context "an iobject" do
    should "be able to specify a file for ERC" do
      erc_tempfile = write_to_tempfile(ERC_CONTENT)
      iobject = Mrt::Ingest::IObject.new(:erc=>File.new(erc_tempfile.path))
      check_erc_content(iobject, parse_erc(ERC_CONTENT))
    end

    should "be able to use a hash for ERC" do
      erc = {
        "who" => "John Doe",
        "what" => "Something",
        "when" => "now" }
      iobject = Mrt::Ingest::IObject.new(:erc=>erc)
      check_erc_content(iobject, erc)
    end
  end

  FILE_CONTENT = <<EOS
Hello, world!
EOS

  FILE_CONTENT_MD5 = "746308829575e17c3331bbcb00c0898b"

  context "serving local files" do
    should "be able to add a local file component" do
      iobject = Mrt::Ingest::IObject.new
      tempfile = write_to_tempfile(FILE_CONTENT)
      iobject.add_component(tempfile, {:name => "helloworld" })
      uri_entry = get_uri_for_name(iobject, "helloworld")
      if uri_entry.nil?
        assert(false, "Could not find hosted file URI!")
      else
        with_server(iobject) do
          assert_equal(FILE_CONTENT, fetch_url(uri_entry.values[0]))
        end
      end
    end
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# Author:: Erik Hetzner (mailto:erik.hetzner@ucop.edu)
|
2
|
+
# Copyright:: Copyright (c) 2011, Regents of the University of California
|
3
|
+
|
4
|
+
require 'rubygems'
|
5
|
+
|
6
|
+
require 'fakeweb'
|
7
|
+
require 'mocha'
|
8
|
+
require 'mrt/ingest'
|
9
|
+
require 'shoulda'
|
10
|
+
|
11
|
+
# Tests for Mrt::Ingest::Request option validation.
class TestRequest < Test::Unit::TestCase
  context "when creating a request" do
    setup do
    end

    # Each option set leaves out exactly one of :profile, :submitter and
    # :type, in that order.
    should "not supplying a required parameter should raise an exception" do
      incomplete_option_sets = [
        { :submitter => "jd/John Doe", :type => "file" },
        { :profile => "demo_merritt", :type => "file" },
        { :profile => "demo_merritt", :submitter => "jd/John Doe" } ]

      incomplete_option_sets.each do |options|
        assert_raise(Mrt::Ingest::RequestException) do
          Mrt::Ingest::Request.new(options)
        end
      end
    end
  end
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
# Author:: Erik Hetzner (mailto:erik.hetzner@ucop.edu)
|
2
|
+
# Copyright:: Copyright (c) 2011, Regents of the University of California
|
3
|
+
|
4
|
+
require 'rubygems'
|
5
|
+
|
6
|
+
require 'fakeweb'
|
7
|
+
require 'mocha'
|
8
|
+
require 'mrt/ingest'
|
9
|
+
require 'shoulda'
|
10
|
+
|
11
|
+
# Tests for Mrt::Ingest::Response against a canned service reply.
class TestResponse < Test::Unit::TestCase
  # A sample reply from the ingest service.
  RESPONSE_JSON = <<EOS
{

"batchState": {
"batchID":"bid-8c0fa0c2-f3d7-4deb-bd49-f953f6752b59",
"updateFlag":false,
"targetQueue":"example.org:2181",
"batchStatus":"QUEUED",
"userAgent":"egh/Erik Hetzner",
"submissionDate":"2011-08-31T15:40:26-07:00",
"targetQueueNode":"/ingest.example.1",
"batchProfile": {
"owner":"ark:/99999/fk4tt4wsh",
"creationDate":"2010-01-19T13:28:14-08:00",
"targetStorage": {
"storageLink":"http://example.org:35121",
"nodeID":10
},
"objectType":"MRT-curatorial",
"modificationDate":"2010-01-26T23:28:14-08:00",
"aggregateType":"",
"objectMinterURL":"https://example.org/ezid/shoulder/ark:/99999/fk4",
"collection": {
},
"profileID":"merritt_content",
"profileDescription":"Merritt demo content",
"fixityURL":"http://example.org:33143",
"contactsEmail": {
"notification": {
"contactEmail":"erik.hetzner@example.org"
}
},
"identifierScheme":"ARK",
"identifierNamespace":"99999",
"objectRole":"MRT-content"
}
}
}
EOS

  context "when creating a response" do
    setup { @response = Mrt::Ingest::Response.new(RESPONSE_JSON) }

    should "have the right properties" do
      expected_batch_id = "bid-8c0fa0c2-f3d7-4deb-bd49-f953f6752b59"
      expected_date = Time.at(1314830426)
      assert_equal(expected_batch_id, @response.batch_id)
      assert_equal(expected_date, @response.submission_date)
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,113 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: mrt-ingest
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 27
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 0
|
9
|
+
- 2
|
10
|
+
version: 0.0.2
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Erik Hetzner
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2011-12-20 00:00:00 Z
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
name: json
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
none: false
|
25
|
+
requirements:
|
26
|
+
- - ">="
|
27
|
+
- !ruby/object:Gem::Version
|
28
|
+
hash: 3
|
29
|
+
segments:
|
30
|
+
- 1
|
31
|
+
- 5
|
32
|
+
- 0
|
33
|
+
version: 1.5.0
|
34
|
+
type: :runtime
|
35
|
+
version_requirements: *id001
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: rest-client
|
38
|
+
prerelease: false
|
39
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
40
|
+
none: false
|
41
|
+
requirements:
|
42
|
+
- - ">="
|
43
|
+
- !ruby/object:Gem::Version
|
44
|
+
hash: 15
|
45
|
+
segments:
|
46
|
+
- 1
|
47
|
+
- 6
|
48
|
+
- 0
|
49
|
+
version: 1.6.0
|
50
|
+
type: :runtime
|
51
|
+
version_requirements: *id002
|
52
|
+
description: A client for the Merritt ingest system. More details available from http://wiki.ucop.edu/display/curation.
|
53
|
+
email:
|
54
|
+
- erik.hetzner@ucop.edu
|
55
|
+
executables: []
|
56
|
+
|
57
|
+
extensions: []
|
58
|
+
|
59
|
+
extra_rdoc_files: []
|
60
|
+
|
61
|
+
files:
|
62
|
+
- .hgignore
|
63
|
+
- Gemfile
|
64
|
+
- LICENSE
|
65
|
+
- README
|
66
|
+
- Rakefile
|
67
|
+
- lib/mrt/ingest.rb
|
68
|
+
- lib/mrt/ingest/client.rb
|
69
|
+
- lib/mrt/ingest/iobject.rb
|
70
|
+
- lib/mrt/ingest/message_digest.rb
|
71
|
+
- lib/mrt/ingest/one_time_server.rb
|
72
|
+
- lib/mrt/ingest/request.rb
|
73
|
+
- lib/mrt/ingest/response.rb
|
74
|
+
- mrt-ingest.gemspec
|
75
|
+
- test/test_client.rb
|
76
|
+
- test/test_iobject.rb
|
77
|
+
- test/test_request.rb
|
78
|
+
- test/test_response.rb
|
79
|
+
homepage: http://bitbucket.org/merritt/mrt-ingest-ruby
|
80
|
+
licenses: []
|
81
|
+
|
82
|
+
post_install_message:
|
83
|
+
rdoc_options: []
|
84
|
+
|
85
|
+
require_paths:
|
86
|
+
- lib
|
87
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
88
|
+
none: false
|
89
|
+
requirements:
|
90
|
+
- - ">="
|
91
|
+
- !ruby/object:Gem::Version
|
92
|
+
hash: 3
|
93
|
+
segments:
|
94
|
+
- 0
|
95
|
+
version: "0"
|
96
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
97
|
+
none: false
|
98
|
+
requirements:
|
99
|
+
- - ">="
|
100
|
+
- !ruby/object:Gem::Version
|
101
|
+
hash: 3
|
102
|
+
segments:
|
103
|
+
- 0
|
104
|
+
version: "0"
|
105
|
+
requirements: []
|
106
|
+
|
107
|
+
rubyforge_project: mrt-ingest
|
108
|
+
rubygems_version: 1.8.12
|
109
|
+
signing_key:
|
110
|
+
specification_version: 3
|
111
|
+
summary: A client for Merritt ingest.
|
112
|
+
test_files: []
|
113
|
+
|