lyber-core 1.3.0 → 3.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/LICENSE +1 -1
- data/README.md +81 -0
- data/lib/lyber_core.rb +1 -15
- data/lib/lyber_core/destroyer.rb +2 -9
- data/lib/lyber_core/log.rb +26 -30
- data/lib/lyber_core/robot.rb +67 -0
- metadata +131 -370
- data/README.rdoc +0 -76
- data/lib/dlss_service.rb +0 -81
- data/lib/dor_service.rb +0 -588
- data/lib/lyber_core/config.rb +0 -13
- data/lib/lyber_core/connection.rb +0 -130
- data/lib/lyber_core/exceptions/chained_error.rb +0 -21
- data/lib/lyber_core/exceptions/empty_queue.rb +0 -9
- data/lib/lyber_core/exceptions/fatal_error.rb +0 -10
- data/lib/lyber_core/exceptions/item_error.rb +0 -19
- data/lib/lyber_core/exceptions/service_error.rb +0 -10
- data/lib/lyber_core/robots/robot.rb +0 -333
- data/lib/lyber_core/robots/service_controller.rb +0 -174
- data/lib/lyber_core/robots/work_item.rb +0 -112
- data/lib/lyber_core/robots/work_queue.rb +0 -177
- data/lib/lyber_core/robots/workflow.rb +0 -104
- data/lib/lyber_core/robots/workspace.rb +0 -77
- data/lib/lyber_core/utils.rb +0 -4
- data/lib/lyber_core/utils/bagit_bag.rb +0 -100
- data/lib/lyber_core/utils/checksum_validate.rb +0 -65
- data/lib/lyber_core/utils/file_utilities.rb +0 -168
- data/lib/xml_models/identity_metadata/dublin_core.rb +0 -116
- data/lib/xml_models/identity_metadata/identity_metadata.rb +0 -264
data/README.rdoc
DELETED
@@ -1,76 +0,0 @@
|
|
1
|
-
= lyber_core
|
2
|
-
|
3
|
-
Require the following:
|
4
|
-
require 'lyber_core'
|
5
|
-
|
6
|
-
Constants that need to be defined with sample values:
|
7
|
-
|
8
|
-
If using WorkflowService:
|
9
|
-
Dor::WF_URI = 'http://lyberservices-dev.stanford.edu/workflow'
|
10
|
-
Dor::CREATE_WORKFLOW = true
|
11
|
-
|
12
|
-
If using SuriService:
|
13
|
-
Dor::MINT_SURI_IDS = true
|
14
|
-
Dor::SURI_URL = 'http://some.suri.host:8080'
|
15
|
-
Dor::ID_NAMESPACE = 'druid'
|
16
|
-
Dor::SURI_USER = 'suriuser'
|
17
|
-
Dor::SURI_PASSWORD = 'suripword'
|
18
|
-
|
19
|
-
If connecting to https servers:
|
20
|
-
LyberCore::CERT_FILE = File.dirname(__FILE__) + '/../certs/dummy.crt'
|
21
|
-
LyberCore::KEY_FILE = File.dirname(__FILE__) + '/../certs/dummy.key'
|
22
|
-
LyberCore::KEY_PASS = 'dummy'
|
23
|
-
|
24
|
-
== lyber_core/utils
|
25
|
-
|
26
|
-
If using Utilities, require the following:
|
27
|
-
require 'lyber_core/utils'
|
28
|
-
|
29
|
-
This will give you:
|
30
|
-
LyberCore::Utils::BagitBag
|
31
|
-
LyberCore::Utils::ChecksumValidate
|
32
|
-
LyberCore::Utils::FileUtilities
|
33
|
-
|
34
|
-
If you do not want all 3, you can require the individual classes. I.E. if you only want the bagit utils, then require:
|
35
|
-
require 'lyber_core/utils/bagit_bag'
|
36
|
-
|
37
|
-
The BagitBag class requires the bagit gem
|
38
|
-
http://github.com/flazz/bagit
|
39
|
-
|
40
|
-
== Build and release procedure
|
41
|
-
Modify the version number in lyber-core.gemspec, then push your commits to AFS. DO NOT TAG!
|
42
|
-
Run: 'rake dlss_release' to tag, build, and publish the lyber-core gem
|
43
|
-
See the Rakefile and the LyberCore::DlssRelease task in lib/lyber_core/rake/dlss_release.rb for more details
|
44
|
-
|
45
|
-
== Releases
|
46
|
-
- <b>1.3</b> Started to use Dor::Config for workspace configuration
|
47
|
-
- <b>1.2.1</b> Clean up logging of exceptions in LyberCore::Log
|
48
|
-
- <b>1.2</b> Robots can now run as daemons via the LyberCore::Robots::ServiceController
|
49
|
-
- <b>1.1.2</b> Can pass an array of "command line" arguments to the Robot constructor
|
50
|
-
- <b>1.1.1</b> Robot#start now returns LyberCore::Robots::CONTINUE if it did work without error, LyberCore::Robots::SLEEP if it did no work,
|
51
|
-
and LyberCore::Robots::HALT if it reached its error limit while working on its queue
|
52
|
-
- <b>1.1.0</b> Allow Robots::WorkQueue to resolve an arbitrary number of prerequisites
|
53
|
-
- <b>1.0.0</b> Factored all Dor::* classes and object models out of lyber-core and into a separate dor-services gem. WARNING: MAY BREAK COMPATIBILITY WITH PREVIOUS DOR-ENABLED CODE.
|
54
|
-
- <b>0.9.8</b> Created branch for legacy work "0.9-legacy". Robots can now be configured with fully qualified workflows for prerequisites
|
55
|
-
eg <i>dor:googleScannedBookWF:register-object</i>
|
56
|
-
- <b>0.9.7.4</b> Untangled a couple development dependencies; fixed issue where "include REXML" was polluting the Object namespace
|
57
|
-
- <b>0.9.7.3</b> Logging enhancements
|
58
|
-
- <b>0.9.7.2</b> IdentityMetadata bugfixes
|
59
|
-
- <b>0.9.7.1</b> Enhanced exception handling
|
60
|
-
- <b>0.9.7</b> ActiveMQ message-based robot parallelization as described here: https://consul.stanford.edu/x/tQjdBw . Removal of ROXML models.
|
61
|
-
- <b>0.9.6.3</b> Better error reporting for LyberCore::Utils::FileUtilities.execute, which means when a system command fails we have a better idea of why.
|
62
|
-
- <b>0.9.6.2</b> Handles new response from workflow service when there are no objects in the queue: <objects count="0"\>
|
63
|
-
- <b>0.9.6</b> DorService.get_objects_for_workstep can handle one or two completed steps. Trimmed-down gem dependencies now defined in lyber-core.gemspec. 'rake dlss_release' will tag, build and publish gem
|
64
|
-
- <b>0.9.5.5</b> Robots now log to ROBOT_ROOT/log/robot_name.log unless specified in constructor
|
65
|
-
- <b>0.9.5.4</b> Custom exception classes, more checking of error conditions
|
66
|
-
- <b>0.9.5.3</b> More robust testing, minor bug fixes, compatible with active_fedora 1.2.6
|
67
|
-
- <b>0.9.5</b> Significantly refactored to provide central logging and many more debugging statements.
|
68
|
-
- <b>0.9.4</b> First version that requires Ruby 1.8.7. Built with bundler and rvm
|
69
|
-
- <b>0.9.3.9</b> Last version compatible with Ruby 1.8.6. Stored in source control as the 'facets-282' branch.
|
70
|
-
- <b>0.9.3</b> Compatibility with bagit 1.0.0. Bump to active-fedora 1.1.13
|
71
|
-
- <b>0.9.2</b> Workflow bug fixes. Last version that supports active-fedora 1.0.7
|
72
|
-
- We recommend that you <b>DO NOT USE</b> any version older than these
|
73
|
-
|
74
|
-
== Copyright
|
75
|
-
|
76
|
-
Copyright (c) 2010 Stanford University Library. See LICENSE for details.
|
data/lib/dlss_service.rb
DELETED
@@ -1,81 +0,0 @@
|
|
1
|
-
require 'net/http'
|
2
|
-
require 'net/https'
|
3
|
-
require 'uri'
|
4
|
-
require 'cgi'
|
5
|
-
require 'active_fedora'
|
6
|
-
require 'lyber_core'
|
7
|
-
require 'nokogiri'
|
8
|
-
|
9
|
-
# Helper for talking to a Fedora repository and for turning workflow-service
# object lists into arrays of druids.
class DlssService

  # URL of the Fedora repository this instance was registered against
  attr_reader :fedora_url

  # Registers the given Fedora repository, plus a Solr service at a fixed
  # localhost URL, with ActiveFedora.
  #
  # @param fedora_url [String] base URL of the Fedora repository
  def initialize(fedora_url)
    @fedora_url = fedora_url
    solr_url = "http://localhost:8983/solr"
    Fedora::Repository.register(@fedora_url)
    ActiveFedora::SolrService.register(solr_url)
  end

  # Builds (without opening) a Net::HTTP connection for the given url.
  # For https urls the client certificate and key are read from CERT_FILE /
  # KEY_FILE (key unlocked with KEY_PASS).
  #
  # @param url [URI] parsed target url
  # @return [Net::HTTP] the configured, not yet started, connection
  def get_https_connection(url)
    https = Net::HTTP.new(url.host, url.port)
    if url.scheme == 'https'
      https.use_ssl = true
      https.cert = OpenSSL::X509::Certificate.new(File.read(CERT_FILE))
      https.key = OpenSSL::PKey::RSA.new(File.read(KEY_FILE), KEY_PASS)
      # NOTE(review): server certificates are not verified here; confirm this
      # is intentional before reusing the connection outside trusted networks.
      https.verify_mode = OpenSSL::SSL::VERIFY_NONE
    end
    https
  end

  # Retrieve the metadata of a datastream of a DOR object
  # e.g. fedora_url + /objects/ + druid + /datastreams/dor gets "dor" datastream metadata
  #
  # @param druid [String] object identifier
  # @param ds_id [String] datastream identifier
  # @return [String, nil] the response body, or nil when the request did not succeed
  def get_datastream_md(druid, ds_id)
    url = URI.parse(@fedora_url + '/objects/' + druid + '/datastreams/' + ds_id)
    req = Net::HTTP::Get.new(url.request_uri)
    req.basic_auth FEDORA_USER, FEDORA_PASS
    # Use this class's own connection builder: this file never requires
    # dor_service, so DorService may not be loaded here.
    res = get_https_connection(url).start { |http| http.request(req) }
    case res
    when Net::HTTPSuccess
      res.body
    else
      LyberCore::Log.error("Datastream " + ds_id + " not found for " + druid)
      nil
    end
  end

  # This is maintained for backward compatibility, but
  # get_all_druids_from_object_list or get_some_druids_from_object_list
  # are preferred.
  def self.get_druids_from_object_list(objectListXml)
    DlssService.get_all_druids_from_object_list(objectListXml)
  end

  # Transforms the XML from getObjectsForWorkStep into a list of druids
  def self.get_all_druids_from_object_list(objectListXml)
    DlssService.get_some_druids_from_object_list(objectListXml, nil)
  end

  # Takes XML of the form
  # <objects><object id='druid:hx066mp6063' url='https://lyberservices-test.stanford.edu/workflow/objects/druid:hx066mp6063'/></objects>
  # if count is an integer, return at most that number of druids
  # otherwise, return all druids in the queue
  #
  # @param objectListXml [String] object list XML
  # @param count [Integer, nil] maximum number of druids to return
  # @return [Array] the "id" attribute values, in document order
  def self.get_some_druids_from_object_list(objectListXml, count)
    druids = []
    # The loop below stops only when the length *equals* count, which can
    # never happen for 0 once the first druid is pushed; honour "at most 0".
    return druids if count == 0

    Nokogiri::XML::Reader(objectListXml).each do |node|
      druid = node.attribute("id")
      druids << druid unless druid.nil?
      break if druids.length == count
    end
    druids
  end

end
|
data/lib/dor_service.rb
DELETED
@@ -1,588 +0,0 @@
|
|
1
|
-
require 'net/http'
|
2
|
-
require 'net/https'
|
3
|
-
require 'uri'
|
4
|
-
require 'cgi'
|
5
|
-
require 'rexml/document'
|
6
|
-
|
7
|
-
class DorService
|
8
|
-
|
9
|
-
include REXML
|
10
|
-
|
11
|
-
# Builds (without opening) a Net::HTTP connection for +url+.
# Plain http urls get an unconfigured connection; https urls additionally
# get the client certificate/key read from CERT_FILE and KEY_FILE.
#
# @param url [URI] parsed target url
# @return [Net::HTTP] the configured, not yet started, connection
def DorService.get_https_connection(url)
  connection = Net::HTTP.new(url.host, url.port)
  return connection unless url.scheme == 'https'

  connection.use_ssl = true
  connection.cert = OpenSSL::X509::Certificate.new(File.read(CERT_FILE))
  connection.key = OpenSSL::PKey::RSA.new(File.read(KEY_FILE), KEY_PASS)
  # NOTE(review): peer verification is switched off here - confirm intent.
  connection.verify_mode = OpenSSL::SSL::VERIFY_NONE
  connection
end
|
21
|
-
|
22
|
-
# This should check to see if an object with the given PID already
|
23
|
-
# exists in the repository
|
24
|
-
# POSTs the url-encoded +form_data+ to DOR_URI/objects and extracts the new
# druid from the "/objects/<druid>" portion of the response.
#
# @param form_data [Hash] form parameters, see DorService.encodeParams
# @return [String, nil] the captured druid, nil if the response did not match
# @raise re-raises whatever the POST raised, after logging the backtrace
def DorService.create_object(form_data)
  endpoint = DOR_URI + '/objects'
  payload = DorService.encodeParams(form_data)
  response = LyberCore::Connection.post(endpoint, payload, :content_type => 'application/x-www-form-urlencoded')
  response =~ /\/objects\/(.*)/
  Regexp.last_match(1)
rescue Exception => e
  LyberCore::Log.debug("Unable to create object #{e.backtrace}")
  raise e
end
|
38
|
-
|
39
|
-
#objects/dr:123/resources
|
40
|
-
#parms: model, id
|
41
|
-
#will create object of type dor:GoogleScannedPage
|
42
|
-
# Creates a dor:googleScannedPage child object under +parent_druid+ by
# POSTing to DOR_URI/objects/<parent_druid>/resources.
#
# @param parent_druid [String] druid of the parent object
# @param child_id [String] identifier for the new child
# @return [String, nil] druid captured from "/resources/<druid>" in the response
# @raise the original exception class, with the underlying message preserved
def DorService.create_child_object(parent_druid, child_id)
  form_data = {'model' => 'dor:googleScannedPage', 'id' => child_id}
  url = DOR_URI + '/objects/' + parent_druid + '/resources'
  body = DorService.encodeParams(form_data)
  res = LyberCore::Connection.post(url, body, :content_type => 'application/x-www-form-urlencoded')
  res =~ /\/resources\/(.*)/
  druid = Regexp.last_match(1)
  LyberCore::Log.info("Child googleScannedPage object created for parent #{parent_druid}")
  LyberCore::Log.debug("child_id = #{child_id}")
  LyberCore::Log.debug("new druid = #{druid}")
  druid
rescue StandardError => e
  # Keep the underlying cause in both the log and the re-raised message;
  # previously it was replaced by a bare "Unable to create object ".
  msg = "Unable to create object: #{e.message}"
  LyberCore::Log.debug(msg)
  raise e, msg
end
|
62
|
-
|
63
|
-
|
64
|
-
# Takes a hash of arrays and builds a x-www-form-urlencoded string for POSTing form parameters
|
65
|
-
#
|
66
|
-
# == Parameters
|
67
|
-
# - <b>form_data</b> - a hash of arrays that contains the form data, ie. {'param1' => ['val1', 'val2'], 'param2' => ['val3']}
|
68
|
-
# Builds an application/x-www-form-urlencoded body from +form_data+.
#
# Each value may be an array of values or a single scalar, e.g.
#   {'param1' => ['val1', 'val2'], 'param2' => 'val3'}
# yields "param1=val1&param1=val2&param2=val3". Values are CGI-escaped;
# parameter names are emitted as given. nil values contribute nothing.
#
# @param form_data [Hash] parameter name => value or array of values
# @return [String] the encoded body ("" for an empty hash)
def DorService.encodeParams(form_data)
  form_data.flat_map do |param, values|
    # Array() lets callers pass a bare String (as create_child_object does);
    # calling #each on a String fails on Ruby 1.9 and later.
    Array(values).map { |value| "#{param}=#{CGI.escape(value.to_s)}" }
  end.join('&')
end
|
79
|
-
|
80
|
-
|
81
|
-
# Deprecated. Use Dor::WorkflowService#create_workflow in lyber_core gem
|
82
|
-
# def DorService.create_workflow(workflow, druid)
|
83
|
-
# begin
|
84
|
-
# url = URI.parse(DOR_URI + '/objects/' + druid + '/workflows/' + workflow.workflow_id)
|
85
|
-
# req = Net::HTTP::Put.new(url.path)
|
86
|
-
# #req.basic_auth 'fedoraUser', 'pass'
|
87
|
-
# req.body = workflow.workflow_process_xml
|
88
|
-
# req.content_type = 'application/xml'
|
89
|
-
# res = DorService.get_https_connection(url).start {|http| http.request(req) }
|
90
|
-
#
|
91
|
-
# WorkflowService.create_workflow()
|
92
|
-
#
|
93
|
-
# case res
|
94
|
-
# when Net::HTTPSuccess
|
95
|
-
# puts workflow.workflow_id + " created for " + druid
|
96
|
-
# else
|
97
|
-
# $stderr.print res.body
|
98
|
-
# raise res.error!
|
99
|
-
# end
|
100
|
-
# rescue Exception => e
|
101
|
-
# $stderr.print "Unable to create workflow " + e
|
102
|
-
# raise
|
103
|
-
# end
|
104
|
-
# end
|
105
|
-
|
106
|
-
|
107
|
-
# See if an object exists with this dor_id (not druid, but sub-identifier)
|
108
|
-
# Caller will have to handle any exception thrown
|
109
|
-
# Looks up the druid registered for +dor_id+ via DOR_URI/query_by_id.
#
# @param dor_id [String] sub-identifier (not a druid) to look up
# @return [String, nil] the druid attribute from the response body; nil on a
#   4xx response or when the body contains no druid attribute
# @raise [RuntimeError] on a 5xx or otherwise unexpected response
def DorService.get_druid_by_id(dor_id)
  query_url = "#{DOR_URI}/query_by_id?id=#{dor_id}"
  LyberCore::Log.debug("Fetching druid for dor_id #{dor_id} at url #{query_url}")
  url = URI.parse(query_url)
  response = LyberCore::Connection.send_request(url, Net::HTTP::Get.new(url.request_uri))

  case response
  when Net::HTTPSuccess
    response.body =~ /druid="([^"\r\n]*)"/
    Regexp.last_match(1)
  when Net::HTTPClientError
    LyberCore::Log.debug("Barcode does not yet exist in DOR: #{dor_id}")
    nil
  when Net::HTTPServerError
    LyberCore::Log.debug("Encountered HTTPServerError error when requesting #{url}: #{response.inspect}")
    raise "Encountered 500 error when requesting #{url}: #{response.inspect}"
  else
    failure = "Encountered unknown error when requesting #{url}: #{response.inspect}"
    LyberCore::Log.debug(failure)
    raise failure
  end
end
|
132
|
-
|
133
|
-
############################################# Start of Datastream methods
|
134
|
-
# Until ActiveFedora supports client-side certificate configuration, we are stuck with our own methods to access datastreams
|
135
|
-
|
136
|
-
#/objects/pid/datastreams/dsID ? [controlGroup] [dsLocation] [altIDs] [dsLabel] [versionable] [dsState] [formatURI] [checksumType] [checksum] [logMessage]
|
137
|
-
# Adds a datastream to +druid+; simply forwards every argument to
# DorService.add_datastream_managed.
def DorService.add_datastream(druid, ds_id, ds_label, xml, content_type = 'application/xml', versionable = false)
  DorService.add_datastream_managed(
    druid, ds_id, ds_label, xml, content_type, versionable
  )
end
|
140
|
-
|
141
|
-
# Adds a datastream with controlGroup=E whose dsLocation is +ext_url+,
# via a POST through DorService.set_datastream.
#
# NOTE(review): ext_url is appended to the query string as-is (only the
# label is CGI-escaped); callers presumably pass an already-safe url.
def DorService.add_datastream_external_url(druid, ds_id, ds_label, ext_url, content_type, versionable = false)
  query = ['?controlGroup=E', 'dsLabel=' + CGI.escape(ds_label)]
  query << 'versionable=false' unless versionable
  query << 'dsLocation=' + ext_url
  DorService.set_datastream(druid, ds_id, query.join('&'), :post, {:type => content_type})
end
|
148
|
-
|
149
|
-
# Replaces the content of an existing datastream (controlGroup=M) with +xml+
# via a PUT through DorService.set_datastream.
def DorService.update_datastream(druid, ds_id, xml, content_type = 'application/xml', versionable = false)
  query = versionable ? '?controlGroup=M' : '?controlGroup=M&versionable=false'
  DorService.set_datastream(druid, ds_id, query, :put, {:type => content_type, :xml => xml})
end
|
154
|
-
|
155
|
-
# Adds a datastream with controlGroup=M and the given label and +xml+
# content via a POST through DorService.set_datastream.
def DorService.add_datastream_managed(druid, ds_id, ds_label, xml, content_type = 'application/xml', versionable = false)
  query = ['?controlGroup=M', 'dsLabel=' + CGI.escape(ds_label)]
  query << 'versionable=false' unless versionable
  DorService.set_datastream(druid, ds_id, query.join('&'), :post, {:type => content_type, :xml => xml})
end
|
161
|
-
|
162
|
-
# Retrieve the content of a datastream of a DOR object
|
163
|
-
# e.g. FEDORA_URI + /objects/ + druid + /datastreams/dor/content gets "dor" datastream content
|
164
|
-
# Fetches FEDORA_URI/objects/<druid>/datastreams/<ds_id>/content.
#
# @param druid [String] object identifier
# @param ds_id [String] datastream identifier
# @return [String, nil] the datastream content; nil on a 4xx response
# @raise [RuntimeError] on a 5xx or otherwise unexpected response
def DorService.get_datastream(druid, ds_id)
  LyberCore::Log.debug("Connecting to #{FEDORA_URI}...")
  content_url = "#{FEDORA_URI}/objects/#{druid}/datastreams/#{ds_id}/content"
  url = URI.parse(content_url)
  LyberCore::Log.debug("Connecting to #{content_url}...")
  request = Net::HTTP::Get.new(url.request_uri)
  LyberCore::Log.debug("request object: #{request.inspect}")
  response = LyberCore::Connection.send_request(url, request)

  case response
  when Net::HTTPSuccess
    response.body
  when Net::HTTPClientError
    LyberCore::Log.debug("Datastream not found at url #{content_url}")
    nil
  when Net::HTTPServerError
    LyberCore::Log.debug("Attempted to reach #{content_url} but failed")
    raise "Encountered 500 error when requesting #{content_url}: #{response.inspect}"
  else
    failure = "Encountered unknown error when requesting #{url}: #{response.inspect}"
    LyberCore::Log.debug(failure)
    raise failure
  end
end
|
192
|
-
|
193
|
-
# Deprecated - use Dor::WorkflowService#get_workflow_xml
|
194
|
-
# Always raises: this entry point is kept only to point callers at
# Dor::WorkflowService#get_workflow_xml.
def DorService.get_workflow_xml(druid, workflow)
  raise Exception, "This method is deprecated. Please use Dor::WorkflowService#get_workflow_xml"
end
|
197
|
-
|
198
|
-
# Retrieve the metadata of a datastream of a DOR object
|
199
|
-
# e.g. FEDORA_URI + /objects/ + druid + /datastreams/dor gets "dor" datastream metadata
|
200
|
-
# Fetches the metadata (not the content) of a datastream from
# FEDORA_URI/objects/<druid>/datastreams/<ds_id>, using basic auth.
#
# @param druid [String] object identifier
# @param ds_id [String] datastream identifier
# @return [String, nil] the response body, or nil when the request did not succeed
# @raise the original exception class with a "Couldn't get datastream" message
def DorService.get_datastream_md(druid, ds_id)
  LyberCore::Log.debug("Connecting to #{FEDORA_URI}...")
  url = URI.parse(FEDORA_URI + '/objects/' + druid + '/datastreams/' + ds_id)
  LyberCore::Log.debug("Connecting to #{url}...")
  req = Net::HTTP::Get.new(url.request_uri)
  req.basic_auth FEDORA_USER, FEDORA_PASS
  LyberCore::Log.debug("request object: #{req.inspect}")
  res = LyberCore::Connection.send_request(url, req)
  case res
  when Net::HTTPSuccess
    res.body
  else
    LyberCore::Log.error("Attempted to reach #{url} but failed")
    # Was "#{dsid}", an undefined local that turned this branch into a NameError.
    LyberCore::Log.error("Datastream #{ds_id} not found for #{druid}")
    nil
  end
rescue StandardError => e
  raise e, "Couldn't get datastream from #{url}"
end
|
221
|
-
|
222
|
-
# Add a new datastream, but only if it does not yet exist
|
223
|
-
# Adds a new datastream, but only if DorService.get_datastream finds none
# with that id on the object.
#
# @param druid [String] object identifier
# @param ds_id [String] datastream identifier
# @param ds_label [String] label for the new datastream
# @param xml [String] datastream content; must not be nil
# @return whatever DorService.add_datastream returns, or nil if the
#   datastream already exists
# @raise [RuntimeError] when +xml+ is nil
def DorService.add_datastream_unless_exists(druid, ds_id, ds_label, xml)
  # The message used to reference an undefined local (ds), so a missing
  # payload surfaced as a NameError instead of this error.
  raise "No data supplied for datastream #{ds_id} of #{druid}" unless xml
  return if DorService.get_datastream(druid, ds_id)

  DorService.add_datastream(druid, ds_id, ds_label, xml)
end
|
233
|
-
|
234
|
-
############################################# End of Datastream methods
|
235
|
-
|
236
|
-
|
237
|
-
# Deprecated. Use Dor::WorkflowService#update_workflow_status
|
238
|
-
#PUT "objects/pid:123/workflows/GoogleScannedWF/convert"
|
239
|
-
#<process name=\"convert\" status=\"waiting\" elapsed="0.11" lifecycle="released" "/>"
|
240
|
-
#TODO increment attempts
|
241
|
-
# def DorService.updateWorkflowStatus(repository, druid, workflow, process, status, elapsed = 0, lifecycle = nil)
|
242
|
-
# begin
|
243
|
-
# url = URI.parse(WORKFLOW_URI + '/' + repository + '/objects/' + druid + '/workflows/' + workflow + '/' + process)
|
244
|
-
# req = Net::HTTP::Put.new(url.path)
|
245
|
-
# process_xml = '<process name="'+ process + '" status="' + status + '" '
|
246
|
-
# process_xml << 'elapsed="' + elapsed.to_s + '" '
|
247
|
-
# process_xml << 'lifecycle="' + lifecycle + '" ' if(lifecycle)
|
248
|
-
# process_xml << '/>'
|
249
|
-
# req.body = process_xml
|
250
|
-
# req.content_type = 'application/xml'
|
251
|
-
# res = DorService.get_https_connection(url).start {|http| http.request(req) }
|
252
|
-
# case res
|
253
|
-
# when Net::HTTPSuccess
|
254
|
-
# puts "#{workflow} process updated for " + druid
|
255
|
-
# else
|
256
|
-
# $stderr.print res.body
|
257
|
-
# raise res.error!
|
258
|
-
# end
|
259
|
-
# rescue Exception => e
|
260
|
-
# $stderr.print "Unable to update workflow " + e
|
261
|
-
# raise
|
262
|
-
# end
|
263
|
-
#
|
264
|
-
# end
|
265
|
-
|
266
|
-
# Returns string containing object list XML from a workflow DOR query
|
267
|
-
#
|
268
|
-
# @param [String] repository name of the repository you are querying. Right now, <tt>dor</tt> and <tt>sdr</tt> are supported
|
269
|
-
# @param [String] workflow name of the workflow being queried, eg <tt>googleScannedBookWF</tt>
|
270
|
-
# @param [String, Array] completed if only querying for one completed step, pass in a String.
|
271
|
-
# If querying for two completed steps, pass in an Array of the two completed steps
|
272
|
-
# @param [String] waiting the name of the waiting step
|
273
|
-
# @raise [LyberCore::Exceptions::EmptyQueue] When the query is successful, but no objects are found in that queue
|
274
|
-
# @raise [Exception] For other problems like connection failures
|
275
|
-
# @return [String] XML containing all the objects that match the specific query. It looks like:
|
276
|
-
# <objects>
|
277
|
-
# <object druid="dr:123" url="http://localhost:9999/jersey-spring/objects/dr:123%5c" />
|
278
|
-
# <object druid="dr:abc" url="http://localhost:9999/jersey-spring/objects/dr:abc%5c" />
|
279
|
-
# </objects>
|
280
|
-
# Queries WORKFLOW_URI/workflow_queue for objects that have finished the
# +completed+ step(s) and are waiting on +waiting+, and returns the XML
# produced by DorService.execute_workflow_xml_query.
#
# Missing arguments are only logged (fatal); the query is still attempted.
def DorService.get_objects_for_workstep(repository, workflow, completed, waiting)
  LyberCore::Log.debug("DorService.get_objects_for_workstep(#{repository}, #{workflow}, #{completed}, #{waiting})")

  if [repository, workflow, completed, waiting].any?(&:nil?)
    LyberCore::Log.fatal("Can't execute DorService.get_objects_for_workstep: missing info")
  end

  if !defined?(WORKFLOW_URI) || WORKFLOW_URI.nil?
    LyberCore::Log.fatal("WORKFLOW_URI is not set. ROBOT_ROOT = #{ROBOT_ROOT}")
    raise "WORKFLOW_URI is not set"
  end

  query = "#{WORKFLOW_URI}/workflow_queue?repository=#{repository}&workflow=#{workflow}&waiting=#{waiting}"
  # A non-array value is treated as a single completed step.
  steps = completed.class == Array ? completed : [completed]
  raise "The workflow service can only handle queries with no more than 2 completed steps" if steps.size > 2
  steps.each { |step| query << "&completed=#{step}" }

  DorService.execute_workflow_xml_query(query)
end
|
302
|
-
|
303
|
-
# Returns string containing object list XML from a workflow DOR query using fully qualified workflow step names
|
304
|
-
# eg <tt>dor:googleScannedBookWF:register-object</tt>
|
305
|
-
#
|
306
|
-
# @param [String, Array] completed if only querying for one completed step, pass in a String of a fully qualified workflow step.
|
307
|
-
# If querying for two completed steps, pass in an Array of the two completed steps
|
308
|
-
# @param [String] waiting the fully qualified name of the waiting step
|
309
|
-
# @raise [LyberCore::Exceptions::EmptyQueue] When the query is successful, but no objects are found in that queue
|
310
|
-
# @raise [Exception] For other problems like connection failures or passing in non-qualified workflow names
|
311
|
-
# @return [String] XML containing all the objects that match the specific query. It looks like:
|
312
|
-
# <objects>
|
313
|
-
# <object druid="dr:123" url="http://localhost:9999/jersey-spring/objects/dr:123%5c" />
|
314
|
-
# <object druid="dr:abc" url="http://localhost:9999/jersey-spring/objects/dr:abc%5c" />
|
315
|
-
# </objects>
|
316
|
-
# Same query as get_objects_for_workstep, but every step name must be fully
# qualified as <repository>:<workflow>:<stepname>.
#
# @raise [RuntimeError] when WORKFLOW_URI is unset, a step is not qualified,
#   or more than two completed steps are given
def DorService.get_objects_for_qualified_workstep(completed, waiting)
  LyberCore::Log.debug("DorService.get_objects_for_qualified_workstep(#{completed}, #{waiting})")

  if completed.nil? || waiting.nil?
    LyberCore::Log.fatal("Can't execute DorService.get_objects_for_qualified_workstep: missing info")
  end

  if !defined?(WORKFLOW_URI) || WORKFLOW_URI.nil?
    LyberCore::Log.fatal("WORKFLOW_URI is not set. ROBOT_ROOT = #{ROBOT_ROOT}")
    raise "WORKFLOW_URI is not set"
  end

  qualified_step = /.+:.+:.+/
  unless waiting =~ qualified_step
    raise "The waiting step was not fully qualified or of the form: <repository>:<workflow>:<stepname>. Received #{waiting}"
  end
  query = "#{WORKFLOW_URI}/workflow_queue?waiting=#{waiting}"

  steps = Array(completed)
  raise "The workflow service can only handle queries with no more than 2 completed steps" if steps.size > 2
  steps.each do |step|
    unless step =~ qualified_step
      raise "A completed step was not fully qualified or of the form: <repository>:<workflow>:<stepname>. Received #{step}"
    end
    query << "&completed=#{step}"
  end

  DorService.execute_workflow_xml_query(query)
end
|
342
|
-
|
343
|
-
|
344
|
-
|
345
|
-
# Unimplemented stub: despite its name it neither logs nor raises, and
# always returns nil.
def DorService.log_and_raise_workflow_connection_problem(repository, workflow, completed, waiting, response)
  nil
end
|
348
|
-
|
349
|
-
# Transforms the XML from getObjectsForWorkStep into a list of druids
|
350
|
-
# TODO figure out how to return a partial list
|
351
|
-
# This method is here for backward compatibility, but it has
|
352
|
-
# been superseded by DlssService.get_druids_from_object_list(objectListXml)
|
353
|
-
# Backward-compatible alias: hands the object-list XML to
# DlssService.get_all_druids_from_object_list and returns its result.
def DorService.get_druids_from_object_list(objectListXml)
  DlssService.get_all_druids_from_object_list(
    objectListXml
  )
end
|
356
|
-
|
357
|
-
# Retrieves the identityMetadata datastream for a DOR object,
|
358
|
-
# extracts the otherId values, and returns them in a hash
|
359
|
-
# Reads the identityMetadata datastream of +druid+ and collects its
# identityMetadata/otherId elements.
#
# @param druid [String] object identifier
# @return [Hash] "name" attribute => stripped element text (nil for empty elements)
# @raise the original exception class with a "Couldn't get object identifiers" message
def DorService.get_object_identifiers(druid)
  identity_xml = get_datastream(druid, 'identityMetadata')
  raise "Unable to get identityMetadata datastream for #{druid}" if identity_xml.nil?

  identifiers = {}
  Document.new(identity_xml).elements.each("identityMetadata/otherId") do |other_id|
    value = other_id.text
    identifiers[other_id.attributes["name"]] = value.nil? ? nil : value.strip
  end
  identifiers
rescue Exception => e
  raise e, "Couldn't get object identifiers for #{druid}"
end
|
377
|
-
|
378
|
-
# Copies sourceDir + objectid into destinationDir with `rsync -a -e ssh`.
#
# The command is run as an argument vector (no shell), so paths containing
# spaces or shell metacharacters are passed to rsync verbatim and are never
# interpreted by a shell. Consequently `~` and globs in the arguments are no
# longer expanded by a shell.
#
# @param objectid [String] name of the object directory/file to transfer
# @param sourceDir [String] source prefix, concatenated directly with objectid
# @param destinationDir [String] rsync destination
# @return [Boolean] whether destinationDir/objectid exists locally afterwards
def DorService.transfer_object(objectid, sourceDir, destinationDir)
  rsync_args = ['rsync', '-a', '-e', 'ssh', sourceDir + objectid, destinationDir]
  LyberCore::Log.debug(rsync_args.join(' ') + "\n")
  system(*rsync_args)
  # File.exists? was removed in Ruby 3.2; File.exist? works on all versions.
  File.exist?(File.join(destinationDir, objectid))
end
|
385
|
-
|
386
|
-
# Runs `md5sum -c checksumFile` inside +directory+ and reports whether every
# output line contained "OK".
#
# The child process is started directly in +directory+ (the caller's working
# directory is never changed, so nothing needs restoring on error) and the
# file name is passed as an argument rather than through a shell.
#
# NOTE(review): as before, only md5sum's stdout is inspected; a checksum file
# that md5sum cannot read produces no stdout lines and still yields true.
# A missing md5sum binary now raises Errno::ENOENT instead of yielding true.
#
# @param directory [String] directory the checksum file's paths are relative to
# @param checksumFile [String] path of the md5sum-format checksum file
# @return [Boolean] true when no output line lacks "OK"
def DorService.verify_checksums(directory, checksumFile)
  bad_count = IO.popen(['md5sum', '-c', checksumFile], :chdir => directory) do |report|
    report.each_line.count { |line| !line.include?('OK') }
  end
  bad_count.zero?
end
|
394
|
-
|
395
|
-
# Given a process and an error message, construct an xml fragment that can be
|
396
|
-
# posted to the workflow service to record the error generated for a given druid
|
397
|
-
# Builds the <process .../> fragment that records an error for a workflow step.
#
# Both texts are flattened to one line, stripped of ` ' # < > characters,
# have & escaped as &amp; (a raw ampersand made the fragment malformed XML),
# and have double quotes turned into single quotes so they fit in an attribute.
#
# @param process [String] workflow process name, emitted as-is
# @param error_msg [String, nil] short message; nil is treated as ""
# @param error_txt [String, nil] longer text; the errorText attribute is
#   omitted when nil
# @return [String] the XML fragment
def DorService.construct_error_update_request(process, error_msg, error_txt)
  sanitize = lambda do |text|
    text.to_s.gsub(/\s+/, " ").gsub(/[`'#<>]/, '').gsub('&', '&amp;').gsub(/"/, "'")
  end

  body = '<process name="' + process + '" status="error" errorMessage="' + sanitize.call(error_msg) + '" '
  body += 'errorText="' + sanitize.call(error_txt) + '" ' unless error_txt.nil?
  body + '/>'
end
|
405
|
-
|
406
|
-
# If an object encounters an error during processing, set its status to "error"
|
407
|
-
# Marks a workflow step as errored by PUTting the fragment built by
# construct_error_update_request to
# WORKFLOW_URI/<repository>/objects/<druid>/workflows/<workflow>/<process>.
#
# @return the result of the final info log call on success
# @raise the original exception class with an "Unable to update workflow
#   service" message on any failure, including a non-2xx response
def DorService.update_workflow_error_status(repository, druid, workflow, process, error_msg, error_txt = nil)
  LyberCore::Log.debug("Updating workflow error status for druid #{druid}")
  LyberCore::Log.debug("Error message is: #{error_msg}")
  LyberCore::Log.debug("Error text is: #{error_txt}")
  url_string = "#{WORKFLOW_URI}/#{repository}/objects/#{druid}/workflows/#{workflow}/#{process}"
  url = URI.parse(url_string)
  LyberCore::Log.debug("Using url #{url_string}")

  put_request = Net::HTTP::Put.new(url.path)
  put_request.body = DorService.construct_error_update_request(process, error_msg, error_txt)
  put_request.content_type = 'application/xml'
  LyberCore::Log.debug("Putting request: #{put_request.inspect}")
  response = LyberCore::Connection.send_request(url, put_request)
  LyberCore::Log.debug("Got response: #{response.inspect}")

  unless Net::HTTPSuccess === response
    LyberCore::Log.debug(response.body)
    # error! itself raises; the rescue below rewrites the message.
    response.error!
  end
  LyberCore::Log.info("#{workflow} - #{process} set to error for " + druid)
rescue Exception => e
  msg = "Unable to update workflow service at url #{url_string}"
  LyberCore::Log.debug(msg)
  raise e, msg
end
|
435
|
-
|
436
|
-
# This method sends a GET request to jenson and returns MARC XML
|
437
|
-
# Sends a GET request to /cgi-bin/holding.pl on zaph.stanford.edu with
# search=location and the given flexkey, and returns the response body.
#
# The flexkey is CGI-escaped on its own, so reserved characters such as
# & and = inside it can no longer alter the query string. (URI.escape, used
# previously, left those untouched and was removed in Ruby 3.0.)
#
# @param flexkey [String] key to look up
# @return [String] the response body
# @raise the original exception class with an "Encountered an error from
#   symphony" message, including for non-2xx responses
def DorService.query_symphony(flexkey)
  symphony_url = 'http://zaph.stanford.edu'
  request_path = '/cgi-bin/holding.pl?' + 'search=location&flexkey=' + CGI.escape(flexkey)
  url_string = symphony_url + request_path

  url = URI.parse(url_string)
  LyberCore::Log.debug("Attempting to query symphony: #{url_string}")
  res = Net::HTTP.start(url.host, url.port) { |http| http.get(request_path) }
  case res
  when Net::HTTPSuccess
    LyberCore::Log.debug("Successfully queried symphony for #{flexkey}")
    res.body
  else
    LyberCore::Log.debug("Encountered an error from symphony: #{res.body}")
    res.error!
  end
rescue StandardError => e
  raise e, "Encountered an error from symphony"
end #query_symphony
|
462
|
-
|
463
|
-
|
464
|
-
private
|
465
|
-
|
466
|
-
# Issues a GET against the workflow service and returns the raw response body.
#
# @param uri_string [String] full URL of the workflow query
# @return [String] the response body, when //objects/@count is non-zero
# @raise [LyberCore::Exceptions::EmptyQueue] when //objects/@count is 0
# @raise [StandardError] when the body cannot be parsed; re-raised with the
#   message "Could not parse response from Workflow Service"
# @raise [RuntimeError] when the service does not answer with a 2xx status
def DorService.execute_workflow_xml_query(uri_string)
  LyberCore::Log.info("Attempting to connect to #{uri_string}")
  url = URI.parse(uri_string)
  req = Net::HTTP::Get.new(url.request_uri)
  res = LyberCore::Connection.send_request(url, req)

  unless res.is_a?(Net::HTTPSuccess)
    # Log only what is in scope. The URL already carries the repository,
    # workflow and step names, and referring to locals that this method does
    # not define would raise NameError and hide the real failure.
    LyberCore::Log.fatal("Workflow queue not found at #{uri_string}")
    LyberCore::Log.debug(res.inspect)
    raise "Could not connect to url #{uri_string}"
  end

  begin
    doc = Nokogiri::XML(res.body)
    count = doc.root.at_xpath("//objects/@count").content.to_i
  rescue StandardError => e
    msg = "Could not parse response from Workflow Service"
    LyberCore::Log.debug(msg + "\n#{res.body}")
    raise e, msg
  end

  raise LyberCore::Exceptions::EmptyQueue.new, "empty queue" if count == 0

  res.body
end
|
499
|
-
# Creates or updates a datastream on a Fedora object.
#
# @param druid [String] object identifier, inserted into the URL path
# @param ds_id [String] datastream identifier, inserted into the URL path
# @param parms [String] appended verbatim to the URL after the datastream id
# @param method [Symbol] :post or :put
# @param content [Hash] optional :xml (request body) and :type (content type)
# @return [true] when the server answers with a 2xx status
# @raise [ArgumentError] when method is neither :post nor :put
# @raise [RuntimeError] when the server answers with anything but a 2xx status
def DorService.set_datastream(druid, ds_id, parms, method, content = {})
  url = URI.parse(FEDORA_URI + '/objects/' + druid + '/datastreams/' + ds_id + parms)
  req = case method
        when :post then Net::HTTP::Post.new(url.request_uri)
        when :put then Net::HTTP::Put.new(url.request_uri)
        else
          # Fail with a clear message instead of a NoMethodError on nil below.
          raise ArgumentError, "Unsupported HTTP method #{method.inspect}; expected :post or :put"
        end
  req.basic_auth FEDORA_USER, FEDORA_PASS
  req.body = content[:xml] if content[:xml]
  # Assigning a nil content type raises inside Net::HTTP, so only set it when
  # the caller supplied one (the default content hash is empty).
  req.content_type = content[:type] if content[:type]
  res = LyberCore::Connection.send_request(url, req)

  case res
  when Net::HTTPSuccess
    true
  when Net::HTTPServerError
    LyberCore::Log.debug("Attempted to set datastream #{url} but failed")
    raise "Encountered 500 error setting datastream #{url}: #{res.inspect}"
  else
    LyberCore::Log.debug("Encountered unknown error when setting datastream #{url}: #{res.inspect}")
    raise "Encountered unknown error when setting datastream #{url}: #{res.inspect}"
  end
end
|
528
|
-
|
529
|
-
# Fetches the identityMetadata, descMetadata, googlemets, contentMetadata and
# adminMetadata datastreams of an object (in that order) through
# DorService.get_datastream and returns them concatenated inside a single
# <objectMD druid='...'> wrapper element.
#
# @param druid [String] object identifier
# @return [String] the assembled XML text
def DorService.get_object_metadata(druid)
  datastream_ids = ['identityMetadata', 'descMetadata', 'googlemets', 'contentMetadata', 'adminMetadata']
  sections = datastream_ids.map { |ds_id| DorService.get_datastream(druid, ds_id) }

  opening = "<objectMD druid='" + druid + "' >\n"
  body = sections.inject(opening) { |assembled, section| assembled + section }
  return body + "</objectMD>\n"
end
|
540
|
-
|
541
|
-
end
|
542
|
-
|
543
|
-
# Given an array of strings, construct xml in which each member of the array
# becomes a <tag> element inside a single <tags> root.
#
# Each tag has runs of whitespace collapsed to one space and the characters
# '<', '>' and '!' removed; '&' is then escaped as '&amp;' so that a tag such
# as "Q&A" still produces well-formed XML.
#
# @param tag_array [Array<String>, nil] tags to serialize; nil yields an
#   empty <tags></tags> element
# @return [String] the XML text
def DorService.construct_xml_for_tag_array(tag_array)
  elements = Array(tag_array).map do |tag|
    cleaned = tag.gsub(/\s+/, " ").gsub(/[<>!]/, '').gsub('&', '&amp;')
    "<tag>#{cleaned}</tag>"
  end
  "<tags>" + elements.join + "</tags>"
end
|
553
|
-
|
554
|
-
|
555
|
-
# PUTs the given tags, serialized by DorService.construct_xml_for_tag_array,
# to the identityMetadata/tags resource of an object under DOR_URI.
#
# @param druid [String] object identifier, inserted into the URL path
# @param tags [Array<String>] tags to send
# @return [true] when the server answers with a 2xx status
# @raise [RuntimeError] when the server answers with anything but a 2xx status
def DorService.add_identity_tags(druid, tags)
  url = URI.parse(DOR_URI + '/objects/' + druid + '/datastreams/identityMetadata/tags' )
  req = Net::HTTP::Put.new(url.path)
  req.body = DorService.construct_xml_for_tag_array(tags)
  req.content_type = 'application/xml'
  res = LyberCore::Connection.send_request(url, req)

  return true if res.is_a?(Net::HTTPSuccess)

  if res.is_a?(Net::HTTPServerError)
    LyberCore::Log.debug("Attempted to add identity tags #{url} but failed")
    raise "Encountered 500 error when adding identity tags #{url}: #{res.inspect}"
  end

  LyberCore::Log.debug("Encountered unknown error when adding identity tags #{url}: #{res.inspect}")
  raise "Encountered unknown error when adding identity tags #{url}: #{res.inspect}"
end
|
577
|
-
|
578
|
-
#DorService.updateWorkflowStatus('dr:rf624mb644', 'GoogleScannedWF', 'descriptive-metadata', 'completed')
|
579
|
-
####Testing
|
580
|
-
#line = 'id="catkey:1990757"||id="barcode:36105045033136"||model="GoogleScannedBook"||label="The poacher"'
|
581
|
-
#form_data = {}
|
582
|
-
#DorService.parse_line_return_hashlist(line, form_data)
|
583
|
-
#form_data.each_pair{|k,v| puts "key: #{k} value: #{v}"}
|
584
|
-
#
|
585
|
-
#puts DorService.encodeParams(form_data)
|
586
|
-
|
587
|
-
#DorService.create_object('id="catkey:454545454545454"||id="barcode:434343434343434343434343434"||model="GoogleScannedBook"||label="Ruby multiple Id parms 3"')
|
588
|
-
|