lyber-core 0.9.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 Stanford University Library
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,58 @@
1
+ = lyber_core
2
+
3
+ Require the following:
4
+ require 'lyber_core'
5
+
6
+ Constants that need to be defined with sample values:
7
+
8
+ If using WorkflowService:
9
+ Dor::WF_URI = 'http://lyberservices-dev.stanford.edu/workflow'
10
+ Dor::CREATE_WORKFLOW = true
11
+
12
+ If using SuriService:
13
+ Dor::MINT_SURI_IDS = true
14
+ Dor::SURI_URL = 'http://some.suri.host:8080'
15
+ Dor::ID_NAMESPACE = 'druid'
16
+ Dor::SURI_USER = 'suriuser'
17
+ Dor::SURI_PASSWORD = 'suripword'
18
+
19
+ If connecting to https servers:
20
+ LyberCore::CERT_FILE = File.dirname(__FILE__) + '/../certs/dummy.crt'
21
+ LyberCore::KEY_FILE = File.dirname(__FILE__) + '/../certs/dummy.key'
22
+ LyberCore::KEY_PASS = 'dummy'
23
+
24
+ == lyber_core/utils
25
+
26
+ If using Utilities, require the following:
27
+ require 'lyber_core/utils'
28
+
29
+ This will give you:
30
+ LyberCore::Utils::BagitBag
31
+ LyberCore::Utils::ChecksumValidate
32
+ LyberCore::Utils::FileUtilities
33
+
34
+ If you do not want all 3, you can require the individual classes. For example, if you only want the bagit utils, then require:
35
+ require 'lyber_core/utils/bagit_bag'
36
+
37
+ The BagitBag class requires the bagit gem
38
+ http://github.com/flazz/bagit
39
+
40
+ == Build and release procedure
41
+ Run: 'rake dlss_release' to tag, build, and publish the lyber-core gem
42
+ See the Rakefile and the LyberCore::DlssRelease task in lib/lyber_core/rake/dlss_release.rb for more details
43
+
44
+ == Releases
45
+ - <b>0.9.6</b> DorService.get_objects_for_workstep can handle one or two completed steps. Trimmed-down gem dependencies now defined in lyber-core.gemspec. 'rake dlss_release' will tag, build and publish gem
46
+ - <b>0.9.5.5</b> Robots now log to ROBOT_ROOT/log/robot_name.log unless specified in constructor
47
+ - <b>0.9.5.4</b> Custom exception classes, more checking of error conditions
48
+ - <b>0.9.5.3</b> More robust testing, minor bug fixes, compatible with active_fedora 1.2.6
49
+ - <b>0.9.5</b> Significantly refactored to provide central logging and many more debugging statements.
50
+ - <b>0.9.4</b> First version that requires Ruby 1.8.7. Built with bundler and rvm
51
+ - <b>0.9.3.9</b> Last version compatible with Ruby 1.8.6. Stored in source control as the 'facets-282' branch.
52
+ - <b>0.9.3</b> Compatibility with bagit 1.0.0. Bump to active-fedora 1.1.13
53
+ - <b>0.9.2</b> Workflow bug fixes. Last version that supports active-fedora 1.0.7
54
+ - We recommend that you <b>DO NOT USE</b> any version older than these
55
+
56
+ == Copyright
57
+
58
+ Copyright (c) 2010 Stanford University Library. See LICENSE for details.
@@ -0,0 +1,82 @@
1
+ require 'rubygems'
2
+ require 'net/http'
3
+ require 'net/https'
4
+ require 'uri'
5
+ require 'cgi'
6
+ require 'active_fedora'
7
+ require 'lyber_core'
8
+ require 'nokogiri'
9
+
10
# Helper for talking to a Fedora repository over HTTP(S) and for turning
# workflow "object list" XML into lists of druids.
class DlssService

  # URL of the Fedora repository this service was registered against
  attr_reader :fedora_url

  # Registers the Fedora repository and the Solr service.
  #
  # == Parameters
  # - <b>fedora_url</b> - URL of the Fedora repository
  # - <b>solr_url</b> - URL of the Solr service; defaults to the local Solr
  #   instance that was previously hard-coded here
  def initialize(fedora_url, solr_url = "http://localhost:8983/solr")
    @fedora_url = fedora_url
    Fedora::Repository.register(@fedora_url)
    ActiveFedora::SolrService.register(solr_url)
  end

  # Builds (without opening) a Net::HTTP connection for the given parsed URI.
  # For https URLs the client certificate and key are read from CERT_FILE /
  # KEY_FILE (unlocked with KEY_PASS).
  def get_https_connection(url)
    https = Net::HTTP.new(url.host, url.port)
    return https unless url.scheme == 'https'

    https.use_ssl = true
    https.cert = OpenSSL::X509::Certificate.new(File.read(CERT_FILE))
    https.key = OpenSSL::PKey::RSA.new(File.read(KEY_FILE), KEY_PASS)
    # NOTE(review): server certificate verification is switched off here;
    # confirm this is intended outside of development environments.
    https.verify_mode = OpenSSL::SSL::VERIFY_NONE
    https
  end

  # Retrieve the metadata of a datastream of a DOR object
  # e.g. fedora_url + /objects/ + druid + /datastreams/dor gets "dor" datastream metadata
  #
  # Returns the response body on HTTP success. On any other response an
  # error is logged and nil is returned.
  def get_datastream_md(druid, ds_id)
    url = URI.parse(@fedora_url + '/objects/' + druid + '/datastreams/' + ds_id)
    req = Net::HTTP::Get.new(url.request_uri)
    req.basic_auth FEDORA_USER, FEDORA_PASS
    # Use this object's own connection builder rather than DorService's copy
    res = get_https_connection(url).start { |http| http.request(req) }
    return res.body if res.is_a?(Net::HTTPSuccess)

    LyberCore::Log.error("Datastream " + ds_id + " not found for " + druid)
    nil
  end

  # This is maintained for backward compatibility, but
  # get_all_druids_from_object_list or get_some_druids_from_object_list
  # are preferred.
  def self.get_druids_from_object_list(objectListXml)
    get_all_druids_from_object_list(objectListXml)
  end

  # Transforms the XML from getObjectsForWorkStep into a list of all druids
  def self.get_all_druids_from_object_list(objectListXml)
    get_some_druids_from_object_list(objectListXml, nil)
  end

  # Takes XML of the form
  # <objects><object id='druid:hx066mp6063' url='https://lyberservices-test.stanford.edu/workflow/objects/druid:hx066mp6063'/></objects>
  # if count is an integer, return at most that number of druids
  # otherwise, return all druids in the queue
  def self.get_some_druids_from_object_list(objectListXml, count)
    druids = []
    # A limit of zero means "nothing"; previously the limit was only checked
    # after an id had been appended, so 0 was never reached and all druids
    # were returned.
    return druids if count == 0

    # Stream through the document, collecting the id attribute of every
    # node that has one
    Nokogiri::XML::Reader(objectListXml).each do |node|
      id = node.attribute("id")
      druids << id unless id.nil?
      break if druids.length == count
    end
    druids
  end

end
@@ -0,0 +1,18 @@
1
+
2
module Dor

  # ActiveFedora::Base subclass whose new objects get their pid from
  # Dor::SuriService.mint_id when the caller does not supply one.
  class Base < ActiveFedora::Base
    # == Parameters
    # - <b>attrs</b> - hash of attributes. When :pid is absent a pid is minted
    #   and the object is flagged as new. When :pid is present the object is
    #   flagged as new unless :new_object is explicitly false.
    def initialize(attrs = {})
      if attrs[:pid]
        @new_object = attrs[:new_object] != false
      else
        # Build a new hash rather than merge!-ing into the caller's hash, so
        # the argument passed in is not modified as a side effect
        attrs = attrs.merge(:pid => Dor::SuriService.mint_id)
        @new_object = true
      end
      @inner_object = Fedora::FedoraObject.new(attrs)
      @datastreams = {}
      configure_defined_datastreams
    end
  end

end
@@ -0,0 +1,28 @@
1
+ require 'net/https'
2
+ require 'active_fedora'
3
+
4
module Dor
  class SuriService

    # Mints a new object identifier.
    #
    # If Dor::MINT_SURI_IDS is not set to true, the next id is taken from the
    # Fedora repository instead. Otherwise an empty-bodied POST is sent to the
    # SURI service and the result is returned as ID_NAMESPACE:id_from_suri.
    #
    # Any exception is logged and then re-raised to the caller.
    def self.mint_id
      return Fedora::Repository.instance.nextid unless Dor::MINT_SURI_IDS

      # Post with no body
      suri_endpoint = "#{Dor::SURI_URL}/suri2/namespaces/#{Dor::ID_NAMESPACE}/identifiers"
      id = LyberCore::Connection.post(suri_endpoint, nil,
                                      :auth_user => Dor::SURI_USER,
                                      :auth_password => Dor::SURI_PASSWORD)

      "#{Dor::ID_NAMESPACE}:#{id.strip}"
    rescue Exception => e
      # Exception (not StandardError) is rescued on purpose: the error is
      # always re-raised, this handler only adds a log line.
      message = "Unable to mint id from suri: #{e.to_s}"
      # Rails is not guaranteed to be loaded when this gem is used from a
      # plain script; referencing it unconditionally would raise a NameError
      # here and hide the original failure.
      if defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger
        Rails.logger.error(message)
      else
        LyberCore::Log.error(message)
      end
      raise e
    end

  end
end
@@ -0,0 +1,111 @@
1
+
2
require 'cgi'

module Dor

  # Methods to create and update workflow
  #
  # ==== Required Constants
  # - Dor::CREATE_WORKFLOW : true or false. Can be used to turn off workflow in a particular environment, like development
  # - Dor::WF_URI : The URI to the workflow service. An example URI is 'http://lyberservices-dev.stanford.edu/workflow'
  module WorkflowService

    # Creates a workflow for a given object in the repository. If this particular workflow for this object exists,
    # it will replace the old workflow with wf_xml passed to this method.
    # Returns true on success (and immediately when Dor::CREATE_WORKFLOW is false). Caller must handle any exceptions
    #
    # == Parameters
    # - <b>repo</b> - The repository the object resides in. The service recognizes "dor" and "sdr" at the moment
    # - <b>druid</b> - The id of the object
    # - <b>workflow_name</b> - The name of the workflow you want to create
    # - <b>wf_xml</b> - The xml that represents the workflow
    #
    def self.create_workflow(repo, druid, workflow_name, wf_xml)
      return true unless Dor::CREATE_WORKFLOW

      full_uri = "#{Dor::WF_URI}/#{repo}/objects/#{druid}/workflows/#{workflow_name}"

      # On success, an empty body is sent
      LyberCore::Connection.put(full_uri, wf_xml) { |response| true }
    end

    # Updates the status of one step in a workflow.
    # Returns true on success (and immediately when Dor::CREATE_WORKFLOW is false). Caller must handle any exceptions
    #
    # == Required Parameters
    # - <b>repo</b> - The repository the object resides in. The service recognizes "dor" and "sdr" at the moment
    # - <b>druid</b> - The id of the object
    # - <b>workflow</b> - The name of the workflow
    # - <b>process</b> - The name of the workflow step being updated
    # - <b>status</b> - The status that you want to set. Typical statuses are 'waiting', 'completed', 'error', but could be any string
    #
    # == Optional Parameters
    # - <b>elapsed</b> - The number of seconds it took to complete this step. Can have a decimal. Is set to 0 if not passed in.
    # - <b>lifecycle</b> - Bookkeeping label for this particular workflow step. Examples are: 'registered', 'shelved'
    #
    # == Http Call
    # The method does an HTTP PUT to the URL defined in Dor::WF_URI. As an example:
    #   PUT "/dor/objects/pid:123/workflows/GoogleScannedWF/convert"
    #   <process name=\"convert\" status=\"completed\" />"
    def self.update_workflow_status(repo, druid, workflow, process, status, elapsed = 0, lifecycle = nil)
      return true unless Dor::CREATE_WORKFLOW

      uri = "#{Dor::WF_URI}/#{repo}/objects/#{druid}/workflows/#{workflow}/#{process}"
      # Attribute values are XML-escaped so that quotes, '<' or '&' in a
      # value cannot produce a malformed document
      process_xml = '<process name="' + CGI.escapeHTML(process) + '" status="' + CGI.escapeHTML(status) + '" '
      process_xml << 'elapsed="' + CGI.escapeHTML(elapsed.to_s) + '" '
      process_xml << 'lifecycle="' + CGI.escapeHTML(lifecycle) + '" ' if lifecycle
      process_xml << '/>'

      # On success, an empty body is sent
      LyberCore::Connection.put(uri, process_xml) { |response| true }
    end

    #
    # Retrieves the process status of the given workflow for the given object identifier
    #
    # Raises an Exception when the response has no root element.
    # NOTE(review): when no <process> with the given name is present, at_xpath
    # presumably returns nil and the .content call fails - confirm whether
    # callers rely on that.
    def self.get_workflow_status(repo, druid, workflow, process)
      uri = "#{Dor::WF_URI}/#{repo}/objects/#{druid}/workflows/#{workflow}"
      workflow_md = LyberCore::Connection.get(uri)

      doc = Nokogiri::XML(workflow_md)
      raise Exception.new("Unable to parse response:\n#{workflow_md}") if doc.root.nil?

      doc.root.at_xpath("//process[@name='#{process}']/@status").content
    end

    # Returns whatever the workflow service responds with for a GET of the
    # given object's workflow
    def self.get_workflow_xml(repo, druid, workflow)
      uri = "#{Dor::WF_URI}/#{repo}/objects/#{druid}/workflows/#{workflow}"
      LyberCore::Connection.get(uri)
    end

    # Updates the status of one step in a workflow to error.
    # Returns true on success. Caller must handle any exceptions
    #
    # == Required Parameters
    # - <b>repo</b> - The repository the object resides in. The service recognizes "dor" and "sdr" at the moment
    # - <b>druid</b> - The id of the object
    # - <b>workflow</b> - The name of the workflow
    # - <b>process</b> - The name of the workflow step being updated
    # - <b>error_msg</b> - The error message. Ideally, this is a brief message describing the error
    #
    # == Optional Parameters
    # - <b>error_txt</b> - A slot to hold more information about the error, like a full stacktrace
    #
    # == Http Call
    # The method does an HTTP PUT to the URL defined in Dor::WF_URI. As an example:
    #   PUT "/dor/objects/pid:123/workflows/GoogleScannedWF/convert"
    #   <process name=\"convert\" status=\"error\" />"
    def self.update_workflow_error_status(repo, druid, workflow, process, error_msg, error_txt = nil)
      uri = "#{Dor::WF_URI}/#{repo}/objects/#{druid}/workflows/#{workflow}/#{process}"
      # Error messages and stack traces routinely contain quotes, '<', '>'
      # and '&'; escape them so the attribute values stay well-formed XML
      process_xml = '<process name="' + CGI.escapeHTML(process) + '" status="error" errorMessage="' + CGI.escapeHTML(error_msg) + '" '
      process_xml << 'errorText="' + CGI.escapeHTML(error_txt) + '" ' if error_txt
      process_xml << '/>'

      # On success, an empty body is sent
      LyberCore::Connection.put(uri, process_xml) { |response| true }
    end

  end
end
@@ -0,0 +1,535 @@
1
+ require 'net/http'
2
+ require 'net/https'
3
+ require 'uri'
4
+ require 'cgi'
5
+ require 'rexml/document'
6
+
7
+ include REXML
8
+
9
+ class DorService
10
+
11
# Builds (without opening) a Net::HTTP connection for the given parsed URI.
# For https URLs, SSL is enabled with the client certificate and key read
# from CERT_FILE / KEY_FILE (unlocked with KEY_PASS).
def DorService.get_https_connection(url)
  connection = Net::HTTP.new(url.host, url.port)
  return connection unless url.scheme == 'https'

  connection.use_ssl = true
  connection.cert = OpenSSL::X509::Certificate.new(File.read(CERT_FILE))
  connection.key = OpenSSL::PKey::RSA.new(File.read(KEY_FILE), KEY_PASS)
  # NOTE(review): server certificate verification is switched off here
  connection.verify_mode = OpenSSL::SSL::VERIFY_NONE
  connection
end
21
+
22
# POSTs the form data to DOR_URI/objects and returns whatever follows
# "/objects/" in the response (nil when the response does not match).
# Failures are logged with their backtrace and re-raised.
#
# TODO: this should check to see if an object with the given PID already
# exists in the repository
def DorService.create_object(form_data)
  endpoint = DOR_URI + '/objects'
  payload = DorService.encodeParams(form_data)
  response = LyberCore::Connection.post(endpoint, payload, :content_type => 'application/x-www-form-urlencoded')
  # The new druid is the tail of the returned object location
  response =~ /\/objects\/(.*)/
  $1
rescue Exception => e
  LyberCore::Log.error("Unable to create object #{e.backtrace}")
  raise e
end
38
+
39
# Creates a child object of type dor:googleScannedPage under the given parent
# by POSTing to objects/<parent_druid>/resources with the parameters
# model and id.
#
# Returns whatever follows "/resources/" in the response (nil when the
# response does not match). Failures are logged and re-raised as the same
# exception class with the message "Unable to create object ".
def DorService.create_child_object(parent_druid, child_id)
  # See if page exists before creating new fedora object
  # raise "Object exists with id: " + child_id if(DorService.get_druid_by_id(child_id))

  # encodeParams is documented to take a hash of arrays; passing bare
  # Strings relied on String#each, which does not exist on Ruby >= 1.9
  form_data = {'model' => ['dor:googleScannedPage'], 'id' => [child_id]}
  url = DOR_URI + '/objects/' + parent_druid + '/resources'
  body = DorService.encodeParams(form_data)
  content_type = 'application/x-www-form-urlencoded'
  res = LyberCore::Connection.post(url, body, :content_type => content_type)
  res =~ /\/resources\/(.*)/
  druid = $1
  LyberCore::Log.info("Child googleScannedPage object created for parent #{parent_druid}")
  LyberCore::Log.debug("child_id = #{child_id}")
  LyberCore::Log.debug("new druid = #{druid}")
  druid
rescue Exception => e
  LyberCore::Log.error("Unable to create object")
  raise e, "Unable to create object "
end
62
+
63
+
64
# Takes a hash of arrays and builds a x-www-form-urlencoded string for POSTing form parameters
#
# == Parameters
# - <b>form_data</b> - a hash of arrays that contains the form data, ie. {'param1' => ['val1', 'val2'], 'param2' => ['val3']}
#   A single (non-array) value is also accepted and treated as a one-element
#   list; a nil value contributes nothing.
#
# Values are CGI-escaped; parameter names are emitted as given.
# Returns the encoded string ("" for an empty hash).
def DorService.encodeParams(form_data)
  pairs = []
  form_data.each_pair do |param, values|
    # Array() lets callers pass 'value' as well as ['value']
    Array(values).each do |value|
      pairs << "#{param}=#{CGI.escape(value.to_s)}"
    end
  end
  pairs.join('&')
end
79
+
80
+
81
+ # Deprecated. Use Dor::WorkflowService#create_workflow in lyber_core gem
82
+ # def DorService.create_workflow(workflow, druid)
83
+ # begin
84
+ # url = URI.parse(DOR_URI + '/objects/' + druid + '/workflows/' + workflow.workflow_id)
85
+ # req = Net::HTTP::Put.new(url.path)
86
+ # #req.basic_auth 'fedoraUser', 'pass'
87
+ # req.body = workflow.workflow_process_xml
88
+ # req.content_type = 'application/xml'
89
+ # res = DorService.get_https_connection(url).start {|http| http.request(req) }
90
+ #
91
+ # WorkflowService.create_workflow()
92
+ #
93
+ # case res
94
+ # when Net::HTTPSuccess
95
+ # puts workflow.workflow_id + " created for " + druid
96
+ # else
97
+ # $stderr.print res.body
98
+ # raise res.error!
99
+ # end
100
+ # rescue Exception => e
101
+ # $stderr.print "Unable to create workflow " + e
102
+ # raise
103
+ # end
104
+ # end
105
+
106
+
107
# Looks up the druid of the object that carries this dor_id (not a druid,
# but a sub-identifier) via DOR_URI/query_by_id.
#
# Returns the value of the first druid="..." attribute in the response body
# on success, or nil on any 4xx response. Raises on 5xx and on any other
# response; the caller will have to handle any exception thrown.
def DorService.get_druid_by_id(dor_id)
  query_url = "#{DOR_URI}/query_by_id?id=#{dor_id}"
  LyberCore::Log.debug("Fetching druid for dor_id #{dor_id} at url #{query_url}")
  url = URI.parse(query_url)
  request = Net::HTTP::Get.new(url.request_uri)
  response = DorService.get_https_connection(url).start { |http| http.request(request) }

  if response.is_a?(Net::HTTPSuccess)
    response.body =~ /druid="([^"\r\n]*)"/
    return $1
  elsif response.is_a?(Net::HTTPClientError)
    # Every client error is treated as "no such object yet"
    LyberCore::Log.debug("Barcode does not yet exist in DOR: #{dor_id}")
    return nil
  elsif response.is_a?(Net::HTTPServerError)
    LyberCore::Log.error("Encountered HTTPServerError error when requesting #{url}: #{response.inspect}")
    raise "Encountered 500 error when requesting #{url}: #{response.inspect}"
  end

  LyberCore::Log.error("Encountered unknown error when requesting #{url}: #{response.inspect}")
  raise "Encountered unknown error when requesting #{url}: #{response.inspect}"
end
131
+
132
+ ############################################# Start of Datastream methods
133
+ # Until ActiveFedora supports client-side certificate configuration, we are stuck with our own methods to access datastreams
134
+
135
# Adds a datastream to an object; simply forwards to add_datastream_managed.
#
# Fedora REST call shape:
#/objects/{pid}/datastreams/{dsID} ? [controlGroup] [dsLocation] [altIDs] [dsLabel] [versionable] [dsState] [formatURI] [checksumType] [checksum] [logMessage]
def DorService.add_datastream(druid, ds_id, ds_label, xml, content_type='application/xml', versionable = false )
  DorService.add_datastream_managed(
    druid, ds_id, ds_label, xml, content_type, versionable
  )
end
139
+
140
# Adds an externally referenced datastream (controlGroup=E) that points at
# ext_url, via a POST through set_datastream.
#
# == Parameters
# - <b>druid</b> - id of the object
# - <b>ds_id</b> - id of the datastream
# - <b>ds_label</b> - datastream label
# - <b>ext_url</b> - location of the external content
# - <b>content_type</b> - content type sent with the request
# - <b>versionable</b> - when false (the default) versionable=false is sent
def DorService.add_datastream_external_url(druid, ds_id, ds_label, ext_url, content_type, versionable = false)
  parms = '?controlGroup=E'
  parms += '&dsLabel=' + CGI.escape(ds_label)
  parms += '&versionable=false' unless versionable
  # Escape the location like the label above: an unescaped URL containing
  # '&', '?' or '#' would otherwise be split into separate query parameters
  parms += '&dsLocation=' + CGI.escape(ext_url)
  DorService.set_datastream(druid, ds_id, parms, :post, {:type => content_type})
end
147
+
148
# Replaces the content of a managed datastream (controlGroup=M) with xml,
# via a PUT through set_datastream. versionable=false is sent unless
# versionable is truthy.
def DorService.update_datastream(druid, ds_id, xml, content_type='application/xml', versionable = false)
  query = versionable ? '?controlGroup=M' : '?controlGroup=M&versionable=false'
  payload = {:type => content_type, :xml => xml}
  DorService.set_datastream(druid, ds_id, query, :put, payload)
end
153
+
154
# Adds a managed datastream (controlGroup=M) holding xml, via a POST through
# set_datastream. The label is CGI-escaped; versionable=false is sent unless
# versionable is truthy.
def DorService.add_datastream_managed(druid, ds_id, ds_label, xml, content_type='application/xml', versionable = false )
  query_parts = ['?controlGroup=M', '&dsLabel=' + CGI.escape(ds_label)]
  query_parts << '&versionable=false' unless versionable
  payload = {:type => content_type, :xml => xml}
  DorService.set_datastream(druid, ds_id, query_parts.join, :post, payload)
end
160
+
161
# Retrieve the content of a datastream of a DOR object
# e.g. FEDORA_URI + /objects/ + druid + /datastreams/dor/content gets "dor" datastream content
#
# Returns the response body on success and nil on any 4xx response.
# Raises on 5xx and on any other response; exceptions from the HTTP call
# propagate unchanged.
def DorService.get_datastream(druid, ds_id)
  LyberCore::Log.debug("Connecting to #{FEDORA_URI}...")
  content_url = "#{FEDORA_URI}/objects/#{druid}/datastreams/#{ds_id}/content"
  url = URI.parse(content_url)
  LyberCore::Log.debug("Connecting to #{content_url}...")
  request = Net::HTTP::Get.new(url.request_uri)
  LyberCore::Log.debug("request object: #{request.inspect}")
  response = DorService.get_https_connection(url).start { |http| http.request(request) }

  return response.body if response.is_a?(Net::HTTPSuccess)

  if response.is_a?(Net::HTTPClientError)
    LyberCore::Log.debug("Datastream not found at url #{content_url}")
    return nil
  end

  if response.is_a?(Net::HTTPServerError)
    LyberCore::Log.error("Attempted to reach #{content_url} but failed")
    raise "Encountered 500 error when requesting #{content_url}: #{response.inspect}"
  end

  LyberCore::Log.error("Encountered unknown error when requesting #{url}: #{response.inspect}")
  raise "Encountered unknown error when requesting #{url}: #{response.inspect}"
end
190
+
191
# Deprecated - use Dor::WorkflowService#get_workflow_xml
# Always raises; kept only so that old callers get an explicit message.
def DorService.get_workflow_xml(druid, workflow)
  raise Exception, "This method is deprecated.  Please use Dor::WorkflowService#get_workflow_xml"
end
195
+
196
# Retrieve the metadata of a datastream of a DOR object
# e.g. FEDORA_URI + /objects/ + druid + /datastreams/dor gets "dor" datastream metadata
#
# The request is sent with basic auth (FEDORA_USER / FEDORA_PASS).
# Returns the response body on HTTP success. On any other response the
# failure is logged and nil is returned. Exceptions raised along the way are
# re-raised as the same class with a "Couldn't get datastream" message.
def DorService.get_datastream_md(druid, ds_id)
  LyberCore::Log.debug("Connecting to #{FEDORA_URI}...")
  url = URI.parse(FEDORA_URI + '/objects/' + druid + '/datastreams/' + ds_id)
  LyberCore::Log.debug("Connecting to #{url}...")
  req = Net::HTTP::Get.new(url.request_uri)
  req.basic_auth FEDORA_USER, FEDORA_PASS
  LyberCore::Log.debug("request object: #{req.inspect}")
  res = DorService.get_https_connection(url).start { |http| http.request(req) }
  return res.body if res.is_a?(Net::HTTPSuccess)

  LyberCore::Log.error("Attempted to reach #{url} but failed")
  # This line used to interpolate an undefined local (dsid), so the failure
  # path raised a NameError instead of reporting the missing datastream
  LyberCore::Log.error("Datastream #{ds_id} not found for #{druid}")
  nil
rescue Exception => e
  raise e, "Couldn't get datastream from #{url}"
end
218
+
219
# Add a new datastream, but only if it does not yet exist
#
# Raises a RuntimeError when xml is nil or false. Otherwise the datastream
# is added only when get_datastream finds no existing content for ds_id;
# returns the result of add_datastream in that case and nil when the
# datastream already exists.
def DorService.add_datastream_unless_exists(druid, ds_id, ds_label, xml)
  # make sure xml was supplied
  # (the message previously referenced an undefined local `ds`, which raised
  # a NameError instead of this error)
  raise "No data supplied for datastream " + ds_id + " of " + druid unless xml

  # check to make sure datastream does not yet exist
  return nil if DorService.get_datastream(druid, ds_id)

  DorService.add_datastream(druid, ds_id, ds_label, xml)
end
230
+
231
+ ############################################# End of Datastream methods
232
+
233
+
234
+ # Deprecated. Use Dor::WorkflowService#update_workflow_status
235
+ #PUT "objects/pid:123/workflows/GoogleScannedWF/convert"
236
+ #<process name=\"convert\" status=\"waiting\" elapsed="0.11" lifecycle="released" "/>"
237
+ #TODO increment attempts
238
+ # def DorService.updateWorkflowStatus(repository, druid, workflow, process, status, elapsed = 0, lifecycle = nil)
239
+ # begin
240
+ # url = URI.parse(WORKFLOW_URI + '/' + repository + '/objects/' + druid + '/workflows/' + workflow + '/' + process)
241
+ # req = Net::HTTP::Put.new(url.path)
242
+ # process_xml = '<process name="'+ process + '" status="' + status + '" '
243
+ # process_xml << 'elapsed="' + elapsed.to_s + '" '
244
+ # process_xml << 'lifecycle="' + lifecycle + '" ' if(lifecycle)
245
+ # process_xml << '/>'
246
+ # req.body = process_xml
247
+ # req.content_type = 'application/xml'
248
+ # res = DorService.get_https_connection(url).start {|http| http.request(req) }
249
+ # case res
250
+ # when Net::HTTPSuccess
251
+ # puts "#{workflow} process updated for " + druid
252
+ # else
253
+ # $stderr.print res.body
254
+ # raise res.error!
255
+ # end
256
+ # rescue Exception => e
257
+ # $stderr.print "Unable to update workflow " + e
258
+ # raise
259
+ # end
260
+ #
261
+ # end
262
+
263
# Returns string containing object list XML from a workflow DOR query
#
# @param [String] repository name of the repository you are querying. Right now, <tt>dor</tt> and <tt>sdr</tt> are supported
# @param [String] workflow name of the workflow being queried, eg <tt>googleScannedBookWF</tt>
# @param [String, Array] completed if only querying for one completed step, pass in a String.
#   If querying for two completed steps, pass in an Array of the two completed steps
# @param [String] waiting the name of the waiting step
# @raise [LyberCore::Exceptions::EmptyQueue] When the query is successful, but no objects are found in that queue
# @raise [Exception] For other problems like connection failures
# @return [String] XML containing all the objects that match the specific query. It looks like:
#   <objects>
#     <object druid="dr:123" url="http://localhost:9999/jersey-spring/objects/dr:123%5c" />
#     <object druid="dr:abc" url="http://localhost:9999/jersey-spring/objects/dr:abc%5c" />
#   </objects>
def DorService.get_objects_for_workstep(repository, workflow, completed, waiting)
  LyberCore::Log.debug("DorService.get_objects_for_workstep(#{repository}, #{workflow}, #{completed}, #{waiting})")

  # Missing arguments are only logged; the query is still attempted
  if [repository, workflow, completed, waiting].any? { |arg| arg.nil? }
    LyberCore::Log.fatal("Can't execute DorService.get_objects_for_workstep: missing info")
  end

  if !defined?(WORKFLOW_URI) || WORKFLOW_URI.nil?
    LyberCore::Log.fatal("WORKFLOW_URI is not set. ROBOT_ROOT = #{ROBOT_ROOT}")
    raise "WORKFLOW_URI is not set"
  end

  # One completed= parameter per completed step
  uri_string = "#{WORKFLOW_URI}/workflow_queue?repository=#{repository}&workflow=#{workflow}&waiting=#{waiting}"
  if completed.instance_of?(Array)
    raise "The workflow service can only handle queries with no more than 2 completed steps" if completed.size > 2
    completed.each { |step| uri_string << "&completed=#{step}" }
  else
    uri_string << "&completed=#{completed}"
  end

  LyberCore::Log.info("Attempting to connect to #{uri_string}")
  url = URI.parse(uri_string)
  request = Net::HTTP::Get.new(url.request_uri)
  response = DorService.get_https_connection(url).start { |http| http.request(request) }

  unless response.is_a?(Net::HTTPSuccess)
    LyberCore::Log.fatal("Workflow queue not found for #{workflow} : #{waiting}")
    LyberCore::Log.debug("I am attempting to connect to WORKFLOW_URI #{WORKFLOW_URI}")
    LyberCore::Log.debug("repository: #{repository}")
    LyberCore::Log.debug("workflow: #{workflow}")
    LyberCore::Log.debug("completed: #{completed}")
    LyberCore::Log.debug("waiting: #{waiting}")
    LyberCore::Log.debug(response.inspect)
    raise "Could not connect to url #{uri_string}"
  end

  # The number of queued objects is read from the count attribute of <objects>
  begin
    parsed = Nokogiri::XML(response.body)
    count = parsed.root.at_xpath("//objects/@count").content.to_i
  rescue Exception => e
    msg = "Could not parse response from Workflow Service"
    LyberCore::Log.error(msg + "\n#{response.body}")
    raise e, msg
  end

  raise LyberCore::Exceptions::EmptyQueue.new, "empty queue" if count == 0

  response.body
end
328
+
329
# Placeholder: the body is empty, so despite its name this method neither
# logs nor raises and always returns nil.
def DorService.log_and_raise_workflow_connection_problem(repository, workflow, completed, waiting, response); end
332
+
333
# Transforms the XML from getObjectsForWorkStep into a list of druids
# TODO figure out how to return a partial list
# This method is here for backward compatibility, but it has
# been superseded by DlssService.get_druids_from_object_list(objectListXml)
def DorService.get_druids_from_object_list(objectListXml)
  return DlssService.get_all_druids_from_object_list(objectListXml)
end
340
+
341
# Retrieves the identityMetadata datastream for a DOR object,
# extracts the otherId values, and returns them in a hash
#
# Keys are the "name" attribute of each identityMetadata/otherId element and
# values are the stripped element text (nil when the element has no text).
# Any failure is re-raised as the same exception class with the message
# "Couldn't get object identifiers for <druid>".
def DorService.get_object_identifiers(druid)
  identity_xml = get_datastream(druid, 'identityMetadata')
  raise "Unable to get identityMetadata datastream for #{druid}" if identity_xml.nil?

  identifiers = {}
  Document.new(identity_xml).elements.each("identityMetadata/otherId") do |other_id|
    text = other_id.text
    identifiers[other_id.attributes["name"]] = text.nil? ? nil : text.strip
  end
  identifiers
rescue Exception => e
  raise e, "Couldn't get object identifiers for #{druid}"
end
361
+
362
# Copies sourceDir + objectid into destinationDir with `rsync -a -e ssh`.
#
# Returns true when destinationDir/objectid exists afterwards, false
# otherwise. The exit status of rsync itself is not inspected.
def DorService.transfer_object(objectid, sourceDir, destinationDir)
  # Pass the arguments as a list so no shell is involved: paths containing
  # quotes, spaces or shell metacharacters are handed to rsync verbatim
  rsync_args = ['rsync', '-a', '-e', 'ssh', sourceDir + objectid, destinationDir]
  LyberCore::Log.debug(rsync_args.join(' ') + "\n")
  system(*rsync_args)
  # File.exists? was removed in Ruby 3.2; File.exist? works on every version
  File.exist?(File.join(destinationDir, objectid))
end
369
+
370
# Runs `md5sum -c checksumFile` from inside directory and returns true when
# no output line lacks "OK".
#
# NOTE(review): checksumFile is interpolated into a shell command line, and
# a run that prints nothing on stdout (for example because the checksum file
# could not be read) would also count as zero failures - confirm callers
# only pass trusted, existing file names.
def DorService.verify_checksums(directory, checksumFile)
  checksum_cmd = 'md5sum -c ' + checksumFile + ' | grep -v OK | wc -l'
  # Block form restores the previous working directory even if the command
  # raises, which the manual save/chdir/restore sequence did not
  bad_count = Dir.chdir(directory) { `#{checksum_cmd}`.to_i }
  bad_count == 0
end
378
+
379
# Given a process and an error message, construct an xml fragment that can be
# posted to the workflow service to record the error generated for a given druid
#
# == Parameters
# - <b>process</b> - name of the workflow step (used as given)
# - <b>error_msg</b> - short error message
# - <b>error_txt</b> - optional longer text (e.g. a stack trace); the
#   errorText attribute is omitted when this is nil
#
# Both texts are cleaned before use: whitespace runs collapse to one space,
# the characters ` ' # < > are dropped, double quotes become single quotes
# and '&' becomes '&amp;'.
def DorService.construct_error_update_request(process, error_msg, error_txt)
  clean = lambda do |text|
    text.gsub(/\s+/, " ").gsub(/[`'#<>]/, '').gsub(/"/, "'").
      # A raw '&' is not allowed inside an XML attribute value
      gsub(/&/, '&amp;')
  end

  body = '<process name="' + process + '" status="error" errorMessage="' + clean.call(error_msg) + '" '
  body += 'errorText="' + clean.call(error_txt) + '" ' unless error_txt.nil?
  body += '/>'
  body
end
389
+
390
# If an object encounters an error during processing, set its status to "error"
#
# PUTs the fragment built by construct_error_update_request to
# WORKFLOW_URI/<repository>/objects/<druid>/workflows/<workflow>/<process>.
# Any failure (including a non-success response) is logged and re-raised as
# the same exception class with an "Unable to update workflow service"
# message.
def DorService.update_workflow_error_status(repository, druid, workflow, process, error_msg, error_txt = nil)
  LyberCore::Log.debug("Updating workflow error status for druid #{druid}")
  LyberCore::Log.debug("Error message is: #{error_msg}")
  LyberCore::Log.debug("Error text is: #{error_txt}")

  url_string = "#{WORKFLOW_URI}/#{repository}/objects/#{druid}/workflows/#{workflow}/#{process}"
  url = URI.parse(url_string)
  LyberCore::Log.debug("Using url #{url_string}")

  request = Net::HTTP::Put.new(url.path)
  request.body = DorService.construct_error_update_request(process, error_msg, error_txt)
  request.content_type = 'application/xml'
  LyberCore::Log::debug("Putting request: #{request.inspect}")

  response = DorService.get_https_connection(url).start { |http| http.request(request) }
  LyberCore::Log::debug("Got response: #{response.inspect}")

  unless response.is_a?(Net::HTTPSuccess)
    LyberCore::Log.error(response.body)
    raise response.error!, "Received error from the workflow service"
  end

  LyberCore::Log.error("#{workflow} - #{process} set to error for " + druid)
rescue Exception => e
  msg = "Unable to update workflow service at url #{url_string}"
  LyberCore::Log.error(msg)
  raise e, msg
end
418
+
419
# This method sends a GET request to the symphony holdings script and
# returns the response body (MARC XML, according to the original note).
#
# == Parameters
# - <b>flexkey</b> - the flexkey to look up; it is escaped before being put
#   into the query string
#
# Returns the response body on HTTP success. Any failure, including a
# non-success response, is re-raised as the same exception class with the
# message "Encountered an error from symphony".
def DorService.query_symphony(flexkey)
  symphony_url = 'http://zaph.stanford.edu'
  path_info = '/cgi-bin/holding.pl?'
  # URI.escape was removed in Ruby 3.0; only the caller-supplied value needs
  # escaping, the fixed part of the query is already URL-safe
  parm_list = 'search=location&flexkey=' + CGI.escape(flexkey)
  url_string = symphony_url + path_info + parm_list

  url = URI.parse(url_string)
  LyberCore::Log.debug("Attempting to query symphony: #{url_string}")
  res = Net::HTTP.start(url.host, url.port) { |http| http.get(path_info + parm_list) }

  unless res.is_a?(Net::HTTPSuccess)
    LyberCore::Log.error("Encountered an error from symphony: #{res.body}")
    raise res.error!
  end

  LyberCore::Log.debug("Successfully queried symphony for #{flexkey}")
  res.body
rescue Exception => e
  raise e, "Encountered an error from symphony"
end #query_symphony
445
+
446
+
447
+ private
448
+ # druid, ds, url, content_type, method, parms
449
# Sends a POST or PUT for a datastream to
#   FEDORA_URI/objects/<druid>/datastreams/<ds_id><parms>
# using basic auth with FEDORA_USER / FEDORA_PASS.
#
# druid:: object identifier, concatenated into the URL
# ds_id:: datastream id, concatenated into the URL
# parms:: string appended verbatim after the datastream id
# method:: :post or :put
# content:: hash; :xml (if present) becomes the request body and :type
#           (if present) becomes the Content-Type header
#
# Returns true on a success response.
# Raises ArgumentError for any other +method+, and RuntimeError when the
# server answers with a non-success status.
def DorService.set_datastream(druid, ds_id, parms, method, content = {})
  url = URI.parse(FEDORA_URI + '/objects/' + druid + '/datastreams/' + ds_id + parms)

  req = case method
        when :post then Net::HTTP::Post.new(url.request_uri)
        when :put then Net::HTTP::Put.new(url.request_uri)
        else
          # Fail with a clear message instead of a NoMethodError on nil below.
          raise ArgumentError, "Unsupported HTTP method #{method.inspect}; expected :post or :put"
        end

  req.basic_auth FEDORA_USER, FEDORA_PASS
  req.body = content[:xml] if content[:xml]
  # Assigning a nil content type raises inside Net::HTTP, so only set the
  # header when the caller supplied one (the default content hash has none).
  req.content_type = content[:type] if content[:type]

  res = DorService.get_https_connection(url).start { |http| http.request(req) }
  case res
  when Net::HTTPSuccess
    true
  when Net::HTTPServerError
    LyberCore::Log.error("Attempted to set datastream #{url} but failed")
    raise "Encountered 500 error setting datastream #{url}: #{res.inspect}"
  else
    LyberCore::Log.error("Encountered unknown error when setting datastream #{url}: #{res.inspect}")
    raise "Encountered unknown error when setting datastream #{url}: #{res.inspect}"
  end
end
476
+
477
# Fetches the identityMetadata, descMetadata, googlemets, contentMetadata and
# adminMetadata datastreams for +druid+ (in that order) via
# DorService.get_datastream and returns them concatenated inside a single
# <objectMD druid='...'> wrapper element.
def DorService.get_object_metadata(druid)
  datastream_ids = %w[identityMetadata descMetadata googlemets contentMetadata adminMetadata]
  # Fetch everything first, then assemble, so the call order matches the id list.
  datastreams = datastream_ids.map { |ds_id| DorService.get_datastream(druid, ds_id) }

  opening = "<objectMD druid='" + druid + "' >\n"
  # String#+ is used on purpose: a non-String datastream raises instead of
  # being silently stringified.
  datastreams.inject(opening) { |xml, datastream| xml + datastream } + "</objectMD>\n"
end
488
+
489
+ end
490
+
491
+ # Given an array of strings, construct valid xml in which each
492
+ # member of the array becomes a <tag> element
493
# Builds a <tags> document in which every member of +tag_array+ becomes a
# <tag> element.
#
# Each tag has runs of whitespace collapsed to a single space and the
# characters '<', '>' and '!' removed. A bare '&' is written as '&amp;' so the
# result stays well-formed; an '&' that already starts one of the predefined
# XML entities or a numeric character reference is left alone, so input that
# was escaped by the caller is not escaped twice.
#
# tag_array:: array of strings
#
# Returns the XML as a String ("<tags></tags>" for an empty array).
def DorService.construct_xml_for_tag_array(tag_array)
  tag_elements = tag_array.map do |tag|
    cleaned = tag.gsub(/\s+/, ' ').gsub(/[<>!]/, '')
    cleaned = cleaned.gsub(/&(?!(?:amp|lt|gt|quot|apos|#[0-9]+|#x[0-9A-Fa-f]+);)/, '&amp;')
    "<tag>#{cleaned}</tag>"
  end
  "<tags>#{tag_elements.join}</tags>"
end
501
+
502
+
503
# PUTs the given tags, serialized by DorService.construct_xml_for_tag_array,
# to DOR_URI/objects/<druid>/datastreams/identityMetadata/tags.
#
# Returns true on a success response; logs and raises a RuntimeError for any
# other response.
def DorService.add_identity_tags(druid, tags)
  tags_url = URI.parse(DOR_URI + '/objects/' + druid + '/datastreams/identityMetadata/tags' )
  put_request = Net::HTTP::Put.new(tags_url.path)
  put_request.body = DorService.construct_xml_for_tag_array(tags)
  put_request.content_type = 'application/xml'
  response = DorService.get_https_connection(tags_url).start { |http| http.request(put_request) }

  return true if response.is_a?(Net::HTTPSuccess)

  if response.is_a?(Net::HTTPServerError)
    LyberCore::Log.error("Attempted to add identity tags #{tags_url} but failed")
    raise "Encountered 500 error when adding identity tags #{tags_url}: #{response.inspect}"
  end

  # Anything that is neither a success nor a server error lands here.
  failure = "Encountered unknown error when adding identity tags #{tags_url}: #{response.inspect}"
  LyberCore::Log.error(failure)
  raise failure
end
524
+
525
+ #DorService.updateWorkflowStatus('dr:rf624mb644', 'GoogleScannedWF', 'descriptive-metadata', 'completed')
526
+ ####Testing
527
+ #line = 'id="catkey:1990757"||id="barcode:36105045033136"||model="GoogleScannedBook"||label="The poacher"'
528
+ #form_data = {}
529
+ #DorService.parse_line_return_hashlist(line, form_data)
530
+ #form_data.each_pair{|k,v| puts "key: #{k} value: #{v}"}
531
+ #
532
+ #puts DorService.encodeParams(form_data)
533
+
534
+ #DorService.create_object('id="catkey:454545454545454"||id="barcode:434343434343434343434343434"||model="GoogleScannedBook"||label="Ruby multiple Id parms 3"')
535
+