lyber-core 0.9.6.2

Sign up to get free protection for your applications and to get access to all the features.
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 Stanford University Library
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,58 @@
1
+ = lyber_core
2
+
3
+ Require the following:
4
+ require 'lyber_core'
5
+
6
+ Constants that need to be defined with sample values:
7
+
8
+ If using WorkflowService:
9
+ Dor::WF_URI = 'http://lyberservices-dev.stanford.edu/workflow'
10
+ Dor::CREATE_WORKFLOW = true
11
+
12
+ If using SuriService:
13
+ Dor::MINT_SURI_IDS = true
14
+ Dor::SURI_URL = 'http://some.suri.host:8080'
15
+ Dor::ID_NAMESPACE = 'druid'
16
+ Dor::SURI_USER = 'suriuser'
17
+ Dor::SURI_PASSWORD = 'suripword'
18
+
19
+ If connecting to https servers:
20
+ LyberCore::CERT_FILE = File.dirname(__FILE__) + '/../certs/dummy.crt'
21
+ LyberCore::KEY_FILE = File.dirname(__FILE__) + '/../certs/dummy.key'
22
+ LyberCore::KEY_PASS = 'dummy'
23
+
24
+ == lyber_core/utils
25
+
26
+ If using Utilities, require the following:
27
+ require 'lyber_core/utils'
28
+
29
+ This will give you:
30
+ LyberCore::Utils::BagitBag
31
+ LyberCore::Utils::ChecksumValidate
32
+ LyberCore::Utils::FileUtilities
33
+
34
+ If you do not want all 3, you can require the individual classes. I.E. if you only want the bagit utils, then require:
35
+ require 'lyber_core/utils/bagit_bag'
36
+
37
+ The BagitBag class requires the bagit gem
38
+ http://github.com/flazz/bagit
39
+
40
+ == Build and release procedure
41
+ Run: 'rake dlss_release' to tag, build, and publish the lyber-core gem
42
+ See the Rakefile and the LyberCore::DlssRelease task in lib/lyber_core/rake/dlss_release.rb for more details
43
+
44
+ == Releases
45
+ - <b>0.9.6</b> DorService.get_objects_for_workstep can handle one or two completed steps. Trimmed-down gem dependencies now defined in lyber-core.gemspec. 'rake dlss_release' will tag, build and publish gem
46
+ - <b>0.9.5.5</b> Robots now log to ROBOT_ROOT/log/robot_name.log unless specified in constructor
47
+ - <b>0.9.5.4</b> Custom exception classes, more checking of error conditions
48
+ - <b>0.9.5.3</b> More robust testing, minor bug fixes, compatible with active_fedora 1.2.6
49
+ - <b>0.9.5</b> Significantly refactored to provide central logging and many more debugging statements.
50
+ - <b>0.9.4</b> First version that requires Ruby 1.8.7. Built with bundler and rvm
51
+ - <b>0.9.3.9</b> Last version compatible with Ruby 1.8.6. Stored in source control as the 'facets-282' branch.
52
+ - <b>0.9.3</b> Compatibility with bagit 1.0.0. Bump to active-fedora 1.1.13
53
+ - <b>0.9.2</b> Workflow bug fixes. Last version that supports active-fedora 1.0.7
54
+ - We recommend that you <b>DO NOT USE</b> any version older than these
55
+
56
+ == Copyright
57
+
58
+ Copyright (c) 2010 Stanford University Library. See LICENSE for details.
@@ -0,0 +1,82 @@
1
+ require 'rubygems'
2
+ require 'net/http'
3
+ require 'net/https'
4
+ require 'uri'
5
+ require 'cgi'
6
+ require 'active_fedora'
7
+ require 'lyber_core'
8
+ require 'nokogiri'
9
+
10
# Helper for talking to a Fedora repository over HTTP(S) and for turning
# workflow "object list" XML into lists of druids.
class DlssService

  # URL of the Fedora repository handed to the constructor
  attr_reader :fedora_url

  # Remembers the Fedora URL and registers it with Fedora::Repository.
  # ActiveFedora::SolrService is registered against a hard-coded local
  # Solr URL (http://localhost:8983/solr).
  def initialize(fedora_url)
    @fedora_url = fedora_url
    solr_url = "http://localhost:8983/solr"
    Fedora::Repository.register(@fedora_url)
    ActiveFedora::SolrService.register(solr_url)
  end

  # Build a (not yet started) Net::HTTP connection for the given parsed URI.
  # For https URIs the client certificate and key are read from CERT_FILE and
  # KEY_FILE (key unlocked with KEY_PASS).
  # NOTE(review): peer verification is switched off (VERIFY_NONE), so the
  # server certificate is never checked.
  def get_https_connection(url)
    connection = Net::HTTP.new(url.host, url.port)
    return connection unless url.scheme == 'https'

    connection.use_ssl = true
    connection.cert = OpenSSL::X509::Certificate.new(File.read(CERT_FILE))
    connection.key = OpenSSL::PKey::RSA.new(File.read(KEY_FILE), KEY_PASS)
    connection.verify_mode = OpenSSL::SSL::VERIFY_NONE
    connection
  end

  # Retrieve the metadata of a datastream of a DOR object
  # e.g. fedora_url + /objects/ + druid + /datastreams/dor gets "dor" datastream metadata
  #
  # Authenticates with FEDORA_USER / FEDORA_PASS. Returns the response body on
  # an HTTP success; otherwise logs an error and returns nil.
  def get_datastream_md(druid, ds_id)
    url = URI.parse(@fedora_url + '/objects/' + druid + '/datastreams/' + ds_id)
    request = Net::HTTP::Get.new(url.request_uri)
    request.basic_auth FEDORA_USER, FEDORA_PASS
    # Use this object's own connection builder rather than reaching into DorService
    response = get_https_connection(url).start { |http| http.request(request) }
    return response.body if response.is_a?(Net::HTTPSuccess)

    LyberCore::Log.error("Datastream " + ds_id + " not found for " + druid)
    nil
  end

  # This is maintained for backward compatibility, but
  # get_all_druids_from_object_list or get_some_druids_from_object_list
  # are preferred.
  def self.get_druids_from_object_list(objectListXml)
    get_all_druids_from_object_list(objectListXml)
  end

  # Transforms the XML from getObjectsForWorkStep into a list of all its druids
  def self.get_all_druids_from_object_list(objectListXml)
    get_some_druids_from_object_list(objectListXml, nil)
  end

  # Takes XML of the form
  # <objects><object id='druid:hx066mp6063' url='https://lyberservices-test.stanford.edu/workflow/objects/druid:hx066mp6063'/></objects>
  # if count is an integer, return at most that number of druids
  # otherwise, return all druids in the queue
  def self.get_some_druids_from_object_list(objectListXml, count)
    druids = []

    # Stream through the document; only nodes carrying an "id" attribute contribute
    Nokogiri::XML::Reader(objectListXml).each do |node|
      id = node.attribute("id")
      druids << id unless id.nil?
      break if druids.length == count
    end
    druids
  end

end
@@ -0,0 +1,18 @@
1
+
2
module Dor

  # ActiveFedora::Base subclass whose constructor obtains the pid for new
  # objects from Dor::SuriService.mint_id when the caller supplies none.
  class Base < ActiveFedora::Base
    # attrs may carry :pid and :new_object.
    # - Without :pid, an id is minted and the object is flagged as new.
    #   NOTE: merge! writes the minted :pid back into the hash that was passed in.
    # - With :pid, the object counts as new unless :new_object is exactly false.
    def initialize(attrs = {})
      if attrs[:pid]
        @new_object = attrs[:new_object] != false
      else
        attrs = attrs.merge!({:pid => Dor::SuriService.mint_id})
        @new_object = true
      end
      @inner_object = Fedora::FedoraObject.new(attrs)
      @datastreams = {}
      configure_defined_datastreams
    end
  end

end
@@ -0,0 +1,28 @@
1
+ require 'net/https'
2
+ require 'active_fedora'
3
+
4
module Dor
  # Mints identifiers for new objects.
  class SuriService

    # If Dor::MINT_SURI_IDS is set to true, then this method
    # returns "ID_NAMESPACE:id_from_suri", where the id is obtained by POSTing
    # (with no body) to the SURI service at Dor::SURI_URL using
    # Dor::SURI_USER / Dor::SURI_PASSWORD.
    # Otherwise it returns Fedora::Repository.instance.nextid.
    #
    # Any exception is logged and then re-raised unchanged. The message goes to
    # Rails.logger when Rails is loaded and to LyberCore::Log otherwise, so that
    # logging never masks the original failure with a NameError.
    def self.mint_id
      return Fedora::Repository.instance.nextid unless Dor::MINT_SURI_IDS

      # Post with no body
      id = LyberCore::Connection.post("#{Dor::SURI_URL}/suri2/namespaces/#{Dor::ID_NAMESPACE}/identifiers", nil,
                                      :auth_user => Dor::SURI_USER, :auth_password => Dor::SURI_PASSWORD)

      "#{Dor::ID_NAMESPACE}:#{id.strip}"
    rescue Exception => e
      # Exception (not just StandardError) is rescued on purpose: it is only
      # logged here and always re-raised.
      message = "Unable to mint id from suri: #{e}"
      if defined?(Rails) && Rails.respond_to?(:logger) && Rails.logger
        Rails.logger.error(message)
      else
        LyberCore::Log.error(message)
      end
      raise
    end

  end
end
@@ -0,0 +1,111 @@
1
+
2
module Dor

  # Methods to create and update workflow
  #
  # ==== Required Constants
  # - Dor::CREATE_WORKFLOW : true or false. Can be used to turn off workflow in a particular environment, like development
  # - Dor::WF_URI : The URI to the workflow service. An example URI is 'http://lyberservices-dev.stanford.edu/workflow'
  module WorkflowService

    # Characters that may not appear literally inside a double-quoted XML attribute value
    XML_ATTRIBUTE_ESCAPES = { '&' => '&amp;', '<' => '&lt;', '>' => '&gt;', '"' => '&quot;' }.freeze

    # Escapes a value for use inside a double-quoted XML attribute.
    # The value is converted with to_s first, so numbers and nil are accepted.
    def self.escape_xml_attribute(value)
      value.to_s.gsub(/[&<>"]/) { |char| XML_ATTRIBUTE_ESCAPES[char] }
    end

    # Creates a workflow for a given object in the repository.  If this particular workflow for this object exists,
    # it will replace the old workflow with wf_xml passed to this method.
    # Returns true on success (and immediately, without any request, when Dor::CREATE_WORKFLOW is false).
    # Caller must handle any exceptions
    #
    # == Parameters
    # - <b>repo</b> - The repository the object resides in.  The service recognizes "dor" and "sdr" at the moment
    # - <b>druid</b> - The id of the object
    # - <b>workflow_name</b> - The name of the workflow you want to create
    # - <b>wf_xml</b> - The xml that represents the workflow
    #
    def self.create_workflow(repo, druid, workflow_name, wf_xml)
      return true unless Dor::CREATE_WORKFLOW

      full_uri = "#{Dor::WF_URI}/#{repo}/objects/#{druid}/workflows/#{workflow_name}"

      # On success, an empty body is sent
      LyberCore::Connection.put(full_uri, wf_xml) { |response| true }
    end

    # Updates the status of one step in a workflow.
    # Returns true on success (and immediately, without any request, when Dor::CREATE_WORKFLOW is false).
    # Caller must handle any exceptions
    #
    # == Required Parameters
    # - <b>repo</b> - The repository the object resides in.  The service recognizes "dor" and "sdr" at the moment
    # - <b>druid</b> - The id of the object
    # - <b>workflow</b> - The name of the workflow
    # - <b>process</b> - The name of the workflow step being updated
    # - <b>status</b> - The status that you want to set.  Typical statuses are 'waiting', 'completed', 'error', but could be any string
    #
    # == Optional Parameters
    # - <b>elapsed</b> - The number of seconds it took to complete this step. Can have a decimal.  Is set to 0 if not passed in.
    # - <b>lifecycle</b> - Bookkeeping label for this particular workflow step.  Examples are: 'registered', 'shelved'
    #
    # == Http Call
    # The method does an HTTP PUT to the URL defined in Dor::WF_URI.  As an example:
    #   PUT "/dor/objects/pid:123/workflows/GoogleScannedWF/convert"
    #   <process name="convert" status="completed" elapsed="0" />
    # Attribute values are XML-escaped before being placed in the document.
    def self.update_workflow_status(repo, druid, workflow, process, status, elapsed = 0, lifecycle = nil)
      return true unless Dor::CREATE_WORKFLOW

      uri = "#{Dor::WF_URI}/#{repo}/objects/#{druid}/workflows/#{workflow}/#{process}"
      process_xml = %(<process name="#{escape_xml_attribute(process)}" status="#{escape_xml_attribute(status)}" )
      process_xml << %(elapsed="#{escape_xml_attribute(elapsed)}" )
      process_xml << %(lifecycle="#{escape_xml_attribute(lifecycle)}" ) if lifecycle
      process_xml << '/>'

      # On success, an empty body is sent
      LyberCore::Connection.put(uri, process_xml) { |response| true }
    end

    #
    # Retrieves the process status of the given workflow for the given object identifier
    #
    # Raises if the response has no root element, or if the workflow XML
    # contains no status for the requested process.
    def self.get_workflow_status(repo, druid, workflow, process)
      uri = "#{Dor::WF_URI}/#{repo}/objects/#{druid}/workflows/#{workflow}"
      workflow_md = LyberCore::Connection.get(uri)

      doc = Nokogiri::XML(workflow_md)
      raise Exception.new("Unable to parse response:\n#{workflow_md}") if doc.root.nil?

      status = doc.root.at_xpath("//process[@name='#{process}']/@status")
      # Fail with a meaningful message instead of a NoMethodError on nil
      raise "No status found for process '#{process}' in workflow '#{workflow}' of #{druid}" if status.nil?

      status.content
    end

    # Returns whatever LyberCore::Connection.get yields for the workflow's URI
    def self.get_workflow_xml(repo, druid, workflow)
      uri = "#{Dor::WF_URI}/#{repo}/objects/#{druid}/workflows/#{workflow}"
      LyberCore::Connection.get(uri)
    end

    # Updates the status of one step in a workflow to error.
    # Returns true on success.  Caller must handle any exceptions
    # Unlike update_workflow_status, this method does not consult Dor::CREATE_WORKFLOW.
    #
    # == Required Parameters
    # - <b>repo</b> - The repository the object resides in.  The service recognizes "dor" and "sdr" at the moment
    # - <b>druid</b> - The id of the object
    # - <b>workflow</b> - The name of the workflow
    # - <b>process</b> - The name of the workflow step being updated
    # - <b>error_msg</b> - The error message.  Ideally, this is a brief message describing the error
    #
    # == Optional Parameters
    # - <b>error_txt</b> - A slot to hold more information about the error, like a full stacktrace
    #
    # == Http Call
    # The method does an HTTP PUT to the URL defined in Dor::WF_URI.  As an example:
    #   PUT "/dor/objects/pid:123/workflows/GoogleScannedWF/convert"
    #   <process name="convert" status="error" errorMessage="..." />
    # Attribute values are XML-escaped, so messages may contain quotes, '<', '>' and '&'.
    def self.update_workflow_error_status(repo, druid, workflow, process, error_msg, error_txt = nil)
      uri = "#{Dor::WF_URI}/#{repo}/objects/#{druid}/workflows/#{workflow}/#{process}"
      process_xml = %(<process name="#{escape_xml_attribute(process)}" status="error" )
      process_xml << %(errorMessage="#{escape_xml_attribute(error_msg)}" )
      process_xml << %(errorText="#{escape_xml_attribute(error_txt)}" ) if error_txt
      process_xml << '/>'

      # On success, an empty body is sent
      LyberCore::Connection.put(uri, process_xml) { |response| true }
    end

  end
end
@@ -0,0 +1,535 @@
1
+ require 'net/http'
2
+ require 'net/https'
3
+ require 'uri'
4
+ require 'cgi'
5
+ require 'rexml/document'
6
+
7
+ include REXML
8
+
9
+ class DorService
10
+
11
# Build a (not yet started) Net::HTTP connection for the given parsed URI.
#
# For https URIs the connection presents the client certificate and key read
# from CERT_FILE and KEY_FILE (key unlocked with KEY_PASS).
# NOTE(review): peer verification is switched off (VERIFY_NONE), so the
# server certificate is never checked.
#
# @param [URI] url parsed URI supplying host, port and scheme
# @return [Net::HTTP] the configured connection
def DorService.get_https_connection(url)
  connection = Net::HTTP.new(url.host, url.port)
  return connection unless url.scheme == 'https'

  connection.use_ssl = true
  connection.cert = OpenSSL::X509::Certificate.new(File.read(CERT_FILE))
  connection.key = OpenSSL::PKey::RSA.new(File.read(KEY_FILE), KEY_PASS)
  connection.verify_mode = OpenSSL::SSL::VERIFY_NONE
  connection
end
21
+
22
# Create an object by POSTing the url-encoded form data to DOR_URI/objects.
# TODO: this should first check whether an object with the given PID already
# exists in the repository; no such check is made here.
#
# @param [Hash] form_data hash of arrays, as expected by DorService.encodeParams
# @return [String, nil] whatever follows "/objects/" in the response, or nil
#   when the response does not contain that pattern
# @raise [Exception] any failure is logged (backtrace only) and re-raised
def DorService.create_object(form_data)
  endpoint = DOR_URI + '/objects'
  payload = DorService.encodeParams(form_data)
  response = LyberCore::Connection.post(endpoint, payload, :content_type => 'application/x-www-form-urlencoded')
  response =~ /\/objects\/(.*)/
  $1
rescue Exception => e
  LyberCore::Log.error("Unable to create object #{e.backtrace}")
  raise e
end
38
+
39
# Create a child object of model dor:googleScannedPage under the given parent
# by POSTing to DOR_URI/objects/{parent_druid}/resources with the form
# parameters "model" and "id".
#
# @param [String] parent_druid druid of the parent object
# @param [String] child_id identifier sent as the "id" form parameter
# @return [String, nil] whatever follows "/resources/" in the response, or nil
#   when the response does not contain that pattern
# @raise [Exception] any failure is logged and re-raised with the message
#   "Unable to create object "
def DorService.create_child_object(parent_druid, child_id)
  begin
    # See if page exists before creating new fedora object
    # raise "Object exists with id: " + child_id if(DorService.get_druid_by_id(child_id))

    # DorService.encodeParams calls #each on every value, so each value is
    # wrapped in an Array (a bare String has no #each on Ruby >= 1.9).
    form_data = {'model' => ['dor:googleScannedPage'], 'id' => [child_id]}
    url = DOR_URI + '/objects/' + parent_druid + '/resources'
    body = DorService.encodeParams(form_data)
    content_type = 'application/x-www-form-urlencoded'
    res = LyberCore::Connection.post(url, body, :content_type => content_type)
    res =~ /\/resources\/(.*)/
    druid = $1
    LyberCore::Log.info("Child googleScannedPage object created for parent #{parent_druid}")
    LyberCore::Log.debug("child_id = #{child_id}")
    LyberCore::Log.debug("new druid = #{druid}")
    return druid
  rescue Exception => e
    LyberCore::Log.error("Unable to create object")
    raise e, "Unable to create object "
  end
end
62
+
63
+
64
# Takes a hash of arrays and builds a x-www-form-urlencoded string for POSTing form parameters
#
# A value that is not an Array is treated as a one-element list, so
# {'id' => 'abc'} and {'id' => ['abc']} encode identically. Values are
# converted with to_s and CGI-escaped; parameter names are emitted as given.
#
# == Parameters
# - <b>form_data</b> - a hash of arrays that contains the form data, ie. {'param1' => ['val1', 'val2'], 'param2' => ['val3']}
#
# Returns the encoded string ("" for an empty hash), e.g. "param1=val1&param1=val2&param2=val3"
def DorService.encodeParams(form_data)
  pairs = []
  form_data.each_pair do |param, values|
    # [x].flatten turns a scalar into [x] and leaves an Array's elements in place
    [values].flatten.each do |value|
      pairs << "#{param}=#{CGI.escape(value.to_s)}"
    end
  end
  pairs.join('&')
end
79
+
80
+
81
+ # Deprecated. Use Dor::WorkflowService#create_workflow in lyber_core gem
82
+ # def DorService.create_workflow(workflow, druid)
83
+ # begin
84
+ # url = URI.parse(DOR_URI + '/objects/' + druid + '/workflows/' + workflow.workflow_id)
85
+ # req = Net::HTTP::Put.new(url.path)
86
+ # #req.basic_auth 'fedoraUser', 'pass'
87
+ # req.body = workflow.workflow_process_xml
88
+ # req.content_type = 'application/xml'
89
+ # res = DorService.get_https_connection(url).start {|http| http.request(req) }
90
+ #
91
+ # WorkflowService.create_workflow()
92
+ #
93
+ # case res
94
+ # when Net::HTTPSuccess
95
+ # puts workflow.workflow_id + " created for " + druid
96
+ # else
97
+ # $stderr.print res.body
98
+ # raise res.error!
99
+ # end
100
+ # rescue Exception => e
101
+ # $stderr.print "Unable to create workflow " + e
102
+ # raise
103
+ # end
104
+ # end
105
+
106
+
107
# See if an object exists with this dor_id (not druid, but sub-identifier)
# by querying DOR_URI/query_by_id. Caller will have to handle any exception thrown.
#
# NOTE(review): dor_id is placed in the query string without URL-escaping.
#
# @param [String] dor_id the sub-identifier to look up
# @return [String, nil] the value of the first druid="..." attribute in a
#   successful response; nil when the response has none or when the server
#   answers with a client error (4xx)
# @raise [RuntimeError] on a server error (5xx) or any other kind of response
def DorService.get_druid_by_id(dor_id)
  query_url = "#{DOR_URI}/query_by_id?id=#{dor_id}"
  LyberCore::Log.debug("Fetching druid for dor_id #{dor_id} at url #{query_url}")
  url = URI.parse(query_url)
  request = Net::HTTP::Get.new(url.request_uri)
  response = DorService.get_https_connection(url).start { |http| http.request(request) }

  if response.is_a?(Net::HTTPSuccess)
    response.body =~ /druid="([^"\r\n]*)"/
    $1
  elsif response.is_a?(Net::HTTPClientError)
    LyberCore::Log.debug("Barcode does not yet exist in DOR: #{dor_id}")
    nil
  elsif response.is_a?(Net::HTTPServerError)
    LyberCore::Log.error("Encountered HTTPServerError error when requesting #{url}: #{response.inspect}")
    raise "Encountered 500 error when requesting #{url}: #{response.inspect}"
  else
    LyberCore::Log.error("Encountered unknown error when requesting #{url}: #{response.inspect}")
    raise "Encountered unknown error when requesting #{url}: #{response.inspect}"
  end
end
131
+
132
+ ############################################# Start of Datastream methods
133
+ # Until ActiveFedora supports client-side certificate configuration, we are stuck with our own methods to access datastreams
134
+
135
# Add a datastream to an object. This simply forwards every argument to
# DorService.add_datastream_managed.
#
# REST call shape:
# /objects/{pid}/datastreams/{dsID} ? [controlGroup] [dsLocation] [altIDs] [dsLabel] [versionable] [dsState] [formatURI] [checksumType] [checksum] [logMessage]
def DorService.add_datastream(druid, ds_id, ds_label, xml, content_type='application/xml', versionable = false )
  DorService.add_datastream_managed(
    druid, ds_id, ds_label, xml, content_type, versionable
  )
end
139
+
140
# Add a datastream with controlGroup=E whose content lives at ext_url.
# Issues a POST through DorService.set_datastream with no request body.
#
# The label is CGI-escaped. "&versionable=false" is added only when
# versionable is false/nil; nothing is added for a versionable datastream.
# NOTE(review): ext_url is appended to the query string without escaping.
def DorService.add_datastream_external_url(druid, ds_id, ds_label, ext_url, content_type, versionable = false)
  query = ['?controlGroup=E', '&dsLabel=' + CGI.escape(ds_label)]
  query << '&versionable=false' unless versionable
  query << ('&dsLocation=' + ext_url)
  DorService.set_datastream(druid, ds_id, query.join, :post, {:type => content_type})
end
147
+
148
# Replace the content of a datastream (controlGroup=M) with the given xml.
# Issues a PUT through DorService.set_datastream; "&versionable=false" is
# added to the query only when versionable is false/nil.
def DorService.update_datastream(druid, ds_id, xml, content_type='application/xml', versionable = false)
  query = ['?controlGroup=M']
  query << '&versionable=false' unless versionable
  DorService.set_datastream(druid, ds_id, query.join, :put, {:type => content_type, :xml => xml})
end
153
+
154
# Add a datastream with controlGroup=M carrying the given xml as its content.
# Issues a POST through DorService.set_datastream. The label is CGI-escaped and
# "&versionable=false" is added to the query only when versionable is false/nil.
def DorService.add_datastream_managed(druid, ds_id, ds_label, xml, content_type='application/xml', versionable = false )
  query = ['?controlGroup=M', '&dsLabel=' + CGI.escape(ds_label)]
  query << '&versionable=false' unless versionable
  DorService.set_datastream(druid, ds_id, query.join, :post, {:type => content_type, :xml => xml})
end
160
+
161
# Retrieve the content of a datastream of a DOR object
# e.g. FEDORA_URI + /objects/ + druid + /datastreams/dor/content gets "dor" datastream content
#
# @param [String] druid id of the object
# @param [String] ds_id id of the datastream
# @return [String, nil] the response body on success; nil when the server
#   answers with a client error (4xx)
# @raise [RuntimeError] on a server error (5xx) or any other kind of response
def DorService.get_datastream(druid, ds_id)
  LyberCore::Log.debug("Connecting to #{FEDORA_URI}...")
  url_string = "#{FEDORA_URI}/objects/#{druid}/datastreams/#{ds_id}/content"
  url = URI.parse(url_string)
  LyberCore::Log.debug("Connecting to #{url_string}...")
  request = Net::HTTP::Get.new(url.request_uri)
  LyberCore::Log.debug("request object: #{request.inspect}")
  response = DorService.get_https_connection(url).start { |http| http.request(request) }

  return response.body if response.is_a?(Net::HTTPSuccess)

  if response.is_a?(Net::HTTPClientError)
    LyberCore::Log.debug("Datastream not found at url #{url_string}")
    return nil
  end

  if response.is_a?(Net::HTTPServerError)
    LyberCore::Log.error("Attempted to reach #{url_string} but failed")
    raise "Encountered 500 error when requesting #{url_string}: #{response.inspect}"
  end

  LyberCore::Log.error("Encountered unknown error when requesting #{url}: #{response.inspect}")
  raise "Encountered unknown error when requesting #{url}: #{response.inspect}"
end
190
+
191
# Deprecated - use Dor::WorkflowService#get_workflow_xml
# Always raises; the arguments are ignored.
def DorService.get_workflow_xml(druid, workflow)
  raise Exception, "This method is deprecated.  Please use Dor::WorkflowService#get_workflow_xml"
end
195
+
196
# Retrieve the metadata of a datastream of a DOR object
# e.g. FEDORA_URI + /objects/ + druid + /datastreams/dor gets "dor" datastream metadata
#
# Authenticates with FEDORA_USER / FEDORA_PASS.
#
# @param [String] druid id of the object
# @param [String] ds_id id of the datastream
# @return [String, nil] the response body on an HTTP success; nil (after
#   logging two error lines) for any other response
# @raise [Exception] any exception raised while building or sending the
#   request is re-raised with the message "Couldn't get datastream from <url>"
def DorService.get_datastream_md(druid, ds_id)
  begin
    LyberCore::Log.debug("Connecting to #{FEDORA_URI}...")
    url = URI.parse(FEDORA_URI + '/objects/' + druid + '/datastreams/' + ds_id)
    LyberCore::Log.debug("Connecting to #{url}...")
    req = Net::HTTP::Get.new(url.request_uri)
    req.basic_auth FEDORA_USER, FEDORA_PASS
    LyberCore::Log.debug("request object: #{req.inspect}")
    res = DorService.get_https_connection(url).start {|http| http.request(req) }
    case res
    when Net::HTTPSuccess
      return res.body
    else
      LyberCore::Log.error("Attempted to reach #{url} but failed")
      # This line used the undefined name "dsid", which turned every failed
      # lookup into a NameError.
      LyberCore::Log.error("Datastream #{ds_id} not found for #{druid}")
      return nil
    end
  rescue Exception => e
    raise e, "Couldn't get datastream from #{url}"
  end
end
218
+
219
# Add a new datastream, but only if it does not yet exist
#
# @param [String] druid id of the object
# @param [String] ds_id id of the datastream
# @param [String] ds_label label for the new datastream
# @param [String] xml content of the new datastream
# @return the result of DorService.add_datastream when the datastream was
#   added; nil when DorService.get_datastream already finds content for it
# @raise [RuntimeError] when xml is nil or false
def DorService.add_datastream_unless_exists(druid, ds_id, ds_label, xml)
  # make sure xml was supplied (an empty string is still accepted)
  unless xml
    # The message used the undefined name "ds", so a NameError was raised
    # instead of this error.
    raise "No data supplied for datastream #{ds_id} of #{druid}"
  end
  # check to make sure datastream does not yet exist
  unless DorService.get_datastream(druid, ds_id)
    DorService.add_datastream(druid, ds_id, ds_label, xml)
  end
end
230
+
231
+ ############################################# End of Datastream methods
232
+
233
+
234
+ # Deprecated. Use Dor::WorkflowService#update_workflow_status
235
+ #PUT "objects/pid:123/workflows/GoogleScannedWF/convert"
236
+ #<process name=\"convert\" status=\"waiting\" elapsed="0.11" lifecycle="released" "/>"
237
+ #TODO increment attempts
238
+ # def DorService.updateWorkflowStatus(repository, druid, workflow, process, status, elapsed = 0, lifecycle = nil)
239
+ # begin
240
+ # url = URI.parse(WORKFLOW_URI + '/' + repository + '/objects/' + druid + '/workflows/' + workflow + '/' + process)
241
+ # req = Net::HTTP::Put.new(url.path)
242
+ # process_xml = '<process name="'+ process + '" status="' + status + '" '
243
+ # process_xml << 'elapsed="' + elapsed.to_s + '" '
244
+ # process_xml << 'lifecycle="' + lifecycle + '" ' if(lifecycle)
245
+ # process_xml << '/>'
246
+ # req.body = process_xml
247
+ # req.content_type = 'application/xml'
248
+ # res = DorService.get_https_connection(url).start {|http| http.request(req) }
249
+ # case res
250
+ # when Net::HTTPSuccess
251
+ # puts "#{workflow} process updated for " + druid
252
+ # else
253
+ # $stderr.print res.body
254
+ # raise res.error!
255
+ # end
256
+ # rescue Exception => e
257
+ # $stderr.print "Unable to update workflow " + e
258
+ # raise
259
+ # end
260
+ #
261
+ # end
262
+
263
# Returns string containing object list XML from a workflow DOR query
#
# The query is a GET to WORKFLOW_URI/workflow_queue with the repository,
# workflow, waiting step and one "completed" parameter per completed step.
#
# @param [String] repository name of the repository you are querying. Right now, <tt>dor</tt> and <tt>sdr</tt> are supported
# @param [String] workflow name of the workflow being queried, eg <tt>googleScannedBookWF</tt>
# @param [String, Array] completed if only querying for one completed step, pass in a String.
#   If querying for two completed steps, pass in an Array of the two completed steps
# @param [String] waiting the name of the waiting step
# @raise [LyberCore::Exceptions::EmptyQueue] When the query is successful, but the count attribute of the response is 0
# @raise [Exception] For other problems like connection failures, an unset WORKFLOW_URI,
#   more than two completed steps, or a response whose //objects/@count cannot be read
# @return [String] the unmodified response body: XML containing all the objects that match the specific query. It looks like:
#   <objects>
#     <object druid="dr:123" url="http://localhost:9999/jersey-spring/objects/dr:123%5c" />
#     <object druid="dr:abc" url="http://localhost:9999/jersey-spring/objects/dr:abc%5c" />
#   </objects>
def DorService.get_objects_for_workstep(repository, workflow, completed, waiting)
  LyberCore::Log.debug("DorService.get_objects_for_workstep(#{repository}, #{workflow}, #{completed}, #{waiting})")

  # NOTE(review): a missing argument is only logged; the query is still attempted.
  if [repository, workflow, completed, waiting].any? { |arg| arg.nil? }
    LyberCore::Log.fatal("Can't execute DorService.get_objects_for_workstep: missing info")
  end

  workflow_uri_set = defined?(WORKFLOW_URI) && !WORKFLOW_URI.nil?
  unless workflow_uri_set
    LyberCore::Log.fatal("WORKFLOW_URI is not set. ROBOT_ROOT = #{ROBOT_ROOT}")
    raise "WORKFLOW_URI is not set"
  end

  uri_string = "#{WORKFLOW_URI}/workflow_queue?repository=#{repository}&workflow=#{workflow}&waiting=#{waiting}"
  if completed.instance_of?(Array)
    raise "The workflow service can only handle queries with no more than 2 completed steps" if completed.size > 2
    completed.each { |step| uri_string << "&completed=#{step}" }
  else
    uri_string << "&completed=#{completed}"
  end

  LyberCore::Log.info("Attempting to connect to #{uri_string}")
  url = URI.parse(uri_string)
  request = Net::HTTP::Get.new(url.request_uri)
  response = DorService.get_https_connection(url).start { |http| http.request(request) }

  unless response.is_a?(Net::HTTPSuccess)
    LyberCore::Log.fatal("Workflow queue not found for #{workflow} : #{waiting}")
    LyberCore::Log.debug("I am attempting to connect to WORKFLOW_URI #{WORKFLOW_URI}")
    LyberCore::Log.debug("repository: #{repository}")
    LyberCore::Log.debug("workflow: #{workflow}")
    LyberCore::Log.debug("completed: #{completed}")
    LyberCore::Log.debug("waiting: #{waiting}")
    LyberCore::Log.debug(response.inspect)
    raise "Could not connect to url #{uri_string}"
  end

  # The root element's count attribute says how many objects are queued
  begin
    parsed = Nokogiri::XML(response.body)
    count = parsed.root.at_xpath("//objects/@count").content.to_i
  rescue Exception => e
    msg = "Could not parse response from Workflow Service"
    LyberCore::Log.error(msg + "\n#{response.body}")
    raise e, msg
  end

  raise LyberCore::Exceptions::EmptyQueue.new, "empty queue" if count == 0

  response.body
end
328
+
329
# Placeholder: despite its name this method currently has no body, so it
# neither logs nor raises and always returns nil.
def DorService.log_and_raise_workflow_connection_problem(repository, workflow, completed, waiting, response)
  nil
end
332
+
333
# Transforms the XML from getObjectsForWorkStep into a list of druids
# TODO figure out how to return a partial list
# This method is here for backward compatibility, but it has
# been superseded by DlssService.get_druids_from_object_list(objectListXml);
# it delegates to DlssService.get_all_druids_from_object_list.
def DorService.get_druids_from_object_list(objectListXml)
  DlssService.get_all_druids_from_object_list(
    objectListXml
  )
end
340
+
341
# Retrieves the identityMetadata datastream for a DOR object,
# extracts the otherId values, and returns them in a hash
#
# @param [String] druid id of the object
# @return [Hash] maps each identityMetadata/otherId element's "name" attribute
#   to its stripped text (nil when the element has no text)
# @raise [Exception] any failure, including a missing datastream, is re-raised
#   with the message "Couldn't get object identifiers for <druid>"
def DorService.get_object_identifiers(druid)
  identity_xml = get_datastream(druid, 'identityMetadata')
  raise "Unable to get identityMetadata datastream for #{druid}" if identity_xml.nil?

  identifiers = {}
  Document.new(identity_xml).elements.each("identityMetadata/otherId") do |other_id|
    text = other_id.text
    identifiers[other_id.attributes["name"]] = text.nil? ? nil : text.strip
  end
  identifiers
rescue Exception => e
  raise e, "Couldn't get object identifiers for #{druid}"
end
361
+
362
# Copy an object with "rsync -a -e ssh" and report whether it arrived.
#
# The source is the plain concatenation sourceDir + objectid (so sourceDir
# needs its own trailing separator) and is single-quoted on the command line.
# NOTE(review): the command is built as one shell string and destinationDir is
# not quoted; confirm these values never come from untrusted input.
#
# @param [String] objectid name of the object directory/file to copy
# @param [String] sourceDir directory prefix the object is read from
# @param [String] destinationDir rsync destination
# @return [Boolean] true when destinationDir/objectid exists locally afterwards;
#   the exit status of rsync itself is not consulted
def DorService.transfer_object(objectid, sourceDir, destinationDir)
  rsync_cmd = "rsync -a -e ssh '" + sourceDir + objectid + "' " + destinationDir
  LyberCore::Log.debug(rsync_cmd + "\n")
  system(rsync_cmd)
  # File.exist? replaces File.exists?, which was removed in Ruby 3.2
  File.exist?(File.join(destinationDir, objectid))
end
369
+
370
# Verify the files listed in an md5 checksum file by running "md5sum -c"
# from inside the given directory.
#
# The verdict is the exit status of md5sum. Counting output lines that lack
# "OK" reported success when md5sum printed nothing at all (for example an
# unreadable checksum file) or when a failing file's name contained "OK".
# The working directory is restored even if the command raises.
#
# @param [String] directory directory to run the check in
# @param [String] checksumFile checksum file handed to "md5sum -c"
# @return [Boolean] true only when md5sum exits successfully
# @raise [SystemCallError] when the directory cannot be entered or md5sum
#   cannot be started
def DorService.verify_checksums(directory, checksumFile)
  # Block form of chdir switches back to the previous directory on exit
  Dir.chdir(directory) do
    `md5sum -c #{checksumFile}`
    $?.success? == true
  end
end
378
+
379
# Given a process and an error message, construct an xml fragment that can be
# posted to the workflow service to record the error generated for a given druid
#
# Both texts are cleaned before use: runs of whitespace collapse to one space,
# the characters ` ' # < > are dropped, double quotes become single quotes and
# "&" becomes "&amp;" so the attribute value stays well-formed XML.
#
# @param [String] process name of the workflow step (inserted as given)
# @param [String] error_msg short error message; nil is treated as empty
# @param [String, nil] error_txt optional detail; the errorText attribute is
#   omitted when this is nil
# @return [String] e.g. <process name="convert" status="error" errorMessage="..." />
def DorService.construct_error_update_request(process, error_msg, error_txt)
  sanitize = lambda do |text|
    text.to_s.gsub(/\s+/, " ").gsub(/[`'#<>]/, '').gsub(/"/, "'").gsub('&', '&amp;')
  end

  body = '<process name="' + process + '" status="error" errorMessage="' + sanitize.call(error_msg) + '" '
  body += 'errorText="' + sanitize.call(error_txt) + '" ' unless error_txt.nil?
  body += '/>'
  body
end
389
+
390
# If an object encounters an error during processing, set its status to "error"
#
# PUTs the fragment built by DorService.construct_error_update_request to
# WORKFLOW_URI/{repository}/objects/{druid}/workflows/{workflow}/{process}.
#
# @param [String] repository name of the repository
# @param [String] druid id of the object
# @param [String] workflow name of the workflow
# @param [String] process name of the workflow step
# @param [String] error_msg short error message
# @param [String, nil] error_txt optional additional detail
# @raise [Exception] any failure, including a non-success response, is logged
#   and re-raised with the message "Unable to update workflow service at url <url>"
def DorService.update_workflow_error_status(repository, druid, workflow, process, error_msg, error_txt = nil)
  LyberCore::Log.debug("Updating workflow error status for druid #{druid}")
  LyberCore::Log.debug("Error message is: #{error_msg}")
  LyberCore::Log.debug("Error text is: #{error_txt}")
  url_string = "#{WORKFLOW_URI}/#{repository}/objects/#{druid}/workflows/#{workflow}/#{process}"
  url = URI.parse(url_string)
  LyberCore::Log.debug("Using url #{url_string}")

  request = Net::HTTP::Put.new(url.path)
  request.body = DorService.construct_error_update_request(process, error_msg, error_txt)
  request.content_type = 'application/xml'
  LyberCore::Log.debug("Putting request: #{request.inspect}")
  response = DorService.get_https_connection(url).start { |http| http.request(request) }
  LyberCore::Log.debug("Got response: #{response.inspect}")

  unless response.is_a?(Net::HTTPSuccess)
    LyberCore::Log.error(response.body)
    raise response.error!, "Received error from the workflow service"
  end

  # The successful update is itself recorded at error level
  LyberCore::Log.error("#{workflow} - #{process} set to error for " + druid)
rescue Exception => e
  failure = "Unable to update workflow service at url #{url_string}"
  LyberCore::Log.error(failure)
  raise e, failure
end
418
+
419
# Sends a GET request to the holdings CGI at http://zaph.stanford.edu
# (/cgi-bin/holding.pl?search=location&flexkey=...) and returns the response body.
# NOTE(review): the body is presumably MARC XML; that is not checked here.
#
# The flexkey is escaped with CGI.escape; URI.escape, used previously, was
# removed in Ruby 3.0.
#
# @param [String] flexkey the key to search for
# @return [String] the response body on an HTTP success
# @raise [Exception] any failure, including a non-success response, is
#   re-raised with the message "Encountered an error from symphony"
def DorService.query_symphony(flexkey)
  begin
    symphony_url = 'http://zaph.stanford.edu'
    path_info = '/cgi-bin/holding.pl?'
    parm_list = 'search=location&flexkey=' + CGI.escape(flexkey)
    url_string = symphony_url + path_info + parm_list

    url = URI.parse(url_string)
    LyberCore::Log.debug("Attempting to query symphony: #{url_string}")
    res = Net::HTTP.start(url.host, url.port) {|http|
      http.get( path_info + parm_list )
    }
    case res
    when Net::HTTPSuccess
      LyberCore::Log.debug("Successfully queried symphony for #{flexkey}")
      return res.body
    else
      LyberCore::Log.error("Encountered an error from symphony: #{res.body}")
      raise res.error!
    end
  rescue Exception => e
    raise e, "Encountered an error from symphony"
  end
end #query_symphony
445
+
446
+
447
# NOTE: a bare `private` only changes the default visibility of instance
# methods defined after it. Methods defined as `def DorService.name` are
# singleton methods and stay publicly callable.
private

# Sends a POST or PUT request for one datastream of an object to
#   FEDORA_URI/objects/<druid>/datastreams/<ds_id><parms>
# authenticating with FEDORA_USER / FEDORA_PASS.
#
# Parameters:
#   druid   - object identifier
#   ds_id   - datastream identifier
#   parms   - string appended verbatim to the URL (e.g. a query string)
#   method  - :post or :put
#   content - optional hash; :xml becomes the request body and :type the
#             Content-Type header, each only when present
#
# Returns true when the response is a success.
# Raises ArgumentError for any method other than :post or :put, and
# RuntimeError for a non-success response.
def DorService.set_datastream(druid, ds_id, parms, method, content = {})
  url = URI.parse(FEDORA_URI + '/objects/' + druid + '/datastreams/' + ds_id + parms)

  req = case method
        when :post then Net::HTTP::Post.new(url.request_uri)
        when :put then Net::HTTP::Put.new(url.request_uri)
        else
          # Previously req stayed nil here and failed later with NoMethodError.
          raise ArgumentError, "Unsupported HTTP method #{method.inspect}; expected :post or :put"
        end

  req.basic_auth FEDORA_USER, FEDORA_PASS
  req.body = content[:xml] if content[:xml]
  # Assigning a nil content type raises inside Net::HTTP, so only set the
  # header when the caller supplied one.
  req.content_type = content[:type] if content[:type]

  res = DorService.get_https_connection(url).start { |http| http.request(req) }
  case res
  when Net::HTTPSuccess
    true
  when Net::HTTPServerError
    LyberCore::Log.error("Attempted to set datastream #{url} but failed")
    raise "Encountered 500 error setting datastream #{url}: #{res.inspect}"
  else
    LyberCore::Log.error("Encountered unknown error when setting datastream #{url}: #{res.inspect}")
    raise "Encountered unknown error when setting datastream #{url}: #{res.inspect}"
  end
end
476
+
477
# Fetches the identityMetadata, descMetadata, googlemets, contentMetadata
# and adminMetadata datastreams for the given druid (in that order) via
# DorService.get_datastream and returns them concatenated inside a single
# <objectMD druid='...'> wrapper element.
def DorService.get_object_metadata(druid)
  datastream_ids = %w[identityMetadata descMetadata googlemets contentMetadata adminMetadata]
  sections = datastream_ids.map { |ds_id| DorService.get_datastream(druid, ds_id) }

  opening = "<objectMD druid='" + druid + "' >\n"
  # Plain String#+ is used so a non-String section still raises.
  wrapped = sections.inject(opening) { |xml, section| xml + section }
  wrapped + "</objectMD>\n"
end
488
+
489
+ end
490
+
491
# Given an array of strings, construct valid xml in which each
# member of the array becomes a <tag> element inside one <tags> element.
#
# Each tag is normalised before being emitted:
#   * runs of whitespace collapse to a single space
#   * the characters <, > and ! are removed
#   * a bare "&" is escaped as "&amp;"; ampersands that already start an
#     entity or character reference (&amp;, &lt;, &#38;, ...) are left alone
#
# Parameters:
#   tag_array - array of String tags
#
# Returns the XML as a String, e.g. "<tags><tag>a</tag><tag>b</tag></tags>".
def DorService.construct_xml_for_tag_array(tag_array)
  elements = tag_array.map do |tag|
    cleaned = tag.gsub(/\s+/, ' ').gsub(/[<>!]/, '')
    # An unescaped "&" would make the document malformed.
    cleaned = cleaned.gsub(/&(?!(?:amp|lt|gt|quot|apos|#\d+|#x\h+);)/, '&amp;')
    "<tag>#{cleaned}</tag>"
  end
  "<tags>#{elements.join}</tags>"
end
501
+
502
+
503
# PUTs the given tags, serialised by DorService.construct_xml_for_tag_array,
# to DOR_URI/objects/<druid>/datastreams/identityMetadata/tags.
#
# Returns true when the response is a success. Logs an error and raises a
# RuntimeError for a server error or any other non-success response.
def DorService.add_identity_tags(druid, tags)
  tags_url = URI.parse(DOR_URI + '/objects/' + druid + '/datastreams/identityMetadata/tags' )

  put_request = Net::HTTP::Put.new(tags_url.path)
  put_request.body = DorService.construct_xml_for_tag_array(tags)
  put_request.content_type = 'application/xml'

  response = DorService.get_https_connection(tags_url).start { |http| http.request(put_request) }
  return true if Net::HTTPSuccess === response

  if Net::HTTPServerError === response
    LyberCore::Log.error("Attempted to add identity tags #{tags_url} but failed")
    raise "Encountered 500 error when adding identity tags #{tags_url}: #{response.inspect}"
  end

  failure = "Encountered unknown error when adding identity tags #{tags_url}: #{response.inspect}"
  LyberCore::Log.error(failure)
  raise failure
end
524
+
525
+ #DorService.updateWorkflowStatus('dr:rf624mb644', 'GoogleScannedWF', 'descriptive-metadata', 'completed')
526
+ ####Testing
527
+ #line = 'id="catkey:1990757"||id="barcode:36105045033136"||model="GoogleScannedBook"||label="The poacher"'
528
+ #form_data = {}
529
+ #DorService.parse_line_return_hashlist(line, form_data)
530
+ #form_data.each_pair{|k,v| puts "key: #{k} value: #{v}"}
531
+ #
532
+ #puts DorService.encodeParams(form_data)
533
+
534
+ #DorService.create_object('id="catkey:454545454545454"||id="barcode:434343434343434343434343434"||model="GoogleScannedBook"||label="Ruby multiple Id parms 3"')
535
+