dor-services 2.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. data/lib/datastreams/content_metadata_ds.rb +12 -0
  2. data/lib/datastreams/embargo_metadata_ds.rb +107 -0
  3. data/lib/datastreams/events_ds.rb +58 -0
  4. data/lib/datastreams/identity_metadata_ds.rb +28 -0
  5. data/lib/datastreams/ng_tidy.rb +19 -0
  6. data/lib/datastreams/simple_dublin_core_ds.rb +23 -0
  7. data/lib/datastreams/workflow_definition_ds.rb +105 -0
  8. data/lib/datastreams/workflow_ds.rb +16 -0
  9. data/lib/dor-services.rb +19 -0
  10. data/lib/dor/admin_policy_object.rb +11 -0
  11. data/lib/dor/base.rb +81 -0
  12. data/lib/dor/cleanup_service.rb +32 -0
  13. data/lib/dor/config.rb +45 -0
  14. data/lib/dor/digital_stacks_service.rb +82 -0
  15. data/lib/dor/druid_utils.rb +41 -0
  16. data/lib/dor/embargo.rb +41 -0
  17. data/lib/dor/exceptions.rb +13 -0
  18. data/lib/dor/item.rb +141 -0
  19. data/lib/dor/metadata_handlers/catalog_handler.rb +22 -0
  20. data/lib/dor/metadata_handlers/mdtoolkit_handler.rb +42 -0
  21. data/lib/dor/metadata_service.rb +88 -0
  22. data/lib/dor/mods2dc.xslt +447 -0
  23. data/lib/dor/provenance_metadata_service.rb +65 -0
  24. data/lib/dor/registration_service.rb +87 -0
  25. data/lib/dor/rsolr.rb +27 -0
  26. data/lib/dor/sdr_ingest_service.rb +117 -0
  27. data/lib/dor/search_service.rb +86 -0
  28. data/lib/dor/suri_service.rb +37 -0
  29. data/lib/dor/tei2dc.xslt +102 -0
  30. data/lib/dor/workflow_object.rb +13 -0
  31. data/lib/dor/workflow_service.rb +111 -0
  32. data/lib/gsearch/demoFoxmlToSolr.xslt +384 -0
  33. data/lib/gsearch/schema.xml +229 -0
  34. data/lib/tasks/rdoc.rake +32 -0
  35. data/lib/xml_models/foxml.rb +261 -0
  36. data/lib/xml_models/identity_metadata/dublin_core.rb +119 -0
  37. data/lib/xml_models/identity_metadata/identity_metadata.rb +288 -0
  38. metadata +462 -0
@@ -0,0 +1,32 @@
1
+ require 'fileutils'
2
+ require 'lyber-utils'
3
+
4
module Dor
  # Removes the filesystem artifacts kept for an object: its workspace
  # directory, its export directory and the matching ".tar" file.
  class CleanupService
    Config.declare(:cleanup) do
      local_workspace_root '/dor/workspace'
      local_export_home '/dor/export'
    end

    # Delete all workspace and export entities for the druid
    # @param [#pid] dor_item object whose +pid+ is the druid to clean up
    #   (Dor::Item#cleanup passes itself)
    # @raise [ArgumentError] if the pid is not a valid druid (raised by Druid.new)
    def self.cleanup(dor_item)
      druid = dor_item.pid
      workspace_dir = Druid.new(druid).path(Config.cleanup.local_workspace_root)
      self.remove_entry(workspace_dir)
      # The export entries are named after the pid exactly as given.
      bag_dir = File.join(Config.cleanup.local_export_home, druid)
      self.remove_entry(bag_dir)
      self.remove_entry("#{bag_dir}.tar")
    end

    # Delete a filesystem entry (file, directory tree or symlink) if present
    # @param [String] entry path of the entry to remove
    def self.remove_entry(entry)
      # File.exist? follows symlinks and so reports false for a dangling link;
      # checking symlink? too ensures stale links are removed as well.
      FileUtils.remove_entry(entry) if File.exist?(entry) || File.symlink?(entry)
    end

  end

end
@@ -0,0 +1,45 @@
1
+ require 'mod_cons'
2
+
3
module Dor
  # Shared configuration object for the gem; other classes add their own
  # sections to it through Config.declare.
  Config = ModCons::Configuration.new(:'Dor::Config')

  Config.declare do
    # Connection settings for the Fedora repository.
    fedora do
      url nil
      safeurl nil
      cert_file nil
      key_file nil
      key_pass ''

      instance_eval do
        # Builds a REST client for the configured url, authenticating with the
        # client certificate and RSA key read from cert_file / key_file.
        def client
          endpoint = self.url
          certificate = OpenSSL::X509::Certificate.new(File.read(self.cert_file))
          private_key = OpenSSL::PKey::RSA.new(File.read(self.key_file), self.key_pass)
          RestClient::Resource.new(
            endpoint,
            :ssl_client_cert => certificate,
            :ssl_client_key => private_key
          )
        end
      end

      # NOTE(review): presumably ModCons runs this block whenever the fedora
      # settings change -- confirm against the mod_cons documentation.
      config_changed do |fedora|
        # Store a copy of the url with the credentials removed in safeurl.
        sanitized_uri = URI.parse(fedora.url)
        sanitized_uri.user = sanitized_uri.password = nil
        fedora.safeurl sanitized_uri.to_s

        # Warnings are switched off while the constants below are assigned
        # (presumably to silence "already initialized constant" -- confirm),
        # and the previous setting is always restored afterwards.
        previous_verbose = $VERBOSE
        $VERBOSE = nil
        begin
          ::ENABLE_SOLR_UPDATES = false
          ::Fedora::Repository.register(fedora.url)
          ::Fedora::Connection.const_set(:SSL_CLIENT_CERT_FILE, fedora.cert_file)
          ::Fedora::Connection.const_set(:SSL_CLIENT_KEY_FILE, fedora.key_file)
          ::Fedora::Connection.const_set(:SSL_CLIENT_KEY_PASS, fedora.key_pass)
        ensure
          $VERBOSE = previous_verbose
        end
      end
    end
  end

end
45
+
@@ -0,0 +1,82 @@
1
+ require 'tempfile'
2
+ require 'systemu'
3
+
4
module Dor
  # Pushes object metadata and content files to remote hosts (the document
  # cache and the digital stacks) by running ssh and scp commands.
  class DigitalStacksService

    Config.declare(:stacks) do
      document_cache_storage_root nil
      document_cache_host nil
      document_cache_user nil

      storage_root '/stacks'
      host nil
      user nil

      local_workspace_root '/dor'
    end

    # TODO copied from lyber-core, but didn't want to create circular dependency between gems for this one method
    # Executes a system command in a subprocess.
    # The method will return stdout from the command if execution was successful.
    # The method will raise an exception if execution fails.
    # The exception's message will contain the explanation of the failure.
    # @param [String] command the command to be executed
    # @return [String] stdout from the command if execution was successful
    # @raise [RuntimeError] if the command exits non-zero or cannot be run
    def self.execute(command)
      status, stdout, stderr = systemu(command)
      raise stderr.to_s if status.exitstatus != 0
      stdout
    rescue StandardError => e
      # stderr is still nil when systemu itself raised before returning. Fall
      # back to that exception's message so the real cause is reported rather
      # than a NoMethodError from calling split on nil.
      error_text = stderr.nil? ? e.message : stderr
      msg = "Command failed to execute: [#{command}] caused by <STDERR =\n#{error_text.split($/).join("\n")}>"
      msg << "\nSTDOUT =\n#{stdout.split($/).join("\n")}" if stdout && stdout.length > 0
      raise msg
    end

    # Relative druid-tree path for an identifier.
    # @param [String] druid the identifier
    # @return [String, nil] the path, or nil when Druid rejects the identifier
    def self.druid_tree(druid)
      Druid.new(druid).path
    rescue StandardError
      nil
    end

    # Writes +content+ to a temp file and copies it to the document cache
    # under the object's druid tree, naming the remote file +filename+.
    # NOTE(review): values are interpolated into the shell commands unquoted;
    # confirm callers only pass trusted names.
    # @param [String] id druid of the object
    # @param [String] content data to store
    # @param [String] filename name of the remote file
    # @raise [RuntimeError] if the druid is invalid or a command fails
    def self.transfer_to_document_store(id, content, filename)
      path = self.druid_tree(id)
      raise "Invalid druid: #{id}" if path.nil?

      # create the remote directory in the document cache
      remote_document_cache_dir = File.join(Config.stacks.document_cache_storage_root, path)
      command = "ssh #{Config.stacks.document_cache_user}@#{Config.stacks.document_cache_host} mkdir -p #{remote_document_cache_dir}"
      self.execute(command)

      # create a temp file containing the content and copy the contents to the remote document cache
      Tempfile.open(filename) do |tf|
        tf.write(content)
        # flush so scp sees the full content on disk
        tf.flush
        command = "scp \"#{tf.path}\" #{Config.stacks.document_cache_user}@#{Config.stacks.document_cache_host}:#{remote_document_cache_dir}/#{filename}"
        self.execute(command)
      end
    end

    # Copies the named files from the object's local workspace directory to
    # its druid-tree directory on the digital stacks host.
    # @param [String] id druid of the object
    # @param [Array<String>] files file names relative to the workspace directory
    # @raise [RuntimeError] if the druid is invalid or a command fails
    def self.shelve_to_stacks(id, files)
      path = self.druid_tree(id)
      raise "Invalid druid: #{id}" if path.nil?

      # create the remote directory on the digital stacks
      remote_storage_dir = File.join(Config.stacks.storage_root, path)
      command = "ssh #{Config.stacks.user}@#{Config.stacks.host} mkdir -p #{remote_storage_dir}"
      self.execute(command)

      # copy the contents for the given object from the local workspace directory to the remote directory
      local_storage_dir = File.join(Config.stacks.local_workspace_root, path)
      files.each do |file|
        command = "scp \"#{local_storage_dir}/#{file}\" #{Config.stacks.user}@#{Config.stacks.host}:#{remote_storage_dir}"
        self.execute(command)
      end
    end

  end

end
82
+
@@ -0,0 +1,41 @@
1
require 'fileutils'

# Validates a DRUID identifier (optionally prefixed with "druid:") and maps
# it onto a four-level directory tree, e.g. "ab123cd4567" -> ab/123/cd/4567.
class Druid
  attr_accessor :druid

  # Anchored with \A and \z so the WHOLE string must be a druid; ^ and $ match
  # per line and would accept a multi-line string with one valid line.
  DRUID_PATTERN = /\A(?:druid:)?([a-z]{2})(\d{3})([a-z]{2})(\d{4})\z/

  # @param [String] druid identifier, with or without the "druid:" prefix
  # @raise [ArgumentError] if the identifier does not match DRUID_PATTERN
  def initialize(druid)
    raise ArgumentError, "Invalid DRUID: #{druid}" if druid !~ DRUID_PATTERN
    @druid = druid
  end

  # @return [String] the identifier without the "druid:" prefix
  def id
    @druid.sub(/\Adruid:/, '')
  end

  # @return [Array<String>] the four path segments of the identifier
  def tree
    @druid.scan(DRUID_PATTERN).flatten
  end

  # @param [String, nil] base optional directory to prepend
  # @return [String] the tree joined into a path, under +base+ when given
  def path(base=nil)
    File.join(*([base, tree].compact))
  end

  # Creates the tree directory (and any missing parents) under +base+.
  # @param [String] base directory that holds the tree
  def mkdir(base)
    FileUtils.mkdir_p(path(base))
  end

  # Removes the tree directories under +base+ from the leaf upwards, stopping
  # at the first directory that still has content. +base+ itself is kept.
  # @param [String] base directory that holds the tree
  def rmdir(base)
    parts = tree
    until parts.empty?
      dir = File.join(base, *parts)
      begin
        # a stray Finder metadata file should not keep the directory alive
        FileUtils.rm(File.join(dir, '.DS_Store'), :force => true)
        # Dir.rmdir is called directly so a non-empty directory reliably
        # raises; FileUtils.rmdir appears to have swallowed that error in some
        # fileutils releases, which made the break below unreliable.
        Dir.rmdir(dir)
      rescue Errno::ENOENT
        # already gone -- carry on pruning the parents
      rescue Errno::ENOTEMPTY, Errno::EEXIST
        break
      end
      parts.pop
    end
  end
end
@@ -0,0 +1,41 @@
1
+ require 'datastreams/embargo_metadata_ds'
2
+ require 'datastreams/events_ds'
3
+
4
module Dor

  # Embargo operations for an object. The including object must provide
  # embargoMetadata, rightsMetadata and events datastreams.
  module Embargo

    # Lifts the embargo on the object:
    #   * marks embargoMetadata as released
    #   * strips embargoReleaseDate from the read access in rightsMetadata
    #   * swaps each rightsMetadata <access> node for the node of the same
    #     type found under embargoMetadata/releaseAccess
    #   * records an "embargo" event
    # @param [String] release_agent name of the person, application or thing that released embargo
    # @note The caller should save the object to fedora to commit the changes
    def release_embargo(release_agent="unknown")
      embargo_ds = datastreams['embargoMetadata']
      embargo_ds.status = 'released'

      rights_doc = datastreams['rightsMetadata'].ng_xml
      rights_doc.xpath("//rightsMetadata/access[@type='read']/machine/embargoReleaseDate").remove

      embargo_ds.release_access_node.xpath('//releaseAccess/access').each do |replacement|
        access_type = replacement['type']
        # drop the existing access of this type before inserting the new one
        rights_doc.xpath("//rightsMetadata/access[@type='#{access_type}']").remove
        last_access = rights_doc.at_xpath("//rightsMetadata/access[last()]")
        # place the copy after the remaining <access> nodes, or directly under
        # the root when none are left
        if last_access
          last_access.add_next_sibling(replacement.clone)
        else
          rights_doc.root.add_child(replacement.clone)
        end
      end

      datastreams['rightsMetadata'].dirty = true
      datastreams['events'].add_event("embargo", release_agent, "Embargo released")
    end
  end
end
@@ -0,0 +1,13 @@
1
module Dor

  # Base class for the gem's errors. It derives from StandardError (not
  # ::Exception) so that a plain `rescue` / `rescue => e` catches Dor errors.
  class Exception < ::StandardError; end

  # Raised for invalid or missing parameters.
  class ParameterError < Exception; end

  # Raised when an id is already in use; the offending id is kept in +pid+.
  class DuplicateIdError < Exception
    attr_reader :pid

    # @param [String] pid the duplicated identifier
    def initialize(pid)
      @pid = pid
      # pass a message up so #message names the pid instead of only the class
      super("Duplicate id: #{pid}")
    end
  end

end
@@ -0,0 +1,141 @@
1
+ require 'dor/base'
2
+ require 'datastreams/content_metadata_ds'
3
+ require 'datastreams/ng_tidy'
4
+ require 'tmpdir'
5
+
6
module Dor

  # A DOR object with content, descriptive, rights, provenance and technical
  # metadata datastreams, plus the operations that build and publish them.
  class Item < Base

    has_metadata :name => "contentMetadata", :type => ContentMetadataDS
    has_metadata :name => "descMetadata", :type => ActiveFedora::NokogiriDatastream
    has_metadata :name => "rightsMetadata", :type => ActiveFedora::NokogiriDatastream
    has_metadata :name => "provenanceMetadata", :type => ActiveFedora::NokogiriDatastream
    has_metadata :name => "technicalMetadata", :type => ActiveFedora::NokogiriDatastream

    # Loads the admin policy object named by the hydra_isGovernedBy relationship.
    # @return [Dor::AdminPolicyObject, nil] nil when the relationship is absent or empty
    def admin_policy_object
      apo_ref = Array(self.rels_ext.relationships[:self]['hydra_isGovernedBy']).first
      return nil if apo_ref.nil?

      # the id is the last path segment of the reference
      apo_id = apo_ref.value.split(%r{/}).last
      # split yields [] for an empty value, so apo_id can be nil as well as ''
      return nil if apo_id.nil? || apo_id.empty?

      Dor::AdminPolicyObject.load_instance(apo_id)
    end

    # Fetches descriptive metadata for the first of this item's other ids that
    # Dor::MetadataService reports as resolvable.
    # @return [Object, nil] whatever MetadataService.fetch returns, or nil when no id resolves
    def fetch_descMetadata_datastream
      candidates = self.identity_metadata.otherIds.collect { |oid| oid.to_s }
      metadata_id = Dor::MetadataService.resolvable(candidates).first
      metadata_id.nil? ? nil : Dor::MetadataService.fetch(metadata_id.to_s)
    end

    # Fills +ds+ from content_metadata.xml in the item's workspace directory;
    # leaves +ds+ untouched when that file is missing.
    # @param ds the contentMetadata datastream to populate
    def build_contentMetadata_datastream(ds)
      path = Druid.new(self.pid).path(Dor::Config.stacks.local_workspace_root)
      content_md_file = File.join(path, 'content_metadata.xml')
      # File.exist? -- the File.exists? alias was removed in Ruby 3.2
      if File.exist?(content_md_file)
        ds.label = 'Content Metadata'
        ds.ng_xml = Nokogiri::XML(File.read(content_md_file))
      end
    end

    # Fills +ds+ with the fetched descriptive metadata, if any was found.
    # @param ds the descMetadata datastream to populate
    def build_descMetadata_datastream(ds)
      content = fetch_descMetadata_datastream
      unless content.nil?
        ds.label = 'Descriptive Metadata'
        ds.ng_xml = Nokogiri::XML(content)
        ds.ng_xml.normalize_text!
      end
    end

    # Fills +ds+ with a copy of the admin policy object's defaultObjectRights.
    # NOTE(review): fails with NoMethodError when the item has no admin policy object.
    # @param ds the rightsMetadata datastream to populate
    def build_rightsMetadata_datastream(ds)
      content_ds = self.admin_policy_object.datastreams['defaultObjectRights']
      ds.label = 'Rights Metadata'
      ds.ng_xml = content_ds.ng_xml.clone
    end

    # Builds the <publicObject> document: identity, public content and rights
    # metadata plus the generated Dublin Core.
    # @return [String] the XML, serialized without blank nodes
    def public_xml
      pub = Nokogiri::XML("<publicObject/>").root
      pub['id'] = pid
      pub.add_child(self.datastreams['identityMetadata'].ng_xml.root.clone)
      pub.add_child(self.datastreams['contentMetadata'].public_xml.root.clone)
      pub.add_child(self.datastreams['rightsMetadata'].ng_xml.root.clone)
      pub.add_child(generate_dublin_core.root)
      Nokogiri::XML(pub.to_xml) { |x| x.noblanks }.to_xml { |config| config.no_declaration }
    end

    # Generates Dublin Core from the descMetadata datastream using the
    # "<format>2dc.xslt" stylesheet next to this file. The format comes from
    # the admin policy object's administrativeMetadata, or is 'mods' when the
    # item has no admin policy object.
    # Should not be used for the Fedora DC datastream
    # @return the result of the XSLT transform
    def generate_dublin_core
      apo = self.admin_policy_object
      format = apo.nil? ? 'mods' : apo.datastreams['administrativeMetadata'].ng_xml.at('/administrativeMetadata/descMetadata/format').text.downcase
      stylesheet_path = File.expand_path(File.dirname(__FILE__) + "/#{format}2dc.xslt")
      # block form closes the stylesheet file once it has been parsed
      xslt = File.open(stylesheet_path) { |io| Nokogiri::XSLT(io) }
      xslt.transform(self.datastreams['descMetadata'].ng_xml)
    end

    # Sends identity, content and rights metadata, the generated Dublin Core
    # and the public XML to the document store.
    def publish_metadata
      DigitalStacksService.transfer_to_document_store(pid, self.datastreams['identityMetadata'].to_xml, 'identityMetadata')
      DigitalStacksService.transfer_to_document_store(pid, self.datastreams['contentMetadata'].to_xml, 'contentMetadata')
      DigitalStacksService.transfer_to_document_store(pid, self.datastreams['rightsMetadata'].to_xml, 'rightsMetadata')
      dc_xml = self.generate_dublin_core.to_xml { |config| config.no_declaration }
      DigitalStacksService.transfer_to_document_store(pid, dc_xml, 'dc')
      DigitalStacksService.transfer_to_document_store(pid, public_xml, 'public')
    end

    # Delegates to ProvenanceMetadataService.add_provenance for this item.
    # @param workflow_id passed through to the service
    # @param event_text passed through to the service
    def build_provenanceMetadata_datastream(workflow_id, event_text)
      ProvenanceMetadataService.add_provenance(self, workflow_id, event_text)
    end

    # Runs JHOVE over the item's content directory and stores the resulting
    # technical metadata in +ds+. The jhove_service gem is loaded on first use.
    # @param ds the technicalMetadata datastream to populate
    # @raise [RuntimeError] if the jhove_service gem cannot be loaded
    def build_technicalMetadata_datastream(ds)
      unless defined? ::JhoveService
        begin
          require 'jhove_service'
        rescue LoadError => e
          puts e.inspect
          raise "jhove-service dependency gem was not found.  Please add it to your Gemfile and run bundle install"
        end
      end
      temp_dir = nil
      begin
        content_dir = Druid.new(self.pid).path(Config.sdr.local_workspace_root)
        temp_dir = Dir.mktmpdir(self.pid)
        jhove_service = ::JhoveService.new(temp_dir)
        jhove_output_file = jhove_service.run_jhove(content_dir)
        tech_md_file = jhove_service.create_technical_metadata(jhove_output_file)
        ds.label = 'Technical Metadata'
        ds.ng_xml = Nokogiri::XML(IO.read(tech_md_file))
      ensure
        # temp_dir is nil when a step before mktmpdir failed; skip the cleanup
        # then so File.exist?(nil) cannot mask the original error
        FileUtils.remove_entry_secure(temp_dir) if temp_dir && File.exist?(temp_dir)
      end
    end

    # Shelves every contentMetadata <file> whose shelve attribute is "yes"
    # (case-insensitive) to the digital stacks.
    def shelve
      file_nodes = self.datastreams['contentMetadata'].ng_xml.xpath('//file')
      # to_s: a <file> without a shelve attribute is treated as not shelved
      files = file_nodes.select { |file| file['shelve'].to_s.downcase == 'yes' }.map { |file| file['id'] }

      DigitalStacksService.shelve_to_stacks(pid, files)
    end

    # Delegates to SdrIngestService.transfer for this item.
    # @param agreement_id passed through to the service
    def sdr_ingest_transfer(agreement_id)
      SdrIngestService.transfer(self, agreement_id)
    end

    # Removes this item's workspace and export entries via CleanupService.
    def cleanup()
      CleanupService.cleanup(self)
    end

    # Creates the named workflow for this item in the 'dor' repository, using
    # the workflow XML from the admin policy object's administrativeMetadata.
    # NOTE(review): fails with NoMethodError when the item has no admin policy
    # object or the policy has no workflow with this id.
    # @param [String] name id of the workflow
    def initiate_apo_workflow(name)
      wf_xml = admin_policy_object.datastreams['administrativeMetadata'].ng_xml.xpath(%{//workflow[@id="#{name}"]}).first.to_xml
      Dor::WorkflowService.create_workflow('dor', self.pid, name, wf_xml)
    end

  end

end
@@ -0,0 +1,22 @@
1
+ require 'rest-client'
2
+
3
# Metadata handler for the 'catkey' and 'barcode' prefixes; it is registered
# with Dor::MetadataService at the end of this file.
handler = Class.new do
  Dor::Config.metadata.declare(:catalog) { url nil }

  # Requests metadata from the configured catalog url with the query string
  # "?<prefix>=<identifier>" (trailing newlines are chomped from both).
  # @param [String] prefix identifier type, one of #prefixes
  # @param [String] identifier the identifier value
  # @return the response of the GET request
  def fetch(prefix, identifier)
    client = RestClient::Resource.new(Dor::Config.metadata.catalog.url)
    client["?#{prefix.chomp}=#{identifier.chomp}"].get
  end

  # Builds a label from the title and nonSort parts of the first MODS titleInfo.
  # @param [String] metadata MODS XML
  # @return [String] the label; "" when the metadata has no root element
  def label(metadata)
    mods = Nokogiri::XML(metadata)
    # empty or unparsable input yields a document without a root element
    return "" if mods.root.nil?
    mods.root.add_namespace_definition('mods','http://www.loc.gov/mods/v3')
    mods.xpath('/mods:mods/mods:titleInfo[1]').xpath('mods:title|mods:nonSort').collect { |n| n.text }.join(' ').strip
  end

  # @return [Array<String>] identifier prefixes this handler serves
  def prefixes
    ['catkey','barcode']
  end
end

Dor::MetadataService.register(handler)
@@ -0,0 +1,42 @@
1
+ require 'nokogiri'
2
+ require 'rest-client'
3
+
4
# Metadata handler for the 'mdtoolkit' and 'druid' prefixes, backed by the
# eXist database configured under Dor::Config.metadata.exist; it is
# registered with Dor::MetadataService at the end of this file.
handler = Class.new do
  Dor::Config.metadata.declare(:exist) { url nil }

  # Posts a query for documents in the 'orbeon/fr' collection whose base URI
  # contains +identifier+ and returns the first result element.
  # NOTE(review): identifier is interpolated into the query unescaped.
  # @param [String] prefix not used by this handler
  # @param [String] identifier the identifier to look for
  # @return [String, nil] the first result as XML text, or nil when there is none
  def fetch(prefix, identifier)
    xquery = <<-QUERY
<?xml version="1.0" encoding="UTF-8"?>
<query xmlns="http://exist.sourceforge.net/NS/exist">
<text>
collection('orbeon/fr')[contains(base-uri(), "#{identifier}")]
</text>
</query>
    QUERY
    resource = RestClient::Resource.new(Dor::Config.metadata.exist.url)
    reply = resource['db'].post(xquery, :content_type => 'application/xquery')
    parsed = Nokogiri::XML(reply)
    parsed.root.add_namespace_definition('exist','http://exist.sourceforge.net/NS/exist')
    first_hit = parsed.xpath('/exist:result/*[1]').first
    first_hit && first_hit.to_s
  end

  # Derives a label from the metadata according to its root element:
  # msDesc uses the msIdentifier collection text, mods uses the title and
  # nonSort parts of the first titleInfo.
  # @param [String] metadata XML metadata
  # @return [String, nil] "" when there is no root element, nil for any other root
  def label(metadata)
    xml = Nokogiri::XML(metadata)
    return "" if xml.root.nil?

    case xml.root.name
    when 'msDesc'
      xml.xpath('/msDesc/msIdentifier/collection').text
    when 'mods'
      xml.root.add_namespace_definition('mods','http://www.loc.gov/mods/v3')
      title_parts = xml.xpath('/mods:mods/mods:titleInfo[1]').xpath('mods:title|mods:nonSort')
      title_parts.collect { |n| n.text }.join(' ').strip
    end
  end

  # @return [Array<String>] identifier prefixes this handler serves
  def prefixes
    ['mdtoolkit','druid']
  end
end

Dor::MetadataService.register(handler)