dor-services 2.2.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (38) hide show
  1. data/lib/datastreams/content_metadata_ds.rb +12 -0
  2. data/lib/datastreams/embargo_metadata_ds.rb +107 -0
  3. data/lib/datastreams/events_ds.rb +58 -0
  4. data/lib/datastreams/identity_metadata_ds.rb +28 -0
  5. data/lib/datastreams/ng_tidy.rb +19 -0
  6. data/lib/datastreams/simple_dublin_core_ds.rb +23 -0
  7. data/lib/datastreams/workflow_definition_ds.rb +105 -0
  8. data/lib/datastreams/workflow_ds.rb +16 -0
  9. data/lib/dor-services.rb +19 -0
  10. data/lib/dor/admin_policy_object.rb +11 -0
  11. data/lib/dor/base.rb +81 -0
  12. data/lib/dor/cleanup_service.rb +32 -0
  13. data/lib/dor/config.rb +45 -0
  14. data/lib/dor/digital_stacks_service.rb +82 -0
  15. data/lib/dor/druid_utils.rb +41 -0
  16. data/lib/dor/embargo.rb +41 -0
  17. data/lib/dor/exceptions.rb +13 -0
  18. data/lib/dor/item.rb +141 -0
  19. data/lib/dor/metadata_handlers/catalog_handler.rb +22 -0
  20. data/lib/dor/metadata_handlers/mdtoolkit_handler.rb +42 -0
  21. data/lib/dor/metadata_service.rb +88 -0
  22. data/lib/dor/mods2dc.xslt +447 -0
  23. data/lib/dor/provenance_metadata_service.rb +65 -0
  24. data/lib/dor/registration_service.rb +87 -0
  25. data/lib/dor/rsolr.rb +27 -0
  26. data/lib/dor/sdr_ingest_service.rb +117 -0
  27. data/lib/dor/search_service.rb +86 -0
  28. data/lib/dor/suri_service.rb +37 -0
  29. data/lib/dor/tei2dc.xslt +102 -0
  30. data/lib/dor/workflow_object.rb +13 -0
  31. data/lib/dor/workflow_service.rb +111 -0
  32. data/lib/gsearch/demoFoxmlToSolr.xslt +384 -0
  33. data/lib/gsearch/schema.xml +229 -0
  34. data/lib/tasks/rdoc.rake +32 -0
  35. data/lib/xml_models/foxml.rb +261 -0
  36. data/lib/xml_models/identity_metadata/dublin_core.rb +119 -0
  37. data/lib/xml_models/identity_metadata/identity_metadata.rb +288 -0
  38. metadata +462 -0
@@ -0,0 +1,32 @@
1
+ require 'fileutils'
2
+ require 'lyber-utils'
3
+
4
module Dor
  # Removes the files and directories left on local disk for an object:
  # its workspace directory, its export bag directory and the bag's tar file.
  class CleanupService
    Config.declare(:cleanup) do
      local_workspace_root '/dor/workspace'
      local_export_home '/dor/export'
    end

    # Delete all workspace and export entities for the druid
    # @param [#pid] dor_item any object whose +pid+ returns the druid to clean up
    def self.cleanup(dor_item)
      druid = dor_item.pid

      # Workspace content lives in a druid-tree directory under the workspace root
      workspace_dir = Druid.new(druid).path(Config.cleanup.local_workspace_root)
      self.remove_entry(workspace_dir)

      # Export bags are named after the druid itself, directly under the export home
      bag_dir = File.join(Config.cleanup.local_export_home, druid)
      self.remove_entry(bag_dir)

      tarfile = "#{bag_dir}.tar"
      self.remove_entry(tarfile)
    end

    # Delete a filesystem entry (file, directory tree or symlink) if it is present
    # @param [String] entry path of the entry to delete
    def self.remove_entry(entry)
      # File.exist? follows symlinks and reports false for a dangling link,
      # so also check File.symlink? to make sure broken links get removed too.
      FileUtils.remove_entry(entry) if File.exist?(entry) || File.symlink?(entry)
    end

  end

end
@@ -0,0 +1,45 @@
1
+ require 'mod_cons'
2
+
3
module Dor
  # Root configuration object for the Dor namespace.
  Config = ModCons::Configuration.new(:'Dor::Config')

  Config.declare do
    # Settings for talking to the Fedora repository
    fedora do
      url nil
      safeurl nil
      cert_file nil
      key_file nil
      key_pass ''

      instance_eval do
        # Builds a RestClient resource for the configured url, using the
        # configured certificate file and (optionally passphrase-protected) RSA key file.
        def client
          endpoint = self.url
          certificate = OpenSSL::X509::Certificate.new(File.read(self.cert_file))
          private_key = OpenSSL::PKey::RSA.new(File.read(self.key_file), self.key_pass)
          RestClient::Resource.new(
            endpoint,
            :ssl_client_cert => certificate,
            :ssl_client_key => private_key
          )
        end
      end

      config_changed do |settings|
        # Publish a copy of the url with the user and password removed
        sanitized = URI.parse(settings.url)
        sanitized.password = nil
        sanitized.user = nil
        settings.safeurl sanitized.to_s

        # Turn warnings off while the constants below are (re)assigned,
        # and restore the previous setting no matter what happens.
        previous_verbosity = $VERBOSE
        $VERBOSE = nil
        begin
          ::ENABLE_SOLR_UPDATES = false
          ::Fedora::Repository.register(settings.url)
          ::Fedora::Connection.const_set(:SSL_CLIENT_CERT_FILE, settings.cert_file)
          ::Fedora::Connection.const_set(:SSL_CLIENT_KEY_FILE, settings.key_file)
          ::Fedora::Connection.const_set(:SSL_CLIENT_KEY_PASS, settings.key_pass)
        ensure
          $VERBOSE = previous_verbosity
        end
      end
    end
  end

end
45
+
@@ -0,0 +1,82 @@
1
+ require 'tempfile'
2
+ require 'systemu'
3
+
4
module Dor
  # Pushes metadata documents and content files to remote hosts by running
  # ssh (to create directories) and scp (to copy files) as subprocesses.
  class DigitalStacksService

    Config.declare(:stacks) do
      document_cache_storage_root nil
      document_cache_host nil
      document_cache_user nil

      storage_root '/stacks'
      host nil
      user nil

      local_workspace_root '/dor'
    end

    # TODO copied from lyber-core, but didn't want to create circular dependency between gems for this one method
    # Executes a system command in a subprocess.
    # The method will return stdout from the command if execution was successful.
    # The method will raise an exception if execution fails.
    # The exception's message will contain the explanation of the failure.
    # @param [String] command the command to be executed
    # @return [String] stdout from the command if execution was successful
    # @raise [RuntimeError] if the command exits non-zero or could not be run at all
    def self.execute(command)
      status, stdout, stderr = systemu(command)
      raise stderr if status.exitstatus != 0
      return stdout
    rescue => e
      # When systemu itself raises, stderr was never assigned and is nil;
      # fall back to the exception's own message so the real cause is reported
      # instead of a NoMethodError from nil.split.
      error_text = stderr.nil? ? e.message : stderr
      msg = "Command failed to execute: [#{command}] caused by <STDERR =\n#{error_text.split($/).join("\n")}>"
      msg << "\nSTDOUT =\n#{stdout.split($/).join("\n")}" if (stdout && (stdout.length > 0))
      raise msg
    end

    # Converts a druid into its relative druid-tree path.
    # @param [String] druid the identifier to convert
    # @return [String, nil] the relative path, or nil if Druid could not build one
    def self.druid_tree(druid)
      Druid.new(druid).path
    rescue
      nil
    end

    # Writes +content+ to a temp file and copies it into the object's directory
    # in the remote document cache under the name +filename+.
    # @param [String] id the druid of the object
    # @param [String] content the document body to transfer
    # @param [String] filename the name to give the document on the remote side
    # @raise [RuntimeError] if the druid is invalid or a remote command fails
    def self.transfer_to_document_store(id, content, filename)
      path = self.druid_tree(id)
      raise "Invalid druid: #{id}" if(path.nil?)

      # create the remote directory in the document cache
      remote_document_cache_dir = File.join(Config.stacks.document_cache_storage_root, path)
      command = "ssh #{Config.stacks.document_cache_user}@#{Config.stacks.document_cache_host} mkdir -p #{remote_document_cache_dir}"
      self.execute(command)

      # create a temp file containing the content and copy the contents to the remote document cache
      Tempfile.open(filename) do |tf|
        tf.write(content)
        # flush so scp sees the full content while the temp file is still open
        tf.flush
        command = "scp \"#{tf.path}\" #{Config.stacks.document_cache_user}@#{Config.stacks.document_cache_host}:#{remote_document_cache_dir}/#{filename}"
        self.execute(command)
      end
    end

    # Copies the named files from the object's local workspace directory to its
    # directory on the remote stacks host.
    # @param [String] id the druid of the object
    # @param [Array<String>] files names of the files, relative to the object's workspace directory
    # @raise [RuntimeError] if the druid is invalid or a remote command fails
    def self.shelve_to_stacks(id, files)
      path = self.druid_tree(id)
      raise "Invalid druid: #{id}" if(path.nil?)

      # create the remote directory on the digital stacks
      remote_storage_dir = File.join(Config.stacks.storage_root, path)
      command = "ssh #{Config.stacks.user}@#{Config.stacks.host} mkdir -p #{remote_storage_dir}"
      self.execute(command)

      # copy the contents for the given object from the local workspace directory to the remote directory
      local_storage_dir = File.join(Config.stacks.local_workspace_root, path)
      files.each do |file|
        command = "scp \"#{local_storage_dir}/#{file}\" #{Config.stacks.user}@#{Config.stacks.host}:#{remote_storage_dir}"
        self.execute(command)
      end
    end

  end

end
82
+
@@ -0,0 +1,41 @@
1
# Wraps a druid identifier (two letters, three digits, two letters, four digits,
# with an optional "druid:" prefix) and maps it onto a four-level directory tree.
class Druid
  attr_accessor :druid

  # Anchored with \A and \z so the WHOLE string must be a druid. With ^ and $
  # a multi-line string containing one valid line would pass validation.
  DRUID_PATTERN = /\A(?:druid:)?([a-z]{2})(\d{3})([a-z]{2})(\d{4})\z/

  # @param [String] druid the identifier, with or without the "druid:" prefix
  # @raise [ArgumentError] if the value does not match DRUID_PATTERN
  def initialize(druid)
    if druid !~ DRUID_PATTERN
      raise ArgumentError, "Invalid DRUID: #{druid}"
    end
    @druid = druid
  end

  # @return [String] the identifier with any "druid:" prefix removed
  def id
    @druid.scan(/\A(?:druid:)?(.+)\z/).flatten.last
  end

  # @return [Array<String>] the four segments of the identifier, e.g. ["ab", "123", "cd", "4567"]
  def tree
    @druid.scan(DRUID_PATTERN).flatten
  end

  # @param [String, nil] base optional directory to prepend
  # @return [String] the tree segments joined into a path, under +base+ when given
  def path(base=nil)
    File.join(*([base,tree].compact))
  end

  # Creates the druid-tree directory (and any missing parents) under +base+.
  # @param [String] base directory under which the tree is created
  def mkdir(base)
    FileUtils.mkdir_p(path(base))
  end

  # Removes the druid-tree directory under +base+, then walks upward removing
  # each parent segment, stopping at the first directory that is not empty.
  # @param [String] base directory under which the tree lives (never removed itself)
  def rmdir(base)
    parts = tree
    while parts.length > 0
      dir = File.join(base, *parts)
      begin
        # a stray .DS_Store would otherwise keep the directory from being removed
        FileUtils.rm(File.join(dir,'.DS_Store'), :force => true)
        FileUtils.rmdir(dir)
      rescue Errno::ENOTEMPTY
        break
      end
      parts.pop
    end
  end
end
@@ -0,0 +1,41 @@
1
+ require 'datastreams/embargo_metadata_ds'
2
+ require 'datastreams/events_ds'
3
+
4
module Dor

  # Embargo-related behaviour for an object.
  # The including object must expose embargoMetadata, rightsMetadata and events datastreams
  # through a +datastreams+ hash.
  module Embargo

    # Lifts the embargo on the object by editing its datastreams in memory:
    # - marks embargoMetadata as released
    # - drops embargoReleaseDate from the read access in rightsMetadata
    # - swaps each rightsMetadata <access> for the same-typed one under embargoMetadata/releaseAccess
    # - records an "embargo" event
    # @param [String] release_agent name of the person, application or thing that released embargo
    # @note The caller should save the object to fedora to commit the changes
    def release_embargo(release_agent="unknown")
      embargo_md = datastreams['embargoMetadata']
      embargo_md.status = 'released'

      rights_ds = datastreams['rightsMetadata']
      rights_xml = rights_ds.ng_xml

      # The release date no longer applies once the embargo is lifted
      rights_xml.xpath("//rightsMetadata/access[@type='read']/machine/embargoReleaseDate").remove

      # Replace rights <access> nodes with those from embargoMetadata
      embargo_md.release_access_node.xpath('//releaseAccess/access').each do |replacement|
        access_type = replacement['type']
        rights_xml.xpath("//rightsMetadata/access[@type='#{access_type}']").remove

        # Append after the last remaining <access>, or directly under the root when none is left
        last_access = rights_xml.at_xpath("//rightsMetadata/access[last()]")
        if last_access.nil?
          rights_xml.root.add_child(replacement.clone)
        else
          last_access.add_next_sibling(replacement.clone)
        end
      end

      rights_ds.dirty = true
      datastreams['events'].add_event("embargo", release_agent, "Embargo released")
    end
  end
end
@@ -0,0 +1,13 @@
1
module Dor

  # Base class for all errors raised by this library.
  # Inherits from StandardError (not ::Exception) so that a bare +rescue+ or
  # +rescue StandardError+ catches Dor errors like any other application error.
  # +rescue ::Exception+ and +rescue Dor::Exception+ keep working as before.
  class Exception < ::StandardError; end

  # Error type for parameter problems; adds nothing beyond its name.
  class ParameterError < Exception; end

  # Error type that carries the offending identifier.
  class DuplicateIdError < Exception
    # @return [Object] the identifier passed to the constructor
    attr_reader :pid

    # @param [String] pid the identifier that is already in use
    def initialize(pid)
      @pid = pid
      # Pass a message to the superclass; without it #message is only the class name
      super("Duplicate ID: #{pid}")
    end
  end

end
@@ -0,0 +1,141 @@
1
+ require 'dor/base'
2
+ require 'datastreams/content_metadata_ds'
3
+ require 'datastreams/ng_tidy'
4
+ require 'tmpdir'
5
+
6
module Dor

  # A repository item with content, descriptive, rights, provenance and technical
  # metadata datastreams, plus the operations that build and publish them.
  class Item < Base

    has_metadata :name => "contentMetadata", :type => ContentMetadataDS
    has_metadata :name => "descMetadata", :type => ActiveFedora::NokogiriDatastream
    has_metadata :name => "rightsMetadata", :type => ActiveFedora::NokogiriDatastream
    has_metadata :name => "provenanceMetadata", :type => ActiveFedora::NokogiriDatastream
    has_metadata :name => "technicalMetadata", :type => ActiveFedora::NokogiriDatastream

    # Loads the admin policy object referenced by this item's hydra_isGovernedBy relationship.
    # @return [Dor::AdminPolicyObject, nil] nil when there is no such relationship or no id in it
    def admin_policy_object
      apo_ref = Array(self.rels_ext.relationships[:self]['hydra_isGovernedBy']).first
      return nil if apo_ref.nil?

      # the id is the last path segment of the reference's value
      apo_id = apo_ref.value.split(%r{/}).last
      return nil if apo_id.nil? || apo_id.empty?

      Dor::AdminPolicyObject.load_instance(apo_id)
    end

    # Fetches descriptive metadata using the first of this item's other ids
    # that Dor::MetadataService reports as resolvable.
    # @return [Object, nil] whatever Dor::MetadataService.fetch returns, or nil if no id is resolvable
    def fetch_descMetadata_datastream
      candidates = self.identity_metadata.otherIds.collect { |oid| oid.to_s }
      metadata_id = Dor::MetadataService.resolvable(candidates).first
      return nil if metadata_id.nil?

      Dor::MetadataService.fetch(metadata_id.to_s)
    end

    # Fills +ds+ from content_metadata.xml in the item's workspace directory, if that file exists.
    # @param ds the contentMetadata datastream to populate
    def build_contentMetadata_datastream(ds)
      path = Druid.new(self.pid).path(Dor::Config.stacks.local_workspace_root)
      content_md_file = File.join(path, 'content_metadata.xml')
      # File.exist? rather than File.exists?, which newer Rubies no longer provide
      if File.exist?(content_md_file)
        ds.label = 'Content Metadata'
        ds.ng_xml = Nokogiri::XML(File.read(content_md_file))
      end
    end

    # Fills +ds+ with the fetched descriptive metadata, if any was found.
    # @param ds the descMetadata datastream to populate
    def build_descMetadata_datastream(ds)
      content = fetch_descMetadata_datastream
      unless content.nil?
        ds.label = 'Descriptive Metadata'
        ds.ng_xml = Nokogiri::XML(content)
        ds.ng_xml.normalize_text!
      end
    end

    # Fills +ds+ with a copy of the admin policy object's defaultObjectRights.
    # NOTE(review): raises NoMethodError when the item has no admin policy object — confirm intended.
    # @param ds the rightsMetadata datastream to populate
    def build_rightsMetadata_datastream(ds)
      content_ds = self.admin_policy_object.datastreams['defaultObjectRights']
      ds.label = 'Rights Metadata'
      ds.ng_xml = content_ds.ng_xml.clone
    end

    # Assembles a <publicObject> document from the identity, content and rights
    # metadata plus the generated Dublin Core.
    # @return [String] the XML, without blank nodes and without an XML declaration
    def public_xml
      pub = Nokogiri::XML("<publicObject/>").root
      pub['id'] = pid
      pub.add_child(self.datastreams['identityMetadata'].ng_xml.root.clone)
      pub.add_child(self.datastreams['contentMetadata'].public_xml.root.clone)
      pub.add_child(self.datastreams['rightsMetadata'].ng_xml.root.clone)
      pub.add_child(generate_dublin_core.root)
      Nokogiri::XML(pub.to_xml) { |x| x.noblanks }.to_xml { |config| config.no_declaration }
    end

    # Generates Dublin Core from the descMetadata datastream using the
    # "<format>2dc.xslt" stylesheet that sits next to this file. The format comes from
    # the admin policy object's administrativeMetadata, defaulting to 'mods' when there is no APO.
    # Should not be used for the Fedora DC datastream
    # @return the document produced by the XSLT transform
    def generate_dublin_core
      apo = self.admin_policy_object
      format = apo.nil? ? 'mods' : apo.datastreams['administrativeMetadata'].ng_xml.at('/administrativeMetadata/descMetadata/format').text.downcase
      xslt_path = File.expand_path(File.dirname(__FILE__) + "/#{format}2dc.xslt")
      # block form so the stylesheet's file handle is closed once it has been parsed
      xslt = File.open(xslt_path) { |xslt_file| Nokogiri::XSLT(xslt_file) }
      xslt.transform(self.datastreams['descMetadata'].ng_xml)
    end

    # Sends the identity, content and rights metadata, the generated Dublin Core
    # and the public XML to the document store.
    def publish_metadata
      DigitalStacksService.transfer_to_document_store(pid, self.datastreams['identityMetadata'].to_xml, 'identityMetadata')
      DigitalStacksService.transfer_to_document_store(pid, self.datastreams['contentMetadata'].to_xml, 'contentMetadata')
      DigitalStacksService.transfer_to_document_store(pid, self.datastreams['rightsMetadata'].to_xml, 'rightsMetadata')
      dc_xml = self.generate_dublin_core.to_xml {|config| config.no_declaration}
      DigitalStacksService.transfer_to_document_store(pid, dc_xml, 'dc')
      DigitalStacksService.transfer_to_document_store(pid, public_xml, 'public')
    end

    # Delegates to ProvenanceMetadataService.add_provenance for this item.
    # @param workflow_id passed through to the service
    # @param event_text passed through to the service
    def build_provenanceMetadata_datastream(workflow_id, event_text)
      ProvenanceMetadataService.add_provenance(self, workflow_id, event_text)
    end

    # Runs JHOVE over the item's workspace content and loads the resulting
    # technical metadata file into +ds+. The jhove_service library is loaded lazily.
    # @param ds the technicalMetadata datastream to populate
    # @raise [RuntimeError] if the jhove_service library cannot be loaded
    def build_technicalMetadata_datastream(ds)
      unless defined? ::JhoveService
        begin
          require 'jhove_service'
        rescue LoadError => e
          puts e.inspect
          raise "jhove-service dependency gem was not found. Please add it to your Gemfile and run bundle install"
        end
      end
      temp_dir = nil
      begin
        content_dir = Druid.new(self.pid).path(Config.sdr.local_workspace_root)
        temp_dir = Dir.mktmpdir(self.pid)
        jhove_service = ::JhoveService.new(temp_dir)
        jhove_output_file = jhove_service.run_jhove(content_dir)
        tech_md_file = jhove_service.create_technical_metadata(jhove_output_file)
        ds.label = 'Technical Metadata'
        ds.ng_xml = Nokogiri::XML(IO.read(tech_md_file))
      ensure
        # temp_dir is still nil if something failed before mktmpdir; guard so the
        # cleanup does not raise a TypeError that hides the original error
        FileUtils.remove_entry_secure(temp_dir) if temp_dir && File.exist?(temp_dir)
      end
    end

    # Shelves every file whose contentMetadata <file> element has shelve="yes" (case-insensitive).
    def shelve
      file_nodes = self.datastreams['contentMetadata'].ng_xml.xpath('//file')
      # to_s so a <file> without a shelve attribute is skipped instead of raising on nil
      shelvable = file_nodes.select { |file| file['shelve'].to_s.downcase == 'yes' }
      files = shelvable.collect { |file| file['id'] }

      DigitalStacksService.shelve_to_stacks(pid, files)
    end

    # Delegates to SdrIngestService.transfer for this item.
    # @param agreement_id passed through to the service
    def sdr_ingest_transfer(agreement_id)
      SdrIngestService.transfer(self,agreement_id)
    end

    # Delegates to CleanupService.cleanup for this item.
    def cleanup()
      CleanupService.cleanup(self)
    end

    # Creates the named workflow for this item in the 'dor' repository, using the
    # <workflow id="name"> element from the admin policy object's administrativeMetadata.
    # @param [String] name the id of the workflow to start
    def initiate_apo_workflow(name)
      wf_xml = admin_policy_object.datastreams['administrativeMetadata'].ng_xml.xpath(%{//workflow[@id="#{name}"]}).first.to_xml
      Dor::WorkflowService.create_workflow('dor',self.pid,name,wf_xml)
    end

  end

end
@@ -0,0 +1,22 @@
1
+ require 'rest-client'
2
+
3
# Metadata handler for the 'catkey' and 'barcode' prefixes, backed by the
# service at Dor::Config.metadata.catalog.url.
handler = Class.new do
  Dor::Config.metadata.declare(:catalog) { url nil }

  # Requests the record for an identifier from the catalog service.
  # @param [String] prefix the identifier type, sent as the query parameter name
  # @param [String] identifier the identifier value, sent as the query parameter value
  # @return the response of the GET request
  def fetch(prefix, identifier)
    client = RestClient::Resource.new(Dor::Config.metadata.catalog.url)
    client["?#{prefix.chomp}=#{identifier.chomp}"].get
  end

  # Builds a label from the title and nonSort parts of the first titleInfo in a MODS record.
  # @param [String] metadata the MODS XML
  # @return [String] the label, or an empty string when the metadata has no root element
  def label(metadata)
    mods = Nokogiri::XML(metadata)
    # Empty or unparseable input has no root; return "" as the mdtoolkit handler does
    return '' if mods.root.nil?

    mods.root.add_namespace_definition('mods','http://www.loc.gov/mods/v3')
    mods.xpath('/mods:mods/mods:titleInfo[1]').xpath('mods:title|mods:nonSort').collect { |n| n.text }.join(' ').strip
  end

  # @return [Array<String>] the identifier prefixes this handler serves
  def prefixes
    ['catkey','barcode']
  end
end

Dor::MetadataService.register(handler)
@@ -0,0 +1,42 @@
1
+ require 'nokogiri'
2
+ require 'rest-client'
3
+
4
# Metadata handler for the 'mdtoolkit' and 'druid' prefixes, backed by the
# eXist database at Dor::Config.metadata.exist.url.
handler = Class.new do
  Dor::Config.metadata.declare(:exist) { url nil }

  # Posts an XQuery for documents in the 'orbeon/fr' collection whose base URI
  # contains the identifier, and returns the first one found.
  # @param [String] prefix not used by this handler
  # @param [String] identifier the text to look for in each document's base URI
  # @return [String, nil] the first result element serialized as a string, or nil if there is none
  def fetch(prefix, identifier)
    # The heredoc body is kept flush-left so the XML declaration starts the payload
    xquery = <<-QUERY
<?xml version="1.0" encoding="UTF-8"?>
<query xmlns="http://exist.sourceforge.net/NS/exist">
<text>
collection('orbeon/fr')[contains(base-uri(), "#{identifier}")]
</text>
</query>
    QUERY
    exist = RestClient::Resource.new(Dor::Config.metadata.exist.url)
    reply = exist['db'].post(xquery, :content_type => 'application/xquery')
    doc = Nokogiri::XML(reply)
    doc.root.add_namespace_definition('exist','http://exist.sourceforge.net/NS/exist')
    first_hit = doc.xpath('/exist:result/*[1]').first
    first_hit && first_hit.to_s
  end

  # Derives a label from the metadata, depending on its root element:
  # msDesc uses the collection under msIdentifier; mods uses the title and nonSort
  # parts of the first titleInfo.
  # @param [String] metadata the XML to inspect
  # @return [String, nil] "" when there is no root element, nil for any other root element
  def label(metadata)
    xml = Nokogiri::XML(metadata)
    root = xml.root
    return "" if root.nil?

    if root.name == 'msDesc'
      xml.xpath('/msDesc/msIdentifier/collection').text
    elsif root.name == 'mods'
      root.add_namespace_definition('mods','http://www.loc.gov/mods/v3')
      title_parts = xml.xpath('/mods:mods/mods:titleInfo[1]').xpath('mods:title|mods:nonSort')
      title_parts.collect { |part| part.text }.join(' ').strip
    end
  end

  # @return [Array<String>] the identifier prefixes this handler serves
  def prefixes
    ['mdtoolkit','druid']
  end
end

Dor::MetadataService.register(handler)