dor-services 2.2.4 → 4.4.10

Sign up to get free protection for your applications and to get access to all the features.
Files changed (106) hide show
  1. checksums.yaml +15 -0
  2. data/bin/dor-indexer +108 -0
  3. data/bin/dor-indexerd +73 -0
  4. data/bin/nokogiri +19 -0
  5. data/bin/rake +19 -0
  6. data/bin/ruby_noexec_wrapper +14 -0
  7. data/bin/solrizer +19 -0
  8. data/bin/solrizerd +19 -0
  9. data/config/certs/README +1 -0
  10. data/config/config_defaults.yml +62 -0
  11. data/config/dev_console_env.rb.example +67 -0
  12. data/config/predicate_mappings.yml +55 -0
  13. data/lib/dor-services.rb +152 -19
  14. data/lib/dor/config.rb +133 -35
  15. data/lib/dor/datastreams/administrative_metadata_ds.rb +84 -0
  16. data/lib/dor/datastreams/content_metadata_ds.rb +337 -0
  17. data/lib/dor/datastreams/datastream_spec_solrizer.rb +18 -0
  18. data/lib/dor/datastreams/default_object_rights_ds.rb +52 -0
  19. data/lib/dor/datastreams/desc_metadata_ds.rb +39 -0
  20. data/lib/{datastreams → dor/datastreams}/embargo_metadata_ds.rb +25 -20
  21. data/lib/{datastreams → dor/datastreams}/events_ds.rb +14 -9
  22. data/lib/dor/datastreams/identity.xsl +8 -0
  23. data/lib/dor/datastreams/identity_metadata_ds.rb +112 -0
  24. data/lib/dor/datastreams/role_metadata_ds.rb +51 -0
  25. data/lib/dor/datastreams/simple_dublin_core_ds.rb +45 -0
  26. data/lib/dor/datastreams/version_metadata_ds.rb +214 -0
  27. data/lib/dor/datastreams/workflow_definition_ds.rb +113 -0
  28. data/lib/dor/datastreams/workflow_ds.rb +103 -0
  29. data/lib/dor/exceptions.rb +0 -1
  30. data/lib/dor/migrations/content_metadata_ds/change_content_type.rb +7 -0
  31. data/lib/dor/migrations/identifiable/assert_adminPolicy.rb +9 -0
  32. data/lib/dor/migrations/identifiable/fix_model_assertions.rb +13 -0
  33. data/lib/dor/migrations/identifiable/record_remediation.rb +18 -0
  34. data/lib/dor/migrations/identifiable/uriify_augmented_contentlocation_refs.rb +18 -0
  35. data/lib/dor/migrations/identifiable/uriify_contentlocation_refs.rb +18 -0
  36. data/lib/dor/migrations/processable/unify_workflows.rb +17 -0
  37. data/lib/dor/migrations/versionable/add_missing_version_md.rb +9 -0
  38. data/lib/dor/models/admin_policy_object.rb +16 -0
  39. data/lib/dor/models/assembleable.rb +14 -0
  40. data/lib/dor/models/collection.rb +14 -0
  41. data/lib/dor/models/contentable.rb +227 -0
  42. data/lib/dor/models/describable.rb +194 -0
  43. data/lib/dor/models/discoverable.rb +66 -0
  44. data/lib/dor/models/editable.rb +267 -0
  45. data/lib/dor/models/embargoable.rb +97 -0
  46. data/lib/dor/models/eventable.rb +12 -0
  47. data/lib/dor/models/governable.rb +162 -0
  48. data/lib/dor/models/identifiable.rb +211 -0
  49. data/lib/dor/models/item.rb +44 -0
  50. data/lib/dor/models/itemizable.rb +66 -0
  51. data/lib/dor/{mods2dc.xslt → models/mods2dc.xslt} +39 -12
  52. data/lib/dor/models/preservable.rb +50 -0
  53. data/lib/dor/models/processable.rb +229 -0
  54. data/lib/dor/models/publishable.rb +74 -0
  55. data/lib/dor/models/set.rb +12 -0
  56. data/lib/dor/models/shelvable.rb +27 -0
  57. data/lib/dor/models/upgradable.rb +74 -0
  58. data/lib/dor/models/versionable.rb +94 -0
  59. data/lib/dor/models/workflow_object.rb +54 -0
  60. data/lib/dor/services/cleanup_service.rb +47 -0
  61. data/lib/dor/services/digital_stacks_service.rb +55 -0
  62. data/lib/dor/services/merge_service.rb +96 -0
  63. data/lib/dor/{metadata_handlers → services/metadata_handlers}/catalog_handler.rb +0 -2
  64. data/lib/dor/{metadata_handlers → services/metadata_handlers}/mdtoolkit_handler.rb +0 -2
  65. data/lib/dor/{metadata_service.rb → services/metadata_service.rb} +1 -3
  66. data/lib/dor/services/registration_service.rb +181 -0
  67. data/lib/dor/services/sdr_ingest_service.rb +181 -0
  68. data/lib/dor/services/search_service.rb +131 -0
  69. data/lib/dor/services/suri_service.rb +32 -0
  70. data/lib/dor/services/technical_metadata_service.rb +226 -0
  71. data/lib/dor/{tei2dc.xslt → services/tei2dc.xslt} +0 -0
  72. data/lib/dor/utils/ng_tidy.rb +37 -0
  73. data/lib/dor/utils/predicate_patch.rb +23 -0
  74. data/lib/dor/utils/solr_doc_helper.rb +9 -0
  75. data/lib/dor/utils/utc_date_field_mapper.rb +7 -0
  76. data/lib/dor/version.rb +3 -0
  77. data/lib/dor/workflow/document.rb +131 -0
  78. data/lib/dor/workflow/graph.rb +166 -0
  79. data/lib/dor/workflow/process.rb +99 -0
  80. data/lib/gsearch/demoFoxmlToSolr.xslt +340 -122
  81. data/lib/tasks/dor.rake +39 -0
  82. metadata +494 -384
  83. data/lib/datastreams/content_metadata_ds.rb +0 -12
  84. data/lib/datastreams/identity_metadata_ds.rb +0 -28
  85. data/lib/datastreams/ng_tidy.rb +0 -19
  86. data/lib/datastreams/simple_dublin_core_ds.rb +0 -23
  87. data/lib/datastreams/workflow_definition_ds.rb +0 -105
  88. data/lib/datastreams/workflow_ds.rb +0 -16
  89. data/lib/dor/admin_policy_object.rb +0 -11
  90. data/lib/dor/base.rb +0 -81
  91. data/lib/dor/cleanup_service.rb +0 -32
  92. data/lib/dor/digital_stacks_service.rb +0 -82
  93. data/lib/dor/druid_utils.rb +0 -41
  94. data/lib/dor/embargo.rb +0 -41
  95. data/lib/dor/item.rb +0 -141
  96. data/lib/dor/provenance_metadata_service.rb +0 -65
  97. data/lib/dor/registration_service.rb +0 -87
  98. data/lib/dor/rsolr.rb +0 -27
  99. data/lib/dor/sdr_ingest_service.rb +0 -117
  100. data/lib/dor/search_service.rb +0 -86
  101. data/lib/dor/suri_service.rb +0 -37
  102. data/lib/dor/workflow_object.rb +0 -13
  103. data/lib/dor/workflow_service.rb +0 -111
  104. data/lib/xml_models/foxml.rb +0 -261
  105. data/lib/xml_models/identity_metadata/dublin_core.rb +0 -119
  106. data/lib/xml_models/identity_metadata/identity_metadata.rb +0 -288
@@ -0,0 +1,13 @@
1
# Block handed to Dor::Identifiable.on_upgrade for version '3.6.1'.
# Removes a hasModel assertion pointing at hydra:commonMetadata when one is
# present, and calls assert_content_model when no hasModel predicate remains.
# Evaluates to true when either change was made, false otherwise.
Dor::Identifiable.on_upgrade '3.6.1', 'Assert correct models' do |obj|
  has_model = 'info:fedora/fedora-system:def/model#hasModel'
  common_metadata = 'info:fedora/hydra:commonMetadata'
  changed = false

  stale_assertion = obj.relationships.any? { |rel|
    rel.predicate.to_s == has_model && rel.object.to_s == common_metadata
  }
  if stale_assertion
    obj.remove_relationship :has_model, common_metadata
    changed = true
  end

  model_asserted = obj.relationships.predicates.any? { |pred| pred.to_s == has_model }
  unless model_asserted
    obj.assert_content_model
    changed = true
  end

  changed
end
@@ -0,0 +1,18 @@
1
# Block handed to Dor::Identifiable.on_upgrade for version '3.6.1'.
# Keeps a "Remediated By : <version>" tag in identityMetadata in step with
# Dor::VERSION: an older tag is replaced, a missing tag is added, and a tag
# that is already current is left alone. Evaluates to true when a tag was added.
Dor::Identifiable.on_upgrade '3.6.1', 'Record Remediation Version' do |obj|
  tag_pattern = /Remediated By\s*:\s*(.+)/
  existing_tag = obj.identityMetadata.find_by_terms(:tag).find { |node| node.text =~ tag_pattern }

  needs_tag =
    if existing_tag
      # The last match is the one made on existing_tag inside the find block above.
      outdated = Gem::Version.new(Regexp.last_match(1)) < Gem::Version.new(Dor::VERSION)
      existing_tag.remove if outdated
      outdated
    else
      true
    end

  obj.identityMetadata.add_value :tag, "Remediated By : #{Dor::VERSION}" if needs_tag
  needs_tag
end
@@ -0,0 +1,18 @@
1
# Block handed to Dor::Identifiable.on_upgrade for version '3.14.8'.
# When the dsLocation of the "content-augmented" datastream does not parse as
# a URI, escapes its last path segment, writes the location back and saves the
# datastream. Objects whose location parses (or cannot be read) are skipped.
Dor::Identifiable.on_upgrade '3.14.8', 'Fix up invalid URIs in content-augmented datastreams' do |obj|
  bad_content_location_uri = begin
    URI.parse(obj.send("content-augmented").dsLocation)
    false
  rescue URI::InvalidURIError
    true
  rescue
    # Any other failure while reading the location is treated as "nothing to fix".
    false
  end

  next unless bad_content_location_uri

  datastream = obj.send("content-augmented")
  parts = datastream.dsLocation.split('/')
  # URI.escape was removed in Ruby 3.0; it only ever delegated to the default
  # parser's #escape, which is called directly here.
  parts[parts.length - 1] = URI::DEFAULT_PARSER.escape(parts.last)
  datastream.dsLocation = parts.join('/')

  datastream.save
end
@@ -0,0 +1,18 @@
1
# Block handed to Dor::Identifiable.on_upgrade for version '3.11.6'.
# When the dsLocation of the content datastream does not parse as a URI,
# escapes its last path segment, writes the location back and saves the
# datastream. Objects whose location parses (or cannot be read) are skipped.
Dor::Identifiable.on_upgrade '3.11.6', 'Fix up invalid URIs in objects' do |obj|
  bad_content_location_uri = begin
    URI.parse(obj.content.dsLocation)
    false
  rescue URI::InvalidURIError
    true
  rescue
    # Any other failure while reading the location is treated as "nothing to fix".
    false
  end

  next unless bad_content_location_uri

  datastream = obj.content
  parts = datastream.dsLocation.split('/')
  # URI.escape was removed in Ruby 3.0; it only ever delegated to the default
  # parser's #escape, which is called directly here.
  parts[parts.length - 1] = URI::DEFAULT_PARSER.escape(parts.last)
  datastream.dsLocation = parts.join('/')

  datastream.save
end
@@ -0,0 +1,17 @@
1
# Block handed to Dor::Processable.on_upgrade for version '3.5.0'.
# Touches a new workflows datastream and deletes every datastream in control
# group 'E' whose id ends in "WF". Evaluates to true when anything was touched
# or deleted, false otherwise.
Dor::Processable.on_upgrade '3.5.0', 'Replace individual *WF datastreams with unified workflows datastream' do |obj|
  changed = false

  # Reading the content of a new workflows datastream ensures it gets saved.
  if obj.workflows.new?
    obj.workflows.content
    changed = true
  end

  # Drop the individual *WF datastreams.
  obj.datastreams.each_pair do |dsid, ds|
    next unless ds.controlGroup == 'E' && dsid =~ /WF$/

    ds.delete
    changed = true
  end

  changed
end
@@ -0,0 +1,9 @@
1
# Block handed to Dor::Versionable.on_upgrade for version '3.12.2'.
# When the versionMetadata datastream has nil or blank content, or is new,
# its content is set from its own ng_xml. Evaluates to true when that
# happened, false otherwise.
Dor::Versionable.on_upgrade '3.12.2', 'Add missing versionMetadata' do |obj|
  vm = obj.datastreams['versionMetadata']
  # Same check order as before: nil content, blank content, then new?.
  missing = vm.content.nil? || vm.content.strip == '' || vm.new?
  vm.content = vm.ng_xml.to_s if missing
  missing ? true : false
end
@@ -0,0 +1,16 @@
1
module Dor
  # ActiveFedora model declared with object type 'adminPolicy'. It carries the
  # administrativeMetadata, roleMetadata and defaultObjectRights datastreams,
  # and `things` lists the objects that point at it through is_governed_by.
  class AdminPolicyObject < ::ActiveFedora::Base
    include Identifiable
    include Governable
    include Editable
    include Describable
    include Processable
    include Versionable

    # :inbound takes a boolean (Dor::Collection passes `true`); the symbol
    # :true used here before was a typo that only worked because it is truthy.
    has_many :things, :property => :is_governed_by, :inbound => true, :class_name => "ActiveFedora::Base"
    has_object_type 'adminPolicy'
    has_metadata :name => "administrativeMetadata", :type => Dor::AdministrativeMetadataDS, :label => 'Administrative Metadata'
    has_metadata :name => "roleMetadata", :type => Dor::RoleMetadataDS, :label => 'Role Metadata'
    has_metadata :name => "defaultObjectRights", :type => Dor::DefaultObjectRightsDS, :label => 'Default Object Rights'
  end
end
@@ -0,0 +1,14 @@
1
module Dor
  # Mixin that prepares the local workspace directory for an object.
  module Assembleable

    # Creates the object's directory under Config.stacks.local_workspace_root.
    # @param source [String, nil] when given, it is passed to
    #   DruidTools::Druid#mkdir_with_final_link; when nil, a plain mkdir is done
    # @return whatever the DruidTools call returns
    def initialize_workspace(source = nil)
      workspace = DruidTools::Druid.new(pid, Config.stacks.local_workspace_root)
      return workspace.mkdir if source.nil?

      workspace.mkdir_with_final_link(source)
    end
  end
end
@@ -0,0 +1,14 @@
1
module Dor
  # ActiveFedora model declared with object type 'collection'. `members` lists
  # the objects that point at it through is_member_of_collection.
  class Collection < ::ActiveFedora::Base
    include Identifiable
    include Processable
    include Governable
    include Describable
    include Publishable
    include Versionable
    include Discoverable

    has_many :members,
             :property   => :is_member_of_collection,
             :inbound    => true,
             :class_name => 'ActiveFedora::Base'
    has_object_type 'collection'
  end
end
@@ -0,0 +1,227 @@
1
+ module Dor
2
+ module Contentable
3
+ extend ActiveSupport::Concern
4
+
5
# Adds a file to an existing resource (not to be confused with adding a
# resource to an object): uploads it over SFTP and records it in contentMetadata.
#
# The upload goes to the druid-tree location when that directory exists on the
# content server, otherwise to the old-style location (same path without the
# trailing druid directory).
#
# @param file [#path] local file to upload
# @param resource [String] id of the contentMetadata resource receiving the file
# @param file_name [String] name of the file on the server and in contentMetadata
# @param mime_type [String, nil] recorded in contentMetadata
# @param publish [String] recorded in contentMetadata (defaults to 'no')
# @param shelve [String] recorded in contentMetadata (defaults to 'no')
# @param preserve [String] recorded in contentMetadata (defaults to 'no')
# @raise [RuntimeError] if the resource does not exist or the file is already on the server
def add_file(file, resource, file_name, mime_type = nil, publish = 'no', shelve = 'no', preserve = 'no')
  xml = self.datastreams['contentMetadata'].ng_xml
  # make sure the resource exists
  if xml.search('//resource[@id=\'' + resource + '\']').length == 0
    raise 'resource doesnt exist.'
  end

  sftp = Net::SFTP.start(Config.content.content_server, Config.content.content_user, :auth_methods => ['publickey'])
  begin
    druid_tools = DruidTools::Druid.new(self.pid, Config.content.content_base_dir)
    location = druid_tools.path(file_name)
    oldlocation = location.gsub('/' + self.pid.gsub('druid:', ''), '')
    md5 = Digest::MD5.file(file.path).hexdigest
    sha1 = Digest::SHA1.file(file.path).hexdigest
    size = File.size?(file.path)
    file_hash = { :name => file_name, :md5 => md5, :publish => publish, :shelve => shelve, :preserve => preserve, :size => size.to_s, :sha1 => sha1, :mime_type => mime_type }

    # Decide on the layout first. Previously an upload failure in the new
    # layout was caught by the same rescue and silently retried in the old one.
    target = begin
      sftp.stat!(location.gsub(file_name, ''))
      location
    rescue Net::SFTP::StatusException
      # the directory layout doesnt match the new style, so use the old style.
      oldlocation
    end

    # A stat that succeeds means the file is already there.
    already_present = begin
      sftp.stat!(target)
      true
    rescue Net::SFTP::StatusException
      false
    end
    raise 'The file ' + file_name + ' already exists!' if already_present

    sftp.upload!(file.path, target)
    self.contentMetadata.add_file file_hash, resource
  ensure
    # Net::SFTP.start without a block leaves the SSH session open.
    sftp.session.close
  end
end
44
+
45
# Uploads a new version of a file over SFTP and updates its checksums and size
# in contentMetadata. The file is written to the druid-tree location when a
# file already exists there, otherwise to the old-style location.
#
# NOTE(review): the metadata update is applied to a freshly loaded Dor::Item,
# not to self - confirm that this is intended.
#
# @param file [#path] local file holding the new content
# @param file_name [String] name of the file being replaced (renaming is not supported)
def replace_file(file, file_name)
  sftp = Net::SFTP.start(Config.content.content_server, Config.content.content_user, :auth_methods => ['publickey'])
  begin
    item = Dor::Item.find(self.pid)
    druid_tools = DruidTools::Druid.new(self.pid, Config.content.content_base_dir)
    location = druid_tools.path(file_name)
    oldlocation = location.gsub('/' + self.pid.gsub('druid:', ''), '')

    md5 = Digest::MD5.file(file.path).hexdigest
    sha1 = Digest::SHA1.file(file.path).hexdigest
    size = File.size?(file.path)
    file_hash = { :name => file_name, :md5 => md5, :size => size.to_s, :sha1 => sha1 }

    # Only the existence probe picks the location. Previously a bare rescue also
    # wrapped the upload and the metadata update, so any failure there caused a
    # second upload to the old-style location.
    target = begin
      sftp.stat!(location)
      location
    rescue Net::SFTP::StatusException
      oldlocation
    end

    sftp.upload!(file.path, target)
    # this doesnt allow renaming files
    item.contentMetadata.update_file(file_hash, file_name)
  ensure
    # Net::SFTP.start without a block leaves the SSH session open.
    sftp.session.close
  end
end
67
+
68
# Requests a file of this object from the SDR server configured in
# Config.content, using HTTP basic auth.
# @param file [String] file name; it is URI-escaped before being put in the URL
# @param version [String, Integer] value of the `version` query parameter
# @return the response object returned by Net::HTTP#request
def get_preserved_file(file, version)
  preservation_server = Config.content.sdr_server + '/sdr/objects/' + self.pid + "/content/"
  # URI.encode was removed in Ruby 3.0; it only ever delegated to the default
  # parser's #escape, which is called directly here.
  encoded_file = URI::DEFAULT_PARSER.escape(file)
  # Interpolation (rather than +) lets callers pass the version as an Integer.
  uri = URI("#{preservation_server}#{encoded_file}?version=#{version}")
  req = Net::HTTP::Get.new(uri.request_uri)
  req.basic_auth Config.content.sdr_user, Config.content.sdr_pass
  Net::HTTP.start(uri.hostname, uri.port, :use_ssl => uri.scheme == 'https') do |http|
    http.request(req)
  end
end
79
+
80
# Downloads a file of this object from the content server over SFTP, trying
# the druid-tree location first and the old-style location when that fails.
# @param file [String] name of the file
# @return the value returned by sftp.download! for the location that worked
def get_file(file)
  druid_tools = DruidTools::Druid.new(self.pid, Config.content.content_base_dir)
  location = druid_tools.path(file)
  oldlocation = location.gsub('/' + file, '').gsub('/' + self.pid.gsub('druid:', ''), '') + '/' + file
  sftp = Net::SFTP.start(Config.content.content_server, Config.content.content_user, :auth_methods => ['publickey'])
  begin
    sftp.download!(location)
  rescue
    sftp.download!(oldlocation)
  ensure
    # Net::SFTP.start without a block leaves the SSH session open.
    sftp.session.close
  end
end
91
# Removes a file from the content server over SFTP (druid-tree location first,
# then old-style location) and removes it from contentMetadata.
# @param filename [String] name of the file
# @return whatever contentMetadata.remove_file returns
def remove_file(filename)
  druid_tools = DruidTools::Druid.new(self.pid, Config.content.content_base_dir)
  location = druid_tools.path(filename)
  oldlocation = location.gsub('/' + self.pid.gsub('druid:', ''), '')
  sftp = Net::SFTP.start(Config.content.content_server, Config.content.content_user, :auth_methods => ['publickey'])
  begin
    sftp.remove!(location)
  rescue
    # if the file doesnt exist, that is ok, not all files will be present in the workspace
    begin
      sftp.remove!(oldlocation)
    rescue Net::SFTP::StatusException
    end
  ensure
    # Net::SFTP.start without a block leaves the SSH session open.
    sftp.session.close
  end
  self.contentMetadata.remove_file filename
end
107
# Renames a file on the content server over SFTP (druid-tree location first,
# then old-style location) and renames it in contentMetadata.
# @param old_name [String] current file name
# @param new_name [String] new file name
# @return whatever contentMetadata.rename_file returns
def rename_file(old_name, new_name)
  druid_tools = DruidTools::Druid.new(self.pid, Config.content.content_base_dir)
  druid_dir = '/' + self.pid.gsub('druid:', '')
  location = druid_tools.path(old_name)
  # Build the destination the same way as the source. The previous
  # location.gsub(old_name, new_name) rewrote every occurrence of old_name in
  # the path, not just the file name at the end.
  new_location = druid_tools.path(new_name)
  sftp = Net::SFTP.start(Config.content.content_server, Config.content.content_user, :auth_methods => ['publickey'])
  begin
    sftp.rename!(location, new_location)
  rescue
    sftp.rename!(location.gsub(druid_dir, ''), new_location.gsub(druid_dir, ''))
  ensure
    # Net::SFTP.start without a block leaves the SSH session open.
    sftp.session.close
  end
  self.contentMetadata.rename_file(old_name, new_name)
end
119
# Removes every file listed under the named resource in contentMetadata (via
# remove_file), then removes the resource itself from contentMetadata.
# @param resource_name [String] id of the resource
# @return whatever contentMetadata.remove_resource returns
def remove_resource(resource_name)
  file_nodes = self.contentMetadata.ng_xml.search('//resource[@id=\'' + resource_name + '\']/file')
  # run delete for all of the files in the resource
  file_nodes.each { |file_node| self.remove_file(file_node['id']) }
  # remove the resource record from the metadata and renumber the resource sequence
  self.contentMetadata.remove_resource resource_name
end
128
# Lists the names of the files in the object's workspace directory on the
# content server, trying the druid-tree directory first and the old-style
# directory when that one cannot be read.
# @return [Array<String>] entry names; empty when neither directory can be read
def list_files
  files = []
  sftp = Net::SFTP.start(Config.content.content_server, Config.content.content_user, :auth_methods => ['publickey'])
  begin
    druid_tools = DruidTools::Druid.new(self.pid, Config.content.content_base_dir)
    # The object's directory is the parent of any path inside it.
    location = File.dirname(druid_tools.path('none'))
    oldlocation = location.gsub('/' + self.pid.gsub('druid:', ''), '')
    begin
      # This used to call sftp.dir.entries(location, "*"), but entries takes only
      # a path. The resulting ArgumentError was swallowed by a bare rescue, so
      # the druid-tree directory was never actually listed.
      sftp.dir.glob(location, "*") { |entry| files << entry.name }
    rescue Net::SFTP::StatusException
      begin
        sftp.dir.glob(oldlocation, "*") { |entry| files << entry.name }
      rescue Net::SFTP::StatusException
        # neither directory is readable; fall through with what was collected
      end
    end
  ensure
    # Net::SFTP.start without a block leaves the SSH session open.
    sftp.session.close
  end
  files
end
151
+
152
# Appends contentMetadata file resources from the source objects to this object.
# Each copied resource gets the next sequence number, a new id, renamed file ids
# (see new_secondary_file_name) and two <attr> children recording the source
# pid and source resource id.
# @param [Array<String>] source_obj_pids ids of the secondary objects that will get their contentMetadata merged into this one
# @raise [Dor::Exception] if a renamed secondary file id already exists in this object
def copy_file_resources(source_obj_pids)
  primary_cm = contentMetadata.ng_xml
  base_id = primary_cm.at_xpath('/contentMetadata/@objectId').value
  seq = primary_cm.at_xpath('/contentMetadata/resource[last()]/@sequence').value.to_i

  source_obj_pids.each do |src_pid|
    source_cm = Dor::Item.find(src_pid).contentMetadata.ng_xml

    # Copy the resources from each source object
    source_cm.xpath('/contentMetadata/resource').each do |src_resource|
      seq += 1
      copy = src_resource.clone
      copy['sequence'] = seq.to_s

      # Append sequence number to each secondary filename, then
      # look for filename collisions with the primary object
      copy.xpath('file').each do |file_node|
        file_node['id'] = new_secondary_file_name(file_node['id'], seq)
        next unless primary_cm.at_xpath("//file[@id = '#{file_node["id"]}']")

        raise Dor::Exception.new "File '#{file_node['id']}' from secondary object #{src_pid} already exist in primary object: #{self.pid}"
      end

      # The new id is prefixed with the resource type when there is one,
      # otherwise with the primary object's id.
      id_prefix = src_resource['type'] ? src_resource['type'] : base_id
      copy['id'] = "#{id_prefix}_#{seq}"

      # Renumber labels that end in a number.
      label = src_resource.at_xpath 'label'
      if label && label.text =~ /^(.*)\s+\d+$/
        copy.at_xpath('label').content = "#{Regexp.last_match(1)} #{seq}"
      end

      primary_cm.at_xpath('/contentMetadata/resource[last()]').add_next_sibling copy
      # Each attr goes in front of the current first child, so the second one
      # ends up before the first.
      [['mergedFromPid', src_pid], ['mergedFromResource', src_resource['id']]].each do |attr_name, attr_value|
        attr_node = primary_cm.create_element 'attr', attr_value, :name => attr_name
        copy.first_element_child.add_previous_sibling attr_node
      end
    end
  end
  self.contentMetadata.content_will_change!
end
200
+
201
# Builds the name a secondary object's file gets when merged: the sequence
# number is inserted before the last dot, or appended when there is no dot.
# @param old_name [String] original file id
# @param sequence_num [Integer] sequence number of the resource copy
# @return [String] e.g. "image.tif", 3 => "image_3.tif"; "README", 5 => "README_5"
def new_secondary_file_name(old_name, sequence_num)
  name_parts = /^(.*)\.(.*)$/.match(old_name)
  return "#{old_name}_#{sequence_num}" unless name_parts

  "#{name_parts[1]}_#{sequence_num}.#{name_parts[2]}"
end
208
+
209
# Clears the isMemberOf, isMemberOfCollection and isGovernedBy relationships,
# points isGovernedBy at the object found for
# Dor::SearchService.sdr_graveyard_apo_druid, empties contentMetadata and
# rightsMetadata, and tags the object.
# @param [String] tag optional text appended to the added tag "Decommissioned : "
def decomission(tag = nil)
  # drop membership and governance relationships
  [:is_member_of, :is_member_of_collection, :is_governed_by].each do |predicate|
    clear_relationship predicate
  end

  # govern the object by the graveyard APO instead
  graveyard_apo = ActiveFedora::Base.find(Dor::SearchService.sdr_graveyard_apo_druid)
  add_relationship :is_governed_by, graveyard_apo

  # reduce both datastreams to an empty root element
  contentMetadata.content = '<contentMetadata/>'
  rightsMetadata.content = '<rightsMetadata/>'

  add_tag "Decommissioned : #{tag}"
end
226
+ end
227
+ end
@@ -0,0 +1,194 @@
1
+ module Dor
2
+ module Describable
3
+ extend ActiveSupport::Concern
4
+
5
# Maps the root namespace of a descMetadata document to the short format name
# used to pick the "<format>2dc.xslt" stylesheet.
DESC_MD_FORMATS = {
  'http://www.tei-c.org/ns/1.0' => 'tei',
  'http://www.loc.gov/mods/v3'  => 'mods'
}

# Raised when descriptive metadata cannot be turned into Dublin Core.
# NOTE(review): this inherits from Exception, so a bare `rescue` does not catch it.
class CrosswalkError < Exception
end

included do
  has_metadata(
    :name          => 'descMetadata',
    :type          => Dor::DescMetadataDS,
    :label         => 'Descriptive Metadata',
    :control_group => 'M'
  )
end
14
+
15
# Fetches descriptive metadata for the first of this object's otherId values
# that Dor::MetadataService reports as resolvable.
# @return the value of Dor::MetadataService.fetch, or nil when no id is resolvable
def fetch_descMetadata_datastream
  candidates = self.datastreams['identityMetadata'].otherId.map(&:to_s)
  metadata_id = Dor::MetadataService.resolvable(candidates).first
  metadata_id.nil? ? nil : Dor::MetadataService.fetch(metadata_id.to_s)
end
24
+
25
# Fills the given datastream from fetch_descMetadata_datastream: sets its
# label, parses the fetched content, normalizes its text and stores the result.
# Nothing is changed when no content could be fetched.
# @param ds the datastream to populate
# @return the stored XML string, or nil when nothing was fetched
def build_descMetadata_datastream(ds)
  content = fetch_descMetadata_datastream
  return if content.nil?

  ds.dsLabel = 'Descriptive Metadata'
  ds.ng_xml = Nokogiri::XML(content)
  ds.ng_xml.normalize_text!
  ds.content = ds.ng_xml.to_xml
end
34
+
35
# Generates Dublin Core from the descMetadata datastream using the
# "<format>2dc.xslt" stylesheet that sits next to this file
# Should not be used for the Fedora DC datastream
# @return the transformed document, with text-less children of oai_dc:dc removed
# @raise [CrosswalkError] if the descMetadata namespace is unknown, or the generated DC is empty or has no children
def generate_dublin_core
  format = self.metadata_format
  if format.nil?
    raise CrosswalkError, "Unknown descMetadata namespace: #{metadata_namespace.inspect}"
  end

  # NOTE(review): the stylesheet is looked up in this file's directory - confirm
  # that a stylesheet exists there for every value in DESC_MD_FORMATS.
  stylesheet_path = File.expand_path(File.dirname(__FILE__) + "/#{format}2dc.xslt")
  # The block form closes the stylesheet file after parsing; File.new left the handle open.
  xslt = File.open(stylesheet_path) { |stylesheet| Nokogiri::XSLT(stylesheet) }
  dc_doc = xslt.transform(Nokogiri::XML(self.add_collection_reference))
  # Remove empty nodes
  dc_doc.xpath('/oai_dc:dc/*[count(text()) = 0]').remove
  if dc_doc.root.nil? || dc_doc.root.children.size == 0
    raise CrosswalkError, "Dor::Item#generate_dublin_core produced incorrect xml:\n#{dc_doc.to_xml}"
  end
  dc_doc
end
52
# Returns the descriptive metadata as an XML string, with one
# relatedItem[@type='host'] added per collection this object is a member of
# (title plus typeOfResource[@collection='yes']), for use in published MODS
# and in the MODS to DC conversion. The metadata is returned unchanged when the
# object has no public_relationships method, or when it already contains a
# relatedItem with typeOfResource[@collection='yes'].
# @return [String] serialized descriptive metadata
def add_collection_reference
  unless self.methods.include? :public_relationships
    return self.descMetadata.ng_xml.to_s
  end

  rels = self.public_relationships
  doc = Nokogiri::XML(self.descMetadata.ng_xml.to_s)

  collections = rels.search('//rdf:RDF/rdf:Description/fedora:isMemberOfCollection', 'fedora' => 'info:fedora/fedora-system:def/relations-external#', 'rdf' => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#')
  # if there is an existing relatedItem node with a child typeOfResource @collection=yes dont add anything
  already_referenced = doc.search("//mods:relatedItem/mods:typeOfResource[@collection='yes']", 'mods' => 'http://www.loc.gov/mods/v3')
  return doc.to_s if already_referenced.length > 0

  collections.each do |collection_node|
    druid = collection_node['rdf:resource'].gsub('info:fedora/', '')
    collection_title = Dor::Describable.get_collection_title(Dor::Item.find(druid))
    mods_root = doc.search('//mods:mods', 'mods' => 'http://www.loc.gov/mods/v3').first

    related_item = Nokogiri::XML::Node.new('relatedItem', doc)
    related_item['type'] = 'host'
    title_info = Nokogiri::XML::Node.new('titleInfo', doc)
    title = Nokogiri::XML::Node.new('title', doc)
    title.content = collection_title
    type_of_resource = Nokogiri::XML::Node.new('typeOfResource', doc)
    type_of_resource['collection'] = 'yes'

    mods_root.add_child(related_item)
    related_item.add_child(title_info)
    title_info.add_child(title)
    related_item.add_child(type_of_resource)
  end

  # Re-parse with noblanks before serializing.
  Nokogiri::XML(doc.to_s) { |config| config.noblanks }.to_s
end
87
# Returns the namespace href of the root element of the descMetadata XML.
# @return [String, nil] nil when there is no document, no root or no namespace
def metadata_namespace
  desc_md = self.datastreams['descMetadata'].ng_xml
  return nil if desc_md.nil? || desc_md.root.nil? || desc_md.root.namespace.nil?

  desc_md.root.namespace.href
end
95
+
96
# Looks up the short format name for this object's descMetadata namespace.
# @return [String, nil] the DESC_MD_FORMATS entry, nil when the namespace is not listed
def metadata_format
  namespace = metadata_namespace
  DESC_MD_FORMATS[namespace]
end
99
+
100
# Adds descriptive-metadata fields to the solr document: the metadata format,
# one public_dc_<name> value per generated Dublin Core element, and a sortable
# creator_title built from the last dc:creator and last dc:title. A
# CrosswalkError from the Dublin Core generation is logged as a warning and
# the DC-derived fields are skipped.
# @param solr_doc [Hash] document to add to
# @return [Hash] solr_doc
def to_solr(solr_doc = Hash.new, *args)
  super solr_doc, *args
  add_solr_value(solr_doc, "metadata_format", self.metadata_format, :string, [:searchable, :facetable])
  begin
    dc_doc = self.generate_dublin_core
    dc_doc.xpath('/oai_dc:dc/*').each do |node|
      add_solr_value(solr_doc, "public_dc_#{node.name}", node.text, :string, [:searchable])
    end

    # Text of the last node matching the xpath, or '' when nothing matches.
    last_text = lambda do |xpath|
      dc_doc.xpath(xpath).inject('') { |_previous, node| node.text }
    end
    creator = last_text.call('//dc:creator')
    title = last_text.call('//dc:title')
    add_solr_value(solr_doc, 'creator_title', creator + title, :string, [:sortable])
  rescue CrosswalkError => e
    ActiveFedora.logger.warn "Cannot index #{self.pid}.descMetadata: #{e.message}"
  end
  solr_doc
end
123
# Replaces the content of the MODS title in descMetadata.
# @param new_title [String] the new title text
# @raise [RuntimeError] if the descriptive metadata has no title element
def update_title(new_title)
  updated = update_simple_field('mods:mods/mods:titleInfo/mods:title', new_title)
  raise 'Descriptive metadata has no title to update!' unless updated
end
128
# Appends an identifier element with the given type and value to every
# mods:mods element of descMetadata.
# @param type [String] value of the identifier's type attribute
# @param value [String] content of the identifier
def add_identifier(type, value)
  ds_xml = self.descMetadata.ng_xml
  mods_nodes = ds_xml.search('//mods:mods', 'mods' => 'http://www.loc.gov/mods/v3')
  mods_nodes.each do |mods_node|
    # this ends up being mods:identifier without having to specify the namespace
    identifier = Nokogiri::XML::Node.new('identifier', ds_xml)
    identifier['type'] = type
    identifier.content = value
    mods_node.add_child(identifier)
  end
end
137
# Removes the first mods:identifier in descMetadata that matches the given
# type and, when a value is given, that value.
#
# The type used to be ignored, so an identifier of a different type could be
# removed. A nil type still matches identifiers of any type.
#
# @param type [String, nil] required value of the identifier's type attribute
# @param value [String, nil] required content; nil matches any content
# @return [Boolean] true when an identifier was removed, false otherwise
def delete_identifier(type, value = nil)
  ds_xml = self.descMetadata.ng_xml
  ds_xml.search('//mods:identifier', 'mods' => 'http://www.loc.gov/mods/v3').each do |node|
    next unless type.nil? || node['type'] == type.to_s
    next unless value.nil? || node.content == value

    node.remove
    return true
  end
  false
end
148
+
149
# Replaces descMetadata with a minimal MODS 3.3 record whose title is the
# object's label.
# @param force [Boolean] when false, the datastream must be new
# @raise [RuntimeError] if force is false and the descMetadata datastream is not new
def set_desc_metadata_using_label(force = false)
  ds = self.descMetadata
  if !force && !ds.new?
    raise 'Cannot proceed, there is already content in the descriptive metadata datastream.' + ds.content.to_s
  end

  label = self.label
  builder = Nokogiri::XML::Builder.new do |xml|
    xml.mods('xmlns' => 'http://www.loc.gov/mods/v3', 'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance', :version => '3.3', "xsi:schemaLocation" => 'http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-3.xsd') do
      xml.titleInfo do
        xml.title label
      end
    end
  end
  self.descMetadata.content = builder.to_xml
end
164
+
165
# Works out the display title of a collection from its descMetadata: the
# preferredCitation note when there is exactly one, otherwise the first MODS
# title, followed by the subtitle in parentheses when there is exactly one.
# @param obj an object exposing descMetadata.ng_xml
# @return [String] the title
def self.get_collection_title(obj)
  xml = obj.descMetadata.ng_xml
  citations = xml.search("//mods:mods/mods:note[@type='preferredCitation']", 'mods' => 'http://www.loc.gov/mods/v3')
  return citations.first.content if citations.length == 1

  title = xml.search('//mods:mods/mods:titleInfo/mods:title', 'mods' => 'http://www.loc.gov/mods/v3').first.content
  subtitles = xml.search('//mods:mods/mods:titleInfo/mods:subTitle', 'mods' => 'http://www.loc.gov/mods/v3')
  return title unless subtitles.length == 1

  "#{title} (#{subtitles.first.content})"
end
180
+
181
+ private
182
# Generic updater for fields such as title or subtitle, which can only have a
# single occurrence and must be present: sets the content of the first node
# matching the field.
# @param field [String] xpath below '//', using the mods: prefix
# @param new_val [String] new content
# @return [Boolean] true when a node was updated, false when none matched
def update_simple_field(field, new_val)
  ds_xml = self.descMetadata.ng_xml
  node = ds_xml.search('//' + field, 'mods' => 'http://www.loc.gov/mods/v3').first
  return false if node.nil?

  node.content = new_val
  true
end
191
+
192
+
193
+ end
194
+ end