dor-services 5.2.0 → 5.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -13
- data/config/certs/robots-dor-dev.crt +29 -0
- data/config/certs/robots-dor-dev.key +27 -0
- data/config/config_defaults.yml +2 -0
- data/config/dev_console_env.rb +77 -0
- data/lib/dor-services.rb +31 -27
- data/lib/dor/config.rb +25 -19
- data/lib/dor/datastreams/administrative_metadata_ds.rb +19 -20
- data/lib/dor/datastreams/content_metadata_ds.rb +238 -177
- data/lib/dor/datastreams/datastream_spec_solrizer.rb +1 -1
- data/lib/dor/datastreams/default_object_rights_ds.rb +99 -16
- data/lib/dor/datastreams/desc_metadata_ds.rb +37 -34
- data/lib/dor/datastreams/embargo_metadata_ds.rb +16 -16
- data/lib/dor/datastreams/events_ds.rb +2 -2
- data/lib/dor/datastreams/geo_metadata_ds.rb +5 -10
- data/lib/dor/datastreams/identity_metadata_ds.rb +22 -22
- data/lib/dor/datastreams/rights_metadata_ds.rb +43 -32
- data/lib/dor/datastreams/role_metadata_ds.rb +5 -5
- data/lib/dor/datastreams/simple_dublin_core_ds.rb +13 -14
- data/lib/dor/datastreams/version_metadata_ds.rb +22 -23
- data/lib/dor/datastreams/workflow_definition_ds.rb +15 -15
- data/lib/dor/datastreams/workflow_ds.rb +64 -70
- data/lib/dor/exceptions.rb +0 -1
- data/lib/dor/migrations/identifiable/uriify_augmented_contentlocation_refs.rb +4 -4
- data/lib/dor/migrations/processable/unify_workflows.rb +1 -1
- data/lib/dor/models/admin_policy_object.rb +4 -4
- data/lib/dor/models/assembleable.rb +2 -3
- data/lib/dor/models/collection.rb +1 -1
- data/lib/dor/models/contentable.rb +113 -108
- data/lib/dor/models/describable.rb +136 -95
- data/lib/dor/models/editable.rb +205 -119
- data/lib/dor/models/embargoable.rb +16 -16
- data/lib/dor/models/eventable.rb +2 -2
- data/lib/dor/models/geoable.rb +3 -3
- data/lib/dor/models/governable.rb +25 -26
- data/lib/dor/models/identifiable.rb +66 -55
- data/lib/dor/models/item.rb +0 -1
- data/lib/dor/models/itemizable.rb +7 -8
- data/lib/dor/models/preservable.rb +7 -8
- data/lib/dor/models/processable.rb +76 -73
- data/lib/dor/models/publishable.rb +25 -30
- data/lib/dor/models/releaseable.rb +118 -155
- data/lib/dor/models/rightsable.rb +2 -3
- data/lib/dor/models/set.rb +1 -1
- data/lib/dor/models/shelvable.rb +8 -10
- data/lib/dor/models/upgradable.rb +5 -6
- data/lib/dor/models/versionable.rb +3 -4
- data/lib/dor/models/workflow_object.rb +15 -16
- data/lib/dor/services/cleanup_reset_service.rb +15 -16
- data/lib/dor/services/cleanup_service.rb +2 -4
- data/lib/dor/services/digital_stacks_service.rb +10 -13
- data/lib/dor/services/merge_service.rb +8 -9
- data/lib/dor/services/metadata_handlers/catalog_handler.rb +1 -1
- data/lib/dor/services/metadata_handlers/mdtoolkit_handler.rb +3 -3
- data/lib/dor/services/metadata_service.rb +19 -20
- data/lib/dor/services/registration_service.rb +80 -61
- data/lib/dor/services/reset_workspace_service.rb +6 -10
- data/lib/dor/services/sdr_ingest_service.rb +15 -16
- data/lib/dor/services/search_service.rb +18 -23
- data/lib/dor/services/suri_service.rb +6 -6
- data/lib/dor/services/technical_metadata_service.rb +27 -44
- data/lib/dor/utils/ng_tidy.rb +3 -3
- data/lib/dor/utils/sdr_client.rb +2 -3
- data/lib/dor/utils/solr_doc_helper.rb +1 -3
- data/lib/dor/version.rb +1 -1
- data/lib/dor/workflow/document.rb +43 -40
- data/lib/dor/workflow/graph.rb +26 -26
- data/lib/dor/workflow/process.rb +34 -35
- data/lib/tasks/rdoc.rake +5 -5
- metadata +129 -111
- data/lib/dor/models/presentable.rb +0 -146
data/lib/dor/exceptions.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
Dor::Identifiable.on_upgrade '3.14.8', 'Fix up invalid URIs in content-augmented datastreams' do |obj|
|
2
2
|
bad_content_location_uri = begin
|
3
|
-
URI.parse(obj.send(
|
3
|
+
URI.parse(obj.send('content-augmented').dsLocation)
|
4
4
|
false
|
5
5
|
rescue URI::InvalidURIError
|
6
6
|
true
|
@@ -10,9 +10,9 @@ Dor::Identifiable.on_upgrade '3.14.8', 'Fix up invalid URIs in content-augmented
|
|
10
10
|
|
11
11
|
next unless bad_content_location_uri
|
12
12
|
|
13
|
-
parts = obj.send(
|
13
|
+
parts = obj.send('content-augmented').dsLocation.split('/')
|
14
14
|
parts[parts.length - 1] = URI.escape(parts.last)
|
15
|
-
obj.send(
|
15
|
+
obj.send('content-augmented').dsLocation = parts.join('/')
|
16
16
|
|
17
|
-
obj.send(
|
17
|
+
obj.send('content-augmented').save
|
18
18
|
end
|
@@ -7,7 +7,7 @@ Dor::Processable.on_upgrade '3.5.0', 'Replace individual *WF datastreams with un
|
|
7
7
|
end
|
8
8
|
|
9
9
|
# Remove individual *WF datastreams
|
10
|
-
obj.datastreams.each_pair do |dsid,ds|
|
10
|
+
obj.datastreams.each_pair do |dsid, ds|
|
11
11
|
if ds.controlGroup == 'E' && dsid =~ /WF$/
|
12
12
|
ds.delete
|
13
13
|
run = true
|
@@ -7,10 +7,10 @@ module Dor
|
|
7
7
|
include Processable
|
8
8
|
include Versionable
|
9
9
|
|
10
|
-
has_many :things, :property => :is_governed_by, :inbound => :true, :class_name =>
|
10
|
+
has_many :things, :property => :is_governed_by, :inbound => :true, :class_name => 'ActiveFedora::Base'
|
11
11
|
has_object_type 'adminPolicy'
|
12
|
-
has_metadata :name =>
|
13
|
-
has_metadata :name =>
|
14
|
-
has_metadata :name =>
|
12
|
+
has_metadata :name => 'administrativeMetadata', :type => Dor::AdministrativeMetadataDS, :label => 'Administrative Metadata'
|
13
|
+
has_metadata :name => 'roleMetadata', :type => Dor::RoleMetadataDS, :label => 'Role Metadata'
|
14
|
+
has_metadata :name => 'defaultObjectRights', :type => Dor::DefaultObjectRightsDS, :label => 'Default Object Rights'
|
15
15
|
end
|
16
16
|
end
|
@@ -1,8 +1,7 @@
|
|
1
1
|
module Dor
|
2
2
|
module Assembleable
|
3
|
-
|
4
|
-
|
5
|
-
druid = DruidTools::Druid.new(self.pid, Config.stacks.local_workspace_root)
|
3
|
+
def initialize_workspace(source = nil)
|
4
|
+
druid = DruidTools::Druid.new(pid, Config.stacks.local_workspace_root)
|
6
5
|
if source.nil?
|
7
6
|
druid.mkdir
|
8
7
|
else
|
@@ -8,7 +8,7 @@ module Dor
|
|
8
8
|
include Versionable
|
9
9
|
include Releaseable
|
10
10
|
|
11
|
-
has_many :members, :property => :is_member_of_collection, :inbound => true, :class_name =>
|
11
|
+
has_many :members, :property => :is_member_of_collection, :inbound => true, :class_name => 'ActiveFedora::Base'
|
12
12
|
has_object_type 'collection'
|
13
13
|
end
|
14
14
|
end
|
@@ -2,163 +2,165 @@ module Dor
|
|
2
2
|
module Contentable
|
3
3
|
extend ActiveSupport::Concern
|
4
4
|
|
5
|
-
#add a file to a resource, not to be confused with add a resource to an object
|
6
|
-
def add_file
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
size=File.size?(file.path)
|
20
|
-
#update contentmd
|
21
|
-
file_hash={:name=>file_name,:md5 => md5, :publish=>publish, :shelve=> shelve, :preserve => preserve, :size=>size.to_s, :sha1=>sha1, :mime_type => mime_type}
|
5
|
+
# add a file to a resource, not to be confused with add a resource to an object
|
6
|
+
def add_file(file, resource, file_name, mime_type = nil, publish = 'no', shelve = 'no', preserve = 'no')
|
7
|
+
xml = datastreams['contentMetadata'].ng_xml
|
8
|
+
# make sure the resource exists
|
9
|
+
raise 'resource doesnt exist.' if xml.search('//resource[@id=\'' + resource + '\']').length == 0
|
10
|
+
sftp = Net::SFTP.start(Config.content.content_server, Config.content.content_user, :auth_methods => ['publickey'])
|
11
|
+
druid_tools = DruidTools::Druid.new(pid, Config.content.content_base_dir)
|
12
|
+
location = druid_tools.path(file_name)
|
13
|
+
oldlocation = location.gsub('/' + pid.gsub('druid:', ''), '')
|
14
|
+
md5 = Digest::MD5.file(file.path).hexdigest
|
15
|
+
sha1 = Digest::SHA1.file(file.path).hexdigest
|
16
|
+
size = File.size?(file.path)
|
17
|
+
# update contentmd
|
18
|
+
file_hash = {:name => file_name, :md5 => md5, :publish => publish, :shelve => shelve, :preserve => preserve, :size => size.to_s, :sha1 => sha1, :mime_type => mime_type}
|
22
19
|
begin
|
23
|
-
|
20
|
+
sftp.stat!(location.gsub(file_name, ''))
|
24
21
|
begin
|
25
|
-
|
26
|
-
raise
|
22
|
+
sftp.stat!(location)
|
23
|
+
raise "The file #{file_name} already exists!"
|
27
24
|
rescue Net::SFTP::StatusException
|
28
|
-
sftp.upload!(file.path,location)
|
29
|
-
|
25
|
+
sftp.upload!(file.path, location)
|
26
|
+
contentMetadata.add_file file_hash, resource
|
30
27
|
end
|
31
28
|
rescue Net::SFTP::StatusException
|
32
|
-
#
|
29
|
+
# directory layout doesn't match the new style, so use the old style.
|
33
30
|
begin
|
34
|
-
|
35
|
-
raise
|
31
|
+
sftp.stat!(oldlocation)
|
32
|
+
raise "The file #{file_name} already exists!"
|
36
33
|
rescue Net::SFTP::StatusException
|
37
|
-
#
|
38
|
-
sftp.upload!(file.path,oldlocation)
|
39
|
-
|
34
|
+
# file doesn't already exist, which is good. Add it
|
35
|
+
sftp.upload!(file.path, oldlocation)
|
36
|
+
contentMetadata.add_file file_hash, resource
|
40
37
|
end
|
41
38
|
end
|
42
|
-
#can only arrive at this point if a non status exception occurred.
|
39
|
+
# can only arrive at this point if a non status exception occurred.
|
43
40
|
end
|
44
41
|
|
45
|
-
def replace_file
|
46
|
-
sftp=Net::SFTP.start(Config.content.content_server,Config.content.content_user
|
47
|
-
item=Dor::Item.find(
|
48
|
-
druid_tools=DruidTools::Druid.new(
|
49
|
-
location=druid_tools.path(file_name)
|
50
|
-
oldlocation=location.gsub('/'+
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
file_hash={:name=>file_name,:md5 => md5, :size=>size.to_s, :sha1=>sha1}
|
42
|
+
def replace_file(file, file_name)
|
43
|
+
sftp = Net::SFTP.start(Config.content.content_server, Config.content.content_user, :auth_methods => ['publickey'])
|
44
|
+
item = Dor::Item.find(pid)
|
45
|
+
druid_tools = DruidTools::Druid.new(pid, Config.content.content_base_dir)
|
46
|
+
location = druid_tools.path(file_name)
|
47
|
+
oldlocation = location.gsub('/' + pid.gsub('druid:', ''), '')
|
48
|
+
md5 = Digest::MD5.file(file.path).hexdigest
|
49
|
+
sha1 = Digest::SHA1.file(file.path).hexdigest
|
50
|
+
size = File.size?(file.path)
|
51
|
+
# update contentmd
|
52
|
+
file_hash = {:name => file_name, :md5 => md5, :size => size.to_s, :sha1 => sha1}
|
57
53
|
begin
|
58
|
-
|
59
|
-
sftp.upload!(file.path,location)
|
60
|
-
#this doesnt allow renaming files
|
54
|
+
sftp.stat!(location)
|
55
|
+
sftp.upload!(file.path, location)
|
56
|
+
# this doesnt allow renaming files
|
61
57
|
item.contentMetadata.update_file(file_hash, file_name)
|
62
58
|
rescue
|
63
|
-
sftp.upload!(file.path,oldlocation)
|
59
|
+
sftp.upload!(file.path, oldlocation)
|
64
60
|
item.contentMetadata.update_file(file_hash, file_name)
|
65
61
|
end
|
66
62
|
end
|
67
63
|
|
68
|
-
def get_preserved_file
|
69
|
-
|
70
|
-
file=URI.encode(file)
|
71
|
-
add=preservation_server+file+"?version="+version
|
72
|
-
uri = URI(add)
|
64
|
+
def get_preserved_file(file, version)
|
65
|
+
uri = URI(Config.content.sdr_server + "/sdr/objects/#{pid}/content/" + URI.encode(file) + "?version=#{version}")
|
73
66
|
req = Net::HTTP::Get.new(uri.request_uri)
|
74
67
|
req.basic_auth Config.content.sdr_user, Config.content.sdr_pass
|
75
|
-
|
68
|
+
Net::HTTP.start(uri.hostname, uri.port, :use_ssl => uri.scheme == 'https') {|http|
|
76
69
|
http.request(req)
|
77
70
|
}
|
78
71
|
end
|
79
72
|
|
80
|
-
def get_file
|
81
|
-
druid_tools=DruidTools::Druid.new(
|
82
|
-
location=druid_tools.path(file)
|
83
|
-
oldlocation=location.gsub('/'+file,'').gsub('/'+
|
84
|
-
sftp=Net::SFTP.start(Config.content.content_server,Config.content.content_user
|
73
|
+
def get_file(file)
|
74
|
+
druid_tools = DruidTools::Druid.new(pid, Config.content.content_base_dir)
|
75
|
+
location = druid_tools.path(file)
|
76
|
+
oldlocation = location.gsub('/' + file, '').gsub('/' + pid.gsub('druid:', ''), '') + '/' + file
|
77
|
+
sftp = Net::SFTP.start(Config.content.content_server, Config.content.content_user, :auth_methods => ['publickey'])
|
85
78
|
begin
|
86
|
-
data=sftp.download!(location)
|
79
|
+
data = sftp.download!(location)
|
87
80
|
rescue
|
88
|
-
data=sftp.download!(oldlocation)
|
81
|
+
data = sftp.download!(oldlocation)
|
89
82
|
end
|
83
|
+
data
|
90
84
|
end
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
85
|
+
|
86
|
+
# @param [String] filename
|
87
|
+
def remove_file(filename)
|
88
|
+
druid_tools = DruidTools::Druid.new(pid, Config.content.content_base_dir)
|
89
|
+
location = druid_tools.path(filename)
|
90
|
+
oldlocation = location.gsub('/' + pid.gsub('druid:', ''), '')
|
91
|
+
sftp = Net::SFTP.start(Config.content.content_server, Config.content.content_user, :auth_methods => ['publickey'])
|
96
92
|
begin
|
97
|
-
|
93
|
+
sftp.remove!(location)
|
98
94
|
rescue
|
99
|
-
#if the file doesnt exist, that is ok, not all files will be present in the workspace
|
95
|
+
# if the file doesnt exist, that is ok, not all files will be present in the workspace
|
100
96
|
begin
|
101
|
-
|
97
|
+
sftp.remove!(oldlocation)
|
102
98
|
rescue Net::SFTP::StatusException
|
103
99
|
end
|
104
100
|
end
|
105
|
-
|
101
|
+
contentMetadata.remove_file filename
|
106
102
|
end
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
103
|
+
|
104
|
+
# @param [String] old_name
|
105
|
+
# @param [String] new_name
|
106
|
+
def rename_file(old_name, new_name)
|
107
|
+
druid_tools = DruidTools::Druid.new(pid, Config.content.content_base_dir)
|
108
|
+
location = druid_tools.path(old_name)
|
109
|
+
oldlocation = location.gsub('/' + pid.gsub('druid:', ''), '')
|
110
|
+
sftp = Net::SFTP.start(Config.content.content_server, Config.content.content_user, :auth_methods => ['publickey'])
|
112
111
|
begin
|
113
|
-
|
112
|
+
sftp.rename!(location, location.gsub(old_name, new_name))
|
114
113
|
rescue
|
115
|
-
|
114
|
+
sftp.rename!(oldlocation, oldlocation.gsub(old_name, new_name))
|
116
115
|
end
|
117
|
-
|
116
|
+
contentMetadata.rename_file(old_name, new_name)
|
118
117
|
end
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
files
|
123
|
-
|
118
|
+
|
119
|
+
# @param [String] resource_name ID of the resource elememnt
|
120
|
+
def remove_resource(resource_name)
|
121
|
+
# run delete for all of the files in the resource
|
122
|
+
contentMetadata.ng_xml.search('//resource[@id=\'' + resource_name + '\']/file').each do |file|
|
123
|
+
remove_file(file['id'])
|
124
124
|
end
|
125
|
-
#remove the resource record from the metadata and renumber the resource sequence
|
126
|
-
|
125
|
+
# remove the resource record from the metadata and renumber the resource sequence
|
126
|
+
contentMetadata.remove_resource resource_name
|
127
127
|
end
|
128
128
|
|
129
|
-
#list files in the workspace
|
129
|
+
# list files in the workspace
|
130
|
+
# @return [Array] workspace files
|
130
131
|
def list_files
|
131
|
-
filename='none'
|
132
|
-
files=[]
|
133
|
-
sftp=Net::SFTP.start(Config.content.content_server,Config.content.content_user
|
134
|
-
druid_tools=DruidTools::Druid.new(
|
135
|
-
location=druid_tools.path(filename).gsub(filename,'')
|
136
|
-
oldlocation=location.gsub('/'+
|
132
|
+
filename = 'none'
|
133
|
+
files = []
|
134
|
+
sftp = Net::SFTP.start(Config.content.content_server, Config.content.content_user, :auth_methods => ['publickey'])
|
135
|
+
druid_tools = DruidTools::Druid.new(pid, Config.content.content_base_dir)
|
136
|
+
location = druid_tools.path(filename).gsub(filename, '')
|
137
|
+
oldlocation = location.gsub('/' + pid.gsub('druid:', ''), '')
|
137
138
|
begin
|
138
|
-
sftp.dir.entries(location,
|
139
|
-
files<<file.name
|
139
|
+
sftp.dir.entries(location, '*') do |file|
|
140
|
+
files << file.name
|
140
141
|
end
|
141
142
|
rescue
|
142
143
|
begin
|
143
|
-
sftp.dir.glob(oldlocation,
|
144
|
-
files<<file.name
|
144
|
+
sftp.dir.glob(oldlocation, '*') do |file|
|
145
|
+
files << file.name
|
145
146
|
end
|
146
147
|
rescue Net::SFTP::StatusException
|
147
148
|
return files
|
148
149
|
end
|
149
150
|
end
|
150
|
-
|
151
|
+
files
|
151
152
|
end
|
152
153
|
|
153
|
-
#
|
154
|
-
|
155
|
-
|
156
|
-
|
154
|
+
# @param [String] filename
|
155
|
+
# @return [Boolean] whether the file in question is present in the object's workspace
|
156
|
+
def is_file_in_workspace?(filename)
|
157
|
+
druid_obj = DruidTools::Druid.new(pid, Dor::Config.stacks.local_workspace_root)
|
158
|
+
!druid_obj.find_content(filename).nil?
|
157
159
|
end
|
158
160
|
|
159
161
|
# Appends contentMetadata file resources from the source objects to this object
|
160
162
|
# @param [Array<String>] source_obj_pids ids of the secondary objects that will get their contentMetadata merged into this one
|
161
|
-
def copy_file_resources
|
163
|
+
def copy_file_resources(source_obj_pids)
|
162
164
|
primary_cm = contentMetadata.ng_xml
|
163
165
|
base_id = primary_cm.at_xpath('/contentMetadata/@objectId').value
|
164
166
|
max_sequence = primary_cm.at_xpath('/contentMetadata/resource[last()]/@sequence').value.to_i
|
@@ -179,7 +181,7 @@ module Dor
|
|
179
181
|
secondary_file['id'] = new_secondary_file_name(secondary_file['id'], max_sequence)
|
180
182
|
|
181
183
|
if primary_cm.at_xpath("//file[@id = '#{secondary_file['id']}']")
|
182
|
-
raise Dor::Exception.new "File '#{secondary_file['id']}' from secondary object #{src_pid} already exist in primary object: #{
|
184
|
+
raise Dor::Exception.new "File '#{secondary_file['id']}' from secondary object #{src_pid} already exist in primary object: #{pid}"
|
183
185
|
end
|
184
186
|
end
|
185
187
|
|
@@ -201,20 +203,16 @@ module Dor
|
|
201
203
|
resource_copy.first_element_child.add_previous_sibling attr_node
|
202
204
|
end
|
203
205
|
end
|
204
|
-
|
206
|
+
contentMetadata.content_will_change!
|
205
207
|
end
|
206
208
|
|
207
|
-
def new_secondary_file_name
|
208
|
-
|
209
|
-
return "#{$1}_#{sequence_num}.#{$2}"
|
210
|
-
else
|
211
|
-
return "#{old_name}_#{sequence_num}"
|
212
|
-
end
|
209
|
+
def new_secondary_file_name(old_name, sequence_num)
|
210
|
+
old_name =~ /^(.*)\.(.*)$/ ? "#{$1}_#{sequence_num}.#{$2}" : "#{old_name}_#{sequence_num}"
|
213
211
|
end
|
214
212
|
|
215
213
|
# Clears RELS-EXT relationships, sets the isGovernedBy relationship to the SDR Graveyard APO
|
216
214
|
# @param [String] tag optional String of text that is concatenated to the identityMetadata/tag "Decomissioned : "
|
217
|
-
def decomission
|
215
|
+
def decomission(tag)
|
218
216
|
# remove isMemberOf and isMemberOfCollection relationships
|
219
217
|
clear_relationship :is_member_of
|
220
218
|
clear_relationship :is_member_of_collection
|
@@ -229,5 +227,12 @@ module Dor
|
|
229
227
|
rightsMetadata.content = '<rightsMetadata/>'
|
230
228
|
add_tag "Decommissioned : #{tag}"
|
231
229
|
end
|
230
|
+
|
231
|
+
# Adds a RELS-EXT constituent relationship to the given druid
|
232
|
+
# @param [String] druid the parent druid of the constituent relationship
|
233
|
+
# e.g.: <fedora:isConstituentOf rdf:resource="info:fedora/druid:hj097bm8879" />
|
234
|
+
def add_constituent(druid)
|
235
|
+
add_relationship :is_constituent_of, ActiveFedora::Base.find(druid)
|
236
|
+
end
|
232
237
|
end
|
233
238
|
end
|
@@ -3,13 +3,13 @@ module Dor
|
|
3
3
|
extend ActiveSupport::Concern
|
4
4
|
|
5
5
|
DESC_MD_FORMATS = {
|
6
|
-
|
7
|
-
|
6
|
+
'http://www.tei-c.org/ns/1.0' => 'tei',
|
7
|
+
'http://www.loc.gov/mods/v3' => 'mods'
|
8
8
|
}
|
9
9
|
class CrosswalkError < Exception; end
|
10
10
|
|
11
11
|
included do
|
12
|
-
has_metadata :name =>
|
12
|
+
has_metadata :name => 'descMetadata', :type => Dor::DescMetadataDS, :label => 'Descriptive Metadata', :control_group => 'M'
|
13
13
|
end
|
14
14
|
|
15
15
|
require 'stanford-mods/searchworks'
|
@@ -17,17 +17,17 @@ module Dor
|
|
17
17
|
# intended for read-access, "as SearchWorks would see it", mostly for to_solr()
|
18
18
|
# @param [Nokogiri::XML::Document] content Nokogiri descMetadata document (overriding internal data)
|
19
19
|
# @param [boolean] ns_aware namespace awareness toggle for from_nk_node()
|
20
|
-
def stanford_mods(content=nil, ns_aware=true)
|
20
|
+
def stanford_mods(content = nil, ns_aware = true)
|
21
21
|
m = Stanford::Mods::Record.new
|
22
|
-
desc = content.nil? ?
|
22
|
+
desc = content.nil? ? descMetadata.ng_xml : content
|
23
23
|
m.from_nk_node(desc.root, ns_aware)
|
24
24
|
m
|
25
25
|
end
|
26
26
|
|
27
27
|
def fetch_descMetadata_datastream
|
28
|
-
candidates =
|
28
|
+
candidates = datastreams['identityMetadata'].otherId.collect { |oid| oid.to_s }
|
29
29
|
metadata_id = Dor::MetadataService.resolvable(candidates).first
|
30
|
-
|
30
|
+
metadata_id.nil? ? nil : Dor::MetadataService.fetch(metadata_id.to_s)
|
31
31
|
end
|
32
32
|
|
33
33
|
def build_descMetadata_datastream(ds)
|
@@ -41,28 +41,27 @@ module Dor
|
|
41
41
|
|
42
42
|
# Generates Dublin Core from the MODS in the descMetadata datastream using the LoC mods2dc stylesheet
|
43
43
|
# Should not be used for the Fedora DC datastream
|
44
|
-
# @raise [
|
44
|
+
# @raise [CrosswalkError] Raises an Exception if the generated DC is empty or has no children
|
45
|
+
# @return [Nokogiri::Doc] the DublinCore XML document object
|
45
46
|
def generate_dublin_core
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
xslt = Nokogiri::XSLT(File.new(File.expand_path(File.dirname(__FILE__) + "/#{format}2dc.xslt")) )
|
51
|
-
desc_md = self.descMetadata.ng_xml.dup(1)
|
52
|
-
self.add_collection_reference(desc_md)
|
47
|
+
raise CrosswalkError, "Unknown descMetadata namespace: #{metadata_namespace.inspect}" if metadata_format.nil?
|
48
|
+
xslt = Nokogiri::XSLT(File.new(File.expand_path(File.dirname(__FILE__) + "/#{metadata_format}2dc.xslt")) )
|
49
|
+
desc_md = descMetadata.ng_xml.dup(1)
|
50
|
+
add_collection_reference(desc_md)
|
53
51
|
dc_doc = xslt.transform(desc_md)
|
54
|
-
# Remove empty nodes
|
55
|
-
dc_doc.
|
56
|
-
|
57
|
-
raise CrosswalkError, "Dor::Item#generate_dublin_core produced incorrect xml:\n#{dc_doc.to_xml}"
|
58
|
-
end
|
52
|
+
dc_doc.xpath('/oai_dc:dc/*[count(text()) = 0]').remove # Remove empty nodes
|
53
|
+
raise CrosswalkError, "Dor::Item#generate_dublin_core produced incorrect xml (no root):\n#{dc_doc.to_xml}" if dc_doc.root.nil?
|
54
|
+
raise CrosswalkError, "Dor::Item#generate_dublin_core produced incorrect xml (no children):\n#{dc_doc.to_xml}" if dc_doc.root.children.size == 0
|
59
55
|
dc_doc
|
60
56
|
end
|
61
57
|
|
58
|
+
# @return [String] Public descriptive medatada XML
|
62
59
|
def generate_public_desc_md
|
63
|
-
doc =
|
60
|
+
doc = descMetadata.ng_xml.dup(1)
|
64
61
|
add_collection_reference(doc)
|
65
62
|
add_access_conditions(doc)
|
63
|
+
add_constituent_relations(doc)
|
64
|
+
doc.xpath('//comment()').remove
|
66
65
|
new_doc = Nokogiri::XML(doc.to_xml) { |x| x.noblanks }
|
67
66
|
new_doc.encoding = 'UTF-8'
|
68
67
|
new_doc.to_xml
|
@@ -74,40 +73,42 @@ module Dor
|
|
74
73
|
def add_access_conditions(doc)
|
75
74
|
# clear out any existing accessConditions
|
76
75
|
doc.xpath('//mods:accessCondition', 'mods' => 'http://www.loc.gov/mods/v3').each {|n| n.remove}
|
77
|
-
rights =
|
76
|
+
rights = datastreams['rightsMetadata'].ng_xml
|
78
77
|
|
79
78
|
rights.xpath('//use/human[@type="useAndReproduction"]').each do |use|
|
80
79
|
txt = use.text.strip
|
81
80
|
next if txt.empty?
|
82
|
-
doc.root.element_children.last.add_next_sibling doc.create_element(
|
81
|
+
doc.root.element_children.last.add_next_sibling doc.create_element('accessCondition', txt, :type => 'useAndReproduction')
|
83
82
|
end
|
84
83
|
rights.xpath('//copyright/human[@type="copyright"]').each do |cr|
|
85
84
|
txt = cr.text.strip
|
86
85
|
next if txt.empty?
|
87
|
-
doc.root.element_children.last.add_next_sibling doc.create_element(
|
86
|
+
doc.root.element_children.last.add_next_sibling doc.create_element('accessCondition', txt, :type => 'copyright')
|
88
87
|
end
|
89
88
|
rights.xpath("//use/machine[#{ci_compare('type', 'creativecommons')}]").each do |lic_type|
|
90
89
|
next if lic_type.text =~ /none/i
|
91
90
|
lic_text = rights.at_xpath("//use/human[#{ci_compare('type', 'creativecommons')}]").text.strip
|
92
91
|
next if lic_text.empty?
|
93
92
|
new_text = "CC #{lic_type.text}: #{lic_text}"
|
94
|
-
doc.root.element_children.last.add_next_sibling doc.create_element(
|
93
|
+
doc.root.element_children.last.add_next_sibling doc.create_element('accessCondition', new_text, :type => 'license')
|
95
94
|
end
|
96
95
|
rights.xpath("//use/machine[#{ci_compare('type', 'opendatacommons')}]").each do |lic_type|
|
97
96
|
next if lic_type.text =~ /none/i
|
98
97
|
lic_text = rights.at_xpath("//use/human[#{ci_compare('type', 'opendatacommons')}]").text.strip
|
99
98
|
next if lic_text.empty?
|
100
99
|
new_text = "ODC #{lic_type.text}: #{lic_text}"
|
101
|
-
doc.root.element_children.last.add_next_sibling doc.create_element(
|
100
|
+
doc.root.element_children.last.add_next_sibling doc.create_element('accessCondition', new_text, :type => 'license')
|
102
101
|
end
|
103
102
|
end
|
104
103
|
|
105
|
-
#
|
106
|
-
#
|
104
|
+
# Adds to desc metadata a relatedItem with information about the collection this object belongs to.
|
105
|
+
# For use in published mods and mods-to-DC conversion.
|
106
|
+
# @param [Nokogiri::XML::Document] doc A copy of the descriptiveMetadata of the object, to be modified
|
107
|
+
# @return [Void]
|
107
108
|
# @note this method modifies the passed in doc
|
108
109
|
def add_collection_reference(doc)
|
109
|
-
return unless
|
110
|
-
collections=
|
110
|
+
return unless methods.include? :public_relationships
|
111
|
+
collections = public_relationships.search('//rdf:RDF/rdf:Description/fedora:isMemberOfCollection',
|
111
112
|
'fedora' => 'info:fedora/fedora-system:def/relations-external#',
|
112
113
|
'rdf' => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' )
|
113
114
|
return if collections.empty?
|
@@ -118,43 +119,81 @@ module Dor
|
|
118
119
|
end
|
119
120
|
|
120
121
|
collections.each do |collection_node|
|
121
|
-
druid=collection_node['rdf:resource']
|
122
|
-
druid=druid.gsub('info:fedora/','')
|
123
|
-
collection_obj=Dor::Item.find(druid)
|
122
|
+
druid = collection_node['rdf:resource']
|
123
|
+
druid = druid.gsub('info:fedora/', '')
|
124
|
+
collection_obj = Dor::Item.find(druid)
|
124
125
|
collection_title = Dor::Describable.get_collection_title(collection_obj)
|
125
|
-
related_item_node=Nokogiri::XML::Node.new('relatedItem',doc)
|
126
|
-
related_item_node['type']='host'
|
127
|
-
title_info_node = Nokogiri::XML::Node.new('titleInfo',doc)
|
128
|
-
title_node = Nokogiri::XML::Node.new('title',doc)
|
129
|
-
title_node.content=collection_title
|
126
|
+
related_item_node = Nokogiri::XML::Node.new('relatedItem', doc)
|
127
|
+
related_item_node['type'] = 'host'
|
128
|
+
title_info_node = Nokogiri::XML::Node.new('titleInfo', doc)
|
129
|
+
title_node = Nokogiri::XML::Node.new('title', doc)
|
130
|
+
title_node.content = collection_title
|
130
131
|
|
131
|
-
|
132
|
-
|
133
|
-
|
132
|
+
# e.g.:
|
133
|
+
# <location>
|
134
|
+
# <url>http://purl.stanford.edu/rh056sr3313</url>
|
135
|
+
# </location>
|
136
|
+
loc_node = doc.create_element('location')
|
137
|
+
url_node = doc.create_element('url')
|
138
|
+
url_node.content = "https://#{Dor::Config.stacks.document_cache_host}/#{druid.split(':').last}"
|
139
|
+
loc_node << url_node
|
134
140
|
|
135
|
-
type_node=Nokogiri::XML::Node.new('typeOfResource',doc)
|
141
|
+
type_node = Nokogiri::XML::Node.new('typeOfResource', doc)
|
136
142
|
type_node['collection'] = 'yes'
|
137
143
|
doc.root.add_child(related_item_node)
|
138
144
|
related_item_node.add_child(title_info_node)
|
139
145
|
title_info_node.add_child(title_node)
|
140
|
-
related_item_node.add_child(
|
146
|
+
related_item_node.add_child(loc_node)
|
141
147
|
related_item_node.add_child(type_node)
|
142
148
|
end
|
143
149
|
end
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
+
|
151
|
+
# expand constituent relations into relatedItem references -- see JUMBO-18
|
152
|
+
# @param [Nokogiri::XML] doc public MODS XML being built
|
153
|
+
# @return [Void]
|
154
|
+
def add_constituent_relations(doc)
|
155
|
+
public_relationships.search('//rdf:RDF/rdf:Description/fedora:isConstituentOf',
|
156
|
+
'fedora' => 'info:fedora/fedora-system:def/relations-external#',
|
157
|
+
'rdf' => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' ).each do |parent|
|
158
|
+
# fetch the parent object to get title
|
159
|
+
druid = parent['rdf:resource'].gsub(/^info:fedora\//, '')
|
160
|
+
parent_item = Dor::Item.find(druid)
|
161
|
+
|
162
|
+
# create the MODS relation
|
163
|
+
relatedItem = doc.create_element 'relatedItem'
|
164
|
+
relatedItem['type'] = 'host'
|
165
|
+
relatedItem['displayLabel'] = 'Appears in'
|
166
|
+
|
167
|
+
# load the title from the parent's DC.title
|
168
|
+
titleInfo = doc.create_element 'titleInfo'
|
169
|
+
title = doc.create_element 'title'
|
170
|
+
title.content = parent_item.datastreams['DC'].title.first
|
171
|
+
titleInfo << title
|
172
|
+
relatedItem << titleInfo
|
173
|
+
|
174
|
+
# point to the PURL for the parent
|
175
|
+
location = doc.create_element 'location'
|
176
|
+
url = doc.create_element 'url'
|
177
|
+
url.content = "http://#{Dor::Config.stacks.document_cache_host}/#{druid.split(':').last}"
|
178
|
+
location << url
|
179
|
+
relatedItem << location
|
180
|
+
|
181
|
+
# finish up by adding relation to public MODS
|
182
|
+
doc.root << relatedItem
|
150
183
|
end
|
151
184
|
end
|
152
185
|
|
186
|
+
def metadata_namespace
|
187
|
+
desc_md = datastreams['descMetadata'].ng_xml
|
188
|
+
return nil if desc_md.nil? || desc_md.root.nil? || desc_md.root.namespace.nil?
|
189
|
+
desc_md.root.namespace.href
|
190
|
+
end
|
191
|
+
|
153
192
|
def metadata_format
|
154
193
|
DESC_MD_FORMATS[metadata_namespace]
|
155
194
|
end
|
156
195
|
|
157
|
-
def to_solr(solr_doc=
|
196
|
+
def to_solr(solr_doc = {}, *args)
|
158
197
|
super solr_doc, *args
|
159
198
|
mods_sources = {
|
160
199
|
'sw_language_ssim' => :sw_language_facet,
|
@@ -170,33 +209,33 @@ module Dor
|
|
170
209
|
'mods_typeOfResource_ssim' => [:term_values, :typeOfResource],
|
171
210
|
'mods_typeOfResource_tesim' => [:term_values, :typeOfResource]
|
172
211
|
}
|
173
|
-
keys = mods_sources.keys.concat(%w
|
212
|
+
keys = mods_sources.keys.concat(%w( metadata_format_ssim ))
|
174
213
|
keys.each { |key|
|
175
214
|
solr_doc[key] ||= [] # initialize multivalue targts if necessary
|
176
215
|
}
|
177
216
|
|
178
|
-
solr_doc[
|
217
|
+
solr_doc['metadata_format_ssim'] << metadata_format
|
179
218
|
begin
|
180
|
-
dc_doc =
|
219
|
+
dc_doc = generate_dublin_core
|
181
220
|
dc_doc.xpath('/oai_dc:dc/*').each do |node|
|
182
221
|
add_solr_value(solr_doc, "public_dc_#{node.name}", node.text, :string, [:stored_searchable])
|
183
222
|
end
|
184
|
-
creator=''
|
223
|
+
creator = ''
|
185
224
|
dc_doc.xpath('//dc:creator').each do |node|
|
186
|
-
creator=node.text
|
225
|
+
creator = node.text
|
187
226
|
end
|
188
|
-
title=''
|
227
|
+
title = ''
|
189
228
|
dc_doc.xpath('//dc:title').each do |node|
|
190
|
-
title=node.text
|
229
|
+
title = node.text
|
191
230
|
end
|
192
|
-
creator_title=creator+title
|
231
|
+
creator_title = creator + title
|
193
232
|
add_solr_value(solr_doc, 'creator_title', creator_title , :string, [:stored_sortable])
|
194
233
|
rescue CrosswalkError => e
|
195
|
-
ActiveFedora.logger.warn "Cannot index #{
|
234
|
+
ActiveFedora.logger.warn "Cannot index #{pid}.descMetadata: #{e.message}"
|
196
235
|
end
|
197
236
|
|
198
237
|
begin
|
199
|
-
mods =
|
238
|
+
mods = stanford_mods
|
200
239
|
mods_sources.each_pair do |solr_key, meth|
|
201
240
|
vals = meth.is_a?(Array) ? mods.send(meth.shift, *meth) : mods.send(meth)
|
202
241
|
solr_doc[solr_key].push *vals unless vals.nil? || vals.empty?
|
@@ -206,82 +245,84 @@ module Dor
|
|
206
245
|
solr_doc['sw_pub_date_facet_ssi'] = mods.pub_date_facet # e.g. '9th century'
|
207
246
|
end
|
208
247
|
# some fields get explicit "(none)" placeholder values, mostly for faceting
|
209
|
-
%w
|
248
|
+
%w(sw_language_tesim sw_genre_tesim sw_format_tesim).each { |key| solr_doc[key] = ['(none)'] if solr_doc[key].empty? }
|
210
249
|
# otherwise remove empties
|
211
|
-
keys.each{ |key| solr_doc.delete(key) if solr_doc[key].nil? || solr_doc[key].empty?}
|
250
|
+
keys.each { |key| solr_doc.delete(key) if solr_doc[key].nil? || solr_doc[key].empty?}
|
212
251
|
solr_doc
|
213
252
|
end
|
214
253
|
|
215
254
|
def update_title(new_title)
|
216
255
|
raise 'Descriptive metadata has no title to update!' unless update_simple_field('mods:mods/mods:titleInfo/mods:title', new_title)
|
217
256
|
end
|
257
|
+
|
218
258
|
def add_identifier(type, value)
|
219
|
-
ds_xml=
|
220
|
-
ds_xml.search('//mods:mods','mods' => 'http://www.loc.gov/mods/v3').each do |node|
|
221
|
-
new_node=Nokogiri::XML::Node.new('identifier',ds_xml) #this ends up being mods:identifier without having to specify the namespace
|
222
|
-
new_node['type']=type
|
223
|
-
new_node.content=value
|
259
|
+
ds_xml = descMetadata.ng_xml
|
260
|
+
ds_xml.search('//mods:mods', 'mods' => 'http://www.loc.gov/mods/v3').each do |node|
|
261
|
+
new_node = Nokogiri::XML::Node.new('identifier', ds_xml) # this ends up being mods:identifier without having to specify the namespace
|
262
|
+
new_node['type'] = type
|
263
|
+
new_node.content = value
|
224
264
|
node.add_child(new_node)
|
225
265
|
end
|
226
266
|
end
|
227
|
-
|
228
|
-
|
229
|
-
ds_xml
|
267
|
+
|
268
|
+
def delete_identifier(type, value = nil)
|
269
|
+
ds_xml = descMetadata.ng_xml
|
270
|
+
ds_xml.search('//mods:identifier', 'mods' => 'http://www.loc.gov/mods/v3').each do |node|
|
230
271
|
if node.content == value || value.nil?
|
231
272
|
node.remove
|
232
273
|
return true
|
233
274
|
end
|
234
275
|
end
|
235
|
-
|
276
|
+
false
|
236
277
|
end
|
237
278
|
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
279
|
+
# @param [Boolean] force Overwrite existing XML
|
280
|
+
# @return [String] descMetadata.content XML
|
281
|
+
def set_desc_metadata_using_label(force = false)
|
282
|
+
unless force || descMetadata.new?
|
283
|
+
raise 'Cannot proceed, there is already content in the descriptive metadata datastream: ' + descMetadata.content.to_s
|
242
284
|
end
|
243
|
-
label=self.label
|
285
|
+
label = self.label
|
244
286
|
builder = Nokogiri::XML::Builder.new { |xml|
|
245
|
-
xml.mods( 'xmlns' => 'http://www.loc.gov/mods/v3', 'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance'
|
246
|
-
xml.titleInfo{
|
287
|
+
xml.mods( 'xmlns' => 'http://www.loc.gov/mods/v3', 'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance', :version => '3.3', 'xsi:schemaLocation' => 'http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-3.xsd') {
|
288
|
+
xml.titleInfo {
|
247
289
|
xml.title label
|
248
290
|
}
|
249
291
|
}
|
250
292
|
}
|
251
|
-
|
293
|
+
descMetadata.content = builder.to_xml
|
252
294
|
end
|
253
295
|
|
254
296
|
def self.get_collection_title(obj)
|
255
|
-
xml=obj.descMetadata.ng_xml
|
256
|
-
title=''
|
257
|
-
title_node = xml.at_xpath('//mods:mods/mods:titleInfo/mods:title','mods' => 'http://www.loc.gov/mods/v3')
|
297
|
+
xml = obj.descMetadata.ng_xml
|
298
|
+
title = ''
|
299
|
+
title_node = xml.at_xpath('//mods:mods/mods:titleInfo/mods:title', 'mods' => 'http://www.loc.gov/mods/v3')
|
258
300
|
if title_node
|
259
301
|
title = title_node.content
|
260
|
-
subtitle=xml.at_xpath('//mods:mods/mods:titleInfo/mods:subTitle','mods' => 'http://www.loc.gov/mods/v3')
|
302
|
+
subtitle = xml.at_xpath('//mods:mods/mods:titleInfo/mods:subTitle', 'mods' => 'http://www.loc.gov/mods/v3')
|
261
303
|
title += " (#{subtitle.content})" if subtitle
|
262
304
|
end
|
263
305
|
title
|
264
306
|
end
|
265
307
|
|
266
308
|
private
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
node.content=new_val
|
309
|
+
|
310
|
+
# generic updater useful for updating things like title or subtitle which can only have a single occurance and must be present
|
311
|
+
def update_simple_field(field, new_val)
|
312
|
+
descMetadata.ng_xml.search('//' + field, 'mods' => 'http://www.loc.gov/mods/v3').each do |node|
|
313
|
+
node.content = new_val
|
272
314
|
return true
|
273
315
|
end
|
274
|
-
|
316
|
+
false
|
275
317
|
end
|
276
318
|
|
277
319
|
# Builds case-insensitive xpath translate function call that will match the attribute to a value
|
278
320
|
def ci_compare(attribute, value)
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
321
|
+
"translate(
|
322
|
+
@#{attribute},
|
323
|
+
'ABCDEFGHIJKLMNOPQRSTUVWXYZ',
|
324
|
+
'abcdefghijklmnopqrstuvwxyz'
|
325
|
+
) = '#{value}' "
|
284
326
|
end
|
285
|
-
|
286
327
|
end
|
287
328
|
end
|