dor-services 6.8.0 → 7.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/config/config_defaults.yml +0 -27
- data/config/dev_console_env.rb.example +0 -17
- data/lib/dor-services.rb +9 -73
- data/lib/dor/config.rb +1 -30
- data/lib/dor/datastreams/content_metadata_ds.rb +8 -0
- data/lib/dor/datastreams/desc_metadata_ds.rb +19 -0
- data/lib/dor/datastreams/identity_metadata_ds.rb +65 -0
- data/lib/dor/datastreams/rights_metadata_ds.rb +14 -2
- data/lib/dor/datastreams/workflow_definition_ds.rb +1 -1
- data/lib/dor/datastreams/workflow_ds.rb +0 -15
- data/lib/dor/indexers/identifiable_indexer.rb +8 -4
- data/lib/dor/indexers/releasable_indexer.rb +7 -1
- data/lib/dor/models/abstract.rb +143 -8
- data/lib/dor/models/admin_policy_object.rb +0 -3
- data/lib/dor/models/collection.rb +0 -2
- data/lib/dor/models/concerns/embargoable.rb +7 -60
- data/lib/dor/models/etd.rb +100 -0
- data/lib/dor/models/item.rb +12 -28
- data/lib/dor/models/part.rb +18 -0
- data/lib/dor/models/set.rb +0 -2
- data/lib/dor/services/collection_service.rb +36 -0
- data/lib/dor/services/embargo_service.rb +93 -0
- data/lib/dor/services/ontology.rb +0 -18
- data/lib/dor/services/public_desc_metadata_service.rb +7 -11
- data/lib/dor/services/search_service.rb +0 -40
- data/lib/dor/version.rb +1 -1
- data/lib/dor/workflow/document.rb +0 -7
- metadata +15 -78
- data/lib/dor/models/concerns/assembleable.rb +0 -18
- data/lib/dor/models/concerns/contentable.rb +0 -185
- data/lib/dor/models/concerns/describable.rb +0 -82
- data/lib/dor/models/concerns/eventable.rb +0 -18
- data/lib/dor/models/concerns/geoable.rb +0 -14
- data/lib/dor/models/concerns/governable.rb +0 -101
- data/lib/dor/models/concerns/identifiable.rb +0 -172
- data/lib/dor/models/concerns/itemizable.rb +0 -42
- data/lib/dor/models/concerns/preservable.rb +0 -46
- data/lib/dor/models/concerns/processable.rb +0 -86
- data/lib/dor/models/concerns/publishable.rb +0 -76
- data/lib/dor/models/concerns/releaseable.rb +0 -118
- data/lib/dor/models/concerns/rightsable.rb +0 -25
- data/lib/dor/models/concerns/shelvable.rb +0 -15
- data/lib/dor/models/concerns/versionable.rb +0 -72
- data/lib/dor/services/ability.rb +0 -77
- data/lib/dor/services/cleanup_reset_service.rb +0 -103
- data/lib/dor/services/datastream_builder.rb +0 -96
- data/lib/dor/services/decommission_service.rb +0 -31
- data/lib/dor/services/digital_stacks_service.rb +0 -125
- data/lib/dor/services/dublin_core_service.rb +0 -45
- data/lib/dor/services/file_metadata_merge_service.rb +0 -71
- data/lib/dor/services/indexing_service.rb +0 -131
- data/lib/dor/services/merge_service.rb +0 -105
- data/lib/dor/services/public_xml_service.rb +0 -116
- data/lib/dor/services/publish_metadata_service.rb +0 -99
- data/lib/dor/services/reset_workspace_service.rb +0 -27
- data/lib/dor/services/sdr_ingest_service.rb +0 -172
- data/lib/dor/services/secondary_file_name_service.rb +0 -10
- data/lib/dor/services/shelving_service.rb +0 -69
- data/lib/dor/services/technical_metadata_service.rb +0 -232
- data/lib/dor/services/version_service.rb +0 -84
- data/lib/dor/utils/sdr_client.rb +0 -94
@@ -1,27 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module Dor
  # Renames an object's working directories with a version suffix at the end of
  # the accessionWF so that they can be cleaned up / deleted later.
  class ResetWorkspaceService
    class << self
      # Rename the object's druid tree in the workspace to "<tree>_v<version>".
      #
      # @param [String] druid The object identifier
      # @param [Integer, String] version The version suffix to append
      # @param [String] workspace_root The root directory under which the druid tree lives
      # @raise [RuntimeError] if the versioned directory already exists
      def reset_workspace_druid_tree(druid, version, workspace_root)
        live_tree = DruidTools::Druid.new(druid, workspace_root).pathname.to_s
        archived_tree = "#{live_tree}_v#{version}"

        raise "The archived directory #{archived_tree} already existed." if File.exist?(archived_tree)

        # When the tree is absent it is a truncated tree and there is nothing to do
        FileUtils.mv(live_tree, archived_tree) if File.exist?(live_tree)
      end

      # Rename the object's export bag directory, and its tar file when present,
      # to carry a "_v<version>" suffix.
      #
      # @param [String] druid The object identifier; only the part after the last ':' names the bag
      # @param [Integer, String] version The version suffix to append
      # @param [String] export_root The directory that holds the exported bags
      # @raise [RuntimeError] if the versioned bag directory already exists
      def reset_export_bag(druid, version, export_root)
        bag_path = File.join(export_root, druid.split(':').last)
        archived_bag = "#{bag_path}_v#{version}"

        raise "The archived bag #{archived_bag} already existed." if File.exist?(archived_bag)

        FileUtils.mv(bag_path, archived_bag) if File.exist?(bag_path)

        tarball = "#{bag_path}.tar"
        FileUtils.mv(tarball, "#{archived_bag}.tar") if File.exist?(tarball)
      end
    end
  end
end
|
@@ -1,172 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'moab/stanford'
|
4
|
-
|
5
|
-
module Dor
  # Exports a new version of a digital object to a BagIt bag and starts the
  # SDR preservation workflow.
  #
  # Note: This should probably live in common-accessioning robot sdr-ingest-transfer
  # as that is the only robot that uses it. See also preservable concern.
  class SdrIngestService
    # Files that must exist directly under a bag directory for it to be considered complete
    REQUIRED_BAG_ENTRIES = %w[
      data
      bagit.txt
      bag-info.txt
      manifest-sha256.txt
      tagmanifest-sha256.txt
      versionAdditions.xml
      versionInventory.xml
    ].freeze

    # @param [Dor::Item] dor_item The representation of the digital object
    # @param [String] _agreement_id deprecated, included for backward compatibility with common-accessioning
    # @return [void] Create the Moab/bag manifests for new version, export data to BagIt bag, kick off the SDR preservation workflow
    # @raise [Dor::Exception] if any step fails with a StandardError
    def self.transfer(dor_item, _agreement_id = nil)
      druid = dor_item.pid
      workspace = DruidTools::Druid.new(druid, Dor::Config.sdr.local_workspace_root)
      signature_catalog = get_signature_catalog(druid)
      new_version_id = signature_catalog.version_id + 1
      metadata_dir = extract_datastreams(dor_item, workspace)
      verify_version_metadata(metadata_dir, new_version_id)
      version_inventory = get_version_inventory(metadata_dir, druid, new_version_id)
      version_additions = signature_catalog.version_additions(version_inventory)
      content_additions = version_additions.group('content')
      if content_additions.nil? || content_additions.files.empty?
        content_dir = nil
      else
        new_file_list = content_additions.path_list
        content_dir = workspace.find_filelist_parent('content', new_file_list)
      end
      content_group = version_inventory.group('content')
      signature_catalog.normalize_group_signatures(content_group, content_dir) unless content_group.nil? || content_group.files.empty?
      # export the bag (in tar format)
      bag_dir = Pathname(Dor::Config.sdr.local_export_home).join(druid.sub('druid:', ''))
      bagger = Moab::Bagger.new(version_inventory, signature_catalog, bag_dir)
      bagger.reset_bag
      bagger.create_bag_inventory(:depositor)
      bagger.deposit_group('content', content_dir)
      bagger.deposit_group('metadata', metadata_dir)
      bagger.create_tagfiles
      verify_bag_structure(bag_dir)
      # start SDR preservation workflow (but do not create the workflows datastream)
      CreateWorkflowService.create_workflow(dor_item, name: 'preservationIngestWF', create_ds: false)
    rescue StandardError => e
      # An unqualified `Exception` here is looked up lexically inside `module Dor`,
      # so it means Dor::Exception when that class is loaded (ordinary failures
      # would then escape unwrapped) and ::Exception otherwise (trapping signals
      # and exit). StandardError states the intent without either surprise.
      raise Dor::Exception, "Error exporting new object version to bag: #{e.message}"
    end

    # Note: the following methods should probably all be private

    # @param [String] druid The object identifier
    # @return [Moab::SignatureCatalog] the catalog of all files previously ingested
    def self.get_signature_catalog(druid)
      Dor::Services::Client.object(druid).sdr.signature_catalog
    end

    # @param [Dor::Item] dor_item The representation of the digital object
    # @param [DruidTools::Druid] workspace The representation of the item's work area
    # @return [Pathname] Pull all the datastreams specified in the configuration file
    #   into the workspace's metadata directory, overwriting existing file if present
    def self.extract_datastreams(dor_item, workspace)
      metadata_dir = Pathname.new(workspace.path('metadata', true))
      Config.sdr.datastreams.to_hash.each_pair do |ds_name, required|
        ds_name = ds_name.to_s
        metadata_file = metadata_dir.join("#{ds_name}.xml")
        metadata_string = get_datastream_content(dor_item, ds_name, required)
        metadata_file.open('w') { |f| f << metadata_string } if metadata_string
      end
      metadata_dir
    end

    # @param [Dor::Item] dor_item The representation of the digital object
    # @param [String] ds_name The name of the desired Fedora datastream
    # @param [String] required Enumeration: one of ['required', 'optional']
    # @return [String] return the xml text of the specified datastream if it exists.
    #   If not found, return nil unless it is a required datastream in which case raise exception
    # @raise [RuntimeError] if the datastream is absent (or unsaved) and not 'optional'
    def self.get_datastream_content(dor_item, ds_name, required)
      # relationshipMetadata is stored under the RELS-EXT datastream id
      ds = (ds_name == 'relationshipMetadata' ? 'RELS-EXT' : ds_name)
      return dor_item.datastreams[ds].content if dor_item.datastreams.key?(ds) && !dor_item.datastreams[ds].new?
      return nil if required == 'optional'

      raise "required datastream #{ds_name} not found in DOR"
    end

    # @param [Pathname] metadata_dir the location of the metadata directory in the workspace
    # @param [Integer] expected the version identifier expected to be used in the versionMetadata
    # @return [Boolean] true if the versionMetadata file carries the expected version
    # @raise [RuntimeError] if the file is missing or the version does not match
    def self.verify_version_metadata(metadata_dir, expected)
      vmfile = metadata_dir.join('versionMetadata.xml')
      verify_version_id(vmfile, expected, vmfile_version_id(vmfile))
      true
    end

    # @param [Pathname] pathname The location of the file containing a version number
    # @param [Integer] expected The version number that should be in the file
    # @param [Integer] found The version number that is actually in the file
    # @return [Boolean] true if the two version numbers are equal
    # @raise [RuntimeError] if they differ
    def self.verify_version_id(pathname, expected, found)
      raise "Version mismatch in #{pathname}, expected #{expected}, found #{found}" unless expected == found

      true
    end

    # @param [Pathname] pathname the location of the versionMetadata file
    # @return [Integer] the versionId found in the last version element, or nil if missing
    # @raise [RuntimeError] if the file does not exist
    def self.vmfile_version_id(pathname)
      verify_pathname(pathname)
      # Block form so the file handle is closed as soon as parsing finishes
      doc = File.open(pathname.to_s) { |file| Nokogiri::XML(file) }
      latest_version = doc.xpath('/versionMetadata/version').last
      # No version element at all is reported as nil, the same as a missing attribute
      return nil if latest_version.nil?

      latest_version['versionId']&.to_i
    end

    # @param [Pathname] metadata_dir The location of the object's metadata files
    # @param [String] druid The object identifier
    # @param [Integer] version_id The version number
    # @return [Moab::FileInventory] Generate and return a version inventory for the object
    def self.get_version_inventory(metadata_dir, druid, version_id)
      version_inventory = get_content_inventory(metadata_dir, druid, version_id)
      version_inventory.groups << get_metadata_file_group(metadata_dir)
      version_inventory
    end

    # @param [Pathname] metadata_dir The location of the object's metadata files
    # @param [String] druid The object identifier
    # @param [Integer] version_id The version number
    # @return [Moab::FileInventory] Parse the contentMetadata
    #   and generate a new version inventory object containing a content group
    def self.get_content_inventory(metadata_dir, druid, version_id)
      content_metadata = get_content_metadata(metadata_dir)
      if content_metadata
        Stanford::ContentInventory.new.inventory_from_cm(content_metadata, druid, 'preserve', version_id)
      else
        # No contentMetadata.xml: start from an inventory built without it
        Moab::FileInventory.new(type: 'version', digital_object_id: druid, version_id: version_id)
      end
    end

    # @param [Pathname] metadata_dir The location of the object's metadata files
    # @return [String] Return the contents of the contentMetadata.xml file in that directory,
    #   or nil if the file does not exist
    def self.get_content_metadata(metadata_dir)
      content_metadata_pathname = metadata_dir.join('contentMetadata.xml')
      content_metadata_pathname.read if content_metadata_pathname.exist?
    end

    # @param [Pathname] metadata_dir The location of the object's metadata files
    # @return [Moab::FileGroup] Traverse the metadata directory and generate a metadata group
    def self.get_metadata_file_group(metadata_dir)
      Moab::FileGroup.new(group_id: 'metadata').group_from_directory(metadata_dir)
    end

    # @param [Pathname] bag_dir the location of the bag to be verified
    # @return [Boolean] true if all required files exist, raises exception if not
    # @raise [RuntimeError] naming the first required entry that is missing
    def self.verify_bag_structure(bag_dir)
      verify_pathname(bag_dir)
      REQUIRED_BAG_ENTRIES.each { |entry| verify_pathname(bag_dir.join(entry)) }
      verify_pathname(bag_dir.join('data', 'metadata', 'versionMetadata.xml'))
      true
    end

    # @param [Pathname] pathname The file whose existence should be verified
    # @return [Boolean] true if file exists, raises exception if not
    # @raise [RuntimeError] if nothing exists at pathname
    def self.verify_pathname(pathname)
      raise "#{pathname.basename} not found at #{pathname}" unless pathname.exist?

      true
    end
  end
end
|
@@ -1,10 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module Dor
  # Derives a secondary file name by inserting a sequence number into an
  # existing file name, just before its last extension.
  class SecondaryFileNameService
    # Splits on the LAST dot: everything before it is the stem, everything after
    # it is the extension. \A and \z (with /m so that '.' also spans newlines)
    # anchor the match to the whole string; the line anchors ^ and $ would match
    # only the first line and silently drop the rest of a multi-line name.
    STEM_AND_EXTENSION = /\A(.*)\.(.*)\z/m.freeze
    private_constant :STEM_AND_EXTENSION

    # @param [String] old_name The existing file name
    # @param [Integer, String] sequence_num The value to insert, preceded by an underscore
    # @return [String] "<stem>_<sequence_num>.<extension>" when old_name contains a dot,
    #   otherwise "<old_name>_<sequence_num>"
    # @example
    #   create('page.tif', 2) #=> "page_2.tif"
    #   create('README', 1)   #=> "README_1"
    def self.create(old_name, sequence_num)
      parts = STEM_AND_EXTENSION.match(old_name)
      return "#{old_name}_#{sequence_num}" unless parts

      "#{parts[1]}_#{sequence_num}.#{parts[2]}"
    end
  end
end
|
@@ -1,69 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module Dor
  # Push file changes for shelve-able files into the stacks
  class ShelvingService
    # Convenience entry point: build a service for the work and shelve it.
    # @param work the object whose content files are to be shelved
    def self.shelve(work)
      new(work).shelve
    end

    # @param work the object whose content files are to be shelved
    def initialize(work)
      @work = work
    end

    # Compute what changed since the previously ingested version, then delete,
    # rename and copy files in the stacks area accordingly.
    def shelve
      # differences between the current contentMetadata and the previously ingested version
      content_changes = shelve_diff
      stacks_root = stacks_location

      # where the object's files live in the stacks area
      stacks_dir = Pathname(DruidTools::StacksDruid.new(work.id, stacks_root).path)

      # where the object's content files live in the workspace area
      workspace = DruidTools::Druid.new(work.id, Config.stacks.local_workspace_root)
      source_dir = workspace_content_dir(content_changes, workspace)

      # delete, rename, or copy files to the stacks area
      DigitalStacksService.remove_from_stacks(stacks_dir, content_changes)
      DigitalStacksService.rename_in_stacks(stacks_dir, content_changes)
      DigitalStacksService.shelve_to_stacks(source_dir, stacks_dir, content_changes)
    end

    private

    attr_reader :work

    # retrieve the differences between the current contentMetadata and the previously ingested version
    # (filtering to select only the files that should be shelved to stacks)
    # @raise [Dor::ParameterError] if no local workspace root is configured
    # @raise [Dor::Exception] if the work has no contentMetadata datastream
    def shelve_diff
      raise Dor::ParameterError, 'Missing Dor::Config.stacks.local_workspace_root' if Config.stacks.local_workspace_root.nil?
      raise Dor::Exception, 'Missing contentMetadata datastream' if work.contentMetadata.nil?

      sdr_client = Dor::Services::Client.object(work.pid).sdr
      sdr_client
        .content_diff(current_content: work.contentMetadata.content, subset: 'shelve')
        .group_difference('content')
    end

    # Find the location of the object's content files in the workspace area
    # @param [Moab::FileGroupDifference] content_diff The differences between the current contentMetadata and the previously ingested version
    # @param [DruidTools::Druid] workspace_druid the location of the object's files in the workspace area
    # @return [Pathname] The location of the object's content files in the workspace area,
    #   or nil when there are no modified, added or copy-added files
    def workspace_content_dir(content_diff, workspace_druid)
      changes = content_diff.file_deltas
      # copyadded entries are [old, new] pairs; only the new path needs locating
      copy_targets = changes[:copyadded].map { |_source, target| target }
      wanted = changes[:modified] + changes[:added] + copy_targets
      return nil if wanted.empty?

      Pathname(workspace_druid.find_filelist_parent('content', wanted))
    end

    # get the stack location based on the contentMetadata stacks attribute
    # or using the default value from the config file if it doesn't exist
    # @raise [RuntimeError] if the stacks attribute is present but not an absolute path
    def stacks_location
      if work.contentMetadata&.stacks.present?
        location = work.contentMetadata.stacks[0]
        # Only an absolute stacks path is acceptable
        unless location.start_with? '/'
          raise "stacks attribute for item: #{work.id} contentMetadata should start with /. The current value is #{location}"
        end

        location
      else
        Config.stacks.local_stacks_root
      end
    end
  end
end
|
@@ -1,232 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'rubygems'
|
4
|
-
require 'moab/stanford'
|
5
|
-
require 'jhove_service'
|
6
|
-
require 'dor-services'
|
7
|
-
|
8
|
-
module Dor
  # Extracts technical metadata from files using JHOVE
  # If this is a new version it gets the old technicalMetadata datastream by
  # making an API call to sdr-services-app (via dor-services-app) and
  # only overwrites/adds parts for the files that were changed or added.
  # This allows us to avoid re-staging files that have not changed.
  # Switching to a more granular data model that has file metadata separate from
  # the Work metadata will allow us to simplify this greatly.
  class TechnicalMetadataService
    # Matches the opening <file id=...> tag of a serialized file node
    FILE_OPEN_TAG = /<file\s*id.*?["'].*?["'].*?>/.freeze
    private_constant :FILE_OPEN_TAG

    # @param [Dor::Item] dor_item The DOR item being processed by the technical metadata robot
    # @return [Boolean] True if technical metadata is correctly added or updated
    def self.add_update_technical_metadata(dor_item)
      test_jhove_service
      druid = dor_item.pid
      content_group_diff = get_content_group_diff(dor_item)
      deltas = get_file_deltas(content_group_diff)
      new_files = get_new_files(deltas)
      old_techmd = get_old_technical_metadata(dor_item)
      new_techmd = get_new_technical_metadata(druid, new_files)
      if old_techmd.nil?
        # this is version 1 or previous technical metadata was not saved
        final_techmd = new_techmd
      elsif content_group_diff.difference_count.zero?
        # there have been no changes to content files from previous version
        return true
      else
        merged_nodes = merge_file_nodes(old_techmd, new_techmd, deltas)
        final_techmd = build_technical_metadata(druid, merged_nodes)
      end
      ds = dor_item.datastreams['technicalMetadata']
      ds.dsLabel = 'Technical Metadata'
      ds.content = final_techmd
      ds.save
      true
    end

    # @return [Boolean] Make sure that the jhove-service gem is loaded
    # @raise [RuntimeError] if the gem cannot be required
    def self.test_jhove_service
      return if defined? ::JhoveService

      begin
        require 'jhove_service'
      rescue LoadError => e
        puts e.inspect
        raise 'jhove-service dependency gem was not found. Please add it to your Gemfile and run bundle install'
      end
    end
    private_class_method :test_jhove_service

    # @param [Dor::Item] dor_item The DOR item being processed by the technical metadata robot
    # @return [FileGroupDifference] The differences between two versions of a group of files
    # @raise [Dor::ParameterError] if no local workspace root is configured
    def self.get_content_group_diff(dor_item)
      return Moab::FileGroupDifference.new if dor_item.contentMetadata.nil?
      raise Dor::ParameterError, 'Missing Dor::Config.stacks.local_workspace_root' if Config.stacks.local_workspace_root.nil?

      client = Dor::Services::Client.object(dor_item.pid).sdr
      current_content = dor_item.contentMetadata.content
      inventory_diff = client.content_diff(current_content: current_content)
      inventory_diff.group_difference('content')
    end
    private_class_method :get_content_group_diff

    # @param [FileGroupDifference] content_group_diff
    # @return [Hash<Symbol,Array>] Sets of filenames grouped by change type for use in performing file or metadata operations
    def self.get_file_deltas(content_group_diff)
      content_group_diff.file_deltas
    end
    private_class_method :get_file_deltas

    # @param [Hash<Symbol,Array>] deltas Sets of filenames grouped by change type for use in performing file or metadata operations
    # @return [Array<String>] The list of filenames for files that are either added or modified since the previous version
    def self.get_new_files(deltas)
      deltas[:added] + deltas[:modified]
    end
    private_class_method :get_new_files

    # @param [Dor::Item] dor_item The DOR item being processed by the technical metadata robot
    # @return [String] The technicalMetadata datastream from the previous version of the digital object
    def self.get_old_technical_metadata(dor_item)
      # Prefer the copy held in SDR storage; fall back to the DOR datastream
      sdr_techmd = get_sdr_technical_metadata(dor_item.pid)
      return sdr_techmd unless sdr_techmd.nil?

      get_dor_technical_metadata(dor_item)
    end
    private_class_method :get_old_technical_metadata

    # @param [String] druid The identifier of the digital object being processed by the technical metadata robot
    # @return [String] The technicalMetadata datastream from the previous version of the digital object (fetched from SDR storage)
    #   The data is updated to the latest format.
    def self.get_sdr_technical_metadata(druid)
      latest_format_technical_metadata(get_sdr_metadata(druid, 'technicalMetadata'))
    end
    private_class_method :get_sdr_technical_metadata

    # @param [Dor::Item] dor_item The DOR item being processed by the technical metadata robot
    # @return [String] The technicalMetadata datastream from the previous version of the digital object (fetched from DOR fedora).
    #   The data is updated to the latest format.
    def self.get_dor_technical_metadata(dor_item)
      ds = 'technicalMetadata'
      return nil unless dor_item.datastreams.key?(ds) && !dor_item.datastreams[ds].new?

      latest_format_technical_metadata(dor_item.datastreams[ds].content)
    end
    private_class_method :get_dor_technical_metadata

    # @param [String, nil] stored_techmd Technical metadata as stored, possibly as legacy JHOVE output
    # @return [String, nil] stored_techmd itself when it contains a technicalMetadata element,
    #   the JhoveService upgrade of it when it contains a jhove element, otherwise nil
    def self.latest_format_technical_metadata(stored_techmd)
      # =~ (rather than match?) is deliberate: it tolerates a nil argument
      return stored_techmd if stored_techmd =~ /<technicalMetadata/
      return ::JhoveService.new.upgrade_technical_metadata(stored_techmd) if stored_techmd =~ /<jhove/

      nil
    end
    private_class_method :latest_format_technical_metadata

    # @param [String] druid The identifier of the digital object being processed by the technical metadata robot
    # @param [String] dsname The identifier of the metadata datastream
    # @return [String] The datastream contents from the previous version of the digital object (fetched from SDR storage)
    def self.get_sdr_metadata(druid, dsname)
      Dor::Services::Client.object(druid).sdr.metadata(datastream: dsname)
    end
    private_class_method :get_sdr_metadata

    # @param [String] druid The identifier of the digital object; used to generate workspace paths
    # @param [Array<String>] new_files The list of filenames for files that are either added or modified since the previous version
    # @return [String] The technicalMetadata datastream for the new files of the new digital object version,
    #   or nil when new_files is nil or empty
    def self.get_new_technical_metadata(druid, new_files)
      return nil if new_files.nil? || new_files.empty?

      workspace = DruidTools::Druid.new(druid, Dor::Config.sdr.local_workspace_root)
      content_dir = workspace.find_filelist_parent('content', new_files)
      temp_dir = workspace.temp_dir
      jhove_service = ::JhoveService.new(temp_dir)
      jhove_service.digital_object_id = druid
      fileset_file = write_fileset(temp_dir, new_files)
      jhove_output_file = jhove_service.run_jhove(content_dir, fileset_file)
      tech_md_file = jhove_service.create_technical_metadata(jhove_output_file)
      IO.read(tech_md_file)
    end
    private_class_method :get_new_technical_metadata

    # @param [Pathname] temp_dir The pathname of the temp folder in the object's workspace area
    # @param [Array<String>] new_files The list of filenames for files that are either added or modified since the previous version
    # @return [Pathname] Save the new_files list to a text file (one name per line) and return that file's name
    def self.write_fileset(temp_dir, new_files)
      fileset_pathname = Pathname(temp_dir).join('jhove_fileset.txt')
      fileset_pathname.open('w') { |f| f.puts(new_files) }
      fileset_pathname
    end
    private_class_method :write_fileset

    # @param [String] old_techmd The technicalMetadata datastream from the previous version of the digital object
    # @param [String] new_techmd The technicalMetadata datastream for the new files of the new digital object version
    # @param [Hash<Symbol,Array>] deltas Filenames grouped by change type; :renamed and :copyadded hold [old, new] pairs
    # @return [Hash<String,String>] The complete set of serialized technicalMetadata file nodes for the digital object, indexed by filename
    def self.merge_file_nodes(old_techmd, new_techmd, deltas)
      old_file_nodes = get_file_nodes(old_techmd)
      new_file_nodes = get_file_nodes(new_techmd)
      merged_nodes = {}
      # unchanged files keep their previous node
      deltas[:identical].each { |path| merged_nodes[path] = old_file_nodes[path] }
      # changed and new files take the freshly generated node
      deltas[:modified].each { |path| merged_nodes[path] = new_file_nodes[path] }
      deltas[:added].each { |path| merged_nodes[path] = new_file_nodes[path] }
      # renamed and copied files reuse the old node under the new file id
      deltas[:renamed].each do |oldpath, newpath|
        merged_nodes[newpath] = relabel_file_node(old_file_nodes[oldpath], newpath)
      end
      deltas[:copyadded].each do |oldpath, newpath|
        merged_nodes[newpath] = relabel_file_node(old_file_nodes[oldpath], newpath)
      end
      merged_nodes
    end
    private_class_method :merge_file_nodes

    # @param [String] node A serialized file node
    # @param [String] new_path The file id to put in the node's opening tag
    # @return [String] A copy of node whose first opening file tag carries new_path as its id
    def self.relabel_file_node(node, new_path)
      # Block form inserts the replacement verbatim. With a replacement STRING,
      # a backslash sequence in the file name (e.g. "\0" or "\1") would be
      # expanded as a backreference and corrupt the tag.
      # NOTE(review): new_path is not XML-escaped, so a quote or ampersand in a
      # file name still yields a malformed tag -- confirm whether such names occur.
      node.sub(FILE_OPEN_TAG) { "<file id='#{new_path}'>" }
    end
    private_class_method :relabel_file_node

    # @param [String] technical_metadata A technicalMetadata datastream contents
    # @return [Hash<String,String>] The serialized file nodes from a technicalMetadata datastream, indexed by filename
    def self.get_file_nodes(technical_metadata)
      file_hash = {}
      return file_hash if technical_metadata.nil?

      current_file = []
      path = nil
      in_file = false
      # Line-oriented scan: a node runs from a line opening a file tag to the
      # next line closing one; lines outside any node are ignored.
      technical_metadata.each_line do |line|
        if line =~ /^\s*<file.*["'](.*?)["']/
          # the last quoted value on the opening line is taken as the file path
          path = Regexp.last_match(1)
          current_file << line
          in_file = true
        elsif line =~ %r{^\s*</file>}
          current_file << line
          file_hash[path] = current_file.join
          current_file = []
          path = nil
          in_file = false
        elsif in_file
          current_file << line
        end
      end
      file_hash
    end
    private_class_method :get_file_nodes

    # @param [String] druid The identifier of the digital object being processed by the technical metadata robot
    # @param [Hash<String,String>] merged_nodes The complete set of serialized technicalMetadata file nodes for the digital object, indexed by filename
    # @return [String] The finalized technicalMetadata datastream contents for the new object version,
    #   with the file nodes in filename order
    def self.build_technical_metadata(druid, merged_nodes)
      # unary + gives an unfrozen copy that the nodes can be appended to
      techmd_root = +<<~EOF
        <technicalMetadata objectId='#{druid}' datetime='#{Time.now.utc.iso8601}'
            xmlns:jhove='http://hul.harvard.edu/ois/xml/ns/jhove'
            xmlns:mix='http://www.loc.gov/mix/v10'
            xmlns:textmd='info:lc/xmlns/textMD-v3'>
      EOF
      doc = techmd_root
      merged_nodes.keys.sort.each { |path| doc << merged_nodes[path] }
      doc + '</technicalMetadata>'
    end
    private_class_method :build_technical_metadata
  end
end
|