dor-services 6.8.0 → 7.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/config/config_defaults.yml +0 -27
- data/config/dev_console_env.rb.example +0 -17
- data/lib/dor-services.rb +9 -73
- data/lib/dor/config.rb +1 -30
- data/lib/dor/datastreams/content_metadata_ds.rb +8 -0
- data/lib/dor/datastreams/desc_metadata_ds.rb +19 -0
- data/lib/dor/datastreams/identity_metadata_ds.rb +65 -0
- data/lib/dor/datastreams/rights_metadata_ds.rb +14 -2
- data/lib/dor/datastreams/workflow_definition_ds.rb +1 -1
- data/lib/dor/datastreams/workflow_ds.rb +0 -15
- data/lib/dor/indexers/identifiable_indexer.rb +8 -4
- data/lib/dor/indexers/releasable_indexer.rb +7 -1
- data/lib/dor/models/abstract.rb +143 -8
- data/lib/dor/models/admin_policy_object.rb +0 -3
- data/lib/dor/models/collection.rb +0 -2
- data/lib/dor/models/concerns/embargoable.rb +7 -60
- data/lib/dor/models/etd.rb +100 -0
- data/lib/dor/models/item.rb +12 -28
- data/lib/dor/models/part.rb +18 -0
- data/lib/dor/models/set.rb +0 -2
- data/lib/dor/services/collection_service.rb +36 -0
- data/lib/dor/services/embargo_service.rb +93 -0
- data/lib/dor/services/ontology.rb +0 -18
- data/lib/dor/services/public_desc_metadata_service.rb +7 -11
- data/lib/dor/services/search_service.rb +0 -40
- data/lib/dor/version.rb +1 -1
- data/lib/dor/workflow/document.rb +0 -7
- metadata +15 -78
- data/lib/dor/models/concerns/assembleable.rb +0 -18
- data/lib/dor/models/concerns/contentable.rb +0 -185
- data/lib/dor/models/concerns/describable.rb +0 -82
- data/lib/dor/models/concerns/eventable.rb +0 -18
- data/lib/dor/models/concerns/geoable.rb +0 -14
- data/lib/dor/models/concerns/governable.rb +0 -101
- data/lib/dor/models/concerns/identifiable.rb +0 -172
- data/lib/dor/models/concerns/itemizable.rb +0 -42
- data/lib/dor/models/concerns/preservable.rb +0 -46
- data/lib/dor/models/concerns/processable.rb +0 -86
- data/lib/dor/models/concerns/publishable.rb +0 -76
- data/lib/dor/models/concerns/releaseable.rb +0 -118
- data/lib/dor/models/concerns/rightsable.rb +0 -25
- data/lib/dor/models/concerns/shelvable.rb +0 -15
- data/lib/dor/models/concerns/versionable.rb +0 -72
- data/lib/dor/services/ability.rb +0 -77
- data/lib/dor/services/cleanup_reset_service.rb +0 -103
- data/lib/dor/services/datastream_builder.rb +0 -96
- data/lib/dor/services/decommission_service.rb +0 -31
- data/lib/dor/services/digital_stacks_service.rb +0 -125
- data/lib/dor/services/dublin_core_service.rb +0 -45
- data/lib/dor/services/file_metadata_merge_service.rb +0 -71
- data/lib/dor/services/indexing_service.rb +0 -131
- data/lib/dor/services/merge_service.rb +0 -105
- data/lib/dor/services/public_xml_service.rb +0 -116
- data/lib/dor/services/publish_metadata_service.rb +0 -99
- data/lib/dor/services/reset_workspace_service.rb +0 -27
- data/lib/dor/services/sdr_ingest_service.rb +0 -172
- data/lib/dor/services/secondary_file_name_service.rb +0 -10
- data/lib/dor/services/shelving_service.rb +0 -69
- data/lib/dor/services/technical_metadata_service.rb +0 -232
- data/lib/dor/services/version_service.rb +0 -84
- data/lib/dor/utils/sdr_client.rb +0 -94
@@ -1,27 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module Dor
|
4
|
-
# Rename the druid trees at the end of the accessionWF in order to be cleaned/deleted later.
|
5
|
-
class ResetWorkspaceService
  # Archive an object's workspace druid tree by renaming it with a
  # "_v<version>" suffix.
  #
  # @param [String] druid The object identifier
  # @param [String, Integer] version The version interpolated into the suffix
  # @param [String] workspace_root Base directory handed to DruidTools::Druid
  # @return [void]
  # @raise [RuntimeError] if the suffixed directory already exists
  def self.reset_workspace_druid_tree(druid, version, workspace_root)
    druid_tree_path = DruidTools::Druid.new(druid, workspace_root).pathname.to_s
    archived_path = "#{druid_tree_path}_v#{version}"

    raise "The archived directory #{archived_path} already existed." if File.exist?(archived_path)

    # No tree on disk means a truncated tree, where we shouldn't do anything.
    FileUtils.mv(druid_tree_path, archived_path) if File.exist?(druid_tree_path)
  end

  # Archive an object's export bag directory and/or its ".tar" sibling by
  # renaming them with a "_v<version>" suffix.
  #
  # @param [String] druid The object identifier; only the part after the last ':' is used
  # @param [String, Integer] version The version interpolated into the suffix
  # @param [String] export_root Directory that contains the bag
  # @return [void]
  # @raise [RuntimeError] if the archived bag directory already exists, or if
  #   moving the tar file would overwrite an already archived tar file
  def self.reset_export_bag(druid, version, export_root)
    bag_dir = File.join(export_root, druid.split(':').last)
    bag_tar = "#{bag_dir}.tar"
    archived_bag = "#{bag_dir}_v#{version}"
    archived_tar = "#{archived_bag}.tar"

    # Both checks run before anything is moved so a refusal never leaves a
    # half-archived bag behind.
    raise "The archived bag #{archived_bag} already existed." if File.exist?(archived_bag)
    # FileUtils.mv replaces an existing destination file, so guard the tar too.
    raise "The archived bag #{archived_tar} already existed." if File.exist?(bag_tar) && File.exist?(archived_tar)

    FileUtils.mv(bag_dir, archived_bag) if File.exist?(bag_dir)
    FileUtils.mv(bag_tar, archived_tar) if File.exist?(bag_tar)
  end
end
|
27
|
-
end
|
@@ -1,172 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'moab/stanford'
|
4
|
-
|
5
|
-
module Dor
|
6
|
-
# Note: This should probably live in common-accessioning robot sdr-ingest-transfer
|
7
|
-
# as that is the only robot that uses it. See also preservable concern.
|
8
|
-
class SdrIngestService
  # Export a new version of the object as a bag and start preservation ingest.
  #
  # @param [Dor::Item] dor_item The representation of the digital object
  # @param [String] _agreement_id deprecated, included for backward compatibility with common-accessioning
  # @return [void] Create the Moab/bag manifests for new version, export data to BagIt bag, kick off the SDR preservation workflow
  # @raise [Dor::Exception] if any step fails; the message of the underlying error is embedded
  def self.transfer(dor_item, _agreement_id = nil)
    druid = dor_item.pid
    workspace = DruidTools::Druid.new(druid, Dor::Config.sdr.local_workspace_root)
    signature_catalog = get_signature_catalog(druid)
    new_version_id = signature_catalog.version_id + 1
    metadata_dir = extract_datastreams(dor_item, workspace)
    verify_version_metadata(metadata_dir, new_version_id)
    version_inventory = get_version_inventory(metadata_dir, druid, new_version_id)
    version_additions = signature_catalog.version_additions(version_inventory)
    content_additions = version_additions.group('content')
    # Only look for a content directory when this version actually adds content files.
    content_dir =
      if content_additions.nil? || content_additions.files.empty?
        nil
      else
        workspace.find_filelist_parent('content', content_additions.path_list)
      end
    content_group = version_inventory.group('content')
    signature_catalog.normalize_group_signatures(content_group, content_dir) unless content_group.nil? || content_group.files.empty?
    # export the bag (in tar format)
    bag_dir = Pathname(Dor::Config.sdr.local_export_home).join(druid.sub('druid:', ''))
    bagger = Moab::Bagger.new(version_inventory, signature_catalog, bag_dir)
    bagger.reset_bag
    bagger.create_bag_inventory(:depositor)
    bagger.deposit_group('content', content_dir)
    bagger.deposit_group('metadata', metadata_dir)
    bagger.create_tagfiles
    verify_bag_structure(bag_dir)
    # start SDR preservation workflow (but do not create the workflows datastream)
    CreateWorkflowService.create_workflow(dor_item, name: 'preservationIngestWF', create_ds: false)
  rescue ::StandardError => e
    # Wrap ordinary failures only: rescuing the root Exception class would also
    # trap interrupts and SystemExit. The leading :: keeps constant lookup from
    # picking up a same-named constant in an enclosing namespace.
    raise Dor::Exception, "Error exporting new object version to bag: #{e.message}"
  end

  # Note: the following methods should probably all be private

  # @param [String] druid The object identifier
  # @return [Moab::SignatureCatalog] the catalog of all files previously ingested
  def self.get_signature_catalog(druid)
    Dor::Services::Client.object(druid).sdr.signature_catalog
  end

  # @param [Dor::Item] dor_item The representation of the digital object
  # @param [DruidTools::Druid] workspace The representation of the item's work area
  # @return [Pathname] Pull all the datastreams specified in the configuration file
  #   into the workspace's metadata directory, overwriting existing file if present
  def self.extract_datastreams(dor_item, workspace)
    metadata_dir = Pathname.new(workspace.path('metadata', true))
    Config.sdr.datastreams.to_hash.each_pair do |ds_name, required|
      name = ds_name.to_s
      metadata_string = get_datastream_content(dor_item, name, required)
      # Datastreams that yielded no content (optional and absent) produce no file.
      next unless metadata_string

      metadata_dir.join("#{name}.xml").open('w') { |f| f << metadata_string }
    end
    metadata_dir
  end

  # @param [Dor::Item] dor_item The representation of the digital object
  # @param [String] ds_name The name of the desired Fedora datastream
  # @param [String] required Enumeration: one of ['required', 'optional']
  # @return [String] return the xml text of the specified datastream if it exists.
  #   If not found, return nil unless it is a required datastream in which case raise exception
  # @raise [RuntimeError] if the datastream is absent (or still new) and not 'optional'
  def self.get_datastream_content(dor_item, ds_name, required)
    # 'relationshipMetadata' is stored under the 'RELS-EXT' datastream id.
    ds = ds_name == 'relationshipMetadata' ? 'RELS-EXT' : ds_name
    datastreams = dor_item.datastreams
    return datastreams[ds].content if datastreams.key?(ds) && !datastreams[ds].new?
    return nil if required == 'optional'

    raise "required datastream #{ds_name} not found in DOR"
  end

  # @param [Pathname] metadata_dir the location of the metadata directory in the workspace
  # @param [Integer] expected the version identifier expected to be used in the versionMetadata
  # @return [Boolean] true if the version matches, raises exception if not
  def self.verify_version_metadata(metadata_dir, expected)
    vmfile = metadata_dir.join('versionMetadata.xml')
    verify_version_id(vmfile, expected, vmfile_version_id(vmfile))
    true
  end

  # @param [Pathname] pathname The location of the file containing a version number
  # @param [Integer] expected The version number that should be in the file
  # @param [Integer] found The version number that is actually in the file
  # @return [Boolean] true if the numbers are equal, raises exception if not
  def self.verify_version_id(pathname, expected, found)
    raise "Version mismatch in #{pathname}, expected #{expected}, found #{found}" unless expected == found

    true
  end

  # @param [Pathname] pathname the location of the versionMetadata file
  # @return [Integer] the versionId found in the last version element, or nil if missing
  def self.vmfile_version_id(pathname)
    verify_pathname(pathname)
    # Block form closes the file handle as soon as parsing has finished.
    doc = File.open(pathname.to_s) { |file| Nokogiri::XML(file) }
    last_version = doc.xpath('/versionMetadata/version').last
    # No <version> elements at all: report "missing" instead of failing on nil.
    return nil if last_version.nil?

    last_version['versionId']&.to_i
  end

  # @param [Pathname] metadata_dir The location of the object's metadata files
  # @param [String] druid The object identifier
  # @param [Integer] version_id The version number
  # @return [Moab::FileInventory] Generate and return a version inventory for the object
  def self.get_version_inventory(metadata_dir, druid, version_id)
    version_inventory = get_content_inventory(metadata_dir, druid, version_id)
    version_inventory.groups << get_metadata_file_group(metadata_dir)
    version_inventory
  end

  # @param [Pathname] metadata_dir The location of the object's metadata files
  # @param [String] druid The object identifier
  # @param [Integer] version_id The version number
  # @return [Moab::FileInventory] Parse the contentMetadata
  #   and generate a new version inventory object containing a content group
  def self.get_content_inventory(metadata_dir, druid, version_id)
    content_metadata = get_content_metadata(metadata_dir)
    if content_metadata
      Stanford::ContentInventory.new.inventory_from_cm(content_metadata, druid, 'preserve', version_id)
    else
      Moab::FileInventory.new(type: 'version', digital_object_id: druid, version_id: version_id)
    end
  end

  # @param [Pathname] metadata_dir The location of the object's metadata files
  # @return [String, nil] The contents of contentMetadata.xml in that directory, or nil if the file does not exist
  def self.get_content_metadata(metadata_dir)
    content_metadata_pathname = metadata_dir.join('contentMetadata.xml')
    content_metadata_pathname.read if content_metadata_pathname.exist?
  end

  # @param [Pathname] metadata_dir The location of the object's metadata files
  # @return [Moab::FileGroup] Traverse the metadata directory and generate a metadata group
  def self.get_metadata_file_group(metadata_dir)
    Moab::FileGroup.new(group_id: 'metadata').group_from_directory(metadata_dir)
  end

  # @param [Pathname] bag_dir the location of the bag to be verified
  # @return [Boolean] true if all required files exist, raises exception if not
  def self.verify_bag_structure(bag_dir)
    verify_pathname(bag_dir)
    %w[
      data
      bagit.txt
      bag-info.txt
      manifest-sha256.txt
      tagmanifest-sha256.txt
      versionAdditions.xml
      versionInventory.xml
    ].each { |entry| verify_pathname(bag_dir.join(entry)) }
    verify_pathname(bag_dir.join('data', 'metadata', 'versionMetadata.xml'))
    true
  end

  # @param [Pathname] pathname The file whose existence should be verified
  # @return [Boolean] true if file exists, raises exception if not
  def self.verify_pathname(pathname)
    raise "#{pathname.basename} not found at #{pathname}" unless pathname.exist?

    true
  end
end
|
172
|
-
end
|
@@ -1,10 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module Dor
|
4
|
-
# Derives a secondary file name by inserting a sequence number before the
# file extension (or appending it when there is no extension).
class SecondaryFileNameService
  # Matches "<stem>.<ext>" against the whole string. The extension may not
  # contain '/', so a dot inside a directory component is never mistaken for
  # the extension separator.
  NAME_WITH_EXTENSION = %r{\A(?<stem>.*)\.(?<ext>[^/]*)\z}m.freeze
  private_constant :NAME_WITH_EXTENSION

  # @param [String] old_name The original file name, optionally with leading directories
  # @param [Integer, String] sequence_num The value inserted after an underscore
  # @return [String] "<stem>_<sequence_num>.<ext>" when the name has an extension,
  #   otherwise "<old_name>_<sequence_num>"
  # @example
  #   create('image.jp2', 2)   # => "image_2.jp2"
  #   create('README', 3)      # => "README_3"
  #   create('dir.v1/file', 1) # => "dir.v1/file_1"
  def self.create(old_name, sequence_num)
    match = NAME_WITH_EXTENSION.match(old_name.to_s)
    return "#{old_name}_#{sequence_num}" unless match

    "#{match[:stem]}_#{sequence_num}.#{match[:ext]}"
  end
end
|
10
|
-
end
|
@@ -1,69 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module Dor
|
4
|
-
# Push file changes for shelve-able files into the stacks
|
5
|
-
class ShelvingService
  # Convenience entry point: build a service for +work+ and run it.
  def self.shelve(work)
    new(work).shelve
  end

  # @param work the object whose files are to be shelved; this class reads its
  #   id, pid and contentMetadata
  def initialize(work)
    @work = work
  end

  # Applies the shelve-able content changes to the stacks: removals first,
  # then renames, then copies from the workspace.
  def shelve
    # differences between the current contentMetadata and the previously ingested version
    content_changes = shelve_diff
    # where the object's files live in the stacks area
    stacks_dir = Pathname(DruidTools::StacksDruid.new(work.id, stacks_location).path)
    # where the object's content files live in the workspace area
    workspace = DruidTools::Druid.new(work.id, Config.stacks.local_workspace_root)
    source_dir = workspace_content_dir(content_changes, workspace)
    # delete, rename, or copy files to the stacks area
    DigitalStacksService.remove_from_stacks(stacks_dir, content_changes)
    DigitalStacksService.rename_in_stacks(stacks_dir, content_changes)
    DigitalStacksService.shelve_to_stacks(source_dir, stacks_dir, content_changes)
  end

  private

  attr_reader :work

  # Asks the SDR client for the difference between the current contentMetadata
  # and the previously ingested version, limited to the 'shelve' subset, and
  # returns its 'content' group difference.
  def shelve_diff
    raise Dor::ParameterError, 'Missing Dor::Config.stacks.local_workspace_root' if Config.stacks.local_workspace_root.nil?
    raise Dor::Exception, 'Missing contentMetadata datastream' if work.contentMetadata.nil?

    Dor::Services::Client.object(work.pid).sdr
                         .content_diff(current_content: work.contentMetadata.content, subset: 'shelve')
                         .group_difference('content')
  end

  # Find the location of the object's content files in the workspace area
  # @param [Moab::FileGroupDifference] content_diff The differences between the current contentMetadata and the previously ingested version
  # @param [DruidTools::Druid] workspace_druid the location of the object's files in the workspace area
  # @return [Pathname, nil] The location of the object's content files in the workspace area,
  #   or nil when nothing was modified, added or copy-added
  def workspace_content_dir(content_diff, workspace_druid)
    changes = content_diff.file_deltas
    # copyadded entries are [old, new] pairs; only the new name has to be located
    incoming = changes[:modified] + changes[:added] + changes[:copyadded].map { |_source, target| target }
    incoming.empty? ? nil : Pathname(workspace_druid.find_filelist_parent('content', incoming))
  end

  # get the stack location based on the contentMetadata stacks attribute
  # or using the default value from the config file if it doesn't exist
  def stacks_location
    content_metadata = work.contentMetadata
    return Config.stacks.local_stacks_root unless content_metadata&.stacks.present?

    location = content_metadata.stacks[0]
    # Only an absolute stacks path is acceptable.
    unless location.start_with?('/')
      raise "stacks attribute for item: #{work.id} contentMetadata should start with /. The current value is #{location}"
    end

    location
  end
end
|
69
|
-
end
|
@@ -1,232 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'rubygems'
|
4
|
-
require 'moab/stanford'
|
5
|
-
require 'jhove_service'
|
6
|
-
require 'dor-services'
|
7
|
-
|
8
|
-
module Dor
|
9
|
-
# Extracts technical metadata from files using JHOVE
|
10
|
-
# If this is a new version it gets the old technicalMetadata datastream by
|
11
|
-
# making an API call to sdr-services-app (via dor-services-app) and
|
12
|
-
# only overwrites/adds parts for the files that were changed or added.
|
13
|
-
# This allows us to avoid re-staging files that have not changed.
|
14
|
-
# Switching to a more granular data model that has file metadata separate from
|
15
|
-
# the Work metadata will allow us to simplify this greatly.
|
16
|
-
class TechnicalMetadataService
  # @param [Dor::Item] dor_item The DOR item being processed by the technical metadata robot
  # @return [Boolean] True if technical metadata is correctly added or updated
  def self.add_update_technical_metadata(dor_item)
    test_jhove_service
    druid = dor_item.pid
    content_group_diff = get_content_group_diff(dor_item)
    deltas = get_file_deltas(content_group_diff)
    new_files = get_new_files(deltas)
    old_techmd = get_old_technical_metadata(dor_item)
    new_techmd = get_new_technical_metadata(druid, new_files)
    if old_techmd.nil?
      # this is version 1 or previous technical metadata was not saved
      final_techmd = new_techmd
    elsif content_group_diff.difference_count == 0
      # there have been no changes to content files from previous version
      return true
    else
      merged_nodes = merge_file_nodes(old_techmd, new_techmd, deltas)
      final_techmd = build_technical_metadata(druid, merged_nodes)
    end
    ds = dor_item.datastreams['technicalMetadata']
    ds.dsLabel = 'Technical Metadata'
    ds.content = final_techmd
    ds.save
    true
  end

  # Make sure that the jhove-service gem is loaded, requiring it on demand.
  # @raise [RuntimeError] if the gem cannot be required
  def self.test_jhove_service
    return if defined? ::JhoveService

    begin
      require 'jhove_service'
    rescue LoadError => e
      # Diagnostic detail goes to stderr, not stdout.
      warn e.inspect
      raise 'jhove-service dependency gem was not found. Please add it to your Gemfile and run bundle install'
    end
  end
  private_class_method :test_jhove_service

  # @param [Dor::Item] dor_item The DOR item being processed by the technical metadata robot
  # @return [FileGroupDifference] The differences between two versions of a group of files
  #   (an empty difference when the item has no contentMetadata)
  def self.get_content_group_diff(dor_item)
    return Moab::FileGroupDifference.new if dor_item.contentMetadata.nil?
    raise Dor::ParameterError, 'Missing Dor::Config.stacks.local_workspace_root' if Config.stacks.local_workspace_root.nil?

    Dor::Services::Client.object(dor_item.pid).sdr
                         .content_diff(current_content: dor_item.contentMetadata.content)
                         .group_difference('content')
  end
  private_class_method :get_content_group_diff

  # @param [FileGroupDifference] content_group_diff
  # @return [Hash<Symbol,Array>] Sets of filenames grouped by change type for use in performing file or metadata operations
  def self.get_file_deltas(content_group_diff)
    content_group_diff.file_deltas
  end
  private_class_method :get_file_deltas

  # @param [Hash<Symbol,Array>] deltas Sets of filenames grouped by change type for use in performing file or metadata operations
  # @return [Array<String>] The list of filenames for files that are either added or modified since the previous version
  def self.get_new_files(deltas)
    deltas[:added] + deltas[:modified]
  end
  private_class_method :get_new_files

  # @param [Dor::Item] dor_item The DOR item being processed by the technical metadata robot
  # @return [String, nil] The technicalMetadata datastream from the previous version of the digital object;
  #   the SDR copy is preferred, the DOR copy is the fallback
  def self.get_old_technical_metadata(dor_item)
    sdr_techmd = get_sdr_technical_metadata(dor_item.pid)
    return sdr_techmd unless sdr_techmd.nil?

    get_dor_technical_metadata(dor_item)
  end
  private_class_method :get_old_technical_metadata

  # @param [String] druid The identifier of the digital object being processed by the technical metadata robot
  # @return [String, nil] The technicalMetadata datastream from the previous version of the digital object (fetched from SDR storage)
  #   The data is updated to the latest format.
  def self.get_sdr_technical_metadata(druid)
    current_format_technical_metadata(get_sdr_metadata(druid, 'technicalMetadata'))
  end
  private_class_method :get_sdr_technical_metadata

  # @param [Dor::Item] dor_item The DOR item being processed by the technical metadata robot
  # @return [String, nil] The technicalMetadata datastream from the previous version of the digital object (fetched from DOR fedora).
  #   The data is updated to the latest format.
  def self.get_dor_technical_metadata(dor_item)
    ds = 'technicalMetadata'
    return nil unless dor_item.datastreams.key?(ds) && !dor_item.datastreams[ds].new?

    current_format_technical_metadata(dor_item.datastreams[ds].content)
  end
  private_class_method :get_dor_technical_metadata

  # Shared by the SDR and DOR lookups so both apply the same format rules.
  # @param [String, nil] techmd Stored technical metadata in either format
  # @return [String, nil] +techmd+ itself when it contains a <technicalMetadata element,
  #   the JhoveService-upgraded form when it contains <jhove, otherwise nil
  def self.current_format_technical_metadata(techmd)
    return techmd if techmd =~ /<technicalMetadata/
    return ::JhoveService.new.upgrade_technical_metadata(techmd) if techmd =~ /<jhove/

    nil
  end
  private_class_method :current_format_technical_metadata

  # @param [String] druid The identifier of the digital object being processed by the technical metadata robot
  # @param [String] dsname The identifier of the metadata datastream
  # @return [String] The datastream contents from the previous version of the digital object (fetched from SDR storage)
  def self.get_sdr_metadata(druid, dsname)
    Dor::Services::Client.object(druid).sdr.metadata(datastream: dsname)
  end
  private_class_method :get_sdr_metadata

  # @param [String] druid The identifier of the digital object; used to locate its workspace
  # @param [Array<String>] new_files The list of filenames for files that are either added or modified since the previous version
  # @return [String, nil] The technicalMetadata datastream for the new files of the new digital object version,
  #   or nil when there are no new files
  def self.get_new_technical_metadata(druid, new_files)
    return nil if new_files.nil? || new_files.empty?

    workspace = DruidTools::Druid.new(druid, Dor::Config.sdr.local_workspace_root)
    content_dir = workspace.find_filelist_parent('content', new_files)
    temp_dir = workspace.temp_dir
    jhove_service = ::JhoveService.new(temp_dir)
    jhove_service.digital_object_id = druid
    fileset_file = write_fileset(temp_dir, new_files)
    jhove_output_file = jhove_service.run_jhove(content_dir, fileset_file)
    tech_md_file = jhove_service.create_technical_metadata(jhove_output_file)
    File.read(tech_md_file)
  end
  private_class_method :get_new_technical_metadata

  # @param [Pathname] temp_dir The pathname of the temp folder in the object's workspace area
  # @param [Array<String>] new_files The list of filenames for files that are either added or modified since the previous version
  # @return [Pathname] Save the new_files list to a text file (one name per line) and return that file's name
  def self.write_fileset(temp_dir, new_files)
    fileset_pathname = Pathname(temp_dir).join('jhove_fileset.txt')
    fileset_pathname.open('w') { |f| f.puts(new_files) }
    fileset_pathname
  end
  private_class_method :write_fileset

  # @param [String] old_techmd The technicalMetadata datastream from the previous version of the digital object
  # @param [String] new_techmd The technicalMetadata datastream for the new files of the new digital object version
  # @param [Hash<Symbol,Array>] deltas Sets of filenames grouped by change type
  #   (:identical, :modified, :added, :renamed, :copyadded; the last two hold [old, new] pairs)
  # @return [Hash<String,String>] The complete set of technicalMetadata <file> node texts for the digital object, indexed by filename
  # @raise [RuntimeError] if a file named in +deltas+ has no node in the metadata it should come from
  def self.merge_file_nodes(old_techmd, new_techmd, deltas)
    old_file_nodes = get_file_nodes(old_techmd)
    new_file_nodes = get_file_nodes(new_techmd)
    merged_nodes = {}
    # Unchanged files keep their previous node.
    deltas[:identical].each do |path|
      merged_nodes[path] = fetch_file_node(old_file_nodes, path, 'previous')
    end
    # Modified and added files take the freshly generated node.
    %i[modified added].each do |change|
      deltas[change].each do |path|
        merged_nodes[path] = fetch_file_node(new_file_nodes, path, 'new')
      end
    end
    # Renamed and copied files reuse the previous node under the new name.
    %i[renamed copyadded].each do |change|
      deltas[change].each do |oldpath, newpath|
        node = fetch_file_node(old_file_nodes, oldpath, 'previous').clone
        # NOTE(review): newpath is interpolated without XML escaping; a name
        # containing ' or & would produce malformed XML -- confirm whether
        # such names can reach this point.
        node.sub!(/<file\s*id.*?["'].*?["'].*?>/, "<file id='#{newpath}'>")
        merged_nodes[newpath] = node
      end
    end
    merged_nodes
  end
  private_class_method :merge_file_nodes

  # Look up a node, failing with a message that names the missing file rather
  # than letting a nil travel on and surface later as an unrelated error.
  # @param [Hash<String,String>] file_nodes Nodes indexed by filename
  # @param [String] path The filename to look up
  # @param [String] source Label used in the error message ('previous' or 'new')
  # @return [String] The node text
  # @raise [RuntimeError] if +path+ is not a key of +file_nodes+
  def self.fetch_file_node(file_nodes, path, source)
    file_nodes.fetch(path) do
      raise "No <file> node for #{path} found in the #{source} technicalMetadata"
    end
  end
  private_class_method :fetch_file_node

  # Splits a technicalMetadata document into its <file>...</file> sections by
  # scanning it line by line.
  # @param [String] technical_metadata A technicalMetadata datastream contents
  # @return [Hash<String,String>] The text of each <file> node from a technicalMetadata datastream, indexed by the node's id attribute
  def self.get_file_nodes(technical_metadata)
    file_hash = {}
    return file_hash if technical_metadata.nil?

    current_file = []
    path = nil
    in_file = false
    technical_metadata.each_line do |line|
      # Key on the id attribute specifically, so additional attributes on the
      # <file> element cannot be mistaken for the filename.
      if (opening = line.match(/^\s*<file\b[^>]*?\bid\s*=\s*(["'])(.*?)\1/))
        current_file << line
        path = opening[2]
        in_file = true
      elsif line =~ %r{^\s*</file>}
        current_file << line
        file_hash[path] = current_file.join
        current_file = []
        path = nil
        in_file = false
      elsif in_file
        current_file << line
      end
    end
    file_hash
  end
  private_class_method :get_file_nodes

  # @param [String] druid The identifier of the digital object being processed by the technical metadata robot
  # @param [Hash<String,String>] merged_nodes The complete set of technicalMetadata node texts for the digital object, indexed by filename
  # @return [String] The finalized technicalMetadata datastream contents for the new object version,
  #   with the nodes emitted in filename order
  def self.build_technical_metadata(druid, merged_nodes)
    doc = +<<~EOF
      <technicalMetadata objectId='#{druid}' datetime='#{Time.now.utc.iso8601}'
          xmlns:jhove='http://hul.harvard.edu/ois/xml/ns/jhove'
          xmlns:mix='http://www.loc.gov/mix/v10'
          xmlns:textmd='info:lc/xmlns/textMD-v3'>
    EOF
    merged_nodes.keys.sort.each { |path| doc << merged_nodes[path] }
    doc + '</technicalMetadata>'
  end
  private_class_method :build_technical_metadata
end
|
232
|
-
end
|