dor-services 6.8.0 → 7.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. checksums.yaml +4 -4
  2. data/config/config_defaults.yml +0 -27
  3. data/config/dev_console_env.rb.example +0 -17
  4. data/lib/dor-services.rb +9 -73
  5. data/lib/dor/config.rb +1 -30
  6. data/lib/dor/datastreams/content_metadata_ds.rb +8 -0
  7. data/lib/dor/datastreams/desc_metadata_ds.rb +19 -0
  8. data/lib/dor/datastreams/identity_metadata_ds.rb +65 -0
  9. data/lib/dor/datastreams/rights_metadata_ds.rb +14 -2
  10. data/lib/dor/datastreams/workflow_definition_ds.rb +1 -1
  11. data/lib/dor/datastreams/workflow_ds.rb +0 -15
  12. data/lib/dor/indexers/identifiable_indexer.rb +8 -4
  13. data/lib/dor/indexers/releasable_indexer.rb +7 -1
  14. data/lib/dor/models/abstract.rb +143 -8
  15. data/lib/dor/models/admin_policy_object.rb +0 -3
  16. data/lib/dor/models/collection.rb +0 -2
  17. data/lib/dor/models/concerns/embargoable.rb +7 -60
  18. data/lib/dor/models/etd.rb +100 -0
  19. data/lib/dor/models/item.rb +12 -28
  20. data/lib/dor/models/part.rb +18 -0
  21. data/lib/dor/models/set.rb +0 -2
  22. data/lib/dor/services/collection_service.rb +36 -0
  23. data/lib/dor/services/embargo_service.rb +93 -0
  24. data/lib/dor/services/ontology.rb +0 -18
  25. data/lib/dor/services/public_desc_metadata_service.rb +7 -11
  26. data/lib/dor/services/search_service.rb +0 -40
  27. data/lib/dor/version.rb +1 -1
  28. data/lib/dor/workflow/document.rb +0 -7
  29. metadata +15 -78
  30. data/lib/dor/models/concerns/assembleable.rb +0 -18
  31. data/lib/dor/models/concerns/contentable.rb +0 -185
  32. data/lib/dor/models/concerns/describable.rb +0 -82
  33. data/lib/dor/models/concerns/eventable.rb +0 -18
  34. data/lib/dor/models/concerns/geoable.rb +0 -14
  35. data/lib/dor/models/concerns/governable.rb +0 -101
  36. data/lib/dor/models/concerns/identifiable.rb +0 -172
  37. data/lib/dor/models/concerns/itemizable.rb +0 -42
  38. data/lib/dor/models/concerns/preservable.rb +0 -46
  39. data/lib/dor/models/concerns/processable.rb +0 -86
  40. data/lib/dor/models/concerns/publishable.rb +0 -76
  41. data/lib/dor/models/concerns/releaseable.rb +0 -118
  42. data/lib/dor/models/concerns/rightsable.rb +0 -25
  43. data/lib/dor/models/concerns/shelvable.rb +0 -15
  44. data/lib/dor/models/concerns/versionable.rb +0 -72
  45. data/lib/dor/services/ability.rb +0 -77
  46. data/lib/dor/services/cleanup_reset_service.rb +0 -103
  47. data/lib/dor/services/datastream_builder.rb +0 -96
  48. data/lib/dor/services/decommission_service.rb +0 -31
  49. data/lib/dor/services/digital_stacks_service.rb +0 -125
  50. data/lib/dor/services/dublin_core_service.rb +0 -45
  51. data/lib/dor/services/file_metadata_merge_service.rb +0 -71
  52. data/lib/dor/services/indexing_service.rb +0 -131
  53. data/lib/dor/services/merge_service.rb +0 -105
  54. data/lib/dor/services/public_xml_service.rb +0 -116
  55. data/lib/dor/services/publish_metadata_service.rb +0 -99
  56. data/lib/dor/services/reset_workspace_service.rb +0 -27
  57. data/lib/dor/services/sdr_ingest_service.rb +0 -172
  58. data/lib/dor/services/secondary_file_name_service.rb +0 -10
  59. data/lib/dor/services/shelving_service.rb +0 -69
  60. data/lib/dor/services/technical_metadata_service.rb +0 -232
  61. data/lib/dor/services/version_service.rb +0 -84
  62. data/lib/dor/utils/sdr_client.rb +0 -94
@@ -1,27 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Dor
4
- # Rename the druid trees at the end of the accessionWF in order to be cleaned/deleted later.
5
- class ResetWorkspaceService
6
- def self.reset_workspace_druid_tree(druid, version, workspace_root)
7
- druid_tree_path = DruidTools::Druid.new(druid, workspace_root).pathname.to_s
8
-
9
- raise "The archived directory #{druid_tree_path}_v#{version} already existed." if File.exist?("#{druid_tree_path}_v#{version}")
10
-
11
- if File.exist?(druid_tree_path)
12
- FileUtils.mv(druid_tree_path, "#{druid_tree_path}_v#{version}")
13
- end # Else is a truncated tree where we shouldn't do anything
14
- end
15
-
16
- def self.reset_export_bag(druid, version, export_root)
17
- id = druid.split(':').last
18
- bag_dir = File.join(export_root, id)
19
-
20
- raise "The archived bag #{bag_dir}_v#{version} already existed." if File.exist?("#{bag_dir}_v#{version}")
21
-
22
- FileUtils.mv(bag_dir, "#{bag_dir}_v#{version}") if File.exist?(bag_dir)
23
-
24
- FileUtils.mv("#{bag_dir}.tar", "#{bag_dir}_v#{version}.tar") if File.exist?("#{bag_dir}.tar")
25
- end
26
- end
27
- end
@@ -1,172 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'moab/stanford'
4
-
5
- module Dor
6
- # Note: This should probably live in common-accessioning robot sdr-ingest-transfer
7
- # as that is the only robot that uses it. See also preservable concern.
8
- class SdrIngestService
9
- # @param [Dor::Item] dor_item The representation of the digital object
10
- # @param [String] _agreement_id deprecated, included for backward compatibility with common-accessioning
11
- # @return [void] Create the Moab/bag manifests for new version, export data to BagIt bag, kick off the SDR preservation workflow
12
- def self.transfer(dor_item, _agreement_id = nil)
13
- druid = dor_item.pid
14
- workspace = DruidTools::Druid.new(druid, Dor::Config.sdr.local_workspace_root)
15
- signature_catalog = get_signature_catalog(druid)
16
- new_version_id = signature_catalog.version_id + 1
17
- metadata_dir = extract_datastreams(dor_item, workspace)
18
- verify_version_metadata(metadata_dir, new_version_id)
19
- version_inventory = get_version_inventory(metadata_dir, druid, new_version_id)
20
- version_addtions = signature_catalog.version_additions(version_inventory)
21
- content_addtions = version_addtions.group('content')
22
- if content_addtions.nil? || content_addtions.files.empty?
23
- content_dir = nil
24
- else
25
- new_file_list = content_addtions.path_list
26
- content_dir = workspace.find_filelist_parent('content', new_file_list)
27
- end
28
- content_group = version_inventory.group('content')
29
- signature_catalog.normalize_group_signatures(content_group, content_dir) unless content_group.nil? || content_group.files.empty?
30
- # export the bag (in tar format)
31
- bag_dir = Pathname(Dor::Config.sdr.local_export_home).join(druid.sub('druid:', ''))
32
- bagger = Moab::Bagger.new(version_inventory, signature_catalog, bag_dir)
33
- bagger.reset_bag
34
- bagger.create_bag_inventory(:depositor)
35
- bagger.deposit_group('content', content_dir)
36
- bagger.deposit_group('metadata', metadata_dir)
37
- bagger.create_tagfiles
38
- verify_bag_structure(bag_dir)
39
- # start SDR preservation workflow (but do not create the workflows datastream)
40
- CreateWorkflowService.create_workflow(dor_item, name: 'preservationIngestWF', create_ds: false)
41
- rescue Exception => e
42
- raise Dor::Exception, "Error exporting new object version to bag: #{e.message}"
43
- end
44
-
45
- # Note: the following methods should probably all be private
46
-
47
- # @param [String] druid The object identifier
48
- # @return [Moab::SignatureCatalog] the catalog of all files previously ingested
49
- def self.get_signature_catalog(druid)
50
- Dor::Services::Client.object(druid).sdr.signature_catalog
51
- end
52
-
53
- # @param [Dor::Item] dor_item The representation of the digital object
54
- # @param [DruidTools::Druid] workspace The representation of the item's work area
55
- # @return [Pathname] Pull all the datastreams specified in the configuration file
56
- # into the workspace's metadata directory, overwriting existing file if present
57
- def self.extract_datastreams(dor_item, workspace)
58
- metadata_dir = Pathname.new(workspace.path('metadata', true))
59
- Config.sdr.datastreams.to_hash.each_pair do |ds_name, required|
60
- ds_name = ds_name.to_s
61
- metadata_file = metadata_dir.join("#{ds_name}.xml")
62
- metadata_string = get_datastream_content(dor_item, ds_name, required)
63
- metadata_file.open('w') { |f| f << metadata_string } if metadata_string
64
- end
65
- metadata_dir
66
- end
67
-
68
- # @param [Dor::Item] dor_item The representation of the digital object
69
- # @param [String] ds_name The name of the desired Fedora datastream
70
- # @param [String] required Enumeration: one of ['required', 'optional']
71
- # @return [String] return the xml text of the specified datastream if it exists.
72
- # If not found, return nil unless it is a required datastream in which case raise exception
73
- def self.get_datastream_content(dor_item, ds_name, required)
74
- ds = (ds_name == 'relationshipMetadata' ? 'RELS-EXT' : ds_name)
75
- if dor_item.datastreams.key?(ds) && !dor_item.datastreams[ds].new?
76
- return dor_item.datastreams[ds].content
77
- elsif required == 'optional'
78
- return nil
79
- else
80
- raise "required datastream #{ds_name} not found in DOR"
81
- end
82
- end
83
-
84
- # @param [Pathname] metadata_dir the location of the metadata directory in the workspace
85
- # @param [Integer] expected the version identifier expected to be used in the versionMetadata
86
- def self.verify_version_metadata(metadata_dir, expected)
87
- vmfile = metadata_dir.join('versionMetadata.xml')
88
- verify_version_id(vmfile, expected, vmfile_version_id(vmfile))
89
- true
90
- end
91
-
92
- # @param [Pathname] pathname The location of the file containing a version number
93
- # @param [Integer] expected The version number that should be in the file
94
- # @param [Integer] found The version number that is actually in the file
95
- def self.verify_version_id(pathname, expected, found)
96
- raise "Version mismatch in #{pathname}, expected #{expected}, found #{found}" unless expected == found
97
-
98
- true
99
- end
100
-
101
- # @param [Pathname] pathname the location of the versionMetadata file
102
- # @return [Integer] the versionId found in the last version element, or nil if missing
103
- def self.vmfile_version_id(pathname)
104
- verify_pathname(pathname)
105
- doc = Nokogiri::XML(File.open(pathname.to_s))
106
- nodeset = doc.xpath('/versionMetadata/version')
107
- version_id = nodeset.last['versionId']
108
- version_id.nil? ? nil : version_id.to_i
109
- end
110
-
111
- # @param [Pathname] metadata_dir The location of the object's metadata files
112
- # @param [String] druid The object identifier
113
- # @param [Integer] version_id The version number
114
- # @return [Moab::FileInventory] Generate and return a version inventory for the object
115
- def self.get_version_inventory(metadata_dir, druid, version_id)
116
- version_inventory = get_content_inventory(metadata_dir, druid, version_id)
117
- version_inventory.groups << get_metadata_file_group(metadata_dir)
118
- version_inventory
119
- end
120
-
121
- # @param [Pathname] metadata_dir The location of the object's metadata files
122
- # @param [String] druid The object identifier
123
- # @param [Integer] version_id The version number
124
- # @return [Moab::FileInventory] Parse the contentMetadata
125
- # and generate a new version inventory object containing a content group
126
- def self.get_content_inventory(metadata_dir, druid, version_id)
127
- content_metadata = get_content_metadata(metadata_dir)
128
- if content_metadata
129
- Stanford::ContentInventory.new.inventory_from_cm(content_metadata, druid, 'preserve', version_id)
130
- else
131
- Moab::FileInventory.new(type: 'version', digital_object_id: druid, version_id: version_id)
132
- end
133
- end
134
-
135
- # @param [Pathname] metadata_dir The location of the object's metadata files
136
- # @return [String] Return the contents of the contentMetadata.xml file from the metadata directory, or nil if the file does not exist
137
- def self.get_content_metadata(metadata_dir)
138
- content_metadata_pathname = metadata_dir.join('contentMetadata.xml')
139
- content_metadata_pathname.read if content_metadata_pathname.exist?
140
- end
141
-
142
- # @param [Pathname] metadata_dir The location of the object's metadata files
143
- # @return [Moab::FileGroup] Traverse the metadata directory and generate a metadata group
144
- def self.get_metadata_file_group(metadata_dir)
145
- file_group = Moab::FileGroup.new(group_id: 'metadata').group_from_directory(metadata_dir)
146
- file_group
147
- end
148
-
149
- # @param [Pathname] bag_dir the location of the bag to be verified
150
- # @return [Boolean] true if all required files exist, raises exception if not
151
- def self.verify_bag_structure(bag_dir)
152
- verify_pathname(bag_dir)
153
- verify_pathname(bag_dir.join('data'))
154
- verify_pathname(bag_dir.join('bagit.txt'))
155
- verify_pathname(bag_dir.join('bag-info.txt'))
156
- verify_pathname(bag_dir.join('manifest-sha256.txt'))
157
- verify_pathname(bag_dir.join('tagmanifest-sha256.txt'))
158
- verify_pathname(bag_dir.join('versionAdditions.xml'))
159
- verify_pathname(bag_dir.join('versionInventory.xml'))
160
- verify_pathname(bag_dir.join('data', 'metadata', 'versionMetadata.xml'))
161
- true
162
- end
163
-
164
- # @param [Pathname] pathname The file whose existence should be verified
165
- # @return [Boolean] true if file exists, raises exception if not
166
- def self.verify_pathname(pathname)
167
- raise "#{pathname.basename} not found at #{pathname}" unless pathname.exist?
168
-
169
- true
170
- end
171
- end
172
- end
@@ -1,10 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Dor
4
- # Builds a secondary file name by inserting a sequence number before the file extension (or appending it when there is no extension).
5
- class SecondaryFileNameService
6
- def self.create(old_name, sequence_num)
7
- old_name =~ /^(.*)\.(.*)$/ ? "#{Regexp.last_match(1)}_#{sequence_num}.#{Regexp.last_match(2)}" : "#{old_name}_#{sequence_num}"
8
- end
9
- end
10
- end
@@ -1,69 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Dor
4
- # Push file changes for shelve-able files into the stacks
5
- class ShelvingService
6
- def self.shelve(work)
7
- new(work).shelve
8
- end
9
-
10
- def initialize(work)
11
- @work = work
12
- end
13
-
14
- def shelve
15
- # retrieve the differences between the current contentMetadata and the previously ingested version
16
- diff = shelve_diff
17
- stacks_object_pathname = stacks_location
18
- # determine the location of the object's files in the stacks area
19
- stacks_druid = DruidTools::StacksDruid.new work.id, stacks_object_pathname
20
- stacks_object_pathname = Pathname(stacks_druid.path)
21
- # determine the location of the object's content files in the workspace area
22
- workspace_druid = DruidTools::Druid.new(work.id, Config.stacks.local_workspace_root)
23
- workspace_content_pathname = workspace_content_dir(diff, workspace_druid)
24
- # delete, rename, or copy files to the stacks area
25
- DigitalStacksService.remove_from_stacks(stacks_object_pathname, diff)
26
- DigitalStacksService.rename_in_stacks(stacks_object_pathname, diff)
27
- DigitalStacksService.shelve_to_stacks(workspace_content_pathname, stacks_object_pathname, diff)
28
- end
29
-
30
- private
31
-
32
- attr_reader :work
33
-
34
- # retrieve the differences between the current contentMetadata and the previously ingested version
35
- # (filtering to select only the files that should be shelved to stacks)
36
- def shelve_diff
37
- raise Dor::ParameterError, 'Missing Dor::Config.stacks.local_workspace_root' if Config.stacks.local_workspace_root.nil?
38
- raise Dor::Exception, 'Missing contentMetadata datastream' if work.contentMetadata.nil?
39
-
40
- client = Dor::Services::Client.object(work.pid).sdr
41
- current_content = work.contentMetadata.content
42
- inventory_diff = client.content_diff(current_content: current_content, subset: 'shelve')
43
- inventory_diff.group_difference('content')
44
- end
45
-
46
- # Find the location of the object's content files in the workspace area
47
- # @param [Moab::FileGroupDifference] content_diff The differences between the current contentMetadata and the previously ingested version
48
- # @param [DruidTools::Druid] workspace_druid the location of the object's files in the workspace area
49
- # @return [Pathname] The location of the object's content files in the workspace area
50
- def workspace_content_dir(content_diff, workspace_druid)
51
- deltas = content_diff.file_deltas
52
- filelist = deltas[:modified] + deltas[:added] + deltas[:copyadded].collect { |_old, new| new }
53
- return nil if filelist.empty?
54
-
55
- Pathname(workspace_druid.find_filelist_parent('content', filelist))
56
- end
57
-
58
- # get the stack location based on the contentMetadata stacks attribute
59
- # or using the default value from the config file if it doesn't exist
60
- def stacks_location
61
- return Config.stacks.local_stacks_root unless work.contentMetadata&.stacks.present?
62
-
63
- location = work.contentMetadata.stacks[0]
64
- return location if location.start_with? '/' # Absolute stacks path
65
-
66
- raise "stacks attribute for item: #{work.id} contentMetadata should start with /. The current value is #{location}"
67
- end
68
- end
69
- end
@@ -1,232 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'rubygems'
4
- require 'moab/stanford'
5
- require 'jhove_service'
6
- require 'dor-services'
7
-
8
- module Dor
9
- # Extracts technical metadata from files using JHOVE
10
- # If this is a new version it gets the old technicalMetadata datastream by
11
- # making an API call to sdr-services-app (via dor-services-app) and
12
- # only overwrites/adds parts for the files that were changed or added.
13
- # This allows us to avoid re-staging files that have not changed.
14
- # Switching to a more granular data model that has file metadata separate from
15
- # the Work metadata will allow us to simplify this greatly.
16
- class TechnicalMetadataService
17
- # @param [Dor::Item] dor_item The DOR item being processed by the technical metadata robot
18
- # @return [Boolean] True if technical metadata is correctly added or updated
19
- def self.add_update_technical_metadata(dor_item)
20
- test_jhove_service
21
- druid = dor_item.pid
22
- content_group_diff = get_content_group_diff(dor_item)
23
- deltas = get_file_deltas(content_group_diff)
24
- new_files = get_new_files(deltas)
25
- old_techmd = get_old_technical_metadata(dor_item)
26
- new_techmd = get_new_technical_metadata(druid, new_files)
27
- if old_techmd.nil?
28
- # this is version 1 or previous technical metadata was not saved
29
- final_techmd = new_techmd
30
- elsif content_group_diff.difference_count == 0
31
- # there have been no changes to content files from previous version
32
- return true
33
- else
34
- merged_nodes = merge_file_nodes(old_techmd, new_techmd, deltas)
35
- final_techmd = build_technical_metadata(druid, merged_nodes)
36
- end
37
- ds = dor_item.datastreams['technicalMetadata']
38
- ds.dsLabel = 'Technical Metadata'
39
- ds.content = final_techmd
40
- ds.save
41
- true
42
- end
43
-
44
- # @return [Boolean] Make sure that the jhove-service gem is loaded
45
- def self.test_jhove_service
46
- unless defined? ::JhoveService
47
- begin
48
- require 'jhove_service'
49
- rescue LoadError => e
50
- puts e.inspect
51
- raise 'jhove-service dependency gem was not found. Please add it to your Gemfile and run bundle install'
52
- end
53
- end
54
- end
55
- private_class_method :test_jhove_service
56
-
57
- # @param [Dor::Item] dor_item The DOR item being processed by the technical metadata robot
58
- # @return [FileGroupDifference] The differences between two versions of a group of files
59
- def self.get_content_group_diff(dor_item)
60
- return Moab::FileGroupDifference.new if dor_item.contentMetadata.nil?
61
- raise Dor::ParameterError, 'Missing Dor::Config.stacks.local_workspace_root' if Config.stacks.local_workspace_root.nil?
62
-
63
- client = Dor::Services::Client.object(dor_item.pid).sdr
64
- current_content = dor_item.contentMetadata.content
65
- inventory_diff = client.content_diff(current_content: current_content)
66
- inventory_diff.group_difference('content')
67
- end
68
- private_class_method :get_content_group_diff
69
-
70
- # @param [FileGroupDifference] content_group_diff
71
- # @return [Hash<Symbol,Array>] Sets of filenames grouped by change type for use in performing file or metadata operations
72
- def self.get_file_deltas(content_group_diff)
73
- content_group_diff.file_deltas
74
- end
75
- private_class_method :get_file_deltas
76
-
77
- # @param [Hash<Symbol,Array>] deltas Sets of filenames grouped by change type for use in performing file or metadata operations
78
- # @return [Array<String>] The list of filenames for files that are either added or modified since the previous version
79
- def self.get_new_files(deltas)
80
- deltas[:added] + deltas[:modified]
81
- end
82
- private_class_method :get_new_files
83
-
84
- # @param [Dor::Item] dor_item The DOR item being processed by the technical metadata robot
85
- # @return [String] The technicalMetadata datastream from the previous version of the digital object
86
- def self.get_old_technical_metadata(dor_item)
87
- sdr_techmd = get_sdr_technical_metadata(dor_item.pid)
88
- return sdr_techmd unless sdr_techmd.nil?
89
-
90
- get_dor_technical_metadata(dor_item)
91
- end
92
- private_class_method :get_old_technical_metadata
93
-
94
- # @param [String] druid The identifier of the digital object being processed by the technical metadata robot
95
- # @return [String] The technicalMetadata datastream from the previous version of the digital object (fetched from SDR storage)
96
- # The data is updated to the latest format.
97
- def self.get_sdr_technical_metadata(druid)
98
- sdr_techmd = get_sdr_metadata(druid, 'technicalMetadata')
99
- return sdr_techmd if sdr_techmd =~ /<technicalMetadata/
100
- return ::JhoveService.new.upgrade_technical_metadata(sdr_techmd) if sdr_techmd =~ /<jhove/
101
-
102
- nil
103
- end
104
- private_class_method :get_sdr_technical_metadata
105
-
106
- # @param [Dor::Item] dor_item The DOR item being processed by the technical metadata robot
107
- # @return [String] The technicalMetadata datastream from the previous version of the digital object (fetched from DOR fedora).
108
- # The data is updated to the latest format.
109
- def self.get_dor_technical_metadata(dor_item)
110
- ds = 'technicalMetadata'
111
- return nil unless dor_item.datastreams.key?(ds) && !dor_item.datastreams[ds].new?
112
-
113
- dor_techmd = dor_item.datastreams[ds].content
114
- return dor_techmd if dor_techmd =~ /<technicalMetadata/
115
- return ::JhoveService.new.upgrade_technical_metadata(dor_techmd) if dor_techmd =~ /<jhove/
116
-
117
- nil
118
- end
119
- private_class_method :get_dor_technical_metadata
120
-
121
- # @param [String] druid The identifier of the digital object being processed by the technical metadata robot
122
- # @param [String] dsname The identifier of the metadata datastream
123
- # @return [String] The datastream contents from the previous version of the digital object (fetched from SDR storage)
124
- def self.get_sdr_metadata(druid, dsname)
125
- Dor::Services::Client.object(druid).sdr.metadata(datastream: dsname)
126
- end
127
- private_class_method :get_sdr_metadata
128
-
129
- # @param [DruidTools::Druid] druid A wrapper class for the druid identifier. Used to generate paths
130
- # @param [Array<String>] new_files The list of filenames for files that are either added or modified since the previous version
131
- # @return [String] The technicalMetadata datastream for the new files of the new digital object version
132
- def self.get_new_technical_metadata(druid, new_files)
133
- return nil if new_files.nil? || new_files.empty?
134
-
135
- workspace = DruidTools::Druid.new(druid, Dor::Config.sdr.local_workspace_root)
136
- content_dir = workspace.find_filelist_parent('content', new_files)
137
- temp_dir = workspace.temp_dir
138
- jhove_service = ::JhoveService.new(temp_dir)
139
- jhove_service.digital_object_id = druid
140
- fileset_file = write_fileset(temp_dir, new_files)
141
- jhove_output_file = jhove_service.run_jhove(content_dir, fileset_file)
142
- tech_md_file = jhove_service.create_technical_metadata(jhove_output_file)
143
- IO.read(tech_md_file)
144
- end
145
- private_class_method :get_new_technical_metadata
146
-
147
- # @param [Pathname] temp_dir The pathname of the temp folder in the object's workspace area
148
- # @param [Array<String>] new_files The list of filenames for files that are either added or modified since the previous version
149
- # @return [Pathname] Save the new_files list to a text file and return that file's name
150
- def self.write_fileset(temp_dir, new_files)
151
- fileset_pathname = Pathname(temp_dir).join('jhove_fileset.txt')
152
- fileset_pathname.open('w') { |f| f.puts(new_files) }
153
- fileset_pathname
154
- end
155
- private_class_method :write_fileset
156
-
157
- # @param [String] old_techmd The technicalMetadata datastream from the previous version of the digital object
158
- # @param [String] new_techmd The technicalMetadata datastream for the new files of the new digital object version
159
- # @param [Hash<Symbol,Array>] deltas Sets of filenames grouped by change type (:identical, :modified, :added, :renamed, :copyadded)
160
- # @return [Hash<String,Nokogiri::XML::Node>] The complete set of technicalMetadata nodes for the digital object, indexed by filename
161
- def self.merge_file_nodes(old_techmd, new_techmd, deltas)
162
- old_file_nodes = get_file_nodes(old_techmd)
163
- new_file_nodes = get_file_nodes(new_techmd)
164
- merged_nodes = {}
165
- deltas[:identical].each do |path|
166
- merged_nodes[path] = old_file_nodes[path]
167
- end
168
- deltas[:modified].each do |path|
169
- merged_nodes[path] = new_file_nodes[path]
170
- end
171
- deltas[:added].each do |path|
172
- merged_nodes[path] = new_file_nodes[path]
173
- end
174
- deltas[:renamed].each do |oldpath, newpath|
175
- clone = old_file_nodes[oldpath].clone
176
- clone.sub!(/<file\s*id.*?["'].*?["'].*?>/, "<file id='#{newpath}'>")
177
- merged_nodes[newpath] = clone
178
- end
179
- deltas[:copyadded].each do |oldpath, newpath|
180
- clone = old_file_nodes[oldpath].clone
181
- clone.sub!(/<file\s*id.*?["'].*?["'].*?>/, "<file id='#{newpath}'>")
182
- merged_nodes[newpath] = clone
183
- end
184
- merged_nodes
185
- end
186
- private_class_method :merge_file_nodes
187
-
188
- # @param [String] technical_metadata A technicalMetadata datastream contents
189
- # @return [Hash<String,Nokogiri::XML::Node>] The set of nodes from a technicalMetadata datastream, indexed by filename
190
- def self.get_file_nodes(technical_metadata)
191
- file_hash = {}
192
- return file_hash if technical_metadata.nil?
193
-
194
- current_file = []
195
- path = nil
196
- in_file = false
197
- technical_metadata.each_line do |line|
198
- if line =~ /^\s*<file.*["'](.*?)["']/
199
- current_file << line
200
- path = $1
201
- in_file = true
202
- elsif line =~ /^\s*<\/file>/
203
- current_file << line
204
- file_hash[path] = current_file.join
205
- current_file = []
206
- path = nil
207
- in_file = false
208
- elsif in_file
209
- current_file << line
210
- end
211
- end
212
- file_hash
213
- end
214
- private_class_method :get_file_nodes
215
-
216
- # @param [String] druid The identifier of the digital object being processed by the technical metadata robot
217
- # @param [Hash<String,Nokogiri::XML::Node>] merged_nodes The complete set of technicalMetadata nodes for the digital object, indexed by filename
218
- # @return [String] The finalized technicalMetadata datastream contents for the new object version
219
- def self.build_technical_metadata(druid, merged_nodes)
220
- techmd_root = +<<~EOF
221
- <technicalMetadata objectId='#{druid}' datetime='#{Time.now.utc.iso8601}'
222
- xmlns:jhove='http://hul.harvard.edu/ois/xml/ns/jhove'
223
- xmlns:mix='http://www.loc.gov/mix/v10'
224
- xmlns:textmd='info:lc/xmlns/textMD-v3'>
225
- EOF
226
- doc = techmd_root
227
- merged_nodes.keys.sort.each { |path| doc << merged_nodes[path] }
228
- doc + '</technicalMetadata>'
229
- end
230
- private_class_method :build_technical_metadata
231
- end
232
- end