dor-services 6.8.0 → 7.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (62) hide show
  1. checksums.yaml +4 -4
  2. data/config/config_defaults.yml +0 -27
  3. data/config/dev_console_env.rb.example +0 -17
  4. data/lib/dor-services.rb +9 -73
  5. data/lib/dor/config.rb +1 -30
  6. data/lib/dor/datastreams/content_metadata_ds.rb +8 -0
  7. data/lib/dor/datastreams/desc_metadata_ds.rb +19 -0
  8. data/lib/dor/datastreams/identity_metadata_ds.rb +65 -0
  9. data/lib/dor/datastreams/rights_metadata_ds.rb +14 -2
  10. data/lib/dor/datastreams/workflow_definition_ds.rb +1 -1
  11. data/lib/dor/datastreams/workflow_ds.rb +0 -15
  12. data/lib/dor/indexers/identifiable_indexer.rb +8 -4
  13. data/lib/dor/indexers/releasable_indexer.rb +7 -1
  14. data/lib/dor/models/abstract.rb +143 -8
  15. data/lib/dor/models/admin_policy_object.rb +0 -3
  16. data/lib/dor/models/collection.rb +0 -2
  17. data/lib/dor/models/concerns/embargoable.rb +7 -60
  18. data/lib/dor/models/etd.rb +100 -0
  19. data/lib/dor/models/item.rb +12 -28
  20. data/lib/dor/models/part.rb +18 -0
  21. data/lib/dor/models/set.rb +0 -2
  22. data/lib/dor/services/collection_service.rb +36 -0
  23. data/lib/dor/services/embargo_service.rb +93 -0
  24. data/lib/dor/services/ontology.rb +0 -18
  25. data/lib/dor/services/public_desc_metadata_service.rb +7 -11
  26. data/lib/dor/services/search_service.rb +0 -40
  27. data/lib/dor/version.rb +1 -1
  28. data/lib/dor/workflow/document.rb +0 -7
  29. metadata +15 -78
  30. data/lib/dor/models/concerns/assembleable.rb +0 -18
  31. data/lib/dor/models/concerns/contentable.rb +0 -185
  32. data/lib/dor/models/concerns/describable.rb +0 -82
  33. data/lib/dor/models/concerns/eventable.rb +0 -18
  34. data/lib/dor/models/concerns/geoable.rb +0 -14
  35. data/lib/dor/models/concerns/governable.rb +0 -101
  36. data/lib/dor/models/concerns/identifiable.rb +0 -172
  37. data/lib/dor/models/concerns/itemizable.rb +0 -42
  38. data/lib/dor/models/concerns/preservable.rb +0 -46
  39. data/lib/dor/models/concerns/processable.rb +0 -86
  40. data/lib/dor/models/concerns/publishable.rb +0 -76
  41. data/lib/dor/models/concerns/releaseable.rb +0 -118
  42. data/lib/dor/models/concerns/rightsable.rb +0 -25
  43. data/lib/dor/models/concerns/shelvable.rb +0 -15
  44. data/lib/dor/models/concerns/versionable.rb +0 -72
  45. data/lib/dor/services/ability.rb +0 -77
  46. data/lib/dor/services/cleanup_reset_service.rb +0 -103
  47. data/lib/dor/services/datastream_builder.rb +0 -96
  48. data/lib/dor/services/decommission_service.rb +0 -31
  49. data/lib/dor/services/digital_stacks_service.rb +0 -125
  50. data/lib/dor/services/dublin_core_service.rb +0 -45
  51. data/lib/dor/services/file_metadata_merge_service.rb +0 -71
  52. data/lib/dor/services/indexing_service.rb +0 -131
  53. data/lib/dor/services/merge_service.rb +0 -105
  54. data/lib/dor/services/public_xml_service.rb +0 -116
  55. data/lib/dor/services/publish_metadata_service.rb +0 -99
  56. data/lib/dor/services/reset_workspace_service.rb +0 -27
  57. data/lib/dor/services/sdr_ingest_service.rb +0 -172
  58. data/lib/dor/services/secondary_file_name_service.rb +0 -10
  59. data/lib/dor/services/shelving_service.rb +0 -69
  60. data/lib/dor/services/technical_metadata_service.rb +0 -232
  61. data/lib/dor/services/version_service.rb +0 -84
  62. data/lib/dor/utils/sdr_client.rb +0 -94
@@ -1,27 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Dor
4
- # Rename the druid trees at the end of the accessionWF in order to be cleaned/deleted later.
5
- class ResetWorkspaceService
6
- def self.reset_workspace_druid_tree(druid, version, workspace_root)
7
- druid_tree_path = DruidTools::Druid.new(druid, workspace_root).pathname.to_s
8
-
9
- raise "The archived directory #{druid_tree_path}_v#{version} already existed." if File.exist?("#{druid_tree_path}_v#{version}")
10
-
11
- if File.exist?(druid_tree_path)
12
- FileUtils.mv(druid_tree_path, "#{druid_tree_path}_v#{version}")
13
- end # Else is a truncated tree where we shouldn't do anything
14
- end
15
-
16
- def self.reset_export_bag(druid, version, export_root)
17
- id = druid.split(':').last
18
- bag_dir = File.join(export_root, id)
19
-
20
- raise "The archived bag #{bag_dir}_v#{version} already existed." if File.exist?("#{bag_dir}_v#{version}")
21
-
22
- FileUtils.mv(bag_dir, "#{bag_dir}_v#{version}") if File.exist?(bag_dir)
23
-
24
- FileUtils.mv("#{bag_dir}.tar", "#{bag_dir}_v#{version}.tar") if File.exist?("#{bag_dir}.tar")
25
- end
26
- end
27
- end
@@ -1,172 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'moab/stanford'
4
-
5
- module Dor
6
- # Note: This should probably live in common-accessioning robot sdr-ingest-transfer
7
- # as that is the only robot that uses it. See also preservable concern.
8
- class SdrIngestService
9
- # @param [Dor::Item] dor_item The representation of the digital object
10
- # @param [String] _agreement_id deprecated, included for backward compatibility with common-accessioning
11
- # @return [void] Create the Moab/bag manifests for new version, export data to BagIt bag, kick off the SDR preservation workflow
12
- def self.transfer(dor_item, _agreement_id = nil)
13
- druid = dor_item.pid
14
- workspace = DruidTools::Druid.new(druid, Dor::Config.sdr.local_workspace_root)
15
- signature_catalog = get_signature_catalog(druid)
16
- new_version_id = signature_catalog.version_id + 1
17
- metadata_dir = extract_datastreams(dor_item, workspace)
18
- verify_version_metadata(metadata_dir, new_version_id)
19
- version_inventory = get_version_inventory(metadata_dir, druid, new_version_id)
20
- version_addtions = signature_catalog.version_additions(version_inventory)
21
- content_addtions = version_addtions.group('content')
22
- if content_addtions.nil? || content_addtions.files.empty?
23
- content_dir = nil
24
- else
25
- new_file_list = content_addtions.path_list
26
- content_dir = workspace.find_filelist_parent('content', new_file_list)
27
- end
28
- content_group = version_inventory.group('content')
29
- signature_catalog.normalize_group_signatures(content_group, content_dir) unless content_group.nil? || content_group.files.empty?
30
- # export the bag (in tar format)
31
- bag_dir = Pathname(Dor::Config.sdr.local_export_home).join(druid.sub('druid:', ''))
32
- bagger = Moab::Bagger.new(version_inventory, signature_catalog, bag_dir)
33
- bagger.reset_bag
34
- bagger.create_bag_inventory(:depositor)
35
- bagger.deposit_group('content', content_dir)
36
- bagger.deposit_group('metadata', metadata_dir)
37
- bagger.create_tagfiles
38
- verify_bag_structure(bag_dir)
39
- # start SDR preservation workflow (but do not create the workflows datastream)
40
- CreateWorkflowService.create_workflow(dor_item, name: 'preservationIngestWF', create_ds: false)
41
- rescue Exception => e
42
- raise Dor::Exception, "Error exporting new object version to bag: #{e.message}"
43
- end
44
-
45
- # Note: the following methods should probably all be private
46
-
47
- # @param [String] druid The object identifier
48
- # @return [Moab::SignatureCatalog] the catalog of all files previously ingested
49
- def self.get_signature_catalog(druid)
50
- Dor::Services::Client.object(druid).sdr.signature_catalog
51
- end
52
-
53
- # @param [Dor::Item] dor_item The representation of the digital object
54
- # @param [DruidTools::Druid] workspace The representation of the item's work area
55
- # @return [Pathname] Pull all the datastreams specified in the configuration file
56
- # into the workspace's metadata directory, overwriting existing file if present
57
- def self.extract_datastreams(dor_item, workspace)
58
- metadata_dir = Pathname.new(workspace.path('metadata', true))
59
- Config.sdr.datastreams.to_hash.each_pair do |ds_name, required|
60
- ds_name = ds_name.to_s
61
- metadata_file = metadata_dir.join("#{ds_name}.xml")
62
- metadata_string = get_datastream_content(dor_item, ds_name, required)
63
- metadata_file.open('w') { |f| f << metadata_string } if metadata_string
64
- end
65
- metadata_dir
66
- end
67
-
68
- # @param [Dor::Item] dor_item The representation of the digital object
69
- # @param [String] ds_name The name of the desired Fedora datastream
70
- # @param [String] required Enumeration: one of ['required', 'optional']
71
- # @return [String] return the xml text of the specified datastream if it exists.
72
- # If not found, return nil unless it is a required datastream in which case raise exception
73
- def self.get_datastream_content(dor_item, ds_name, required)
74
- ds = (ds_name == 'relationshipMetadata' ? 'RELS-EXT' : ds_name)
75
- if dor_item.datastreams.key?(ds) && !dor_item.datastreams[ds].new?
76
- return dor_item.datastreams[ds].content
77
- elsif required == 'optional'
78
- return nil
79
- else
80
- raise "required datastream #{ds_name} not found in DOR"
81
- end
82
- end
83
-
84
- # @param [Pathname] metadata_dir the location of the metadata directory in the workspace
85
- # @param [Integer] expected the version identifier expected to be used in the versionMetadata
86
- def self.verify_version_metadata(metadata_dir, expected)
87
- vmfile = metadata_dir.join('versionMetadata.xml')
88
- verify_version_id(vmfile, expected, vmfile_version_id(vmfile))
89
- true
90
- end
91
-
92
- # @param [Pathname] pathname The location of the file containing a version number
93
- # @param [Integer] expected The version number that should be in the file
94
- # @param [Integer] found The version number that is actually in the file
95
- def self.verify_version_id(pathname, expected, found)
96
- raise "Version mismatch in #{pathname}, expected #{expected}, found #{found}" unless expected == found
97
-
98
- true
99
- end
100
-
101
- # @param [Pathname] pathname the location of the versionMetadata file
102
- # @return [Integer] the versionId found in the last version element, or nil if missing
103
- def self.vmfile_version_id(pathname)
104
- verify_pathname(pathname)
105
- doc = Nokogiri::XML(File.open(pathname.to_s))
106
- nodeset = doc.xpath('/versionMetadata/version')
107
- version_id = nodeset.last['versionId']
108
- version_id.nil? ? nil : version_id.to_i
109
- end
110
-
111
- # @param [Pathname] metadata_dir The location of the object's metadata files
112
- # @param [String] druid The object identifier
113
- # @param [Integer] version_id The version number
114
- # @return [Moab::FileInventory] Generate and return a version inventory for the object
115
- def self.get_version_inventory(metadata_dir, druid, version_id)
116
- version_inventory = get_content_inventory(metadata_dir, druid, version_id)
117
- version_inventory.groups << get_metadata_file_group(metadata_dir)
118
- version_inventory
119
- end
120
-
121
- # @param [Pathname] metadata_dir The location of the object's metadata files
122
- # @param [String] druid The object identifier
123
- # @param [Integer] version_id The version number
124
- # @return [Moab::FileInventory] Parse the contentMetadata
125
- # and generate a new version inventory object containing a content group
126
- def self.get_content_inventory(metadata_dir, druid, version_id)
127
- content_metadata = get_content_metadata(metadata_dir)
128
- if content_metadata
129
- Stanford::ContentInventory.new.inventory_from_cm(content_metadata, druid, 'preserve', version_id)
130
- else
131
- Moab::FileInventory.new(type: 'version', digital_object_id: druid, version_id: version_id)
132
- end
133
- end
134
-
135
- # @param [Pathname] metadata_dir The location of the object's metadata files
136
- # @return [String] Return the contents of the contentMetadata.xml file from the metadata directory, or nil if the file does not exist
137
- def self.get_content_metadata(metadata_dir)
138
- content_metadata_pathname = metadata_dir.join('contentMetadata.xml')
139
- content_metadata_pathname.read if content_metadata_pathname.exist?
140
- end
141
-
142
- # @param [Pathname] metadata_dir The location of the object's metadata files
143
- # @return [Moab::FileGroup] Traverse the metadata directory and generate a metadata group
144
- def self.get_metadata_file_group(metadata_dir)
145
- file_group = Moab::FileGroup.new(group_id: 'metadata').group_from_directory(metadata_dir)
146
- file_group
147
- end
148
-
149
- # @param [Pathname] bag_dir the location of the bag to be verified
150
- # @return [Boolean] true if all required files exist, raises exception if not
151
- def self.verify_bag_structure(bag_dir)
152
- verify_pathname(bag_dir)
153
- verify_pathname(bag_dir.join('data'))
154
- verify_pathname(bag_dir.join('bagit.txt'))
155
- verify_pathname(bag_dir.join('bag-info.txt'))
156
- verify_pathname(bag_dir.join('manifest-sha256.txt'))
157
- verify_pathname(bag_dir.join('tagmanifest-sha256.txt'))
158
- verify_pathname(bag_dir.join('versionAdditions.xml'))
159
- verify_pathname(bag_dir.join('versionInventory.xml'))
160
- verify_pathname(bag_dir.join('data', 'metadata', 'versionMetadata.xml'))
161
- true
162
- end
163
-
164
- # @param [Pathname] pathname The file whose existence should be verified
165
- # @return [Boolean] true if file exists, raises exception if not
166
- def self.verify_pathname(pathname)
167
- raise "#{pathname.basename} not found at #{pathname}" unless pathname.exist?
168
-
169
- true
170
- end
171
- end
172
- end
@@ -1,10 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Dor
4
- # Builds a secondary file name by inserting a sequence number before the file extension (or appending it when there is no extension).
5
- class SecondaryFileNameService
6
- def self.create(old_name, sequence_num)
7
- old_name =~ /^(.*)\.(.*)$/ ? "#{Regexp.last_match(1)}_#{sequence_num}.#{Regexp.last_match(2)}" : "#{old_name}_#{sequence_num}"
8
- end
9
- end
10
- end
@@ -1,69 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Dor
4
- # Push file changes for shelve-able files into the stacks
5
- class ShelvingService
6
- def self.shelve(work)
7
- new(work).shelve
8
- end
9
-
10
- def initialize(work)
11
- @work = work
12
- end
13
-
14
- def shelve
15
- # retrieve the differences between the current contentMetadata and the previously ingested version
16
- diff = shelve_diff
17
- stacks_object_pathname = stacks_location
18
- # determine the location of the object's files in the stacks area
19
- stacks_druid = DruidTools::StacksDruid.new work.id, stacks_object_pathname
20
- stacks_object_pathname = Pathname(stacks_druid.path)
21
- # determine the location of the object's content files in the workspace area
22
- workspace_druid = DruidTools::Druid.new(work.id, Config.stacks.local_workspace_root)
23
- workspace_content_pathname = workspace_content_dir(diff, workspace_druid)
24
- # delete, rename, or copy files to the stacks area
25
- DigitalStacksService.remove_from_stacks(stacks_object_pathname, diff)
26
- DigitalStacksService.rename_in_stacks(stacks_object_pathname, diff)
27
- DigitalStacksService.shelve_to_stacks(workspace_content_pathname, stacks_object_pathname, diff)
28
- end
29
-
30
- private
31
-
32
- attr_reader :work
33
-
34
- # retrieve the differences between the current contentMetadata and the previously ingested version
35
- # (filtering to select only the files that should be shelved to stacks)
36
- def shelve_diff
37
- raise Dor::ParameterError, 'Missing Dor::Config.stacks.local_workspace_root' if Config.stacks.local_workspace_root.nil?
38
- raise Dor::Exception, 'Missing contentMetadata datastream' if work.contentMetadata.nil?
39
-
40
- client = Dor::Services::Client.object(work.pid).sdr
41
- current_content = work.contentMetadata.content
42
- inventory_diff = client.content_diff(current_content: current_content, subset: 'shelve')
43
- inventory_diff.group_difference('content')
44
- end
45
-
46
- # Find the location of the object's content files in the workspace area
47
- # @param [Moab::FileGroupDifference] content_diff The differences between the current contentMetadata and the previously ingested version
48
- # @param [DruidTools::Druid] workspace_druid the location of the object's files in the workspace area
49
- # @return [Pathname] The location of the object's content files in the workspace area
50
- def workspace_content_dir(content_diff, workspace_druid)
51
- deltas = content_diff.file_deltas
52
- filelist = deltas[:modified] + deltas[:added] + deltas[:copyadded].collect { |_old, new| new }
53
- return nil if filelist.empty?
54
-
55
- Pathname(workspace_druid.find_filelist_parent('content', filelist))
56
- end
57
-
58
- # get the stack location based on the contentMetadata stacks attribute
59
- # or using the default value from the config file if it doesn't exist
60
- def stacks_location
61
- return Config.stacks.local_stacks_root unless work.contentMetadata&.stacks.present?
62
-
63
- location = work.contentMetadata.stacks[0]
64
- return location if location.start_with? '/' # Absolute stacks path
65
-
66
- raise "stacks attribute for item: #{work.id} contentMetadata should start with /. The current value is #{location}"
67
- end
68
- end
69
- end
@@ -1,232 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'rubygems'
4
- require 'moab/stanford'
5
- require 'jhove_service'
6
- require 'dor-services'
7
-
8
- module Dor
9
- # Extracts technical metadata from files using JHOVE
10
- # If this is a new version it gets the old technicalMetadata datastream by
11
- # making an API call to sdr-services-app (via dor-services-app) and
12
- # only overwrites/adds parts for the files that were changed or added.
13
- # This allows us to avoid re-staging files that have not changed.
14
- # Switching to a more granular data model that has file metadata separate from
15
- # the Work metadata will allow us to simplify this greatly.
16
- class TechnicalMetadataService
17
- # @param [Dor::Item] dor_item The DOR item being processed by the technical metadata robot
18
- # @return [Boolean] True if technical metadata is correctly added or updated
19
- def self.add_update_technical_metadata(dor_item)
20
- test_jhove_service
21
- druid = dor_item.pid
22
- content_group_diff = get_content_group_diff(dor_item)
23
- deltas = get_file_deltas(content_group_diff)
24
- new_files = get_new_files(deltas)
25
- old_techmd = get_old_technical_metadata(dor_item)
26
- new_techmd = get_new_technical_metadata(druid, new_files)
27
- if old_techmd.nil?
28
- # this is version 1 or previous technical metadata was not saved
29
- final_techmd = new_techmd
30
- elsif content_group_diff.difference_count == 0
31
- # there have been no changes to content files from previous version
32
- return true
33
- else
34
- merged_nodes = merge_file_nodes(old_techmd, new_techmd, deltas)
35
- final_techmd = build_technical_metadata(druid, merged_nodes)
36
- end
37
- ds = dor_item.datastreams['technicalMetadata']
38
- ds.dsLabel = 'Technical Metadata'
39
- ds.content = final_techmd
40
- ds.save
41
- true
42
- end
43
-
44
- # @return [Boolean] Make sure that the jhove-service gem is loaded
45
- def self.test_jhove_service
46
- unless defined? ::JhoveService
47
- begin
48
- require 'jhove_service'
49
- rescue LoadError => e
50
- puts e.inspect
51
- raise 'jhove-service dependency gem was not found. Please add it to your Gemfile and run bundle install'
52
- end
53
- end
54
- end
55
- private_class_method :test_jhove_service
56
-
57
- # @param [Dor::Item] dor_item The DOR item being processed by the technical metadata robot
58
- # @return [FileGroupDifference] The differences between two versions of a group of files
59
- def self.get_content_group_diff(dor_item)
60
- return Moab::FileGroupDifference.new if dor_item.contentMetadata.nil?
61
- raise Dor::ParameterError, 'Missing Dor::Config.stacks.local_workspace_root' if Config.stacks.local_workspace_root.nil?
62
-
63
- client = Dor::Services::Client.object(dor_item.pid).sdr
64
- current_content = dor_item.contentMetadata.content
65
- inventory_diff = client.content_diff(current_content: current_content)
66
- inventory_diff.group_difference('content')
67
- end
68
- private_class_method :get_content_group_diff
69
-
70
- # @param [FileGroupDifference] content_group_diff
71
- # @return [Hash<Symbol,Array>] Sets of filenames grouped by change type for use in performing file or metadata operations
72
- def self.get_file_deltas(content_group_diff)
73
- content_group_diff.file_deltas
74
- end
75
- private_class_method :get_file_deltas
76
-
77
- # @param [Hash<Symbol,Array>] deltas Sets of filenames grouped by change type for use in performing file or metadata operations
78
- # @return [Array<String>] The list of filenames for files that are either added or modified since the previous version
79
- def self.get_new_files(deltas)
80
- deltas[:added] + deltas[:modified]
81
- end
82
- private_class_method :get_new_files
83
-
84
- # @param [Dor::Item] dor_item The DOR item being processed by the technical metadata robot
85
- # @return [String] The technicalMetadata datastream from the previous version of the digital object
86
- def self.get_old_technical_metadata(dor_item)
87
- sdr_techmd = get_sdr_technical_metadata(dor_item.pid)
88
- return sdr_techmd unless sdr_techmd.nil?
89
-
90
- get_dor_technical_metadata(dor_item)
91
- end
92
- private_class_method :get_old_technical_metadata
93
-
94
- # @param [String] druid The identifier of the digital object being processed by the technical metadata robot
95
- # @return [String] The technicalMetadata datastream from the previous version of the digital object (fetched from SDR storage)
96
- # The data is updated to the latest format.
97
- def self.get_sdr_technical_metadata(druid)
98
- sdr_techmd = get_sdr_metadata(druid, 'technicalMetadata')
99
- return sdr_techmd if sdr_techmd =~ /<technicalMetadata/
100
- return ::JhoveService.new.upgrade_technical_metadata(sdr_techmd) if sdr_techmd =~ /<jhove/
101
-
102
- nil
103
- end
104
- private_class_method :get_sdr_technical_metadata
105
-
106
- # @param [Dor::Item] dor_item The DOR item being processed by the technical metadata robot
107
- # @return [String] The technicalMetadata datastream from the previous version of the digital object (fetched from DOR fedora).
108
- # The data is updated to the latest format.
109
- def self.get_dor_technical_metadata(dor_item)
110
- ds = 'technicalMetadata'
111
- return nil unless dor_item.datastreams.key?(ds) && !dor_item.datastreams[ds].new?
112
-
113
- dor_techmd = dor_item.datastreams[ds].content
114
- return dor_techmd if dor_techmd =~ /<technicalMetadata/
115
- return ::JhoveService.new.upgrade_technical_metadata(dor_techmd) if dor_techmd =~ /<jhove/
116
-
117
- nil
118
- end
119
- private_class_method :get_dor_technical_metadata
120
-
121
- # @param [String] druid The identifier of the digital object being processed by the technical metadata robot
122
- # @param [String] dsname The identifier of the metadata datastream
123
- # @return [String] The datastream contents from the previous version of the digital object (fetched from SDR storage)
124
- def self.get_sdr_metadata(druid, dsname)
125
- Dor::Services::Client.object(druid).sdr.metadata(datastream: dsname)
126
- end
127
- private_class_method :get_sdr_metadata
128
-
129
- # @param [String] druid The identifier of the digital object; wrapped in a DruidTools::Druid to generate workspace paths
130
- # @param [Array<String>] new_files The list of filenames for files that are either added or modified since the previous version
131
- # @return [String] The technicalMetadata datastream for the new files of the new digital object version
132
- def self.get_new_technical_metadata(druid, new_files)
133
- return nil if new_files.nil? || new_files.empty?
134
-
135
- workspace = DruidTools::Druid.new(druid, Dor::Config.sdr.local_workspace_root)
136
- content_dir = workspace.find_filelist_parent('content', new_files)
137
- temp_dir = workspace.temp_dir
138
- jhove_service = ::JhoveService.new(temp_dir)
139
- jhove_service.digital_object_id = druid
140
- fileset_file = write_fileset(temp_dir, new_files)
141
- jhove_output_file = jhove_service.run_jhove(content_dir, fileset_file)
142
- tech_md_file = jhove_service.create_technical_metadata(jhove_output_file)
143
- IO.read(tech_md_file)
144
- end
145
- private_class_method :get_new_technical_metadata
146
-
147
- # @param [Pathname] temp_dir The pathname of the temp folder in the object's workspace area
148
- # @param [Array<String>] new_files The list of filenames for files that are either added or modified since the previous version
149
- # @return [Pathname] Save the new_files list to a text file and return that file's name
150
- def self.write_fileset(temp_dir, new_files)
151
- fileset_pathname = Pathname(temp_dir).join('jhove_fileset.txt')
152
- fileset_pathname.open('w') { |f| f.puts(new_files) }
153
- fileset_pathname
154
- end
155
- private_class_method :write_fileset
156
-
157
- # @param [String] old_techmd The technicalMetadata datastream from the previous version of the digital object
158
- # @param [String] new_techmd The technicalMetadata datastream for the new files of the new digital object version
159
- # @param [Hash<Symbol,Array>] deltas Sets of filenames grouped by change type (:identical, :modified, :added, :renamed, :copyadded)
160
- # @return [Hash<String,String>] The complete set of technicalMetadata <file> element strings for the digital object, indexed by filename
161
- def self.merge_file_nodes(old_techmd, new_techmd, deltas)
162
- old_file_nodes = get_file_nodes(old_techmd)
163
- new_file_nodes = get_file_nodes(new_techmd)
164
- merged_nodes = {}
165
- deltas[:identical].each do |path|
166
- merged_nodes[path] = old_file_nodes[path]
167
- end
168
- deltas[:modified].each do |path|
169
- merged_nodes[path] = new_file_nodes[path]
170
- end
171
- deltas[:added].each do |path|
172
- merged_nodes[path] = new_file_nodes[path]
173
- end
174
- deltas[:renamed].each do |oldpath, newpath|
175
- clone = old_file_nodes[oldpath].clone
176
- clone.sub!(/<file\s*id.*?["'].*?["'].*?>/, "<file id='#{newpath}'>")
177
- merged_nodes[newpath] = clone
178
- end
179
- deltas[:copyadded].each do |oldpath, newpath|
180
- clone = old_file_nodes[oldpath].clone
181
- clone.sub!(/<file\s*id.*?["'].*?["'].*?>/, "<file id='#{newpath}'>")
182
- merged_nodes[newpath] = clone
183
- end
184
- merged_nodes
185
- end
186
- private_class_method :merge_file_nodes
187
-
188
- # @param [String] technical_metadata A technicalMetadata datastream contents
189
- # @return [Hash<String,String>] The <file> element text from a technicalMetadata datastream, indexed by filename
190
- def self.get_file_nodes(technical_metadata)
191
- file_hash = {}
192
- return file_hash if technical_metadata.nil?
193
-
194
- current_file = []
195
- path = nil
196
- in_file = false
197
- technical_metadata.each_line do |line|
198
- if line =~ /^\s*<file.*["'](.*?)["']/
199
- current_file << line
200
- path = $1
201
- in_file = true
202
- elsif line =~ /^\s*<\/file>/
203
- current_file << line
204
- file_hash[path] = current_file.join
205
- current_file = []
206
- path = nil
207
- in_file = false
208
- elsif in_file
209
- current_file << line
210
- end
211
- end
212
- file_hash
213
- end
214
- private_class_method :get_file_nodes
215
-
216
- # @param [String] druid The identifier of the digital object being processed by the technical metadata robot
217
- # @param [Hash<String,String>] merged_nodes The complete set of technicalMetadata <file> element strings for the digital object, indexed by filename
218
- # @return [String] The finalized technicalMetadata datastream contents for the new object version
219
- def self.build_technical_metadata(druid, merged_nodes)
220
- techmd_root = +<<~EOF
221
- <technicalMetadata objectId='#{druid}' datetime='#{Time.now.utc.iso8601}'
222
- xmlns:jhove='http://hul.harvard.edu/ois/xml/ns/jhove'
223
- xmlns:mix='http://www.loc.gov/mix/v10'
224
- xmlns:textmd='info:lc/xmlns/textMD-v3'>
225
- EOF
226
- doc = techmd_root
227
- merged_nodes.keys.sort.each { |path| doc << merged_nodes[path] }
228
- doc + '</technicalMetadata>'
229
- end
230
- private_class_method :build_technical_metadata
231
- end
232
- end