ocfl-tools 0.9.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +53 -0
- data/.rubocop.yml +35 -0
- data/.rubocop_todo.yml +189 -0
- data/.travis.yml +12 -0
- data/Gemfile +5 -0
- data/README.md +647 -0
- data/Rakefile +13 -0
- data/VERSION +1 -0
- data/examples/list_files.rb +56 -0
- data/examples/validate_object.rb +23 -0
- data/lib/ocfl-tools.rb +19 -0
- data/lib/ocfl_tools.rb +17 -0
- data/lib/ocfl_tools/config.rb +27 -0
- data/lib/ocfl_tools/ocfl_actions.rb +146 -0
- data/lib/ocfl_tools/ocfl_delta.rb +250 -0
- data/lib/ocfl_tools/ocfl_deposit.rb +685 -0
- data/lib/ocfl_tools/ocfl_errors.rb +23 -0
- data/lib/ocfl_tools/ocfl_inventory.rb +95 -0
- data/lib/ocfl_tools/ocfl_object.rb +425 -0
- data/lib/ocfl_tools/ocfl_results.rb +272 -0
- data/lib/ocfl_tools/ocfl_validator.rb +799 -0
- data/lib/ocfl_tools/ocfl_verify.rb +493 -0
- data/lib/ocfl_tools/utils.rb +127 -0
- data/lib/ocfl_tools/utils_file.rb +195 -0
- data/lib/ocfl_tools/utils_inventory.rb +96 -0
- data/ocfl-tools.gemspec +31 -0
- data/results_codes.md +106 -0
- data/test-it.sh +11 -0
- metadata +191 -0
data/Rakefile
ADDED
data/VERSION
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
0.9.14
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# A simple example to demonstrate the relationship between logical content in an OCFL object
|
|
2
|
+
# and the fully-resolved path to those binaries on the local storage system.
|
|
3
|
+
|
|
4
|
+
require 'ocfl-tools'
require 'optparse'

# Example script: prints every logical path in one version of an OCFL object
# next to the on-disk location of the bitstream that backs it.
#
# Options:
#   -d, --dir DIRECTORY      (required) root directory of the OCFL object.
#   -v, --version VERSION    (optional) version number; defaults to the head version.

options = {}

parser = OptionParser.new do |opts|
  opts.on('-d DIRECTORY', '--dir DIRECTORY', 'A directory containing an OCFL object') do |dir|
    raise "#{dir} is not a valid directory path." unless Dir.exist?(dir)

    options[:object_root] = dir
  end

  # DecimalInteger makes OptionParser reject non-numeric input (InvalidArgument)
  # instead of String#to_i quietly turning it into version 0.
  opts.on('-v VERSION', '--version VERSION', OptionParser::DecimalInteger,
          'An optional version number') do |ver|
    options[:version] = ver
  end
end

parser.parse(ARGV)

raise OptionParser::MissingArgument, '--dir DIRECTORY' if options[:object_root].nil?

object_root = options[:object_root]

# The inventory we're working on might not conform to the site default version format.
# Inspect the object root to determine what version format we should use, and use it.
OcflTools.config.version_format = OcflTools::Utils::Files.get_version_format(object_root)

# Get the latest inventory file from the object root.
inventory_file = OcflTools::Utils::Files.get_latest_inventory(object_root)

# Create an ocfl object from that inventory.
ocfl_object = OcflTools::OcflInventory.new.from_file(inventory_file)

# If we've been asked for a specific version, use it; otherwise use the head version.
version = options[:version] || OcflTools::Utils.version_string_to_int(ocfl_object.head)

# Prepend the object root path to each content_path to get fully-resolvable files.
# A new hash is built so the hash handed back by get_files is left untouched.
local_files = ocfl_object.get_files(version).map do |logical_path, content_path|
  [logical_path, File.join(object_root, content_path)]
end.to_h

# Output a pretty result, for demo purposes.
local_files.each do |logical_path, resolved_path|
  puts " #{logical_path} => #{resolved_path}"
end
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# Usage: ruby ./validate_object.rb -d /path/to/directory/to/check
|
|
2
|
+
require 'ocfl-tools'
require 'optparse'

# Example script: validates the OCFL object found at the directory given with
# -d/--dir and prints the validation results.

options = {}

parser = OptionParser.new do |cli|
  cli.on('-d DIRECTORY', '--dir DIRECTORY', 'A directory containing an OCFL object') do |dir|
    raise "#{dir} is not a valid directory path." unless Dir.exist?(dir)

    options[:object_root] = dir
  end
end

parser.parse(ARGV)

# The directory option is mandatory.
raise OptionParser::MissingArgument if options[:object_root].nil?

validator = OcflTools::OcflValidator.new(options[:object_root])
validator.validate_ocfl_object_root.print
|
data/lib/ocfl-tools.rb
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# OcflTools is a module that provides a distinctive namespace for classes that create,
|
|
4
|
+
# maintain and read Oxford Common File Layout preservation objects.
|
|
5
|
+
#
|
|
6
|
+
# ====Data Model
|
|
7
|
+
#
|
|
8
|
+
# * <b>{OcflObject} = an object that models the internal data structures of an OCFL manifest.</b>
|
|
9
|
+
# * {OcflInventory} = An I/O interface for {OcflObject} allowing the reading and creation of OCFL inventory.json files.
|
|
10
|
+
#
|
|
11
|
+
# @note Copyright (c) 2019 by The Board of Trustees of the Leland Stanford Junior University.
|
|
12
|
+
|
|
13
|
+
require 'ocfl_tools'
|
|
14
|
+
require 'json'
|
|
15
|
+
require 'anyway'
|
|
16
|
+
require 'fileutils'
|
|
17
|
+
require 'digest'
|
|
18
|
+
require 'time' # for iso8601 checking.
|
|
19
|
+
require 'uri' # for, well, uri testing.
|
data/lib/ocfl_tools.rb
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module OcflTools
|
|
4
|
+
require 'ocfl_tools/ocfl_object'
|
|
5
|
+
require 'ocfl_tools/ocfl_inventory'
|
|
6
|
+
require 'ocfl_tools/ocfl_verify'
|
|
7
|
+
require 'ocfl_tools/ocfl_deposit'
|
|
8
|
+
require 'ocfl_tools/ocfl_validator'
|
|
9
|
+
require 'ocfl_tools/ocfl_results'
|
|
10
|
+
require 'ocfl_tools/ocfl_delta'
|
|
11
|
+
require 'ocfl_tools/ocfl_actions'
|
|
12
|
+
require 'ocfl_tools/ocfl_errors'
|
|
13
|
+
require 'ocfl_tools/config'
|
|
14
|
+
require 'ocfl_tools/utils'
|
|
15
|
+
require 'ocfl_tools/utils_file'
|
|
16
|
+
require 'ocfl_tools/utils_inventory'
|
|
17
|
+
end
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'anyway'
|
|
4
|
+
|
|
5
|
+
module OcflTools
  # Site-wide configuration settings for OCFL-Tools, declared through the
  # 'anyway' gem's attr_config. Each setting and its default value:
  #
  #   version_format:    'v%04d'
  #   content_type:      'https://ocfl.io/1.0/spec/#inventory'
  #   content_directory: 'content'
  #   digest_algorithm:  'sha512'
  #   fixity_algorithms: ['md5', 'sha1', 'sha256']
  #   ocfl_version:      '1.0'
  class Config < Anyway::Config
    attr_config(
      version_format: 'v%04d',
      content_type: 'https://ocfl.io/1.0/spec/#inventory',
      content_directory: 'content',
      digest_algorithm: 'sha512',
      # site-specific allowable fixity algorithms
      fixity_algorithms: %w[md5 sha1 sha256],
      ocfl_version: '1.0'
    )
  end

  class << self
    # Returns the shared Config instance, creating it on first use.
    # @return [OcflTools::Config]
    def config
      @config ||= Config.new
    end
  end
end
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module OcflTools
|
|
4
|
+
# Class for collating manifest actions, both for delta reporting and staging new versions.
|
|
5
|
+
class OcflActions
  # Action categories created up front, in the order they appear in reports.
  ACTION_TYPES = %w[update_manifest add update copy move delete].freeze

  # Starts with one empty bucket per action category. The 'fixity' bucket is
  # only created when {#fixity} is first called.
  def initialize
    @my_actions = {}
    ACTION_TYPES.each { |type| @my_actions[type] = {} }
  end

  # Convenience method for obtaining a hash of recorded actions.
  # Categories with no entries are removed from the stored hash before it is
  # returned; they are recreated on demand if something is recorded later.
  # @return [Hash] of actions stored in this instance (the live internal hash).
  def actions
    # Don't return empty keys.
    @my_actions.delete_if { |_type, entries| entries.empty? }
  end

  # Same as {#actions}.
  # @return [Hash] of actions stored in this instance.
  def all
    actions
  end

  # Creates an 'update_manifest' entry in the actions hash.
  # @param [String] digest of the filepath being recorded.
  # @param [Pathname] filepath of file to record.
  # @return [Array] of filepaths recorded for this digest under 'update_manifest'.
  def update_manifest(digest, filepath)
    record('update_manifest', digest, filepath)
  end

  # Creates an 'add' entry in the actions hash.
  # @param [String] digest of the filepath being recorded.
  # @param [Pathname] filepath of file to record.
  # @return [Array] of filepaths recorded for this digest under 'add'.
  def add(digest, filepath)
    record('add', digest, filepath)
  end

  # Creates an 'update' entry in the actions hash.
  # @param [String] digest of the filepath being recorded.
  # @param [Pathname] filepath of file to record.
  # @return [Array] of filepaths recorded for this digest under 'update'.
  def update(digest, filepath)
    record('update', digest, filepath)
  end

  # Creates a 'copy' entry in the actions hash.
  # @param [String] digest of the filepath being recorded.
  # @param [Pathname] filepath of file to record.
  # @return [Array] of filepaths recorded for this digest under 'copy'.
  def copy(digest, filepath)
    record('copy', digest, filepath)
  end

  # Creates a 'move' entry in the actions hash.
  # @param [String] digest of the filepath being recorded.
  # @param [Pathname] filepath of file to record.
  # @return [Array] of filepaths recorded for this digest under 'move'.
  def move(digest, filepath)
    record('move', digest, filepath)
  end

  # Creates a 'delete' entry in the actions hash.
  # @param [String] digest of the filepath being recorded.
  # @param [Pathname] filepath of file to record.
  # @return [Array] of filepaths recorded for this digest under 'delete'.
  def delete(digest, filepath)
    record('delete', digest, filepath)
  end

  # Records an additional fixity digest for a file. The first value recorded
  # for a given algorithm/digest pair wins; later calls do not overwrite it.
  # @param [String] digest of the filepath that is getting additional fixity values.
  # @param [String] fixity_algorithm of the fixity digest being added (e.g. 'md5', 'sha1').
  # @param [String] fixity_digest to associate with this digest.
  # @return [String] the fixity digest stored for this digest and algorithm.
  def fixity(digest, fixity_algorithm, fixity_digest)
    # Only create this key if used.
    by_algorithm = (@my_actions['fixity'] ||= {})
    digests = (by_algorithm[fixity_algorithm] ||= {})
    # only add unique fixity digests.
    digests.fetch(digest) { digests[digest] = fixity_digest }
  end

  private

  # Appends filepath to the list kept for digest under the given action
  # category, skipping duplicates. The category bucket is (re)created when
  # absent, so recording still works after #actions/#all pruned it.
  # @return [Array] of filepaths recorded for this digest under the category.
  def record(action, digest, filepath)
    paths = ((@my_actions[action] ||= {})[digest] ||= [])
    # Only put unique values into filepaths
    paths << filepath unless paths.include?(filepath)
    paths
  end
end
|
|
146
|
+
end
|
|
@@ -0,0 +1,250 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module OcflTools
|
|
4
|
+
# Given an inventory, show changes from previous versions.
|
|
5
|
+
# OcflDelta takes in an OCFL Inventory object and creates a delta hash containing
|
|
6
|
+
# the actions performed to assemble the requested version.
|
|
7
|
+
class OcflDelta
  # @return [Hash] deltas computed so far, keyed by version string (e.g. 'v0002').
  attr_reader :delta

  # @param ocfl_object an inventory-like object. It must define the instance
  #   variables @id, @head, @manifest, @versions and @fixity and respond to
  #   get_state, version_id_list and get_digest; this class also calls its
  #   versions, manifest and contentDirectory readers.
  # @raise [RuntimeError] if the object fails the checks above or has no versions.
  def initialize(ocfl_object)
    # Duck sanity check.
    ['@id', '@head', '@manifest', '@versions', '@fixity'].each do |var|
      next if ocfl_object.instance_variable_defined?(var)

      raise "Object #{ocfl_object} does not have instance var #{var} defined"
    end

    %w[get_state version_id_list get_digest].each do |mthd|
      next if ocfl_object.respond_to?(mthd)

      raise "Object #{ocfl_object} does not respond to #{mthd}"
    end

    @ocfl_object = ocfl_object
    @delta = {}
    @version_format = detect_version_format
  end

  # Generates a complete delta hash for all versions of this object.
  # @return [Hash] of version string => actions hash.
  def all
    @ocfl_object.version_id_list.each { |version| get_version_delta(version) }
    @delta
  end

  # Given a version, get the delta from the previous version.
  # @param [Integer] version of object to get deltas for.
  # @return [Hash] of actions applied to previous version to create current version.
  # @raise [RuntimeError] if version is not 1 and is not in the object's version_id_list.
  def previous(version)
    return get_first_version_delta if version == 1

    unless @ocfl_object.version_id_list.include?(version)
      raise "Version #{version} not found in #{@ocfl_object}!"
    end

    get_version_delta(version)
  end

  private

  # Works out the version directory format from the object's own version keys.
  # We can't trust the site config setting for this, as there's no guarantee the
  # inventory we are reading in was created at this site. Version keys are
  # assumed to start with 'v'.
  # @return [String] a format string such as 'v%d' or 'v%04d'.
  def detect_version_format
    first_version = @ocfl_object.versions.keys.min # should get us 'v0001' or 'v1'
    raise "Object #{@ocfl_object} has no versions defined" if first_version.nil?

    digits = first_version.sub(/\Av/, '') # cut the leading 'v' from the string.
    # A length of 1 for the first version implies 'v1' (no zero padding).
    digits.length == 1 ? 'v%d' : "v%0#{digits.length}d"
  end

  # Computes the actions that turn version - 1 into version, stores them in
  # @delta under the version string (replacing any earlier result) and returns them.
  # @param [Integer] version to compare against its predecessor.
  # @return [Hash] of recorded actions.
  def get_version_delta(version)
    return get_first_version_delta unless version > 1

    current_digests = @ocfl_object.get_state(version)
    current_files = OcflTools::Utils::Files.invert_and_expand(current_digests)

    previous_digests = @ocfl_object.get_state(version - 1)
    previous_files = OcflTools::Utils::Files.invert_and_expand(previous_digests)

    # Digests / filepaths that exist only in the previous version.
    missing_digests = previous_digests.reject { |digest, _paths| current_digests.key?(digest) }
    missing_files = previous_files.reject { |file, _digest| current_files.key?(file) }

    # Digests / filepaths that exist only in the current version.
    new_digests = current_digests.reject { |digest, _paths| previous_digests.key?(digest) }
    new_files = current_files.reject { |file, _digest| previous_files.key?(file) }

    # Digests present in both versions; their filepaths may still differ.
    unchanged_digests = current_digests.select { |digest, _paths| previous_digests.key?(digest) }

    actions = OcflTools::OcflActions.new

    new_digests.each do |digest, filepaths|
      filepaths.each do |file|
        if new_files[file] == digest
          # 1. ADD is new digest, new filepath.
          actions.add(digest, file)
        elsif current_files[file] == digest
          # 2. UPDATE is new digest, existing filepath.
          actions.update(digest, file)
        else
          next
        end
        update_manifest_action(digest, version, actions)
      end
    end

    unchanged_digests.each do |digest, filepaths|
      previous_paths = previous_digests[digest]

      if filepaths.size > previous_paths.size
        # 3. COPY is unchanged digest, additional (new) filepath.
        (filepaths - previous_paths).each { |copied| actions.copy(digest, copied) }
      elsif filepaths.size == previous_paths.size
        # 4. MOVE is unchanged digest, 1 deleted filepath, 1 added filepath.
        # Report the path that actually disappeared and the one that actually
        # appeared, not merely the first path listed for the digest.
        old_filename = (previous_paths - filepaths).find { |path| missing_files.key?(path) }
        new_filename = (filepaths - previous_paths).find { |path| new_files.key?(path) }
        next unless old_filename && new_filename

        actions.move(digest, old_filename)
        actions.move(digest, new_filename)
      else
        # 5. One possible DELETE is unchanged digest, fewer filepaths.
        # Only paths that are gone from this version count; a path that now
        # carries a different digest is not a delete.
        (previous_paths - filepaths).each do |deleted|
          actions.delete(digest, deleted) if missing_files.key?(deleted)
        end
      end
    end

    # 6. DELETE of last filepath is where there's a missing_digest && the filepath is gone too.
    missing_digests.each do |digest, filepaths|
      filepaths.each do |filepath|
        actions.delete(digest, filepath) unless current_files.key?(filepath)
      end
    end

    @delta[format(@version_format, version.to_i)] = actions.all
  end

  # Records an 'update_manifest' action for every manifest content path of
  # digest that lives in this version's content directory, i.e. bitstreams
  # that were added to this version directory. The recorded path has the
  # "<version>/<contentDirectory>/" prefix removed.
  # @param [String] digest to look up in the object's manifest.
  # @param [Integer] version whose directory is being inspected.
  # @param [OcflTools::OcflActions] action collector to record into.
  def update_manifest_action(digest, version, action)
    prefix = "#{format(@version_format, version.to_i)}/#{@ocfl_object.contentDirectory}/"
    @ocfl_object.manifest[digest].each do |content_path|
      next unless content_path.start_with?(prefix)

      # Slicing builds a new string, so the object's manifest is never modified.
      action.update_manifest(digest, content_path[prefix.length..-1])
    end
  end

  # Builds the delta for version 1, where everything in get_state is an 'add'.
  # Stores the result in @delta (replacing any earlier result) and returns it.
  # @return [Hash] of recorded actions.
  def get_first_version_delta
    version = 1
    actions = OcflTools::OcflActions.new

    @ocfl_object.get_state(version).each do |digest, filepaths|
      filepaths.each { |file| actions.add(digest, file) }
      # The manifest lookup depends only on the digest, so do it once per digest.
      update_manifest_action(digest, version, actions) unless filepaths.empty?
    end

    @delta[format(@version_format, version)] = actions.all
  end
end
|
|
250
|
+
end
|