ocfl-tools 0.9.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +53 -0
- data/.rubocop.yml +35 -0
- data/.rubocop_todo.yml +189 -0
- data/.travis.yml +12 -0
- data/Gemfile +5 -0
- data/README.md +647 -0
- data/Rakefile +13 -0
- data/VERSION +1 -0
- data/examples/list_files.rb +56 -0
- data/examples/validate_object.rb +23 -0
- data/lib/ocfl-tools.rb +19 -0
- data/lib/ocfl_tools.rb +17 -0
- data/lib/ocfl_tools/config.rb +27 -0
- data/lib/ocfl_tools/ocfl_actions.rb +146 -0
- data/lib/ocfl_tools/ocfl_delta.rb +250 -0
- data/lib/ocfl_tools/ocfl_deposit.rb +685 -0
- data/lib/ocfl_tools/ocfl_errors.rb +23 -0
- data/lib/ocfl_tools/ocfl_inventory.rb +95 -0
- data/lib/ocfl_tools/ocfl_object.rb +425 -0
- data/lib/ocfl_tools/ocfl_results.rb +272 -0
- data/lib/ocfl_tools/ocfl_validator.rb +799 -0
- data/lib/ocfl_tools/ocfl_verify.rb +493 -0
- data/lib/ocfl_tools/utils.rb +127 -0
- data/lib/ocfl_tools/utils_file.rb +195 -0
- data/lib/ocfl_tools/utils_inventory.rb +96 -0
- data/ocfl-tools.gemspec +31 -0
- data/results_codes.md +106 -0
- data/test-it.sh +11 -0
- metadata +191 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: 915f99359028f587423cb4125cc197215fd2d6f0ae3a663c2d421ba2fa636d04
|
|
4
|
+
data.tar.gz: 7aaa9773441a5a672b9a94852946cdb7819d6aecc58ee176995b1cfd64ea8173
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: b06efb97d5cb2dc0a0376310ffefb66c93bce411a932de1afe7f49f4ee200f774c9d6465c424207cc390c35804151b03e9b370b8ca4397fb44b161d0cf9aee40
|
|
7
|
+
data.tar.gz: b29499a9bcbba6cb3dedfc140d6208a42ad712af0d70a1fd4b96802f4f58bc2f02532b97bb7d63138e3a93b17005939de6ed6ffe909545fe452f2b4e5e30fc0e
|
data/.gitignore
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
*.gem
|
|
2
|
+
*.rbc
|
|
3
|
+
/.config
|
|
4
|
+
/coverage/
|
|
5
|
+
/InstalledFiles
|
|
6
|
+
/pkg/
|
|
7
|
+
/spec/reports/
|
|
8
|
+
/spec/examples.txt
|
|
9
|
+
/test/tmp/
|
|
10
|
+
/test/version_tmp/
|
|
11
|
+
/tmp/
|
|
12
|
+
|
|
13
|
+
.DS_Store
|
|
14
|
+
|
|
15
|
+
# Used by dotenv library to load environment variables.
|
|
16
|
+
# .env
|
|
17
|
+
|
|
18
|
+
## Specific to RubyMotion:
|
|
19
|
+
.dat*
|
|
20
|
+
.repl_history
|
|
21
|
+
build/
|
|
22
|
+
*.bridgesupport
|
|
23
|
+
build-iPhoneOS/
|
|
24
|
+
build-iPhoneSimulator/
|
|
25
|
+
|
|
26
|
+
## Specific to RubyMotion (use of CocoaPods):
|
|
27
|
+
#
|
|
28
|
+
# We recommend against adding the Pods directory to your .gitignore. However
|
|
29
|
+
# you should judge for yourself, the pros and cons are mentioned at:
|
|
30
|
+
# https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control
|
|
31
|
+
#
|
|
32
|
+
# vendor/Pods/
|
|
33
|
+
|
|
34
|
+
## Documentation cache and generated files:
|
|
35
|
+
/.yardoc/
|
|
36
|
+
/_yardoc/
|
|
37
|
+
/doc/
|
|
38
|
+
/rdoc/
|
|
39
|
+
|
|
40
|
+
## Environment normalization:
|
|
41
|
+
/.bundle/
|
|
42
|
+
/vendor/bundle
|
|
43
|
+
/lib/bundler/man/
|
|
44
|
+
|
|
45
|
+
# for a library or gem, you might want to ignore these files since the code is
|
|
46
|
+
# intended to run in multiple environments; otherwise, check them in:
|
|
47
|
+
# Gemfile.lock
|
|
48
|
+
# .ruby-version
|
|
49
|
+
# .ruby-gemset
|
|
50
|
+
|
|
51
|
+
# unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
|
|
52
|
+
.rvmrc
|
|
53
|
+
Gemfile.lock
|
data/.rubocop.yml
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
inherit_from: .rubocop_todo.yml
|
|
2
|
+
|
|
3
|
+
Lint/UselessAssignment:
|
|
4
|
+
Exclude:
|
|
5
|
+
- 'lib/ocfl_tools/ocfl_deposit.rb'
|
|
6
|
+
- 'lib/ocfl_tools/utils.rb'
|
|
7
|
+
- 'spec/ocfl_actions_spec.rb'
|
|
8
|
+
- 'spec/ocfl_inventory_spec.rb'
|
|
9
|
+
- 'spec/ocfl_validator_spec.rb'
|
|
10
|
+
|
|
11
|
+
Metrics/BlockLength:
|
|
12
|
+
Exclude:
|
|
13
|
+
- 'spec/ocfl_actions_spec.rb'
|
|
14
|
+
- 'spec/ocfl_checksums_spec.rb'
|
|
15
|
+
- 'spec/ocfl_inventory_spec.rb'
|
|
16
|
+
- 'spec/ocfl_object_spec.rb'
|
|
17
|
+
- 'spec/ocfl_results_spec.rb'
|
|
18
|
+
|
|
19
|
+
Metrics/LineLength:
|
|
20
|
+
Exclude:
|
|
21
|
+
- 'ocfl-tools.gemspec'
|
|
22
|
+
- 'spec/ocfl_checksums_spec.rb'
|
|
23
|
+
- 'spec/ocfl_delta_spec.rb'
|
|
24
|
+
- 'spec/ocfl_deposit_spec.rb'
|
|
25
|
+
- 'spec/ocfl_inventory_spec.rb'
|
|
26
|
+
- 'spec/ocfl_object_spec.rb'
|
|
27
|
+
- 'spec/ocfl_results_spec.rb'
|
|
28
|
+
- 'spec/ocfl_utils_file_spec.rb'
|
|
29
|
+
- 'spec/ocfl_validator_spec.rb'
|
|
30
|
+
- 'spec/ocfl_verify_spec.rb'
|
|
31
|
+
|
|
32
|
+
Naming/MethodName:
|
|
33
|
+
Exclude:
|
|
34
|
+
- 'lib/ocfl_tools/ocfl_verify.rb'
|
|
35
|
+
- 'lib/ocfl_tools/utils_inventory.rb'
|
data/.rubocop_todo.yml
ADDED
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
# This configuration was generated by
|
|
2
|
+
# `rubocop --auto-gen-config`
|
|
3
|
+
# on 2019-11-21 23:10:46 -0800 using RuboCop version 0.76.0.
|
|
4
|
+
# The point is for the user to remove these configuration records
|
|
5
|
+
# one by one as the offenses are removed from the code base.
|
|
6
|
+
# Note that changes in the inspected code, or installation of new
|
|
7
|
+
# versions of RuboCop, may require this file to be generated again.
|
|
8
|
+
|
|
9
|
+
# Offense count: 1
|
|
10
|
+
# Cop supports --auto-correct.
|
|
11
|
+
# Configuration parameters: EnforcedStyle.
|
|
12
|
+
# SupportedStyles: empty_lines, no_empty_lines
|
|
13
|
+
Layout/EmptyLinesAroundBlockBody:
|
|
14
|
+
Exclude:
|
|
15
|
+
|
|
16
|
+
# Offense count: 3
|
|
17
|
+
# Cop supports --auto-correct.
|
|
18
|
+
# Configuration parameters: EnforcedStyleAlignWith, AutoCorrect, Severity.
|
|
19
|
+
# SupportedStylesAlignWith: keyword, variable, start_of_line
|
|
20
|
+
Layout/EndAlignment:
|
|
21
|
+
Exclude:
|
|
22
|
+
|
|
23
|
+
# Offense count: 1
|
|
24
|
+
Lint/ShadowingOuterLocalVariable:
|
|
25
|
+
Exclude:
|
|
26
|
+
|
|
27
|
+
# Offense count: 27
|
|
28
|
+
Metrics/AbcSize:
|
|
29
|
+
Max: 102
|
|
30
|
+
|
|
31
|
+
# Offense count: 5
|
|
32
|
+
# Configuration parameters: CountComments, ExcludedMethods.
|
|
33
|
+
# ExcludedMethods: refine
|
|
34
|
+
Metrics/BlockLength:
|
|
35
|
+
Max: 118
|
|
36
|
+
|
|
37
|
+
# Offense count: 6
|
|
38
|
+
# Configuration parameters: CountComments.
|
|
39
|
+
Metrics/ClassLength:
|
|
40
|
+
Max: 449
|
|
41
|
+
|
|
42
|
+
# Offense count: 8
|
|
43
|
+
Metrics/CyclomaticComplexity:
|
|
44
|
+
Max: 24
|
|
45
|
+
|
|
46
|
+
# Offense count: 40
|
|
47
|
+
# Configuration parameters: CountComments, ExcludedMethods.
|
|
48
|
+
Metrics/MethodLength:
|
|
49
|
+
Max: 122
|
|
50
|
+
|
|
51
|
+
# Offense count: 1
|
|
52
|
+
# Configuration parameters: CountComments.
|
|
53
|
+
Metrics/ModuleLength:
|
|
54
|
+
Max: 136
|
|
55
|
+
|
|
56
|
+
# Offense count: 7
|
|
57
|
+
Metrics/PerceivedComplexity:
|
|
58
|
+
Max: 30
|
|
59
|
+
|
|
60
|
+
# Offense count: 8
|
|
61
|
+
Naming/AccessorMethodName:
|
|
62
|
+
Exclude:
|
|
63
|
+
- 'lib/ocfl_tools/ocfl_delta.rb'
|
|
64
|
+
- 'lib/ocfl_tools/ocfl_object.rb'
|
|
65
|
+
- 'lib/ocfl_tools/ocfl_results.rb'
|
|
66
|
+
|
|
67
|
+
# Offense count: 1
|
|
68
|
+
# Configuration parameters: ExpectMatchingDefinition, Regex, IgnoreExecutableScripts, AllowedAcronyms.
|
|
69
|
+
# AllowedAcronyms: CLI, DSL, ACL, API, ASCII, CPU, CSS, DNS, EOF, GUID, HTML, HTTP, HTTPS, ID, IP, JSON, LHS, QPS, RAM, RHS, RPC, SLA, SMTP, SQL, SSH, TCP, TLS, TTL, UDP, UI, UID, UUID, URI, URL, UTF8, VM, XML, XMPP, XSRF, XSS
|
|
70
|
+
Naming/FileName:
|
|
71
|
+
Exclude:
|
|
72
|
+
- 'lib/ocfl-tools.rb'
|
|
73
|
+
|
|
74
|
+
# Offense count: 5
|
|
75
|
+
# Configuration parameters: MinNameLength, AllowNamesEndingInNumbers, AllowedNames, ForbiddenNames.
|
|
76
|
+
# AllowedNames: io, id, to, by, on, in, at, ip, db, os
|
|
77
|
+
Naming/UncommunicativeMethodParamName:
|
|
78
|
+
Exclude:
|
|
79
|
+
- 'lib/ocfl_tools/ocfl_object.rb'
|
|
80
|
+
- 'lib/ocfl_tools/utils.rb'
|
|
81
|
+
- 'lib/ocfl_tools/utils_file.rb'
|
|
82
|
+
- 'lib/ocfl_tools/utils_inventory.rb'
|
|
83
|
+
|
|
84
|
+
# Offense count: 44
|
|
85
|
+
# Configuration parameters: EnforcedStyle.
|
|
86
|
+
# SupportedStyles: snake_case, camelCase
|
|
87
|
+
Naming/VariableName:
|
|
88
|
+
Exclude:
|
|
89
|
+
- 'lib/ocfl_tools/ocfl_deposit.rb'
|
|
90
|
+
- 'lib/ocfl_tools/ocfl_inventory.rb'
|
|
91
|
+
- 'lib/ocfl_tools/ocfl_object.rb'
|
|
92
|
+
- 'lib/ocfl_tools/ocfl_validator.rb'
|
|
93
|
+
- 'lib/ocfl_tools/utils_file.rb'
|
|
94
|
+
- 'lib/ocfl_tools/utils_inventory.rb'
|
|
95
|
+
|
|
96
|
+
# Offense count: 1
|
|
97
|
+
Style/CommentedKeyword:
|
|
98
|
+
Exclude:
|
|
99
|
+
- 'lib/ocfl_tools/ocfl_verify.rb'
|
|
100
|
+
|
|
101
|
+
# Offense count: 1
|
|
102
|
+
# Cop supports --auto-correct.
|
|
103
|
+
# Configuration parameters: EnforcedStyle, SingleLineConditionsOnly, IncludeTernaryExpressions.
|
|
104
|
+
# SupportedStyles: assign_to_condition, assign_inside_condition
|
|
105
|
+
Style/ConditionalAssignment:
|
|
106
|
+
Exclude:
|
|
107
|
+
- 'lib/ocfl_tools/ocfl_delta.rb'
|
|
108
|
+
|
|
109
|
+
# Offense count: 4
|
|
110
|
+
Style/Documentation:
|
|
111
|
+
Exclude:
|
|
112
|
+
- 'spec/**/*'
|
|
113
|
+
- 'test/**/*'
|
|
114
|
+
- 'lib/ocfl_tools.rb'
|
|
115
|
+
- 'lib/ocfl_tools/config.rb'
|
|
116
|
+
- 'lib/ocfl_tools/utils.rb'
|
|
117
|
+
- 'lib/ocfl_tools/utils_file.rb'
|
|
118
|
+
|
|
119
|
+
# Offense count: 1
|
|
120
|
+
# Cop supports --auto-correct.
|
|
121
|
+
# Configuration parameters: EnforcedStyle.
|
|
122
|
+
# SupportedStyles: empty, nil, both
|
|
123
|
+
Style/EmptyElse:
|
|
124
|
+
Exclude:
|
|
125
|
+
- 'lib/ocfl_tools/ocfl_validator.rb'
|
|
126
|
+
|
|
127
|
+
# Offense count: 12
|
|
128
|
+
# Configuration parameters: MinBodyLength.
|
|
129
|
+
Style/GuardClause:
|
|
130
|
+
Exclude:
|
|
131
|
+
- 'lib/ocfl_tools/ocfl_actions.rb'
|
|
132
|
+
- 'lib/ocfl_tools/ocfl_deposit.rb'
|
|
133
|
+
- 'lib/ocfl_tools/ocfl_results.rb'
|
|
134
|
+
|
|
135
|
+
# Offense count: 8
|
|
136
|
+
Style/IdenticalConditionalBranches:
|
|
137
|
+
Exclude:
|
|
138
|
+
- 'lib/ocfl_tools/ocfl_object.rb'
|
|
139
|
+
- 'lib/ocfl_tools/ocfl_validator.rb'
|
|
140
|
+
- 'lib/ocfl_tools/utils_file.rb'
|
|
141
|
+
|
|
142
|
+
# Offense count: 65
|
|
143
|
+
# Cop supports --auto-correct.
|
|
144
|
+
Style/IfUnlessModifier:
|
|
145
|
+
Exclude:
|
|
146
|
+
- 'lib/ocfl_tools/ocfl_actions.rb'
|
|
147
|
+
- 'lib/ocfl_tools/ocfl_delta.rb'
|
|
148
|
+
- 'lib/ocfl_tools/ocfl_deposit.rb'
|
|
149
|
+
- 'lib/ocfl_tools/ocfl_inventory.rb'
|
|
150
|
+
- 'lib/ocfl_tools/ocfl_object.rb'
|
|
151
|
+
- 'lib/ocfl_tools/ocfl_results.rb'
|
|
152
|
+
- 'lib/ocfl_tools/ocfl_validator.rb'
|
|
153
|
+
- 'lib/ocfl_tools/ocfl_verify.rb'
|
|
154
|
+
- 'lib/ocfl_tools/utils.rb'
|
|
155
|
+
- 'lib/ocfl_tools/utils_file.rb'
|
|
156
|
+
- 'lib/ocfl_tools/utils_inventory.rb'
|
|
157
|
+
|
|
158
|
+
# Offense count: 4
|
|
159
|
+
# Cop supports --auto-correct.
|
|
160
|
+
# Configuration parameters: EnforcedStyle, MinBodyLength.
|
|
161
|
+
# SupportedStyles: skip_modifier_ifs, always
|
|
162
|
+
Style/Next:
|
|
163
|
+
Exclude:
|
|
164
|
+
- 'lib/ocfl_tools/ocfl_delta.rb'
|
|
165
|
+
- 'lib/ocfl_tools/ocfl_deposit.rb'
|
|
166
|
+
|
|
167
|
+
# Offense count: 4
|
|
168
|
+
# Cop supports --auto-correct.
|
|
169
|
+
# Configuration parameters: AutoCorrect, EnforcedStyle, IgnoredMethods.
|
|
170
|
+
# SupportedStyles: predicate, comparison
|
|
171
|
+
Style/NumericPredicate:
|
|
172
|
+
Exclude:
|
|
173
|
+
- 'spec/**/*'
|
|
174
|
+
- 'lib/ocfl_tools/ocfl_deposit.rb'
|
|
175
|
+
|
|
176
|
+
# Offense count: 1
|
|
177
|
+
# Cop supports --auto-correct.
|
|
178
|
+
# Configuration parameters: EnforcedStyle, AllowInnerSlashes.
|
|
179
|
+
# SupportedStyles: slashes, percent_r, mixed
|
|
180
|
+
Style/RegexpLiteral:
|
|
181
|
+
Exclude:
|
|
182
|
+
- 'lib/ocfl_tools/ocfl_delta.rb'
|
|
183
|
+
|
|
184
|
+
# Offense count: 475
|
|
185
|
+
# Cop supports --auto-correct.
|
|
186
|
+
# Configuration parameters: AutoCorrect, AllowHeredoc, AllowURI, URISchemes, IgnoreCopDirectives, IgnoredPatterns.
|
|
187
|
+
# URISchemes: http, https
|
|
188
|
+
Metrics/LineLength:
|
|
189
|
+
Max: 292
|
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/README.md
ADDED
|
@@ -0,0 +1,647 @@
|
|
|
1
|
+
# OCFL-Tools
|
|
2
|
+
|
|
3
|
+
[![Build Status](https://travis-ci.org/sul-dlss-labs/OCFL-Tools.svg?branch=master)](https://travis-ci.org/sul-dlss-labs/OCFL-Tools)
|
|
4
|
+
|
|
5
|
+
Ruby gem to manipulate Oxford Common File Layout preservation objects (https://ocfl.io).
|
|
6
|
+
Classes provide APIs to create objects and versions, perform typical file operations, verify
|
|
7
|
+
compliance of the resulting object and serialize it to an inventory.json file.
|
|
8
|
+
Can also read in an existing inventory.json to verify, manipulate, and produce
|
|
9
|
+
an updated inventory file.
|
|
10
|
+
|
|
11
|
+
This is beta software. No guarantee of fitness for purpose is made.
|
|
12
|
+
|
|
13
|
+
## Quickstart
|
|
14
|
+
|
|
15
|
+
### Install Ruby > 2.5.3
|
|
16
|
+
|
|
17
|
+
See: https://www.ruby-lang.org/en/documentation/installation/
|
|
18
|
+
|
|
19
|
+
### Install OCFL-Tools gem
|
|
20
|
+
|
|
21
|
+
RubyGems is part of all modern distributions of Ruby.
|
|
22
|
+
|
|
23
|
+
```
|
|
24
|
+
gem install ocfl-tools
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
### Get the example scripts
|
|
28
|
+
```
|
|
29
|
+
wget https://raw.githubusercontent.com/sul-dlss-labs/OCFL-Tools/master/examples/list_files.rb
|
|
30
|
+
wget https://raw.githubusercontent.com/sul-dlss-labs/OCFL-Tools/master/examples/validate_object.rb
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
### Check out a copy of the OCFL Sample Fixtures
|
|
34
|
+
```
|
|
35
|
+
git clone https://github.com/OCFL/fixtures.git
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
### Validate a fixture
|
|
39
|
+
|
|
40
|
+
From the directory you downloaded the example scripts to, do:
|
|
41
|
+
```
|
|
42
|
+
ruby ./validate_object.rb -d /[full path to fixture checkout dir]/fixtures/1.0/objects/of3
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
### List all files in latest version of a fixture
|
|
46
|
+
|
|
47
|
+
From the directory you downloaded the example scripts to, do:
|
|
48
|
+
```
|
|
49
|
+
ruby ./list_files.rb -d /[full path to fixture checkout dir]/fixtures/1.0/objects/of3
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
### List all files in version 1 of a fixture
|
|
53
|
+
|
|
54
|
+
From the directory you downloaded the example scripts to, do:
|
|
55
|
+
```
|
|
56
|
+
ruby ./list_files.rb -d /[full path to fixture checkout dir]/fixtures/1.0/objects/of3 -v 1
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
## Development setup (assuming bundler is installed)
|
|
62
|
+
|
|
63
|
+
```
|
|
64
|
+
git clone https://github.com/sul-dlss-labs/OCFL-Tools.git
|
|
65
|
+
cd OCFL-Tools
|
|
66
|
+
bundle # to install dependencies
|
|
67
|
+
rake # to run rspec/rubocop
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
## Basic Usage
|
|
71
|
+
|
|
72
|
+
```
|
|
73
|
+
require 'ocfl-tools'
|
|
74
|
+
|
|
75
|
+
# Set our version string format; 5 characters, 4 of which are 0-padded integers.
|
|
76
|
+
OcflTools.config.version_format = "v%04d" # default value, yields 'v0001' etc.
|
|
77
|
+
|
|
78
|
+
# Set our digest algorithm
|
|
79
|
+
OcflTools.config.digest_algorithm = 'sha256' # default is sha512
|
|
80
|
+
|
|
81
|
+
# set our object's content directory name
|
|
82
|
+
OcflTools.config.content_directory = 'data' # default is 'content'
|
|
83
|
+
|
|
84
|
+
# Optionally, set allowed digest algorithms for the fixity block.
|
|
85
|
+
OcflTools.config.fixity_algorithms = ['md5', 'sha1', 'sha256'] # default values
|
|
86
|
+
|
|
87
|
+
ocfl = OcflTools::OcflInventory.new
|
|
88
|
+
|
|
89
|
+
ocfl.id = 'bb123cd4567'
|
|
90
|
+
|
|
91
|
+
ocfl.get_version(1) # Creates initial version.
|
|
92
|
+
|
|
93
|
+
ocfl.set_version_message(1, 'My first version!')
|
|
94
|
+
ocfl.add_file('my_content/this_is_a_file.txt', 'checksum_aaaaaaaaaaaa', 1)
|
|
95
|
+
|
|
96
|
+
# Create a new version and add a 2nd file
|
|
97
|
+
ocfl.add_file('my_content/a_second_file.txt', 'checksum_bbbbbbbbbbbb', 2)
|
|
98
|
+
|
|
99
|
+
# Create a third version and add a 3rd file.
|
|
100
|
+
ocfl.add_file('my_content/a_third_file.txt', 'checksum_cccccccccccc', 3)
|
|
101
|
+
|
|
102
|
+
# Make a (deduplicated) copy of that 3rd file in version 3.
|
|
103
|
+
ocfl.copy_file('my_content/a_third_file.txt', 'my_content/a_copy_of_third_file.txt', 3)
|
|
104
|
+
|
|
105
|
+
# or if you don't want to deduplicate the file, this also works:
|
|
106
|
+
ocfl.add_file('my_content/a_copy_of_third_file.txt', 'checksum_cccccccccccc', 3)
|
|
107
|
+
|
|
108
|
+
# Delete a file from version 3.
|
|
109
|
+
ocfl.delete_file('my_content/this_is_a_file.txt', 3)
|
|
110
|
+
|
|
111
|
+
# Create a 4th version where the bitstream of an existing file is modified.
|
|
112
|
+
# 1. add the file's bitstream to the object:
|
|
113
|
+
ocfl.update_manifest('my_content/a_second_file.txt', 'checksum_dddddddddddd', 4)
|
|
114
|
+
|
|
115
|
+
# 2. Update an existing logical filepath to point to the new bitstream.
|
|
116
|
+
ocfl.update_file('my_content/a_second_file.txt', 'checksum_dddddddddddd', 4)
|
|
117
|
+
|
|
118
|
+
# Still in version 4, move a file to a new location (functionally an add-then-delete).
|
|
119
|
+
ocfl.move_file('my_content/a_copy_of_third_file.txt', 'another_dir/a_copy_of_third_file.txt', 4)
|
|
120
|
+
|
|
121
|
+
# Add (optional) additional fixity checksums to an existing file:
|
|
122
|
+
ocfl.update_fixity('checksum_cccccccccccc', 'md5', 'an_md5_checksum_for_this_file')
|
|
123
|
+
ocfl.update_fixity('checksum_cccccccccccc', 'sha1', 'a_sha1_checksum_for_this_file')
|
|
124
|
+
|
|
125
|
+
# Remember we're using the digest of the file to positively identify it, which
|
|
126
|
+
# is why we use the digest, not the file path, to associate an additional checksum with that file.
|
|
127
|
+
# The actual fixity block in the inventory will include an array of all files
|
|
128
|
+
# for which the checksum applies.
|
|
129
|
+
|
|
130
|
+
# Output the complete inventory.json.
|
|
131
|
+
puts ocfl.serialize
|
|
132
|
+
|
|
133
|
+
# If you want the object output to an inventory.json file, call #to_file.
|
|
134
|
+
# This will also generate the appropriate digest sidecar file.
|
|
135
|
+
ocfl.to_file('/directory/to/put/inventory/in/')
|
|
136
|
+
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
## Validating OCFL objects
|
|
140
|
+
|
|
141
|
+
The prime use case of this gem is to inspect directories for well-formed OCFL objects
|
|
142
|
+
and perform verification actions on them: ensuring that they are syntactically correct and
|
|
143
|
+
that all files referenced in the OCFL object exist on disk and match their stored digest values.
|
|
144
|
+
|
|
145
|
+
There are four levels of verification available, each checking a different aspect of the OCFL object.
|
|
146
|
+
|
|
147
|
+
### Verify Structure
|
|
148
|
+
|
|
149
|
+
This check inspects a given directory on disk for "OCFL-ness". It attempts to deduce the version
|
|
150
|
+
directory naming convention, checks for the presence of required OCFL files (primarily the inventory.json, sidecar digest and NAMASTE identifier), and verifies that there is a complete
|
|
151
|
+
sequence of version directories present.
|
|
152
|
+
|
|
153
|
+
### Verify Inventory
|
|
154
|
+
|
|
155
|
+
This check takes an inventory file discovered by `#verify_structure` and checks it for format
|
|
156
|
+
and internal consistency. It also verifies that every file mentioned in every version state block
|
|
157
|
+
can be associated with its matching file in the manifest block. By default it acts on the
|
|
158
|
+
`inventory.json` in the object root, but it can also be directed at any of the inventories
|
|
159
|
+
in any version directory.
|
|
160
|
+
|
|
161
|
+
### Verify Manifest
|
|
162
|
+
|
|
163
|
+
This check verifies that all files mentioned in the manifest block exist on disk in the given
|
|
164
|
+
object directory, and that all files on disk for all versions of the given inventory file can
|
|
165
|
+
be associated with a matching record in the manifest. It does not perform checksum verification
|
|
166
|
+
of these files, and thus is appropriate for the quick initial identification and verification of
|
|
167
|
+
large volumes of suspected OCFL objects. Note that `#verify_manifest` confines itself to versions
|
|
168
|
+
discovered in the `inventory.json`, so if an object directory contains more version directories,
|
|
169
|
+
`#verify_manifest` will not inspect those directories. `#verify_structure` will, however, detect
|
|
170
|
+
this issue as an error condition.
|
|
171
|
+
|
|
172
|
+
### Verify Checksums
|
|
173
|
+
|
|
174
|
+
This is a potentially resource-intensive check that computes new digest values for each file discovered
|
|
175
|
+
on disk and compares them against values stored in the manifest block of the provided `inventory.json`.
|
|
176
|
+
It reports problems if a given checksum does not match the stored value, or if a file is discovered
|
|
177
|
+
on disk that does not have a record in the manifest block, or if a file in the manifest block cannot
|
|
178
|
+
be found on disk.
|
|
179
|
+
|
|
180
|
+
For larger objects, or as part of a deposit workflow, it is possible to call `#verify_checksums` against
|
|
181
|
+
the contents of one version directory only. See `OcflValidator#verify_directory` for details. This method
|
|
182
|
+
is used by `OcflDeposit` to verify successful transfer of a new version directory without invoking a full
|
|
183
|
+
checksum validation of all existing version directories in the destination object.
|
|
184
|
+
|
|
185
|
+
### Verify Fixity (optional)
|
|
186
|
+
|
|
187
|
+
Additionally, if a given `inventory.json` contains an optional fixity block, it is possible to perform
|
|
188
|
+
a `#verify_checksums` check against the files on disk, except using values and digest types stored in
|
|
189
|
+
the fixity block instead of the OCFL digest algorithm. Since a fixity block is optional, and is not
|
|
190
|
+
required to hold values for every file in the manifest, this check should not be considered a primary
|
|
191
|
+
method for checksum validation.
|
|
192
|
+
|
|
193
|
+
```
|
|
194
|
+
|
|
195
|
+
# Check a directory for a valid OCFL object
|
|
196
|
+
validate = OcflTools::OcflValidator.new(object_root_dir)
|
|
197
|
+
puts validate.verify_structure.results # checks the physical layout of the object root
|
|
198
|
+
puts validate.verify_inventory.results # checks the syntax and internal consistency of the inventory.json
|
|
199
|
+
puts validate.verify_manifest.results # cross-checks existence of files on disk against the manifest in the inventory.json
|
|
200
|
+
puts validate.verify_checksums.results # checks digests in the inventory manifest against files discovered in the object root.
|
|
201
|
+
|
|
202
|
+
# Optionally, if you have additional fixity checksums in the inventory:
|
|
203
|
+
puts validate.verify_fixity.results # checks files using MD5 checksums (default).
|
|
204
|
+
puts validate.verify_fixity(digest: 'sha1').results # checks files using sha1 checksums.
|
|
205
|
+
|
|
206
|
+
# If you just want to do a complete check of a suspected OCFL object root, do:
|
|
207
|
+
validate = OcflTools::OcflValidator.new(object_root_dir)
|
|
208
|
+
puts validate.validate_ocfl_object_root.results # Will do structure, inventory and manifest checksum checks.
|
|
209
|
+
|
|
210
|
+
# If you'd like to use values in the fixity block instead of the manifest checksums, do:
|
|
211
|
+
puts validate.validate_ocfl_object_root(digest: 'sha1').results
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
```
|
|
215
|
+
|
|
216
|
+
## Depositing and Updating Objects
|
|
217
|
+
|
|
218
|
+
This gem includes basic deposit and update functionality. It requires content for deposit
|
|
219
|
+
to be arranged in a specific syntax in a `deposit` directory. The `deposit` directory can
|
|
220
|
+
have any name, but MUST contain a `head` directory, which MUST contain a directory with a name
|
|
221
|
+
that matches your site's `OcflTools.config.content_directory` setting (defaults to `content`).
|
|
222
|
+
|
|
223
|
+
### First Version
|
|
224
|
+
|
|
225
|
+
If this is to be the first version of a new OCFL object you MUST provide at least one file
|
|
226
|
+
in the `head/content/` directory to add, and you MUST include either a `head/head.json` OR a
|
|
227
|
+
`head/add_files.json` file (but not both - see below for format descriptions).
|
|
228
|
+
|
|
229
|
+
If the logical paths of the files being ingested DO NOT match the physical path of the files
|
|
230
|
+
as laid out in the `head/content/` directory, then you MUST include an `update_manifest` stanza
|
|
231
|
+
in `head/head.json` (if used) or a `head/update_manifest.json` file. If the logical paths
|
|
232
|
+
match the physical paths (that is, if the directory structure in `head/content` matches how you
|
|
233
|
+
wish the object directory layout to appear after versioning) then you need not include an
|
|
234
|
+
`update_manifest` stanza in `head.json` or use an `update_manifest.json` action file;
|
|
235
|
+
`OcflTools::OcflDeposit` will use the `add` stanza or contents of `add_files.json` to both
|
|
236
|
+
create the logical path and update the manifest block with the appropriate physical path.
|
|
237
|
+
|
|
238
|
+
The first version of an OCFL object MAY contain fixity and version metadata; provide this information
|
|
239
|
+
either as part of the `head/head.json` file or, if you are not using `head.json`, provide this in
|
|
240
|
+
`head/fixity_files.json` and `head/version.json`.
|
|
241
|
+
|
|
242
|
+
The first version of an OCFL object MAY have MOVE and COPY actions performed against digests in it,
|
|
243
|
+
either as stanzas in the `head.json` file or as stand-alone `copy_files.json` and `move_files.json`
|
|
244
|
+
if a `head.json` is not used, but the `head.json` MUST NOT contain DELETE actions and you MUST NOT
|
|
245
|
+
use a `head/delete_files.json`.
|
|
246
|
+
|
|
247
|
+
Finally, the `deposit` directory must contain a NAMasTE file, in the format of `4={id value}`,
|
|
248
|
+
describing the digital object identifier to use to uniquely identify this OCFL object at
|
|
249
|
+
this site. An example layout, where the id of the OCFL object being created is `123cd4567`, is below. In
|
|
250
|
+
this example the site is using the default value `content` for `content_directory`.
|
|
251
|
+
|
|
252
|
+
Note that, within an object version, actions are processed in the following order: UPDATE_MANIFEST, ADD,
|
|
253
|
+
UPDATE, MOVE, COPY, DELETE. This is to support the ingest of bitstreams where the logical filepath
|
|
254
|
+
needs to differ from the physical (deposit directory `head/content`) layout.
|
|
255
|
+
|
|
256
|
+
```
|
|
257
|
+
deposit_dir/
|
|
258
|
+
4=123cd4567
|
|
259
|
+
head/
|
|
260
|
+
head.json OR add_files.json
|
|
261
|
+
update_manifest.json [optional, if add_files.json is used]
|
|
262
|
+
move_files.json [optional, if add_files.json is used]
|
|
263
|
+
copy_files.json [optional, if add_files.json is used]
|
|
264
|
+
version.json [optional, if add_files.json is used]
|
|
265
|
+
fixity_files.json [optional, if add_files.json is used]
|
|
266
|
+
content/
|
|
267
|
+
my_content/a_file_to_add.txt
|
|
268
|
+
```
|
|
269
|
+
|
|
270
|
+
### Subsequent versions of an existing object
|
|
271
|
+
|
|
272
|
+
To version an existing object, you must provide a `deposit` directory with the following layout:
|
|
273
|
+
|
|
274
|
+
```
|
|
275
|
+
deposit_dir/
|
|
276
|
+
inventory.json
|
|
277
|
+
inventory.json.{sha256|sha512}
|
|
278
|
+
head/
|
|
279
|
+
head.json OR [one or more action files]
|
|
280
|
+
content/
|
|
281
|
+
{files and directories to add or update, if applicable}
|
|
282
|
+
```
|
|
283
|
+
|
|
284
|
+
`{action files}` are AT LEAST ONE of `update_manifest.json`, `add_files.json`, `delete_files.json`,
|
|
285
|
+
`update_files.json`, `move_files.json`, `copy_files.json` and `fixity_files.json`.
|
|
286
|
+
You may also optionally include `version.json`, but this file does not count towards
|
|
287
|
+
the minimum required action files requirement.
|
|
288
|
+
|
|
289
|
+
The `inventory.json` and sidecar digest file must be the most recent versions of the inventory and
|
|
290
|
+
sidecar from the OCFL object that you are updating, copied from the object root that you intend
|
|
291
|
+
to update. New version creation will fail if the destination object directory does not contain
|
|
292
|
+
the expected OCFL object at the `head` value of this `inventory.json`.
|
|
293
|
+
|
|
294
|
+
The `head/content` directory MUST exist, but is not required to contain any bitstreams unless there
|
|
295
|
+
is a correctly-formatted `add_files.json` or `update_files.json`.
|
|
296
|
+
|
|
297
|
+
Note that it is possible to version an object merely by providing a `fixity_files.json`.
|
|
298
|
+
|
|
299
|
+
### Update Manifest
|
|
300
|
+
|
|
301
|
+
Create a file named `update_manifest.json` and place in `deposit/head`. Place the bitstream to be
|
|
302
|
+
added to the object in the content directory, and reference that bitstream in `update_manifest.json`
|
|
303
|
+
with the following syntax:
|
|
304
|
+
|
|
305
|
+
```
|
|
306
|
+
|
|
307
|
+
{
|
|
308
|
+
"9b4566a0455e76a392c43ec4d8b8e7d636b21ff2cf83b87fe99b97d00a501de0": [
|
|
309
|
+
"my_content/dunwich.txt"
|
|
310
|
+
]
|
|
311
|
+
}
|
|
312
|
+
|
|
313
|
+
```
|
|
314
|
+
|
|
315
|
+
Note that this example, and all others in this doc, use the sha256 algorithm for digest values, for
|
|
316
|
+
easier legibility. Also note that the file path is relative to the object's content directory. The file
|
|
317
|
+
path for the above example relative to the deposit root directory would be `head/content/my_content/dunwich.txt`.
|
|
318
|
+
|
|
319
|
+
### Add files
|
|
320
|
+
|
|
321
|
+
Create a file named `add_files.json` and place in `deposit/head`. Place the file to be added
|
|
322
|
+
to the object in `deposit/head/{content_directory}` in the desired directory structure. If multiple
|
|
323
|
+
filepaths are provided for any one digest value, and if only one matching bitstream is provided
|
|
324
|
+
in `head/content`, then the file is deduplicated and only 1 bitstream of that file will exist
|
|
325
|
+
in the final object version.
|
|
326
|
+
|
|
327
|
+
```
|
|
328
|
+
{ "digest of file to add": [ filepaths of file to add ] }
|
|
329
|
+
|
|
330
|
+
e.g.:
|
|
331
|
+
|
|
332
|
+
{
|
|
333
|
+
"9b4566a0455e76a392c43ec4d8b8e7d636b21ff2cf83b87fe99b97d00a501de0": [
|
|
334
|
+
"my_content/dunwich.txt",
|
|
335
|
+
"my_content/a_deduplicated_copy_of_dunwich.txt"
|
|
336
|
+
]
|
|
337
|
+
}
|
|
338
|
+
```
|
|
339
|
+
|
|
340
|
+
### Update files
|
|
341
|
+
|
|
342
|
+
Create a file named `update_files.json` and place in `deposit/head`. Place the updated file
|
|
343
|
+
in `deposit/head/{content_directory}` in the desired directory structure.
|
|
344
|
+
|
|
345
|
+
```
|
|
346
|
+
{ "digest of file to update": [ existing filepaths of file to update ] }
|
|
347
|
+
|
|
348
|
+
e.g.: this updates the previously versioned file 'my_content/dunwich.txt' with a new bitstream:
|
|
349
|
+
|
|
350
|
+
{
|
|
351
|
+
"334566a04a5e76a392c43ec4d8b8e7d666f1ff2cf83b87fe99b97d00a5443f43": [
|
|
352
|
+
"my_content/dunwich.txt"
|
|
353
|
+
]
|
|
354
|
+
}
|
|
355
|
+
```
|
|
356
|
+
|
|
357
|
+
### Copy files
|
|
358
|
+
|
|
359
|
+
Create a file named `copy_files.json` and place in `deposit/head`. This makes a deduplicated
|
|
360
|
+
copy of a bitstream that already exists in the object. If you do NOT want to make a deduplicated
|
|
361
|
+
copy, use `add_files.json` instead, and provide the bitstream in `deposit/head/{content_directory}`.
|
|
362
|
+
|
|
363
|
+
```
|
|
364
|
+
{ "digest of an existing file": [ filepaths of new copies ] }
|
|
365
|
+
|
|
366
|
+
e.g.
|
|
367
|
+
|
|
368
|
+
{
|
|
369
|
+
"9b4566a0455e76a392c43ec4d8b8e7d636b21ff2cf83b87fe99b97d00a501de0": [
|
|
370
|
+
"my_content/a_second_copy_of_dunwich.txt",
|
|
371
|
+
"my_content/a_third_copy_of_dunwich.txt"
|
|
372
|
+
]
|
|
373
|
+
}
|
|
374
|
+
|
|
375
|
+
```
|
|
376
|
+
|
|
377
|
+
### Move files
|
|
378
|
+
|
|
379
|
+
`move` is functionally a rename operation, performed by creating a new filepath for the digest
|
|
380
|
+
and then deleting the old one.
|
|
381
|
+
|
|
382
|
+
Create a file named `move_files.json` and place in `deposit/head`. Note that `move_files.json`
|
|
383
|
+
requires exactly 2 filepaths per digest: a source and a destination. It also will fail if
|
|
384
|
+
the previous version has more than one filepath recorded for this digest; this is to prevent a
|
|
385
|
+
disambiguation issue when reconstructing file actions from the inventory file.
|
|
386
|
+
|
|
387
|
+
If you wish to move a specific filepath instance where there are multiple source filepaths in
|
|
388
|
+
the prior version, perform a `copy` action and then `delete` the desired source file.
|
|
389
|
+
|
|
390
|
+
|
|
391
|
+
```
|
|
392
|
+
{ "digest of source filepath": [ "source_file", "destination_file" ] }
|
|
393
|
+
|
|
394
|
+
e.g.
|
|
395
|
+
|
|
396
|
+
{
|
|
397
|
+
"9b4566a0455e76a392c43ec4d8b8e7d636b21ff2cf83b87fe99b97d00a501de0": [
|
|
398
|
+
"my_content/a_third_copy_of_dunwich.txt",
|
|
399
|
+
"my_content/moved_third_copy_of_dunwich_to_here.txt"
|
|
400
|
+
]
|
|
401
|
+
}
|
|
402
|
+
|
|
403
|
+
|
|
404
|
+
```
|
|
405
|
+
|
|
406
|
+
### Delete files
|
|
407
|
+
|
|
408
|
+
Create a file named `delete_files.json` and place in `deposit/head`.
|
|
409
|
+
|
|
410
|
+
```
|
|
411
|
+
{ "digest of file to delete": [ filepaths of files to delete ] }
|
|
412
|
+
|
|
413
|
+
e.g.
|
|
414
|
+
|
|
415
|
+
{ "9b4566a0455e76a392c43ec4d8b8e7d636b21ff2cf83b87fe99b97d00a501de0": [
|
|
416
|
+
"my_content/a_third_copy_of_dunwich.txt",
|
|
417
|
+
"my_content/moved_third_copy_of_dunwich_to_here.txt"
|
|
418
|
+
]
|
|
419
|
+
}
|
|
420
|
+
|
|
421
|
+
```
|
|
422
|
+
|
|
423
|
+
### Additional version info
|
|
424
|
+
|
|
425
|
+
If you wish to add additional information to the version, create a file named `version.json` and place in `deposit/head`.
|
|
426
|
+
|
|
427
|
+
```
|
|
428
|
+
{
|
|
429
|
+
"created": "2019-11-12",
|
|
430
|
+
"message": "Ia! Ia! cthulhu fhtagn!",
|
|
431
|
+
"user": {
|
|
432
|
+
"name": "Yog-Sothoth",
|
|
433
|
+
"address": "all_seeing_spheres@miskatonic.edu"
|
|
434
|
+
}
|
|
435
|
+
}
|
|
436
|
+
```
|
|
437
|
+
|
|
438
|
+
### Add additional fixity values to object
|
|
439
|
+
|
|
440
|
+
Create a file named `fixity_files.json` and place in `deposit/head`. The top level keys of this JSON
|
|
441
|
+
should be the string value of the digest algorithm to add. Each key contains a hash of key/value pairs,
|
|
442
|
+
where the key is the string value of the file digest as recorded in the manifest (i.e. either SHA256 or
|
|
443
|
+
SHA512), and the value is the additional file digest to associate with this file as an additional fixity value.
|
|
444
|
+
Note that you do not need to provide fixity values for all existing files in the object, and you
|
|
445
|
+
can mix-and-match digest algorithms so long as the algorithm is listed as a supported value in your site.
|
|
446
|
+
Set `OcflTools.config.fixity_algorithms` to specify acceptable algorithms.
|
|
447
|
+
|
|
448
|
+
```
|
|
449
|
+
{
|
|
450
|
+
"md5": {
|
|
451
|
+
"cffe55838a878a29da82a0e10b2909b7e46b6f7167ed7f815782465573e98f27": "fccd3f96d461f495a3bef31dc1d28f01",
|
|
452
|
+
"f512eb0a032f562225e848ce88449895f3ec19f3d4836a80df80c77c74557bab": "d2c79c8519af858fac2993c2373b5203"
|
|
453
|
+
},
|
|
454
|
+
"sha1": {
|
|
455
|
+
"f512eb0a032f562225e848ce88449895f3ec19f3d4836a80df80c77c74557bab": "aa9e59cde167454f1f8b1f0eeeb0795e2d2f8c6f"
|
|
456
|
+
}
|
|
457
|
+
}
|
|
458
|
+
```
|
|
459
|
+
|
|
460
|
+
### Using head.json instead of individual action files
|
|
461
|
+
|
|
462
|
+
Instead of providing multiple action files in `head/` to describe desired operations,
|
|
463
|
+
you may provide a single file, `head.json`, containing multiple actions. Each individual
|
|
464
|
+
action has the same format as its action file, but is nested beneath a key that describes
|
|
465
|
+
the action, e.g.:
|
|
466
|
+
|
|
467
|
+
```
|
|
468
|
+
{
|
|
469
|
+
"update_manifest": {
|
|
470
|
+
"cffe55838a878a29da82a0e10b2909b7e46b6f7167ed7f815782465573e98f27": [
|
|
471
|
+
"ingest_temp/dracula.txt"
|
|
472
|
+
],
|
|
473
|
+
"f512eb0a032f562225e848ce88449895f3ec19f3d4836a80df80c77c74557bab": [
|
|
474
|
+
"ingest_temp/poe.txt"
|
|
475
|
+
]
|
|
476
|
+
},
|
|
477
|
+
"add": {
|
|
478
|
+
"cffe55838a878a29da82a0e10b2909b7e46b6f7167ed7f815782465573e98f27": [
|
|
479
|
+
"my_content/a_great_copy_of_dracula.txt",
|
|
480
|
+
"my_content/another_directory/a_third_copy_of_dracula.txt"
|
|
481
|
+
],
|
|
482
|
+
"f512eb0a032f562225e848ce88449895f3ec19f3d4836a80df80c77c74557bab": [
|
|
483
|
+
"edgar/alan/poe.txt"
|
|
484
|
+
]
|
|
485
|
+
}
|
|
486
|
+
}
|
|
487
|
+
```
|
|
488
|
+
|
|
489
|
+
In the above example we are adding two bitstreams to the object (via `update_manifest`),
|
|
490
|
+
in a directory called `ingest_temp`, but after this version is created the object
|
|
491
|
+
will appear to contain 3 files in total, thus:
|
|
492
|
+
|
|
493
|
+
```
|
|
494
|
+
|
|
495
|
+
my_content/a_great_copy_of_dracula.txt
|
|
496
|
+
my_content/another_directory/a_third_copy_of_dracula.txt
|
|
497
|
+
edgar/alan/poe.txt
|
|
498
|
+
```
|
|
499
|
+
|
|
500
|
+
This is an example of both data deduplication (two different logical files refer to the same bitstream)
|
|
501
|
+
and that the logical representation of the object need not match its physical layout. In this
|
|
502
|
+
case, the version directory on disk would contain these files:
|
|
503
|
+
|
|
504
|
+
```
|
|
505
|
+
|
|
506
|
+
v0001/content/ingest_temp/dracula.txt
|
|
507
|
+
v0001/content/ingest_temp/poe.txt
|
|
508
|
+
```
|
|
509
|
+
|
|
510
|
+
### Accessioning a version
|
|
511
|
+
|
|
512
|
+
Once the content to be accessioned is marshaled correctly in the `deposit` directory,
|
|
513
|
+
simply do:
|
|
514
|
+
|
|
515
|
+
```
|
|
516
|
+
# Creating this object performs extensive sanity checks on both deposit layout and destination.
|
|
517
|
+
# Any error will cause it to raise an exception and perform no action on the destination object.
|
|
518
|
+
|
|
519
|
+
deposit = OcflTools::OcflDeposit.new(deposit_directory: deposit_dir, object_directory: object_dir)
|
|
520
|
+
|
|
521
|
+
# This creates the new version and verifies successful accessioning.
|
|
522
|
+
deposit.deposit_new_version
|
|
523
|
+
|
|
524
|
+
# This returns a results object with additional details.
|
|
525
|
+
deposit.results
|
|
526
|
+
```
|
|
527
|
+
|
|
528
|
+
Note that for the first version of an object, the destination `object_directory` MUST be empty. For
|
|
529
|
+
subsequent versions of the object, the `object_directory` must contain the most recent version of
|
|
530
|
+
the OCFL object to be updated.
|
|
531
|
+
|
|
532
|
+
### Viewing Object History
|
|
533
|
+
|
|
534
|
+
Use `OcflTools::OcflDelta` to query an OCFL object to produce the list of actions performed on each
|
|
535
|
+
version of the object. This does not list when fixity information was added to the object, nor
|
|
536
|
+
does it reveal `version` information. `version` information can be queried separately; historical
|
|
537
|
+
fixity info requires access to prior versions of the inventory file.
|
|
538
|
+
|
|
539
|
+
```
|
|
540
|
+
ocfl = OcflTools::OcflInventory.new.from_file("#{object_dir}/inventory.json")
|
|
541
|
+
ocfl_delta = OcflTools::OcflDelta.new(ocfl)
|
|
542
|
+
|
|
543
|
+
puts JSON.pretty_generate(ocfl_delta.all)
|
|
544
|
+
|
|
545
|
+
# Or if you just want a specific version (say, changes made to create version 3), do:
|
|
546
|
+
ocfl_delta.previous(3)
|
|
547
|
+
```
|
|
548
|
+
|
|
549
|
+
`JSON.pretty_generate(ocfl_delta.all)` yields output like this:
|
|
550
|
+
|
|
551
|
+
```
|
|
552
|
+
{
|
|
553
|
+
"v0001": {
|
|
554
|
+
"update_manifest": {
|
|
555
|
+
"cffe55838a878a29da82a0e10b2909b7e46b6f7167ed7f815782465573e98f27": [
|
|
556
|
+
"my_content/dracula.txt"
|
|
557
|
+
],
|
|
558
|
+
"f512eb0a032f562225e848ce88449895f3ec19f3d4836a80df80c77c74557bab": [
|
|
559
|
+
"my_content/poe.txt"
|
|
560
|
+
]
|
|
561
|
+
},
|
|
562
|
+
"add": {
|
|
563
|
+
"cffe55838a878a29da82a0e10b2909b7e46b6f7167ed7f815782465573e98f27": [
|
|
564
|
+
"my_content/dracula.txt"
|
|
565
|
+
],
|
|
566
|
+
"f512eb0a032f562225e848ce88449895f3ec19f3d4836a80df80c77c74557bab": [
|
|
567
|
+
"my_content/poe.txt"
|
|
568
|
+
]
|
|
569
|
+
}
|
|
570
|
+
},
|
|
571
|
+
"v0002": {
|
|
572
|
+
"copy": {
|
|
573
|
+
"cffe55838a878a29da82a0e10b2909b7e46b6f7167ed7f815782465573e98f27": [
|
|
574
|
+
"my_content/a_second_copy_of_dracula.txt",
|
|
575
|
+
"my_content/another_directory/a_third_copy_of_dracula.txt"
|
|
576
|
+
]
|
|
577
|
+
},
|
|
578
|
+
"move": {
|
|
579
|
+
"f512eb0a032f562225e848ce88449895f3ec19f3d4836a80df80c77c74557bab": [
|
|
580
|
+
"my_content/poe.txt",
|
|
581
|
+
"my_content/poe-nevermore.txt"
|
|
582
|
+
]
|
|
583
|
+
}
|
|
584
|
+
},
|
|
585
|
+
"v0003": {
|
|
586
|
+
"update_manifest": {
|
|
587
|
+
"618ea77f3a74558493f2df1d82fee18073f6458573d58e6b65bade8bd65227fb": [
|
|
588
|
+
"my_content/poe-nevermore.txt"
|
|
589
|
+
]
|
|
590
|
+
},
|
|
591
|
+
"update": {
|
|
592
|
+
"618ea77f3a74558493f2df1d82fee18073f6458573d58e6b65bade8bd65227fb": [
|
|
593
|
+
"my_content/poe-nevermore.txt"
|
|
594
|
+
]
|
|
595
|
+
}
|
|
596
|
+
},
|
|
597
|
+
"v0004": {
|
|
598
|
+
"update_manifest": {
|
|
599
|
+
"9b4566a0455e76a392c43ec4d8b8e7d636b21ff2cf83b87fe99b97d00a501de0": [
|
|
600
|
+
"my_content/dunwich.txt"
|
|
601
|
+
]
|
|
602
|
+
},
|
|
603
|
+
"add": {
|
|
604
|
+
"9b4566a0455e76a392c43ec4d8b8e7d636b21ff2cf83b87fe99b97d00a501de0": [
|
|
605
|
+
"my_content/dunwich.txt"
|
|
606
|
+
]
|
|
607
|
+
}
|
|
608
|
+
}
|
|
609
|
+
}
|
|
610
|
+
```
|
|
611
|
+
|
|
612
|
+
## Implementation notes
|
|
613
|
+
|
|
614
|
+
`OcflTools::OcflInventory` is a child class of `OcflTools::OcflObject`, designed
|
|
615
|
+
for reading and writing inventory.json files.
|
|
616
|
+
|
|
617
|
+
`OcflObject` will prevent you from doing the dumbest of things - once you've created
|
|
618
|
+
version 2 of an object, you can't edit the state of version 1 - but it won't prevent
|
|
619
|
+
you from the more subtle stupids. That's for implementing applications to work around
|
|
620
|
+
with their own business logic.
|
|
621
|
+
|
|
622
|
+
`OcflTools::OcflValidator` will take a directory and tell you if it's an OCFL object or not.
|
|
623
|
+
If it is a valid OCFL object, `OcflValidator` will check the files on disk against the records
|
|
624
|
+
in the inventory.json and let you know if they are all there and have matching checksums.
|
|
625
|
+
|
|
626
|
+
`OcflTools::OcflVerify` will take an `OcflObject` and will let you know if it's syntactically correct
|
|
627
|
+
and internally consistent. `OcflVerify` doesn't care or know about files or directories on disk.
|
|
628
|
+
`OcflValidator` uses `OcflVerify` as part of its validation process, once it has identified a suitable
|
|
629
|
+
inventory.json file.
|
|
630
|
+
|
|
631
|
+
`OcflTools::OcflResults` is a class to capture logging events for a specific OcflValidator or
|
|
632
|
+
OcflVerify instance. Any reported error (inspect `OcflResults#get_errors`) indicates the object
|
|
633
|
+
under consideration is not OCFL compliant.
|
|
634
|
+
|
|
635
|
+
`OcflTools::OcflDeposit` is a reference implementation of a deposit workflow from an upstream repository.
|
|
636
|
+
When given a correctly-formatted `deposit` directory and a destination directory, `OcflDeposit` will
|
|
637
|
+
attempt to create a new OCFL object in an empty destination directory, or add a new version to a
|
|
638
|
+
well-formed OCFL object in the destination directory.
|
|
639
|
+
|
|
640
|
+
OCFL supports file deduplication but it is up to the implementing application to decide
|
|
641
|
+
if this is desirable behavior. If one is using `OcflDeposit` then deduplication will occur when
|
|
642
|
+
the same bitstream is added to an object several times in the same version with different
|
|
643
|
+
filenames AND only one file is placed in `deposit/head/content` for versioning.
|
|
644
|
+
|
|
645
|
+
When adding an existing bitstream as a different filename in a new version, deduplication will
|
|
646
|
+
occur when a matching digest can be found in the manifest, but only if the new filename is versioned
|
|
647
|
+
via `copy_files.json` and if the bitstream is not added again to `deposit/head/content`.
|