ocfl 0.8.1 → 0.9.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c21578c64e68d5bdab7fcea3b08bf8210a967a52d0d6b0f2ecdc203a4f2eaca4
4
- data.tar.gz: 48503dd0f9011c88bbf20bc6adf073d2b1ff422ac1ed06063c332f7c3bbf619e
3
+ metadata.gz: 73309493b42b637ca8a3526c52d50531996f77495ae114a9fa6963b0a3a3b67f
4
+ data.tar.gz: f7bd61220ac24196b86ae50a6cf44e301002c6cf265b0debbe643abc1b088e50
5
5
  SHA512:
6
- metadata.gz: 74169c05ca7d91082db0198d252234956c7a47139d57419f5be14e8e23168af39e75b9d9e78d397bd1b65fbc688834ee84cd1456d452795163c777c6c7da15c7
7
- data.tar.gz: acd185ad8a09d1b016d9c6867d5b4b68005fbe1227abe8848912f917c3d71f4d535cd1847cc4422606fec0d5fc01b9dbf59b6b613c4e5701dfca1703ebae79dc
6
+ metadata.gz: c94aa2013e50467adb00b2e4e2ea5a53b29947490ed069d13a117ec0bdddb8480cb81a369e9f1f4d4639434fa63b1cf89ce77ed309527b0dc2f49685f6e4ceda
7
+ data.tar.gz: 9922c3d6521295df8384b2927daeb573f2e762ee0f69b111ab23cca5931fe32ee05b346ebd31bf3779f41e89d31a4119fadc18308f48ca4f762376b4afe3ad2c
data/.rubocop.yml CHANGED
@@ -31,6 +31,10 @@ RSpec/MultipleExpectations:
31
31
  RSpec/ExampleLength:
32
32
  Max: 10
33
33
 
34
+ RSpec/InstanceVariable:
35
+ Exclude:
36
+ - spec/support/temp_directory.rb
37
+
34
38
  RSpec/MultipleMemoizedHelpers:
35
39
  Max: 8
36
40
 
data/README.md CHANGED
@@ -16,36 +16,50 @@ If bundler is not being used to manage dependencies, install the gem by executin
16
16
  ## Usage
17
17
 
18
18
  ```ruby
19
- directory = OCFL::Object::Directory.new(object_root: '/files/[object_root]')
20
- directory.exists?
19
+ storage_root = OCFL::StorageRoot.new(base_directory: '/files')
20
+ storage_root.exists?
21
+ # => false
22
+ storage_root.valid?
21
23
  # => false
22
- builder = OCFL::Object::DirectoryBuilder.new(object_root: 'spec/abc123', id: 'http://example.com/abc123')
23
- builder.copy_file('sig/ocfl.rbs', destination_path: 'ocfl/types/generated.rbs')
24
24
 
25
- directory = builder.save
26
- directory.exists?
25
+ storage_root.save
26
+ storage_root.exists?
27
27
  # => true
28
- directory.valid?
28
+ storage_root.valid?
29
29
  # => true
30
+
31
+ object = storage_root.object('bc123df4567') # returns an instance of `OCFL::Object`
32
+ object.exists?
33
+ # => false
34
+ object.valid?
35
+ # => false
36
+ object.head
37
+ # => 'v0'
30
38
  ```
31
39
 
32
40
  ### Versions
33
41
 
34
- There are three ways to get a version with an existing object directory.
42
+ To build out an object, you'll need to create one or more versions.
43
+
44
+ There are three ways to get a version within an existing object directory.
35
45
 
36
46
  #### Start a new version
37
47
  ```
38
- new_version = directory.begin_new_version
39
- new_version.copy_file('sig/ocfl.rbs')
48
+ new_version = object.begin_new_version
49
+ new_version.copy_file('sig/ocfl.rbs', destination_path: 'ocfl/types/generated.rbs')
40
50
  new_version.save
41
51
 
42
- directory.head
43
- # => 'v2'
52
+ object.exists?
53
+ # => true
54
+ object.valid?
55
+ # => true
56
+ object.head
57
+ # => 'v1'
44
58
  ```
45
59
 
46
60
  #### Modify the existing head version
47
61
  ```
48
- new_version = directory.head_version
62
+ new_version = object.head_version
49
63
  new_version.delete_file('sample.txt')
50
64
  new_version.copy_file('sig/ocfl.rbs')
51
65
  new_version.save
@@ -53,7 +67,7 @@ new_version.save
53
67
 
54
68
  #### Overwrite the existing head version
55
69
  ```
56
- new_version = directory.overwrite_current_version
70
+ new_version = object.overwrite_current_version
57
71
  new_version.copy_file('sig/ocfl.rbs')
58
72
  new_version.save
59
73
  ```
@@ -61,7 +75,7 @@ new_version.save
61
75
  ### File paths
62
76
  ```
63
77
  # List file names that were part of a given version
64
- directory.versions['v2'].file_names
78
+ object.versions['v1'].file_names
65
79
  # => ["ocfl.rbs"]
66
80
 
67
81
  # Or on the head version
@@ -69,13 +83,12 @@ directory.head_version.file_names
69
83
  # => ["ocfl.rbs"]
70
84
 
71
85
  # Get the path of a file in a given version
72
- directory.path(filepath: "ocfl.rbs", version: "v2")
73
- # => <Pathname:/files/[object_root]/v2/content/ocfl.rbs>
86
+ object.path(filepath: "ocfl.rbs", version: "v1")
87
+ # => <Pathname:/files/[object_root]/v1/content/ocfl.rbs>
74
88
 
75
89
  # Get the path of a file in the head version
76
- directory.path(filepath: "ocfl.rbs")
77
- # => <Pathname:/files/[object_root]/v2/content/ocfl.rbs>
78
-
90
+ object.path(filepath: "ocfl.rbs")
91
+ # => <Pathname:/files/[object_root]/v1/content/ocfl.rbs>
79
92
  ```
80
93
 
81
94
  ## Development
@@ -0,0 +1,50 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OCFL
4
+ # Represents the JSON file that stores the object inventory
5
+ # https://ocfl.io/1.1/spec/#inventory
6
+ class Inventory
7
+ URI_1_1 = "https://ocfl.io/1.1/spec/#inventory"
8
+
9
+ # A data structure for the inventory
10
+ class InventoryStruct < Dry::Struct
11
+ transform_keys(&:to_sym)
12
+ attribute :id, Types::String
13
+ attribute :type, Types::String
14
+ attribute :digestAlgorithm, Types::String
15
+ attribute :head, Types::String
16
+ attribute? :contentDirectory, Types::String
17
+ attribute :versions, Types::Hash.map(Types::String, ObjectVersion)
18
+ attribute :manifest, Types::Hash
19
+ end
20
+
21
+ # @param [InventoryStruct] data
22
+ def initialize(data:)
23
+ @data = data
24
+ end
25
+
26
+ attr_reader :errors, :data
27
+
28
+ delegate :id, :head, :versions, :manifest, to: :data
29
+ delegate :state, to: :head_version
30
+
31
+ def content_directory
32
+ data.contentDirectory || "content"
33
+ end
34
+
35
+ # @return [String,nil] the path to the file relative to the object root. (e.g. v2/content/image.tiff)
36
+ def path(logical_path)
37
+ return unless head_version # object does not exist on disk
38
+
39
+ digest, = state.find { |_, logical_paths| logical_paths.include?(logical_path) }
40
+
41
+ return unless digest
42
+
43
+ manifest[digest].find { |content_path| content_path.match(%r{\Av\d+/#{content_directory}/#{logical_path}\z}) }
44
+ end
45
+
46
+ def head_version
47
+ versions[head]
48
+ end
49
+ end
50
+ end
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OCFL
4
+ # Loads an Inventory object from JSON
5
+ class InventoryLoader
6
+ include Dry::Monads[:result]
7
+
8
+ VersionEnum = Types::String.enum(Inventory::URI_1_1)
9
+ DigestAlgorithm = Types::String.enum("md5", "sha1", "sha256", "sha512", "blake2b-512")
10
+
11
+ # https://ocfl.io/1.1/spec/#inventory-structure
12
+ # Validation of the incoming data
13
+ Schema = Dry::Schema.Params do
14
+ # config.validate_keys = true
15
+ required(:id).filled(:string)
16
+ required(:type).filled(VersionEnum)
17
+ required(:digestAlgorithm).filled(DigestAlgorithm)
18
+ required(:head).filled(:string)
19
+ optional(:contentDirectory).filled(:string)
20
+ required(:versions).hash
21
+ required(:manifest).hash
22
+ end
23
+
24
+ def self.load(file_name)
25
+ new(file_name).load
26
+ end
27
+
28
+ def initialize(file_name)
29
+ @file_name = file_name
30
+ end
31
+
32
+ def load
33
+ bytestream = File.read(@file_name)
34
+ data = JSON.parse(bytestream)
35
+ errors = Schema.call(data).errors
36
+ if errors.empty?
37
+ Success(Inventory::InventoryStruct.new(data))
38
+ else
39
+ Failure(errors)
40
+ end
41
+ end
42
+ end
43
+ end
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OCFL
4
+ # Checks to see that the inventory.json and its checksum in a directory are valid
5
+ class InventoryValidator
6
+ def initialize(directory:)
7
+ @directory = Pathname.new(directory)
8
+ end
9
+
10
+ attr_reader :directory
11
+
12
+ def valid?
13
+ inventory_file_exists? && inventory_file_matches_checksum?
14
+ end
15
+
16
+ def inventory_file_exists?
17
+ File.exist?(inventory_file)
18
+ end
19
+
20
+ def inventory_file_matches_checksum?
21
+ return false unless File.exist?(inventory_checksum_file)
22
+
23
+ actual = inventory_file_checksum
24
+ expected = File.read(inventory_checksum_file)
25
+ expected.match?(/\A#{actual}\s+inventory\.json\z/)
26
+ end
27
+
28
+ def inventory_checksum_file
29
+ directory / "inventory.json.sha512"
30
+ end
31
+
32
+ def inventory_file_checksum
33
+ Digest::SHA512.file inventory_file
34
+ end
35
+
36
+ def inventory_file
37
+ directory / "inventory.json"
38
+ end
39
+ end
40
+ end
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OCFL
4
+ # Writes an OCFL Inventory to JSON on disk
5
+ class InventoryWriter
6
+ def initialize(inventory:, path:)
7
+ @path = path
8
+ @inventory = inventory
9
+ end
10
+
11
+ attr_reader :inventory, :path
12
+
13
+ def write
14
+ write_inventory
15
+ update_inventory_checksum
16
+ end
17
+
18
+ def write_inventory
19
+ File.write(inventory_file, JSON.pretty_generate(inventory.to_h))
20
+ end
21
+
22
+ def inventory_file
23
+ path / "inventory.json"
24
+ end
25
+
26
+ def checksum_file
27
+ path / "inventory.json.sha512"
28
+ end
29
+
30
+ def update_inventory_checksum
31
+ digest = Digest::SHA512.file inventory_file
32
+ File.write(checksum_file, "#{digest} inventory.json")
33
+ end
34
+ end
35
+ end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OCFL
4
+ module Layouts
5
+ # An OCFL Storage Root layout for the druid-tree structure
6
+ # @see https://ocfl.io/1.1/spec/#root-structure
7
+ class DruidTree
8
+ DRUID_PARTS_PATTERN = /\A([b-df-hjkmnp-tv-z]{2})([0-9]{3})([b-df-hjkmnp-tv-z]{2})([0-9]{4})\z/i
9
+
10
+ def self.path_to(identifier)
11
+ segments = Array(identifier&.match(DRUID_PARTS_PATTERN)&.captures)
12
+
13
+ raise "druid '#{identifier}' is invalid" unless segments.count == 4
14
+
15
+ Pathname.new(
16
+ File.join(segments)
17
+ )
18
+ end
19
+ end
20
+ end
21
+ end
data/lib/ocfl/object.rb CHANGED
@@ -3,7 +3,143 @@
3
3
  module OCFL
4
4
  # An OCFL Object is a group of one or more content files and administrative information
5
5
  # https://ocfl.io/1.1/spec/#object-spec
6
- module Object
6
+ class Object
7
7
  class FileNotFound < RuntimeError; end
8
+
9
+ # @param [String] identifier an object identifier
10
+ # @param [Pathname, String] root the path to the object root within the OCFL structure
11
+ # @param [Inventory, nil] inventory this is only passed in when creating a new version
12
+ # @param [String, nil] content_directory the name of the directory inside each version that holds content (the inventory falls back to "content" when nil)
13
+ def initialize(root:, identifier:, inventory: nil, content_directory: nil)
14
+ @identifier = identifier
15
+ @root = Pathname.new(root)
16
+ @content_directory = content_directory
17
+ @version_inventory = {}
18
+ @version_inventory_errors = {}
19
+ @inventory = inventory
20
+ end
21
+
22
+ attr_reader :root, :errors, :identifier
23
+
24
+ delegate :head, :versions, :manifest, to: :inventory
25
+
26
+ def exists?
27
+ namaste_file.exist?
28
+ end
29
+
30
+ def path(filepath:, version: nil)
31
+ version ||= head
32
+ relative_path = version_inventory(version).path(filepath)
33
+
34
+ raise FileNotFound, "Path '#{filepath}' not found in #{version} inventory" if relative_path.nil?
35
+
36
+ root / relative_path
37
+ end
38
+
39
+ def inventory
40
+ @inventory ||= begin
41
+ maybe_inventory, inventory_loading_errors = load_or_initialize_inventory
42
+ if maybe_inventory
43
+ maybe_inventory
44
+ else
45
+ @errors = inventory_loading_errors
46
+ puts @errors.messages.inspect
47
+ nil
48
+ end
49
+ end
50
+ end
51
+
52
+ def head_inventory
53
+ version_inventory(inventory.head)
54
+ end
55
+
56
+ def version_inventory(version)
57
+ @version_inventory[version] ||= begin
58
+ maybe_inventory, inventory_loading_errors = load_or_initialize_inventory(version:)
59
+ if maybe_inventory
60
+ maybe_inventory
61
+ else
62
+ @version_inventory_errors[version] = inventory_loading_errors
63
+ puts @version_inventory_errors[version].messages.inspect
64
+ nil
65
+ end
66
+ end
67
+ end
68
+
69
+ def valid?
70
+ InventoryValidator.new(directory: root).valid? &&
71
+ exists? &&
72
+ !inventory.nil? && # Ensures it could be loaded
73
+ head_directory_valid?
74
+ end
75
+
76
+ def head_directory_valid?
77
+ InventoryValidator.new(directory: root / inventory.head).valid? &&
78
+ !head_inventory.nil? # Ensures it could be loaded
79
+ end
80
+
81
+ # Start a completely new version
82
+ def begin_new_version
83
+ VersionBuilder.new(object: self, state:)
84
+ end
85
+
86
+ # Get a handle for the head version
87
+ def head_version
88
+ VersionBuilder.new(object: self, overwrite_head: true, state: head_inventory.state)
89
+ end
90
+
91
+ # Get a handle that will replace the existing head version
92
+ def overwrite_current_version
93
+ VersionBuilder.new(object: self, overwrite_head: true)
94
+ end
95
+
96
+ def reload
97
+ @version_inventory = {}
98
+ @inventory = nil
99
+ @errors = nil
100
+ @version_inventory_errors = {}
101
+ true
102
+ end
103
+
104
+ def namaste_file
105
+ root / "0=ocfl_object_1.1"
106
+ end
107
+
108
+ private
109
+
110
+ def load_or_initialize_inventory(version: "")
111
+ inventory_path = root / version / "inventory.json"
112
+
113
+ return [new_inventory, nil] unless inventory_path.exist?
114
+
115
+ data = InventoryLoader.load(inventory_path)
116
+ if data.success?
117
+ [Inventory.new(data: data.value!), nil]
118
+ else
119
+ [nil, data.failure]
120
+ end
121
+ end
122
+
123
+ def state
124
+ return {} if inventory.head == "v0"
125
+
126
+ head_inventory.state
127
+ end
128
+
129
+ def new_inventory # rubocop:disable Metrics/MethodLength
130
+ Inventory.new(
131
+ data: Inventory::InventoryStruct.new(
132
+ {
133
+ id: identifier,
134
+ version: "v0",
135
+ type: Inventory::URI_1_1,
136
+ digestAlgorithm: "sha512",
137
+ head: "v0",
138
+ versions: {},
139
+ manifest: {}
140
+ }.tap { |attrs| attrs[:contentDirectory] = @content_directory if @content_directory }
141
+ )
142
+ )
143
+ end
8
144
  end
9
145
  end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OCFL
4
+ # Represents the OCFL version
5
+ # https://ocfl.io/1.1/spec/#version
6
+ class ObjectVersion < Dry.Struct
7
+ # Represents the OCFL user
8
+ class User < Dry.Struct
9
+ transform_keys(&:to_sym)
10
+ attribute :name, Types::String
11
+ attribute? :address, Types::String
12
+ end
13
+
14
+ transform_keys(&:to_sym)
15
+ attribute :created, Types::String
16
+ attribute :state, Types::Hash.map(Types::String, Types::Array.of(Types::String))
17
+ attribute? :message, Types::String
18
+ attribute? :user, User
19
+
20
+ def file_names
21
+ state.values.flatten
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OCFL
4
+ # An OCFL Storage Root is the base directory of an OCFL storage layout.
5
+ # https://ocfl.io/1.1/spec/#storage-root
6
+ class StorageRoot
7
+ attr_reader :base_directory, :layout
8
+
9
+ delegate :path_to, to: :layout
10
+
11
+ def initialize(base_directory:)
12
+ @base_directory = Pathname.new(base_directory)
13
+ @layout = Layouts::DruidTree
14
+ end
15
+
16
+ def exists?
17
+ base_directory.directory?
18
+ end
19
+
20
+ def valid?
21
+ namaste_file.exist?
22
+ end
23
+
24
+ def save
25
+ # TODO: optionally write the OCFL 1.1 spec
26
+ # TODO: optionally write any given extensions (like the TBD druid-tree layout)
27
+ return if exists? && valid?
28
+
29
+ FileUtils.mkdir_p(base_directory)
30
+ FileUtils.touch(namaste_file)
31
+ true
32
+ end
33
+
34
+ def object(identifier, content_directory = nil)
35
+ root = base_directory / path_to(identifier)
36
+
37
+ Object.new(identifier:, root:, content_directory:)
38
+ end
39
+
40
+ private
41
+
42
+ def namaste_file
43
+ base_directory / "0=ocfl_1.1"
44
+ end
45
+ end
46
+ end
data/lib/ocfl/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module OCFL
4
- VERSION = "0.8.1"
4
+ VERSION = "0.9.1"
5
5
  end
@@ -0,0 +1,152 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OCFL
4
+ # Build a new version
5
+ class VersionBuilder
6
+ # @param [Object] object
7
+ def initialize(object:, overwrite_head: false, state: {})
8
+ @object = object
9
+ @manifest = object.inventory.manifest.dup
10
+ @state = state
11
+
12
+ number = object.head.delete_prefix("v").to_i
13
+ @version_number = "v#{overwrite_head ? number : number + 1}"
14
+ @prepared_content = @prepared = overwrite_head
15
+ end
16
+
17
+ attr_reader :object, :manifest, :state, :version_number
18
+
19
+ delegate :file_names, to: :to_version_struct
20
+
21
+ def move_file(incoming_path)
22
+ prepare_content_directory
23
+ already_stored = add(incoming_path)
24
+ return if already_stored
25
+
26
+ FileUtils.mv(incoming_path, content_path)
27
+ end
28
+
29
+ def copy_file(incoming_path, destination_path: "")
30
+ prepare_content_directory
31
+ copy_one(destination_path.presence || File.basename(incoming_path), incoming_path)
32
+ end
33
+
34
+ def digest_for_filename(filename)
35
+ state.find { |_, filenames| filenames.include?(filename) }&.first
36
+ end
37
+
38
+ # Note, this only removes the file from this version. Previous versions may still use it.
39
+ def delete_file(filename)
40
+ sha512_digest = digest_for_filename(filename)
41
+ raise "Unknown file: #{filename}" unless sha512_digest
42
+
43
+ state.delete(sha512_digest)
44
+ # If the manifest points at the current content directory, then we can delete it.
45
+ file_paths = manifest[sha512_digest]
46
+ return unless file_paths.all? { |path| path.start_with?("#{version_number}/") }
47
+
48
+ File.unlink (object.root + file_paths.first).to_s
49
+ end
50
+
51
+ # Copies files into the object and preserves their relative paths as logical directories in the object
52
+ def copy_recursive(incoming_path, destination_path: "")
53
+ prepare_content_directory
54
+ incoming_path = incoming_path.delete_suffix("/")
55
+ Dir.glob("#{incoming_path}/**/*").reject { |fn| File.directory?(fn) }.each do |file|
56
+ logical_file_path = file.delete_prefix(incoming_path).delete_prefix("/")
57
+ logical_file_path = File.join(destination_path, logical_file_path) unless destination_path.empty?
58
+
59
+ copy_one(logical_file_path, file)
60
+ end
61
+ end
62
+
63
+ def save
64
+ prepare_directory # only necessary if the version has no new content (deletes only)
65
+ write_inventory(build_inventory)
66
+ object.reload
67
+ end
68
+
69
+ private
70
+
71
+ def to_version_struct
72
+ ObjectVersion.new(state:, created: Time.now.utc.iso8601)
73
+ end
74
+
75
+ def write_inventory(inventory)
76
+ InventoryWriter.new(inventory:, path:).write
77
+ FileUtils.cp(path / "inventory.json", object.root)
78
+ FileUtils.cp(path / "inventory.json.sha512", object.root)
79
+ end
80
+
81
+ # @param [String] logical_file_path where we're going to store the file (e.g. 'object/directory_builder_spec.rb')
82
+ # @param [String] incoming_path where's this file from (e.g. 'spec/ocfl/object/directory_builder_spec.rb')
83
+ def copy_one(logical_file_path, incoming_path)
84
+ already_stored = add(incoming_path, logical_file_path:)
85
+ return if already_stored
86
+
87
+ parent_dir = (content_path / logical_file_path).parent
88
+ FileUtils.mkdir_p(parent_dir) unless parent_dir == content_path
89
+ FileUtils.cp(incoming_path, content_path / logical_file_path)
90
+ end
91
+
92
+ # @return [Boolean] true if the file already existed in this object. If false, the file must be
93
+ # moved to the content directory.
94
+ def add(incoming_path, logical_file_path: File.basename(incoming_path))
95
+ digest = Digest::SHA512.file(incoming_path).to_s
96
+ version_content_path = content_path.relative_path_from(object.root)
97
+ file_path_relative_to_root = (version_content_path / logical_file_path).to_s
98
+ result = @manifest.key?(digest)
99
+ @manifest[digest] ||= []
100
+ @state[digest] ||= []
101
+ @manifest[digest].push(file_path_relative_to_root)
102
+ @state[digest].push(logical_file_path)
103
+ result
104
+ end
105
+
106
+ def prepare_content_directory
107
+ prepare_directory
108
+ return if @prepared_content
109
+
110
+ FileUtils.mkdir(content_path)
111
+ @prepared_content = true
112
+ end
113
+
114
+ def prepare_directory
115
+ return if @prepared
116
+
117
+ FileUtils.mkdir_p(path)
118
+ FileUtils.touch(object.namaste_file) if version_number == "v1"
119
+ @prepared = true
120
+ end
121
+
122
+ def content_path
123
+ path / object.inventory.content_directory
124
+ end
125
+
126
+ def path
127
+ object.root / version_number
128
+ end
129
+
130
+ def build_inventory
131
+ old_data = object.inventory.data
132
+ versions = versions(old_data.versions)
133
+
134
+ # Prune items from manifest if they are not part of any version
135
+ Inventory::InventoryStruct.new(old_data.to_h.merge(manifest: filtered_manifest(versions),
136
+ head: version_number, versions:))
137
+ end
138
+
139
+ # This gives the updated list of versions: the old list plus this new one.
140
+ # @param [Hash] old_versions the versions prior to this one.
141
+ def versions(old_versions)
142
+ old_versions.merge(version_number => to_version_struct)
143
+ end
144
+
145
+ # The manifest after unused SHAs have been filtered out.
146
+ def filtered_manifest(versions)
147
+ shas_in_versions = versions.values.flat_map { |v| v.state.keys }.uniq
148
+ manifest.slice!(*shas_in_versions)
149
+ manifest
150
+ end
151
+ end
152
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ocfl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.1
4
+ version: 0.9.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Justin Coyne
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-05-31 00:00:00.000000000 Z
11
+ date: 2024-06-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -92,16 +92,16 @@ files:
92
92
  - README.md
93
93
  - Rakefile
94
94
  - lib/ocfl.rb
95
+ - lib/ocfl/inventory.rb
96
+ - lib/ocfl/inventory_loader.rb
97
+ - lib/ocfl/inventory_validator.rb
98
+ - lib/ocfl/inventory_writer.rb
99
+ - lib/ocfl/layouts/druid_tree.rb
95
100
  - lib/ocfl/object.rb
96
- - lib/ocfl/object/directory.rb
97
- - lib/ocfl/object/directory_builder.rb
98
- - lib/ocfl/object/draft_version.rb
99
- - lib/ocfl/object/inventory.rb
100
- - lib/ocfl/object/inventory_loader.rb
101
- - lib/ocfl/object/inventory_validator.rb
102
- - lib/ocfl/object/inventory_writer.rb
103
- - lib/ocfl/object/version.rb
101
+ - lib/ocfl/object_version.rb
102
+ - lib/ocfl/storage_root.rb
104
103
  - lib/ocfl/version.rb
104
+ - lib/ocfl/version_builder.rb
105
105
  - sig/ocfl.rbs
106
106
  - tmp/.keep
107
107
  homepage: https://github.com/sul-dlss/ocfl-rb
@@ -126,7 +126,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
126
126
  - !ruby/object:Gem::Version
127
127
  version: '0'
128
128
  requirements: []
129
- rubygems_version: 3.4.19
129
+ rubygems_version: 3.5.10
130
130
  signing_key:
131
131
  specification_version: 4
132
132
  summary: A ruby library for interacting with the Oxford Common File Layout (OCFL)
@@ -1,107 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module OCFL
4
- module Object
5
- # An OCFL Directory layout for a particular object.
6
- class Directory
7
- # @param [String] object_root
8
- # @param [Inventory] inventory this is only passed in when creating a new object. (see DirectoryBuilder)
9
- def initialize(object_root:, inventory: nil)
10
- @object_root = Pathname.new(object_root)
11
- @version_inventory = {}
12
- @version_inventory_errors = {}
13
- @inventory = inventory
14
- end
15
-
16
- attr_reader :object_root, :errors
17
-
18
- delegate :head, :versions, :manifest, to: :inventory
19
-
20
- def path(filepath:, version: nil)
21
- version ||= head
22
- relative_path = version_inventory(version).path(filepath)
23
-
24
- raise FileNotFound, "Path '#{filepath}' not found in #{version} inventory" if relative_path.nil?
25
-
26
- object_root / relative_path
27
- end
28
-
29
- def inventory
30
- @inventory ||= begin
31
- data = InventoryLoader.load(object_root / "inventory.json")
32
- if data.success?
33
- Inventory.new(data: data.value!)
34
- else
35
- @errors = data.failure
36
- puts @errors.messages.inspect
37
- nil
38
- end
39
- end
40
- end
41
-
42
- def head_inventory
43
- version_inventory(inventory.head)
44
- end
45
-
46
- def version_inventory(version)
47
- @version_inventory[version] ||= begin
48
- data = InventoryLoader.load(object_root / version / "inventory.json")
49
- if data.success?
50
- Inventory.new(data: data.value!)
51
- else
52
- @version_inventory_errors[version] = data.failure
53
- puts @version_inventory_errors[version].messages.inspect
54
- nil
55
- end
56
- end
57
- end
58
-
59
- def reload
60
- @version_inventory = {}
61
- @inventory = nil
62
- @errors = nil
63
- @version_inventory_errors = {}
64
- true
65
- end
66
-
67
- # Start a completely new version
68
- def begin_new_version
69
- DraftVersion.new(object_directory: self, state: head_inventory.state)
70
- end
71
-
72
- # Get a handle for the head version
73
- def head_version
74
- DraftVersion.new(object_directory: self, overwrite_head: true, state: head_inventory.state)
75
- end
76
-
77
- # Get a handle that will replace the existing head version
78
- def overwrite_current_version
79
- DraftVersion.new(object_directory: self, overwrite_head: true)
80
- end
81
-
82
- def exists?
83
- namaste_exists?
84
- end
85
-
86
- def valid?
87
- InventoryValidator.new(directory: object_root).valid? &&
88
- namaste_exists? &&
89
- !inventory.nil? && # Ensures it could be loaded
90
- head_directory_valid?
91
- end
92
-
93
- def head_directory_valid?
94
- InventoryValidator.new(directory: object_root / inventory.head).valid? &&
95
- !head_inventory.nil? # Ensures it could be loaded
96
- end
97
-
98
- def namaste_exists?
99
- File.exist?(namaste_file)
100
- end
101
-
102
- def namaste_file
103
- object_root / "0=ocfl_object_1.1"
104
- end
105
- end
106
- end
107
- end
@@ -1,69 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module OCFL
4
- module Object
5
- # Creates a OCFL Directory layout for a particular object.
6
- class DirectoryBuilder
7
- class ObjectExists < Error; end
8
-
9
- def initialize(object_root:, id:, content_directory: nil)
10
- @object_root = Pathname.new(object_root)
11
- raise ObjectExists, "The directory `#{object_root}' already exists" if @object_root.exist?
12
-
13
- @id = id
14
- inventory = Inventory.new(
15
- data: Inventory::InventoryStruct.new(
16
- new_inventory_attrs.tap { |attrs| attrs[:contentDirectory] = content_directory if content_directory }
17
- )
18
- )
19
- @object_directory = Directory.new(object_root:, inventory:)
20
- end
21
-
22
- attr_reader :id, :inventory, :object_root, :object_directory
23
-
24
- def copy_file(...)
25
- create_object_directory
26
- version.copy_file(...)
27
- end
28
-
29
- def copy_recursive(...)
30
- create_object_directory
31
- version.copy_recursive(...)
32
- end
33
-
34
- def create_object_directory
35
- FileUtils.mkdir_p(object_root)
36
- FileUtils.touch(object_directory.namaste_file) unless File.exist?(object_directory.namaste_file)
37
- end
38
-
39
- # @return [Directory]
40
- def save
41
- create_object_directory
42
- write_inventory
43
- object_directory
44
- end
45
-
46
- def version
47
- @version ||= DraftVersion.new(object_directory:)
48
- end
49
-
50
- def write_inventory
51
- version.save
52
- end
53
-
54
- private
55
-
56
- def new_inventory_attrs
57
- {
58
- id:,
59
- version: "v0",
60
- type: Inventory::URI_1_1,
61
- digestAlgorithm: "sha512",
62
- head: "v0",
63
- versions: {},
64
- manifest: {}
65
- }
66
- end
67
- end
68
- end
69
- end
@@ -1,153 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module OCFL
4
- module Object
5
- # A new OCFL version
6
- class DraftVersion
7
- # @params [Directory] object_directory
8
- def initialize(object_directory:, overwrite_head: false, state: {})
9
- @object_directory = object_directory
10
- @manifest = object_directory.inventory.manifest.dup
11
- @state = state
12
-
13
- number = object_directory.head.delete_prefix("v").to_i
14
- @version_number = "v#{overwrite_head ? number : number + 1}"
15
- @prepared_content = @prepared = overwrite_head
16
- end
17
-
18
- attr_reader :object_directory, :manifest, :state, :version_number
19
-
20
- delegate :file_names, to: :to_version_struct
21
-
22
- def move_file(incoming_path)
23
- prepare_content_directory
24
- already_stored = add(incoming_path)
25
- return if already_stored
26
-
27
- FileUtils.mv(incoming_path, content_path)
28
- end
29
-
30
- def copy_file(incoming_path, destination_path: "")
31
- prepare_content_directory
32
- copy_one(destination_path.presence || File.basename(incoming_path), incoming_path)
33
- end
34
-
35
- def digest_for_filename(filename)
36
- state.find { |_, filenames| filenames.include?(filename) }&.first
37
- end
38
-
39
- # Note, this only removes the file from this version. Previous versions may still use it.
40
- def delete_file(filename)
41
- sha512_digest = digest_for_filename(filename)
42
- raise "Unknown file: #{filename}" unless sha512_digest
43
-
44
- state.delete(sha512_digest)
45
- # If the manifest points at the current content directory, then we can delete it.
46
- file_paths = manifest[sha512_digest]
47
- return unless file_paths.all? { |path| path.start_with?("#{version_number}/") }
48
-
49
- File.unlink (object_directory.object_root + file_paths.first).to_s
50
- end
51
-
52
- # Copies files into the object and preserves their relative paths as logical directories in the object
53
- def copy_recursive(incoming_path, destination_path: "")
54
- prepare_content_directory
55
- incoming_path = incoming_path.delete_suffix("/")
56
- Dir.glob("#{incoming_path}/**/*").reject { |fn| File.directory?(fn) }.each do |file|
57
- logical_file_path = file.delete_prefix(incoming_path).delete_prefix("/")
58
- logical_file_path = File.join(destination_path, logical_file_path) unless destination_path.empty?
59
-
60
- copy_one(logical_file_path, file)
61
- end
62
- end
63
-
64
- def save
65
- prepare_directory # only necessary if the version has no new content (deletes only)
66
- write_inventory(build_inventory)
67
- object_directory.reload
68
- end
69
-
70
- def to_version_struct
71
- Version.new(state:, created: Time.now.utc.iso8601)
72
- end
73
-
74
- private
75
-
76
- def write_inventory(inventory)
77
- InventoryWriter.new(inventory:, path:).write
78
- FileUtils.cp(path / "inventory.json", object_directory.object_root)
79
- FileUtils.cp(path / "inventory.json.sha512", object_directory.object_root)
80
- end
81
-
82
- # @param [String] logical_file_path where we're going to store the file (e.g. 'object/directory_builder_spec.rb')
83
- # @param [String] incoming_path where's this file from (e.g. 'spec/ocfl/object/directory_builder_spec.rb')
84
- def copy_one(logical_file_path, incoming_path)
85
- already_stored = add(incoming_path, logical_file_path:)
86
- return if already_stored
87
-
88
- parent_dir = (content_path / logical_file_path).parent
89
- FileUtils.mkdir_p(parent_dir) unless parent_dir == content_path
90
- FileUtils.cp(incoming_path, content_path / logical_file_path)
91
- end
92
-
93
- # @return [Boolean] true if the file already existed in this object. If false, the object must be
94
- # moved to the content directory.
95
- def add(incoming_path, logical_file_path: File.basename(incoming_path))
96
- digest = Digest::SHA512.file(incoming_path).to_s
97
- version_content_path = content_path.relative_path_from(object_directory.object_root)
98
- file_path_relative_to_root = (version_content_path / logical_file_path).to_s
99
- result = @manifest.key?(digest)
100
- @manifest[digest] ||= []
101
- @state[digest] ||= []
102
- @manifest[digest].push(file_path_relative_to_root)
103
- @state[digest].push(logical_file_path)
104
- result
105
- end
106
-
107
- def prepare_content_directory
108
- prepare_directory
109
- return if @prepared_content
110
-
111
- FileUtils.mkdir(content_path)
112
- @prepared_content = true
113
- end
114
-
115
- def prepare_directory
116
- return if @prepared
117
-
118
- FileUtils.mkdir(path)
119
- @prepared = true
120
- end
121
-
122
- def content_path
123
- path / object_directory.inventory.content_directory
124
- end
125
-
126
- def path
127
- object_directory.object_root / version_number
128
- end
129
-
130
- def build_inventory
131
- old_data = object_directory.inventory.data
132
- versions = versions(old_data.versions)
133
-
134
- # Prune items from manifest if they are not part of any version
135
- Inventory::InventoryStruct.new(old_data.to_h.merge(manifest: filtered_manifest(versions),
136
- head: version_number, versions:))
137
- end
138
-
139
- # This gives the update list of versions. The old list plus this new one.
140
- # @param [Hash] old_versions the versions prior to this one.
141
- def versions(old_versions)
142
- old_versions.merge(version_number => to_version_struct)
143
- end
144
-
145
- # The manifest after unused SHAs have been filtered out.
146
- def filtered_manifest(versions)
147
- shas_in_versions = versions.values.flat_map { |v| v.state.keys }.uniq
148
- manifest.slice!(*shas_in_versions)
149
- manifest
150
- end
151
- end
152
- end
153
- end
@@ -1,49 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module OCFL
4
- module Object
5
- # Represents the JSON file that stores the object inventory
6
- # https://ocfl.io/1.1/spec/#inventory
7
- class Inventory
8
- URI_1_1 = "https://ocfl.io/1.1/spec/#inventory"
9
-
10
- # A data structure for the inventory
11
- class InventoryStruct < Dry::Struct
12
- transform_keys(&:to_sym)
13
- attribute :id, Types::String
14
- attribute :type, Types::String
15
- attribute :digestAlgorithm, Types::String
16
- attribute :head, Types::String
17
- attribute? :contentDirectory, Types::String
18
- attribute :versions, Types::Hash.map(Types::String, Version)
19
- attribute :manifest, Types::Hash
20
- end
21
-
22
- def initialize(data:)
23
- @data = data
24
- end
25
-
26
- attr_reader :errors, :data
27
-
28
- delegate :id, :head, :versions, :manifest, to: :data
29
- delegate :state, to: :head_version
30
-
31
- def content_directory
32
- data.contentDirectory || "content"
33
- end
34
-
35
- # @return [String,nil] the path to the file relative to the object root. (e.g. v2/content/image.tiff)
36
- def path(logical_path)
37
- digest, = state.find { |_, logical_paths| logical_paths.include?(logical_path) }
38
-
39
- return unless digest
40
-
41
- manifest[digest].find { |content_path| content_path.match(%r{\Av\d+/#{content_directory}/#{logical_path}\z}) }
42
- end
43
-
44
- def head_version
45
- versions[head]
46
- end
47
- end
48
- end
49
- end
@@ -1,45 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module OCFL
4
- module Object
5
- # Loads an Inventory object from JSON
6
- class InventoryLoader
7
- include Dry::Monads[:result]
8
-
9
- VersionEnum = Types::String.enum(Inventory::URI_1_1)
10
- DigestAlgorithm = Types::String.enum("md5", "sha1", "sha256", "sha512", "blake2b-512")
11
-
12
- # https://ocfl.io/1.1/spec/#inventory-structure
13
- # Validation of the incoming data
14
- Schema = Dry::Schema.Params do
15
- # config.validate_keys = true
16
- required(:id).filled(:string)
17
- required(:type).filled(VersionEnum)
18
- required(:digestAlgorithm).filled(DigestAlgorithm)
19
- required(:head).filled(:string)
20
- optional(:contentDirectory).filled(:string)
21
- required(:versions).hash
22
- required(:manifest).hash
23
- end
24
-
25
- def self.load(file_name)
26
- new(file_name).load
27
- end
28
-
29
- def initialize(file_name)
30
- @file_name = file_name
31
- end
32
-
33
- def load
34
- bytestream = File.read(@file_name)
35
- data = JSON.parse(bytestream)
36
- errors = Schema.call(data).errors
37
- if errors.empty?
38
- Success(Inventory::InventoryStruct.new(data))
39
- else
40
- Failure(errors)
41
- end
42
- end
43
- end
44
- end
45
- end
@@ -1,42 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module OCFL
4
- module Object
5
- # Checks to see that the inventory.json and its checksum in a directory are valid
6
- class InventoryValidator
7
- def initialize(directory:)
8
- @directory = Pathname.new(directory)
9
- end
10
-
11
- attr_reader :directory
12
-
13
- def valid?
14
- inventory_file_exists? && inventory_file_matches_checksum?
15
- end
16
-
17
- def inventory_file_exists?
18
- File.exist?(inventory_file)
19
- end
20
-
21
- def inventory_file_matches_checksum?
22
- return false unless File.exist?(inventory_checksum_file)
23
-
24
- actual = inventory_file_checksum
25
- expected = File.read(inventory_checksum_file)
26
- expected.match?(/\A#{actual}\s+inventory\.json\z/)
27
- end
28
-
29
- def inventory_checksum_file
30
- directory / "inventory.json.sha512"
31
- end
32
-
33
- def inventory_file_checksum
34
- Digest::SHA512.file inventory_file
35
- end
36
-
37
- def inventory_file
38
- directory / "inventory.json"
39
- end
40
- end
41
- end
42
- end
@@ -1,37 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module OCFL
4
- module Object
5
- # Writes an OCFL Inventory to JSON on disk
6
- class InventoryWriter
7
- def initialize(inventory:, path:)
8
- @path = path
9
- @inventory = inventory
10
- end
11
-
12
- attr_reader :inventory, :path
13
-
14
- def write
15
- write_inventory
16
- update_inventory_checksum
17
- end
18
-
19
- def write_inventory
20
- File.write(inventory_file, JSON.pretty_generate(inventory.to_h))
21
- end
22
-
23
- def inventory_file
24
- path / "inventory.json"
25
- end
26
-
27
- def checksum_file
28
- path / "inventory.json.sha512"
29
- end
30
-
31
- def update_inventory_checksum
32
- digest = Digest::SHA512.file inventory_file
33
- File.write(checksum_file, "#{digest} inventory.json")
34
- end
35
- end
36
- end
37
- end
@@ -1,26 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module OCFL
4
- module Object
5
- # Represents the OCFL version
6
- # https://ocfl.io/1.1/spec/#version
7
- class Version < Dry.Struct
8
- # Represents the OCFL user
9
- class User < Dry.Struct
10
- transform_keys(&:to_sym)
11
- attribute :name, Types::String
12
- attribute? :address, Types::String
13
- end
14
-
15
- transform_keys(&:to_sym)
16
- attribute :created, Types::String
17
- attribute :state, Types::Hash.map(Types::String, Types::Array.of(Types::String))
18
- attribute? :message, Types::String
19
- attribute? :user, User
20
-
21
- def file_names
22
- state.values.flatten
23
- end
24
- end
25
- end
26
- end