ocfl 0.8.1 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c21578c64e68d5bdab7fcea3b08bf8210a967a52d0d6b0f2ecdc203a4f2eaca4
4
- data.tar.gz: 48503dd0f9011c88bbf20bc6adf073d2b1ff422ac1ed06063c332f7c3bbf619e
3
+ metadata.gz: 73309493b42b637ca8a3526c52d50531996f77495ae114a9fa6963b0a3a3b67f
4
+ data.tar.gz: f7bd61220ac24196b86ae50a6cf44e301002c6cf265b0debbe643abc1b088e50
5
5
  SHA512:
6
- metadata.gz: 74169c05ca7d91082db0198d252234956c7a47139d57419f5be14e8e23168af39e75b9d9e78d397bd1b65fbc688834ee84cd1456d452795163c777c6c7da15c7
7
- data.tar.gz: acd185ad8a09d1b016d9c6867d5b4b68005fbe1227abe8848912f917c3d71f4d535cd1847cc4422606fec0d5fc01b9dbf59b6b613c4e5701dfca1703ebae79dc
6
+ metadata.gz: c94aa2013e50467adb00b2e4e2ea5a53b29947490ed069d13a117ec0bdddb8480cb81a369e9f1f4d4639434fa63b1cf89ce77ed309527b0dc2f49685f6e4ceda
7
+ data.tar.gz: 9922c3d6521295df8384b2927daeb573f2e762ee0f69b111ab23cca5931fe32ee05b346ebd31bf3779f41e89d31a4119fadc18308f48ca4f762376b4afe3ad2c
data/.rubocop.yml CHANGED
@@ -31,6 +31,10 @@ RSpec/MultipleExpectations:
31
31
  RSpec/ExampleLength:
32
32
  Max: 10
33
33
 
34
+ RSpec/InstanceVariable:
35
+ Exclude:
36
+ - spec/support/temp_directory.rb
37
+
34
38
  RSpec/MultipleMemoizedHelpers:
35
39
  Max: 8
36
40
 
data/README.md CHANGED
@@ -16,36 +16,50 @@ If bundler is not being used to manage dependencies, install the gem by executin
16
16
  ## Usage
17
17
 
18
18
  ```ruby
19
- directory = OCFL::Object::Directory.new(object_root: '/files/[object_root]')
20
- directory.exists?
19
+ storage_root = OCFL::StorageRoot.new(base_directory: '/files')
20
+ storage_root.exists?
21
+ # => false
22
+ storage_root.valid?
21
23
  # => false
22
- builder = OCFL::Object::DirectoryBuilder.new(object_root: 'spec/abc123', id: 'http://example.com/abc123')
23
- builder.copy_file('sig/ocfl.rbs', destination_path: 'ocfl/types/generated.rbs')
24
24
 
25
- directory = builder.save
26
- directory.exists?
25
+ storage_root.save
26
+ storage_root.exists?
27
27
  # => true
28
- directory.valid?
28
+ storage_root.valid?
29
29
  # => true
30
+
31
+ object = storage_root.object('bc123df4567') # returns an instance of `OCFL::Object`
32
+ object.exists?
33
+ # => false
34
+ object.valid?
35
+ # => false
36
+ object.head
37
+ # => 'v0'
30
38
  ```
31
39
 
32
40
  ### Versions
33
41
 
34
- There are three ways to get a version with an existing object directory.
42
+ To build out an object, you'll need to create one or more versions.
43
+
44
+ There are three ways to get a version within an existing object directory.
35
45
 
36
46
  #### Start a new version
37
47
  ```
38
- new_version = directory.begin_new_version
39
- new_version.copy_file('sig/ocfl.rbs')
48
+ new_version = object.begin_new_version
49
+ new_version.copy_file('sig/ocfl.rbs', destination_path: 'ocfl/types/generated.rbs')
40
50
  new_version.save
41
51
 
42
- directory.head
43
- # => 'v2'
52
+ object.exists?
53
+ # => true
54
+ object.valid?
55
+ # => true
56
+ object.head
57
+ # => 'v1'
44
58
  ```
45
59
 
46
60
  #### Modify the existing head version
47
61
  ```
48
- new_version = directory.head_version
62
+ new_version = object.head_version
49
63
  new_version.delete_file('sample.txt')
50
64
  new_version.copy_file('sig/ocfl.rbs')
51
65
  new_version.save
@@ -53,7 +67,7 @@ new_version.save
53
67
 
54
68
  #### Overwrite the existing head version
55
69
  ```
56
- new_version = directory.overwrite_current_version
70
+ new_version = object.overwrite_current_version
57
71
  new_version.copy_file('sig/ocfl.rbs')
58
72
  new_version.save
59
73
  ```
@@ -61,7 +75,7 @@ new_version.save
61
75
  ### File paths
62
76
  ```
63
77
  # List file names that were part of a given version
64
- directory.versions['v2'].file_names
78
+ object.versions['v1'].file_names
65
79
  # => ["ocfl.rbs"]
66
80
 
67
81
  # Or on the head version
@@ -69,13 +83,12 @@ directory.head_version.file_names
69
83
  # => ["ocfl.rbs"]
70
84
 
71
85
  # Get the path of a file in a given version
72
- directory.path(filepath: "ocfl.rbs", version: "v2")
73
- # => <Pathname:/files/[object_root]/v2/content/ocfl.rbs>
86
+ object.path(filepath: "ocfl.rbs", version: "v1")
87
+ # => <Pathname:/files/[object_root]/v1/content/ocfl.rbs>
74
88
 
75
89
  # Get the path of a file in the head version
76
- directory.path(filepath: "ocfl.rbs")
77
- # => <Pathname:/files/[object_root]/v2/content/ocfl.rbs>
78
-
90
+ object.path(filepath: "ocfl.rbs")
91
+ # => <Pathname:/files/[object_root]/v1/content/ocfl.rbs>
79
92
  ```
80
93
 
81
94
  ## Development
@@ -0,0 +1,50 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OCFL
4
+ # Represents the JSON file that stores the object inventory
5
+ # https://ocfl.io/1.1/spec/#inventory
6
+ class Inventory
7
+ URI_1_1 = "https://ocfl.io/1.1/spec/#inventory"
8
+
9
+ # A data structure for the inventory
10
+ class InventoryStruct < Dry::Struct
11
+ transform_keys(&:to_sym)
12
+ attribute :id, Types::String
13
+ attribute :type, Types::String
14
+ attribute :digestAlgorithm, Types::String
15
+ attribute :head, Types::String
16
+ attribute? :contentDirectory, Types::String
17
+ attribute :versions, Types::Hash.map(Types::String, ObjectVersion)
18
+ attribute :manifest, Types::Hash
19
+ end
20
+
21
+ # @param [InventoryStruct] data
22
+ def initialize(data:)
23
+ @data = data
24
+ end
25
+
26
+ attr_reader :errors, :data
27
+
28
+ delegate :id, :head, :versions, :manifest, to: :data
29
+ delegate :state, to: :head_version
30
+
31
+ def content_directory
32
+ data.contentDirectory || "content"
33
+ end
34
+
35
+ # @return [String,nil] the path to the file relative to the object root. (e.g. v2/content/image.tiff)
36
+ def path(logical_path)
37
+ return unless head_version # object does not exist on disk
38
+
39
+ digest, = state.find { |_, logical_paths| logical_paths.include?(logical_path) }
40
+
41
+ return unless digest
42
+
43
+ manifest[digest].find { |content_path| content_path.match(%r{\Av\d+/#{content_directory}/#{logical_path}\z}) }
44
+ end
45
+
46
+ def head_version
47
+ versions[head]
48
+ end
49
+ end
50
+ end
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OCFL
4
+ # Loads an Inventory object from JSON
5
+ class InventoryLoader
6
+ include Dry::Monads[:result]
7
+
8
+ VersionEnum = Types::String.enum(Inventory::URI_1_1)
9
+ DigestAlgorithm = Types::String.enum("md5", "sha1", "sha256", "sha512", "blake2b-512")
10
+
11
+ # https://ocfl.io/1.1/spec/#inventory-structure
12
+ # Validation of the incoming data
13
+ Schema = Dry::Schema.Params do
14
+ # config.validate_keys = true
15
+ required(:id).filled(:string)
16
+ required(:type).filled(VersionEnum)
17
+ required(:digestAlgorithm).filled(DigestAlgorithm)
18
+ required(:head).filled(:string)
19
+ optional(:contentDirectory).filled(:string)
20
+ required(:versions).hash
21
+ required(:manifest).hash
22
+ end
23
+
24
+ def self.load(file_name)
25
+ new(file_name).load
26
+ end
27
+
28
+ def initialize(file_name)
29
+ @file_name = file_name
30
+ end
31
+
32
+ def load
33
+ bytestream = File.read(@file_name)
34
+ data = JSON.parse(bytestream)
35
+ errors = Schema.call(data).errors
36
+ if errors.empty?
37
+ Success(Inventory::InventoryStruct.new(data))
38
+ else
39
+ Failure(errors)
40
+ end
41
+ end
42
+ end
43
+ end
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OCFL
4
+ # Checks to see that the inventory.json and its checksum in a directory are valid
5
+ class InventoryValidator
6
+ def initialize(directory:)
7
+ @directory = Pathname.new(directory)
8
+ end
9
+
10
+ attr_reader :directory
11
+
12
+ def valid?
13
+ inventory_file_exists? && inventory_file_matches_checksum?
14
+ end
15
+
16
+ def inventory_file_exists?
17
+ File.exist?(inventory_file)
18
+ end
19
+
20
+ def inventory_file_matches_checksum?
21
+ return false unless File.exist?(inventory_checksum_file)
22
+
23
+ actual = inventory_file_checksum
24
+ expected = File.read(inventory_checksum_file)
25
+ expected.match?(/\A#{actual}\s+inventory\.json\z/)
26
+ end
27
+
28
+ def inventory_checksum_file
29
+ directory / "inventory.json.sha512"
30
+ end
31
+
32
+ def inventory_file_checksum
33
+ Digest::SHA512.file inventory_file
34
+ end
35
+
36
+ def inventory_file
37
+ directory / "inventory.json"
38
+ end
39
+ end
40
+ end
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OCFL
4
+ # Writes an OCFL Inventory to JSON on disk
5
+ class InventoryWriter
6
+ def initialize(inventory:, path:)
7
+ @path = path
8
+ @inventory = inventory
9
+ end
10
+
11
+ attr_reader :inventory, :path
12
+
13
+ def write
14
+ write_inventory
15
+ update_inventory_checksum
16
+ end
17
+
18
+ def write_inventory
19
+ File.write(inventory_file, JSON.pretty_generate(inventory.to_h))
20
+ end
21
+
22
+ def inventory_file
23
+ path / "inventory.json"
24
+ end
25
+
26
+ def checksum_file
27
+ path / "inventory.json.sha512"
28
+ end
29
+
30
+ def update_inventory_checksum
31
+ digest = Digest::SHA512.file inventory_file
32
+ File.write(checksum_file, "#{digest} inventory.json")
33
+ end
34
+ end
35
+ end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OCFL
4
+ module Layouts
5
+ # An OCFL Storage Root layout for the druid-tree structure
6
+ # @see https://ocfl.io/1.1/spec/#root-structure
7
+ class DruidTree
8
+ DRUID_PARTS_PATTERN = /\A([b-df-hjkmnp-tv-z]{2})([0-9]{3})([b-df-hjkmnp-tv-z]{2})([0-9]{4})\z/i
9
+
10
+ def self.path_to(identifier)
11
+ segments = Array(identifier&.match(DRUID_PARTS_PATTERN)&.captures)
12
+
13
+ raise "druid '#{identifier}' is invalid" unless segments.count == 4
14
+
15
+ Pathname.new(
16
+ File.join(segments)
17
+ )
18
+ end
19
+ end
20
+ end
21
+ end
data/lib/ocfl/object.rb CHANGED
@@ -3,7 +3,143 @@
3
3
  module OCFL
4
4
  # An OCFL Object is a group of one or more content files and administrative information
5
5
  # https://ocfl.io/1.1/spec/#object-spec
6
- module Object
6
+ class Object
7
7
  class FileNotFound < RuntimeError; end
8
+
9
+ # @param [String] identifier an object identifier
10
+ # @param [Pathname, String] root the path to the object root within the OCFL structure
11
+ # @param [Inventory, nil] inventory this is only passed in when creating a new version
12
+ # @param [String, nil] content_directory the name of the directory within each version that holds the content files
13
+ def initialize(root:, identifier:, inventory: nil, content_directory: nil)
14
+ @identifier = identifier
15
+ @root = Pathname.new(root)
16
+ @content_directory = content_directory
17
+ @version_inventory = {}
18
+ @version_inventory_errors = {}
19
+ @inventory = inventory
20
+ end
21
+
22
+ attr_reader :root, :errors, :identifier
23
+
24
+ delegate :head, :versions, :manifest, to: :inventory
25
+
26
+ def exists?
27
+ namaste_file.exist?
28
+ end
29
+
30
+ def path(filepath:, version: nil)
31
+ version ||= head
32
+ relative_path = version_inventory(version).path(filepath)
33
+
34
+ raise FileNotFound, "Path '#{filepath}' not found in #{version} inventory" if relative_path.nil?
35
+
36
+ root / relative_path
37
+ end
38
+
39
+ def inventory
40
+ @inventory ||= begin
41
+ maybe_inventory, inventory_loading_errors = load_or_initialize_inventory
42
+ if maybe_inventory
43
+ maybe_inventory
44
+ else
45
+ @errors = inventory_loading_errors
46
+ puts @errors.messages.inspect
47
+ nil
48
+ end
49
+ end
50
+ end
51
+
52
+ def head_inventory
53
+ version_inventory(inventory.head)
54
+ end
55
+
56
+ def version_inventory(version)
57
+ @version_inventory[version] ||= begin
58
+ maybe_inventory, inventory_loading_errors = load_or_initialize_inventory(version:)
59
+ if maybe_inventory
60
+ maybe_inventory
61
+ else
62
+ @version_inventory_errors[version] = inventory_loading_errors
63
+ puts @version_inventory_errors[version].messages.inspect
64
+ nil
65
+ end
66
+ end
67
+ end
68
+
69
+ def valid?
70
+ InventoryValidator.new(directory: root).valid? &&
71
+ exists? &&
72
+ !inventory.nil? && # Ensures it could be loaded
73
+ head_directory_valid?
74
+ end
75
+
76
+ def head_directory_valid?
77
+ InventoryValidator.new(directory: root / inventory.head).valid? &&
78
+ !head_inventory.nil? # Ensures it could be loaded
79
+ end
80
+
81
+ # Start a completely new version
82
+ def begin_new_version
83
+ VersionBuilder.new(object: self, state:)
84
+ end
85
+
86
+ # Get a handle for the head version
87
+ def head_version
88
+ VersionBuilder.new(object: self, overwrite_head: true, state: head_inventory.state)
89
+ end
90
+
91
+ # Get a handle that will replace the existing head version
92
+ def overwrite_current_version
93
+ VersionBuilder.new(object: self, overwrite_head: true)
94
+ end
95
+
96
+ def reload
97
+ @version_inventory = {}
98
+ @inventory = nil
99
+ @errors = nil
100
+ @version_inventory_errors = {}
101
+ true
102
+ end
103
+
104
+ def namaste_file
105
+ root / "0=ocfl_object_1.1"
106
+ end
107
+
108
+ private
109
+
110
+ def load_or_initialize_inventory(version: "")
111
+ inventory_path = root / version / "inventory.json"
112
+
113
+ return [new_inventory, nil] unless inventory_path.exist?
114
+
115
+ data = InventoryLoader.load(inventory_path)
116
+ if data.success?
117
+ [Inventory.new(data: data.value!), nil]
118
+ else
119
+ [nil, data.failure]
120
+ end
121
+ end
122
+
123
+ def state
124
+ return {} if inventory.head == "v0"
125
+
126
+ head_inventory.state
127
+ end
128
+
129
+ def new_inventory # rubocop:disable Metrics/MethodLength
130
+ Inventory.new(
131
+ data: Inventory::InventoryStruct.new(
132
+ {
133
+ id: identifier,
134
+ version: "v0",
135
+ type: Inventory::URI_1_1,
136
+ digestAlgorithm: "sha512",
137
+ head: "v0",
138
+ versions: {},
139
+ manifest: {}
140
+ }.tap { |attrs| attrs[:contentDirectory] = @content_directory if @content_directory }
141
+ )
142
+ )
143
+ end
8
144
  end
9
145
  end
@@ -0,0 +1,24 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OCFL
4
+ # Represents the OCFL version
5
+ # https://ocfl.io/1.1/spec/#version
6
+ class ObjectVersion < Dry.Struct
7
+ # Represents the OCFL user
8
+ class User < Dry.Struct
9
+ transform_keys(&:to_sym)
10
+ attribute :name, Types::String
11
+ attribute? :address, Types::String
12
+ end
13
+
14
+ transform_keys(&:to_sym)
15
+ attribute :created, Types::String
16
+ attribute :state, Types::Hash.map(Types::String, Types::Array.of(Types::String))
17
+ attribute? :message, Types::String
18
+ attribute? :user, User
19
+
20
+ def file_names
21
+ state.values.flatten
22
+ end
23
+ end
24
+ end
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OCFL
4
+ # An OCFL Storage Root is the base directory of an OCFL storage layout.
5
+ # https://ocfl.io/1.1/spec/#storage-root
6
+ class StorageRoot
7
+ attr_reader :base_directory, :layout
8
+
9
+ delegate :path_to, to: :layout
10
+
11
+ def initialize(base_directory:)
12
+ @base_directory = Pathname.new(base_directory)
13
+ @layout = Layouts::DruidTree
14
+ end
15
+
16
+ def exists?
17
+ base_directory.directory?
18
+ end
19
+
20
+ def valid?
21
+ namaste_file.exist?
22
+ end
23
+
24
+ def save
25
+ # TODO: optionally write the OCFL 1.1 spec
26
+ # TODO: optionally write any given extensions (like the TBD druid-tree layout)
27
+ return if exists? && valid?
28
+
29
+ FileUtils.mkdir_p(base_directory)
30
+ FileUtils.touch(namaste_file)
31
+ true
32
+ end
33
+
34
+ def object(identifier, content_directory = nil)
35
+ root = base_directory / path_to(identifier)
36
+
37
+ Object.new(identifier:, root:, content_directory:)
38
+ end
39
+
40
+ private
41
+
42
+ def namaste_file
43
+ base_directory / "0=ocfl_1.1"
44
+ end
45
+ end
46
+ end
data/lib/ocfl/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module OCFL
4
- VERSION = "0.8.1"
4
+ VERSION = "0.9.1"
5
5
  end
@@ -0,0 +1,152 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OCFL
4
+ # Build a new version
5
+ class VersionBuilder
6
+ # @param [Object] object
7
+ def initialize(object:, overwrite_head: false, state: {})
8
+ @object = object
9
+ @manifest = object.inventory.manifest.dup
10
+ @state = state
11
+
12
+ number = object.head.delete_prefix("v").to_i
13
+ @version_number = "v#{overwrite_head ? number : number + 1}"
14
+ @prepared_content = @prepared = overwrite_head
15
+ end
16
+
17
+ attr_reader :object, :manifest, :state, :version_number
18
+
19
+ delegate :file_names, to: :to_version_struct
20
+
21
+ def move_file(incoming_path)
22
+ prepare_content_directory
23
+ already_stored = add(incoming_path)
24
+ return if already_stored
25
+
26
+ FileUtils.mv(incoming_path, content_path)
27
+ end
28
+
29
+ def copy_file(incoming_path, destination_path: "")
30
+ prepare_content_directory
31
+ copy_one(destination_path.presence || File.basename(incoming_path), incoming_path)
32
+ end
33
+
34
+ def digest_for_filename(filename)
35
+ state.find { |_, filenames| filenames.include?(filename) }&.first
36
+ end
37
+
38
+ # Note, this only removes the file from this version. Previous versions may still use it.
39
+ def delete_file(filename)
40
+ sha512_digest = digest_for_filename(filename)
41
+ raise "Unknown file: #{filename}" unless sha512_digest
42
+
43
+ state.delete(sha512_digest)
44
+ # If the manifest points at the current content directory, then we can delete it.
45
+ file_paths = manifest[sha512_digest]
46
+ return unless file_paths.all? { |path| path.start_with?("#{version_number}/") }
47
+
48
+ File.unlink (object.root + file_paths.first).to_s
49
+ end
50
+
51
+ # Copies files into the object and preserves their relative paths as logical directories in the object
52
+ def copy_recursive(incoming_path, destination_path: "")
53
+ prepare_content_directory
54
+ incoming_path = incoming_path.delete_suffix("/")
55
+ Dir.glob("#{incoming_path}/**/*").reject { |fn| File.directory?(fn) }.each do |file|
56
+ logical_file_path = file.delete_prefix(incoming_path).delete_prefix("/")
57
+ logical_file_path = File.join(destination_path, logical_file_path) unless destination_path.empty?
58
+
59
+ copy_one(logical_file_path, file)
60
+ end
61
+ end
62
+
63
+ def save
64
+ prepare_directory # only necessary if the version has no new content (deletes only)
65
+ write_inventory(build_inventory)
66
+ object.reload
67
+ end
68
+
69
+ private
70
+
71
+ def to_version_struct
72
+ ObjectVersion.new(state:, created: Time.now.utc.iso8601)
73
+ end
74
+
75
+ def write_inventory(inventory)
76
+ InventoryWriter.new(inventory:, path:).write
77
+ FileUtils.cp(path / "inventory.json", object.root)
78
+ FileUtils.cp(path / "inventory.json.sha512", object.root)
79
+ end
80
+
81
+ # @param [String] logical_file_path where we're going to store the file (e.g. 'object/directory_builder_spec.rb')
82
+ # @param [String] incoming_path where's this file from (e.g. 'spec/ocfl/object/directory_builder_spec.rb')
83
+ def copy_one(logical_file_path, incoming_path)
84
+ already_stored = add(incoming_path, logical_file_path:)
85
+ return if already_stored
86
+
87
+ parent_dir = (content_path / logical_file_path).parent
88
+ FileUtils.mkdir_p(parent_dir) unless parent_dir == content_path
89
+ FileUtils.cp(incoming_path, content_path / logical_file_path)
90
+ end
91
+
92
+ # @return [Boolean] true if the file already existed in this object. If false, the file must be
93
+ # moved to the content directory.
94
+ def add(incoming_path, logical_file_path: File.basename(incoming_path))
95
+ digest = Digest::SHA512.file(incoming_path).to_s
96
+ version_content_path = content_path.relative_path_from(object.root)
97
+ file_path_relative_to_root = (version_content_path / logical_file_path).to_s
98
+ result = @manifest.key?(digest)
99
+ @manifest[digest] ||= []
100
+ @state[digest] ||= []
101
+ @manifest[digest].push(file_path_relative_to_root)
102
+ @state[digest].push(logical_file_path)
103
+ result
104
+ end
105
+
106
+ def prepare_content_directory
107
+ prepare_directory
108
+ return if @prepared_content
109
+
110
+ FileUtils.mkdir(content_path)
111
+ @prepared_content = true
112
+ end
113
+
114
+ def prepare_directory
115
+ return if @prepared
116
+
117
+ FileUtils.mkdir_p(path)
118
+ FileUtils.touch(object.namaste_file) if version_number == "v1"
119
+ @prepared = true
120
+ end
121
+
122
+ def content_path
123
+ path / object.inventory.content_directory
124
+ end
125
+
126
+ def path
127
+ object.root / version_number
128
+ end
129
+
130
+ def build_inventory
131
+ old_data = object.inventory.data
132
+ versions = versions(old_data.versions)
133
+
134
+ # Prune items from manifest if they are not part of any version
135
+ Inventory::InventoryStruct.new(old_data.to_h.merge(manifest: filtered_manifest(versions),
136
+ head: version_number, versions:))
137
+ end
138
+
139
+ # This gives the updated list of versions. The old list plus this new one.
140
+ # @param [Hash] old_versions the versions prior to this one.
141
+ def versions(old_versions)
142
+ old_versions.merge(version_number => to_version_struct)
143
+ end
144
+
145
+ # The manifest after unused SHAs have been filtered out.
146
+ def filtered_manifest(versions)
147
+ shas_in_versions = versions.values.flat_map { |v| v.state.keys }.uniq
148
+ manifest.slice!(*shas_in_versions)
149
+ manifest
150
+ end
151
+ end
152
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ocfl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.1
4
+ version: 0.9.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Justin Coyne
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-05-31 00:00:00.000000000 Z
11
+ date: 2024-06-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -92,16 +92,16 @@ files:
92
92
  - README.md
93
93
  - Rakefile
94
94
  - lib/ocfl.rb
95
+ - lib/ocfl/inventory.rb
96
+ - lib/ocfl/inventory_loader.rb
97
+ - lib/ocfl/inventory_validator.rb
98
+ - lib/ocfl/inventory_writer.rb
99
+ - lib/ocfl/layouts/druid_tree.rb
95
100
  - lib/ocfl/object.rb
96
- - lib/ocfl/object/directory.rb
97
- - lib/ocfl/object/directory_builder.rb
98
- - lib/ocfl/object/draft_version.rb
99
- - lib/ocfl/object/inventory.rb
100
- - lib/ocfl/object/inventory_loader.rb
101
- - lib/ocfl/object/inventory_validator.rb
102
- - lib/ocfl/object/inventory_writer.rb
103
- - lib/ocfl/object/version.rb
101
+ - lib/ocfl/object_version.rb
102
+ - lib/ocfl/storage_root.rb
104
103
  - lib/ocfl/version.rb
104
+ - lib/ocfl/version_builder.rb
105
105
  - sig/ocfl.rbs
106
106
  - tmp/.keep
107
107
  homepage: https://github.com/sul-dlss/ocfl-rb
@@ -126,7 +126,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
126
126
  - !ruby/object:Gem::Version
127
127
  version: '0'
128
128
  requirements: []
129
- rubygems_version: 3.4.19
129
+ rubygems_version: 3.5.10
130
130
  signing_key:
131
131
  specification_version: 4
132
132
  summary: A ruby library for interacting with the Oxford Common File Layout (OCFL)
@@ -1,107 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module OCFL
4
- module Object
5
- # An OCFL Directory layout for a particular object.
6
- class Directory
7
- # @param [String] object_root
8
- # @param [Inventory] inventory this is only passed in when creating a new object. (see DirectoryBuilder)
9
- def initialize(object_root:, inventory: nil)
10
- @object_root = Pathname.new(object_root)
11
- @version_inventory = {}
12
- @version_inventory_errors = {}
13
- @inventory = inventory
14
- end
15
-
16
- attr_reader :object_root, :errors
17
-
18
- delegate :head, :versions, :manifest, to: :inventory
19
-
20
- def path(filepath:, version: nil)
21
- version ||= head
22
- relative_path = version_inventory(version).path(filepath)
23
-
24
- raise FileNotFound, "Path '#{filepath}' not found in #{version} inventory" if relative_path.nil?
25
-
26
- object_root / relative_path
27
- end
28
-
29
- def inventory
30
- @inventory ||= begin
31
- data = InventoryLoader.load(object_root / "inventory.json")
32
- if data.success?
33
- Inventory.new(data: data.value!)
34
- else
35
- @errors = data.failure
36
- puts @errors.messages.inspect
37
- nil
38
- end
39
- end
40
- end
41
-
42
- def head_inventory
43
- version_inventory(inventory.head)
44
- end
45
-
46
- def version_inventory(version)
47
- @version_inventory[version] ||= begin
48
- data = InventoryLoader.load(object_root / version / "inventory.json")
49
- if data.success?
50
- Inventory.new(data: data.value!)
51
- else
52
- @version_inventory_errors[version] = data.failure
53
- puts @version_inventory_errors[version].messages.inspect
54
- nil
55
- end
56
- end
57
- end
58
-
59
- def reload
60
- @version_inventory = {}
61
- @inventory = nil
62
- @errors = nil
63
- @version_inventory_errors = {}
64
- true
65
- end
66
-
67
- # Start a completely new version
68
- def begin_new_version
69
- DraftVersion.new(object_directory: self, state: head_inventory.state)
70
- end
71
-
72
- # Get a handle for the head version
73
- def head_version
74
- DraftVersion.new(object_directory: self, overwrite_head: true, state: head_inventory.state)
75
- end
76
-
77
- # Get a handle that will replace the existing head version
78
- def overwrite_current_version
79
- DraftVersion.new(object_directory: self, overwrite_head: true)
80
- end
81
-
82
- def exists?
83
- namaste_exists?
84
- end
85
-
86
- def valid?
87
- InventoryValidator.new(directory: object_root).valid? &&
88
- namaste_exists? &&
89
- !inventory.nil? && # Ensures it could be loaded
90
- head_directory_valid?
91
- end
92
-
93
- def head_directory_valid?
94
- InventoryValidator.new(directory: object_root / inventory.head).valid? &&
95
- !head_inventory.nil? # Ensures it could be loaded
96
- end
97
-
98
- def namaste_exists?
99
- File.exist?(namaste_file)
100
- end
101
-
102
- def namaste_file
103
- object_root / "0=ocfl_object_1.1"
104
- end
105
- end
106
- end
107
- end
@@ -1,69 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module OCFL
4
- module Object
5
- # Creates a OCFL Directory layout for a particular object.
6
- class DirectoryBuilder
7
- class ObjectExists < Error; end
8
-
9
- def initialize(object_root:, id:, content_directory: nil)
10
- @object_root = Pathname.new(object_root)
11
- raise ObjectExists, "The directory `#{object_root}' already exists" if @object_root.exist?
12
-
13
- @id = id
14
- inventory = Inventory.new(
15
- data: Inventory::InventoryStruct.new(
16
- new_inventory_attrs.tap { |attrs| attrs[:contentDirectory] = content_directory if content_directory }
17
- )
18
- )
19
- @object_directory = Directory.new(object_root:, inventory:)
20
- end
21
-
22
- attr_reader :id, :inventory, :object_root, :object_directory
23
-
24
- def copy_file(...)
25
- create_object_directory
26
- version.copy_file(...)
27
- end
28
-
29
- def copy_recursive(...)
30
- create_object_directory
31
- version.copy_recursive(...)
32
- end
33
-
34
- def create_object_directory
35
- FileUtils.mkdir_p(object_root)
36
- FileUtils.touch(object_directory.namaste_file) unless File.exist?(object_directory.namaste_file)
37
- end
38
-
39
- # @return [Directory]
40
- def save
41
- create_object_directory
42
- write_inventory
43
- object_directory
44
- end
45
-
46
- def version
47
- @version ||= DraftVersion.new(object_directory:)
48
- end
49
-
50
- def write_inventory
51
- version.save
52
- end
53
-
54
- private
55
-
56
- def new_inventory_attrs
57
- {
58
- id:,
59
- version: "v0",
60
- type: Inventory::URI_1_1,
61
- digestAlgorithm: "sha512",
62
- head: "v0",
63
- versions: {},
64
- manifest: {}
65
- }
66
- end
67
- end
68
- end
69
- end
@@ -1,153 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module OCFL
4
- module Object
5
- # A new OCFL version
6
- class DraftVersion
7
- # @param [Directory] object_directory
8
- def initialize(object_directory:, overwrite_head: false, state: {})
9
- @object_directory = object_directory
10
- @manifest = object_directory.inventory.manifest.dup
11
- @state = state
12
-
13
- number = object_directory.head.delete_prefix("v").to_i
14
- @version_number = "v#{overwrite_head ? number : number + 1}"
15
- @prepared_content = @prepared = overwrite_head
16
- end
17
-
18
- attr_reader :object_directory, :manifest, :state, :version_number
19
-
20
- delegate :file_names, to: :to_version_struct
21
-
22
- def move_file(incoming_path)
23
- prepare_content_directory
24
- already_stored = add(incoming_path)
25
- return if already_stored
26
-
27
- FileUtils.mv(incoming_path, content_path)
28
- end
29
-
30
- def copy_file(incoming_path, destination_path: "")
31
- prepare_content_directory
32
- copy_one(destination_path.presence || File.basename(incoming_path), incoming_path)
33
- end
34
-
35
- def digest_for_filename(filename)
36
- state.find { |_, filenames| filenames.include?(filename) }&.first
37
- end
38
-
39
- # Note, this only removes the file from this version. Previous versions may still use it.
40
- def delete_file(filename)
41
- sha512_digest = digest_for_filename(filename)
42
- raise "Unknown file: #{filename}" unless sha512_digest
43
-
44
- state.delete(sha512_digest)
45
- # If the manifest points at the current content directory, then we can delete it.
46
- file_paths = manifest[sha512_digest]
47
- return unless file_paths.all? { |path| path.start_with?("#{version_number}/") }
48
-
49
- File.unlink (object_directory.object_root + file_paths.first).to_s
50
- end
51
-
52
- # Copies files into the object and preserves their relative paths as logical directories in the object
53
- def copy_recursive(incoming_path, destination_path: "")
54
- prepare_content_directory
55
- incoming_path = incoming_path.delete_suffix("/")
56
- Dir.glob("#{incoming_path}/**/*").reject { |fn| File.directory?(fn) }.each do |file|
57
- logical_file_path = file.delete_prefix(incoming_path).delete_prefix("/")
58
- logical_file_path = File.join(destination_path, logical_file_path) unless destination_path.empty?
59
-
60
- copy_one(logical_file_path, file)
61
- end
62
- end
63
-
64
- def save
65
- prepare_directory # only necessary if the version has no new content (deletes only)
66
- write_inventory(build_inventory)
67
- object_directory.reload
68
- end
69
-
70
- def to_version_struct
71
- Version.new(state:, created: Time.now.utc.iso8601)
72
- end
73
-
74
- private
75
-
76
- def write_inventory(inventory)
77
- InventoryWriter.new(inventory:, path:).write
78
- FileUtils.cp(path / "inventory.json", object_directory.object_root)
79
- FileUtils.cp(path / "inventory.json.sha512", object_directory.object_root)
80
- end
81
-
82
- # @param [String] logical_file_path where we're going to store the file (e.g. 'object/directory_builder_spec.rb')
83
- # @param [String] incoming_path where's this file from (e.g. 'spec/ocfl/object/directory_builder_spec.rb')
84
- def copy_one(logical_file_path, incoming_path)
85
- already_stored = add(incoming_path, logical_file_path:)
86
- return if already_stored
87
-
88
- parent_dir = (content_path / logical_file_path).parent
89
- FileUtils.mkdir_p(parent_dir) unless parent_dir == content_path
90
- FileUtils.cp(incoming_path, content_path / logical_file_path)
91
- end
92
-
93
- # @return [Boolean] true if the file already existed in this object. If false, the file must be
94
- # moved to the content directory.
95
- def add(incoming_path, logical_file_path: File.basename(incoming_path))
96
- digest = Digest::SHA512.file(incoming_path).to_s
97
- version_content_path = content_path.relative_path_from(object_directory.object_root)
98
- file_path_relative_to_root = (version_content_path / logical_file_path).to_s
99
- result = @manifest.key?(digest)
100
- @manifest[digest] ||= []
101
- @state[digest] ||= []
102
- @manifest[digest].push(file_path_relative_to_root)
103
- @state[digest].push(logical_file_path)
104
- result
105
- end
106
-
107
- def prepare_content_directory
108
- prepare_directory
109
- return if @prepared_content
110
-
111
- FileUtils.mkdir(content_path)
112
- @prepared_content = true
113
- end
114
-
115
- def prepare_directory
116
- return if @prepared
117
-
118
- FileUtils.mkdir(path)
119
- @prepared = true
120
- end
121
-
122
- def content_path
123
- path / object_directory.inventory.content_directory
124
- end
125
-
126
- def path
127
- object_directory.object_root / version_number
128
- end
129
-
130
- def build_inventory
131
- old_data = object_directory.inventory.data
132
- versions = versions(old_data.versions)
133
-
134
- # Prune items from manifest if they are not part of any version
135
- Inventory::InventoryStruct.new(old_data.to_h.merge(manifest: filtered_manifest(versions),
136
- head: version_number, versions:))
137
- end
138
-
139
- # This gives the updated list of versions. The old list plus this new one.
140
- # @param [Hash] old_versions the versions prior to this one.
141
- def versions(old_versions)
142
- old_versions.merge(version_number => to_version_struct)
143
- end
144
-
145
- # The manifest after unused SHAs have been filtered out.
146
- def filtered_manifest(versions)
147
- shas_in_versions = versions.values.flat_map { |v| v.state.keys }.uniq
148
- manifest.slice!(*shas_in_versions)
149
- manifest
150
- end
151
- end
152
- end
153
- end
@@ -1,49 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module OCFL
4
- module Object
5
- # Represents the JSON file that stores the object inventory
6
- # https://ocfl.io/1.1/spec/#inventory
7
- class Inventory
8
- URI_1_1 = "https://ocfl.io/1.1/spec/#inventory"
9
-
10
- # A data structure for the inventory
11
- class InventoryStruct < Dry::Struct
12
- transform_keys(&:to_sym)
13
- attribute :id, Types::String
14
- attribute :type, Types::String
15
- attribute :digestAlgorithm, Types::String
16
- attribute :head, Types::String
17
- attribute? :contentDirectory, Types::String
18
- attribute :versions, Types::Hash.map(Types::String, Version)
19
- attribute :manifest, Types::Hash
20
- end
21
-
22
- def initialize(data:)
23
- @data = data
24
- end
25
-
26
- attr_reader :errors, :data
27
-
28
- delegate :id, :head, :versions, :manifest, to: :data
29
- delegate :state, to: :head_version
30
-
31
- def content_directory
32
- data.contentDirectory || "content"
33
- end
34
-
35
- # @return [String,nil] the path to the file relative to the object root. (e.g. v2/content/image.tiff)
36
- def path(logical_path)
37
- digest, = state.find { |_, logical_paths| logical_paths.include?(logical_path) }
38
-
39
- return unless digest
40
-
41
- manifest[digest].find { |content_path| content_path.match(%r{\Av\d+/#{content_directory}/#{logical_path}\z}) }
42
- end
43
-
44
- def head_version
45
- versions[head]
46
- end
47
- end
48
- end
49
- end
@@ -1,45 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module OCFL
4
- module Object
5
- # Loads and Inventory object from JSON
6
- class InventoryLoader
7
- include Dry::Monads[:result]
8
-
9
- VersionEnum = Types::String.enum(Inventory::URI_1_1)
10
- DigestAlgorithm = Types::String.enum("md5", "sha1", "sha256", "sha512", "blake2b-512")
11
-
12
- # https://ocfl.io/1.1/spec/#inventory-structure
13
- # Validation of the incoming data
14
- Schema = Dry::Schema.Params do
15
- # config.validate_keys = true
16
- required(:id).filled(:string)
17
- required(:type).filled(VersionEnum)
18
- required(:digestAlgorithm).filled(DigestAlgorithm)
19
- required(:head).filled(:string)
20
- optional(:contentDirectory).filled(:string)
21
- required(:versions).hash
22
- required(:manifest).hash
23
- end
24
-
25
- def self.load(file_name)
26
- new(file_name).load
27
- end
28
-
29
- def initialize(file_name)
30
- @file_name = file_name
31
- end
32
-
33
- def load
34
- bytestream = File.read(@file_name)
35
- data = JSON.parse(bytestream)
36
- errors = Schema.call(data).errors
37
- if errors.empty?
38
- Success(Inventory::InventoryStruct.new(data))
39
- else
40
- Failure(errors)
41
- end
42
- end
43
- end
44
- end
45
- end
@@ -1,42 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module OCFL
4
- module Object
5
- # Checks to see that the inventory.json and it's checksum in a direcotory are valid
6
- class InventoryValidator
7
- def initialize(directory:)
8
- @directory = Pathname.new(directory)
9
- end
10
-
11
- attr_reader :directory
12
-
13
- def valid?
14
- inventory_file_exists? && inventory_file_matches_checksum?
15
- end
16
-
17
- def inventory_file_exists?
18
- File.exist?(inventory_file)
19
- end
20
-
21
- def inventory_file_matches_checksum?
22
- return false unless File.exist?(inventory_checksum_file)
23
-
24
- actual = inventory_file_checksum
25
- expected = File.read(inventory_checksum_file)
26
- expected.match?(/\A#{actual}\s+inventory\.json\z/)
27
- end
28
-
29
- def inventory_checksum_file
30
- directory / "inventory.json.sha512"
31
- end
32
-
33
- def inventory_file_checksum
34
- Digest::SHA512.file inventory_file
35
- end
36
-
37
- def inventory_file
38
- directory / "inventory.json"
39
- end
40
- end
41
- end
42
- end
@@ -1,37 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module OCFL
4
- module Object
5
- # Writes a OCFL Inventory to json on disk
6
- class InventoryWriter
7
- def initialize(inventory:, path:)
8
- @path = path
9
- @inventory = inventory
10
- end
11
-
12
- attr_reader :inventory, :path
13
-
14
- def write
15
- write_inventory
16
- update_inventory_checksum
17
- end
18
-
19
- def write_inventory
20
- File.write(inventory_file, JSON.pretty_generate(inventory.to_h))
21
- end
22
-
23
- def inventory_file
24
- path / "inventory.json"
25
- end
26
-
27
- def checksum_file
28
- path / "inventory.json.sha512"
29
- end
30
-
31
- def update_inventory_checksum
32
- digest = Digest::SHA512.file inventory_file
33
- File.write(checksum_file, "#{digest} inventory.json")
34
- end
35
- end
36
- end
37
- end
@@ -1,26 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module OCFL
4
- module Object
5
- # Represents the OCFL version
6
- # https://ocfl.io/1.1/spec/#version
7
- class Version < Dry.Struct
8
- # Represents the OCFL user
9
- class User < Dry.Struct
10
- transform_keys(&:to_sym)
11
- attribute :name, Types::String
12
- attribute? :address, Types::String
13
- end
14
-
15
- transform_keys(&:to_sym)
16
- attribute :created, Types::String
17
- attribute :state, Types::Hash.map(Types::String, Types::Array.of(Types::String))
18
- attribute? :message, Types::String
19
- attribute? :user, User
20
-
21
- def file_names
22
- state.values.flatten
23
- end
24
- end
25
- end
26
- end