longleaf 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.circleci/config.yml +1 -1
- data/lib/longleaf/candidates/file_selector.rb +16 -5
- data/lib/longleaf/candidates/manifest_digest_provider.rb +1 -1
- data/lib/longleaf/candidates/physical_path_provider.rb +17 -0
- data/lib/longleaf/cli.rb +21 -6
- data/lib/longleaf/commands/register_command.rb +5 -3
- data/lib/longleaf/events/preserve_event.rb +3 -2
- data/lib/longleaf/events/register_event.rb +9 -2
- data/lib/longleaf/helpers/selection_options_parser.rb +39 -13
- data/lib/longleaf/logging/redirecting_logger.rb +4 -0
- data/lib/longleaf/models/file_record.rb +15 -1
- data/lib/longleaf/models/md_fields.rb +1 -0
- data/lib/longleaf/models/metadata_record.rb +4 -1
- data/lib/longleaf/models/s3_storage_location.rb +5 -0
- data/lib/longleaf/preservation_services/file_check_service.rb +7 -6
- data/lib/longleaf/preservation_services/fixity_check_service.rb +5 -4
- data/lib/longleaf/preservation_services/rsync_replication_service.rb +24 -8
- data/lib/longleaf/preservation_services/s3_replication_service.rb +1 -13
- data/lib/longleaf/services/metadata_deserializer.rb +3 -1
- data/lib/longleaf/services/metadata_serializer.rb +1 -0
- data/lib/longleaf/specs/metadata_builder.rb +7 -1
- data/lib/longleaf/version.rb +1 -1
- metadata +7 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 71c3c9e78a968f153c457d2a59ae17e8d45b4f6e905deeb787172844cee3733f
|
4
|
+
data.tar.gz: d354dba30939cdf1917b09fa7fc874654135f311dd1d5c60453cb42fe2b2a736
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: af2433108df902de025137d67c4b3d089b8766f7856d10d5d93d6bd449b82227c7cf3e987e50199c83a0869056842cb34d55f710102fa8a654282170c215882a
|
7
|
+
data.tar.gz: 93deda52a5f3de34ebc29baef3f531db099ea76521389dc223f33c2922da0e1fb81d775c8178370ade67ea397761553bf9ec34c03c12eec1e0a77d12174e07de
|
data/.circleci/config.yml
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'longleaf/logging'
|
2
|
+
require 'longleaf/candidates/physical_path_provider'
|
2
3
|
|
3
4
|
module Longleaf
|
4
5
|
# Selects and allows for iteration over files which match a provided set of selection criteria
|
@@ -10,7 +11,8 @@ module Longleaf
|
|
10
11
|
attr_reader :specificity
|
11
12
|
|
12
13
|
# May only provide either file_paths or storage_locations
|
13
|
-
def initialize(file_paths: nil, storage_locations: nil,
|
14
|
+
def initialize(file_paths: nil, storage_locations: nil, physical_provider: Longleaf::PhysicalPathProvider.new,
|
15
|
+
app_config:)
|
14
16
|
if nil_or_empty?(file_paths) && nil_or_empty?(storage_locations)
|
15
17
|
raise ArgumentError.new("Must provide either file paths or storage locations")
|
16
18
|
end
|
@@ -36,6 +38,7 @@ module Longleaf
|
|
36
38
|
end
|
37
39
|
# The set of storage locations to select file paths from
|
38
40
|
@storage_locations = storage_locations
|
41
|
+
@physical_provider = physical_provider
|
39
42
|
# Validate that the selected storage locations are known
|
40
43
|
if @storage_locations.nil?
|
41
44
|
@specificity = SPECIFICITY_PATH
|
@@ -63,7 +66,7 @@ module Longleaf
|
|
63
66
|
@target_paths
|
64
67
|
end
|
65
68
|
|
66
|
-
# Get the next file path for this selector.
|
69
|
+
# Get the next logical file path for this selector.
|
67
70
|
# @return [String] an absolute path to the next file targeted by this selector,
|
68
71
|
# or nil if no more files selected
|
69
72
|
def next_path
|
@@ -80,9 +83,17 @@ module Longleaf
|
|
80
83
|
path = @paths.pop
|
81
84
|
until path.nil? do
|
82
85
|
@app_config.location_manager.verify_path_in_location(path)
|
86
|
+
physical_path = @physical_provider.get_physical_path(path)
|
87
|
+
separate_logical = physical_path != path
|
88
|
+
if separate_logical
|
89
|
+
@app_config.location_manager.verify_path_in_location(physical_path)
|
90
|
+
end
|
83
91
|
|
84
|
-
if File.exist?(
|
85
|
-
if File.directory?(
|
92
|
+
if File.exist?(physical_path)
|
93
|
+
if File.directory?(physical_path)
|
94
|
+
if separate_logical
|
95
|
+
raise InvalidStoragePathError.new("Cannot specify physical path to a directory: #{physical_path}")
|
96
|
+
end
|
86
97
|
logger.debug("Expanding directory #{path}")
|
87
98
|
# For a directory, add all children to file_paths
|
88
99
|
Dir.entries(path).sort.reverse_each do |child|
|
@@ -93,7 +104,7 @@ module Longleaf
|
|
93
104
|
return path
|
94
105
|
end
|
95
106
|
else
|
96
|
-
raise InvalidStoragePathError.new("File #{
|
107
|
+
raise InvalidStoragePathError.new("File #{physical_path} does not exist.")
|
97
108
|
end
|
98
109
|
|
99
110
|
# Returned path was not a suitable file, try the next path
|
@@ -1,7 +1,7 @@
|
|
1
1
|
module Longleaf
|
2
2
|
# Provides digests for files from a manifest
|
3
3
|
class ManifestDigestProvider
|
4
|
-
# @param hash which maps file paths to hashs of digests
|
4
|
+
# @param digests_mapping hash which maps file paths to hashs of digests
|
5
5
|
def initialize(digests_mapping)
|
6
6
|
@digests_mapping = digests_mapping
|
7
7
|
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module Longleaf
|
2
|
+
# Provides physical paths for logical paths from a mapping
|
3
|
+
class PhysicalPathProvider
|
4
|
+
# @param phys_mapping hash with logical paths as keys, physical paths as values
|
5
|
+
def initialize(phys_mapping = Hash.new)
|
6
|
+
@phys_mapping = phys_mapping
|
7
|
+
end
|
8
|
+
|
9
|
+
# @param logical_path [String] logical path of file
|
10
|
+
# @return physical path of the file
|
11
|
+
def get_physical_path(logical_path)
|
12
|
+
# return the logical path itself if no physical path is mapped
|
13
|
+
return logical_path unless @phys_mapping.key?(logical_path)
|
14
|
+
@phys_mapping[logical_path]
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
data/lib/longleaf/cli.rb
CHANGED
@@ -65,12 +65,12 @@ module Longleaf
|
|
65
65
|
:aliases => "-f",
|
66
66
|
:required => false,
|
67
67
|
:desc => 'File or files to perform this operation on. If multiple files are provided, they must be comma separated.' })
|
68
|
+
|
68
69
|
add_shared_option(
|
69
|
-
:location, :
|
70
|
+
:location, :registered_selection, {
|
70
71
|
:aliases => "-s",
|
71
72
|
:required => false,
|
72
73
|
:desc => 'Name or comma separated names of storage locations to perform this operation over.' })
|
73
|
-
|
74
74
|
add_shared_option(
|
75
75
|
:from_list, :registered_selection, {
|
76
76
|
:aliases => "-l",
|
@@ -115,7 +115,21 @@ module Longleaf
|
|
115
115
|
...
|
116
116
|
md5:
|
117
117
|
<digest> <path>
|
118
|
-
...
|
118
|
+
...
|
119
|
+
|
120
|
+
To provide separate logical and physical paths, add a physical path column:
|
121
|
+
'-m sha1:@-'
|
122
|
+
Where the content in STDIN adheres to the format:
|
123
|
+
<digest> <logical path> <physical path>
|
124
|
+
...
|
125
|
+
})
|
126
|
+
method_option(:physical_path,
|
127
|
+
:aliases => "-p",
|
128
|
+
:required => false,
|
129
|
+
:desc => %q{Comma separated list of physical paths of files to register. Only needed
|
130
|
+
if the physical and logical paths of the files differ, otherwise they will be assumed to be the same.
|
131
|
+
Only applicable when used with the -f option, and only for individual files, not directories.
|
132
|
+
Must be provided in the same order as the logical paths.})
|
119
133
|
method_option(:force,
|
120
134
|
:type => :boolean,
|
121
135
|
:default => false,
|
@@ -132,11 +146,12 @@ module Longleaf
|
|
132
146
|
|
133
147
|
app_config_manager = load_application_config(options)
|
134
148
|
|
135
|
-
file_selector, digest_provider = SelectionOptionsParser
|
136
|
-
options, app_config_manager)
|
149
|
+
file_selector, digest_provider, physical_provider = SelectionOptionsParser
|
150
|
+
.parse_registration_selection_options(options, app_config_manager)
|
137
151
|
|
138
152
|
command = RegisterCommand.new(app_config_manager)
|
139
|
-
exit command.execute(file_selector: file_selector, force: options[:force], digest_provider: digest_provider
|
153
|
+
exit command.execute(file_selector: file_selector, force: options[:force], digest_provider: digest_provider,
|
154
|
+
physical_provider: physical_provider)
|
140
155
|
end
|
141
156
|
|
142
157
|
desc "deregister", "Deregister files with Longleaf"
|
@@ -16,9 +16,10 @@ module Longleaf
|
|
16
16
|
# Execute the register command on the given parameters
|
17
17
|
# @param file_selector [FileSelector] selector for files to register
|
18
18
|
# @param force [Boolean] force flag
|
19
|
-
# @param digest_provider [
|
19
|
+
# @param digest_provider [ManifestDigestProvider] object which provides digests for files being registered
|
20
|
+
# @param physical_provider [PhysicalPathProvider] object which provides physical paths for files being registered
|
20
21
|
# @return [Integer] status code
|
21
|
-
def execute(file_selector:, force: false, digest_provider: nil)
|
22
|
+
def execute(file_selector:, force: false, digest_provider: nil, physical_provider: nil)
|
22
23
|
start_time = Time.now
|
23
24
|
logger.info('Performing register command')
|
24
25
|
begin
|
@@ -29,7 +30,8 @@ module Longleaf
|
|
29
30
|
|
30
31
|
storage_location = @app_manager.location_manager.get_location_by_path(f_path)
|
31
32
|
|
32
|
-
|
33
|
+
phys_path = physical_provider.get_physical_path(f_path)
|
34
|
+
file_rec = FileRecord.new(f_path, storage_location, nil, phys_path)
|
33
35
|
|
34
36
|
register_event = RegisterEvent.new(file_rec: file_rec, force: force, app_manager: @app_manager,
|
35
37
|
digest_provider: digest_provider)
|
@@ -27,12 +27,13 @@ module Longleaf
|
|
27
27
|
service_manager = @app_manager.service_manager
|
28
28
|
md_rec = @file_rec.metadata_record
|
29
29
|
f_path = @file_rec.path
|
30
|
+
phys_path = @file_rec.physical_path
|
30
31
|
|
31
|
-
logger.info("Performing preserve event on #{
|
32
|
+
logger.info("Performing preserve event on #{f_path}")
|
32
33
|
|
33
34
|
needs_persist = false
|
34
35
|
begin
|
35
|
-
if !File.exist?(
|
36
|
+
if !File.exist?(phys_path)
|
36
37
|
# Need to persist metadata to avoid repeating processing of this file too soon.
|
37
38
|
needs_persist = true
|
38
39
|
record_failure(EventNames::PRESERVE, f_path, "File is registered but missing.")
|
@@ -70,10 +70,17 @@ module Longleaf
|
|
70
70
|
private
|
71
71
|
def populate_file_properties
|
72
72
|
md_rec = @file_rec.metadata_record
|
73
|
+
physical_path = @file_rec.physical_path
|
73
74
|
|
74
75
|
# Set file properties
|
75
|
-
md_rec.last_modified = File.mtime(
|
76
|
-
md_rec.file_size = File.size(
|
76
|
+
md_rec.last_modified = File.mtime(physical_path).utc.iso8601(3)
|
77
|
+
md_rec.file_size = File.size(physical_path)
|
78
|
+
|
79
|
+
if physical_path != @file_rec.path
|
80
|
+
md_rec.physical_path = physical_path
|
81
|
+
else
|
82
|
+
md_rec.physical_path = nil
|
83
|
+
end
|
77
84
|
end
|
78
85
|
|
79
86
|
# Copy a subset of properties from an existing metadata record to the new record
|
@@ -1,6 +1,7 @@
|
|
1
1
|
require 'longleaf/candidates/file_selector'
|
2
2
|
require 'longleaf/candidates/registered_file_selector'
|
3
3
|
require 'longleaf/candidates/manifest_digest_provider'
|
4
|
+
require 'longleaf/candidates/physical_path_provider'
|
4
5
|
require 'longleaf/candidates/single_digest_provider'
|
5
6
|
|
6
7
|
module Longleaf
|
@@ -18,8 +19,11 @@ module Longleaf
|
|
18
19
|
options, :file, :manifest, :location)
|
19
20
|
|
20
21
|
if !options[:manifest].nil?
|
21
|
-
digests_mapping = self.
|
22
|
-
|
22
|
+
digests_mapping, logical_phys_mapping = self.parse_manifest(options[:manifest])
|
23
|
+
physical_provider = PhysicalPathProvider.new(logical_phys_mapping)
|
24
|
+
selector = FileSelector.new(file_paths: digests_mapping.keys,
|
25
|
+
physical_provider: physical_provider,
|
26
|
+
app_config: app_config_manager)
|
23
27
|
digest_provider = ManifestDigestProvider.new(digests_mapping)
|
24
28
|
elsif !options[:file].nil?
|
25
29
|
if options[:checksums]
|
@@ -35,18 +39,28 @@ module Longleaf
|
|
35
39
|
end
|
36
40
|
end
|
37
41
|
|
38
|
-
file_paths = options[:file]
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
42
|
+
file_paths = self.split_quoted(options[:file], "\\s*,\\s*")
|
43
|
+
if !options[:physical_path].nil?
|
44
|
+
physical_paths = self.split_quoted(options[:physical_path], "\\s*,\\s*")
|
45
|
+
if physical_paths.length != file_paths.length
|
46
|
+
logger.failure("Invalid physical paths parameter, number of paths did not match number of logical paths")
|
47
|
+
exit 1
|
48
|
+
end
|
49
|
+
logical_phys_mapping = Hash[file_paths.zip physical_paths]
|
50
|
+
physical_provider = PhysicalPathProvider.new(logical_phys_mapping)
|
51
|
+
else
|
52
|
+
physical_provider = PhysicalPathProvider.new
|
53
|
+
end
|
54
|
+
|
55
|
+
selector = FileSelector.new(file_paths: file_paths,
|
56
|
+
physical_provider: physical_provider,
|
57
|
+
app_config: app_config_manager)
|
44
58
|
else
|
45
59
|
logger.failure("Must provide one of the following file selection options: -f, l, or -m")
|
46
60
|
exit 1
|
47
61
|
end
|
48
62
|
|
49
|
-
[selector, digest_provider]
|
63
|
+
[selector, digest_provider, physical_provider]
|
50
64
|
end
|
51
65
|
|
52
66
|
def self.there_can_be_only_one(failure_msg, options, *names)
|
@@ -69,7 +83,7 @@ module Longleaf
|
|
69
83
|
#. <manifest_path> OR @-
|
70
84
|
# @return a hash containing the aggregated contents of the provided manifests. The keys are
|
71
85
|
# paths to manifested files. The values are hashes, mapping digest algorithms to digest values.
|
72
|
-
def self.
|
86
|
+
def self.parse_manifest(manifest_vals)
|
73
87
|
alg_manifest_pairs = []
|
74
88
|
# interpret option inputs into a list of algorithms to manifest sources
|
75
89
|
manifest_vals.each do |manifest_val|
|
@@ -87,6 +101,7 @@ module Longleaf
|
|
87
101
|
|
88
102
|
# read the provided manifests to build a mapping from file uri to all supplied digests
|
89
103
|
digests_mapping = Hash.new { |h,k| h[k] = Hash.new }
|
104
|
+
logical_phys_mapping = Hash.new
|
90
105
|
alg_manifest_pairs.each do |mpair|
|
91
106
|
source_stream = nil
|
92
107
|
# Determine if reading from a manifest file or stdin
|
@@ -111,17 +126,28 @@ module Longleaf
|
|
111
126
|
if current_alg.nil?
|
112
127
|
self.fail("Manifest with unknown checksums encountered, an algorithm must be specified")
|
113
128
|
end
|
114
|
-
entry_parts =
|
115
|
-
if entry_parts.length != 2
|
129
|
+
entry_parts = self.split_quoted(line)
|
130
|
+
if entry_parts.length != 2 && entry_parts.length != 3
|
116
131
|
self.fail("Invalid manifest entry: #{line}")
|
117
132
|
end
|
118
133
|
|
119
134
|
digests_mapping[entry_parts[1]][current_alg] = entry_parts[0]
|
135
|
+
if (entry_parts.length == 3)
|
136
|
+
logical_phys_mapping[entry_parts[1]] = entry_parts[2]
|
137
|
+
end
|
120
138
|
end
|
121
139
|
end
|
122
140
|
end
|
123
141
|
|
124
|
-
digests_mapping
|
142
|
+
[digests_mapping, logical_phys_mapping]
|
143
|
+
end
|
144
|
+
|
145
|
+
# Splits a string of quoted or unquoted tokens separated by spaces
|
146
|
+
# @param
|
147
|
+
def self.split_quoted(text, delimiter = "\\s+", limit = -1)
|
148
|
+
text.split(/#{delimiter}(?=(?:[^'"]|'[^']*'|"[^"]*")*$)/, limit)
|
149
|
+
.select {|s| not s.empty? }
|
150
|
+
.map {|s| s.gsub(/(^ +)|( +$)|(^["']+)|(["']+$)/, '')}
|
125
151
|
end
|
126
152
|
|
127
153
|
# Parses the provided options to create a selector for registered files
|
@@ -7,13 +7,16 @@ module Longleaf
|
|
7
7
|
|
8
8
|
# @param file_path [String] path to the file
|
9
9
|
# @param storage_location [StorageLocation] storage location containing the file
|
10
|
-
|
10
|
+
# @param metadata_record [MetadataRecord] metadata record for this file object. Optional.
|
11
|
+
# @param physical_path [String] physical path where the file is located. Defaults to the file_path.
|
12
|
+
def initialize(file_path, storage_location, metadata_record = nil, physical_path = nil)
|
11
13
|
raise ArgumentError.new("FileRecord requires a path") if file_path.nil?
|
12
14
|
raise ArgumentError.new("FileRecord requires a storage_location") if storage_location.nil?
|
13
15
|
|
14
16
|
@path = file_path
|
15
17
|
@storage_location = storage_location
|
16
18
|
@metadata_record = metadata_record
|
19
|
+
@physical_path = physical_path
|
17
20
|
end
|
18
21
|
|
19
22
|
# @return [String] path for the metadata file for this file
|
@@ -21,6 +24,17 @@ module Longleaf
|
|
21
24
|
@metadata_path = @storage_location.get_metadata_path_for(path) if @metadata_path.nil?
|
22
25
|
@metadata_path
|
23
26
|
end
|
27
|
+
|
28
|
+
def physical_path
|
29
|
+
if @physical_path.nil?
|
30
|
+
if @metadata_record.nil? || @metadata_record.physical_path.nil?
|
31
|
+
@physical_path = @path
|
32
|
+
else
|
33
|
+
@physical_path = @metadata_record.physical_path
|
34
|
+
end
|
35
|
+
end
|
36
|
+
@physical_path
|
37
|
+
end
|
24
38
|
|
25
39
|
def metadata_present?
|
26
40
|
File.exist?(metadata_path)
|
@@ -10,6 +10,7 @@ module Longleaf
|
|
10
10
|
attr_reader :checksums
|
11
11
|
attr_reader :properties
|
12
12
|
attr_accessor :file_size, :last_modified
|
13
|
+
attr_accessor :physical_path
|
13
14
|
|
14
15
|
# @param properties [Hash] initial data properties for this record
|
15
16
|
# @param services [Hash] initial service property tree
|
@@ -18,8 +19,9 @@ module Longleaf
|
|
18
19
|
# @param checksums [Hash] hash of checksum values
|
19
20
|
# @param file_size [Integer] size of file in bytes
|
20
21
|
# @param last_modified [String] iso8601 representation of the last modified date of file
|
22
|
+
# @param physical_path [String] physical path where the file is located
|
21
23
|
def initialize(properties: nil, services: nil, deregistered: nil, registered: nil, checksums: nil,
|
22
|
-
file_size: nil, last_modified: nil)
|
24
|
+
file_size: nil, last_modified: nil, physical_path: nil)
|
23
25
|
@properties = properties || Hash.new
|
24
26
|
@registered = registered
|
25
27
|
@deregistered = deregistered
|
@@ -28,6 +30,7 @@ module Longleaf
|
|
28
30
|
@services = services || Hash.new
|
29
31
|
@file_size = file_size
|
30
32
|
@last_modified = last_modified
|
33
|
+
@physical_path = physical_path
|
31
34
|
end
|
32
35
|
|
33
36
|
# @return [Boolean] true if the record is deregistered
|
@@ -1,6 +1,7 @@
|
|
1
1
|
require 'longleaf/models/storage_location'
|
2
2
|
require 'longleaf/models/storage_types'
|
3
3
|
require 'longleaf/helpers/s3_uri_helper'
|
4
|
+
require 'longleaf/logging'
|
4
5
|
require 'uri'
|
5
6
|
require 'aws-sdk-s3'
|
6
7
|
|
@@ -12,6 +13,7 @@ module Longleaf
|
|
12
13
|
# https://docs.aws.amazon.com/sdk-for-ruby/v3/api/Aws/S3/Client.html#constructor_details
|
13
14
|
|
14
15
|
class S3StorageLocation < StorageLocation
|
16
|
+
include Longleaf::Logging
|
15
17
|
|
16
18
|
IS_URI_REGEX = /\A#{URI::regexp}\z/
|
17
19
|
|
@@ -38,6 +40,9 @@ module Longleaf
|
|
38
40
|
# Clone options and convert keys to symbols
|
39
41
|
@client_options = Hash[custom_options.map { |(k,v)| [k.to_sym,v] } ]
|
40
42
|
end
|
43
|
+
@client_options[:logger] = logger
|
44
|
+
@client_options[:log_level] = :debug if @client_options[:log_level].nil?
|
45
|
+
|
41
46
|
# If no region directly configured, use region from path
|
42
47
|
if !@client_options.key?(:region)
|
43
48
|
region = S3UriHelper.extract_region(@path)
|
@@ -23,22 +23,23 @@ module Longleaf
|
|
23
23
|
# @raise [PreservationServiceError] if the file system information does not match the stored details
|
24
24
|
def perform(file_rec, event)
|
25
25
|
file_path = file_rec.path
|
26
|
+
phys_path = file_rec.physical_path
|
26
27
|
md_rec = file_rec.metadata_record
|
27
28
|
|
28
29
|
logger.debug("Performing file information check of #{file_path}")
|
29
30
|
|
30
|
-
if !File.exist?(
|
31
|
-
raise PreservationServiceError.new("File does not exist: #{
|
31
|
+
if !File.exist?(phys_path)
|
32
|
+
raise PreservationServiceError.new("File does not exist: #{phys_path}")
|
32
33
|
end
|
33
34
|
|
34
|
-
file_size = File.size(
|
35
|
+
file_size = File.size(phys_path)
|
35
36
|
if file_size != md_rec.file_size
|
36
|
-
raise PreservationServiceError.new("File size for #{
|
37
|
+
raise PreservationServiceError.new("File size for #{phys_path} does not match the expected value: registered = #{md_rec.file_size} bytes, actual = #{file_size} bytes")
|
37
38
|
end
|
38
39
|
|
39
|
-
last_modified = File.mtime(
|
40
|
+
last_modified = File.mtime(phys_path).utc.iso8601(3)
|
40
41
|
if last_modified != md_rec.last_modified
|
41
|
-
raise PreservationServiceError.new("Last modified timestamp for #{
|
42
|
+
raise PreservationServiceError.new("Last modified timestamp for #{phys_path} does not match the expected value: registered = #{md_rec.last_modified}, actual = #{last_modified}")
|
42
43
|
end
|
43
44
|
end
|
44
45
|
|
@@ -63,6 +63,7 @@ module Longleaf
|
|
63
63
|
# @raise [ChecksumMismatchError] if the checksum on record does not match the generated checksum
|
64
64
|
def perform(file_rec, event)
|
65
65
|
path = file_rec.path
|
66
|
+
phys_path = file_rec.physical_path
|
66
67
|
md_rec = file_rec.metadata_record
|
67
68
|
|
68
69
|
# Get the list of existing checksums for the file and normalize algorithm names
|
@@ -89,19 +90,19 @@ module Longleaf
|
|
89
90
|
end
|
90
91
|
|
91
92
|
digest = DigestHelper::start_digest(alg)
|
92
|
-
digest.file(
|
93
|
+
digest.file(phys_path)
|
93
94
|
generated_digest = digest.hexdigest
|
94
95
|
|
95
96
|
# Store the missing checksum if using the 'generate' behavior
|
96
97
|
if existing_digest.nil? && @absent_digest_behavior == GENERATE_IF_ABSENT
|
97
98
|
md_rec.checksums[alg] = generated_digest
|
98
|
-
logger.info("Generated and stored digest using algorithm '#{alg}' for file #{
|
99
|
+
logger.info("Generated and stored digest using algorithm '#{alg}' for file #{phys_path}")
|
99
100
|
else
|
100
101
|
# Compare the new digest to the one on record
|
101
102
|
if existing_digest == generated_digest
|
102
|
-
logger.info("Fixity check using algorithm '#{alg}' succeeded for file #{
|
103
|
+
logger.info("Fixity check using algorithm '#{alg}' succeeded for file #{phys_path}")
|
103
104
|
else
|
104
|
-
raise ChecksumMismatchError.new("Fixity check using algorithm '#{alg}' failed for file #{
|
105
|
+
raise ChecksumMismatchError.new("Fixity check using algorithm '#{alg}' failed for file #{phys_path}: expected '#{existing_digest}', calculated '#{generated_digest}.'")
|
105
106
|
end
|
106
107
|
end
|
107
108
|
end
|
@@ -51,9 +51,6 @@ module Longleaf
|
|
51
51
|
+ " rsync_options may not include the following: #{DISALLOWED_OPTIONS.join(' ')}")
|
52
52
|
end
|
53
53
|
|
54
|
-
# Add -R (--relative) in to command options to ensure full path gets replicated
|
55
|
-
@options = @options + " -R"
|
56
|
-
|
57
54
|
# Set and validate the replica collision policy
|
58
55
|
@collision_policy = @service_def.properties[SF::COLLISION_PROPERTY] || SF::DEFAULT_COLLISION_POLICY
|
59
56
|
if !SF::VALID_COLLISION_POLICIES.include?(@collision_policy)
|
@@ -101,17 +98,36 @@ module Longleaf
|
|
101
98
|
else
|
102
99
|
dest_path = destination
|
103
100
|
end
|
104
|
-
|
101
|
+
|
102
|
+
logical_physical_same = file_rec.path == file_rec.physical_path
|
105
103
|
# Determine the path to the file being replicated relative to its storage location
|
106
104
|
rel_path = file_rec.storage_location.relativize(file_rec.path)
|
107
|
-
|
108
|
-
|
105
|
+
|
106
|
+
options = @options
|
107
|
+
if logical_physical_same
|
108
|
+
options = options + " -R"
|
109
|
+
# source path with . so that rsync will only create destination directories starting from that point
|
110
|
+
source_path = File.join(file_rec.storage_location.path, "./#{rel_path}")
|
111
|
+
else
|
112
|
+
options = options + " --no-relative"
|
113
|
+
source_path = file_rec.physical_path
|
114
|
+
dest_path = File.join(dest_path, rel_path)
|
115
|
+
if (dest_is_storage_loc && destination.is_a?(Longleaf::FilesystemStorageLocation)) || !dest_is_storage_loc
|
116
|
+
# Fill in missing parent directories, as rsync cannot do so when specifying a different source and dest filename
|
117
|
+
dirname = File.dirname(dest_path)
|
118
|
+
logger.debug("Creating parent dirs #{dirname} for #{file_rec.path}")
|
119
|
+
FileUtils.mkdir_p(dirname)
|
120
|
+
else
|
121
|
+
raise PreservationServiceError.new(
|
122
|
+
"Destination #{destination.name} does not currently support separate physical and logical paths")
|
123
|
+
end
|
124
|
+
end
|
109
125
|
|
110
126
|
# Check that the destination is available because attempting to write
|
111
127
|
verify_destination_available(destination, file_rec)
|
112
128
|
|
113
|
-
logger.debug("Invoking rsync with command: #{@command} \"#{source_path}\" \"#{dest_path}\" #{
|
114
|
-
stdout, stderr, status = Open3.capture3("#{@command} \"#{source_path}\" \"#{dest_path}\" #{
|
129
|
+
logger.debug("Invoking rsync with command: #{@command} \"#{source_path}\" \"#{dest_path}\" #{options}")
|
130
|
+
stdout, stderr, status = Open3.capture3("#{@command} \"#{source_path}\" \"#{dest_path}\" #{options}")
|
115
131
|
raise PreservationServiceError.new("Failed to replicate #{file_rec.path} to #{dest_path}: #{stderr}") \
|
116
132
|
unless status.success?
|
117
133
|
|
@@ -83,8 +83,6 @@ module Longleaf
|
|
83
83
|
# Determine the path to the file being replicated relative to its storage location
|
84
84
|
rel_path = file_rec.storage_location.relativize(file_rec.path)
|
85
85
|
|
86
|
-
content_md5 = get_content_md5(file_rec)
|
87
|
-
|
88
86
|
@destinations.each do |destination|
|
89
87
|
# Check that the destination is available before attempting to write
|
90
88
|
verify_destination_available(destination, file_rec)
|
@@ -92,7 +90,7 @@ module Longleaf
|
|
92
90
|
rel_to_bucket = destination.relative_to_bucket_path(rel_path)
|
93
91
|
file_obj = destination.s3_bucket.object(rel_to_bucket)
|
94
92
|
begin
|
95
|
-
file_obj.upload_file(file_rec.
|
93
|
+
file_obj.upload_file(file_rec.physical_path)
|
96
94
|
rescue Aws::S3::Errors::BadDigest => e
|
97
95
|
raise ChecksumMismatchError.new("Transfer to bucket '#{destination.s3_bucket.name}' failed, " \
|
98
96
|
+ "MD5 provided did not match the received content for #{file_rec.path}")
|
@@ -129,15 +127,5 @@ module Longleaf
|
|
129
127
|
+ e.message)
|
130
128
|
end
|
131
129
|
end
|
132
|
-
|
133
|
-
def get_content_md5(file_rec)
|
134
|
-
md_rec = file_rec.metadata_record
|
135
|
-
if md_rec.checksums.key?('md5')
|
136
|
-
# base 64 encode the digest, as is required by the Content-Md5 header
|
137
|
-
[[md_rec.checksums['md5']].pack("H*")].pack("m0")
|
138
|
-
else
|
139
|
-
nil
|
140
|
-
end
|
141
|
-
end
|
142
130
|
end
|
143
131
|
end
|
@@ -32,6 +32,7 @@ module Longleaf
|
|
32
32
|
checksums = data.delete(MDFields::CHECKSUMS)
|
33
33
|
file_size = data.delete(MDFields::FILE_SIZE)
|
34
34
|
last_modified = data.delete(MDFields::LAST_MODIFIED)
|
35
|
+
physical_path = data.delete(MDFields::PHYSICAL_PATH)
|
35
36
|
|
36
37
|
services = md[MDF::SERVICES]
|
37
38
|
service_records = Hash.new
|
@@ -57,7 +58,8 @@ module Longleaf
|
|
57
58
|
deregistered: deregistered,
|
58
59
|
checksums: checksums,
|
59
60
|
file_size: file_size,
|
60
|
-
last_modified: last_modified
|
61
|
+
last_modified: last_modified,
|
62
|
+
physical_path: physical_path)
|
61
63
|
end
|
62
64
|
|
63
65
|
# Load configuration a yaml encoded configuration file
|
@@ -52,6 +52,7 @@ module Longleaf
|
|
52
52
|
data[MDF::CHECKSUMS] = metadata.checksums unless metadata.checksums && metadata.checksums.empty?
|
53
53
|
data[MDF::FILE_SIZE] = metadata.file_size unless metadata.file_size.nil?
|
54
54
|
data[MDF::LAST_MODIFIED] = metadata.last_modified if metadata.last_modified
|
55
|
+
data[MDF::PHYSICAL_PATH] = metadata.physical_path if metadata.physical_path
|
55
56
|
|
56
57
|
props[MDF::DATA] = data
|
57
58
|
|
@@ -29,6 +29,11 @@ module Longleaf
|
|
29
29
|
@checksums[alg] = value
|
30
30
|
self
|
31
31
|
end
|
32
|
+
|
33
|
+
def with_physical_path(phys_path)
|
34
|
+
@physical_path = phys_path
|
35
|
+
self
|
36
|
+
end
|
32
37
|
|
33
38
|
def with_service(name, timestamp: ServiceDateHelper::formatted_timestamp, run_needed: false, properties: nil,
|
34
39
|
failure_timestamp: nil)
|
@@ -56,7 +61,8 @@ module Longleaf
|
|
56
61
|
registered: @registered,
|
57
62
|
checksums: @checksums,
|
58
63
|
file_size: @file_size,
|
59
|
-
last_modified: @last_modified
|
64
|
+
last_modified: @last_modified,
|
65
|
+
physical_path: @physical_path)
|
60
66
|
end
|
61
67
|
|
62
68
|
# Add the generated metadata record to the given file record
|
data/lib/longleaf/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: longleaf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ben Pennell
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-03-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: thor
|
@@ -372,6 +372,7 @@ files:
|
|
372
372
|
- lib/longleaf.rb
|
373
373
|
- lib/longleaf/candidates/file_selector.rb
|
374
374
|
- lib/longleaf/candidates/manifest_digest_provider.rb
|
375
|
+
- lib/longleaf/candidates/physical_path_provider.rb
|
375
376
|
- lib/longleaf/candidates/registered_file_selector.rb
|
376
377
|
- lib/longleaf/candidates/service_candidate_filesystem_iterator.rb
|
377
378
|
- lib/longleaf/candidates/service_candidate_index_iterator.rb
|
@@ -449,7 +450,7 @@ licenses:
|
|
449
450
|
- Apache-2.0
|
450
451
|
metadata:
|
451
452
|
source_code_uri: https://github.com/UNC-Libraries/longleaf-preservation
|
452
|
-
post_install_message:
|
453
|
+
post_install_message:
|
453
454
|
rdoc_options: []
|
454
455
|
require_paths:
|
455
456
|
- lib
|
@@ -464,8 +465,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
464
465
|
- !ruby/object:Gem::Version
|
465
466
|
version: '0'
|
466
467
|
requirements: []
|
467
|
-
rubygems_version: 3.1.
|
468
|
-
signing_key:
|
468
|
+
rubygems_version: 3.1.4
|
469
|
+
signing_key:
|
469
470
|
specification_version: 4
|
470
471
|
summary: Longleaf preservation services tool
|
471
472
|
test_files: []
|