longleaf 1.0.0 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.circleci/config.yml +1 -1
- data/lib/longleaf/candidates/file_selector.rb +16 -5
- data/lib/longleaf/candidates/manifest_digest_provider.rb +1 -1
- data/lib/longleaf/candidates/physical_path_provider.rb +17 -0
- data/lib/longleaf/cli.rb +21 -6
- data/lib/longleaf/commands/register_command.rb +5 -3
- data/lib/longleaf/events/preserve_event.rb +3 -2
- data/lib/longleaf/events/register_event.rb +9 -2
- data/lib/longleaf/helpers/selection_options_parser.rb +39 -13
- data/lib/longleaf/logging/redirecting_logger.rb +4 -0
- data/lib/longleaf/models/file_record.rb +15 -1
- data/lib/longleaf/models/md_fields.rb +1 -0
- data/lib/longleaf/models/metadata_record.rb +4 -1
- data/lib/longleaf/models/s3_storage_location.rb +5 -0
- data/lib/longleaf/preservation_services/file_check_service.rb +7 -6
- data/lib/longleaf/preservation_services/fixity_check_service.rb +5 -4
- data/lib/longleaf/preservation_services/rsync_replication_service.rb +24 -8
- data/lib/longleaf/preservation_services/s3_replication_service.rb +1 -13
- data/lib/longleaf/services/metadata_deserializer.rb +3 -1
- data/lib/longleaf/services/metadata_serializer.rb +1 -0
- data/lib/longleaf/specs/metadata_builder.rb +7 -1
- data/lib/longleaf/version.rb +1 -1
- metadata +7 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 71c3c9e78a968f153c457d2a59ae17e8d45b4f6e905deeb787172844cee3733f
|
4
|
+
data.tar.gz: d354dba30939cdf1917b09fa7fc874654135f311dd1d5c60453cb42fe2b2a736
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: af2433108df902de025137d67c4b3d089b8766f7856d10d5d93d6bd449b82227c7cf3e987e50199c83a0869056842cb34d55f710102fa8a654282170c215882a
|
7
|
+
data.tar.gz: 93deda52a5f3de34ebc29baef3f531db099ea76521389dc223f33c2922da0e1fb81d775c8178370ade67ea397761553bf9ec34c03c12eec1e0a77d12174e07de
|
data/.circleci/config.yml
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'longleaf/logging'
|
2
|
+
require 'longleaf/candidates/physical_path_provider'
|
2
3
|
|
3
4
|
module Longleaf
|
4
5
|
# Selects and allows for iteration over files which match a provided set of selection criteria
|
@@ -10,7 +11,8 @@ module Longleaf
|
|
10
11
|
attr_reader :specificity
|
11
12
|
|
12
13
|
# May only provide either file_paths or storage_locations
|
13
|
-
def initialize(file_paths: nil, storage_locations: nil,
|
14
|
+
def initialize(file_paths: nil, storage_locations: nil, physical_provider: Longleaf::PhysicalPathProvider.new,
|
15
|
+
app_config:)
|
14
16
|
if nil_or_empty?(file_paths) && nil_or_empty?(storage_locations)
|
15
17
|
raise ArgumentError.new("Must provide either file paths or storage locations")
|
16
18
|
end
|
@@ -36,6 +38,7 @@ module Longleaf
|
|
36
38
|
end
|
37
39
|
# The set of storage locations to select file paths from
|
38
40
|
@storage_locations = storage_locations
|
41
|
+
@physical_provider = physical_provider
|
39
42
|
# Validate that the selected storage locations are known
|
40
43
|
if @storage_locations.nil?
|
41
44
|
@specificity = SPECIFICITY_PATH
|
@@ -63,7 +66,7 @@ module Longleaf
|
|
63
66
|
@target_paths
|
64
67
|
end
|
65
68
|
|
66
|
-
# Get the next file path for this selector.
|
69
|
+
# Get the next logical file path for this selector.
|
67
70
|
# @return [String] an absolute path to the next file targeted by this selector,
|
68
71
|
# or nil if no more files selected
|
69
72
|
def next_path
|
@@ -80,9 +83,17 @@ module Longleaf
|
|
80
83
|
path = @paths.pop
|
81
84
|
until path.nil? do
|
82
85
|
@app_config.location_manager.verify_path_in_location(path)
|
86
|
+
physical_path = @physical_provider.get_physical_path(path)
|
87
|
+
separate_logical = physical_path != path
|
88
|
+
if separate_logical
|
89
|
+
@app_config.location_manager.verify_path_in_location(physical_path)
|
90
|
+
end
|
83
91
|
|
84
|
-
if File.exist?(
|
85
|
-
if File.directory?(
|
92
|
+
if File.exist?(physical_path)
|
93
|
+
if File.directory?(physical_path)
|
94
|
+
if separate_logical
|
95
|
+
raise InvalidStoragePathError.new("Cannot specify physical path to a directory: #{physical_path}")
|
96
|
+
end
|
86
97
|
logger.debug("Expanding directory #{path}")
|
87
98
|
# For a directory, add all children to file_paths
|
88
99
|
Dir.entries(path).sort.reverse_each do |child|
|
@@ -93,7 +104,7 @@ module Longleaf
|
|
93
104
|
return path
|
94
105
|
end
|
95
106
|
else
|
96
|
-
raise InvalidStoragePathError.new("File #{
|
107
|
+
raise InvalidStoragePathError.new("File #{physical_path} does not exist.")
|
97
108
|
end
|
98
109
|
|
99
110
|
# Returned path was not a suitable file, try the next path
|
@@ -1,7 +1,7 @@
|
|
1
1
|
module Longleaf
|
2
2
|
# Provides digests for files from a manifest
|
3
3
|
class ManifestDigestProvider
|
4
|
-
# @param hash which maps file paths to hashs of digests
|
4
|
+
# @param digests_mapping hash which maps file paths to hashes of digests
|
5
5
|
def initialize(digests_mapping)
|
6
6
|
@digests_mapping = digests_mapping
|
7
7
|
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module Longleaf
|
2
|
+
# Provides physical paths for logical paths from a mapping
|
3
|
+
class PhysicalPathProvider
|
4
|
+
# @param phys_mapping hash with logical paths as keys, physical paths as values
|
5
|
+
def initialize(phys_mapping = Hash.new)
|
6
|
+
@phys_mapping = phys_mapping
|
7
|
+
end
|
8
|
+
|
9
|
+
# @param logical_path [String] logical path of file
|
10
|
+
# @return physical path of the file
|
11
|
+
def get_physical_path(logical_path)
|
12
|
+
# return the logical path itself if no physical path is mapped
|
13
|
+
return logical_path unless @phys_mapping.key?(logical_path)
|
14
|
+
@phys_mapping[logical_path]
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
data/lib/longleaf/cli.rb
CHANGED
@@ -65,12 +65,12 @@ module Longleaf
|
|
65
65
|
:aliases => "-f",
|
66
66
|
:required => false,
|
67
67
|
:desc => 'File or files to perform this operation on. If multiple files are provided, they must be comma separated.' })
|
68
|
+
|
68
69
|
add_shared_option(
|
69
|
-
:location, :
|
70
|
+
:location, :registered_selection, {
|
70
71
|
:aliases => "-s",
|
71
72
|
:required => false,
|
72
73
|
:desc => 'Name or comma separated names of storage locations to perform this operation over.' })
|
73
|
-
|
74
74
|
add_shared_option(
|
75
75
|
:from_list, :registered_selection, {
|
76
76
|
:aliases => "-l",
|
@@ -115,7 +115,21 @@ module Longleaf
|
|
115
115
|
...
|
116
116
|
md5:
|
117
117
|
<digest> <path>
|
118
|
-
...
|
118
|
+
...
|
119
|
+
|
120
|
+
To provide separate logical and physical paths, add a physical path column:
|
121
|
+
'-m sha1:@-'
|
122
|
+
Where the content in STDIN adheres to the format:
|
123
|
+
<digest> <logical path> <physical path>
|
124
|
+
...
|
125
|
+
})
|
126
|
+
method_option(:physical_path,
|
127
|
+
:aliases => "-p",
|
128
|
+
:required => false,
|
129
|
+
:desc => %q{Comma separated list of physical paths of files to register. Only needed
|
130
|
+
if the physical and logical paths of the files differ, otherwise they will be assumed to be the same.
|
131
|
+
Only applicable when used with the -f option, and only for individual files, not directories.
|
132
|
+
Must be provided in the same order as the logical paths.})
|
119
133
|
method_option(:force,
|
120
134
|
:type => :boolean,
|
121
135
|
:default => false,
|
@@ -132,11 +146,12 @@ module Longleaf
|
|
132
146
|
|
133
147
|
app_config_manager = load_application_config(options)
|
134
148
|
|
135
|
-
file_selector, digest_provider = SelectionOptionsParser
|
136
|
-
options, app_config_manager)
|
149
|
+
file_selector, digest_provider, physical_provider = SelectionOptionsParser
|
150
|
+
.parse_registration_selection_options(options, app_config_manager)
|
137
151
|
|
138
152
|
command = RegisterCommand.new(app_config_manager)
|
139
|
-
exit command.execute(file_selector: file_selector, force: options[:force], digest_provider: digest_provider
|
153
|
+
exit command.execute(file_selector: file_selector, force: options[:force], digest_provider: digest_provider,
|
154
|
+
physical_provider: physical_provider)
|
140
155
|
end
|
141
156
|
|
142
157
|
desc "deregister", "Deregister files with Longleaf"
|
@@ -16,9 +16,10 @@ module Longleaf
|
|
16
16
|
# Execute the register command on the given parameters
|
17
17
|
# @param file_selector [FileSelector] selector for files to register
|
18
18
|
# @param force [Boolean] force flag
|
19
|
-
# @param digest_provider [
|
19
|
+
# @param digest_provider [ManifestDigestProvider] object which provides digests for files being registered
|
20
|
+
# @param physical_provider [PhysicalPathProvider] object which provides physical paths for files being registered
|
20
21
|
# @return [Integer] status code
|
21
|
-
def execute(file_selector:, force: false, digest_provider: nil)
|
22
|
+
def execute(file_selector:, force: false, digest_provider: nil, physical_provider: nil)
|
22
23
|
start_time = Time.now
|
23
24
|
logger.info('Performing register command')
|
24
25
|
begin
|
@@ -29,7 +30,8 @@ module Longleaf
|
|
29
30
|
|
30
31
|
storage_location = @app_manager.location_manager.get_location_by_path(f_path)
|
31
32
|
|
32
|
-
|
33
|
+
phys_path = physical_provider.get_physical_path(f_path)
|
34
|
+
file_rec = FileRecord.new(f_path, storage_location, nil, phys_path)
|
33
35
|
|
34
36
|
register_event = RegisterEvent.new(file_rec: file_rec, force: force, app_manager: @app_manager,
|
35
37
|
digest_provider: digest_provider)
|
@@ -27,12 +27,13 @@ module Longleaf
|
|
27
27
|
service_manager = @app_manager.service_manager
|
28
28
|
md_rec = @file_rec.metadata_record
|
29
29
|
f_path = @file_rec.path
|
30
|
+
phys_path = @file_rec.physical_path
|
30
31
|
|
31
|
-
logger.info("Performing preserve event on #{
|
32
|
+
logger.info("Performing preserve event on #{f_path}")
|
32
33
|
|
33
34
|
needs_persist = false
|
34
35
|
begin
|
35
|
-
if !File.exist?(
|
36
|
+
if !File.exist?(phys_path)
|
36
37
|
# Need to persist metadata to avoid repeating processing of this file too soon.
|
37
38
|
needs_persist = true
|
38
39
|
record_failure(EventNames::PRESERVE, f_path, "File is registered but missing.")
|
@@ -70,10 +70,17 @@ module Longleaf
|
|
70
70
|
private
|
71
71
|
def populate_file_properties
|
72
72
|
md_rec = @file_rec.metadata_record
|
73
|
+
physical_path = @file_rec.physical_path
|
73
74
|
|
74
75
|
# Set file properties
|
75
|
-
md_rec.last_modified = File.mtime(
|
76
|
-
md_rec.file_size = File.size(
|
76
|
+
md_rec.last_modified = File.mtime(physical_path).utc.iso8601(3)
|
77
|
+
md_rec.file_size = File.size(physical_path)
|
78
|
+
|
79
|
+
if physical_path != @file_rec.path
|
80
|
+
md_rec.physical_path = physical_path
|
81
|
+
else
|
82
|
+
md_rec.physical_path = nil
|
83
|
+
end
|
77
84
|
end
|
78
85
|
|
79
86
|
# Copy a subset of properties from an existing metadata record to the new record
|
@@ -1,6 +1,7 @@
|
|
1
1
|
require 'longleaf/candidates/file_selector'
|
2
2
|
require 'longleaf/candidates/registered_file_selector'
|
3
3
|
require 'longleaf/candidates/manifest_digest_provider'
|
4
|
+
require 'longleaf/candidates/physical_path_provider'
|
4
5
|
require 'longleaf/candidates/single_digest_provider'
|
5
6
|
|
6
7
|
module Longleaf
|
@@ -18,8 +19,11 @@ module Longleaf
|
|
18
19
|
options, :file, :manifest, :location)
|
19
20
|
|
20
21
|
if !options[:manifest].nil?
|
21
|
-
digests_mapping = self.
|
22
|
-
|
22
|
+
digests_mapping, logical_phys_mapping = self.parse_manifest(options[:manifest])
|
23
|
+
physical_provider = PhysicalPathProvider.new(logical_phys_mapping)
|
24
|
+
selector = FileSelector.new(file_paths: digests_mapping.keys,
|
25
|
+
physical_provider: physical_provider,
|
26
|
+
app_config: app_config_manager)
|
23
27
|
digest_provider = ManifestDigestProvider.new(digests_mapping)
|
24
28
|
elsif !options[:file].nil?
|
25
29
|
if options[:checksums]
|
@@ -35,18 +39,28 @@ module Longleaf
|
|
35
39
|
end
|
36
40
|
end
|
37
41
|
|
38
|
-
file_paths = options[:file]
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
42
|
+
file_paths = self.split_quoted(options[:file], "\\s*,\\s*")
|
43
|
+
if !options[:physical_path].nil?
|
44
|
+
physical_paths = self.split_quoted(options[:physical_path], "\\s*,\\s*")
|
45
|
+
if physical_paths.length != file_paths.length
|
46
|
+
logger.failure("Invalid physical paths parameter, number of paths did not match number of logical paths")
|
47
|
+
exit 1
|
48
|
+
end
|
49
|
+
logical_phys_mapping = Hash[file_paths.zip physical_paths]
|
50
|
+
physical_provider = PhysicalPathProvider.new(logical_phys_mapping)
|
51
|
+
else
|
52
|
+
physical_provider = PhysicalPathProvider.new
|
53
|
+
end
|
54
|
+
|
55
|
+
selector = FileSelector.new(file_paths: file_paths,
|
56
|
+
physical_provider: physical_provider,
|
57
|
+
app_config: app_config_manager)
|
44
58
|
else
|
45
59
|
logger.failure("Must provide one of the following file selection options: -f, l, or -m")
|
46
60
|
exit 1
|
47
61
|
end
|
48
62
|
|
49
|
-
[selector, digest_provider]
|
63
|
+
[selector, digest_provider, physical_provider]
|
50
64
|
end
|
51
65
|
|
52
66
|
def self.there_can_be_only_one(failure_msg, options, *names)
|
@@ -69,7 +83,7 @@ module Longleaf
|
|
69
83
|
#. <manifest_path> OR @-
|
70
84
|
# @return an array of [digests hash, logical-to-physical path hash]. The digests hash contains the aggregated contents of the provided manifests. Its keys are
|
71
85
|
# paths to manifested files. The values are hashes, mapping digest algorithms to digest values.
|
72
|
-
def self.
|
86
|
+
def self.parse_manifest(manifest_vals)
|
73
87
|
alg_manifest_pairs = []
|
74
88
|
# interpret option inputs into a list of algorithms to manifest sources
|
75
89
|
manifest_vals.each do |manifest_val|
|
@@ -87,6 +101,7 @@ module Longleaf
|
|
87
101
|
|
88
102
|
# read the provided manifests to build a mapping from file uri to all supplied digests
|
89
103
|
digests_mapping = Hash.new { |h,k| h[k] = Hash.new }
|
104
|
+
logical_phys_mapping = Hash.new
|
90
105
|
alg_manifest_pairs.each do |mpair|
|
91
106
|
source_stream = nil
|
92
107
|
# Determine if reading from a manifest file or stdin
|
@@ -111,17 +126,28 @@ module Longleaf
|
|
111
126
|
if current_alg.nil?
|
112
127
|
self.fail("Manifest with unknown checksums encountered, an algorithm must be specified")
|
113
128
|
end
|
114
|
-
entry_parts =
|
115
|
-
if entry_parts.length != 2
|
129
|
+
entry_parts = self.split_quoted(line)
|
130
|
+
if entry_parts.length != 2 && entry_parts.length != 3
|
116
131
|
self.fail("Invalid manifest entry: #{line}")
|
117
132
|
end
|
118
133
|
|
119
134
|
digests_mapping[entry_parts[1]][current_alg] = entry_parts[0]
|
135
|
+
if (entry_parts.length == 3)
|
136
|
+
logical_phys_mapping[entry_parts[1]] = entry_parts[2]
|
137
|
+
end
|
120
138
|
end
|
121
139
|
end
|
122
140
|
end
|
123
141
|
|
124
|
-
digests_mapping
|
142
|
+
[digests_mapping, logical_phys_mapping]
|
143
|
+
end
|
144
|
+
|
145
|
+
# Splits a string of quoted or unquoted tokens separated by spaces
|
146
|
+
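# @param text [String] text to split on delimiter (a regex source string, whitespace by default), keeping quoted tokens together; limit is passed to String#split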
|
147
|
+
def self.split_quoted(text, delimiter = "\\s+", limit = -1)
|
148
|
+
text.split(/#{delimiter}(?=(?:[^'"]|'[^']*'|"[^"]*")*$)/, limit)
|
149
|
+
.select {|s| not s.empty? }
|
150
|
+
.map {|s| s.gsub(/(^ +)|( +$)|(^["']+)|(["']+$)/, '')}
|
125
151
|
end
|
126
152
|
|
127
153
|
# Parses the provided options to create a selector for registered files
|
@@ -7,13 +7,16 @@ module Longleaf
|
|
7
7
|
|
8
8
|
# @param file_path [String] path to the file
|
9
9
|
# @param storage_location [StorageLocation] storage location containing the file
|
10
|
-
|
10
|
+
# @param metadata_record [MetadataRecord] metadata record for this file object. Optional.
|
11
|
+
# @param physical_path [String] physical path where the file is located. Defaults to the file_path.
|
12
|
+
def initialize(file_path, storage_location, metadata_record = nil, physical_path = nil)
|
11
13
|
raise ArgumentError.new("FileRecord requires a path") if file_path.nil?
|
12
14
|
raise ArgumentError.new("FileRecord requires a storage_location") if storage_location.nil?
|
13
15
|
|
14
16
|
@path = file_path
|
15
17
|
@storage_location = storage_location
|
16
18
|
@metadata_record = metadata_record
|
19
|
+
@physical_path = physical_path
|
17
20
|
end
|
18
21
|
|
19
22
|
# @return [String] path for the metadata file for this file
|
@@ -21,6 +24,17 @@ module Longleaf
|
|
21
24
|
@metadata_path = @storage_location.get_metadata_path_for(path) if @metadata_path.nil?
|
22
25
|
@metadata_path
|
23
26
|
end
|
27
|
+
|
28
|
+
def physical_path
|
29
|
+
if @physical_path.nil?
|
30
|
+
if @metadata_record.nil? || @metadata_record.physical_path.nil?
|
31
|
+
@physical_path = @path
|
32
|
+
else
|
33
|
+
@physical_path = @metadata_record.physical_path
|
34
|
+
end
|
35
|
+
end
|
36
|
+
@physical_path
|
37
|
+
end
|
24
38
|
|
25
39
|
def metadata_present?
|
26
40
|
File.exist?(metadata_path)
|
@@ -10,6 +10,7 @@ module Longleaf
|
|
10
10
|
attr_reader :checksums
|
11
11
|
attr_reader :properties
|
12
12
|
attr_accessor :file_size, :last_modified
|
13
|
+
attr_accessor :physical_path
|
13
14
|
|
14
15
|
# @param properties [Hash] initial data properties for this record
|
15
16
|
# @param services [Hash] initial service property tree
|
@@ -18,8 +19,9 @@ module Longleaf
|
|
18
19
|
# @param checksums [Hash] hash of checksum values
|
19
20
|
# @param file_size [Integer] size of file in bytes
|
20
21
|
# @param last_modified [String] iso8601 representation of the last modified date of file
|
22
|
+
# @param physical_path [String] physical path where the file is located
|
21
23
|
def initialize(properties: nil, services: nil, deregistered: nil, registered: nil, checksums: nil,
|
22
|
-
file_size: nil, last_modified: nil)
|
24
|
+
file_size: nil, last_modified: nil, physical_path: nil)
|
23
25
|
@properties = properties || Hash.new
|
24
26
|
@registered = registered
|
25
27
|
@deregistered = deregistered
|
@@ -28,6 +30,7 @@ module Longleaf
|
|
28
30
|
@services = services || Hash.new
|
29
31
|
@file_size = file_size
|
30
32
|
@last_modified = last_modified
|
33
|
+
@physical_path = physical_path
|
31
34
|
end
|
32
35
|
|
33
36
|
# @return [Boolean] true if the record is deregistered
|
@@ -1,6 +1,7 @@
|
|
1
1
|
require 'longleaf/models/storage_location'
|
2
2
|
require 'longleaf/models/storage_types'
|
3
3
|
require 'longleaf/helpers/s3_uri_helper'
|
4
|
+
require 'longleaf/logging'
|
4
5
|
require 'uri'
|
5
6
|
require 'aws-sdk-s3'
|
6
7
|
|
@@ -12,6 +13,7 @@ module Longleaf
|
|
12
13
|
# https://docs.aws.amazon.com/sdk-for-ruby/v3/api/Aws/S3/Client.html#constructor_details
|
13
14
|
|
14
15
|
class S3StorageLocation < StorageLocation
|
16
|
+
include Longleaf::Logging
|
15
17
|
|
16
18
|
IS_URI_REGEX = /\A#{URI::regexp}\z/
|
17
19
|
|
@@ -38,6 +40,9 @@ module Longleaf
|
|
38
40
|
# Clone options and convert keys to symbols
|
39
41
|
@client_options = Hash[custom_options.map { |(k,v)| [k.to_sym,v] } ]
|
40
42
|
end
|
43
|
+
@client_options[:logger] = logger
|
44
|
+
@client_options[:log_level] = :debug if @client_options[:log_level].nil?
|
45
|
+
|
41
46
|
# If no region directly configured, use region from path
|
42
47
|
if !@client_options.key?(:region)
|
43
48
|
region = S3UriHelper.extract_region(@path)
|
@@ -23,22 +23,23 @@ module Longleaf
|
|
23
23
|
# @raise [PreservationServiceError] if the file system information does not match the stored details
|
24
24
|
def perform(file_rec, event)
|
25
25
|
file_path = file_rec.path
|
26
|
+
phys_path = file_rec.physical_path
|
26
27
|
md_rec = file_rec.metadata_record
|
27
28
|
|
28
29
|
logger.debug("Performing file information check of #{file_path}")
|
29
30
|
|
30
|
-
if !File.exist?(
|
31
|
-
raise PreservationServiceError.new("File does not exist: #{
|
31
|
+
if !File.exist?(phys_path)
|
32
|
+
raise PreservationServiceError.new("File does not exist: #{phys_path}")
|
32
33
|
end
|
33
34
|
|
34
|
-
file_size = File.size(
|
35
|
+
file_size = File.size(phys_path)
|
35
36
|
if file_size != md_rec.file_size
|
36
|
-
raise PreservationServiceError.new("File size for #{
|
37
|
+
raise PreservationServiceError.new("File size for #{phys_path} does not match the expected value: registered = #{md_rec.file_size} bytes, actual = #{file_size} bytes")
|
37
38
|
end
|
38
39
|
|
39
|
-
last_modified = File.mtime(
|
40
|
+
last_modified = File.mtime(phys_path).utc.iso8601(3)
|
40
41
|
if last_modified != md_rec.last_modified
|
41
|
-
raise PreservationServiceError.new("Last modified timestamp for #{
|
42
|
+
raise PreservationServiceError.new("Last modified timestamp for #{phys_path} does not match the expected value: registered = #{md_rec.last_modified}, actual = #{last_modified}")
|
42
43
|
end
|
43
44
|
end
|
44
45
|
|
@@ -63,6 +63,7 @@ module Longleaf
|
|
63
63
|
# @raise [ChecksumMismatchError] if the checksum on record does not match the generated checksum
|
64
64
|
def perform(file_rec, event)
|
65
65
|
path = file_rec.path
|
66
|
+
phys_path = file_rec.physical_path
|
66
67
|
md_rec = file_rec.metadata_record
|
67
68
|
|
68
69
|
# Get the list of existing checksums for the file and normalize algorithm names
|
@@ -89,19 +90,19 @@ module Longleaf
|
|
89
90
|
end
|
90
91
|
|
91
92
|
digest = DigestHelper::start_digest(alg)
|
92
|
-
digest.file(
|
93
|
+
digest.file(phys_path)
|
93
94
|
generated_digest = digest.hexdigest
|
94
95
|
|
95
96
|
# Store the missing checksum if using the 'generate' behavior
|
96
97
|
if existing_digest.nil? && @absent_digest_behavior == GENERATE_IF_ABSENT
|
97
98
|
md_rec.checksums[alg] = generated_digest
|
98
|
-
logger.info("Generated and stored digest using algorithm '#{alg}' for file #{
|
99
|
+
logger.info("Generated and stored digest using algorithm '#{alg}' for file #{phys_path}")
|
99
100
|
else
|
100
101
|
# Compare the new digest to the one on record
|
101
102
|
if existing_digest == generated_digest
|
102
|
-
logger.info("Fixity check using algorithm '#{alg}' succeeded for file #{
|
103
|
+
logger.info("Fixity check using algorithm '#{alg}' succeeded for file #{phys_path}")
|
103
104
|
else
|
104
|
-
raise ChecksumMismatchError.new("Fixity check using algorithm '#{alg}' failed for file #{
|
105
|
+
raise ChecksumMismatchError.new("Fixity check using algorithm '#{alg}' failed for file #{phys_path}: expected '#{existing_digest}', calculated '#{generated_digest}.'")
|
105
106
|
end
|
106
107
|
end
|
107
108
|
end
|
@@ -51,9 +51,6 @@ module Longleaf
|
|
51
51
|
+ " rsync_options may not include the following: #{DISALLOWED_OPTIONS.join(' ')}")
|
52
52
|
end
|
53
53
|
|
54
|
-
# Add -R (--relative) in to command options to ensure full path gets replicated
|
55
|
-
@options = @options + " -R"
|
56
|
-
|
57
54
|
# Set and validate the replica collision policy
|
58
55
|
@collision_policy = @service_def.properties[SF::COLLISION_PROPERTY] || SF::DEFAULT_COLLISION_POLICY
|
59
56
|
if !SF::VALID_COLLISION_POLICIES.include?(@collision_policy)
|
@@ -101,17 +98,36 @@ module Longleaf
|
|
101
98
|
else
|
102
99
|
dest_path = destination
|
103
100
|
end
|
104
|
-
|
101
|
+
|
102
|
+
logical_physical_same = file_rec.path == file_rec.physical_path
|
105
103
|
# Determine the path to the file being replicated relative to its storage location
|
106
104
|
rel_path = file_rec.storage_location.relativize(file_rec.path)
|
107
|
-
|
108
|
-
|
105
|
+
|
106
|
+
options = @options
|
107
|
+
if logical_physical_same
|
108
|
+
options = options + " -R"
|
109
|
+
# source path with . so that rsync will only create destination directories starting from that point
|
110
|
+
source_path = File.join(file_rec.storage_location.path, "./#{rel_path}")
|
111
|
+
else
|
112
|
+
options = options + " --no-relative"
|
113
|
+
source_path = file_rec.physical_path
|
114
|
+
dest_path = File.join(dest_path, rel_path)
|
115
|
+
if (dest_is_storage_loc && destination.is_a?(Longleaf::FilesystemStorageLocation)) || !dest_is_storage_loc
|
116
|
+
# Fill in missing parent directories, as rsync cannot do so when specifying a different source and dest filename
|
117
|
+
dirname = File.dirname(dest_path)
|
118
|
+
logger.debug("Creating parent dirs #{dirname} for #{file_rec.path}")
|
119
|
+
FileUtils.mkdir_p(dirname)
|
120
|
+
else
|
121
|
+
raise PreservationServiceError.new(
|
122
|
+
"Destination #{destination.name} does not currently support separate physical and logical paths")
|
123
|
+
end
|
124
|
+
end
|
109
125
|
|
110
126
|
# Check that the destination is available before attempting to write
|
111
127
|
verify_destination_available(destination, file_rec)
|
112
128
|
|
113
|
-
logger.debug("Invoking rsync with command: #{@command} \"#{source_path}\" \"#{dest_path}\" #{
|
114
|
-
stdout, stderr, status = Open3.capture3("#{@command} \"#{source_path}\" \"#{dest_path}\" #{
|
129
|
+
logger.debug("Invoking rsync with command: #{@command} \"#{source_path}\" \"#{dest_path}\" #{options}")
|
130
|
+
stdout, stderr, status = Open3.capture3("#{@command} \"#{source_path}\" \"#{dest_path}\" #{options}")
|
115
131
|
raise PreservationServiceError.new("Failed to replicate #{file_rec.path} to #{dest_path}: #{stderr}") \
|
116
132
|
unless status.success?
|
117
133
|
|
@@ -83,8 +83,6 @@ module Longleaf
|
|
83
83
|
# Determine the path to the file being replicated relative to its storage location
|
84
84
|
rel_path = file_rec.storage_location.relativize(file_rec.path)
|
85
85
|
|
86
|
-
content_md5 = get_content_md5(file_rec)
|
87
|
-
|
88
86
|
@destinations.each do |destination|
|
89
87
|
# Check that the destination is available before attempting to write
|
90
88
|
verify_destination_available(destination, file_rec)
|
@@ -92,7 +90,7 @@ module Longleaf
|
|
92
90
|
rel_to_bucket = destination.relative_to_bucket_path(rel_path)
|
93
91
|
file_obj = destination.s3_bucket.object(rel_to_bucket)
|
94
92
|
begin
|
95
|
-
file_obj.upload_file(file_rec.
|
93
|
+
file_obj.upload_file(file_rec.physical_path)
|
96
94
|
rescue Aws::S3::Errors::BadDigest => e
|
97
95
|
raise ChecksumMismatchError.new("Transfer to bucket '#{destination.s3_bucket.name}' failed, " \
|
98
96
|
+ "MD5 provided did not match the received content for #{file_rec.path}")
|
@@ -129,15 +127,5 @@ module Longleaf
|
|
129
127
|
+ e.message)
|
130
128
|
end
|
131
129
|
end
|
132
|
-
|
133
|
-
def get_content_md5(file_rec)
|
134
|
-
md_rec = file_rec.metadata_record
|
135
|
-
if md_rec.checksums.key?('md5')
|
136
|
-
# base 64 encode the digest, as is required by the Content-Md5 header
|
137
|
-
[[md_rec.checksums['md5']].pack("H*")].pack("m0")
|
138
|
-
else
|
139
|
-
nil
|
140
|
-
end
|
141
|
-
end
|
142
130
|
end
|
143
131
|
end
|
@@ -32,6 +32,7 @@ module Longleaf
|
|
32
32
|
checksums = data.delete(MDFields::CHECKSUMS)
|
33
33
|
file_size = data.delete(MDFields::FILE_SIZE)
|
34
34
|
last_modified = data.delete(MDFields::LAST_MODIFIED)
|
35
|
+
physical_path = data.delete(MDFields::PHYSICAL_PATH)
|
35
36
|
|
36
37
|
services = md[MDF::SERVICES]
|
37
38
|
service_records = Hash.new
|
@@ -57,7 +58,8 @@ module Longleaf
|
|
57
58
|
deregistered: deregistered,
|
58
59
|
checksums: checksums,
|
59
60
|
file_size: file_size,
|
60
|
-
last_modified: last_modified
|
61
|
+
last_modified: last_modified,
|
62
|
+
physical_path: physical_path)
|
61
63
|
end
|
62
64
|
|
63
65
|
# Load configuration from a yaml encoded configuration file
|
@@ -52,6 +52,7 @@ module Longleaf
|
|
52
52
|
data[MDF::CHECKSUMS] = metadata.checksums unless metadata.checksums && metadata.checksums.empty?
|
53
53
|
data[MDF::FILE_SIZE] = metadata.file_size unless metadata.file_size.nil?
|
54
54
|
data[MDF::LAST_MODIFIED] = metadata.last_modified if metadata.last_modified
|
55
|
+
data[MDF::PHYSICAL_PATH] = metadata.physical_path if metadata.physical_path
|
55
56
|
|
56
57
|
props[MDF::DATA] = data
|
57
58
|
|
@@ -29,6 +29,11 @@ module Longleaf
|
|
29
29
|
@checksums[alg] = value
|
30
30
|
self
|
31
31
|
end
|
32
|
+
|
33
|
+
def with_physical_path(phys_path)
|
34
|
+
@physical_path = phys_path
|
35
|
+
self
|
36
|
+
end
|
32
37
|
|
33
38
|
def with_service(name, timestamp: ServiceDateHelper::formatted_timestamp, run_needed: false, properties: nil,
|
34
39
|
failure_timestamp: nil)
|
@@ -56,7 +61,8 @@ module Longleaf
|
|
56
61
|
registered: @registered,
|
57
62
|
checksums: @checksums,
|
58
63
|
file_size: @file_size,
|
59
|
-
last_modified: @last_modified
|
64
|
+
last_modified: @last_modified,
|
65
|
+
physical_path: @physical_path)
|
60
66
|
end
|
61
67
|
|
62
68
|
# Add the generated metadata record to the given file record
|
data/lib/longleaf/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: longleaf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ben Pennell
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-03-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: thor
|
@@ -372,6 +372,7 @@ files:
|
|
372
372
|
- lib/longleaf.rb
|
373
373
|
- lib/longleaf/candidates/file_selector.rb
|
374
374
|
- lib/longleaf/candidates/manifest_digest_provider.rb
|
375
|
+
- lib/longleaf/candidates/physical_path_provider.rb
|
375
376
|
- lib/longleaf/candidates/registered_file_selector.rb
|
376
377
|
- lib/longleaf/candidates/service_candidate_filesystem_iterator.rb
|
377
378
|
- lib/longleaf/candidates/service_candidate_index_iterator.rb
|
@@ -449,7 +450,7 @@ licenses:
|
|
449
450
|
- Apache-2.0
|
450
451
|
metadata:
|
451
452
|
source_code_uri: https://github.com/UNC-Libraries/longleaf-preservation
|
452
|
-
post_install_message:
|
453
|
+
post_install_message:
|
453
454
|
rdoc_options: []
|
454
455
|
require_paths:
|
455
456
|
- lib
|
@@ -464,8 +465,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
464
465
|
- !ruby/object:Gem::Version
|
465
466
|
version: '0'
|
466
467
|
requirements: []
|
467
|
-
rubygems_version: 3.1.
|
468
|
-
signing_key:
|
468
|
+
rubygems_version: 3.1.4
|
469
|
+
signing_key:
|
469
470
|
specification_version: 4
|
470
471
|
summary: Longleaf preservation services tool
|
471
472
|
test_files: []
|