longleaf 0.3.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.circleci/config.yml +12 -2
- data/README.md +11 -1
- data/lib/longleaf/candidates/manifest_digest_provider.rb +17 -0
- data/lib/longleaf/candidates/single_digest_provider.rb +13 -0
- data/lib/longleaf/cli.rb +49 -36
- data/lib/longleaf/commands/register_command.rb +3 -3
- data/lib/longleaf/commands/validate_config_command.rb +1 -1
- data/lib/longleaf/events/register_event.rb +8 -4
- data/lib/longleaf/helpers/case_insensitive_hash.rb +38 -0
- data/lib/longleaf/helpers/digest_helper.rb +7 -1
- data/lib/longleaf/helpers/s3_uri_helper.rb +86 -0
- data/lib/longleaf/helpers/selection_options_parser.rb +189 -0
- data/lib/longleaf/helpers/service_date_helper.rb +29 -1
- data/lib/longleaf/indexing/sequel_index_driver.rb +2 -20
- data/lib/longleaf/models/app_fields.rb +4 -2
- data/lib/longleaf/models/filesystem_metadata_location.rb +56 -0
- data/lib/longleaf/models/filesystem_storage_location.rb +52 -0
- data/lib/longleaf/models/metadata_location.rb +47 -0
- data/lib/longleaf/models/metadata_record.rb +3 -1
- data/lib/longleaf/models/s3_storage_location.rb +133 -0
- data/lib/longleaf/models/service_fields.rb +4 -0
- data/lib/longleaf/models/storage_location.rb +17 -48
- data/lib/longleaf/models/storage_types.rb +9 -0
- data/lib/longleaf/preservation_services/rsync_replication_service.rb +9 -11
- data/lib/longleaf/preservation_services/s3_replication_service.rb +143 -0
- data/lib/longleaf/services/application_config_deserializer.rb +26 -4
- data/lib/longleaf/services/application_config_validator.rb +17 -6
- data/lib/longleaf/services/configuration_validator.rb +64 -4
- data/lib/longleaf/services/filesystem_location_validator.rb +16 -0
- data/lib/longleaf/services/metadata_deserializer.rb +41 -9
- data/lib/longleaf/services/metadata_persistence_manager.rb +3 -2
- data/lib/longleaf/services/metadata_serializer.rb +94 -13
- data/lib/longleaf/services/metadata_validator.rb +76 -0
- data/lib/longleaf/services/s3_location_validator.rb +19 -0
- data/lib/longleaf/services/service_definition_validator.rb +16 -8
- data/lib/longleaf/services/service_manager.rb +7 -15
- data/lib/longleaf/services/service_mapping_validator.rb +26 -15
- data/lib/longleaf/services/storage_location_manager.rb +38 -12
- data/lib/longleaf/services/storage_location_validator.rb +41 -30
- data/lib/longleaf/specs/config_builder.rb +10 -3
- data/lib/longleaf/specs/config_validator_helpers.rb +16 -0
- data/lib/longleaf/specs/metadata_builder.rb +1 -0
- data/lib/longleaf/version.rb +1 -1
- data/longleaf.gemspec +3 -1
- data/mkdocs.yml +2 -1
- metadata +48 -8
- data/.travis.yml +0 -4
- data/lib/longleaf/services/storage_path_validator.rb +0 -16
@@ -4,14 +4,25 @@ require_relative 'service_mapping_validator'
|
|
4
4
|
|
5
5
|
module Longleaf
|
6
6
|
# Validator for Longleaf application configuration
|
7
|
-
class ApplicationConfigValidator
|
7
|
+
class ApplicationConfigValidator < ConfigurationValidator
|
8
|
+
# @param config [Hash] hash containing the application configuration
|
9
|
+
def initialize(config)
|
10
|
+
super(config)
|
11
|
+
end
|
12
|
+
|
13
|
+
protected
|
8
14
|
# Validates the application configuration provided. Will raise ConfigurationError
|
9
15
|
# if any portion of the configuration is not syntactically or semantically valid.
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
16
|
+
def validate
|
17
|
+
loc_result = StorageLocationValidator.new(@config).validate_config
|
18
|
+
defs_result = ServiceDefinitionValidator.new(@config).validate_config
|
19
|
+
mapping_result = ServiceMappingValidator.new(@config).validate_config
|
20
|
+
|
21
|
+
@result.errors.concat(loc_result.errors) unless loc_result.valid?
|
22
|
+
@result.errors.concat(defs_result.errors) unless defs_result.valid?
|
23
|
+
@result.errors.concat(mapping_result.errors) unless mapping_result.valid?
|
24
|
+
|
25
|
+
@result
|
15
26
|
end
|
16
27
|
end
|
17
28
|
end
|
@@ -1,11 +1,71 @@
|
|
1
1
|
module Longleaf
|
2
2
|
# Abstract configuration validator class
|
3
3
|
class ConfigurationValidator
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
4
|
+
attr_reader :result
|
5
|
+
|
6
|
+
def initialize(config)
|
7
|
+
@result = ConfigurationValidationResult.new
|
8
|
+
@config = config
|
9
|
+
end
|
10
|
+
|
11
|
+
# Verify that the provided configuration is valid
|
12
|
+
# @return [ConfigurationValidationResult] the result of the validation
|
13
|
+
def validate_config
|
14
|
+
register_on_failure { validate }
|
15
|
+
|
16
|
+
@result
|
17
|
+
end
|
18
|
+
|
19
|
+
# Asserts that the given conditional is true, raising a ConfigurationError if it is not.
|
20
|
+
def assert(fail_message, assertion_passed)
|
21
|
+
fail(fail_message) unless assertion_passed
|
22
|
+
end
|
23
|
+
|
24
|
+
# Indicate that validation has failed, raising a ConfigurationError with the given message
|
25
|
+
def fail(fail_message)
|
26
|
+
raise ConfigurationError.new(fail_message)
|
27
|
+
end
|
28
|
+
|
29
|
+
# Registers an error to the result for this validator
|
30
|
+
def register_error(error)
|
31
|
+
if error.is_a?(StandardError)
|
32
|
+
@result.register_error(error.msg)
|
33
|
+
else
|
34
|
+
@result.register_error(error)
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
# Performs the provided block. If the block produces a ConfigurationError, the error
|
39
|
+
# is swallowed and registered to the result
|
40
|
+
def register_on_failure
|
41
|
+
begin
|
42
|
+
yield
|
43
|
+
rescue ConfigurationError => err
|
44
|
+
register_error(err.message)
|
8
45
|
end
|
9
46
|
end
|
10
47
|
end
|
48
|
+
|
49
|
+
class ConfigurationValidationResult
|
50
|
+
attr_reader :errors
|
51
|
+
|
52
|
+
def initialize
|
53
|
+
@errors = Array.new
|
54
|
+
end
|
55
|
+
|
56
|
+
# Register an error with this validation result
|
57
|
+
def register_error(error_message)
|
58
|
+
@errors << error_message
|
59
|
+
end
|
60
|
+
|
61
|
+
# @return [boolean] true if validation produced no errors
|
62
|
+
def valid?
|
63
|
+
@errors.length == 0
|
64
|
+
end
|
65
|
+
|
66
|
+
# @raise [ConfigurationError] if the result is not valid, which lists all failures
|
67
|
+
def raise_if_invalid
|
68
|
+
raise ConfigurationError.new(@errors.join("\n")) unless valid?
|
69
|
+
end
|
70
|
+
end
|
11
71
|
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'pathname'
|
2
|
+
require 'longleaf/errors'
|
3
|
+
|
4
|
+
module Longleaf
|
5
|
+
# Validates the configuration of a filesystem based location
|
6
|
+
class FilesystemLocationValidator
|
7
|
+
|
8
|
+
def self.validate(p_validator, name, path_prop, section_name, path)
|
9
|
+
base_msg = "Storage location '#{name}' specifies invalid #{section_name} '#{path_prop}' property: "
|
10
|
+
p_validator.assert(base_msg + 'Path must not be empty', !path.nil? && !path.to_s.strip.empty?)
|
11
|
+
p_validator.assert(base_msg + 'Path must not contain any relative modifiers (/..)', !path.include?('/..'))
|
12
|
+
p_validator.assert(base_msg + 'Path must be absolute', Pathname.new(path).absolute?)
|
13
|
+
p_validator.assert(base_msg + 'Path does not exist', Dir.exist?(path))
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
@@ -1,6 +1,7 @@
|
|
1
1
|
require 'yaml'
|
2
2
|
require 'longleaf/models/metadata_record'
|
3
3
|
require 'longleaf/models/md_fields'
|
4
|
+
require 'longleaf/services/metadata_validator'
|
4
5
|
require 'longleaf/errors'
|
5
6
|
require 'longleaf/logging'
|
6
7
|
|
@@ -15,6 +16,8 @@ module Longleaf
|
|
15
16
|
# @param file_path [String] path of the file to read. Required.
|
16
17
|
# @param format [String] format the file is stored in. Default is 'yaml'.
|
17
18
|
def self.deserialize(file_path:, format: 'yaml', digest_algs: [])
|
19
|
+
file_path = file_path.path if file_path.is_a?(File)
|
20
|
+
|
18
21
|
case format
|
19
22
|
when 'yaml'
|
20
23
|
md = from_yaml(file_path, digest_algs)
|
@@ -22,10 +25,6 @@ module Longleaf
|
|
22
25
|
raise ArgumentError.new("Invalid deserialization format #{format} specified")
|
23
26
|
end
|
24
27
|
|
25
|
-
if !md || !md.is_a?(Hash) || !md.key?(MDF::DATA) || !md.key?(MDF::SERVICES)
|
26
|
-
raise Longleaf::MetadataError.new("Invalid metadata file, did not contain data or services fields: #{file_path}")
|
27
|
-
end
|
28
|
-
|
29
28
|
data = Hash.new.merge(md[MDF::DATA])
|
30
29
|
# Extract reserved properties for submission as separate parameters
|
31
30
|
registered = data.delete(MDFields::REGISTERED_TIMESTAMP)
|
@@ -37,7 +36,7 @@ module Longleaf
|
|
37
36
|
services = md[MDF::SERVICES]
|
38
37
|
service_records = Hash.new
|
39
38
|
services&.each do |name, props|
|
40
|
-
raise
|
39
|
+
raise MetadataError.new("Value of service #{name} must be a hash") unless props.class == Hash
|
41
40
|
|
42
41
|
service_props = Hash.new.merge(props)
|
43
42
|
|
@@ -66,12 +65,45 @@ module Longleaf
|
|
66
65
|
File.open(file_path, 'r:bom|utf-8') do |f|
|
67
66
|
contents = f.read
|
68
67
|
|
69
|
-
|
68
|
+
checksum_error = nil
|
69
|
+
begin
|
70
|
+
verify_digests(file_path, contents, digest_algs)
|
71
|
+
rescue ChecksumMismatchError => err
|
72
|
+
# Hold onto the checksum error, in case we can identify the underlying cause
|
73
|
+
checksum_error = err
|
74
|
+
end
|
70
75
|
|
71
76
|
begin
|
72
|
-
|
73
|
-
|
74
|
-
|
77
|
+
md = nil
|
78
|
+
begin
|
79
|
+
md = YAML.safe_load(contents, [], [], true)
|
80
|
+
rescue => err
|
81
|
+
raise MetadataError.new("Failed to parse metadata file #{file_path}: #{err.message}")
|
82
|
+
end
|
83
|
+
|
84
|
+
validation_result = MetadataValidator.new(md).validate_config
|
85
|
+
if !validation_result.valid?
|
86
|
+
if checksum_error.nil?
|
87
|
+
raise MetadataError.new("Invalid metadata file #{file_path.to_s}:\n#{validation_result.errors.join("\n")}")
|
88
|
+
else
|
89
|
+
raise MetadataError.new(validation_result.errors.join("\n"))
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
# Either return the valid metadata, or raise the checksum error as is
|
94
|
+
if checksum_error.nil?
|
95
|
+
md
|
96
|
+
else
|
97
|
+
raise checksum_error
|
98
|
+
end
|
99
|
+
rescue MetadataError => err
|
100
|
+
if checksum_error.nil?
|
101
|
+
raise err
|
102
|
+
else
|
103
|
+
# Add underlying cause from the metadata error to the checksum mismatch error
|
104
|
+
msg = checksum_error.message + "\nWith related issue(s):\n#{err.message}"
|
105
|
+
raise ChecksumMismatchError.new(msg)
|
106
|
+
end
|
75
107
|
end
|
76
108
|
end
|
77
109
|
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'longleaf/services/metadata_serializer'
|
2
|
+
require 'longleaf/services/metadata_deserializer'
|
2
3
|
require 'longleaf/errors'
|
3
4
|
|
4
5
|
module Longleaf
|
@@ -20,7 +21,7 @@ module Longleaf
|
|
20
21
|
|
21
22
|
MetadataSerializer::write(metadata: file_rec.metadata_record,
|
22
23
|
file_path: file_rec.metadata_path,
|
23
|
-
digest_algs: file_rec.storage_location.
|
24
|
+
digest_algs: file_rec.storage_location.metadata_location.digests)
|
24
25
|
|
25
26
|
index(file_rec)
|
26
27
|
end
|
@@ -38,7 +39,7 @@ module Longleaf
|
|
38
39
|
# @return [MetadataRecord] the metadata record for the file record
|
39
40
|
def load(file_rec)
|
40
41
|
md_rec = MetadataDeserializer.deserialize(file_path: file_rec.metadata_path,
|
41
|
-
digest_algs: file_rec.storage_location.
|
42
|
+
digest_algs: file_rec.storage_location.metadata_location.digests)
|
42
43
|
file_rec.metadata_record = md_rec
|
43
44
|
md_rec
|
44
45
|
end
|
@@ -5,6 +5,7 @@ require 'longleaf/helpers/digest_helper'
|
|
5
5
|
require 'longleaf/errors'
|
6
6
|
require 'longleaf/logging'
|
7
7
|
require 'pathname'
|
8
|
+
require "tempfile"
|
8
9
|
|
9
10
|
module Longleaf
|
10
11
|
# Service which serializes MetadataRecord objects
|
@@ -30,12 +31,7 @@ module Longleaf
|
|
30
31
|
raise ArgumentError.new("Invalid serialization format #{format} specified")
|
31
32
|
end
|
32
33
|
|
33
|
-
|
34
|
-
parent_dir = Pathname(file_path).parent
|
35
|
-
parent_dir.mkpath unless parent_dir.exist?
|
36
|
-
|
37
|
-
File.write(file_path, content)
|
38
|
-
write_digests(file_path, content, digest_algs)
|
34
|
+
atomic_write(file_path, content, digest_algs)
|
39
35
|
end
|
40
36
|
|
41
37
|
# @param metadata [MetadataRecord] metadata record to transform
|
@@ -85,24 +81,109 @@ module Longleaf
|
|
85
81
|
end
|
86
82
|
end
|
87
83
|
|
84
|
+
# Safely writes the new metadata file and its digests.
|
85
|
+
# It does so by first writing the content and its digests to temp files,
|
86
|
+
# then making the temp files the current version of the file.
|
87
|
+
# Attempts to clean up new data in the case of failure.
|
88
|
+
def self.atomic_write(file_path, content, digest_algs)
|
89
|
+
# Fill in parent directories if they do not exist
|
90
|
+
parent_dir = Pathname(file_path).parent
|
91
|
+
parent_dir.mkpath unless parent_dir.exist?
|
92
|
+
|
93
|
+
file_path = file_path.path if file_path.respond_to?(:path)
|
94
|
+
|
95
|
+
# If file does not already exist, then simply write it
|
96
|
+
if !File.exist?(file_path)
|
97
|
+
File.write(file_path, content)
|
98
|
+
write_digests(file_path, content, digest_algs)
|
99
|
+
return
|
100
|
+
end
|
101
|
+
|
102
|
+
# Updating file, use safe atomic write
|
103
|
+
File.open(file_path) do |original_file|
|
104
|
+
original_file.flock(File::LOCK_EX)
|
105
|
+
|
106
|
+
base_name = File.basename(file_path)
|
107
|
+
Tempfile.open(base_name, parent_dir) do |temp_file|
|
108
|
+
begin
|
109
|
+
# Write content to temp file
|
110
|
+
temp_file.write(content)
|
111
|
+
temp_file.close
|
112
|
+
|
113
|
+
temp_path = temp_file.path
|
114
|
+
|
115
|
+
# Set permissions of new file to match old if it exists
|
116
|
+
old_stat = File.stat(file_path)
|
117
|
+
set_perms(temp_path, old_stat)
|
118
|
+
|
119
|
+
begin
|
120
|
+
digest_paths = write_digests(temp_path, content, digest_algs)
|
121
|
+
|
122
|
+
File.rename(temp_path, file_path)
|
123
|
+
rescue => e
|
124
|
+
cleanup_digests(temp_path)
|
125
|
+
raise e
|
126
|
+
end
|
127
|
+
rescue => e
|
128
|
+
temp_file.delete
|
129
|
+
raise e
|
130
|
+
end
|
131
|
+
|
132
|
+
# Cleanup all existing digest files, in case the set of algorithms has changed
|
133
|
+
cleanup_digests(file_path)
|
134
|
+
# Move new digests into place
|
135
|
+
digest_paths.each do |digest_path|
|
136
|
+
File.rename(digest_path, digest_path.sub(temp_path, file_path))
|
137
|
+
end
|
138
|
+
end
|
139
|
+
end
|
140
|
+
end
|
141
|
+
|
142
|
+
def self.set_perms(file_path, stat_info)
|
143
|
+
if stat_info
|
144
|
+
# Set correct permissions on new file
|
145
|
+
begin
|
146
|
+
File.chown(stat_info.uid, stat_info.gid, file_path)
|
147
|
+
# This operation will affect filesystem ACL's
|
148
|
+
File.chmod(stat_info.mode, file_path)
|
149
|
+
rescue Errno::EPERM, Errno::EACCES
|
150
|
+
# Changing file ownership failed, moving on.
|
151
|
+
return false
|
152
|
+
end
|
153
|
+
end
|
154
|
+
true
|
155
|
+
end
|
156
|
+
|
157
|
+
# Deletes all known digest files for the provided file path
|
158
|
+
def self.cleanup_digests(file_path)
|
159
|
+
DigestHelper::KNOWN_DIGESTS.each do |alg|
|
160
|
+
digest_path = "#{file_path}.#{alg}"
|
161
|
+
File.delete(digest_path) if File.exist?(digest_path)
|
162
|
+
end
|
163
|
+
end
|
164
|
+
|
88
165
|
def self.write_digests(file_path, content, digests)
|
89
|
-
return if digests.nil? || digests.empty?
|
166
|
+
return [] if digests.nil? || digests.empty?
|
167
|
+
|
168
|
+
digest_paths = Array.new
|
90
169
|
|
91
170
|
digests.each do |alg|
|
92
171
|
digest_class = DigestHelper::start_digest(alg)
|
93
172
|
result = digest_class.hexdigest(content)
|
94
|
-
|
95
|
-
digest_path = "#{file_path.path}.#{alg}"
|
96
|
-
else
|
97
|
-
digest_path = "#{file_path}.#{alg}"
|
98
|
-
end
|
173
|
+
digest_path = "#{file_path}.#{alg}"
|
99
174
|
|
100
175
|
File.write(digest_path, result)
|
101
176
|
|
102
|
-
|
177
|
+
digest_paths.push(digest_path)
|
178
|
+
|
179
|
+
self.logger.debug("Generated #{alg} digest for metadata file #{file_path}: #{digest_path} #{result}")
|
103
180
|
end
|
181
|
+
|
182
|
+
digest_paths
|
104
183
|
end
|
105
184
|
|
185
|
+
private_class_method :cleanup_digests
|
106
186
|
private_class_method :write_digests
|
187
|
+
private_class_method :atomic_write
|
107
188
|
end
|
108
189
|
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
require 'pathname'
|
2
|
+
require 'longleaf/models/md_fields'
|
3
|
+
require 'longleaf/errors'
|
4
|
+
require_relative 'configuration_validator'
|
5
|
+
|
6
|
+
module Longleaf
|
7
|
+
# Validator for file metadata
|
8
|
+
class MetadataValidator < ConfigurationValidator
|
9
|
+
MDF ||= MDFields
|
10
|
+
|
11
|
+
# @param config [Hash] hash containing the metadata for a file
|
12
|
+
def initialize(config)
|
13
|
+
super(config)
|
14
|
+
end
|
15
|
+
|
16
|
+
protected
|
17
|
+
# Validates the provided metadata for a file to ensure that it is syntactically correct and field types
|
18
|
+
# are valid.
|
19
|
+
def validate
|
20
|
+
assert("Metadata must be a hash, but a #{@config.class} was provided", @config.class == Hash)
|
21
|
+
assert("Metadata must contain a '#{MDF::DATA}' key", @config.key?(MDF::DATA))
|
22
|
+
assert("Metadata must contain a '#{MDF::SERVICES}' key", @config.key?(MDF::SERVICES))
|
23
|
+
|
24
|
+
data = @config[MDF::DATA]
|
25
|
+
register_on_failure { validate_date_field(data, MDF::REGISTERED_TIMESTAMP) }
|
26
|
+
register_on_failure { validate_date_field(data, MDF::DEREGISTERED_TIMESTAMP, required: false) }
|
27
|
+
register_on_failure { validate_date_field(data, MDF::LAST_MODIFIED) }
|
28
|
+
|
29
|
+
register_on_failure { validate_positive_integer(data, MDF::FILE_SIZE) }
|
30
|
+
|
31
|
+
checksums = data[MDF::CHECKSUMS]
|
32
|
+
register_on_failure do
|
33
|
+
if !checksums.nil? && !checksums.is_a?(Hash)
|
34
|
+
fail("Field '#{MDF::CHECKSUMS}' must be a map of algorithms to digests, but was a #{checksums.class}")
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
# Ensure that any service timestamps present are valid dates
|
39
|
+
services = @config[MDF::SERVICES]
|
40
|
+
services.each do |service_name, service_rec|
|
41
|
+
register_on_failure { validate_date_field(service_rec, MDF::SERVICE_TIMESTAMP, required: false) }
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
def validate_date_field(section, field_key, required: true)
|
46
|
+
field_val = section[field_key]
|
47
|
+
|
48
|
+
if field_val
|
49
|
+
begin
|
50
|
+
Time.iso8601(section[field_key])
|
51
|
+
rescue ArgumentError
|
52
|
+
fail("Field '#{field_key}' must be a valid ISO8601 timestamp, but contained value '#{section[field_key]}'")
|
53
|
+
end
|
54
|
+
elsif required
|
55
|
+
fail("Metadata must contain a '#{field_key}' field")
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
def validate_positive_integer(section, field_key, required: true)
|
60
|
+
field_val = section[field_key]
|
61
|
+
|
62
|
+
if field_val
|
63
|
+
begin
|
64
|
+
val = field_val.is_a?(Integer) ? field_val : Integer(field_val, 10)
|
65
|
+
if val < 0
|
66
|
+
fail("Field '#{field_key}' must be a positive integer")
|
67
|
+
end
|
68
|
+
rescue ArgumentError => err
|
69
|
+
fail("Field '#{field_key}' must be a positive integer")
|
70
|
+
end
|
71
|
+
elsif required
|
72
|
+
fail("Metadata must contain a '#{field_key}' field")
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|