longleaf 0.3.0 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.circleci/config.yml +12 -2
- data/README.md +11 -1
- data/lib/longleaf/candidates/manifest_digest_provider.rb +17 -0
- data/lib/longleaf/candidates/single_digest_provider.rb +13 -0
- data/lib/longleaf/cli.rb +49 -36
- data/lib/longleaf/commands/register_command.rb +3 -3
- data/lib/longleaf/commands/validate_config_command.rb +1 -1
- data/lib/longleaf/events/register_event.rb +8 -4
- data/lib/longleaf/helpers/case_insensitive_hash.rb +38 -0
- data/lib/longleaf/helpers/digest_helper.rb +7 -1
- data/lib/longleaf/helpers/s3_uri_helper.rb +86 -0
- data/lib/longleaf/helpers/selection_options_parser.rb +189 -0
- data/lib/longleaf/helpers/service_date_helper.rb +29 -1
- data/lib/longleaf/indexing/sequel_index_driver.rb +2 -20
- data/lib/longleaf/models/app_fields.rb +4 -2
- data/lib/longleaf/models/filesystem_metadata_location.rb +56 -0
- data/lib/longleaf/models/filesystem_storage_location.rb +52 -0
- data/lib/longleaf/models/metadata_location.rb +47 -0
- data/lib/longleaf/models/metadata_record.rb +3 -1
- data/lib/longleaf/models/s3_storage_location.rb +133 -0
- data/lib/longleaf/models/service_fields.rb +4 -0
- data/lib/longleaf/models/storage_location.rb +17 -48
- data/lib/longleaf/models/storage_types.rb +9 -0
- data/lib/longleaf/preservation_services/rsync_replication_service.rb +9 -11
- data/lib/longleaf/preservation_services/s3_replication_service.rb +143 -0
- data/lib/longleaf/services/application_config_deserializer.rb +26 -4
- data/lib/longleaf/services/application_config_validator.rb +17 -6
- data/lib/longleaf/services/configuration_validator.rb +64 -4
- data/lib/longleaf/services/filesystem_location_validator.rb +16 -0
- data/lib/longleaf/services/metadata_deserializer.rb +41 -9
- data/lib/longleaf/services/metadata_persistence_manager.rb +3 -2
- data/lib/longleaf/services/metadata_serializer.rb +94 -13
- data/lib/longleaf/services/metadata_validator.rb +76 -0
- data/lib/longleaf/services/s3_location_validator.rb +19 -0
- data/lib/longleaf/services/service_definition_validator.rb +16 -8
- data/lib/longleaf/services/service_manager.rb +7 -15
- data/lib/longleaf/services/service_mapping_validator.rb +26 -15
- data/lib/longleaf/services/storage_location_manager.rb +38 -12
- data/lib/longleaf/services/storage_location_validator.rb +41 -30
- data/lib/longleaf/specs/config_builder.rb +10 -3
- data/lib/longleaf/specs/config_validator_helpers.rb +16 -0
- data/lib/longleaf/specs/metadata_builder.rb +1 -0
- data/lib/longleaf/version.rb +1 -1
- data/longleaf.gemspec +3 -1
- data/mkdocs.yml +2 -1
- metadata +48 -8
- data/.travis.yml +0 -4
- data/lib/longleaf/services/storage_path_validator.rb +0 -16
@@ -4,14 +4,25 @@ require_relative 'service_mapping_validator'
|
|
4
4
|
|
5
5
|
module Longleaf
|
6
6
|
# Validator for Longleaf application configuration
|
7
|
-
class ApplicationConfigValidator
|
7
|
+
class ApplicationConfigValidator < ConfigurationValidator
  # Builds a validator over a full application configuration.
  # @param config [Hash] hash containing the application configuration
  def initialize(config)
    super(config)
  end

  protected

  # Runs the storage location, service definition and service mapping
  # validators against the configuration, then merges the errors of every
  # sub-validator that reported a failure into this validator's result.
  # All three sub-validators are executed before any errors are merged.
  # @return [ConfigurationValidationResult] the combined result
  def validate
    sub_results = [
      StorageLocationValidator,
      ServiceDefinitionValidator,
      ServiceMappingValidator
    ].map { |validator_class| validator_class.new(@config).validate_config }

    sub_results.each do |sub_result|
      next if sub_result.valid?

      @result.errors.concat(sub_result.errors)
    end

    @result
  end
end
|
17
28
|
end
|
@@ -1,11 +1,71 @@
|
|
1
1
|
module Longleaf
|
2
2
|
# Abstract configuration validator class. Subclasses implement #validate,
# using #assert / #fail to signal problems; #validate_config runs it and
# collects any ConfigurationError into the validation result.
class ConfigurationValidator
  # @return [ConfigurationValidationResult] errors registered so far
  attr_reader :result

  # @param config [Object] the configuration to be validated by the subclass
  def initialize(config)
    @result = ConfigurationValidationResult.new
    @config = config
  end

  # Verify that the provided configuration is valid
  # @return [ConfigurationValidationResult] the result of the validation
  def validate_config
    register_on_failure { validate }

    @result
  end

  # Asserts that the given conditional is true, raising a ConfigurationError if it is not.
  # @param fail_message [String] message for the error raised on failure
  # @param assertion_passed [Object] truthy when the assertion holds
  # @raise [ConfigurationError] if assertion_passed is falsy
  def assert(fail_message, assertion_passed)
    fail(fail_message) unless assertion_passed
  end

  # Indicate that validation has failed, throwing a ConfigurationError with the given message.
  # Note that this shadows Kernel#fail within validators, so `raise` is used here.
  # @param fail_message [String] description of the failure
  # @raise [ConfigurationError] always
  def fail(fail_message)
    raise ConfigurationError.new(fail_message)
  end

  # Registers an error to the result for this validator
  # @param error [StandardError, String] an exception, whose message is recorded,
  #   or the error message itself
  def register_error(error)
    # StandardError exposes #message (there is no #msg), so read the text from there
    message = error.is_a?(StandardError) ? error.message : error
    @result.register_error(message)
  end

  # Performs the provided block. If the block produces a ConfigurationError, the error
  # is swallowed and its message registered to the result. Any other exception propagates.
  def register_on_failure
    yield
  rescue ConfigurationError => err
    register_error(err.message)
  end
end
|
48
|
+
|
49
|
+
# Accumulates the error messages produced while validating a configuration.
class ConfigurationValidationResult
  # @return [Array] the error messages registered so far, in registration order
  attr_reader :errors

  def initialize
    @errors = []
  end

  # Register an error with this validation result
  # @param error_message [String] description of the validation failure
  def register_error(error_message)
    @errors << error_message
  end

  # @return [boolean] true if validation produced no errors
  def valid?
    @errors.empty?
  end

  # @raise [ConfigurationError] if the result is not valid, which lists all failures
  #   joined by newlines
  def raise_if_invalid
    raise ConfigurationError, @errors.join("\n") unless valid?
  end
end
|
11
71
|
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'pathname'
|
2
|
+
require 'longleaf/errors'
|
3
|
+
|
4
|
+
module Longleaf
|
5
|
+
# Validates the configuration of a filesystem based location
|
6
|
+
class FilesystemLocationValidator
  # Checks that a filesystem path configured for a storage location is usable:
  # non-empty, free of relative modifiers, absolute, and an existing directory.
  # Each check is reported through the validator's #assert, in that order.
  #
  # @param p_validator [#assert] validator receiving `assert(message, passed)` calls
  # @param name [String] name of the storage location, used in messages
  # @param path_prop [String] name of the property holding the path, used in messages
  # @param section_name [String] name of the configuration section, used in messages
  # @param path [String, #to_s, nil] the path to validate
  def self.validate(p_validator, name, path_prop, section_name, path)
    base_msg = "Storage location '#{name}' specifies invalid #{section_name} '#{path_prop}' property: "
    # Normalize once so that nil, Pathname or other non-String values produce
    # validation failures rather than NoMethodError on String-only calls.
    path_str = path.to_s

    p_validator.assert(base_msg + 'Path must not be empty', !path_str.strip.empty?)
    p_validator.assert(base_msg + 'Path must not contain any relative modifiers (/..)', !path_str.include?('/..'))
    p_validator.assert(base_msg + 'Path must be absolute', Pathname.new(path_str).absolute?)
    p_validator.assert(base_msg + 'Path does not exist', Dir.exist?(path_str))
  end
end
|
16
|
+
end
|
@@ -1,6 +1,7 @@
|
|
1
1
|
require 'yaml'
|
2
2
|
require 'longleaf/models/metadata_record'
|
3
3
|
require 'longleaf/models/md_fields'
|
4
|
+
require 'longleaf/services/metadata_validator'
|
4
5
|
require 'longleaf/errors'
|
5
6
|
require 'longleaf/logging'
|
6
7
|
|
@@ -15,6 +16,8 @@ module Longleaf
|
|
15
16
|
# @param file_path [String] path of the file to read. Required.
|
16
17
|
# @param format [String] format the file is stored in. Default is 'yaml'.
|
17
18
|
def self.deserialize(file_path:, format: 'yaml', digest_algs: [])
|
19
|
+
file_path = file_path.path if file_path.is_a?(File)
|
20
|
+
|
18
21
|
case format
|
19
22
|
when 'yaml'
|
20
23
|
md = from_yaml(file_path, digest_algs)
|
@@ -22,10 +25,6 @@ module Longleaf
|
|
22
25
|
raise ArgumentError.new("Invalid deserialization format #{format} specified")
|
23
26
|
end
|
24
27
|
|
25
|
-
if !md || !md.is_a?(Hash) || !md.key?(MDF::DATA) || !md.key?(MDF::SERVICES)
|
26
|
-
raise Longleaf::MetadataError.new("Invalid metadata file, did not contain data or services fields: #{file_path}")
|
27
|
-
end
|
28
|
-
|
29
28
|
data = Hash.new.merge(md[MDF::DATA])
|
30
29
|
# Extract reserved properties for submission as separate parameters
|
31
30
|
registered = data.delete(MDFields::REGISTERED_TIMESTAMP)
|
@@ -37,7 +36,7 @@ module Longleaf
|
|
37
36
|
services = md[MDF::SERVICES]
|
38
37
|
service_records = Hash.new
|
39
38
|
services&.each do |name, props|
|
40
|
-
raise
|
39
|
+
raise MetadataError.new("Value of service #{name} must be a hash") unless props.class == Hash
|
41
40
|
|
42
41
|
service_props = Hash.new.merge(props)
|
43
42
|
|
@@ -66,12 +65,45 @@ module Longleaf
|
|
66
65
|
File.open(file_path, 'r:bom|utf-8') do |f|
|
67
66
|
contents = f.read
|
68
67
|
|
69
|
-
|
68
|
+
checksum_error = nil
|
69
|
+
begin
|
70
|
+
verify_digests(file_path, contents, digest_algs)
|
71
|
+
rescue ChecksumMismatchError => err
|
72
|
+
# Hold onto the checksum error, in case we can identify the underlying cause
|
73
|
+
checksum_error = err
|
74
|
+
end
|
70
75
|
|
71
76
|
begin
|
72
|
-
|
73
|
-
|
74
|
-
|
77
|
+
md = nil
|
78
|
+
begin
|
79
|
+
md = YAML.safe_load(contents, [], [], true)
|
80
|
+
rescue => err
|
81
|
+
raise MetadataError.new("Failed to parse metadata file #{file_path}: #{err.message}")
|
82
|
+
end
|
83
|
+
|
84
|
+
validation_result = MetadataValidator.new(md).validate_config
|
85
|
+
if !validation_result.valid?
|
86
|
+
if checksum_error.nil?
|
87
|
+
raise MetadataError.new("Invalid metadata file #{file_path.to_s}:\n#{validation_result.errors.join("\n")}")
|
88
|
+
else
|
89
|
+
raise MetadataError.new(validation_result.errors.join("\n"))
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
# Either return the valid metadata, or raise the checksum error as is
|
94
|
+
if checksum_error.nil?
|
95
|
+
md
|
96
|
+
else
|
97
|
+
raise checksum_error
|
98
|
+
end
|
99
|
+
rescue MetadataError => err
|
100
|
+
if checksum_error.nil?
|
101
|
+
raise err
|
102
|
+
else
|
103
|
+
# Add underlying cause from the metadata error to the checksum mismatch error
|
104
|
+
msg = checksum_error.message + "\nWith related issue(s):\n#{err.message}"
|
105
|
+
raise ChecksumMismatchError.new(msg)
|
106
|
+
end
|
75
107
|
end
|
76
108
|
end
|
77
109
|
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'longleaf/services/metadata_serializer'
|
2
|
+
require 'longleaf/services/metadata_deserializer'
|
2
3
|
require 'longleaf/errors'
|
3
4
|
|
4
5
|
module Longleaf
|
@@ -20,7 +21,7 @@ module Longleaf
|
|
20
21
|
|
21
22
|
MetadataSerializer::write(metadata: file_rec.metadata_record,
|
22
23
|
file_path: file_rec.metadata_path,
|
23
|
-
digest_algs: file_rec.storage_location.
|
24
|
+
digest_algs: file_rec.storage_location.metadata_location.digests)
|
24
25
|
|
25
26
|
index(file_rec)
|
26
27
|
end
|
@@ -38,7 +39,7 @@ module Longleaf
|
|
38
39
|
# @return [MetadataRecord] the metadata record for the file record
|
39
40
|
def load(file_rec)
|
40
41
|
md_rec = MetadataDeserializer.deserialize(file_path: file_rec.metadata_path,
|
41
|
-
digest_algs: file_rec.storage_location.
|
42
|
+
digest_algs: file_rec.storage_location.metadata_location.digests)
|
42
43
|
file_rec.metadata_record = md_rec
|
43
44
|
md_rec
|
44
45
|
end
|
@@ -5,6 +5,7 @@ require 'longleaf/helpers/digest_helper'
|
|
5
5
|
require 'longleaf/errors'
|
6
6
|
require 'longleaf/logging'
|
7
7
|
require 'pathname'
|
8
|
+
require "tempfile"
|
8
9
|
|
9
10
|
module Longleaf
|
10
11
|
# Service which serializes MetadataRecord objects
|
@@ -30,12 +31,7 @@ module Longleaf
|
|
30
31
|
raise ArgumentError.new("Invalid serialization format #{format} specified")
|
31
32
|
end
|
32
33
|
|
33
|
-
|
34
|
-
parent_dir = Pathname(file_path).parent
|
35
|
-
parent_dir.mkpath unless parent_dir.exist?
|
36
|
-
|
37
|
-
File.write(file_path, content)
|
38
|
-
write_digests(file_path, content, digest_algs)
|
34
|
+
atomic_write(file_path, content, digest_algs)
|
39
35
|
end
|
40
36
|
|
41
37
|
# @param metadata [MetadataRecord] metadata record to transform
|
@@ -85,24 +81,109 @@ module Longleaf
|
|
85
81
|
end
|
86
82
|
end
|
87
83
|
|
84
|
+
# Safely writes the new metadata file and its digests.
# A file that does not exist yet is written directly. An existing file is
# replaced by writing the content and its digests to temp files in the same
# directory, then renaming the temp files over the current versions.
# Attempts to clean up new data in the case of failure.
#
# @param file_path [String, #path, #to_s] destination of the metadata file
# @param content [String] serialized metadata to write
# @param digest_algs [Array, nil] digest algorithms to produce sidecar files for
def self.atomic_write(file_path, content, digest_algs)
  # Work with a plain String path from here on: File-like objects are unwrapped,
  # anything else (e.g. Pathname) is coerced, so later String operations are safe.
  file_path = file_path.respond_to?(:path) ? file_path.path : file_path.to_s

  # Fill in parent directories if they do not exist
  parent_dir = Pathname(file_path).parent
  parent_dir.mkpath unless parent_dir.exist?

  # If file does not already exist, then simply write it
  unless File.exist?(file_path)
    File.write(file_path, content)
    write_digests(file_path, content, digest_algs)
    return
  end

  # Updating file, use safe atomic write
  File.open(file_path) do |original_file|
    # Held until the block closes the original file
    original_file.flock(File::LOCK_EX)

    base_name = File.basename(file_path)
    Tempfile.open(base_name, parent_dir) do |temp_file|
      begin
        # Write content to temp file
        temp_file.write(content)
        temp_file.close

        temp_path = temp_file.path

        # Set permissions of new file to match the file being replaced
        old_stat = File.stat(file_path)
        set_perms(temp_path, old_stat)

        begin
          digest_paths = write_digests(temp_path, content, digest_algs)

          File.rename(temp_path, file_path)
        rescue
          # Remove any digests produced for the temp file before propagating
          cleanup_digests(temp_path)
          raise
        end
      rescue
        temp_file.delete
        raise
      end

      # Cleanup all existing digest files, in case the set of algorithms has changed
      cleanup_digests(file_path)
      # Move new digests into place. Each digest path is the temp path plus a
      # ".<alg>" suffix, so re-attach that suffix to the real path directly
      # instead of pattern-substituting (which would interpret backslashes).
      digest_paths.each do |digest_path|
        File.rename(digest_path, file_path + digest_path[temp_path.length..-1])
      end
    end
  end
end
|
141
|
+
|
142
|
+
# Copies ownership and mode from the given stat information onto a file.
# @param file_path [String] path of the file to update
# @param stat_info [File::Stat, nil] source of uid, gid and mode; nothing is
#   changed when nil
# @return [boolean] false if the change was refused with EPERM or EACCES,
#   true otherwise
def self.set_perms(file_path, stat_info)
  return true unless stat_info

  File.chown(stat_info.uid, stat_info.gid, file_path)
  # This operation will affect filesystem ACL's
  File.chmod(stat_info.mode, file_path)
  true
rescue Errno::EPERM, Errno::EACCES
  # Changing file ownership or mode was not permitted, moving on.
  false
end
|
156
|
+
|
157
|
+
# Removes the digest sidecar files ("<file_path>.<alg>") of the provided file
# path for every algorithm listed in DigestHelper::KNOWN_DIGESTS. Sidecars
# that do not exist are skipped.
# @param file_path [String] path of the file whose digests should be removed
def self.cleanup_digests(file_path)
  DigestHelper::KNOWN_DIGESTS.each do |algorithm|
    sidecar = "#{file_path}.#{algorithm}"
    next unless File.exist?(sidecar)

    File.delete(sidecar)
  end
end
|
164
|
+
|
88
165
|
# Writes one digest sidecar file per requested algorithm next to the given
# file path, each named "<file_path>.<alg>" and containing the hex digest of
# the content.
# @param file_path [String] path of the file the digests describe
# @param content [String] content to compute digests over
# @param digests [Array, nil] algorithm names to generate digests for
# @return [Array<String>] paths of the digest files written; empty when no
#   algorithms were requested
def self.write_digests(file_path, content, digests)
  return [] if digests.nil? || digests.empty?

  digests.map do |alg|
    result = DigestHelper::start_digest(alg).hexdigest(content)
    digest_path = "#{file_path}.#{alg}"

    File.write(digest_path, result)
    self.logger.debug("Generated #{alg} digest for metadata file #{file_path}: #{digest_path} #{result}")

    digest_path
  end
end
|
105
184
|
|
185
|
+
private_class_method :cleanup_digests
|
106
186
|
private_class_method :write_digests
|
187
|
+
private_class_method :atomic_write
|
107
188
|
end
|
108
189
|
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
require 'pathname'
|
2
|
+
require 'longleaf/models/md_fields'
|
3
|
+
require 'longleaf/errors'
|
4
|
+
require_relative 'configuration_validator'
|
5
|
+
|
6
|
+
module Longleaf
|
7
|
+
# Validator for file metadata. Structural problems are reported as validation
# errors on the result rather than escaping as NoMethodError/TypeError.
class MetadataValidator < ConfigurationValidator
  MDF ||= MDFields

  # @param config [Hash] hash containing the metadata of a file
  def initialize(config)
    super(config)
  end

  protected

  # Validates the provided metadata for a file to ensure that it is syntactically
  # correct and field types are valid. Top level structural failures stop validation;
  # individual field failures are registered and validation continues.
  def validate
    assert("Metadata must be a hash, but a #{@config.class} was provided", @config.class == Hash)
    assert("Metadata must contain a '#{MDF::DATA}' key", @config.key?(MDF::DATA))
    assert("Metadata must contain a '#{MDF::SERVICES}' key", @config.key?(MDF::SERVICES))

    data = @config[MDF::DATA]
    # The field checks below index into data, so it has to be a hash
    assert("Field '#{MDF::DATA}' must be a hash, but a #{data.class} was provided", data.is_a?(Hash))

    register_on_failure { validate_date_field(data, MDF::REGISTERED_TIMESTAMP) }
    register_on_failure { validate_date_field(data, MDF::DEREGISTERED_TIMESTAMP, required: false) }
    register_on_failure { validate_date_field(data, MDF::LAST_MODIFIED) }

    register_on_failure { validate_positive_integer(data, MDF::FILE_SIZE) }

    checksums = data[MDF::CHECKSUMS]
    register_on_failure do
      if !checksums.nil? && !checksums.is_a?(Hash)
        fail("Field '#{MDF::CHECKSUMS}' must be a map of algorithms to digests, but was a #{checksums.class}")
      end
    end

    # Ensure that any service timestamps present are valid dates.
    # A services key with no value is treated as having no services.
    services = @config[MDF::SERVICES]
    return if services.nil?

    assert("Field '#{MDF::SERVICES}' must be a hash, but a #{services.class} was provided", services.is_a?(Hash))
    services.each do |service_name, service_rec|
      register_on_failure do
        assert("Value of service #{service_name} must be a hash", service_rec.is_a?(Hash))
        validate_date_field(service_rec, MDF::SERVICE_TIMESTAMP, required: false)
      end
    end
  end

  # Verifies that a field holds an ISO8601 timestamp string.
  # @param section [Hash] section of the metadata containing the field
  # @param field_key [String] key of the field within the section
  # @param required [Boolean] if true, a missing field is a failure
  # @raise [ConfigurationError] if the field is invalid, or missing while required
  def validate_date_field(section, field_key, required: true)
    field_val = section[field_key]

    if field_val
      invalid_msg = "Field '#{field_key}' must be a valid ISO8601 timestamp, but contained value '#{field_val}'"
      # Time.iso8601 only understands strings; reject other types up front
      fail(invalid_msg) unless field_val.is_a?(String)

      begin
        Time.iso8601(field_val)
      rescue ArgumentError
        fail(invalid_msg)
      end
    elsif required
      fail("Metadata must contain a '#{field_key}' field")
    end
  end

  # Verifies that a field holds a non-negative integer, either as an Integer
  # or as a base 10 string.
  # @param section [Hash] section of the metadata containing the field
  # @param field_key [String] key of the field within the section
  # @param required [Boolean] if true, a missing field is a failure
  # @raise [ConfigurationError] if the field is invalid, or missing while required
  def validate_positive_integer(section, field_key, required: true)
    field_val = section[field_key]

    if field_val
      val = begin
        field_val.is_a?(Integer) ? field_val : Integer(field_val, 10)
      rescue ArgumentError, TypeError
        nil
      end

      fail("Field '#{field_key}' must be a positive integer") if val.nil? || val < 0
    elsif required
      fail("Metadata must contain a '#{field_key}' field")
    end
  end
end
|
76
|
+
end
|