longleaf 0.1.0.pre.2 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.circleci/config.yml +94 -0
- data/.editorconfig +13 -0
- data/.gitignore +4 -1
- data/.rubocop.yml +44 -0
- data/.rubocop_todo.yml +834 -0
- data/.yardopts +1 -0
- data/Gemfile +16 -1
- data/README.md +98 -12
- data/Rakefile +6 -0
- data/bin/setup +16 -1
- data/docs/aboutlongleaf.md +28 -0
- data/docs/extra.css +32 -0
- data/docs/img/change-file.png +0 -0
- data/docs/img/ll-example-preserved.png +0 -0
- data/docs/index.md +19 -0
- data/docs/install.md +66 -0
- data/docs/ll-example/config-example-relative.yml +33 -0
- data/docs/ll-example/files-dir/LLexample-PDF.pdf +0 -0
- data/docs/ll-example/files-dir/LLexample-TOCHANGE.txt +15 -0
- data/docs/ll-example/files-dir/LLexample-tokeep.txt +10 -0
- data/docs/ll-example/metadata-dir/.gitkeep +0 -0
- data/docs/ll-example/replica-files/.gitkeep +0 -0
- data/docs/ll-example/replica-metadata/.gitkeep +0 -0
- data/docs/quickstart.md +270 -0
- data/docs/rdocs/Longleaf.html +135 -0
- data/docs/rdocs/Longleaf/AppFields.html +178 -0
- data/docs/rdocs/Longleaf/ApplicationConfigDeserializer.html +631 -0
- data/docs/rdocs/Longleaf/ApplicationConfigManager.html +610 -0
- data/docs/rdocs/Longleaf/ApplicationConfigValidator.html +238 -0
- data/docs/rdocs/Longleaf/CLI.html +909 -0
- data/docs/rdocs/Longleaf/ChecksumMismatchError.html +151 -0
- data/docs/rdocs/Longleaf/ConfigBuilder.html +1339 -0
- data/docs/rdocs/Longleaf/ConfigurationError.html +143 -0
- data/docs/rdocs/Longleaf/ConfigurationValidator.html +227 -0
- data/docs/rdocs/Longleaf/DeregisterCommand.html +420 -0
- data/docs/rdocs/Longleaf/DeregisterEvent.html +453 -0
- data/docs/rdocs/Longleaf/DeregistrationError.html +151 -0
- data/docs/rdocs/Longleaf/DigestHelper.html +419 -0
- data/docs/rdocs/Longleaf/EventError.html +147 -0
- data/docs/rdocs/Longleaf/EventNames.html +163 -0
- data/docs/rdocs/Longleaf/EventStatusTracking.html +656 -0
- data/docs/rdocs/Longleaf/FileCheckService.html +540 -0
- data/docs/rdocs/Longleaf/FileHelpers.html +520 -0
- data/docs/rdocs/Longleaf/FileRecord.html +716 -0
- data/docs/rdocs/Longleaf/FileSelector.html +901 -0
- data/docs/rdocs/Longleaf/FixityCheckService.html +691 -0
- data/docs/rdocs/Longleaf/IndexManager.html +1155 -0
- data/docs/rdocs/Longleaf/InvalidDigestAlgorithmError.html +143 -0
- data/docs/rdocs/Longleaf/InvalidStoragePathError.html +143 -0
- data/docs/rdocs/Longleaf/Logging.html +405 -0
- data/docs/rdocs/Longleaf/Logging/RedirectingLogger.html +1213 -0
- data/docs/rdocs/Longleaf/LongleafError.html +139 -0
- data/docs/rdocs/Longleaf/MDFields.html +193 -0
- data/docs/rdocs/Longleaf/MetadataBuilder.html +787 -0
- data/docs/rdocs/Longleaf/MetadataDeserializer.html +537 -0
- data/docs/rdocs/Longleaf/MetadataError.html +143 -0
- data/docs/rdocs/Longleaf/MetadataPersistenceManager.html +539 -0
- data/docs/rdocs/Longleaf/MetadataRecord.html +1411 -0
- data/docs/rdocs/Longleaf/MetadataSerializer.html +786 -0
- data/docs/rdocs/Longleaf/PreservationServiceError.html +147 -0
- data/docs/rdocs/Longleaf/PreserveCommand.html +410 -0
- data/docs/rdocs/Longleaf/PreserveEvent.html +491 -0
- data/docs/rdocs/Longleaf/RegisterCommand.html +428 -0
- data/docs/rdocs/Longleaf/RegisterEvent.html +628 -0
- data/docs/rdocs/Longleaf/RegisteredFileSelector.html +446 -0
- data/docs/rdocs/Longleaf/RegistrationError.html +151 -0
- data/docs/rdocs/Longleaf/ReindexCommand.html +576 -0
- data/docs/rdocs/Longleaf/RsyncReplicationService.html +1180 -0
- data/docs/rdocs/Longleaf/SequelIndexDriver.html +1978 -0
- data/docs/rdocs/Longleaf/ServiceCandidateFilesystemIterator.html +572 -0
- data/docs/rdocs/Longleaf/ServiceCandidateIndexIterator.html +532 -0
- data/docs/rdocs/Longleaf/ServiceCandidateLocator.html +333 -0
- data/docs/rdocs/Longleaf/ServiceClassCache.html +725 -0
- data/docs/rdocs/Longleaf/ServiceDateHelper.html +425 -0
- data/docs/rdocs/Longleaf/ServiceDefinition.html +683 -0
- data/docs/rdocs/Longleaf/ServiceDefinitionManager.html +371 -0
- data/docs/rdocs/Longleaf/ServiceDefinitionValidator.html +269 -0
- data/docs/rdocs/Longleaf/ServiceFields.html +173 -0
- data/docs/rdocs/Longleaf/ServiceManager.html +1229 -0
- data/docs/rdocs/Longleaf/ServiceMappingManager.html +410 -0
- data/docs/rdocs/Longleaf/ServiceMappingValidator.html +347 -0
- data/docs/rdocs/Longleaf/ServiceRecord.html +821 -0
- data/docs/rdocs/Longleaf/StorageLocation.html +985 -0
- data/docs/rdocs/Longleaf/StorageLocationManager.html +729 -0
- data/docs/rdocs/Longleaf/StorageLocationUnavailableError.html +143 -0
- data/docs/rdocs/Longleaf/StorageLocationValidator.html +373 -0
- data/docs/rdocs/Longleaf/StoragePathValidator.html +253 -0
- data/docs/rdocs/Longleaf/SystemConfigBuilder.html +441 -0
- data/docs/rdocs/Longleaf/SystemConfigFields.html +163 -0
- data/docs/rdocs/Longleaf/ValidateConfigCommand.html +451 -0
- data/docs/rdocs/Longleaf/ValidateMetadataCommand.html +408 -0
- data/docs/rdocs/_index.html +660 -0
- data/docs/rdocs/class_list.html +51 -0
- data/docs/rdocs/css/common.css +1 -0
- data/docs/rdocs/css/full_list.css +58 -0
- data/docs/rdocs/css/style.css +496 -0
- data/docs/rdocs/file.README.html +165 -0
- data/docs/rdocs/file_list.html +56 -0
- data/docs/rdocs/frames.html +17 -0
- data/docs/rdocs/index.html +165 -0
- data/docs/rdocs/js/app.js +303 -0
- data/docs/rdocs/js/full_list.js +216 -0
- data/docs/rdocs/js/jquery.js +4 -0
- data/docs/rdocs/method_list.html +2051 -0
- data/docs/rdocs/top-level-namespace.html +110 -0
- data/lib/longleaf/candidates/file_selector.rb +139 -0
- data/lib/longleaf/candidates/manifest_digest_provider.rb +17 -0
- data/lib/longleaf/candidates/registered_file_selector.rb +67 -0
- data/lib/longleaf/candidates/service_candidate_filesystem_iterator.rb +93 -0
- data/lib/longleaf/candidates/service_candidate_index_iterator.rb +84 -0
- data/lib/longleaf/candidates/service_candidate_locator.rb +23 -0
- data/lib/longleaf/candidates/single_digest_provider.rb +13 -0
- data/lib/longleaf/cli.rb +237 -46
- data/lib/longleaf/commands/deregister_command.rb +51 -0
- data/lib/longleaf/commands/preserve_command.rb +50 -0
- data/lib/longleaf/commands/register_command.rb +32 -43
- data/lib/longleaf/commands/reindex_command.rb +92 -0
- data/lib/longleaf/commands/validate_config_command.rb +33 -8
- data/lib/longleaf/commands/validate_metadata_command.rb +51 -0
- data/lib/longleaf/errors.rb +26 -7
- data/lib/longleaf/events/deregister_event.rb +53 -0
- data/lib/longleaf/events/event_names.rb +9 -0
- data/lib/longleaf/events/event_status_tracking.rb +59 -0
- data/lib/longleaf/events/preserve_event.rb +81 -0
- data/lib/longleaf/events/register_event.rb +52 -51
- data/lib/longleaf/helpers/case_insensitive_hash.rb +38 -0
- data/lib/longleaf/helpers/digest_helper.rb +56 -0
- data/lib/longleaf/helpers/s3_uri_helper.rb +86 -0
- data/lib/longleaf/helpers/selection_options_parser.rb +189 -0
- data/lib/longleaf/helpers/service_date_helper.rb +78 -0
- data/lib/longleaf/indexing/index_manager.rb +101 -0
- data/lib/longleaf/indexing/sequel_index_driver.rb +306 -0
- data/lib/longleaf/logging.rb +5 -4
- data/lib/longleaf/logging/redirecting_logger.rb +26 -25
- data/lib/longleaf/models/app_fields.rb +7 -2
- data/lib/longleaf/models/file_record.rb +17 -8
- data/lib/longleaf/models/filesystem_metadata_location.rb +56 -0
- data/lib/longleaf/models/filesystem_storage_location.rb +52 -0
- data/lib/longleaf/models/md_fields.rb +2 -1
- data/lib/longleaf/models/metadata_location.rb +47 -0
- data/lib/longleaf/models/metadata_record.rb +39 -15
- data/lib/longleaf/models/s3_storage_location.rb +133 -0
- data/lib/longleaf/models/service_definition.rb +7 -6
- data/lib/longleaf/models/service_fields.rb +7 -1
- data/lib/longleaf/models/service_record.rb +10 -6
- data/lib/longleaf/models/storage_location.rb +24 -19
- data/lib/longleaf/models/storage_types.rb +9 -0
- data/lib/longleaf/models/system_config_fields.rb +9 -0
- data/lib/longleaf/preservation_services/file_check_service.rb +58 -0
- data/lib/longleaf/preservation_services/fixity_check_service.rb +123 -0
- data/lib/longleaf/preservation_services/rsync_replication_service.rb +182 -0
- data/lib/longleaf/preservation_services/s3_replication_service.rb +143 -0
- data/lib/longleaf/services/application_config_deserializer.rb +81 -24
- data/lib/longleaf/services/application_config_manager.rb +20 -6
- data/lib/longleaf/services/application_config_validator.rb +19 -9
- data/lib/longleaf/services/configuration_validator.rb +67 -4
- data/lib/longleaf/services/filesystem_location_validator.rb +16 -0
- data/lib/longleaf/services/metadata_deserializer.rb +113 -42
- data/lib/longleaf/services/metadata_persistence_manager.rb +47 -0
- data/lib/longleaf/services/metadata_serializer.rb +138 -25
- data/lib/longleaf/services/metadata_validator.rb +76 -0
- data/lib/longleaf/services/s3_location_validator.rb +19 -0
- data/lib/longleaf/services/service_class_cache.rb +112 -0
- data/lib/longleaf/services/service_definition_manager.rb +10 -7
- data/lib/longleaf/services/service_definition_validator.rb +25 -18
- data/lib/longleaf/services/service_manager.rb +86 -11
- data/lib/longleaf/services/service_mapping_manager.rb +13 -12
- data/lib/longleaf/services/service_mapping_validator.rb +36 -26
- data/lib/longleaf/services/storage_location_manager.rb +76 -15
- data/lib/longleaf/services/storage_location_validator.rb +49 -35
- data/lib/longleaf/specs/config_builder.rb +47 -23
- data/lib/longleaf/specs/config_validator_helpers.rb +16 -0
- data/lib/longleaf/specs/custom_matchers.rb +9 -0
- data/lib/longleaf/specs/file_helpers.rb +61 -0
- data/lib/longleaf/specs/metadata_builder.rb +92 -0
- data/lib/longleaf/specs/system_config_builder.rb +27 -0
- data/lib/longleaf/version.rb +1 -1
- data/longleaf.gemspec +20 -7
- data/mkdocs.yml +21 -0
- metadata +306 -23
- data/.travis.yml +0 -4
- data/lib/longleaf/commands/abstract_command.rb +0 -37
- data/lib/longleaf/services/storage_path_validator.rb +0 -16
@@ -0,0 +1,81 @@
|
|
1
|
+
require 'longleaf/services/service_manager'
|
2
|
+
require 'longleaf/events/event_names'
|
3
|
+
require 'longleaf/events/event_status_tracking'
|
4
|
+
require 'longleaf/logging'
|
5
|
+
|
6
|
+
module Longleaf
  # Preserve event for a single file. Runs the preservation services which apply to the
  # file's storage location, recording the outcome of each on the file's metadata record.
  class PreserveEvent
    include Longleaf::Logging
    include Longleaf::EventStatusTracking

    # @param file_rec [FileRecord] file record
    # @param app_manager [ApplicationConfigManager] the application configuration
    # @param force [boolean] if true, then services run regardless of whether they are flagged as needed
    # @raise [ArgumentError] if file_rec or app_manager is nil
    def initialize(file_rec:, app_manager:, force: false)
      raise ArgumentError, 'Must provide a file_rec parameter' if file_rec.nil?
      raise ArgumentError, 'Must provide an ApplicationConfigManager' if app_manager.nil?

      @file_rec = file_rec
      @app_manager = app_manager
      @force = force
    end

    # Perform a preserve event on the given file, updating its metadata record if any services were executed.
    #
    # A PreservationServiceError from one service is recorded as a failure and the remaining
    # services still run. A StorageLocationUnavailableError is re-raised to the caller. Any
    # other StandardError is recorded as a failure and ends the event immediately.
    # @return the value of return_status (presumably supplied by EventStatusTracking - confirm)
    def perform
      location = @file_rec.storage_location
      services = @app_manager.service_manager
      metadata = @file_rec.metadata_record
      file_path = @file_rec.path

      logger.info("Performing preserve event on #{@file_rec.path}")

      metadata_dirty = false
      begin
        unless File.exist?(file_path)
          # Need to persist metadata to avoid repeating processing of this file too soon.
          metadata_dirty = true
          record_failure(EventNames::PRESERVE, file_path, "File is registered but missing.")
          return return_status
        end

        # services applicable to this location and event
        applicable = services.list_services(location: location.name, event: EventNames::PRESERVE)
        applicable.each do |service_name|
          # Only run services which are flagged as needed, unless the force flag is active
          if !@force && !services.service_needed?(service_name, metadata)
            logger.debug("Service #{service_name} not needed for file '#{@file_rec.path}', skipping")
            next
          end

          begin
            logger.info("Performing preserve service #{service_name} for #{@file_rec.path}")
            # From this point on the metadata record will be modified, whatever the outcome
            metadata_dirty = true
            services.perform_service(service_name, @file_rec, EventNames::PRESERVE)

            # record the successful outcome
            @file_rec.metadata_record.update_service_as_performed(service_name)
            record_success(EventNames::PRESERVE, file_path, nil, service_name)
          rescue PreservationServiceError => e
            # Service level failure, continue on to the next service
            @file_rec.metadata_record.update_service_as_failed(service_name)
            record_failure(EventNames::PRESERVE, file_path, e.message, service_name)
          rescue StorageLocationUnavailableError => e
            # Pass unavailable locations up to the caller without recording a service failure
            raise e
          rescue StandardError => e
            # Unexpected failure, stop processing any further services for this file
            @file_rec.metadata_record.update_service_as_failed(service_name)
            record_failure(EventNames::PRESERVE, file_path, nil, service_name, error: e)
            return return_status
          end
        end
      ensure
        # persist the metadata out to file if any services were executed (runs on early returns too)
        @app_manager.md_manager.persist(@file_rec) if metadata_dirty
      end

      return_status
    end
  end
end
|
@@ -1,92 +1,93 @@
|
|
1
1
|
require 'longleaf/errors'
|
2
|
+
require 'longleaf/events/event_names'
|
3
|
+
require 'longleaf/events/event_status_tracking'
|
2
4
|
require 'longleaf/models/metadata_record'
|
3
5
|
require 'longleaf/services/metadata_deserializer'
|
4
6
|
require 'longleaf/services/metadata_serializer'
|
5
7
|
require 'time'
|
6
8
|
|
7
|
-
# Event to register a file with longleaf
|
8
9
|
module Longleaf
|
10
|
+
# Event to register a file with longleaf
|
9
11
|
class RegisterEvent
|
10
|
-
|
11
|
-
|
12
|
+
include Longleaf::EventStatusTracking
|
13
|
+
|
12
14
|
# @param file_rec [FileRecord] file record
|
13
15
|
# @param app_manager [ApplicationConfigManager] the application configuration
|
14
16
|
# @param force [boolean] if true, then already registered files will be re-registered
|
15
|
-
|
17
|
+
# @param digest_provider [#get_digests] object which provides digests for files being registered
|
18
|
+
def initialize(file_rec:, app_manager:, force: false, digest_provider: nil)
|
16
19
|
raise ArgumentError.new('Must provide a file_rec parameter') if file_rec.nil?
|
17
20
|
raise ArgumentError.new('Parameter file_rec must be a FileRecord') \
|
18
21
|
unless file_rec.is_a?(FileRecord)
|
19
22
|
raise ArgumentError.new('Must provide an ApplicationConfigManager') if app_manager.nil?
|
20
23
|
raise ArgumentError.new('Parameter app_manager must be an ApplicationConfigManager') \
|
21
24
|
unless app_manager.is_a?(ApplicationConfigManager)
|
22
|
-
|
25
|
+
|
23
26
|
@app_manager = app_manager
|
24
27
|
@file_rec = file_rec
|
25
28
|
@force = force
|
26
|
-
@
|
29
|
+
@digest_provider = digest_provider
|
27
30
|
end
|
28
|
-
|
31
|
+
|
29
32
|
# Perform a registration event on the given file
|
30
|
-
# @
|
33
|
+
# @raise RegistrationError if a file cannot be registered
|
31
34
|
def perform
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
35
|
+
begin
|
36
|
+
# Only need to re-register file if the force flag is provided
|
37
|
+
if @file_rec.metadata_present? && !@force
|
38
|
+
raise RegistrationError.new("Unable to register '#{@file_rec.path}', it is already registered.")
|
39
|
+
end
|
40
|
+
|
41
|
+
# create metadata record
|
42
|
+
md_rec = MetadataRecord.new(registered: Time.now.utc.iso8601(3))
|
43
|
+
@file_rec.metadata_record = md_rec
|
44
|
+
|
45
|
+
# retain significant details from former record
|
46
|
+
if @file_rec.metadata_present?
|
47
|
+
retain_existing_properties
|
48
|
+
end
|
49
|
+
|
50
|
+
populate_file_properties
|
51
|
+
|
52
|
+
if !@digest_provider.nil?
|
53
|
+
checksums = @digest_provider.get_digests(@file_rec.path)
|
54
|
+
md_rec.checksums.merge!(checksums) unless checksums.nil?
|
55
|
+
end
|
56
|
+
|
57
|
+
# persist the metadata
|
58
|
+
@app_manager.md_manager.persist(@file_rec)
|
59
|
+
|
60
|
+
record_success(EventNames::REGISTER, @file_rec.path)
|
61
|
+
rescue RegistrationError => err
|
62
|
+
record_failure(EventNames::REGISTER, @file_rec.path, err.message)
|
63
|
+
rescue InvalidStoragePathError => err
|
64
|
+
record_failure(EventNames::REGISTER, @file_rec.path, err.message)
|
45
65
|
end
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
md_rec.checksums.merge!(@checksums) unless @checksums.nil?
|
50
|
-
|
51
|
-
populate_services
|
52
|
-
|
53
|
-
# persist the metadata out to file
|
54
|
-
MetadataSerializer::write(metadata: md_rec, file_path: @file_rec.metadata_path)
|
66
|
+
|
67
|
+
return_status
|
55
68
|
end
|
56
|
-
|
69
|
+
|
57
70
|
private
|
58
71
|
def populate_file_properties
|
59
72
|
md_rec = @file_rec.metadata_record
|
60
|
-
|
73
|
+
|
61
74
|
# Set file properties
|
62
|
-
md_rec.last_modified = File.mtime(@file_rec.path).utc.iso8601
|
75
|
+
md_rec.last_modified = File.mtime(@file_rec.path).utc.iso8601(3)
|
63
76
|
md_rec.file_size = File.size(@file_rec.path)
|
64
77
|
end
|
65
|
-
|
66
|
-
def populate_services
|
67
|
-
md_rec = @file_rec.metadata_record
|
68
|
-
|
69
|
-
service_manager = @app_manager.service_manager
|
70
|
-
definitions = service_manager.list_service_definitions(location: @file_rec.storage_location.name)
|
71
|
-
|
72
|
-
# Add service section
|
73
|
-
definitions.each do |serv_def|
|
74
|
-
serv_name = serv_def.name
|
75
|
-
md_rec.add_service(serv_name)
|
76
|
-
end
|
77
|
-
end
|
78
|
-
|
78
|
+
|
79
79
|
# Copy a subset of properties from an existing metadata record to the new record
|
80
80
|
def retain_existing_properties
|
81
81
|
md_rec = @file_rec.metadata_record
|
82
|
-
|
83
|
-
old_md = MetadataDeserializer.deserialize(file_path: @file_rec.metadata_path
|
82
|
+
|
83
|
+
old_md = MetadataDeserializer.deserialize(file_path: @file_rec.metadata_path,
|
84
|
+
digest_algs: @file_rec.storage_location.metadata_location.digests)
|
84
85
|
# Copy custom properties
|
85
86
|
old_md.properties.each { |name, value| md_rec.properties[name] = value }
|
86
87
|
# Copy stale-replicas flag per service
|
87
88
|
old_md.list_services.each do |serv_name|
|
88
89
|
serv_rec = old_md.service(serv_name)
|
89
|
-
|
90
|
+
|
90
91
|
stale_replicas = serv_rec.stale_replicas
|
91
92
|
if stale_replicas
|
92
93
|
new_service = md_rec.service(serv_name)
|
@@ -95,4 +96,4 @@ module Longleaf
|
|
95
96
|
end
|
96
97
|
end
|
97
98
|
end
|
98
|
-
end
|
99
|
+
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module Longleaf
  # Hash subclass which provides case insensitive keys, where keys are always downcased.
  #
  # Keys which respond to `downcase` (such as Strings and Symbols) are normalized on the way in
  # and on lookup; any other key is used as given. Normalization is applied by the methods
  # overridden here. Other inherited Hash methods (for example `dig`, or construction via
  # `Hash.[]`) do not normalize keys.
  class CaseInsensitiveHash < Hash
    # @param key key to look up, downcased before lookup when possible
    # @return the value stored for the key, or the hash default
    def [](key)
      super(_insensitive(key))
    end

    # @param key key to assign, downcased before storing when possible
    # @param value value to store
    def []=(key, value)
      super(_insensitive(key), value)
    end

    # Same as #[]=, so that keys added via store are normalized as well.
    def store(key, value)
      super(_insensitive(key), value)
    end

    # Same contract as Hash#fetch, with the key normalized before lookup.
    def fetch(key, *args, &block)
      super(_insensitive(key), *args, &block)
    end

    # @param key key to remove, downcased before lookup when possible
    # @return the removed value, or nil if the key was not present
    def delete(key)
      super(_insensitive(key))
    end

    # @return [Boolean] true if the normalized key is present
    def has_key?(key)
      super(_insensitive(key))
    end

    # The following membership predicates are separate methods from has_key? in Hash,
    # so each must be overridden to stay consistent with it.
    def key?(key)
      super(_insensitive(key))
    end

    def include?(key)
      super(_insensitive(key))
    end

    def member?(key)
      super(_insensitive(key))
    end

    # @param other_hash [Hash] hash to merge in, its keys are normalized first
    # @return a new hash containing the merged entries
    def merge(other_hash)
      super(other_hash.map { |k, v| [_insensitive(k), v] }.to_h)
    end

    # @param other_hash [Hash] hash to merge into this hash, its keys are normalized first
    # @return this hash
    def merge!(other_hash)
      super(other_hash.map { |k, v| [_insensitive(k), v] }.to_h)
    end

    # Hash#update is the same operation as merge!, route it through the normalizing version.
    def update(other_hash, &block)
      merge!(other_hash, &block)
    end

    # Cause this hash to serialize as a regular hash to avoid deserialization failures
    def encode_with(coder)
      coder.represent_map nil, self
    end

    protected

    # @return the downcased form of the key if it supports downcasing, otherwise the key itself
    def _insensitive(key)
      key.respond_to?(:downcase) ? key.downcase : key
    end
  end
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'longleaf/errors'
|
2
|
+
require 'digest'
|
3
|
+
|
4
|
+
module Longleaf
  # Utility methods for validating digest algorithm names and producing digest instances
  class DigestHelper
    # Names of the digest algorithms accepted by this helper
    KNOWN_DIGESTS ||= ['md5', 'sha1', 'sha2', 'sha256', 'sha384', 'sha512', 'rmd160']

    # Verify that the given algorithm name(s) are all known.
    # @param algs Either a string containing one or an array containing zero or more digest
    #    algorithm names. Nil is accepted and treated as nothing to validate.
    # @raise [InvalidDigestAlgorithmError] thrown if any of the digest algorithms listed are not
    #    known to the system.
    def self.validate_algorithms(algs)
      return if algs.nil?

      if algs.is_a?(String)
        return if is_known_algorithm?(algs)

        raise InvalidDigestAlgorithmError.new("Unknown digest algorithm #{algs}")
      end

      unrecognized = algs.reject { |name| KNOWN_DIGESTS.include?(name) }
      return if unrecognized.empty?

      raise InvalidDigestAlgorithmError.new("Unknown digest algorithm(s): #{unrecognized}")
    end

    # @param alg [String] identifier of digest algorithm
    # @return [Boolean] true if the digest is a valid known algorithm
    def self.is_known_algorithm?(alg)
      KNOWN_DIGESTS.include?(alg)
    end

    # Get a new Digest instance for the specified algorithm
    # @param alg [String] name of the digest algorithm
    # @return [Digest] A digest instance for the requested algorithm
    # @raise [InvalidDigestAlgorithmError] if an unknown digest algorithm is requested
    def self.start_digest(alg)
      case alg
      when 'md5' then Digest::MD5.new
      when 'sha1' then Digest::SHA1.new
      # Digest::SHA2 without a bit length is used for both 'sha2' and 'sha256'
      when 'sha2', 'sha256' then Digest::SHA2.new
      when 'sha384' then Digest::SHA2.new(384)
      when 'sha512' then Digest::SHA2.new(512)
      when 'rmd160' then Digest::RMD160.new
      else
        raise InvalidDigestAlgorithmError.new("Cannot produce digest for unknown algorithm '#{alg}'.")
      end
    end
  end
end
|
@@ -0,0 +1,86 @@
|
|
1
|
+
require 'uri'
|
2
|
+
|
3
|
+
module Longleaf
  # Helper for interacting with s3 uris
  class S3UriHelper
    # Matches the hostname of an S3 endpoint. Capture 1 is the optional prefix before "s3"
    # (including its trailing '.'), which holds the bucket name in virtual hosted style urls.
    # Capture 2 is the optional region segment, including its trailing '-' or '.'.
    ENDPOINT_PATTERN = /^(.+\.)?s3[.\-]([a-z0-9\-]+[\-.])?[a-z0-9]+\./
    ALLOWED_SCHEMES = ['http', 'https', 's3'].freeze

    # Extract the name of the s3 bucket from the provided url
    # @param url s3 url
    # @return the name of the bucket, or nil if the name could not be identified
    # @raise [ArgumentError] if the url is nil, has a disallowed scheme, has no hostname
    #    or its hostname does not look like an S3 endpoint
    def self.extract_bucket(url)
      uri = s3_uri(url)
      prefix = match_endpoint(uri)[1]

      # Virtual hosted style url, the bucket is the host prefix minus its trailing '.'
      return prefix[0..-2] unless prefix.nil? || prefix.empty?

      # Is a path style url, the bucket is the first segment of the path
      path = uri.path
      return nil if path == '/'

      path_parts = path.split('/')
      return nil if path_parts.empty?

      # index 0 is the empty string before the leading slash
      path_parts[1]
    end

    # Extract the path of the object within its bucket from the provided url
    # @param url s3 url
    # @return the path with no leading slash and no bucket name, or nil if the url has no path
    # @raise [ArgumentError] if the url is nil, has a disallowed scheme, has no hostname
    #    or its hostname does not look like an S3 endpoint
    def self.extract_path(url)
      uri = s3_uri(url)
      prefix = match_endpoint(uri)[1]

      path = uri.path
      return nil if path == '/' || path.empty?

      # trim off the first slash
      path = path.partition('/').last

      # In path style urls the first part of the path is the bucket name, trim it off
      path = path.partition('/').last if prefix.nil? || prefix.empty?

      path
    end

    # Extract the region from the hostname of the provided url
    # @param url s3 url
    # @return the region, or nil if the hostname does not specify one
    # @raise [ArgumentError] if the url is nil, has a disallowed scheme, has no hostname
    #    or its hostname does not look like an S3 endpoint
    def self.extract_region(url)
      region = match_endpoint(s3_uri(url))[2]

      # Drop the trailing separator captured along with the region
      region.nil? ? nil : region[0..-2]
    end

    # Parse and sanity check the provided url
    # @param url s3 url
    # @return [URI] the parsed uri
    # @raise [ArgumentError] if the url is nil, its scheme is not one of ALLOWED_SCHEMES,
    #    or it has no hostname
    def self.s3_uri(url)
      if url.nil?
        raise ArgumentError.new("url cannot be empty")
      end
      uri = URI(url)
      if !ALLOWED_SCHEMES.include?(uri.scheme&.downcase)
        raise ArgumentError.new("Invalid scheme for s3 URI #{url}, only http, https and s3 are permitted")
      end
      if uri.host.nil?
        raise ArgumentError.new("Invalid S3 URI, no hostname: #{url}")
      end
      uri
    end

    # Match the hostname of the uri against the S3 endpoint pattern
    # @param uri [URI] parsed uri with a hostname
    # @return [MatchData] the match against ENDPOINT_PATTERN
    # @raise [ArgumentError] if the hostname is not recognized as an S3 endpoint
    def self.match_endpoint(uri)
      matches = ENDPOINT_PATTERN.match(uri.host)
      if matches.nil?
        raise ArgumentError.new("Provided URI does not match the expected pattern for an S3 URI")
      end
      matches
    end
    private_class_method :match_endpoint
  end
end
|
@@ -0,0 +1,189 @@
|
|
1
|
+
require 'longleaf/candidates/file_selector'
|
2
|
+
require 'longleaf/candidates/registered_file_selector'
|
3
|
+
require 'longleaf/candidates/manifest_digest_provider'
|
4
|
+
require 'longleaf/candidates/single_digest_provider'
|
5
|
+
|
6
|
+
module Longleaf
  # Helper for parsing file selection inputs (files, locations, manifests and lists) for commands
  class SelectionOptionsParser
    extend Longleaf::Logging

    # Parses the provided options to construct a file selector and digest provider for
    # use in registration commands. Logs a failure and exits with status 1 if the options
    # are invalid.
    # @param options [Hash] command options
    # @param app_config_manager [ApplicationConfigManager] app config manager
    # @return The file selector and digest provider. The digest provider is nil when files
    #    are selected with the file option and no checksums option is given.
    def self.parse_registration_selection_options(options, app_config_manager)
      there_can_be_only_one("Only one of the following selection options may be provided: -m, -f, -s",
          options, :file, :manifest, :location)

      digest_provider = nil
      if !options[:manifest].nil?
        digests_mapping = self.manifests_to_digest_mapping(options[:manifest])
        selector = FileSelector.new(file_paths: digests_mapping.keys, app_config: app_config_manager)
        digest_provider = ManifestDigestProvider.new(digests_mapping)
      elsif !options[:file].nil?
        if options[:checksums]
          checksums = options[:checksums]
          # validate checksum list format, must be a comma delimited list of prefix:checksums
          if /^[^:,]+:[^:,]+(,[^:,]+:[^:,]+)*$/.match(checksums)
            # convert checksum list into hash with prefix as key
            checksums = Hash[*checksums.split(/\s*[:,]\s*/)]
            digest_provider = SingleDigestProvider.new(checksums)
          else
            logger.failure("Invalid checksums parameter format, see `longleaf help <command>` for more information")
            exit 1
          end
        end

        file_paths = options[:file].split(/\s*,\s*/)
        selector = FileSelector.new(file_paths: file_paths, app_config: app_config_manager)
      elsif !options[:location].nil?
        storage_locations = options[:location].split(/\s*,\s*/)
        selector = FileSelector.new(storage_locations: storage_locations, app_config: app_config_manager)
        digest_provider = SingleDigestProvider.new(nil)
      else
        logger.failure("Must provide one of the following file selection options: -f, -s, or -m")
        exit 1
      end

      [selector, digest_provider]
    end

    # Ensures that at most one of the named options has a value. If more than one is
    # present, the failure message is logged and the process exits with status 1.
    # @param failure_msg [String] message to log if more than one option is present
    # @param options [Hash] command options
    # @param names option keys which are mutually exclusive
    def self.there_can_be_only_one(failure_msg, options, *names)
      got_one = false
      names.each do |name|
        if !options[name].nil?
          if got_one
            logger.failure(failure_msg)
            exit 1
          end
          got_one = true
        end
      end
    end

    # Parses the provided manifest options, reading the contents of the manifests to produce
    # a mapping from files to one or more algorithms. Logs a failure and exits with status 1
    # if a manifest is invalid.
    # @param manifest_vals [Array] List of manifest option values. They may be in one of the following formats:
    #    <alg_name>:<manifest_path> OR <alg_name>:@-
    #    <manifest_path> OR @-
    # @return a hash containing the aggregated contents of the provided manifests. The keys are
    #    paths to manifested files. The values are hashes, mapping digest algorithms to digest values.
    def self.manifests_to_digest_mapping(manifest_vals)
      # interpret option inputs into a list of algorithms to manifest sources
      alg_manifest_pairs = manifest_vals.map do |manifest_val|
        if manifest_val.include?(':')
          manifest_val.split(':', 2)
        else
          # algorithm not specified in option value
          [nil, manifest_val]
        end
      end
      if alg_manifest_pairs.count { |mpair| mpair[1] == '@-' } > 1
        self.fail("Cannot specify more than one manifest from STDIN")
      end

      # read the provided manifests to build a mapping from file uri to all supplied digests
      digests_mapping = Hash.new { |h, k| h[k] = Hash.new }
      alg_manifest_pairs.each do |mpair|
        current_alg = mpair[0]
        # Without an algorithm in the option value, the manifest must declare its algorithms in headers
        multi_digest_manifest = current_alg.nil?

        with_source_stream(mpair[1]) do |source_stream|
          source_stream.each_line do |line|
            line = line.strip
            if multi_digest_manifest && /^[a-zA-Z0-9]+:$/ =~ line
              # Found a digest algorithm header, assuming succeeding entries are of this type
              current_alg = line.chomp(':')
              # Verify that the digest algorithm is known to longleaf
              if !DigestHelper.is_known_algorithm?(current_alg)
                self.fail("Manifest specifies unknown digest algorithm: #{current_alg}")
              end
            else
              if current_alg.nil?
                self.fail("Manifest with unknown checksums encountered, an algorithm must be specified")
              end
              # Entries are a digest followed by a file path, separated by whitespace
              entry_parts = line.split(' ', 2)
              if entry_parts.length != 2
                self.fail("Invalid manifest entry: #{line}")
              end

              digests_mapping[entry_parts[1]][current_alg] = entry_parts[0]
            end
          end
        end
      end

      digests_mapping
    end

    # Parses the provided options to create a selector for registered files. Logs a failure
    # and exits with status 1 if the options are invalid.
    # @param options [Hash] command options
    # @param app_config_manager [ApplicationConfigManager] app config manager
    # @return selector
    def self.create_registered_selector(options, app_config_manager)
      there_can_be_only_one("Only one of the following selection options may be provided: -l, -f, -s",
          options, :file, :location, :from_list)

      if !options[:from_list].nil?
        file_paths = read_from_list(options[:from_list])
        return RegisteredFileSelector.new(file_paths: file_paths, app_config: app_config_manager)
      elsif !options[:file].nil?
        file_paths = options[:file].split(/\s*,\s*/)
        return RegisteredFileSelector.new(file_paths: file_paths, app_config: app_config_manager)
      elsif !options[:location].nil?
        storage_locations = options[:location].split(/\s*,\s*/)
        return RegisteredFileSelector.new(storage_locations: storage_locations, app_config: app_config_manager)
      else
        logger.failure("Must provide one of the following file selection options: -l, -f, or -s")
        exit 1
      end
    end

    # Parses the -l from_list option, reading the list of files specified either from the provided
    # file path or STDIN. Logs a failure and exits with status 1 if the option is blank, the list
    # file does not exist, or the list contains no lines.
    # @param from_list option value, either a file path or "@-"
    # @return list of files from the from_list
    def self.read_from_list(from_list)
      from_list = from_list.strip
      if from_list.empty?
        logger.failure("List parameter must not be empty")
        exit 1
      end

      begin
        lines = with_source_stream(from_list) { |source_stream| source_stream.each_line.map(&:strip) }
      rescue Errno::ENOENT
        logger.failure("Specified list file does not exist: #{from_list}")
        exit 1
      end

      if lines.empty?
        logger.failure("File list is empty, must provide one or more files for this operation")
        exit 1
      end
      lines
    end

    # Logs the message as a failure and exits with status 1
    # @param message [String] failure message
    def self.fail(message)
      logger.failure(message)
      exit 1
    end

    # Yields a readable stream for the given source, which is STDIN when the source is "@-"
    # and otherwise the file at that path. Files opened here are closed when the block
    # finishes, including when it exits early; STDIN is left open.
    # @param source [String] a file path or "@-"
    # @return the result of the block
    def self.with_source_stream(source, &block)
      return yield($stdin) if source == '@-'

      File.open(source, &block)
    end
    private_class_method :with_source_stream
  end
end
|