longleaf 0.1.0.pre.2 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.circleci/config.yml +94 -0
- data/.editorconfig +13 -0
- data/.gitignore +4 -1
- data/.rubocop.yml +44 -0
- data/.rubocop_todo.yml +834 -0
- data/.yardopts +1 -0
- data/Gemfile +16 -1
- data/README.md +98 -12
- data/Rakefile +6 -0
- data/bin/setup +16 -1
- data/docs/aboutlongleaf.md +28 -0
- data/docs/extra.css +32 -0
- data/docs/img/change-file.png +0 -0
- data/docs/img/ll-example-preserved.png +0 -0
- data/docs/index.md +19 -0
- data/docs/install.md +66 -0
- data/docs/ll-example/config-example-relative.yml +33 -0
- data/docs/ll-example/files-dir/LLexample-PDF.pdf +0 -0
- data/docs/ll-example/files-dir/LLexample-TOCHANGE.txt +15 -0
- data/docs/ll-example/files-dir/LLexample-tokeep.txt +10 -0
- data/docs/ll-example/metadata-dir/.gitkeep +0 -0
- data/docs/ll-example/replica-files/.gitkeep +0 -0
- data/docs/ll-example/replica-metadata/.gitkeep +0 -0
- data/docs/quickstart.md +270 -0
- data/docs/rdocs/Longleaf.html +135 -0
- data/docs/rdocs/Longleaf/AppFields.html +178 -0
- data/docs/rdocs/Longleaf/ApplicationConfigDeserializer.html +631 -0
- data/docs/rdocs/Longleaf/ApplicationConfigManager.html +610 -0
- data/docs/rdocs/Longleaf/ApplicationConfigValidator.html +238 -0
- data/docs/rdocs/Longleaf/CLI.html +909 -0
- data/docs/rdocs/Longleaf/ChecksumMismatchError.html +151 -0
- data/docs/rdocs/Longleaf/ConfigBuilder.html +1339 -0
- data/docs/rdocs/Longleaf/ConfigurationError.html +143 -0
- data/docs/rdocs/Longleaf/ConfigurationValidator.html +227 -0
- data/docs/rdocs/Longleaf/DeregisterCommand.html +420 -0
- data/docs/rdocs/Longleaf/DeregisterEvent.html +453 -0
- data/docs/rdocs/Longleaf/DeregistrationError.html +151 -0
- data/docs/rdocs/Longleaf/DigestHelper.html +419 -0
- data/docs/rdocs/Longleaf/EventError.html +147 -0
- data/docs/rdocs/Longleaf/EventNames.html +163 -0
- data/docs/rdocs/Longleaf/EventStatusTracking.html +656 -0
- data/docs/rdocs/Longleaf/FileCheckService.html +540 -0
- data/docs/rdocs/Longleaf/FileHelpers.html +520 -0
- data/docs/rdocs/Longleaf/FileRecord.html +716 -0
- data/docs/rdocs/Longleaf/FileSelector.html +901 -0
- data/docs/rdocs/Longleaf/FixityCheckService.html +691 -0
- data/docs/rdocs/Longleaf/IndexManager.html +1155 -0
- data/docs/rdocs/Longleaf/InvalidDigestAlgorithmError.html +143 -0
- data/docs/rdocs/Longleaf/InvalidStoragePathError.html +143 -0
- data/docs/rdocs/Longleaf/Logging.html +405 -0
- data/docs/rdocs/Longleaf/Logging/RedirectingLogger.html +1213 -0
- data/docs/rdocs/Longleaf/LongleafError.html +139 -0
- data/docs/rdocs/Longleaf/MDFields.html +193 -0
- data/docs/rdocs/Longleaf/MetadataBuilder.html +787 -0
- data/docs/rdocs/Longleaf/MetadataDeserializer.html +537 -0
- data/docs/rdocs/Longleaf/MetadataError.html +143 -0
- data/docs/rdocs/Longleaf/MetadataPersistenceManager.html +539 -0
- data/docs/rdocs/Longleaf/MetadataRecord.html +1411 -0
- data/docs/rdocs/Longleaf/MetadataSerializer.html +786 -0
- data/docs/rdocs/Longleaf/PreservationServiceError.html +147 -0
- data/docs/rdocs/Longleaf/PreserveCommand.html +410 -0
- data/docs/rdocs/Longleaf/PreserveEvent.html +491 -0
- data/docs/rdocs/Longleaf/RegisterCommand.html +428 -0
- data/docs/rdocs/Longleaf/RegisterEvent.html +628 -0
- data/docs/rdocs/Longleaf/RegisteredFileSelector.html +446 -0
- data/docs/rdocs/Longleaf/RegistrationError.html +151 -0
- data/docs/rdocs/Longleaf/ReindexCommand.html +576 -0
- data/docs/rdocs/Longleaf/RsyncReplicationService.html +1180 -0
- data/docs/rdocs/Longleaf/SequelIndexDriver.html +1978 -0
- data/docs/rdocs/Longleaf/ServiceCandidateFilesystemIterator.html +572 -0
- data/docs/rdocs/Longleaf/ServiceCandidateIndexIterator.html +532 -0
- data/docs/rdocs/Longleaf/ServiceCandidateLocator.html +333 -0
- data/docs/rdocs/Longleaf/ServiceClassCache.html +725 -0
- data/docs/rdocs/Longleaf/ServiceDateHelper.html +425 -0
- data/docs/rdocs/Longleaf/ServiceDefinition.html +683 -0
- data/docs/rdocs/Longleaf/ServiceDefinitionManager.html +371 -0
- data/docs/rdocs/Longleaf/ServiceDefinitionValidator.html +269 -0
- data/docs/rdocs/Longleaf/ServiceFields.html +173 -0
- data/docs/rdocs/Longleaf/ServiceManager.html +1229 -0
- data/docs/rdocs/Longleaf/ServiceMappingManager.html +410 -0
- data/docs/rdocs/Longleaf/ServiceMappingValidator.html +347 -0
- data/docs/rdocs/Longleaf/ServiceRecord.html +821 -0
- data/docs/rdocs/Longleaf/StorageLocation.html +985 -0
- data/docs/rdocs/Longleaf/StorageLocationManager.html +729 -0
- data/docs/rdocs/Longleaf/StorageLocationUnavailableError.html +143 -0
- data/docs/rdocs/Longleaf/StorageLocationValidator.html +373 -0
- data/docs/rdocs/Longleaf/StoragePathValidator.html +253 -0
- data/docs/rdocs/Longleaf/SystemConfigBuilder.html +441 -0
- data/docs/rdocs/Longleaf/SystemConfigFields.html +163 -0
- data/docs/rdocs/Longleaf/ValidateConfigCommand.html +451 -0
- data/docs/rdocs/Longleaf/ValidateMetadataCommand.html +408 -0
- data/docs/rdocs/_index.html +660 -0
- data/docs/rdocs/class_list.html +51 -0
- data/docs/rdocs/css/common.css +1 -0
- data/docs/rdocs/css/full_list.css +58 -0
- data/docs/rdocs/css/style.css +496 -0
- data/docs/rdocs/file.README.html +165 -0
- data/docs/rdocs/file_list.html +56 -0
- data/docs/rdocs/frames.html +17 -0
- data/docs/rdocs/index.html +165 -0
- data/docs/rdocs/js/app.js +303 -0
- data/docs/rdocs/js/full_list.js +216 -0
- data/docs/rdocs/js/jquery.js +4 -0
- data/docs/rdocs/method_list.html +2051 -0
- data/docs/rdocs/top-level-namespace.html +110 -0
- data/lib/longleaf/candidates/file_selector.rb +139 -0
- data/lib/longleaf/candidates/manifest_digest_provider.rb +17 -0
- data/lib/longleaf/candidates/registered_file_selector.rb +67 -0
- data/lib/longleaf/candidates/service_candidate_filesystem_iterator.rb +93 -0
- data/lib/longleaf/candidates/service_candidate_index_iterator.rb +84 -0
- data/lib/longleaf/candidates/service_candidate_locator.rb +23 -0
- data/lib/longleaf/candidates/single_digest_provider.rb +13 -0
- data/lib/longleaf/cli.rb +237 -46
- data/lib/longleaf/commands/deregister_command.rb +51 -0
- data/lib/longleaf/commands/preserve_command.rb +50 -0
- data/lib/longleaf/commands/register_command.rb +32 -43
- data/lib/longleaf/commands/reindex_command.rb +92 -0
- data/lib/longleaf/commands/validate_config_command.rb +33 -8
- data/lib/longleaf/commands/validate_metadata_command.rb +51 -0
- data/lib/longleaf/errors.rb +26 -7
- data/lib/longleaf/events/deregister_event.rb +53 -0
- data/lib/longleaf/events/event_names.rb +9 -0
- data/lib/longleaf/events/event_status_tracking.rb +59 -0
- data/lib/longleaf/events/preserve_event.rb +81 -0
- data/lib/longleaf/events/register_event.rb +52 -51
- data/lib/longleaf/helpers/case_insensitive_hash.rb +38 -0
- data/lib/longleaf/helpers/digest_helper.rb +56 -0
- data/lib/longleaf/helpers/s3_uri_helper.rb +86 -0
- data/lib/longleaf/helpers/selection_options_parser.rb +189 -0
- data/lib/longleaf/helpers/service_date_helper.rb +78 -0
- data/lib/longleaf/indexing/index_manager.rb +101 -0
- data/lib/longleaf/indexing/sequel_index_driver.rb +306 -0
- data/lib/longleaf/logging.rb +5 -4
- data/lib/longleaf/logging/redirecting_logger.rb +26 -25
- data/lib/longleaf/models/app_fields.rb +7 -2
- data/lib/longleaf/models/file_record.rb +17 -8
- data/lib/longleaf/models/filesystem_metadata_location.rb +56 -0
- data/lib/longleaf/models/filesystem_storage_location.rb +52 -0
- data/lib/longleaf/models/md_fields.rb +2 -1
- data/lib/longleaf/models/metadata_location.rb +47 -0
- data/lib/longleaf/models/metadata_record.rb +39 -15
- data/lib/longleaf/models/s3_storage_location.rb +133 -0
- data/lib/longleaf/models/service_definition.rb +7 -6
- data/lib/longleaf/models/service_fields.rb +7 -1
- data/lib/longleaf/models/service_record.rb +10 -6
- data/lib/longleaf/models/storage_location.rb +24 -19
- data/lib/longleaf/models/storage_types.rb +9 -0
- data/lib/longleaf/models/system_config_fields.rb +9 -0
- data/lib/longleaf/preservation_services/file_check_service.rb +58 -0
- data/lib/longleaf/preservation_services/fixity_check_service.rb +123 -0
- data/lib/longleaf/preservation_services/rsync_replication_service.rb +182 -0
- data/lib/longleaf/preservation_services/s3_replication_service.rb +143 -0
- data/lib/longleaf/services/application_config_deserializer.rb +81 -24
- data/lib/longleaf/services/application_config_manager.rb +20 -6
- data/lib/longleaf/services/application_config_validator.rb +19 -9
- data/lib/longleaf/services/configuration_validator.rb +67 -4
- data/lib/longleaf/services/filesystem_location_validator.rb +16 -0
- data/lib/longleaf/services/metadata_deserializer.rb +113 -42
- data/lib/longleaf/services/metadata_persistence_manager.rb +47 -0
- data/lib/longleaf/services/metadata_serializer.rb +138 -25
- data/lib/longleaf/services/metadata_validator.rb +76 -0
- data/lib/longleaf/services/s3_location_validator.rb +19 -0
- data/lib/longleaf/services/service_class_cache.rb +112 -0
- data/lib/longleaf/services/service_definition_manager.rb +10 -7
- data/lib/longleaf/services/service_definition_validator.rb +25 -18
- data/lib/longleaf/services/service_manager.rb +86 -11
- data/lib/longleaf/services/service_mapping_manager.rb +13 -12
- data/lib/longleaf/services/service_mapping_validator.rb +36 -26
- data/lib/longleaf/services/storage_location_manager.rb +76 -15
- data/lib/longleaf/services/storage_location_validator.rb +49 -35
- data/lib/longleaf/specs/config_builder.rb +47 -23
- data/lib/longleaf/specs/config_validator_helpers.rb +16 -0
- data/lib/longleaf/specs/custom_matchers.rb +9 -0
- data/lib/longleaf/specs/file_helpers.rb +61 -0
- data/lib/longleaf/specs/metadata_builder.rb +92 -0
- data/lib/longleaf/specs/system_config_builder.rb +27 -0
- data/lib/longleaf/version.rb +1 -1
- data/longleaf.gemspec +20 -7
- data/mkdocs.yml +21 -0
- metadata +306 -23
- data/.travis.yml +0 -4
- data/lib/longleaf/commands/abstract_command.rb +0 -37
- data/lib/longleaf/services/storage_path_validator.rb +0 -16
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
require 'longleaf/services/service_manager'
|
|
2
|
+
require 'longleaf/events/event_names'
|
|
3
|
+
require 'longleaf/events/event_status_tracking'
|
|
4
|
+
require 'longleaf/logging'
|
|
5
|
+
|
|
6
|
+
module Longleaf
  # Preserve event for a single file. Runs each preservation service which is
  # applicable to the file's storage location and records the outcome of each.
  class PreserveEvent
    include Longleaf::Logging
    include Longleaf::EventStatusTracking

    # @param file_rec [FileRecord] file record
    # @param app_manager [ApplicationConfigManager] the application configuration
    # @param force [boolean] if true, then services run regardless of whether they are flagged as needed
    # @raise [ArgumentError] if file_rec or app_manager is nil
    def initialize(file_rec:, app_manager:, force: false)
      raise ArgumentError.new('Must provide a file_rec parameter') if file_rec.nil?
      raise ArgumentError.new('Must provide an ApplicationConfigManager') if app_manager.nil?

      @app_manager = app_manager
      @file_rec = file_rec
      @force = force
    end

    # Perform a preserve event on the given file, updating its metadata record if any services were executed.
    # @return the value of return_status (not defined in this class; presumably provided
    #    by EventStatusTracking - confirm there)
    # @raise [StorageLocationUnavailableError] re-raised untouched when a service raises it
    def perform
      location = @file_rec.storage_location
      services = @app_manager.service_manager
      metadata = @file_rec.metadata_record
      target_path = @file_rec.path

      logger.info("Performing preserve event on #{@file_rec.path}")

      persist_required = false
      begin
        unless File.exist?(target_path)
          # Need to persist metadata to avoid repeating processing of this file too soon.
          persist_required = true
          record_failure(EventNames::PRESERVE, target_path, "File is registered but missing.")
          return return_status
        end

        # Services applicable to this location and event
        applicable = services.list_services(location: location.name, event: EventNames::PRESERVE)
        applicable.each do |service_name|
          # Without the force flag, only services flagged as needed are run
          if !@force && !services.service_needed?(service_name, metadata)
            logger.debug("Service #{service_name} not needed for file '#{@file_rec.path}', skipping")
            next
          end

          begin
            logger.info("Performing preserve service #{service_name} for #{@file_rec.path}")
            # Set before running the service so that failures are persisted as well
            persist_required = true
            services.perform_service(service_name, @file_rec, EventNames::PRESERVE)

            @file_rec.metadata_record.update_service_as_performed(service_name)
            record_success(EventNames::PRESERVE, target_path, nil, service_name)
          rescue PreservationServiceError => e
            # Service level failure: record it and carry on with the next service
            @file_rec.metadata_record.update_service_as_failed(service_name)
            record_failure(EventNames::PRESERVE, target_path, e.message, service_name)
          rescue StorageLocationUnavailableError
            # Propagated to the caller unchanged
            raise
          rescue StandardError => e
            # Unexpected failure: record it and stop processing this file
            @file_rec.metadata_record.update_service_as_failed(service_name)
            record_failure(EventNames::PRESERVE, target_path, nil, service_name, error: e)
            return return_status
          end
        end
      ensure
        # Persist the metadata whenever a service was attempted or the file was missing,
        # including when leaving early via return or a raised error
        @app_manager.md_manager.persist(@file_rec) if persist_required
      end

      return_status
    end
  end
end
|
|
@@ -1,92 +1,93 @@
|
|
|
1
1
|
require 'longleaf/errors'
|
|
2
|
+
require 'longleaf/events/event_names'
|
|
3
|
+
require 'longleaf/events/event_status_tracking'
|
|
2
4
|
require 'longleaf/models/metadata_record'
|
|
3
5
|
require 'longleaf/services/metadata_deserializer'
|
|
4
6
|
require 'longleaf/services/metadata_serializer'
|
|
5
7
|
require 'time'
|
|
6
8
|
|
|
7
|
-
# Event to register a file with longleaf
|
|
8
9
|
module Longleaf
|
|
10
|
+
# Event to register a file with longleaf
|
|
9
11
|
class RegisterEvent
|
|
10
|
-
|
|
11
|
-
|
|
12
|
+
include Longleaf::EventStatusTracking
|
|
13
|
+
|
|
12
14
|
# @param file_rec [FileRecord] file record
|
|
13
15
|
# @param app_manager [ApplicationConfigManager] the application configuration
|
|
14
16
|
# @param force [boolean] if true, then already registered files will be re-registered
|
|
15
|
-
|
|
17
|
+
# @param digest_provider [#get_digests] object which provides digests for files being registered
|
|
18
|
+
def initialize(file_rec:, app_manager:, force: false, digest_provider: nil)
  # Validate the file record: it must be present and of the expected type
  raise ArgumentError, 'Must provide a file_rec parameter' if file_rec.nil?
  unless file_rec.is_a?(FileRecord)
    raise ArgumentError, 'Parameter file_rec must be a FileRecord'
  end

  # Validate the application configuration manager in the same way
  raise ArgumentError, 'Must provide an ApplicationConfigManager' if app_manager.nil?
  unless app_manager.is_a?(ApplicationConfigManager)
    raise ArgumentError, 'Parameter app_manager must be an ApplicationConfigManager'
  end

  @app_manager = app_manager
  @file_rec = file_rec
  @force = force
  # May be nil, in which case perform does not request any digests
  @digest_provider = digest_provider
end
|
|
28
|
-
|
|
31
|
+
|
|
29
32
|
# Perform a registration event on the given file
|
|
30
|
-
# @
|
|
33
|
+
# @raise RegistrationError if a file cannot be registered
|
|
31
34
|
def perform
  begin
    # An already registered file is only registered again when the force flag is set
    already_registered = @file_rec.metadata_present?
    if already_registered && !@force
      raise RegistrationError, "Unable to register '#{@file_rec.path}', it is already registered."
    end

    # Start a fresh metadata record stamped with the current UTC time (millisecond precision)
    md_rec = MetadataRecord.new(registered: Time.now.utc.iso8601(3))
    @file_rec.metadata_record = md_rec

    # Carry selected details over from the former record when one exists
    retain_existing_properties if @file_rec.metadata_present?

    populate_file_properties

    # Merge in any digests supplied for this file
    unless @digest_provider.nil?
      checksums = @digest_provider.get_digests(@file_rec.path)
      md_rec.checksums.merge!(checksums) unless checksums.nil?
    end

    # persist the metadata
    @app_manager.md_manager.persist(@file_rec)

    record_success(EventNames::REGISTER, @file_rec.path)
  rescue RegistrationError, InvalidStoragePathError => err
    # Both error types are reported as a failed registration rather than propagated
    record_failure(EventNames::REGISTER, @file_rec.path, err.message)
  end

  return_status
end
|
|
56
|
-
|
|
69
|
+
|
|
57
70
|
private
|
|
58
71
|
# Copies the file's modification time (UTC, ISO 8601 with milliseconds) and its
# size, as reported by File.size, from the filesystem into the metadata record.
def populate_file_properties
  record = @file_rec.metadata_record

  modified_at = File.mtime(@file_rec.path).utc
  record.last_modified = modified_at.iso8601(3)
  record.file_size = File.size(@file_rec.path)
end
|
|
65
|
-
|
|
66
|
-
def populate_services
|
|
67
|
-
md_rec = @file_rec.metadata_record
|
|
68
|
-
|
|
69
|
-
service_manager = @app_manager.service_manager
|
|
70
|
-
definitions = service_manager.list_service_definitions(location: @file_rec.storage_location.name)
|
|
71
|
-
|
|
72
|
-
# Add service section
|
|
73
|
-
definitions.each do |serv_def|
|
|
74
|
-
serv_name = serv_def.name
|
|
75
|
-
md_rec.add_service(serv_name)
|
|
76
|
-
end
|
|
77
|
-
end
|
|
78
|
-
|
|
78
|
+
|
|
79
79
|
# Copy a subset of properties from an existing metadata record to the new record
|
|
80
80
|
def retain_existing_properties
|
|
81
81
|
md_rec = @file_rec.metadata_record
|
|
82
|
-
|
|
83
|
-
old_md = MetadataDeserializer.deserialize(file_path: @file_rec.metadata_path
|
|
82
|
+
|
|
83
|
+
old_md = MetadataDeserializer.deserialize(file_path: @file_rec.metadata_path,
|
|
84
|
+
digest_algs: @file_rec.storage_location.metadata_location.digests)
|
|
84
85
|
# Copy custom properties
|
|
85
86
|
old_md.properties.each { |name, value| md_rec.properties[name] = value }
|
|
86
87
|
# Copy stale-replicas flag per service
|
|
87
88
|
old_md.list_services.each do |serv_name|
|
|
88
89
|
serv_rec = old_md.service(serv_name)
|
|
89
|
-
|
|
90
|
+
|
|
90
91
|
stale_replicas = serv_rec.stale_replicas
|
|
91
92
|
if stale_replicas
|
|
92
93
|
new_service = md_rec.service(serv_name)
|
|
@@ -95,4 +96,4 @@ module Longleaf
|
|
|
95
96
|
end
|
|
96
97
|
end
|
|
97
98
|
end
|
|
98
|
-
end
|
|
99
|
+
end
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
module Longleaf
  # Hash subclass which provides case insensitive keys, where keys are always downcased.
  #
  # Keys which respond to +downcase+ are downcased on the way in and on lookup;
  # any other key is used unchanged.
  class CaseInsensitiveHash < Hash
    # @param key lookup key, matched case insensitively
    # @return the stored value, or the hash default if the key is absent
    def [](key)
      super(_insensitive(key))
    end

    # @param key key to assign, stored in downcased form
    # @param value value to store
    def []=(key, value)
      super(_insensitive(key), value)
    end

    # Same as #[]=. Hash#store is a separate method from Hash#[]=, so it has to be
    # overridden as well in order to keep keys normalized.
    def store(key, value)
      super(_insensitive(key), value)
    end

    # Case insensitive variant of Hash#fetch; default value and block are passed through.
    def fetch(key, *args, &block)
      super(_insensitive(key), *args, &block)
    end

    # @param key key to remove, matched case insensitively
    # @return the removed value, or nil if the key was not present
    def delete(key)
      super(_insensitive(key))
    end

    # @return [Boolean] true if the key is present, ignoring case
    def has_key?(key)
      super(_insensitive(key))
    end

    # key?, include? and member? are independent methods on Hash, so overriding has_key?
    # alone leaves them case sensitive. Each is overridden explicitly.
    def key?(key)
      super(_insensitive(key))
    end

    def include?(key)
      super(_insensitive(key))
    end

    def member?(key)
      super(_insensitive(key))
    end

    # @param other_hashes hashes whose keys are downcased before being merged
    # @return a new hash containing the merged entries
    def merge(*other_hashes, &block)
      super(*other_hashes.map { |other| _insensitive_keys(other) }, &block)
    end

    # In-place variant of #merge.
    def merge!(*other_hashes, &block)
      super(*other_hashes.map { |other| _insensitive_keys(other) }, &block)
    end

    # Hash#update is the same operation as Hash#merge! under a different method name.
    def update(*other_hashes, &block)
      super(*other_hashes.map { |other| _insensitive_keys(other) }, &block)
    end

    # Cause this hash to serialize as a regular hash to avoid deserialization failures
    def encode_with(coder)
      coder.represent_map nil, self
    end

    protected

    # @return the downcased key if it supports downcase, otherwise the key itself
    def _insensitive(key)
      key.respond_to?(:downcase) ? key.downcase : key
    end

    # @return [Hash] copy of the given hash with all keys normalized. If two keys
    #    collapse to the same downcased key, the later entry wins.
    def _insensitive_keys(other_hash)
      other_hash.map { |k, v| [_insensitive(k), v] }.to_h
    end
  end
end
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
require 'longleaf/errors'
|
|
2
|
+
require 'digest'
|
|
3
|
+
|
|
4
|
+
module Longleaf
  # Helper methods for generating digests
  class DigestHelper
    # Names of the digest algorithms which start_digest is able to produce.
    # Frozen so that the list cannot be altered at runtime; `||=` keeps the first
    # definition if this file is loaded more than once.
    KNOWN_DIGESTS ||= %w[md5 sha1 sha2 sha256 sha384 sha512 rmd160].freeze

    # @param algs Either a string containing one or an array containing zero or more digest
    #    algorithm names.
    # @return [nil] when all of the given algorithms are known, or when algs is nil
    # @raise [InvalidDigestAlgorithmError] thrown if any of the digest algorithms listed are not
    #    known to the system.
    def self.validate_algorithms(algs)
      return if algs.nil?

      if algs.is_a?(String)
        unless is_known_algorithm?(algs)
          raise InvalidDigestAlgorithmError.new("Unknown digest algorithm #{algs}")
        end
      else
        # Collect every unknown name so that all of them are reported at once
        unknown = algs.reject { |alg| is_known_algorithm?(alg) }
        unless unknown.empty?
          raise InvalidDigestAlgorithmError.new("Unknown digest algorithm(s): #{unknown}")
        end
      end
    end

    # @param alg [String] identifier of digest algorithm
    # @return [Boolean] true if the digest is a valid known algorithm
    def self.is_known_algorithm?(alg)
      KNOWN_DIGESTS.include?(alg)
    end

    # Get a new Digest instance for the specified algorithm
    # @param alg [String] name of the digest algorithm
    # @return [Digest] A new digest instance for the requested algorithm
    # @raise [InvalidDigestAlgorithmError] if an unknown digest algorithm is requested
    def self.start_digest(alg)
      case alg
      when 'md5'
        Digest::MD5.new
      when 'sha1'
        Digest::SHA1.new
      when 'sha2', 'sha256'
        # Digest::SHA2 defaults to a 256 bit digest
        Digest::SHA2.new
      when 'sha384'
        Digest::SHA2.new(384)
      when 'sha512'
        Digest::SHA2.new(512)
      when 'rmd160'
        Digest::RMD160.new
      else
        raise InvalidDigestAlgorithmError.new("Cannot produce digest for unknown algorithm '#{alg}'.")
      end
    end
  end
end
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
require 'uri'
|
|
2
|
+
|
|
3
|
+
module Longleaf
  # Helper for interacting with s3 uris
  class S3UriHelper
    # Matches the hostname of an S3 endpoint.
    # Group 1: optional prefix before "s3", including its trailing dot; when present
    #          the bucket is being addressed through the hostname.
    # Group 2: optional region, including its trailing separator.
    ENDPOINT_PATTERN = /^(.+\.)?s3[.\-]([a-z0-9\-]+[\-.])?[a-z0-9]+\./
    ALLOWED_SCHEMES = ['http', 'https', 's3'].freeze

    # Extract the name of the s3 bucket from the provided url
    # @param url s3 url
    # @return the name of the bucket, or nil if the name could not be identified
    # @raise [ArgumentError] if the url is not a valid S3 URI
    def self.extract_bucket(url)
      uri = s3_uri(url)
      prefix = match_endpoint(uri)[1]

      # Bucket given as part of the hostname: drop the trailing dot
      return prefix[0..-2] unless prefix.nil? || prefix.empty?

      # Path style url, the bucket is the first path segment. Splitting "/" or an
      # empty path produces no segments, in which case there is no bucket.
      path_parts = uri.path.split('/')
      return nil if path_parts.empty?

      path_parts[1]
    end

    # Extract the path of the object from the provided url, excluding the bucket name
    # @param url s3 url
    # @return the path without a leading slash, or nil if the url has no path
    # @raise [ArgumentError] if the url is not a valid S3 URI
    def self.extract_path(url)
      uri = s3_uri(url)
      prefix = match_endpoint(uri)[1]

      path = uri.path
      return nil if path == '/' || path.empty?

      # trim off the first slash
      path = path.partition('/').last

      # For path style urls the first segment is the bucket name, so trim it off too
      path = path.partition('/').last if prefix.nil? || prefix.empty?

      path
    end

    # Extract the region from the hostname of the provided url
    # @param url s3 url
    # @return the region, or nil if the hostname does not specify one
    # @raise [ArgumentError] if the url is not a valid S3 URI
    def self.extract_region(url)
      uri = s3_uri(url)
      region = match_endpoint(uri)[2]

      # No region specified
      return nil if region.nil?

      # Drop the trailing separator
      region[0..-2]
    end

    # Parse and validate the provided url
    # @param url s3 url
    # @return [URI] the parsed uri
    # @raise [ArgumentError] if the url is nil, uses a scheme other than http, https or s3,
    #    or has no hostname
    def self.s3_uri(url)
      if url.nil?
        raise ArgumentError.new("url cannot be empty")
      end

      uri = URI(url)
      unless ALLOWED_SCHEMES.include?(uri.scheme&.downcase)
        raise ArgumentError.new("Invalid scheme for s3 URI #{url}, only http, https and s3 are permitted")
      end
      if uri.host.nil?
        raise ArgumentError.new("Invalid S3 URI, no hostname: #{url}")
      end

      uri
    end

    # Match the hostname of the uri against the S3 endpoint pattern
    # @param uri [URI] parsed uri with a hostname
    # @return [MatchData] the match, never nil
    # @raise [ArgumentError] if the hostname is not an S3 endpoint
    def self.match_endpoint(uri)
      matches = ENDPOINT_PATTERN.match(uri.host)
      if matches.nil?
        raise ArgumentError.new("Provided URI does not match the expected pattern for an S3 URI")
      end

      matches
    end
    private_class_method :match_endpoint
  end
end
|
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
require 'longleaf/candidates/file_selector'
|
|
2
|
+
require 'longleaf/candidates/registered_file_selector'
|
|
3
|
+
require 'longleaf/candidates/manifest_digest_provider'
|
|
4
|
+
require 'longleaf/candidates/single_digest_provider'
|
|
5
|
+
|
|
6
|
+
module Longleaf
  # Helper for parsing the file selection options of commands, including manifest
  # inputs used for registration.
  class SelectionOptionsParser
    extend Longleaf::Logging

    # Parses the provided options to construct a file selector and digest provider for
    # use in registration commands. Logs a failure and exits with status 1 if the
    # options are invalid.
    # @param options [Hash] command options
    # @param app_config_manager [ApplicationConfigManager] app config manager
    # @return [Array] two element array of the file selector and the digest provider.
    #    The digest provider is nil when files are selected with :file and no
    #    :checksums option is given.
    def self.parse_registration_selection_options(options, app_config_manager)
      there_can_be_only_one("Only one of the following selection options may be provided: -m, -f, -s",
          options, :file, :manifest, :location)

      digest_provider = nil
      if !options[:manifest].nil?
        digests_mapping = self.manifests_to_digest_mapping(options[:manifest])
        selector = FileSelector.new(file_paths: digests_mapping.keys, app_config: app_config_manager)
        digest_provider = ManifestDigestProvider.new(digests_mapping)
      elsif !options[:file].nil?
        if options[:checksums]
          checksums = options[:checksums]
          # validate checksum list format, must be a comma delimited list of prefix:checksums
          if /^[^:,]+:[^:,]+(,[^:,]+:[^:,]+)*$/.match(checksums)
            # convert checksum list into hash with prefix as key
            checksums = Hash[*checksums.split(/\s*[:,]\s*/)]
            digest_provider = SingleDigestProvider.new(checksums)
          else
            logger.failure("Invalid checksums parameter format, see `longleaf help <command>` for more information")
            exit 1
          end
        end

        file_paths = options[:file].split(/\s*,\s*/)
        selector = FileSelector.new(file_paths: file_paths, app_config: app_config_manager)
      elsif !options[:location].nil?
        storage_locations = options[:location].split(/\s*,\s*/)
        selector = FileSelector.new(storage_locations: storage_locations, app_config: app_config_manager)
        digest_provider = SingleDigestProvider.new(nil)
      else
        logger.failure("Must provide one of the following file selection options: -f, -s, or -m")
        exit 1
      end

      [selector, digest_provider]
    end

    # Verifies that at most one of the named options has a value, otherwise logs
    # the failure message and exits with status 1.
    # @param failure_msg [String] message logged if more than one option is set
    # @param options [Hash] command options
    # @param names option keys which are mutually exclusive
    def self.there_can_be_only_one(failure_msg, options, *names)
      got_one = false
      names.each do |name|
        next if options[name].nil?

        if got_one
          logger.failure(failure_msg)
          exit 1
        end
        got_one = true
      end
    end

    # Parses the provided manifest options, reading the contents of the manifests to produce
    # a mapping from files to one or more algorithms.
    # @param manifest_vals [Array] List of manifest option values. They may be in one of the following formats:
    #    <alg_name>:<manifest_path> OR <alg_name>:@-
    #    <manifest_path> OR @-
    # @return a hash containing the aggregated contents of the provided manifests. The keys are
    #    paths to manifested files. The values are hashes, mapping digest algorithms to digest values.
    def self.manifests_to_digest_mapping(manifest_vals)
      # interpret option inputs into a list of [algorithm, manifest source] pairs
      alg_manifest_pairs = manifest_vals.map do |manifest_val|
        if manifest_val.include?(':')
          manifest_val.split(':', 2)
        else
          # algorithm not specified in option value
          [nil, manifest_val]
        end
      end
      # STDIN can only be consumed once
      if alg_manifest_pairs.count { |mpair| mpair[1] == '@-' } > 1
        self.fail("Cannot specify more than one manifest from STDIN")
      end

      # read the provided manifests to build a mapping from file uri to all supplied digests
      digests_mapping = Hash.new { |h, k| h[k] = Hash.new }
      alg_manifest_pairs.each do |mpair|
        current_alg = mpair[0]
        # Without an algorithm in the option value, the manifest must declare its
        # algorithm(s) in "<alg>:" header lines
        multi_digest_manifest = current_alg.nil?
        each_source_line(mpair[1]) do |line|
          line = line.strip
          if multi_digest_manifest && /^[a-zA-Z0-9]+:$/ =~ line
            # Found a digest algorithm header, assuming succeeding entries are of this type
            current_alg = line.chomp(':')
            # Verify that the digest algorithm is known to longleaf
            if !DigestHelper.is_known_algorithm?(current_alg)
              self.fail("Manifest specifies unknown digest algorithm: #{current_alg}")
            end
          else
            if current_alg.nil?
              self.fail("Manifest with unknown checksums encountered, an algorithm must be specified")
            end
            # Entries are "<digest> <path>", the path may itself contain whitespace
            entry_parts = line.split(' ', 2)
            if entry_parts.length != 2
              self.fail("Invalid manifest entry: #{line}")
            end

            digests_mapping[entry_parts[1]][current_alg] = entry_parts[0]
          end
        end
      end

      digests_mapping
    end

    # Parses the provided options to create a selector for registered files. Logs a
    # failure and exits with status 1 if no selection option is provided.
    # @param options [Hash] command options
    # @param app_config_manager [ApplicationConfigManager] app config manager
    # @return selector
    def self.create_registered_selector(options, app_config_manager)
      there_can_be_only_one("Only one of the following selection options may be provided: -l, -f, -s",
          options, :file, :location, :from_list)

      if !options[:from_list].nil?
        file_paths = read_from_list(options[:from_list])
        return RegisteredFileSelector.new(file_paths: file_paths, app_config: app_config_manager)
      elsif !options[:file].nil?
        file_paths = options[:file].split(/\s*,\s*/)
        return RegisteredFileSelector.new(file_paths: file_paths, app_config: app_config_manager)
      elsif !options[:location].nil?
        storage_locations = options[:location].split(/\s*,\s*/)
        return RegisteredFileSelector.new(storage_locations: storage_locations, app_config: app_config_manager)
      else
        logger.failure("Must provide one of the following file selection options: -l, -f, or -s")
        exit 1
      end
    end

    # Parses the -l from_list option, reading the list of files specified either from the provided
    # file path or STDIN. Logs a failure and exits with status 1 if the option is blank,
    # the list file does not exist, or the list contains no lines.
    # @param from_list option value, either a file path or "@-"
    # @return list of files from the from_list
    def self.read_from_list(from_list)
      from_list = from_list.strip
      if from_list.empty?
        logger.failure("List parameter must not be empty")
        exit 1
      end

      lines = []
      begin
        each_source_line(from_list) { |line| lines << line.strip }
      rescue Errno::ENOENT
        logger.failure("Specified list file does not exist: #{from_list}")
        exit 1
      end

      if lines.empty?
        logger.failure("File list is empty, must provide one or more files for this operation")
        exit 1
      end
      lines
    end

    # Logs the provided failure message and exits with status 1
    # @param message [String] failure message
    def self.fail(message)
      logger.failure(message)
      exit 1
    end

    # Yields each line of the given source, which is STDIN when the source is "@-"
    # and otherwise the file at the given path. A file opened here is closed once
    # iteration finishes, including when the block exits or raises.
    # @param source [String] file path or "@-"
    def self.each_source_line(source, &block)
      if source == '@-'
        $stdin.each_line(&block)
      else
        File.open(source) { |stream| stream.each_line(&block) }
      end
    end
    private_class_method :each_source_line
  end
end
|