longleaf 0.1.0 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.circleci/config.yml +94 -0
- data/.editorconfig +13 -0
- data/.gitignore +4 -1
- data/.rubocop.yml +44 -0
- data/.rubocop_todo.yml +834 -0
- data/.yardopts +1 -0
- data/Gemfile +16 -1
- data/README.md +98 -12
- data/Rakefile +6 -0
- data/bin/setup +16 -1
- data/docs/aboutlongleaf.md +28 -0
- data/docs/extra.css +32 -0
- data/docs/img/change-file.png +0 -0
- data/docs/img/ll-example-preserved.png +0 -0
- data/docs/index.md +19 -0
- data/docs/install.md +66 -0
- data/docs/ll-example/config-example-relative.yml +33 -0
- data/docs/ll-example/files-dir/LLexample-PDF.pdf +0 -0
- data/docs/ll-example/files-dir/LLexample-TOCHANGE.txt +15 -0
- data/docs/ll-example/files-dir/LLexample-tokeep.txt +10 -0
- data/docs/ll-example/metadata-dir/.gitkeep +0 -0
- data/docs/ll-example/replica-files/.gitkeep +0 -0
- data/docs/ll-example/replica-metadata/.gitkeep +0 -0
- data/docs/quickstart.md +270 -0
- data/docs/rdocs/Longleaf.html +135 -0
- data/docs/rdocs/Longleaf/AppFields.html +178 -0
- data/docs/rdocs/Longleaf/ApplicationConfigDeserializer.html +631 -0
- data/docs/rdocs/Longleaf/ApplicationConfigManager.html +610 -0
- data/docs/rdocs/Longleaf/ApplicationConfigValidator.html +238 -0
- data/docs/rdocs/Longleaf/CLI.html +909 -0
- data/docs/rdocs/Longleaf/ChecksumMismatchError.html +151 -0
- data/docs/rdocs/Longleaf/ConfigBuilder.html +1339 -0
- data/docs/rdocs/Longleaf/ConfigurationError.html +143 -0
- data/docs/rdocs/Longleaf/ConfigurationValidator.html +227 -0
- data/docs/rdocs/Longleaf/DeregisterCommand.html +420 -0
- data/docs/rdocs/Longleaf/DeregisterEvent.html +453 -0
- data/docs/rdocs/Longleaf/DeregistrationError.html +151 -0
- data/docs/rdocs/Longleaf/DigestHelper.html +419 -0
- data/docs/rdocs/Longleaf/EventError.html +147 -0
- data/docs/rdocs/Longleaf/EventNames.html +163 -0
- data/docs/rdocs/Longleaf/EventStatusTracking.html +656 -0
- data/docs/rdocs/Longleaf/FileCheckService.html +540 -0
- data/docs/rdocs/Longleaf/FileHelpers.html +520 -0
- data/docs/rdocs/Longleaf/FileRecord.html +716 -0
- data/docs/rdocs/Longleaf/FileSelector.html +901 -0
- data/docs/rdocs/Longleaf/FixityCheckService.html +691 -0
- data/docs/rdocs/Longleaf/IndexManager.html +1155 -0
- data/docs/rdocs/Longleaf/InvalidDigestAlgorithmError.html +143 -0
- data/docs/rdocs/Longleaf/InvalidStoragePathError.html +143 -0
- data/docs/rdocs/Longleaf/Logging.html +405 -0
- data/docs/rdocs/Longleaf/Logging/RedirectingLogger.html +1213 -0
- data/docs/rdocs/Longleaf/LongleafError.html +139 -0
- data/docs/rdocs/Longleaf/MDFields.html +193 -0
- data/docs/rdocs/Longleaf/MetadataBuilder.html +787 -0
- data/docs/rdocs/Longleaf/MetadataDeserializer.html +537 -0
- data/docs/rdocs/Longleaf/MetadataError.html +143 -0
- data/docs/rdocs/Longleaf/MetadataPersistenceManager.html +539 -0
- data/docs/rdocs/Longleaf/MetadataRecord.html +1411 -0
- data/docs/rdocs/Longleaf/MetadataSerializer.html +786 -0
- data/docs/rdocs/Longleaf/PreservationServiceError.html +147 -0
- data/docs/rdocs/Longleaf/PreserveCommand.html +410 -0
- data/docs/rdocs/Longleaf/PreserveEvent.html +491 -0
- data/docs/rdocs/Longleaf/RegisterCommand.html +428 -0
- data/docs/rdocs/Longleaf/RegisterEvent.html +628 -0
- data/docs/rdocs/Longleaf/RegisteredFileSelector.html +446 -0
- data/docs/rdocs/Longleaf/RegistrationError.html +151 -0
- data/docs/rdocs/Longleaf/ReindexCommand.html +576 -0
- data/docs/rdocs/Longleaf/RsyncReplicationService.html +1180 -0
- data/docs/rdocs/Longleaf/SequelIndexDriver.html +1978 -0
- data/docs/rdocs/Longleaf/ServiceCandidateFilesystemIterator.html +572 -0
- data/docs/rdocs/Longleaf/ServiceCandidateIndexIterator.html +532 -0
- data/docs/rdocs/Longleaf/ServiceCandidateLocator.html +333 -0
- data/docs/rdocs/Longleaf/ServiceClassCache.html +725 -0
- data/docs/rdocs/Longleaf/ServiceDateHelper.html +425 -0
- data/docs/rdocs/Longleaf/ServiceDefinition.html +683 -0
- data/docs/rdocs/Longleaf/ServiceDefinitionManager.html +371 -0
- data/docs/rdocs/Longleaf/ServiceDefinitionValidator.html +269 -0
- data/docs/rdocs/Longleaf/ServiceFields.html +173 -0
- data/docs/rdocs/Longleaf/ServiceManager.html +1229 -0
- data/docs/rdocs/Longleaf/ServiceMappingManager.html +410 -0
- data/docs/rdocs/Longleaf/ServiceMappingValidator.html +347 -0
- data/docs/rdocs/Longleaf/ServiceRecord.html +821 -0
- data/docs/rdocs/Longleaf/StorageLocation.html +985 -0
- data/docs/rdocs/Longleaf/StorageLocationManager.html +729 -0
- data/docs/rdocs/Longleaf/StorageLocationUnavailableError.html +143 -0
- data/docs/rdocs/Longleaf/StorageLocationValidator.html +373 -0
- data/docs/rdocs/Longleaf/StoragePathValidator.html +253 -0
- data/docs/rdocs/Longleaf/SystemConfigBuilder.html +441 -0
- data/docs/rdocs/Longleaf/SystemConfigFields.html +163 -0
- data/docs/rdocs/Longleaf/ValidateConfigCommand.html +451 -0
- data/docs/rdocs/Longleaf/ValidateMetadataCommand.html +408 -0
- data/docs/rdocs/_index.html +660 -0
- data/docs/rdocs/class_list.html +51 -0
- data/docs/rdocs/css/common.css +1 -0
- data/docs/rdocs/css/full_list.css +58 -0
- data/docs/rdocs/css/style.css +496 -0
- data/docs/rdocs/file.README.html +165 -0
- data/docs/rdocs/file_list.html +56 -0
- data/docs/rdocs/frames.html +17 -0
- data/docs/rdocs/index.html +165 -0
- data/docs/rdocs/js/app.js +303 -0
- data/docs/rdocs/js/full_list.js +216 -0
- data/docs/rdocs/js/jquery.js +4 -0
- data/docs/rdocs/method_list.html +2051 -0
- data/docs/rdocs/top-level-namespace.html +110 -0
- data/lib/longleaf/candidates/file_selector.rb +150 -0
- data/lib/longleaf/candidates/manifest_digest_provider.rb +17 -0
- data/lib/longleaf/candidates/physical_path_provider.rb +17 -0
- data/lib/longleaf/candidates/registered_file_selector.rb +67 -0
- data/lib/longleaf/candidates/service_candidate_filesystem_iterator.rb +93 -0
- data/lib/longleaf/candidates/service_candidate_index_iterator.rb +84 -0
- data/lib/longleaf/candidates/service_candidate_locator.rb +23 -0
- data/lib/longleaf/candidates/single_digest_provider.rb +13 -0
- data/lib/longleaf/cli.rb +252 -46
- data/lib/longleaf/commands/deregister_command.rb +51 -0
- data/lib/longleaf/commands/preserve_command.rb +50 -0
- data/lib/longleaf/commands/register_command.rb +34 -43
- data/lib/longleaf/commands/reindex_command.rb +92 -0
- data/lib/longleaf/commands/validate_config_command.rb +33 -8
- data/lib/longleaf/commands/validate_metadata_command.rb +51 -0
- data/lib/longleaf/errors.rb +26 -7
- data/lib/longleaf/events/deregister_event.rb +53 -0
- data/lib/longleaf/events/event_names.rb +9 -0
- data/lib/longleaf/events/event_status_tracking.rb +59 -0
- data/lib/longleaf/events/preserve_event.rb +82 -0
- data/lib/longleaf/events/register_event.rb +59 -51
- data/lib/longleaf/helpers/case_insensitive_hash.rb +38 -0
- data/lib/longleaf/helpers/digest_helper.rb +56 -0
- data/lib/longleaf/helpers/s3_uri_helper.rb +86 -0
- data/lib/longleaf/helpers/selection_options_parser.rb +215 -0
- data/lib/longleaf/helpers/service_date_helper.rb +78 -0
- data/lib/longleaf/indexing/index_manager.rb +101 -0
- data/lib/longleaf/indexing/sequel_index_driver.rb +306 -0
- data/lib/longleaf/logging.rb +5 -4
- data/lib/longleaf/logging/redirecting_logger.rb +30 -25
- data/lib/longleaf/models/app_fields.rb +7 -2
- data/lib/longleaf/models/file_record.rb +31 -8
- data/lib/longleaf/models/filesystem_metadata_location.rb +56 -0
- data/lib/longleaf/models/filesystem_storage_location.rb +52 -0
- data/lib/longleaf/models/md_fields.rb +3 -1
- data/lib/longleaf/models/metadata_location.rb +47 -0
- data/lib/longleaf/models/metadata_record.rb +43 -16
- data/lib/longleaf/models/s3_storage_location.rb +138 -0
- data/lib/longleaf/models/service_definition.rb +7 -6
- data/lib/longleaf/models/service_fields.rb +7 -1
- data/lib/longleaf/models/service_record.rb +10 -6
- data/lib/longleaf/models/storage_location.rb +24 -19
- data/lib/longleaf/models/storage_types.rb +9 -0
- data/lib/longleaf/models/system_config_fields.rb +9 -0
- data/lib/longleaf/preservation_services/file_check_service.rb +59 -0
- data/lib/longleaf/preservation_services/fixity_check_service.rb +124 -0
- data/lib/longleaf/preservation_services/rsync_replication_service.rb +198 -0
- data/lib/longleaf/preservation_services/s3_replication_service.rb +131 -0
- data/lib/longleaf/services/application_config_deserializer.rb +81 -24
- data/lib/longleaf/services/application_config_manager.rb +20 -6
- data/lib/longleaf/services/application_config_validator.rb +19 -9
- data/lib/longleaf/services/configuration_validator.rb +67 -4
- data/lib/longleaf/services/filesystem_location_validator.rb +16 -0
- data/lib/longleaf/services/metadata_deserializer.rb +115 -42
- data/lib/longleaf/services/metadata_persistence_manager.rb +47 -0
- data/lib/longleaf/services/metadata_serializer.rb +156 -23
- data/lib/longleaf/services/metadata_validator.rb +76 -0
- data/lib/longleaf/services/s3_location_validator.rb +19 -0
- data/lib/longleaf/services/service_class_cache.rb +112 -0
- data/lib/longleaf/services/service_definition_manager.rb +10 -7
- data/lib/longleaf/services/service_definition_validator.rb +25 -18
- data/lib/longleaf/services/service_manager.rb +86 -11
- data/lib/longleaf/services/service_mapping_manager.rb +13 -12
- data/lib/longleaf/services/service_mapping_validator.rb +36 -26
- data/lib/longleaf/services/storage_location_manager.rb +76 -15
- data/lib/longleaf/services/storage_location_validator.rb +49 -35
- data/lib/longleaf/specs/config_builder.rb +47 -23
- data/lib/longleaf/specs/config_validator_helpers.rb +16 -0
- data/lib/longleaf/specs/custom_matchers.rb +9 -0
- data/lib/longleaf/specs/file_helpers.rb +61 -0
- data/lib/longleaf/specs/metadata_builder.rb +98 -0
- data/lib/longleaf/specs/system_config_builder.rb +27 -0
- data/lib/longleaf/version.rb +1 -1
- data/longleaf.gemspec +20 -7
- data/mkdocs.yml +21 -0
- metadata +308 -24
- data/.travis.yml +0 -4
- data/lib/longleaf/commands/abstract_command.rb +0 -37
- data/lib/longleaf/services/storage_path_validator.rb +0 -16
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
require 'uri'
|
|
2
|
+
|
|
3
|
+
module Longleaf
  # Helper for interacting with s3 uris
  class S3UriHelper
    # Matches the hostname of an S3 endpoint.
    # Capture 1 is the optional host prefix that precedes "s3", including its trailing '.'
    # (the bucket name for virtual-hosted style urls).
    # Capture 2 is the optional region segment, including its trailing '.' or '-'.
    ENDPOINT_PATTERN = /^(.+\.)?s3[.\-]([a-z0-9\-]+[\-.])?[a-z0-9]+\./
    ALLOWED_SCHEMES = ['http', 'https', 's3']

    # Extract the name of the s3 bucket from the provided url
    # @param url s3 url
    # @return the name of the bucket, or nil if the name could not be identified
    # @raise [ArgumentError] if the url is nil, has a disallowed scheme, has no host,
    #    or its host does not look like an S3 endpoint
    def self.extract_bucket(url)
      uri = s3_uri(url)
      prefix = match_endpoint(uri)[1]

      # Virtual-hosted style url: the bucket is the host prefix without its trailing '.'
      return prefix[0..-2] unless prefix.nil? || prefix.empty?

      # Path style url: the bucket is the first path segment. Splitting '/' or an empty path
      # produces no segments, so the lookup yields nil in those cases.
      uri.path.split('/')[1]
    end

    # Extract the path of the object within its bucket from the provided url
    # @param url s3 url
    # @return the path relative to the bucket, or nil if the url contains no path
    # @raise [ArgumentError] if the url is nil, has a disallowed scheme, has no host,
    #    or its host does not look like an S3 endpoint
    def self.extract_path(url)
      uri = s3_uri(url)
      prefix = match_endpoint(uri)[1]

      path = uri.path
      return nil if path == '/' || path.empty?

      # trim off the first slash
      path = path.partition('/').last

      # For path style urls the first segment of the path is the bucket name, so trim it off too
      path = path.partition('/').last if prefix.nil? || prefix.empty?

      path
    end

    # Extract the region from the hostname of the provided url
    # @param url s3 url
    # @return the region, or nil if the hostname does not specify one
    # @raise [ArgumentError] if the url is nil, has a disallowed scheme, has no host,
    #    or its host does not look like an S3 endpoint
    def self.extract_region(url)
      uri = s3_uri(url)
      region = match_endpoint(uri)[2]

      # No region specified
      return nil if region.nil?

      # Drop the trailing separator captured along with the region
      region[0..-2]
    end

    # Parse and validate the provided url
    # @param url s3 url
    # @return [URI] the parsed uri
    # @raise [ArgumentError] if the url is nil, has a disallowed scheme, or has no host
    def self.s3_uri(url)
      if url.nil?
        raise ArgumentError.new("url cannot be empty")
      end

      uri = URI(url)
      unless ALLOWED_SCHEMES.include?(uri.scheme&.downcase)
        raise ArgumentError.new("Invalid scheme for s3 URI #{url}, only http, https and s3 are permitted")
      end
      if uri.host.nil?
        raise ArgumentError.new("Invalid S3 URI, no hostname: #{url}")
      end
      uri
    end

    # Match the host of the uri against the S3 endpoint pattern
    # @param uri [URI] parsed uri
    # @return [MatchData] the match against ENDPOINT_PATTERN
    # @raise [ArgumentError] if the host does not match the pattern
    def self.match_endpoint(uri)
      matches = ENDPOINT_PATTERN.match(uri.host)
      if matches.nil?
        raise ArgumentError.new("Provided URI does not match the expected pattern for an S3 URI")
      end
      matches
    end
    private_class_method :match_endpoint
  end
end
|
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
require 'longleaf/candidates/file_selector'
|
|
2
|
+
require 'longleaf/candidates/registered_file_selector'
|
|
3
|
+
require 'longleaf/candidates/manifest_digest_provider'
|
|
4
|
+
require 'longleaf/candidates/physical_path_provider'
|
|
5
|
+
require 'longleaf/candidates/single_digest_provider'
|
|
6
|
+
|
|
7
|
+
module Longleaf
  # Helper for parsing manifest inputs used for registration
  class SelectionOptionsParser
    extend Longleaf::Logging

    # Parses the provided options to construct a file selector and digest provider for
    # use in registration commands. Logs a failure and exits with status 1 if the options
    # are invalid.
    # @param options [Hash] command options
    # @param app_config_manager [ApplicationConfigManager] app config manager
    # @return [Array] the file selector, digest provider and physical path provider, in that order.
    #    The digest provider is nil when files are selected with :file and no :checksums are given.
    def self.parse_registration_selection_options(options, app_config_manager)
      there_can_be_only_one("Only one of the following selection options may be provided: -m, -f, -s",
          options, :file, :manifest, :location)

      if !options[:manifest].nil?
        digests_mapping, logical_phys_mapping = self.parse_manifest(options[:manifest])
        physical_provider = PhysicalPathProvider.new(logical_phys_mapping)
        selector = FileSelector.new(file_paths: digests_mapping.keys,
            physical_provider: physical_provider,
            app_config: app_config_manager)
        digest_provider = ManifestDigestProvider.new(digests_mapping)
      elsif !options[:file].nil?
        if options[:checksums]
          checksums = options[:checksums]
          # validate checksum list format, must be a comma delimited list of prefix:checksums
          if /^[^:,]+:[^:,]+(,[^:,]+:[^:,]+)*$/.match(checksums)
            # convert checksum list into hash with prefix as key
            checksums = Hash[*checksums.split(/\s*[:,]\s*/)]
            digest_provider = SingleDigestProvider.new(checksums)
          else
            logger.failure("Invalid checksums parameter format, see `longleaf help <command>` for more information")
            exit 1
          end
        end

        file_paths = self.split_quoted(options[:file], "\\s*,\\s*")
        if !options[:physical_path].nil?
          physical_paths = self.split_quoted(options[:physical_path], "\\s*,\\s*")
          if physical_paths.length != file_paths.length
            logger.failure("Invalid physical paths parameter, number of paths did not match number of logical paths")
            exit 1
          end
          # Pair each logical path with the physical path at the same position
          logical_phys_mapping = Hash[file_paths.zip physical_paths]
          physical_provider = PhysicalPathProvider.new(logical_phys_mapping)
        else
          physical_provider = PhysicalPathProvider.new
        end

        selector = FileSelector.new(file_paths: file_paths,
            physical_provider: physical_provider,
            app_config: app_config_manager)
      else
        logger.failure("Must provide one of the following file selection options: -f, l, or -m")
        exit 1
      end

      [selector, digest_provider, physical_provider]
    end

    # Verifies that no more than one of the named options has a value. Logs the failure
    # message and exits with status 1 if more than one is set.
    # @param failure_msg [String] message to log if more than one option is set
    # @param options [Hash] command options
    # @param names names of the options which are mutually exclusive
    def self.there_can_be_only_one(failure_msg, options, *names)
      provided = names.count { |name| !options[name].nil? }
      return if provided <= 1

      logger.failure(failure_msg)
      exit 1
    end

    # Parses the provided manifest options, reading the contents of the manifests to produce
    # a mapping from files to one or more algorithms.
    # @param manifest_vals [Array] List of manifest option values. They may be in one of the following formats:
    #    <alg_name>:<manifest_path> OR <alg_name>:@-
    #    <manifest_path> OR @-
    # @return [Array] a two element array. The first element is a hash containing the aggregated
    #    contents of the provided manifests. The keys are paths to manifested files. The values are
    #    hashes, mapping digest algorithms to digest values. The second element is a hash mapping
    #    manifested file paths to physical paths, for entries which provided a third field.
    def self.parse_manifest(manifest_vals)
      # interpret option inputs into a list of algorithms to manifest sources
      alg_manifest_pairs = manifest_vals.map do |manifest_val|
        if manifest_val.include?(':')
          manifest_val.split(':', 2)
        else
          # algorithm not specified in option value
          [nil, manifest_val]
        end
      end
      if alg_manifest_pairs.count { |_alg, source| source == '@-' } > 1
        self.fail("Cannot specify more than one manifest from STDIN")
      end

      # read the provided manifests to build a mapping from file uri to all supplied digests
      digests_mapping = Hash.new { |h, k| h[k] = Hash.new }
      logical_phys_mapping = Hash.new
      alg_manifest_pairs.each do |alg, source|
        # Determine if reading from a manifest file or stdin
        if source == '@-'
          read_manifest_entries($stdin, alg, digests_mapping, logical_phys_mapping)
        else
          # Block form ensures the manifest file is closed, even if parsing fails or exits
          File.open(source) do |manifest_file|
            read_manifest_entries(manifest_file, alg, digests_mapping, logical_phys_mapping)
          end
        end
      end

      [digests_mapping, logical_phys_mapping]
    end

    # Reads the entries of a single manifest into the provided mappings.
    # @param source_stream stream to read manifest lines from
    # @param current_alg [String] digest algorithm for the entries, or nil if the manifest is
    #    expected to declare its algorithms using "<alg_name>:" header lines
    # @param digests_mapping [Hash] mapping of file path to digests, which entries are added to
    # @param logical_phys_mapping [Hash] mapping of file path to physical path, which is added to
    #    for entries that have a third field
    def self.read_manifest_entries(source_stream, current_alg, digests_mapping, logical_phys_mapping)
      multi_digest_manifest = current_alg.nil?
      source_stream.each_line do |line|
        line = line.strip
        if multi_digest_manifest && /^[a-zA-Z0-9]+:$/ =~ line
          # Found a digest algorithm header, assuming succeeding entries are of this type
          current_alg = line.chomp(':')
          # Verify that the digest algorithm is known to longleaf
          if !DigestHelper.is_known_algorithm?(current_alg)
            self.fail("Manifest specifies unknown digest algorithm: #{current_alg}")
          end
        else
          if current_alg.nil?
            self.fail("Manifest with unknown checksums encountered, an algorithm must be specified")
          end
          # Entries are "<digest> <path>" with an optional third "<physical path>" field
          entry_parts = self.split_quoted(line)
          if entry_parts.length != 2 && entry_parts.length != 3
            self.fail("Invalid manifest entry: #{line}")
          end

          digests_mapping[entry_parts[1]][current_alg] = entry_parts[0]
          if entry_parts.length == 3
            logical_phys_mapping[entry_parts[1]] = entry_parts[2]
          end
        end
      end
    end
    private_class_method :read_manifest_entries

    # Splits a string of quoted or unquoted tokens. Delimiters which fall inside of a single or
    # double quoted section are not split on. Empty tokens are discarded, and surrounding spaces
    # and quote characters are removed from each token.
    # @param text [String] the text to split
    # @param delimiter [String] regular expression source for the delimiter between tokens.
    #    Defaults to one or more whitespace characters.
    # @param limit [Integer] limit passed through to String#split
    # @return [Array<String>] the tokens
    def self.split_quoted(text, delimiter = "\\s+", limit = -1)
      # The lookahead only accepts a delimiter when the rest of the text has balanced quotes
      text.split(/#{delimiter}(?=(?:[^'"]|'[^']*'|"[^"]*")*$)/, limit)
          .reject(&:empty?)
          .map { |s| s.gsub(/(^ +)|( +$)|(^["']+)|(["']+$)/, '') }
    end

    # Parses the provided options to create a selector for registered files. Logs a failure and
    # exits with status 1 if the options are invalid.
    # @param options [Hash] command options
    # @param app_config_manager [ApplicationConfigManager] app config manager
    # @return selector
    def self.create_registered_selector(options, app_config_manager)
      there_can_be_only_one("Only one of the following selection options may be provided: -l, -f, -s",
          options, :file, :location, :from_list)

      if !options[:from_list].nil?
        file_paths = read_from_list(options[:from_list])
        return RegisteredFileSelector.new(file_paths: file_paths, app_config: app_config_manager)
      elsif !options[:file].nil?
        file_paths = options[:file].split(/\s*,\s*/)
        return RegisteredFileSelector.new(file_paths: file_paths, app_config: app_config_manager)
      elsif !options[:location].nil?
        storage_locations = options[:location].split(/\s*,\s*/)
        return RegisteredFileSelector.new(storage_locations: storage_locations, app_config: app_config_manager)
      else
        logger.failure("Must provide one of the following file selection options: -l, -f, or -s")
        exit 1
      end
    end

    # Parses the -l from_list option, reading the list of files specified either from the provided
    # file path or STDIN. Logs a failure and exits with status 1 if the option is blank, the list
    # file does not exist, or the list contains no lines.
    # @param from_list option value, either a file path or "@-"
    # @return list of files from the from_list
    def self.read_from_list(from_list)
      from_list = from_list.strip
      if from_list.empty?
        logger.failure("List parameter must not be empty")
        exit 1
      end

      if from_list == '@-'
        lines = $stdin.each_line.map(&:strip)
      else
        begin
          # File.foreach opens the list file and closes it again once all lines are read
          lines = File.foreach(from_list).map(&:strip)
        rescue Errno::ENOENT
          logger.failure("Specified list file does not exist: #{from_list}")
          exit 1
        end
      end

      if lines.empty?
        logger.failure("File list is empty, must provide one or more files for this operation")
        exit 1
      end
      lines
    end

    # Logs the provided failure message and exits with status 1
    # @param message [String] message to log
    def self.fail(message)
      logger.failure(message)
      exit 1
    end
  end
end
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
require 'time'
|
|
2
|
+
|
|
3
|
+
module Longleaf
  # Helper methods for interacting with dates/timestamps on services
  class ServiceDateHelper
    # Pattern for time modifiers: a quantity, followed by a time unit (optionally plural),
    # optionally followed by a comma and further text.
    MODIFIER_PATTERN = /^(\d+) *(second|minute|hour|day|week|month|year)s?(,.*)?/

    # Number of seconds represented by each time unit. Months are treated as 30 days
    # and years as 365 days.
    SECONDS_PER_UNIT = {
      'second' => 1,
      'minute' => 60,
      'hour' => 3600,
      'day' => 24 * 3600,
      'week' => 7 * 24 * 3600,
      'month' => 30 * 24 * 3600,
      'year' => 365 * 24 * 3600
    }.freeze

    # Adds the amount of time from modifier to the provided timestamp
    # @param timestamp [String] ISO-8601 timestamp string
    # @param modifier [String] amount of time to add to the timestamp. It must follow the syntax
    #    "<quantity> <time unit>", where quantity must be a positive whole number and time unit
    #    must be second, minute, hour, day, week, month or year (unit may be plural).
    #    Any info after a comma will be ignored.
    # @return [String] the original timestamp in ISO-8601 format with the provided amount of time added.
    # @raise [ArgumentError] if the modifier cannot be parsed, or the timestamp is not valid ISO-8601
    def self.add_to_timestamp(timestamp, modifier)
      parsed = MODIFIER_PATTERN.match(modifier.to_s)
      raise ArgumentError.new("Cannot parse time modifier #{modifier}") if parsed.nil?

      quantity = parsed[1].to_i
      seconds_per_unit = SECONDS_PER_UNIT[parsed[2]]

      modified_time = Time.iso8601(timestamp) + (quantity * seconds_per_unit)
      modified_time.iso8601(3)
    end

    # Get a timestamp in the format expected for service timestamps.
    # @param timestamp [Time] the time to format. Defaults to now.
    # @return [String] the time formatted as iso8601
    def self.formatted_timestamp(timestamp = Time.now)
      # getutc returns a new Time, whereas utc would convert the caller's object in place
      timestamp.getutc.iso8601(3)
    end

    # Get the timestamp for the next time the provided service would need to be run
    # for the object described by md_rec
    # @param md_rec [MetadataRecord] metadata record for the file
    # @param service_def [ServiceDefinition] definition for the service
    # @return [String] iso8601 timestamp for the next time the service will need to run, or
    #    nil if the service does not need to run again.
    # @raise [ArgumentError] if md_rec or service_def is nil
    def self.next_run_needed(md_rec, service_def)
      raise ArgumentError.new('Must provide a md_rec parameter') if md_rec.nil?
      raise ArgumentError.new('Must provide a service_def parameter') if service_def.nil?

      service_rec = md_rec.service(service_def.name)

      # Service has no recorded run, so it is due relative to the registration time
      if service_rec.nil? || service_rec.timestamp.nil?
        return md_rec.registered if service_def.delay.nil?

        return add_to_timestamp(md_rec.registered, service_def.delay)
      end

      # Service has a recorded run, so it only needs to run again if it has a frequency
      return nil if service_def.frequency.nil?

      add_to_timestamp(service_rec.timestamp, service_def.frequency)
    end
  end
end
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
require 'longleaf/models/system_config_fields'
|
|
2
|
+
require 'longleaf/services/metadata_persistence_manager'
|
|
3
|
+
require 'longleaf/errors'
|
|
4
|
+
|
|
5
|
+
module Longleaf
  # Manager configures and provides access to a metadata index if one is specified
  class IndexManager
    SYS_FIELDS ||= Longleaf::SystemConfigFields

    # Initializes the index driver if the system configuration contains a metadata index section.
    # @param config [Hash] The system configuration as a hash
    # @param app_config_manager [ApplicationConfigManager] the application config
    # @raise [ConfigurationError] if the metadata index section lacks an adapter, specifies an
    #    unknown adapter, or lacks connection details
    def initialize(config, app_config_manager)
      @config = config
      @app_config_manager = app_config_manager
      init_index_driver if @config&.key?(SYS_FIELDS::MD_INDEX)
    end

    # @return true if the system is configured to use a metadata index
    def using_index?
      !@index_driver.nil?
    end

    # Index the provided file_rec and its metadata
    #
    # @param file_rec [FileRecord] file record to index
    def index(file_rec)
      @index_driver.index(file_rec)
    end

    # Remove an entry from the index
    # @param remove_me The record to remove from the index
    def remove(remove_me)
      @index_driver.remove(remove_me)
    end

    # Clear entries from the index, delegating to the index driver
    # @param older_than passed through to the index driver. Defaults to nil.
    def clear_index(older_than = nil)
      @index_driver.clear_index(older_than)
    end

    # @return true if the index should be reindexed
    def index_stale?
      @index_driver.is_stale?
    end

    # Setup initial structure of index implementation
    def setup_index
      @index_driver.setup_index
    end

    # Update the state of the index, delegating to the index driver
    def update_index_state
      @index_driver.update_index_state
    end

    # Retrieves a set of file paths which have one or more services which need to run.
    #
    # @param file_selector [FileSelector] selector for paths to search for files
    # @param stale_datetime [DateTime] find file_paths with services needing to be run before this value
    # @return [Array] array of file paths that need one or more services run, in ascending order by
    #    timestamp.
    def paths_with_stale_services(file_selector, stale_datetime)
      @index_driver.paths_with_stale_services(file_selector, stale_datetime)
    end

    # Retrieves a page of paths for registered files.
    # @param file_selector [FileSelector] selector for what paths to search for files
    # @return [Array] array of file paths that are registered
    def registered_paths(file_selector)
      @index_driver.registered_paths(file_selector)
    end

    # Passes the block through to the index driver's each_registered_path
    # @param file_selector [FileSelector] selector for what paths to search for files
    # @param older_than passed through to the index driver. Defaults to nil.
    def each_registered_path(file_selector, older_than: nil, &block)
      @index_driver.each_registered_path(file_selector, older_than: older_than, &block)
    end

    private

    # Builds the index driver described by the metadata index section of the system configuration.
    # @raise [ConfigurationError] if no adapter is specified, the adapter is not a known type,
    #    or no connection details are specified
    def init_index_driver
      index_conf = @config[SYS_FIELDS::MD_INDEX]
      adapter = index_conf[SYS_FIELDS::MD_INDEX_ADAPTER]&.downcase

      raise ConfigurationError.new('Must specify an adapter for the metadata index') if adapter.nil?

      adapter = adapter.to_sym

      case adapter
      when :postgres, :mysql, :mysql2, :sqlite, :amalgalite
        page_size = index_conf[SYS_FIELDS::MD_INDEX_PAGE_SIZE]&.to_int

        connection = index_conf[SYS_FIELDS::MD_INDEX_CONNECTION]
        raise ConfigurationError.new("Must specify connection details for index adapter of type '#{adapter}'") if connection.nil?

        # Only load the driver once an index is actually configured
        require 'longleaf/indexing/sequel_index_driver'
        @index_driver = SequelIndexDriver.new(@app_config_manager,
            adapter,
            connection,
            page_size: page_size)
      else
        # adapter is always non-nil here, so an unrecognized adapter must be rejected unconditionally
        raise ConfigurationError.new("Unknown index adapter '#{adapter}' specified.")
      end
    end
  end
end
|