longleaf 0.1.0.pre.3 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.circleci/config.yml +94 -0
- data/.editorconfig +13 -0
- data/.gitignore +4 -1
- data/.rubocop.yml +44 -0
- data/.rubocop_todo.yml +834 -0
- data/.yardopts +1 -0
- data/Gemfile +16 -1
- data/README.md +98 -12
- data/Rakefile +6 -0
- data/bin/setup +16 -1
- data/docs/aboutlongleaf.md +28 -0
- data/docs/extra.css +32 -0
- data/docs/img/change-file.png +0 -0
- data/docs/img/ll-example-preserved.png +0 -0
- data/docs/index.md +19 -0
- data/docs/install.md +66 -0
- data/docs/ll-example/config-example-relative.yml +33 -0
- data/docs/ll-example/files-dir/LLexample-PDF.pdf +0 -0
- data/docs/ll-example/files-dir/LLexample-TOCHANGE.txt +15 -0
- data/docs/ll-example/files-dir/LLexample-tokeep.txt +10 -0
- data/docs/ll-example/metadata-dir/.gitkeep +0 -0
- data/docs/ll-example/replica-files/.gitkeep +0 -0
- data/docs/ll-example/replica-metadata/.gitkeep +0 -0
- data/docs/quickstart.md +270 -0
- data/docs/rdocs/Longleaf.html +135 -0
- data/docs/rdocs/Longleaf/AppFields.html +178 -0
- data/docs/rdocs/Longleaf/ApplicationConfigDeserializer.html +631 -0
- data/docs/rdocs/Longleaf/ApplicationConfigManager.html +610 -0
- data/docs/rdocs/Longleaf/ApplicationConfigValidator.html +238 -0
- data/docs/rdocs/Longleaf/CLI.html +909 -0
- data/docs/rdocs/Longleaf/ChecksumMismatchError.html +151 -0
- data/docs/rdocs/Longleaf/ConfigBuilder.html +1339 -0
- data/docs/rdocs/Longleaf/ConfigurationError.html +143 -0
- data/docs/rdocs/Longleaf/ConfigurationValidator.html +227 -0
- data/docs/rdocs/Longleaf/DeregisterCommand.html +420 -0
- data/docs/rdocs/Longleaf/DeregisterEvent.html +453 -0
- data/docs/rdocs/Longleaf/DeregistrationError.html +151 -0
- data/docs/rdocs/Longleaf/DigestHelper.html +419 -0
- data/docs/rdocs/Longleaf/EventError.html +147 -0
- data/docs/rdocs/Longleaf/EventNames.html +163 -0
- data/docs/rdocs/Longleaf/EventStatusTracking.html +656 -0
- data/docs/rdocs/Longleaf/FileCheckService.html +540 -0
- data/docs/rdocs/Longleaf/FileHelpers.html +520 -0
- data/docs/rdocs/Longleaf/FileRecord.html +716 -0
- data/docs/rdocs/Longleaf/FileSelector.html +901 -0
- data/docs/rdocs/Longleaf/FixityCheckService.html +691 -0
- data/docs/rdocs/Longleaf/IndexManager.html +1155 -0
- data/docs/rdocs/Longleaf/InvalidDigestAlgorithmError.html +143 -0
- data/docs/rdocs/Longleaf/InvalidStoragePathError.html +143 -0
- data/docs/rdocs/Longleaf/Logging.html +405 -0
- data/docs/rdocs/Longleaf/Logging/RedirectingLogger.html +1213 -0
- data/docs/rdocs/Longleaf/LongleafError.html +139 -0
- data/docs/rdocs/Longleaf/MDFields.html +193 -0
- data/docs/rdocs/Longleaf/MetadataBuilder.html +787 -0
- data/docs/rdocs/Longleaf/MetadataDeserializer.html +537 -0
- data/docs/rdocs/Longleaf/MetadataError.html +143 -0
- data/docs/rdocs/Longleaf/MetadataPersistenceManager.html +539 -0
- data/docs/rdocs/Longleaf/MetadataRecord.html +1411 -0
- data/docs/rdocs/Longleaf/MetadataSerializer.html +786 -0
- data/docs/rdocs/Longleaf/PreservationServiceError.html +147 -0
- data/docs/rdocs/Longleaf/PreserveCommand.html +410 -0
- data/docs/rdocs/Longleaf/PreserveEvent.html +491 -0
- data/docs/rdocs/Longleaf/RegisterCommand.html +428 -0
- data/docs/rdocs/Longleaf/RegisterEvent.html +628 -0
- data/docs/rdocs/Longleaf/RegisteredFileSelector.html +446 -0
- data/docs/rdocs/Longleaf/RegistrationError.html +151 -0
- data/docs/rdocs/Longleaf/ReindexCommand.html +576 -0
- data/docs/rdocs/Longleaf/RsyncReplicationService.html +1180 -0
- data/docs/rdocs/Longleaf/SequelIndexDriver.html +1978 -0
- data/docs/rdocs/Longleaf/ServiceCandidateFilesystemIterator.html +572 -0
- data/docs/rdocs/Longleaf/ServiceCandidateIndexIterator.html +532 -0
- data/docs/rdocs/Longleaf/ServiceCandidateLocator.html +333 -0
- data/docs/rdocs/Longleaf/ServiceClassCache.html +725 -0
- data/docs/rdocs/Longleaf/ServiceDateHelper.html +425 -0
- data/docs/rdocs/Longleaf/ServiceDefinition.html +683 -0
- data/docs/rdocs/Longleaf/ServiceDefinitionManager.html +371 -0
- data/docs/rdocs/Longleaf/ServiceDefinitionValidator.html +269 -0
- data/docs/rdocs/Longleaf/ServiceFields.html +173 -0
- data/docs/rdocs/Longleaf/ServiceManager.html +1229 -0
- data/docs/rdocs/Longleaf/ServiceMappingManager.html +410 -0
- data/docs/rdocs/Longleaf/ServiceMappingValidator.html +347 -0
- data/docs/rdocs/Longleaf/ServiceRecord.html +821 -0
- data/docs/rdocs/Longleaf/StorageLocation.html +985 -0
- data/docs/rdocs/Longleaf/StorageLocationManager.html +729 -0
- data/docs/rdocs/Longleaf/StorageLocationUnavailableError.html +143 -0
- data/docs/rdocs/Longleaf/StorageLocationValidator.html +373 -0
- data/docs/rdocs/Longleaf/StoragePathValidator.html +253 -0
- data/docs/rdocs/Longleaf/SystemConfigBuilder.html +441 -0
- data/docs/rdocs/Longleaf/SystemConfigFields.html +163 -0
- data/docs/rdocs/Longleaf/ValidateConfigCommand.html +451 -0
- data/docs/rdocs/Longleaf/ValidateMetadataCommand.html +408 -0
- data/docs/rdocs/_index.html +660 -0
- data/docs/rdocs/class_list.html +51 -0
- data/docs/rdocs/css/common.css +1 -0
- data/docs/rdocs/css/full_list.css +58 -0
- data/docs/rdocs/css/style.css +496 -0
- data/docs/rdocs/file.README.html +165 -0
- data/docs/rdocs/file_list.html +56 -0
- data/docs/rdocs/frames.html +17 -0
- data/docs/rdocs/index.html +165 -0
- data/docs/rdocs/js/app.js +303 -0
- data/docs/rdocs/js/full_list.js +216 -0
- data/docs/rdocs/js/jquery.js +4 -0
- data/docs/rdocs/method_list.html +2051 -0
- data/docs/rdocs/top-level-namespace.html +110 -0
- data/lib/longleaf/candidates/file_selector.rb +150 -0
- data/lib/longleaf/candidates/manifest_digest_provider.rb +17 -0
- data/lib/longleaf/candidates/physical_path_provider.rb +17 -0
- data/lib/longleaf/candidates/registered_file_selector.rb +67 -0
- data/lib/longleaf/candidates/service_candidate_filesystem_iterator.rb +93 -0
- data/lib/longleaf/candidates/service_candidate_index_iterator.rb +84 -0
- data/lib/longleaf/candidates/service_candidate_locator.rb +23 -0
- data/lib/longleaf/candidates/single_digest_provider.rb +13 -0
- data/lib/longleaf/cli.rb +249 -44
- data/lib/longleaf/commands/deregister_command.rb +51 -0
- data/lib/longleaf/commands/preserve_command.rb +50 -0
- data/lib/longleaf/commands/register_command.rb +34 -43
- data/lib/longleaf/commands/reindex_command.rb +92 -0
- data/lib/longleaf/commands/validate_config_command.rb +33 -8
- data/lib/longleaf/commands/validate_metadata_command.rb +51 -0
- data/lib/longleaf/errors.rb +26 -7
- data/lib/longleaf/events/deregister_event.rb +53 -0
- data/lib/longleaf/events/event_names.rb +9 -0
- data/lib/longleaf/events/event_status_tracking.rb +59 -0
- data/lib/longleaf/events/preserve_event.rb +82 -0
- data/lib/longleaf/events/register_event.rb +59 -51
- data/lib/longleaf/helpers/case_insensitive_hash.rb +38 -0
- data/lib/longleaf/helpers/digest_helper.rb +56 -0
- data/lib/longleaf/helpers/s3_uri_helper.rb +86 -0
- data/lib/longleaf/helpers/selection_options_parser.rb +215 -0
- data/lib/longleaf/helpers/service_date_helper.rb +78 -0
- data/lib/longleaf/indexing/index_manager.rb +101 -0
- data/lib/longleaf/indexing/sequel_index_driver.rb +306 -0
- data/lib/longleaf/logging.rb +5 -4
- data/lib/longleaf/logging/redirecting_logger.rb +30 -25
- data/lib/longleaf/models/app_fields.rb +7 -2
- data/lib/longleaf/models/file_record.rb +31 -8
- data/lib/longleaf/models/filesystem_metadata_location.rb +56 -0
- data/lib/longleaf/models/filesystem_storage_location.rb +52 -0
- data/lib/longleaf/models/md_fields.rb +3 -1
- data/lib/longleaf/models/metadata_location.rb +47 -0
- data/lib/longleaf/models/metadata_record.rb +43 -16
- data/lib/longleaf/models/s3_storage_location.rb +138 -0
- data/lib/longleaf/models/service_definition.rb +7 -6
- data/lib/longleaf/models/service_fields.rb +7 -1
- data/lib/longleaf/models/service_record.rb +10 -6
- data/lib/longleaf/models/storage_location.rb +24 -21
- data/lib/longleaf/models/storage_types.rb +9 -0
- data/lib/longleaf/models/system_config_fields.rb +9 -0
- data/lib/longleaf/preservation_services/file_check_service.rb +59 -0
- data/lib/longleaf/preservation_services/fixity_check_service.rb +124 -0
- data/lib/longleaf/preservation_services/rsync_replication_service.rb +198 -0
- data/lib/longleaf/preservation_services/s3_replication_service.rb +131 -0
- data/lib/longleaf/services/application_config_deserializer.rb +80 -21
- data/lib/longleaf/services/application_config_manager.rb +20 -6
- data/lib/longleaf/services/application_config_validator.rb +19 -9
- data/lib/longleaf/services/configuration_validator.rb +67 -4
- data/lib/longleaf/services/filesystem_location_validator.rb +16 -0
- data/lib/longleaf/services/metadata_deserializer.rb +115 -42
- data/lib/longleaf/services/metadata_persistence_manager.rb +47 -0
- data/lib/longleaf/services/metadata_serializer.rb +139 -25
- data/lib/longleaf/services/metadata_validator.rb +76 -0
- data/lib/longleaf/services/s3_location_validator.rb +19 -0
- data/lib/longleaf/services/service_class_cache.rb +112 -0
- data/lib/longleaf/services/service_definition_manager.rb +10 -7
- data/lib/longleaf/services/service_definition_validator.rb +25 -18
- data/lib/longleaf/services/service_manager.rb +86 -11
- data/lib/longleaf/services/service_mapping_manager.rb +13 -12
- data/lib/longleaf/services/service_mapping_validator.rb +36 -26
- data/lib/longleaf/services/storage_location_manager.rb +76 -15
- data/lib/longleaf/services/storage_location_validator.rb +49 -35
- data/lib/longleaf/specs/config_builder.rb +47 -23
- data/lib/longleaf/specs/config_validator_helpers.rb +16 -0
- data/lib/longleaf/specs/custom_matchers.rb +9 -0
- data/lib/longleaf/specs/file_helpers.rb +61 -0
- data/lib/longleaf/specs/metadata_builder.rb +98 -0
- data/lib/longleaf/specs/system_config_builder.rb +27 -0
- data/lib/longleaf/version.rb +1 -1
- data/longleaf.gemspec +20 -7
- data/mkdocs.yml +21 -0
- metadata +310 -26
- data/.travis.yml +0 -4
- data/lib/longleaf/commands/abstract_command.rb +0 -37
- data/lib/longleaf/services/storage_path_validator.rb +0 -16
@@ -0,0 +1,86 @@
|
|
1
|
+
require 'uri'
|
2
|
+
|
3
|
+
module Longleaf
  # Helper for interacting with s3 uris
  class S3UriHelper
    # Matches the hostname of an S3 endpoint.
    # Capture 1: optional host prefix in front of "s3" (the bucket name for virtual-hosted
    #   style urls), including its trailing dot.
    # Capture 2: optional region segment, including its trailing '-' or '.'.
    ENDPOINT_PATTERN = /^(.+\.)?s3[.\-]([a-z0-9\-]+[\-.])?[a-z0-9]+\./
    ALLOWED_SCHEMES = ['http', 'https', 's3'].freeze

    # Extract the name of the s3 bucket from the provided url
    # @param url s3 url
    # @return the name of the bucket, or nil if the name could not be identified
    # @raise [ArgumentError] if the url is nil, has a disallowed scheme, has no hostname,
    #   or its hostname does not match the S3 endpoint pattern
    def self.extract_bucket(url)
      uri = s3_uri(url)
      prefix = match_endpoint(uri)[1]

      # Virtual-hosted style url: the bucket is the host prefix without its trailing dot
      return prefix[0..-2] unless prefix.nil? || prefix.empty?

      # Path style url: the bucket is the first path segment. Splitting "/" or "" yields
      # no segments, in which case the lookup returns nil.
      uri.path.split('/')[1]
    end

    # Extract the path of the object within its bucket from the provided url
    # @param url s3 url
    # @return the path without a leading slash (and without the bucket name for path style
    #   urls), or nil if the url has no path
    # @raise [ArgumentError] under the same conditions as extract_bucket
    def self.extract_path(url)
      uri = s3_uri(url)
      prefix = match_endpoint(uri)[1]

      path = uri.path
      return nil if path == '/' || path.empty?

      # trim off the first slash
      path = path.partition('/').last

      # For path style urls the first part of the path is the bucket name, trim it off
      path = path.partition('/').last if prefix.nil? || prefix.empty?

      path
    end

    # Extract the region from the hostname of the provided url
    # @param url s3 url
    # @return the region, or nil if the hostname does not specify one
    # @raise [ArgumentError] under the same conditions as extract_bucket
    def self.extract_region(url)
      uri = s3_uri(url)
      region = match_endpoint(uri)[2]

      # No region specified
      return nil if region.nil?

      # Drop the trailing separator captured along with the region
      region[0..-2]
    end

    # Parse and validate the provided url
    # @param url s3 url
    # @return [URI] the parsed uri
    # @raise [ArgumentError] if the url is nil, its scheme is not one of ALLOWED_SCHEMES,
    #   or it has no hostname
    def self.s3_uri(url)
      if url.nil?
        raise ArgumentError.new("url cannot be empty")
      end
      uri = URI(url)
      if !ALLOWED_SCHEMES.include?(uri.scheme&.downcase)
        raise ArgumentError.new("Invalid scheme for s3 URI #{url}, only http, https and s3 are permitted")
      end
      if uri.host.nil?
        raise ArgumentError.new("Invalid S3 URI, no hostname: #{url}")
      end
      uri
    end

    # Match the host of the uri against ENDPOINT_PATTERN
    # @param uri [URI] uri returned by s3_uri
    # @return [MatchData] the match, never nil
    # @raise [ArgumentError] if the host is not an S3 endpoint
    def self.match_endpoint(uri)
      matches = ENDPOINT_PATTERN.match(uri.host)
      if matches.nil?
        raise ArgumentError.new("Provided URI does not match the expected pattern for an S3 URI")
      end
      matches
    end
    private_class_method :match_endpoint
  end
end
|
@@ -0,0 +1,215 @@
|
|
1
|
+
require 'longleaf/candidates/file_selector'
|
2
|
+
require 'longleaf/candidates/registered_file_selector'
|
3
|
+
require 'longleaf/candidates/manifest_digest_provider'
|
4
|
+
require 'longleaf/candidates/physical_path_provider'
|
5
|
+
require 'longleaf/candidates/single_digest_provider'
|
6
|
+
|
7
|
+
module Longleaf
  # Helper for parsing manifest inputs used for registration
  class SelectionOptionsParser
    extend Longleaf::Logging

    # Parses the provided options to construct a file selector and digest provider for
    # use in registration commands. Logs a failure and exits with status 1 if the options
    # are invalid.
    # @param options [Hash] command options
    # @param app_config_manager [ApplicationConfigManager] app config manager
    # @return [Array] three element array containing the file selector, the digest provider
    #   (nil when files are selected with :file and no :checksums are given) and the
    #   physical path provider.
    def self.parse_registration_selection_options(options, app_config_manager)
      there_can_be_only_one("Only one of the following selection options may be provided: -m, -f, -s",
          options, :file, :manifest, :location)

      if !options[:manifest].nil?
        digests_mapping, logical_phys_mapping = self.parse_manifest(options[:manifest])
        physical_provider = PhysicalPathProvider.new(logical_phys_mapping)
        selector = FileSelector.new(file_paths: digests_mapping.keys,
            physical_provider: physical_provider,
            app_config: app_config_manager)
        digest_provider = ManifestDigestProvider.new(digests_mapping)
      elsif !options[:file].nil?
        if options[:checksums]
          checksums = options[:checksums]
          # validate checksum list format, must be a comma delimited list of prefix:checksums
          if /^[^:,]+:[^:,]+(,[^:,]+:[^:,]+)*$/.match(checksums)
            # convert checksum list into hash with prefix as key
            checksums = Hash[*checksums.split(/\s*[:,]\s*/)]
            digest_provider = SingleDigestProvider.new(checksums)
          else
            logger.failure("Invalid checksums parameter format, see `longleaf help <command>` for more information")
            exit 1
          end
        end

        file_paths = self.split_quoted(options[:file], "\\s*,\\s*")
        if !options[:physical_path].nil?
          physical_paths = self.split_quoted(options[:physical_path], "\\s*,\\s*")
          if physical_paths.length != file_paths.length
            logger.failure("Invalid physical paths parameter, number of paths did not match number of logical paths")
            exit 1
          end
          # Pair each logical path with the physical path in the same position
          logical_phys_mapping = Hash[file_paths.zip physical_paths]
          physical_provider = PhysicalPathProvider.new(logical_phys_mapping)
        else
          physical_provider = PhysicalPathProvider.new
        end

        selector = FileSelector.new(file_paths: file_paths,
            physical_provider: physical_provider,
            app_config: app_config_manager)
      else
        # NOTE(review): the flag list in this message ("l" without a dash) looks inconsistent
        # with the options checked above - confirm against the CLI definitions.
        logger.failure("Must provide one of the following file selection options: -f, l, or -m")
        exit 1
      end

      [selector, digest_provider, physical_provider]
    end

    # Verifies that at most one of the named options was provided. Logs failure_msg and
    # exits with status 1 if more than one is present.
    # @param failure_msg [String] message to log if more than one option is set
    # @param options [Hash] command options
    # @param names keys of the mutually exclusive options
    def self.there_can_be_only_one(failure_msg, options, *names)
      provided = names.count { |name| !options[name].nil? }
      if provided > 1
        logger.failure(failure_msg)
        exit 1
      end
    end

    # Parses the provided manifest options, reading the contents of the manifests to produce
    # a mapping from files to one or more algorithms.
    # @param manifest_vals [Array] List of manifest option values. They may be in one of the following formats:
    #   <alg_name>:<manifest_path> OR <alg_name>:@-
    #   <manifest_path> OR @-
    # @return [Array] two element array. The first element is a hash containing the aggregated
    #   contents of the provided manifests: the keys are paths to manifested files, the values
    #   are hashes mapping digest algorithms to digest values. The second element is a hash
    #   from manifested path to physical path, for entries which supplied a third column.
    def self.parse_manifest(manifest_vals)
      # interpret option inputs into a list of algorithms to manifest sources
      alg_manifest_pairs = manifest_vals.map do |manifest_val|
        if manifest_val.include?(':')
          manifest_val.split(':', 2)
        else
          # algorithm not specified in option value
          [nil, manifest_val]
        end
      end
      if alg_manifest_pairs.count { |mpair| mpair[1] == '@-' } > 1
        self.fail("Cannot specify more than one manifest from STDIN")
      end

      # read the provided manifests to build a mapping from file uri to all supplied digests
      digests_mapping = Hash.new { |h, k| h[k] = Hash.new }
      logical_phys_mapping = Hash.new
      alg_manifest_pairs.each do |algorithm, source|
        # Determine if reading from a manifest file or stdin
        if source == '@-'
          read_manifest($stdin, algorithm, digests_mapping, logical_phys_mapping)
        else
          # Block form guarantees the manifest file is closed, even if parsing exits early
          File.open(source) do |source_stream|
            read_manifest(source_stream, algorithm, digests_mapping, logical_phys_mapping)
          end
        end
      end

      [digests_mapping, logical_phys_mapping]
    end

    # Reads the entries of a single manifest into the provided mappings.
    # @param source_stream [IO] stream to read manifest lines from
    # @param algorithm [String] digest algorithm for all entries, or nil if the manifest
    #   declares its algorithms with "<alg_name>:" header lines
    # @param digests_mapping [Hash] receives path => { algorithm => digest }
    # @param logical_phys_mapping [Hash] receives path => physical path for three column entries
    def self.read_manifest(source_stream, algorithm, digests_mapping, logical_phys_mapping)
      current_alg = algorithm
      multi_digest_manifest = current_alg.nil?
      source_stream.each_line do |line|
        line = line.strip
        if multi_digest_manifest && /^[a-zA-Z0-9]+:$/ =~ line
          # Found a digest algorithm header, assuming succeeding entries are of this type
          current_alg = line.chomp(':')
          # Verify that the digest algorithm is known to longleaf
          if !DigestHelper.is_known_algorithm?(current_alg)
            self.fail("Manifest specifies unknown digest algorithm: #{current_alg}")
          end
        else
          if current_alg.nil?
            self.fail("Manifest with unknown checksums encountered, an algorithm must be specified")
          end
          # Entry columns are: digest, path, and optionally physical path
          entry_parts = self.split_quoted(line)
          if entry_parts.length != 2 && entry_parts.length != 3
            self.fail("Invalid manifest entry: #{line}")
          end

          digests_mapping[entry_parts[1]][current_alg] = entry_parts[0]
          if entry_parts.length == 3
            logical_phys_mapping[entry_parts[1]] = entry_parts[2]
          end
        end
      end
    end
    private_class_method :read_manifest

    # Splits a string of quoted or unquoted tokens, ignoring delimiters which occur inside of
    # single or double quotes. Empty tokens are dropped, and surrounding spaces and quotes
    # are removed from each token.
    # @param text [String] the text to split
    # @param delimiter [String] regular expression source for the delimiter, defaults to whitespace
    # @param limit [Integer] limit passed through to String#split
    # @return [Array] the tokens
    def self.split_quoted(text, delimiter = "\\s+", limit = -1)
      text.split(/#{delimiter}(?=(?:[^'"]|'[^']*'|"[^"]*")*$)/, limit)
          .reject(&:empty?)
          .map { |s| s.gsub(/(^ +)|( +$)|(^["']+)|(["']+$)/, '') }
    end

    # Parses the provided options to create a selector for registered files. Logs a failure
    # and exits with status 1 if no selection option was provided.
    # @param options [Hash] command options
    # @param app_config_manager [ApplicationConfigManager] app config manager
    # @return selector
    def self.create_registered_selector(options, app_config_manager)
      there_can_be_only_one("Only one of the following selection options may be provided: -l, -f, -s",
          options, :file, :location, :from_list)

      if !options[:from_list].nil?
        file_paths = read_from_list(options[:from_list])
        return RegisteredFileSelector.new(file_paths: file_paths, app_config: app_config_manager)
      elsif !options[:file].nil?
        file_paths = options[:file].split(/\s*,\s*/)
        return RegisteredFileSelector.new(file_paths: file_paths, app_config: app_config_manager)
      elsif !options[:location].nil?
        storage_locations = options[:location].split(/\s*,\s*/)
        return RegisteredFileSelector.new(storage_locations: storage_locations, app_config: app_config_manager)
      else
        logger.failure("Must provide one of the following file selection options: -l, -f, or -s")
        exit 1
      end
    end

    # Parses the -l from_list option, reading the list of files specified either from the provided
    # file path or STDIN. Logs a failure and exits with status 1 if the option is blank, the
    # file does not exist, or no lines were read.
    # @param from_list option value, either a file path or "@-"
    # @return list of files from the from_list
    def self.read_from_list(from_list)
      from_list = from_list.strip
      if from_list.empty?
        logger.failure("List parameter must not be empty")
        exit 1
      end

      if from_list == '@-'
        lines = $stdin.each_line.map(&:strip)
      else
        begin
          # File.foreach opens, reads and closes the file, so no handle is leaked
          lines = File.foreach(from_list).map(&:strip)
        rescue Errno::ENOENT
          logger.failure("Specified list file does not exist: #{from_list}")
          exit 1
        end
      end

      if lines.empty?
        logger.failure("File list is empty, must provide one or more files for this operation")
        exit 1
      end
      lines
    end

    # Logs the provided message as a failure and exits with status 1.
    # @param message [String] failure message
    def self.fail(message)
      logger.failure(message)
      exit 1
    end
  end
end
|
@@ -0,0 +1,78 @@
|
|
1
|
+
require 'time'
|
2
|
+
|
3
|
+
module Longleaf
  # Helper methods for interacting with dates/timestamps on services
  class ServiceDateHelper
    # Number of seconds represented by each supported time unit. Months and years are
    # approximated as 30 and 365 days respectively.
    SECONDS_PER_UNIT = {
      'second' => 1,
      'minute' => 60,
      'hour' => 3600,
      'day' => 24 * 3600,
      'week' => 7 * 24 * 3600,
      'month' => 30 * 24 * 3600,
      'year' => 365 * 24 * 3600
    }.freeze
    private_constant :SECONDS_PER_UNIT

    # Adds the amount of time from modifier to the provided timestamp
    # @param timestamp [String] ISO-8601 timestamp string
    # @param modifier [String] amount of time to add to the timestamp. It must follow the syntax
    #   "<quantity> <time unit>", where quantity must be a positive whole number and time unit
    #   must be second, minute, hour, day, week, month or year (unit may be plural).
    #   Any info after a comma will be ignored.
    # @return [String] the original timestamp in ISO-8601 format with the provided amount of time added.
    # @raise [ArgumentError] if the modifier cannot be parsed
    def self.add_to_timestamp(timestamp, modifier)
      parsed = /^(\d+) *(second|minute|hour|day|week|month|year)s?(,.*)?/.match(modifier.to_s)
      raise ArgumentError.new("Cannot parse time modifier #{modifier}") if parsed.nil?

      quantity = parsed[1].to_i
      unit_seconds = SECONDS_PER_UNIT[parsed[2]]

      modified_time = Time.iso8601(timestamp) + (quantity * unit_seconds)
      modified_time.iso8601(3)
    end

    # Get a timestamp in the format expected for service timestamps.
    # @param timestamp [Time] the time to format. Defaults to now.
    # @return [String] the time formatted as iso8601
    def self.formatted_timestamp(timestamp = Time.now)
      # getutc returns a new Time; Time#utc would convert the caller's object in place
      timestamp.getutc.iso8601(3)
    end

    # Get the timestamp for the next time the provided service would need to be run
    # for the object described by md_rec
    # @param md_rec [MetadataRecord] metadata record for the file
    # @param service_def [ServiceDefinition] definition for the service
    # @return [String] iso8601 timestamp for the next time the service will need to run, or
    #    nil if the service does not need to run again.
    # @raise [ArgumentError] if md_rec or service_def is nil
    def self.next_run_needed(md_rec, service_def)
      raise ArgumentError.new('Must provide a md_rec parameter') if md_rec.nil?
      raise ArgumentError.new('Must provide a service_def parameter') if service_def.nil?

      service_rec = md_rec.service(service_def.name)

      # Service has never run: it is due at registration time, plus the delay if one is set
      if service_rec.nil? || service_rec.timestamp.nil?
        return md_rec.registered if service_def.delay.nil?
        return add_to_timestamp(md_rec.registered, service_def.delay)
      end

      # Service has run before: it only needs to run again if it has a frequency
      return nil if service_def.frequency.nil?

      add_to_timestamp(service_rec.timestamp, service_def.frequency)
    end
  end
end
|
@@ -0,0 +1,101 @@
|
|
1
|
+
require 'longleaf/models/system_config_fields'
|
2
|
+
require 'longleaf/services/metadata_persistence_manager'
|
3
|
+
require 'longleaf/errors'
|
4
|
+
|
5
|
+
module Longleaf
  # Manager configures and provides access to a metadata index if one is specified
  class IndexManager
    SYS_FIELDS ||= Longleaf::SystemConfigFields

    # Stores the configuration and initializes the index driver when the system
    # configuration contains a metadata index section.
    # @param config [Hash] The system configuration as a hash
    # @param app_config_manager [ApplicationConfigManager] the application config
    # @raise [ConfigurationError] if the index section is present but specifies no adapter,
    #   an unknown adapter, or no connection details
    def initialize(config, app_config_manager)
      @config = config
      @app_config_manager = app_config_manager
      init_index_driver if @config&.key?(SYS_FIELDS::MD_INDEX)
    end

    # @return true if the system is configured to use a metadata index
    def using_index?
      !@index_driver.nil?
    end

    # The methods below delegate directly to the index driver, which is only set when an
    # index is configured (see using_index?).

    # Index the provided file_rec and its metadata
    #
    # @param file_rec [FileRecord] file record to index
    def index(file_rec)
      @index_driver.index(file_rec)
    end

    # Remove an entry from the index
    # @param remove_me The record to remove from the index
    def remove(remove_me)
      @index_driver.remove(remove_me)
    end

    # Clear entries from the index, delegating to the driver
    # @param older_than optional cutoff passed through to the driver, defaults to nil
    def clear_index(older_than = nil)
      @index_driver.clear_index(older_than)
    end

    # @return true if the index should be reindexed
    def index_stale?
      @index_driver.is_stale?
    end

    # Setup initial structure of index implementation
    def setup_index
      @index_driver.setup_index
    end

    # Delegates to the driver's update_index_state
    def update_index_state
      @index_driver.update_index_state
    end

    # Retrieves a set of file paths which have one or more services which need to run.
    #
    # @param file_selector [FileSelector] selector for paths to search for files
    # @param stale_datetime [DateTime] find file_paths with services needing to be run before this value
    # @return [Array] array of file paths that need one or more services run, in ascending order by
    #    timestamp.
    def paths_with_stale_services(file_selector, stale_datetime)
      @index_driver.paths_with_stale_services(file_selector, stale_datetime)
    end

    # Retrieves a page of paths for registered files.
    # @param file_selector [FileSelector] selector for what paths to search for files
    # @return [Array] array of file paths that are registered
    def registered_paths(file_selector)
      @index_driver.registered_paths(file_selector)
    end

    # Passes the block to the driver's each_registered_path for the selected files.
    # @param file_selector [FileSelector] selector for what paths to search for files
    # @param older_than optional cutoff passed through to the driver, defaults to nil
    def each_registered_path(file_selector, older_than: nil, &block)
      @index_driver.each_registered_path(file_selector, older_than: older_than, &block)
    end

    private

    # Builds the index driver from the metadata index section of the system configuration.
    # @raise [ConfigurationError] if no adapter is specified, the adapter is not supported,
    #   or no connection details are provided
    def init_index_driver
      index_conf = @config[SYS_FIELDS::MD_INDEX]
      adapter = index_conf[SYS_FIELDS::MD_INDEX_ADAPTER]&.downcase

      raise ConfigurationError.new('Must specify an adapter for the metadata index') if adapter.nil?

      adapter = adapter.to_sym

      case adapter
      when :postgres, :mysql, :mysql2, :sqlite, :amalgalite
        page_size = index_conf[SYS_FIELDS::MD_INDEX_PAGE_SIZE]&.to_int

        connection = index_conf[SYS_FIELDS::MD_INDEX_CONNECTION]
        raise ConfigurationError.new("Must specify connection details for index adapter of type '#{adapter}'") if connection.nil?

        # Loaded lazily so the driver is only required when an index is configured
        require 'longleaf/indexing/sequel_index_driver'
        @index_driver = SequelIndexDriver.new(@app_config_manager,
            adapter,
            connection,
            page_size: page_size)
      else
        # adapter is known to be non-nil here, so reject any unsupported value outright
        raise ConfigurationError.new("Unknown index adapter '#{adapter}' specified.")
      end
    end
  end
end
|