longleaf 0.1.0 → 1.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.circleci/config.yml +94 -0
- data/.editorconfig +13 -0
- data/.gitignore +4 -1
- data/.rubocop.yml +44 -0
- data/.rubocop_todo.yml +834 -0
- data/.yardopts +1 -0
- data/Gemfile +16 -1
- data/README.md +98 -12
- data/Rakefile +6 -0
- data/bin/setup +16 -1
- data/docs/aboutlongleaf.md +28 -0
- data/docs/extra.css +32 -0
- data/docs/img/change-file.png +0 -0
- data/docs/img/ll-example-preserved.png +0 -0
- data/docs/index.md +19 -0
- data/docs/install.md +66 -0
- data/docs/ll-example/config-example-relative.yml +33 -0
- data/docs/ll-example/files-dir/LLexample-PDF.pdf +0 -0
- data/docs/ll-example/files-dir/LLexample-TOCHANGE.txt +15 -0
- data/docs/ll-example/files-dir/LLexample-tokeep.txt +10 -0
- data/docs/ll-example/metadata-dir/.gitkeep +0 -0
- data/docs/ll-example/replica-files/.gitkeep +0 -0
- data/docs/ll-example/replica-metadata/.gitkeep +0 -0
- data/docs/quickstart.md +270 -0
- data/docs/rdocs/Longleaf.html +135 -0
- data/docs/rdocs/Longleaf/AppFields.html +178 -0
- data/docs/rdocs/Longleaf/ApplicationConfigDeserializer.html +631 -0
- data/docs/rdocs/Longleaf/ApplicationConfigManager.html +610 -0
- data/docs/rdocs/Longleaf/ApplicationConfigValidator.html +238 -0
- data/docs/rdocs/Longleaf/CLI.html +909 -0
- data/docs/rdocs/Longleaf/ChecksumMismatchError.html +151 -0
- data/docs/rdocs/Longleaf/ConfigBuilder.html +1339 -0
- data/docs/rdocs/Longleaf/ConfigurationError.html +143 -0
- data/docs/rdocs/Longleaf/ConfigurationValidator.html +227 -0
- data/docs/rdocs/Longleaf/DeregisterCommand.html +420 -0
- data/docs/rdocs/Longleaf/DeregisterEvent.html +453 -0
- data/docs/rdocs/Longleaf/DeregistrationError.html +151 -0
- data/docs/rdocs/Longleaf/DigestHelper.html +419 -0
- data/docs/rdocs/Longleaf/EventError.html +147 -0
- data/docs/rdocs/Longleaf/EventNames.html +163 -0
- data/docs/rdocs/Longleaf/EventStatusTracking.html +656 -0
- data/docs/rdocs/Longleaf/FileCheckService.html +540 -0
- data/docs/rdocs/Longleaf/FileHelpers.html +520 -0
- data/docs/rdocs/Longleaf/FileRecord.html +716 -0
- data/docs/rdocs/Longleaf/FileSelector.html +901 -0
- data/docs/rdocs/Longleaf/FixityCheckService.html +691 -0
- data/docs/rdocs/Longleaf/IndexManager.html +1155 -0
- data/docs/rdocs/Longleaf/InvalidDigestAlgorithmError.html +143 -0
- data/docs/rdocs/Longleaf/InvalidStoragePathError.html +143 -0
- data/docs/rdocs/Longleaf/Logging.html +405 -0
- data/docs/rdocs/Longleaf/Logging/RedirectingLogger.html +1213 -0
- data/docs/rdocs/Longleaf/LongleafError.html +139 -0
- data/docs/rdocs/Longleaf/MDFields.html +193 -0
- data/docs/rdocs/Longleaf/MetadataBuilder.html +787 -0
- data/docs/rdocs/Longleaf/MetadataDeserializer.html +537 -0
- data/docs/rdocs/Longleaf/MetadataError.html +143 -0
- data/docs/rdocs/Longleaf/MetadataPersistenceManager.html +539 -0
- data/docs/rdocs/Longleaf/MetadataRecord.html +1411 -0
- data/docs/rdocs/Longleaf/MetadataSerializer.html +786 -0
- data/docs/rdocs/Longleaf/PreservationServiceError.html +147 -0
- data/docs/rdocs/Longleaf/PreserveCommand.html +410 -0
- data/docs/rdocs/Longleaf/PreserveEvent.html +491 -0
- data/docs/rdocs/Longleaf/RegisterCommand.html +428 -0
- data/docs/rdocs/Longleaf/RegisterEvent.html +628 -0
- data/docs/rdocs/Longleaf/RegisteredFileSelector.html +446 -0
- data/docs/rdocs/Longleaf/RegistrationError.html +151 -0
- data/docs/rdocs/Longleaf/ReindexCommand.html +576 -0
- data/docs/rdocs/Longleaf/RsyncReplicationService.html +1180 -0
- data/docs/rdocs/Longleaf/SequelIndexDriver.html +1978 -0
- data/docs/rdocs/Longleaf/ServiceCandidateFilesystemIterator.html +572 -0
- data/docs/rdocs/Longleaf/ServiceCandidateIndexIterator.html +532 -0
- data/docs/rdocs/Longleaf/ServiceCandidateLocator.html +333 -0
- data/docs/rdocs/Longleaf/ServiceClassCache.html +725 -0
- data/docs/rdocs/Longleaf/ServiceDateHelper.html +425 -0
- data/docs/rdocs/Longleaf/ServiceDefinition.html +683 -0
- data/docs/rdocs/Longleaf/ServiceDefinitionManager.html +371 -0
- data/docs/rdocs/Longleaf/ServiceDefinitionValidator.html +269 -0
- data/docs/rdocs/Longleaf/ServiceFields.html +173 -0
- data/docs/rdocs/Longleaf/ServiceManager.html +1229 -0
- data/docs/rdocs/Longleaf/ServiceMappingManager.html +410 -0
- data/docs/rdocs/Longleaf/ServiceMappingValidator.html +347 -0
- data/docs/rdocs/Longleaf/ServiceRecord.html +821 -0
- data/docs/rdocs/Longleaf/StorageLocation.html +985 -0
- data/docs/rdocs/Longleaf/StorageLocationManager.html +729 -0
- data/docs/rdocs/Longleaf/StorageLocationUnavailableError.html +143 -0
- data/docs/rdocs/Longleaf/StorageLocationValidator.html +373 -0
- data/docs/rdocs/Longleaf/StoragePathValidator.html +253 -0
- data/docs/rdocs/Longleaf/SystemConfigBuilder.html +441 -0
- data/docs/rdocs/Longleaf/SystemConfigFields.html +163 -0
- data/docs/rdocs/Longleaf/ValidateConfigCommand.html +451 -0
- data/docs/rdocs/Longleaf/ValidateMetadataCommand.html +408 -0
- data/docs/rdocs/_index.html +660 -0
- data/docs/rdocs/class_list.html +51 -0
- data/docs/rdocs/css/common.css +1 -0
- data/docs/rdocs/css/full_list.css +58 -0
- data/docs/rdocs/css/style.css +496 -0
- data/docs/rdocs/file.README.html +165 -0
- data/docs/rdocs/file_list.html +56 -0
- data/docs/rdocs/frames.html +17 -0
- data/docs/rdocs/index.html +165 -0
- data/docs/rdocs/js/app.js +303 -0
- data/docs/rdocs/js/full_list.js +216 -0
- data/docs/rdocs/js/jquery.js +4 -0
- data/docs/rdocs/method_list.html +2051 -0
- data/docs/rdocs/top-level-namespace.html +110 -0
- data/lib/longleaf/candidates/file_selector.rb +150 -0
- data/lib/longleaf/candidates/manifest_digest_provider.rb +17 -0
- data/lib/longleaf/candidates/physical_path_provider.rb +17 -0
- data/lib/longleaf/candidates/registered_file_selector.rb +67 -0
- data/lib/longleaf/candidates/service_candidate_filesystem_iterator.rb +93 -0
- data/lib/longleaf/candidates/service_candidate_index_iterator.rb +84 -0
- data/lib/longleaf/candidates/service_candidate_locator.rb +23 -0
- data/lib/longleaf/candidates/single_digest_provider.rb +13 -0
- data/lib/longleaf/cli.rb +252 -46
- data/lib/longleaf/commands/deregister_command.rb +51 -0
- data/lib/longleaf/commands/preserve_command.rb +50 -0
- data/lib/longleaf/commands/register_command.rb +34 -43
- data/lib/longleaf/commands/reindex_command.rb +92 -0
- data/lib/longleaf/commands/validate_config_command.rb +33 -8
- data/lib/longleaf/commands/validate_metadata_command.rb +51 -0
- data/lib/longleaf/errors.rb +26 -7
- data/lib/longleaf/events/deregister_event.rb +53 -0
- data/lib/longleaf/events/event_names.rb +9 -0
- data/lib/longleaf/events/event_status_tracking.rb +59 -0
- data/lib/longleaf/events/preserve_event.rb +82 -0
- data/lib/longleaf/events/register_event.rb +59 -51
- data/lib/longleaf/helpers/case_insensitive_hash.rb +38 -0
- data/lib/longleaf/helpers/digest_helper.rb +56 -0
- data/lib/longleaf/helpers/s3_uri_helper.rb +86 -0
- data/lib/longleaf/helpers/selection_options_parser.rb +215 -0
- data/lib/longleaf/helpers/service_date_helper.rb +78 -0
- data/lib/longleaf/indexing/index_manager.rb +101 -0
- data/lib/longleaf/indexing/sequel_index_driver.rb +306 -0
- data/lib/longleaf/logging.rb +5 -4
- data/lib/longleaf/logging/redirecting_logger.rb +30 -25
- data/lib/longleaf/models/app_fields.rb +7 -2
- data/lib/longleaf/models/file_record.rb +31 -8
- data/lib/longleaf/models/filesystem_metadata_location.rb +56 -0
- data/lib/longleaf/models/filesystem_storage_location.rb +52 -0
- data/lib/longleaf/models/md_fields.rb +3 -1
- data/lib/longleaf/models/metadata_location.rb +47 -0
- data/lib/longleaf/models/metadata_record.rb +43 -16
- data/lib/longleaf/models/s3_storage_location.rb +138 -0
- data/lib/longleaf/models/service_definition.rb +7 -6
- data/lib/longleaf/models/service_fields.rb +7 -1
- data/lib/longleaf/models/service_record.rb +10 -6
- data/lib/longleaf/models/storage_location.rb +24 -19
- data/lib/longleaf/models/storage_types.rb +9 -0
- data/lib/longleaf/models/system_config_fields.rb +9 -0
- data/lib/longleaf/preservation_services/file_check_service.rb +59 -0
- data/lib/longleaf/preservation_services/fixity_check_service.rb +124 -0
- data/lib/longleaf/preservation_services/rsync_replication_service.rb +198 -0
- data/lib/longleaf/preservation_services/s3_replication_service.rb +131 -0
- data/lib/longleaf/services/application_config_deserializer.rb +81 -24
- data/lib/longleaf/services/application_config_manager.rb +20 -6
- data/lib/longleaf/services/application_config_validator.rb +19 -9
- data/lib/longleaf/services/configuration_validator.rb +67 -4
- data/lib/longleaf/services/filesystem_location_validator.rb +16 -0
- data/lib/longleaf/services/metadata_deserializer.rb +115 -42
- data/lib/longleaf/services/metadata_persistence_manager.rb +47 -0
- data/lib/longleaf/services/metadata_serializer.rb +156 -23
- data/lib/longleaf/services/metadata_validator.rb +76 -0
- data/lib/longleaf/services/s3_location_validator.rb +19 -0
- data/lib/longleaf/services/service_class_cache.rb +112 -0
- data/lib/longleaf/services/service_definition_manager.rb +10 -7
- data/lib/longleaf/services/service_definition_validator.rb +25 -18
- data/lib/longleaf/services/service_manager.rb +86 -11
- data/lib/longleaf/services/service_mapping_manager.rb +13 -12
- data/lib/longleaf/services/service_mapping_validator.rb +36 -26
- data/lib/longleaf/services/storage_location_manager.rb +76 -15
- data/lib/longleaf/services/storage_location_validator.rb +49 -35
- data/lib/longleaf/specs/config_builder.rb +47 -23
- data/lib/longleaf/specs/config_validator_helpers.rb +16 -0
- data/lib/longleaf/specs/custom_matchers.rb +9 -0
- data/lib/longleaf/specs/file_helpers.rb +61 -0
- data/lib/longleaf/specs/metadata_builder.rb +98 -0
- data/lib/longleaf/specs/system_config_builder.rb +27 -0
- data/lib/longleaf/version.rb +1 -1
- data/longleaf.gemspec +20 -7
- data/mkdocs.yml +21 -0
- metadata +308 -24
- data/.travis.yml +0 -4
- data/lib/longleaf/commands/abstract_command.rb +0 -37
- data/lib/longleaf/services/storage_path_validator.rb +0 -16
@@ -1,10 +1,15 @@
|
|
1
1
|
module Longleaf
  # Application configuration field names.
  #
  # Each constant holds the literal string used as a key in the application
  # configuration. The strings are frozen so that a caller cannot accidentally
  # mutate a key that is shared by every user of the constant.
  class AppFields
    LOCATIONS = 'locations'.freeze
    SERVICES = 'services'.freeze
    SERVICE_MAPPINGS = 'service_mappings'.freeze
    SYSTEM = 'system'.freeze

    LOCATION_PATH = 'path'.freeze
    METADATA_CONFIG = 'metadata'.freeze
    METADATA_DIGESTS = 'digests'.freeze

    STORAGE_TYPE = 'type'.freeze
  end
end
|
@@ -1,25 +1,48 @@
|
|
1
|
-
# Record for an individual file and its associated information
|
2
1
|
module Longleaf
  # Record for an individual file and its associated information
  class FileRecord
    attr_accessor :metadata_record
    attr_reader :storage_location
    attr_reader :path

    # @param file_path [String] path to the file
    # @param storage_location [StorageLocation] storage location containing the file
    # @param metadata_record [MetadataRecord] metadata record for this file object. Optional.
    # @param physical_path [String] physical path where the file is located. Defaults to the file_path.
    # @raise [ArgumentError] if file_path or storage_location is nil
    def initialize(file_path, storage_location, metadata_record = nil, physical_path = nil)
      raise ArgumentError, "FileRecord requires a path" if file_path.nil?
      raise ArgumentError, "FileRecord requires a storage_location" if storage_location.nil?

      @path = file_path
      @storage_location = storage_location
      @metadata_record = metadata_record
      @physical_path = physical_path
    end

    # @return [String] path for the metadata file for this file
    def metadata_path
      # The storage location is asked once; the answer is cached for later calls.
      @metadata_path = @storage_location.get_metadata_path_for(path) if @metadata_path.nil?
      @metadata_path
    end

    # Physical path of the file. When no physical path was given to the constructor,
    # the first call resolves it from the metadata record's physical_path, falling back
    # to the file path, and caches the result. A metadata_record assigned after that
    # first call does not change the value returned here.
    # @return [String] physical path where the file is located
    def physical_path
      if @physical_path.nil?
        from_metadata = @metadata_record.nil? ? nil : @metadata_record.physical_path
        @physical_path = from_metadata.nil? ? @path : from_metadata
      end
      @physical_path
    end

    # @return [Boolean] true if a file exists at the metadata path for this file
    def metadata_present?
      File.exist?(metadata_path)
    end

    # Two file records are equal when they refer to the same path.
    # @param other_obj [Object] object to compare against
    # @return [Boolean] true if other_obj is a FileRecord with the same path
    def ==(other_obj)
      return false unless other_obj.is_a?(FileRecord)

      path == other_obj.path
    end

    # Keep hash-based equality (Hash keys, Set members, Array#uniq) in agreement with ==.
    alias_method :eql?, :==

    # @return [Integer] hash code derived from the path, matching the definition of ==
    def hash
      path.hash
    end
  end
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'longleaf/services/metadata_serializer'
|
2
|
+
require 'longleaf/models/metadata_location'
|
3
|
+
require 'longleaf/models/storage_types'
|
4
|
+
|
5
|
+
require 'pathname'

module Longleaf
  # A filesystem based location in which metadata associated with registered files is stored.
  class FilesystemMetadataLocation < MetadataLocation
    AF ||= Longleaf::AppFields

    # @param config [Hash] configuration for this location, handed unchanged to MetadataLocation
    def initialize(config)
      super(config)
    end

    # @return the storage type for this location
    def type
      StorageTypes::FILESYSTEM_STORAGE_TYPE
    end

    # Get the path for the metadata file for the given file path located in this storage location.
    # @param file_path [String] path of the file relative to its storage location
    # @return path to the metadata, built by joining this location's path with file_path
    # @raise [ArgumentError] if the file_path is not provided or is an absolute path.
    def metadata_path_for(file_path)
      raise ArgumentError.new("A file_path parameter is required") if file_path.nil?
      raise ArgumentError.new("File path must be relative") if Pathname.new(file_path).absolute?

      md_path = File.join(@path, file_path)
      # A trailing slash marks a directory, which is returned as is.
      # Only paths to files receive the metadata suffix.
      return md_path if md_path.end_with?('/')

      md_path + MetadataSerializer::metadata_suffix
    end

    # Get the metadata path relative to this location
    # @param md_path [String] metadata file path
    # @return the metadata path relative to this location; relative input is returned unchanged
    # @raise [ArgumentError] if the metadata path is not contained by this location
    def relativize(md_path)
      return md_path if Pathname.new(md_path).relative?

      unless md_path.start_with?(@path)
        raise ArgumentError.new("Metadata path must be contained by this location")
      end

      # The prefix check above guarantees the location path sits at the start of md_path.
      md_path[@path.length..-1]
    end

    # Checks that the path defined in this metadata location is available
    # @return [true] when the metadata path exists and is a directory
    # @raise [StorageLocationUnavailableError] if the metadata location is not available
    def available?
      unless Dir.exist?(@path)
        raise StorageLocationUnavailableError.new("Metadata path does not exist or is not a directory: #{@path}")
      end

      # Return an explicit true so the predicate is truthy on success instead of nil.
      true
    end
  end
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
require 'longleaf/models/storage_location'
|
2
|
+
require 'longleaf/models/storage_types'
|
3
|
+
|
4
|
+
require 'pathname'

module Longleaf
  # A storage location in a local filesystem
  class FilesystemStorageLocation < StorageLocation
    # @param name [String] the name of this storage location
    # @param config [Hash] hash containing the configuration options for this location
    # @param md_loc [MetadataLocation] metadata location associated with this storage location
    def initialize(name, config, md_loc)
      super(name, config, md_loc)
      # Keep a trailing separator on the path so prefix checks in relativize
      # only match whole path segments.
      @path += File::SEPARATOR unless @path.end_with?(File::SEPARATOR)
    end

    # @return the storage type for this location
    def type
      StorageTypes::FILESYSTEM_STORAGE_TYPE
    end

    # Get the path to the file associated with the provided metadata path
    # @param md_path [String] metadata file path
    # @raise [ArgumentError] if the md_path is nil or empty. The metadata location may
    #   also reject the path (NOTE(review): depends on the metadata location's relativize).
    # @return [String] the path for the file associated with this metadata
    def get_path_from_metadata_path(md_path)
      raise ArgumentError.new("An md_path parameter is required") if md_path.nil? || md_path.empty?

      rel_path = @metadata_location.relative_file_path_for(md_path)

      File.join(@path, rel_path)
    end

    # Checks that the path and metadata path defined in this location are available
    # @return the result of the metadata location's own availability check
    # @raise [StorageLocationUnavailableError] if the storage location is not available
    def available?
      unless Dir.exist?(@path)
        raise StorageLocationUnavailableError.new("Path does not exist or is not a directory: #{@path}")
      end

      @metadata_location.available?
    end

    # Get the file path relative to this location
    # @param file_path [String] file path
    # @return the file path relative to this location; relative input is returned unchanged
    # @raise [ArgumentError] if the file path is not contained by this location
    def relativize(file_path)
      return file_path if Pathname.new(file_path).relative?

      unless file_path.start_with?(@path)
        raise ArgumentError.new("File path must be contained by this location")
      end

      # The prefix check above guarantees the location path sits at the start of file_path.
      file_path[@path.length..-1]
    end
  end
end
|
@@ -1,13 +1,15 @@
|
|
1
1
|
module Longleaf
|
2
|
+
# File metadata fields
|
2
3
|
class MDFields
|
3
4
|
DATA = 'data'
|
4
5
|
SERVICES = 'services'
|
5
|
-
|
6
|
+
|
6
7
|
REGISTERED_TIMESTAMP = 'registered'
|
7
8
|
DEREGISTERED_TIMESTAMP = 'deregistered'
|
8
9
|
|
9
10
|
LAST_MODIFIED = 'last-modified'
|
10
11
|
FILE_SIZE = 'size'
|
12
|
+
PHYSICAL_PATH = 'physical-path'
|
11
13
|
|
12
14
|
CHECKSUMS = 'checksums'
|
13
15
|
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require 'longleaf/models/app_fields'
|
2
|
+
|
3
|
+
module Longleaf
  # A location in which metadata associated with registered files is stored.
  #
  # relative_file_path_for calls a relativize method that this class does not define.
  class MetadataLocation
    AF ||= Longleaf::AppFields

    attr_reader :path
    attr_reader :digests

    # @param config [Hash] configuration for this location. Must provide a path,
    #   and may provide one digest algorithm name or a list of them.
    # @raise [ArgumentError] if config or its path entry is missing
    def initialize(config)
      raise ArgumentError.new("Config parameter is required") unless config

      @path = config[AF::LOCATION_PATH]
      raise ArgumentError.new("Parameter path is required") unless @path

      # The stored path always ends with a slash.
      @path += '/' unless @path.end_with?('/')

      configured = config[AF::METADATA_DIGESTS]
      # Digest names are stored lowercased, whether given singly, as a list, or not at all.
      @digests = case configured
                 when nil then []
                 when String then [configured.downcase]
                 else configured.map(&:downcase)
                 end
      DigestHelper::validate_algorithms(@digests)
    end

    # Transforms the given metadata path into a relative storage location path
    # @param md_path [String] path of the metadata file or directory to compute file path for.
    # @return [String] the relativized path, with the metadata suffix removed when present
    def relative_file_path_for(md_path)
      rel_md_path = relativize(md_path)
      suffix = MetadataSerializer::metadata_suffix

      return rel_md_path unless rel_md_path.end_with?(suffix)

      rel_md_path[0..(-suffix.length - 1)]
    end

    # @param md_path [String] metadata path to check
    # @return true if the metadata path is contained by the path for this location
    def contains?(md_path)
      md_path.start_with?(@path)
    end
  end
end
|
@@ -1,14 +1,17 @@
|
|
1
1
|
require_relative 'md_fields'
|
2
2
|
require_relative 'service_record'
|
3
|
+
require 'longleaf/helpers/case_insensitive_hash'
|
3
4
|
|
4
|
-
# Metadata record for a single file
|
5
5
|
module Longleaf
|
6
|
+
# Metadata record for a single file
|
6
7
|
class MetadataRecord
|
7
|
-
attr_reader :
|
8
|
+
attr_reader :registered
|
9
|
+
attr_accessor :deregistered
|
8
10
|
attr_reader :checksums
|
9
11
|
attr_reader :properties
|
10
12
|
attr_accessor :file_size, :last_modified
|
11
|
-
|
13
|
+
attr_accessor :physical_path
|
14
|
+
|
12
15
|
# @param properties [Hash] initial data properties for this record
|
13
16
|
# @param services [Hash] initial service property tree
|
14
17
|
# @param deregistered [String] deregistered timestamp
|
@@ -16,42 +19,66 @@ module Longleaf
|
|
16
19
|
# @param checksums [Hash] hash of checksum values
|
17
20
|
# @param file_size [Integer] size of file in bytes
|
18
21
|
# @param last_modified [String] iso8601 representation of the last modified date of file
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
+
# @param physical_path [String] physical path where the file is located
|
23
|
+
def initialize(properties: nil, services: nil, deregistered: nil, registered: nil, checksums: nil,
               file_size: nil, last_modified: nil, physical_path: nil)
  # Timestamps and file details are stored exactly as supplied.
  @registered = registered
  @deregistered = deregistered
  @file_size = file_size
  @last_modified = last_modified
  @physical_path = physical_path

  # Missing property or service data falls back to a new empty hash.
  @properties = properties || {}
  @services = services || {}

  # Supplied checksums are copied into a new CaseInsensitiveHash rather than stored directly.
  @checksums = CaseInsensitiveHash.new
  @checksums.merge!(checksums) unless checksums.nil?
end
|
29
|
-
|
35
|
+
|
30
36
|
# Reports whether a deregistered timestamp has been set on this record.
# @return [Boolean] true if the record is deregistered
def deregistered?
  return false if @deregistered.nil?

  true
end
|
34
|
-
|
40
|
+
|
35
41
|
# Adds a service to this record
#
# @param name [String] identifier for the service being added
# @param service [ServiceRecord] properties for populating the new service.
#   Defaults to a new, empty ServiceRecord.
# @return [ServiceRecord] the service added
# @raise [ArgumentError] if service is not exactly a Longleaf::ServiceRecord
# @raise [IndexError] if a service is already stored under name
def add_service(name, service = ServiceRecord.new)
  unless service.instance_of?(Longleaf::ServiceRecord)
    raise ArgumentError.new("Value must be a ServiceRecord object when adding a service")
  end

  if @services.key?(name)
    raise IndexError.new("Service with name '#{name}' already exists")
  end

  @services[name] = service
end
|
45
|
-
|
52
|
+
|
53
|
+
# Updates details of service record as if the service had been executed.
# A service record is added under service_name when none exists yet.
# @param service_name [String] name of the service run
# @return [ServiceRecord] the service record updated
def update_service_as_performed(service_name)
  (service(service_name) || add_service(service_name)).tap do |performed|
    performed.run_needed = false
    performed.timestamp = ServiceDateHelper.formatted_timestamp
  end
end
|
62
|
+
|
63
|
+
# Updates details of service record as if the service had encountered a
# failure during execution. A service record is added under service_name
# when none exists yet.
# @param service_name [String] name of the service run
# @return [ServiceRecord] the service record updated
def update_service_as_failed(service_name)
  failed = service(service_name)
  failed ||= add_service(service_name)
  failed.failure_timestamp = ServiceDateHelper.formatted_timestamp
  failed
end
|
72
|
+
|
46
73
|
# Looks up a service on this record by its name.
# @param name [String] name identifier of the service to retrieve
# @return [ServiceRecord] the ServiceRecord for the service identified by name, or nil
def service(name)
  found = @services[name]
  found
end
|
51
|
-
|
78
|
+
|
52
79
|
# Lists the names under which services are stored on this record.
# @return [Array<String>] a list of name identifiers for services registered to this record
def list_services
  names = @services.keys
  names
end
|
56
83
|
end
|
57
|
-
end
|
84
|
+
end
|
@@ -0,0 +1,138 @@
|
|
1
|
+
require 'longleaf/models/storage_location'
|
2
|
+
require 'longleaf/models/storage_types'
|
3
|
+
require 'longleaf/helpers/s3_uri_helper'
|
4
|
+
require 'longleaf/logging'
|
5
|
+
require 'uri'
|
6
|
+
require 'aws-sdk-s3'
|
7
|
+
|
8
|
+
module Longleaf
  # A storage location in a s3 bucket
  #
  # Optionally, the location configuration may include an "options" sub-hash in order to provide
  # any of the s3 client options specified in Client initializer:
  # https://docs.aws.amazon.com/sdk-for-ruby/v3/api/Aws/S3/Client.html#constructor_details
  class S3StorageLocation < StorageLocation
    include Longleaf::Logging

    # Matches strings that are, in their entirety, a URI
    IS_URI_REGEX = /\A#{URI::regexp}\z/

    # Configuration key of the optional sub-hash of s3 client options
    CLIENT_OPTIONS_FIELD = 'options'

    # @param name [String] the name of this storage location
    # @param config [Hash] hash containing the configuration options for this location
    # @param md_loc [MetadataLocation] metadata location associated with this storage location
    # @raise [ArgumentError] if no bucket can be identified from the location's path
    def initialize(name, config, md_loc)
      super(name, config, md_loc)

      @bucket_name = S3UriHelper.extract_bucket(@path)
      if @bucket_name.nil?
        raise ArgumentError.new("Unable to identify bucket for location #{@name} from path #{@path}")
      end

      # Force path to always end with a slash
      @path += '/' unless @path.end_with?('/')

      custom_options = config[CLIENT_OPTIONS_FIELD]
      # Copy any configured options into a new hash keyed by symbols.
      @client_options = custom_options.nil? ? {} : custom_options.map { |k, v| [k.to_sym, v] }.to_h
      # The logger is always this object's logger, replacing any configured value.
      @client_options[:logger] = logger
      @client_options[:log_level] = :debug if @client_options[:log_level].nil?

      # If no region directly configured, use region from path
      unless @client_options.key?(:region)
        region = S3UriHelper.extract_region(@path)
        @client_options[:region] = region unless region.nil?
      end

      @subpath_prefix = S3UriHelper.extract_path(@path)
    end

    # @return the storage type for this location
    def type
      StorageTypes::S3_STORAGE_TYPE
    end

    # Get the absolute path to the file associated with the provided metadata path
    # @param md_path [String] metadata file path
    # @raise [ArgumentError] if the md_path is nil or empty. The metadata location may
    #   also reject the path (NOTE(review): depends on the metadata location's relativize).
    # @return [String] the path for the file associated with this metadata
    def get_path_from_metadata_path(md_path)
      raise ArgumentError.new("An md_path parameter is required") if md_path.nil? || md_path.empty?

      rel_path = @metadata_location.relative_file_path_for(md_path)

      URI.join(@path, rel_path).to_s
    end

    # Checks that the bucket and the metadata location for this location are available
    # @return the result of the metadata location's own availability check
    # @raise [StorageLocationUnavailableError] if the storage location is not available
    def available?
      # Only the bucket check is wrapped, so an error raised by the metadata location
      # propagates unchanged instead of being reported as a bucket problem.
      begin
        s3_client.head_bucket({ bucket: @bucket_name, use_accelerate_endpoint: false })
      rescue StandardError => e
        raise StorageLocationUnavailableError.new(
          "Destination bucket #{@bucket_name} does not exist or is not accessible: #{e.message}"
        )
      end
      @metadata_location.available?
    end

    # Get the file path relative to this location
    # @param file_path [String] file path
    # @return the file path relative to this location
    # @raise [ArgumentError] if the file path is nil, or is a URI not contained by this location
    def relativize(file_path)
      raise ArgumentError.new("Must provide a non-nil path to relativize") if file_path.nil?

      return file_path[@path.length..-1] if file_path.start_with?(@path)

      # A complete URI that does not start with this location's path belongs elsewhere.
      raise ArgumentError.new("Path #{file_path} is not contained by #{@name}") if file_path =~ IS_URI_REGEX

      # path already relative
      file_path
    end

    # Prefixes the provided path with the query path portion of the location's path
    # after the bucket uri, used to place relative paths into the same sub-URL of a bucket.
    # For example:
    #   Given a location with 'path' http://example.s3-amazonaws.com/env/test/
    #   Where rel_path = 'path/to/text.txt'
    #   The result would be 'env/test/path/to/text.txt'
    # @param rel_path relative path to work with
    # @return the given relative path prefixed with the path portion of the storage location path
    # @raise [ArgumentError] if rel_path is nil
    def relative_to_bucket_path(rel_path)
      raise ArgumentError.new("Must provide a non-nil path") if rel_path.nil?

      return rel_path if @subpath_prefix.nil?

      @subpath_prefix + rel_path
    end

    # @return the bucket used by this storage location, created on first use and then reused
    def s3_bucket
      if @bucket.nil?
        @s3 = Aws::S3::Resource.new(client: s3_client)
        @bucket = @s3.bucket(@bucket_name)
      end
      @bucket
    end

    # @return the s3 client used by this storage location, created on first use and then reused
    def s3_client
      @client = Aws::S3::Client.new(**@client_options) if @client.nil?
      @client
    end
  end
end
|