longleaf 0.1.0 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.circleci/config.yml +94 -0
- data/.editorconfig +13 -0
- data/.gitignore +4 -1
- data/.rubocop.yml +44 -0
- data/.rubocop_todo.yml +834 -0
- data/.yardopts +1 -0
- data/Gemfile +16 -1
- data/README.md +98 -12
- data/Rakefile +6 -0
- data/bin/setup +16 -1
- data/docs/aboutlongleaf.md +28 -0
- data/docs/extra.css +32 -0
- data/docs/img/change-file.png +0 -0
- data/docs/img/ll-example-preserved.png +0 -0
- data/docs/index.md +19 -0
- data/docs/install.md +66 -0
- data/docs/ll-example/config-example-relative.yml +33 -0
- data/docs/ll-example/files-dir/LLexample-PDF.pdf +0 -0
- data/docs/ll-example/files-dir/LLexample-TOCHANGE.txt +15 -0
- data/docs/ll-example/files-dir/LLexample-tokeep.txt +10 -0
- data/docs/ll-example/metadata-dir/.gitkeep +0 -0
- data/docs/ll-example/replica-files/.gitkeep +0 -0
- data/docs/ll-example/replica-metadata/.gitkeep +0 -0
- data/docs/quickstart.md +270 -0
- data/docs/rdocs/Longleaf.html +135 -0
- data/docs/rdocs/Longleaf/AppFields.html +178 -0
- data/docs/rdocs/Longleaf/ApplicationConfigDeserializer.html +631 -0
- data/docs/rdocs/Longleaf/ApplicationConfigManager.html +610 -0
- data/docs/rdocs/Longleaf/ApplicationConfigValidator.html +238 -0
- data/docs/rdocs/Longleaf/CLI.html +909 -0
- data/docs/rdocs/Longleaf/ChecksumMismatchError.html +151 -0
- data/docs/rdocs/Longleaf/ConfigBuilder.html +1339 -0
- data/docs/rdocs/Longleaf/ConfigurationError.html +143 -0
- data/docs/rdocs/Longleaf/ConfigurationValidator.html +227 -0
- data/docs/rdocs/Longleaf/DeregisterCommand.html +420 -0
- data/docs/rdocs/Longleaf/DeregisterEvent.html +453 -0
- data/docs/rdocs/Longleaf/DeregistrationError.html +151 -0
- data/docs/rdocs/Longleaf/DigestHelper.html +419 -0
- data/docs/rdocs/Longleaf/EventError.html +147 -0
- data/docs/rdocs/Longleaf/EventNames.html +163 -0
- data/docs/rdocs/Longleaf/EventStatusTracking.html +656 -0
- data/docs/rdocs/Longleaf/FileCheckService.html +540 -0
- data/docs/rdocs/Longleaf/FileHelpers.html +520 -0
- data/docs/rdocs/Longleaf/FileRecord.html +716 -0
- data/docs/rdocs/Longleaf/FileSelector.html +901 -0
- data/docs/rdocs/Longleaf/FixityCheckService.html +691 -0
- data/docs/rdocs/Longleaf/IndexManager.html +1155 -0
- data/docs/rdocs/Longleaf/InvalidDigestAlgorithmError.html +143 -0
- data/docs/rdocs/Longleaf/InvalidStoragePathError.html +143 -0
- data/docs/rdocs/Longleaf/Logging.html +405 -0
- data/docs/rdocs/Longleaf/Logging/RedirectingLogger.html +1213 -0
- data/docs/rdocs/Longleaf/LongleafError.html +139 -0
- data/docs/rdocs/Longleaf/MDFields.html +193 -0
- data/docs/rdocs/Longleaf/MetadataBuilder.html +787 -0
- data/docs/rdocs/Longleaf/MetadataDeserializer.html +537 -0
- data/docs/rdocs/Longleaf/MetadataError.html +143 -0
- data/docs/rdocs/Longleaf/MetadataPersistenceManager.html +539 -0
- data/docs/rdocs/Longleaf/MetadataRecord.html +1411 -0
- data/docs/rdocs/Longleaf/MetadataSerializer.html +786 -0
- data/docs/rdocs/Longleaf/PreservationServiceError.html +147 -0
- data/docs/rdocs/Longleaf/PreserveCommand.html +410 -0
- data/docs/rdocs/Longleaf/PreserveEvent.html +491 -0
- data/docs/rdocs/Longleaf/RegisterCommand.html +428 -0
- data/docs/rdocs/Longleaf/RegisterEvent.html +628 -0
- data/docs/rdocs/Longleaf/RegisteredFileSelector.html +446 -0
- data/docs/rdocs/Longleaf/RegistrationError.html +151 -0
- data/docs/rdocs/Longleaf/ReindexCommand.html +576 -0
- data/docs/rdocs/Longleaf/RsyncReplicationService.html +1180 -0
- data/docs/rdocs/Longleaf/SequelIndexDriver.html +1978 -0
- data/docs/rdocs/Longleaf/ServiceCandidateFilesystemIterator.html +572 -0
- data/docs/rdocs/Longleaf/ServiceCandidateIndexIterator.html +532 -0
- data/docs/rdocs/Longleaf/ServiceCandidateLocator.html +333 -0
- data/docs/rdocs/Longleaf/ServiceClassCache.html +725 -0
- data/docs/rdocs/Longleaf/ServiceDateHelper.html +425 -0
- data/docs/rdocs/Longleaf/ServiceDefinition.html +683 -0
- data/docs/rdocs/Longleaf/ServiceDefinitionManager.html +371 -0
- data/docs/rdocs/Longleaf/ServiceDefinitionValidator.html +269 -0
- data/docs/rdocs/Longleaf/ServiceFields.html +173 -0
- data/docs/rdocs/Longleaf/ServiceManager.html +1229 -0
- data/docs/rdocs/Longleaf/ServiceMappingManager.html +410 -0
- data/docs/rdocs/Longleaf/ServiceMappingValidator.html +347 -0
- data/docs/rdocs/Longleaf/ServiceRecord.html +821 -0
- data/docs/rdocs/Longleaf/StorageLocation.html +985 -0
- data/docs/rdocs/Longleaf/StorageLocationManager.html +729 -0
- data/docs/rdocs/Longleaf/StorageLocationUnavailableError.html +143 -0
- data/docs/rdocs/Longleaf/StorageLocationValidator.html +373 -0
- data/docs/rdocs/Longleaf/StoragePathValidator.html +253 -0
- data/docs/rdocs/Longleaf/SystemConfigBuilder.html +441 -0
- data/docs/rdocs/Longleaf/SystemConfigFields.html +163 -0
- data/docs/rdocs/Longleaf/ValidateConfigCommand.html +451 -0
- data/docs/rdocs/Longleaf/ValidateMetadataCommand.html +408 -0
- data/docs/rdocs/_index.html +660 -0
- data/docs/rdocs/class_list.html +51 -0
- data/docs/rdocs/css/common.css +1 -0
- data/docs/rdocs/css/full_list.css +58 -0
- data/docs/rdocs/css/style.css +496 -0
- data/docs/rdocs/file.README.html +165 -0
- data/docs/rdocs/file_list.html +56 -0
- data/docs/rdocs/frames.html +17 -0
- data/docs/rdocs/index.html +165 -0
- data/docs/rdocs/js/app.js +303 -0
- data/docs/rdocs/js/full_list.js +216 -0
- data/docs/rdocs/js/jquery.js +4 -0
- data/docs/rdocs/method_list.html +2051 -0
- data/docs/rdocs/top-level-namespace.html +110 -0
- data/lib/longleaf/candidates/file_selector.rb +150 -0
- data/lib/longleaf/candidates/manifest_digest_provider.rb +17 -0
- data/lib/longleaf/candidates/physical_path_provider.rb +17 -0
- data/lib/longleaf/candidates/registered_file_selector.rb +67 -0
- data/lib/longleaf/candidates/service_candidate_filesystem_iterator.rb +93 -0
- data/lib/longleaf/candidates/service_candidate_index_iterator.rb +84 -0
- data/lib/longleaf/candidates/service_candidate_locator.rb +23 -0
- data/lib/longleaf/candidates/single_digest_provider.rb +13 -0
- data/lib/longleaf/cli.rb +252 -46
- data/lib/longleaf/commands/deregister_command.rb +51 -0
- data/lib/longleaf/commands/preserve_command.rb +50 -0
- data/lib/longleaf/commands/register_command.rb +34 -43
- data/lib/longleaf/commands/reindex_command.rb +92 -0
- data/lib/longleaf/commands/validate_config_command.rb +33 -8
- data/lib/longleaf/commands/validate_metadata_command.rb +51 -0
- data/lib/longleaf/errors.rb +26 -7
- data/lib/longleaf/events/deregister_event.rb +53 -0
- data/lib/longleaf/events/event_names.rb +9 -0
- data/lib/longleaf/events/event_status_tracking.rb +59 -0
- data/lib/longleaf/events/preserve_event.rb +82 -0
- data/lib/longleaf/events/register_event.rb +59 -51
- data/lib/longleaf/helpers/case_insensitive_hash.rb +38 -0
- data/lib/longleaf/helpers/digest_helper.rb +56 -0
- data/lib/longleaf/helpers/s3_uri_helper.rb +86 -0
- data/lib/longleaf/helpers/selection_options_parser.rb +215 -0
- data/lib/longleaf/helpers/service_date_helper.rb +78 -0
- data/lib/longleaf/indexing/index_manager.rb +101 -0
- data/lib/longleaf/indexing/sequel_index_driver.rb +306 -0
- data/lib/longleaf/logging.rb +5 -4
- data/lib/longleaf/logging/redirecting_logger.rb +30 -25
- data/lib/longleaf/models/app_fields.rb +7 -2
- data/lib/longleaf/models/file_record.rb +31 -8
- data/lib/longleaf/models/filesystem_metadata_location.rb +56 -0
- data/lib/longleaf/models/filesystem_storage_location.rb +52 -0
- data/lib/longleaf/models/md_fields.rb +3 -1
- data/lib/longleaf/models/metadata_location.rb +47 -0
- data/lib/longleaf/models/metadata_record.rb +43 -16
- data/lib/longleaf/models/s3_storage_location.rb +138 -0
- data/lib/longleaf/models/service_definition.rb +7 -6
- data/lib/longleaf/models/service_fields.rb +7 -1
- data/lib/longleaf/models/service_record.rb +10 -6
- data/lib/longleaf/models/storage_location.rb +24 -19
- data/lib/longleaf/models/storage_types.rb +9 -0
- data/lib/longleaf/models/system_config_fields.rb +9 -0
- data/lib/longleaf/preservation_services/file_check_service.rb +59 -0
- data/lib/longleaf/preservation_services/fixity_check_service.rb +124 -0
- data/lib/longleaf/preservation_services/rsync_replication_service.rb +198 -0
- data/lib/longleaf/preservation_services/s3_replication_service.rb +131 -0
- data/lib/longleaf/services/application_config_deserializer.rb +81 -24
- data/lib/longleaf/services/application_config_manager.rb +20 -6
- data/lib/longleaf/services/application_config_validator.rb +19 -9
- data/lib/longleaf/services/configuration_validator.rb +67 -4
- data/lib/longleaf/services/filesystem_location_validator.rb +16 -0
- data/lib/longleaf/services/metadata_deserializer.rb +115 -42
- data/lib/longleaf/services/metadata_persistence_manager.rb +47 -0
- data/lib/longleaf/services/metadata_serializer.rb +156 -23
- data/lib/longleaf/services/metadata_validator.rb +76 -0
- data/lib/longleaf/services/s3_location_validator.rb +19 -0
- data/lib/longleaf/services/service_class_cache.rb +112 -0
- data/lib/longleaf/services/service_definition_manager.rb +10 -7
- data/lib/longleaf/services/service_definition_validator.rb +25 -18
- data/lib/longleaf/services/service_manager.rb +86 -11
- data/lib/longleaf/services/service_mapping_manager.rb +13 -12
- data/lib/longleaf/services/service_mapping_validator.rb +36 -26
- data/lib/longleaf/services/storage_location_manager.rb +76 -15
- data/lib/longleaf/services/storage_location_validator.rb +49 -35
- data/lib/longleaf/specs/config_builder.rb +47 -23
- data/lib/longleaf/specs/config_validator_helpers.rb +16 -0
- data/lib/longleaf/specs/custom_matchers.rb +9 -0
- data/lib/longleaf/specs/file_helpers.rb +61 -0
- data/lib/longleaf/specs/metadata_builder.rb +98 -0
- data/lib/longleaf/specs/system_config_builder.rb +27 -0
- data/lib/longleaf/version.rb +1 -1
- data/longleaf.gemspec +20 -7
- data/mkdocs.yml +21 -0
- metadata +308 -24
- data/.travis.yml +0 -4
- data/lib/longleaf/commands/abstract_command.rb +0 -37
- data/lib/longleaf/services/storage_path_validator.rb +0 -16
|
@@ -1,10 +1,15 @@
|
|
|
1
1
|
module Longleaf
|
|
2
|
+
# Application configuration field names
|
|
2
3
|
class AppFields
|
|
3
4
|
LOCATIONS = 'locations'
|
|
4
5
|
SERVICES = 'services'
|
|
5
6
|
SERVICE_MAPPINGS = 'service_mappings'
|
|
6
|
-
|
|
7
|
+
SYSTEM = 'system'
|
|
8
|
+
|
|
7
9
|
LOCATION_PATH = 'path'
|
|
8
|
-
|
|
10
|
+
METADATA_CONFIG = 'metadata'
|
|
11
|
+
METADATA_DIGESTS = 'digests'
|
|
12
|
+
|
|
13
|
+
STORAGE_TYPE = 'type'
|
|
9
14
|
end
|
|
10
15
|
end
|
|
@@ -1,25 +1,48 @@
|
|
|
1
|
-
# Record for an individual file and its associated information
|
|
2
1
|
module Longleaf
|
|
2
|
+
# Record for an individual file and its associated information
|
|
3
3
|
class FileRecord
|
|
4
|
-
|
|
5
4
|
attr_accessor :metadata_record
|
|
6
5
|
attr_reader :storage_location
|
|
7
6
|
attr_reader :path
|
|
8
|
-
|
|
7
|
+
|
|
9
8
|
# @param file_path [String] path to the file
|
|
10
|
-
# @param storage_location [
|
|
11
|
-
|
|
9
|
+
# @param storage_location [StorageLocation] storage location containing the file
|
|
10
|
+
# @param metadata_record [MetadataRecord] metadata record for this file object. Optional.
|
|
11
|
+
# @param physical_path [String] physical path where the file is located. Defaults to the file_path.
|
|
12
|
+
def initialize(file_path, storage_location, metadata_record = nil, physical_path = nil)
|
|
12
13
|
raise ArgumentError.new("FileRecord requires a path") if file_path.nil?
|
|
13
14
|
raise ArgumentError.new("FileRecord requires a storage_location") if storage_location.nil?
|
|
14
|
-
|
|
15
|
+
|
|
15
16
|
@path = file_path
|
|
16
17
|
@storage_location = storage_location
|
|
18
|
+
@metadata_record = metadata_record
|
|
19
|
+
@physical_path = physical_path
|
|
17
20
|
end
|
|
18
|
-
|
|
21
|
+
|
|
19
22
|
# @return [String] path for the metadata file for this file
|
|
20
23
|
def metadata_path
|
|
21
24
|
@metadata_path = @storage_location.get_metadata_path_for(path) if @metadata_path.nil?
|
|
22
25
|
@metadata_path
|
|
23
26
|
end
|
|
27
|
+
|
|
28
|
+
def physical_path
|
|
29
|
+
if @physical_path.nil?
|
|
30
|
+
if @metadata_record.nil? || @metadata_record.physical_path.nil?
|
|
31
|
+
@physical_path = @path
|
|
32
|
+
else
|
|
33
|
+
@physical_path = @metadata_record.physical_path
|
|
34
|
+
end
|
|
35
|
+
end
|
|
36
|
+
@physical_path
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
def metadata_present?
|
|
40
|
+
File.exist?(metadata_path)
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
def ==(other_obj)
|
|
44
|
+
return false unless other_obj.is_a?(FileRecord)
|
|
45
|
+
path == other_obj.path
|
|
46
|
+
end
|
|
24
47
|
end
|
|
25
|
-
end
|
|
48
|
+
end
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
require 'longleaf/services/metadata_serializer'
|
|
2
|
+
require 'longleaf/models/metadata_location'
|
|
3
|
+
require 'longleaf/models/storage_types'
|
|
4
|
+
|
|
5
|
+
module Longleaf
|
|
6
|
+
# A filesystem based location in which metadata associated with registered files is stored.
|
|
7
|
+
class FilesystemMetadataLocation < MetadataLocation
|
|
8
|
+
AF ||= Longleaf::AppFields
|
|
9
|
+
|
|
10
|
+
def initialize(config)
|
|
11
|
+
super(config)
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
# @return the storage type for this location
|
|
15
|
+
def type
|
|
16
|
+
StorageTypes::FILESYSTEM_STORAGE_TYPE
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
# Get the absolute path for the metadata file for the given file path located in this storage location.
|
|
20
|
+
# @param file_path [String] path of the file relative to its storage location
|
|
21
|
+
# @return absolute path to the metadata
|
|
22
|
+
# @raise [ArgumentError] if the file_path is not provided.
|
|
23
|
+
def metadata_path_for(file_path)
|
|
24
|
+
raise ArgumentError.new("A file_path parameter is required") if file_path.nil?
|
|
25
|
+
raise ArgumentError.new("File path must be relative") if Pathname.new(file_path).absolute?
|
|
26
|
+
|
|
27
|
+
md_path = File.join(@path, file_path)
|
|
28
|
+
# If the file_path is to a file, then add metadata suffix.
|
|
29
|
+
if md_path.end_with?('/')
|
|
30
|
+
md_path
|
|
31
|
+
else
|
|
32
|
+
md_path + MetadataSerializer::metadata_suffix
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
# Get the metadata path relative to this location
|
|
37
|
+
# @param md_path [String] metadata file path
|
|
38
|
+
# @return the metadata path relative to this location
|
|
39
|
+
# @raise [ArgumentError] if the metadata path is not contained by this location
|
|
40
|
+
def relativize(md_path)
|
|
41
|
+
return md_path if Pathname.new(md_path).relative?
|
|
42
|
+
|
|
43
|
+
raise ArgumentError.new("Metadata path must be contained by this location") if !md_path.start_with?(@path)
|
|
44
|
+
|
|
45
|
+
md_path.sub(@path, "")
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
# Checks that the path defined in this metadata location is available
|
|
50
|
+
# @raise [StorageLocationUnavailableError] if the metadata location is not available
|
|
51
|
+
def available?
|
|
52
|
+
raise StorageLocationUnavailableError.new("Metadata path does not exist or is not a directory: #{@path}")\
|
|
53
|
+
unless Dir.exist?(@path)
|
|
54
|
+
end
|
|
55
|
+
end
|
|
56
|
+
end
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
require 'longleaf/models/storage_location'
|
|
2
|
+
require 'longleaf/models/storage_types'
|
|
3
|
+
|
|
4
|
+
module Longleaf
|
|
5
|
+
# A storage location in a local filesystem
|
|
6
|
+
class FilesystemStorageLocation < StorageLocation
|
|
7
|
+
# @param name [String] the name of this storage location
|
|
8
|
+
# @param config [Hash] hash containing the configuration options for this location
|
|
9
|
+
# @param md_loc [MetadataLocation] metadata location associated with this storage location
|
|
10
|
+
def initialize(name, config, md_loc)
|
|
11
|
+
super(name, config, md_loc)
|
|
12
|
+
@path += File::SEPARATOR unless @path.end_with?(File::SEPARATOR)
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
# @return the storage type for this location
|
|
16
|
+
def type
|
|
17
|
+
StorageTypes::FILESYSTEM_STORAGE_TYPE
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
# Get the absolute path to the file associated with the provided metadata path
|
|
21
|
+
# @param md_path [String] metadata file path
|
|
22
|
+
# @raise [ArgumentError] if the md_path is not in this storage location
|
|
23
|
+
# @return [String] the path for the file associated with this metadata
|
|
24
|
+
def get_path_from_metadata_path(md_path)
|
|
25
|
+
raise ArgumentError.new("A file_path parameter is required") if md_path.nil? || md_path.empty?
|
|
26
|
+
|
|
27
|
+
rel_path = @metadata_location.relative_file_path_for(md_path)
|
|
28
|
+
|
|
29
|
+
File.join(@path, rel_path)
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
# Checks that the path and metadata path defined in this location are available
|
|
33
|
+
# @raise [StorageLocationUnavailableError] if the storage location is not available
|
|
34
|
+
def available?
|
|
35
|
+
raise StorageLocationUnavailableError.new("Path does not exist or is not a directory: #{@path}")\
|
|
36
|
+
unless Dir.exist?(@path)
|
|
37
|
+
@metadata_location.available?
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
# Get the file path relative to this location
|
|
41
|
+
# @param file_path [String] file path
|
|
42
|
+
# @return the file path relative to this location
|
|
43
|
+
# @raise [ArgumentError] if the file path is not contained by this location
|
|
44
|
+
def relativize(file_path)
|
|
45
|
+
return file_path if Pathname.new(file_path).relative?
|
|
46
|
+
|
|
47
|
+
raise ArgumentError.new("Metadata path must be contained by this location") if !file_path.start_with?(@path)
|
|
48
|
+
|
|
49
|
+
file_path.sub(@path, "")
|
|
50
|
+
end
|
|
51
|
+
end
|
|
52
|
+
end
|
|
@@ -1,13 +1,15 @@
|
|
|
1
1
|
module Longleaf
|
|
2
|
+
# File metadata fields
|
|
2
3
|
class MDFields
|
|
3
4
|
DATA = 'data'
|
|
4
5
|
SERVICES = 'services'
|
|
5
|
-
|
|
6
|
+
|
|
6
7
|
REGISTERED_TIMESTAMP = 'registered'
|
|
7
8
|
DEREGISTERED_TIMESTAMP = 'deregistered'
|
|
8
9
|
|
|
9
10
|
LAST_MODIFIED = 'last-modified'
|
|
10
11
|
FILE_SIZE = 'size'
|
|
12
|
+
PHYSICAL_PATH = 'physical-path'
|
|
11
13
|
|
|
12
14
|
CHECKSUMS = 'checksums'
|
|
13
15
|
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
require 'longleaf/models/app_fields'
|
|
2
|
+
|
|
3
|
+
module Longleaf
|
|
4
|
+
# A location in which metadata associated with registered files is stored.
|
|
5
|
+
class MetadataLocation
|
|
6
|
+
AF ||= Longleaf::AppFields
|
|
7
|
+
|
|
8
|
+
attr_reader :path
|
|
9
|
+
attr_reader :digests
|
|
10
|
+
|
|
11
|
+
def initialize(config)
|
|
12
|
+
raise ArgumentError.new("Config parameter is required") unless config
|
|
13
|
+
@path = config[AF::LOCATION_PATH]
|
|
14
|
+
raise ArgumentError.new("Parameter path is required") unless @path
|
|
15
|
+
@path += '/' unless @path.end_with?('/')
|
|
16
|
+
|
|
17
|
+
digests = config[AF::METADATA_DIGESTS]
|
|
18
|
+
if digests.nil?
|
|
19
|
+
@digests = []
|
|
20
|
+
elsif digests.is_a?(String)
|
|
21
|
+
@digests = [digests.downcase]
|
|
22
|
+
else
|
|
23
|
+
@digests = digests.map(&:downcase)
|
|
24
|
+
end
|
|
25
|
+
DigestHelper::validate_algorithms(@digests)
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
# Transforms the given metadata path into a relative storage location path
|
|
29
|
+
# @param md_path [String] path of the metadata file or directory to compute file path for.
|
|
30
|
+
# @return [String] the relative path, with the metadata file suffix removed if present
|
|
31
|
+
def relative_file_path_for(md_path)
|
|
32
|
+
rel_md_path = relativize(md_path)
|
|
33
|
+
|
|
34
|
+
if rel_md_path.end_with?(MetadataSerializer::metadata_suffix)
|
|
35
|
+
rel_md_path[0..-MetadataSerializer::metadata_suffix.length - 1]
|
|
36
|
+
else
|
|
37
|
+
rel_md_path
|
|
38
|
+
end
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
# @param md_path [String] metadata path to check
|
|
42
|
+
# @return true if the metadata path is contained by the path for this location
|
|
43
|
+
def contains?(md_path)
|
|
44
|
+
md_path.start_with?(@path)
|
|
45
|
+
end
|
|
46
|
+
end
|
|
47
|
+
end
|
|
@@ -1,14 +1,17 @@
|
|
|
1
1
|
require_relative 'md_fields'
|
|
2
2
|
require_relative 'service_record'
|
|
3
|
+
require 'longleaf/helpers/case_insensitive_hash'
|
|
3
4
|
|
|
4
|
-
# Metadata record for a single file
|
|
5
5
|
module Longleaf
|
|
6
|
+
# Metadata record for a single file
|
|
6
7
|
class MetadataRecord
|
|
7
|
-
attr_reader :
|
|
8
|
+
attr_reader :registered
|
|
9
|
+
attr_accessor :deregistered
|
|
8
10
|
attr_reader :checksums
|
|
9
11
|
attr_reader :properties
|
|
10
12
|
attr_accessor :file_size, :last_modified
|
|
11
|
-
|
|
13
|
+
attr_accessor :physical_path
|
|
14
|
+
|
|
12
15
|
# @param properties [Hash] initial data properties for this record
|
|
13
16
|
# @param services [Hash] initial service property tree
|
|
14
17
|
# @param deregistered [String] deregistered timestamp
|
|
@@ -16,42 +19,66 @@ module Longleaf
|
|
|
16
19
|
# @param checksums [Hash] hash of checksum values
|
|
17
20
|
# @param file_size [Integer] size of file in bytes
|
|
18
21
|
# @param last_modified [String] iso8601 representation of the last modified date of file
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
+
# @param physical_path [String] physical path where the file is located
|
|
23
|
+
def initialize(properties: nil, services: nil, deregistered: nil, registered: nil, checksums: nil,
|
|
24
|
+
file_size: nil, last_modified: nil, physical_path: nil)
|
|
25
|
+
@properties = properties || Hash.new
|
|
22
26
|
@registered = registered
|
|
23
27
|
@deregistered = deregistered
|
|
24
|
-
@checksums =
|
|
25
|
-
@
|
|
28
|
+
@checksums = CaseInsensitiveHash.new
|
|
29
|
+
@checksums.merge!(checksums) unless checksums.nil?
|
|
30
|
+
@services = services || Hash.new
|
|
26
31
|
@file_size = file_size
|
|
27
32
|
@last_modified = last_modified
|
|
33
|
+
@physical_path = physical_path
|
|
28
34
|
end
|
|
29
|
-
|
|
35
|
+
|
|
30
36
|
# @return [Boolean] true if the record is deregistered
|
|
31
37
|
def deregistered?
|
|
32
38
|
!@deregistered.nil?
|
|
33
39
|
end
|
|
34
|
-
|
|
40
|
+
|
|
35
41
|
# Adds a service to this record
|
|
36
42
|
#
|
|
37
43
|
# @param name [String] identifier for the service being added
|
|
38
|
-
# @param
|
|
39
|
-
|
|
44
|
+
# @param service [ServiceRecord] properties for populating the new service
|
|
45
|
+
# @return [ServiceRecord] the service added
|
|
46
|
+
def add_service(name, service = ServiceRecord.new)
|
|
40
47
|
raise ArgumentError.new("Value must be a ServiceRecord object when adding a service") unless service.class == Longleaf::ServiceRecord
|
|
41
48
|
raise IndexError.new("Service with name '#{name}' already exists") if @services.key?(name)
|
|
42
|
-
|
|
49
|
+
|
|
43
50
|
@services[name] = service
|
|
44
51
|
end
|
|
45
|
-
|
|
52
|
+
|
|
53
|
+
# Updates details of service record as if the service had been executed.
|
|
54
|
+
# @param service_name [String] name of the service run
|
|
55
|
+
# @return [ServiceRecord] the service record updated
|
|
56
|
+
def update_service_as_performed(service_name)
|
|
57
|
+
service_rec = service(service_name) || add_service(service_name)
|
|
58
|
+
service_rec.run_needed = false
|
|
59
|
+
service_rec.timestamp = ServiceDateHelper.formatted_timestamp
|
|
60
|
+
service_rec
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
# Updates details of service record as if the service had encountered a
|
|
64
|
+
# failure during execution.
|
|
65
|
+
# @param service_name [String] name of the service run
|
|
66
|
+
# @return [ServiceRecord] the service record updated
|
|
67
|
+
def update_service_as_failed(service_name)
|
|
68
|
+
service_rec = service(service_name) || add_service(service_name)
|
|
69
|
+
service_rec.failure_timestamp = ServiceDateHelper.formatted_timestamp
|
|
70
|
+
service_rec
|
|
71
|
+
end
|
|
72
|
+
|
|
46
73
|
# @param name [String] name identifier of the service to retrieve
|
|
47
74
|
# @return [ServiceRecord] the ServiceRecord for the service identified by name, or nil
|
|
48
75
|
def service(name)
|
|
49
76
|
@services[name]
|
|
50
77
|
end
|
|
51
|
-
|
|
78
|
+
|
|
52
79
|
# @return [Array<String>] a list of name identifiers for services registered to this record
|
|
53
80
|
def list_services
|
|
54
81
|
@services.keys
|
|
55
82
|
end
|
|
56
83
|
end
|
|
57
|
-
end
|
|
84
|
+
end
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
require 'longleaf/models/storage_location'
|
|
2
|
+
require 'longleaf/models/storage_types'
|
|
3
|
+
require 'longleaf/helpers/s3_uri_helper'
|
|
4
|
+
require 'longleaf/logging'
|
|
5
|
+
require 'uri'
|
|
6
|
+
require 'aws-sdk-s3'
|
|
7
|
+
|
|
8
|
+
module Longleaf
|
|
9
|
+
# A storage location in an S3 bucket
|
|
10
|
+
#
|
|
11
|
+
# Optionally, the location configuration may include an "options" sub-hash in order to provide
|
|
12
|
+
# any of the s3 client options specified in Client initializer:
|
|
13
|
+
# https://docs.aws.amazon.com/sdk-for-ruby/v3/api/Aws/S3/Client.html#constructor_details
|
|
14
|
+
|
|
15
|
+
class S3StorageLocation < StorageLocation
|
|
16
|
+
include Longleaf::Logging
|
|
17
|
+
|
|
18
|
+
IS_URI_REGEX = /\A#{URI::regexp}\z/
|
|
19
|
+
|
|
20
|
+
CLIENT_OPTIONS_FIELD = 'options'
|
|
21
|
+
|
|
22
|
+
# @param name [String] the name of this storage location
|
|
23
|
+
# @param config [Hash] hash containing the configuration options for this location
|
|
24
|
+
# @param md_loc [MetadataLocation] metadata location associated with this storage location
|
|
25
|
+
def initialize(name, config, md_loc)
|
|
26
|
+
super(name, config, md_loc)
|
|
27
|
+
|
|
28
|
+
@bucket_name = S3UriHelper.extract_bucket(@path)
|
|
29
|
+
if @bucket_name.nil?
|
|
30
|
+
raise ArgumentError.new("Unable to identify bucket for location #{@name} from path #{@path}")
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
# Force path to always end with a slash
|
|
34
|
+
@path += '/' unless @path.end_with?('/')
|
|
35
|
+
|
|
36
|
+
custom_options = config[CLIENT_OPTIONS_FIELD]
|
|
37
|
+
if custom_options.nil?
|
|
38
|
+
@client_options = Hash.new
|
|
39
|
+
else
|
|
40
|
+
# Clone options and convert keys to symbols
|
|
41
|
+
@client_options = Hash[custom_options.map { |(k,v)| [k.to_sym,v] } ]
|
|
42
|
+
end
|
|
43
|
+
@client_options[:logger] = logger
|
|
44
|
+
@client_options[:log_level] = :debug if @client_options[:log_level].nil?
|
|
45
|
+
|
|
46
|
+
# If no region directly configured, use region from path
|
|
47
|
+
if !@client_options.key?(:region)
|
|
48
|
+
region = S3UriHelper.extract_region(@path)
|
|
49
|
+
@client_options[:region] = region unless region.nil?
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
@subpath_prefix = S3UriHelper.extract_path(@path)
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
# @return the storage type for this location
|
|
56
|
+
def type
|
|
57
|
+
StorageTypes::S3_STORAGE_TYPE
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
# Get the absolute path to the file associated with the provided metadata path
|
|
61
|
+
# @param md_path [String] metadata file path
|
|
62
|
+
# @raise [ArgumentError] if the md_path is not in this storage location
|
|
63
|
+
# @return [String] the path for the file associated with this metadata
|
|
64
|
+
def get_path_from_metadata_path(md_path)
|
|
65
|
+
raise ArgumentError.new("A file_path parameter is required") if md_path.nil? || md_path.empty?
|
|
66
|
+
|
|
67
|
+
rel_path = @metadata_location.relative_file_path_for(md_path)
|
|
68
|
+
|
|
69
|
+
URI.join(@path, rel_path).to_s
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
# Checks that the path and metadata path defined in this location are available
|
|
73
|
+
# @raise [StorageLocationUnavailableError] if the storage location is not available
|
|
74
|
+
def available?
|
|
75
|
+
begin
|
|
76
|
+
s3_client().head_bucket({ bucket: @bucket_name, use_accelerate_endpoint: false })
|
|
77
|
+
rescue StandardError => e
|
|
78
|
+
raise StorageLocationUnavailableError.new("Destination bucket #{@bucket_name} does not exist " \
|
|
79
|
+
+ "or is not accessible: #{e.message}")
|
|
80
|
+
end
|
|
81
|
+
@metadata_location.available?
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
# Get the file path relative to this location
|
|
85
|
+
# @param file_path [String] file path
|
|
86
|
+
# @return the file path relative to this location
|
|
87
|
+
# @raise [ArgumentError] if the file path is not contained by this location
|
|
88
|
+
def relativize(file_path)
|
|
89
|
+
raise ArgumentError.new("Must provide a non-nil path to relativize") if file_path.nil?
|
|
90
|
+
|
|
91
|
+
if file_path.start_with?(@path)
|
|
92
|
+
file_path[@path.length..-1]
|
|
93
|
+
else
|
|
94
|
+
if file_path =~ IS_URI_REGEX
|
|
95
|
+
raise ArgumentError.new("Path #{file_path} is not contained by #{@name}")
|
|
96
|
+
else
|
|
97
|
+
# path already relative
|
|
98
|
+
file_path
|
|
99
|
+
end
|
|
100
|
+
end
|
|
101
|
+
end
|
|
102
|
+
|
|
103
|
+
# Prefixes the provided path with the query path portion of the location's path
|
|
104
|
+
# after the bucket uri, used to place relative paths into the same sub-URL of a bucket.
|
|
105
|
+
# For example:
|
|
106
|
+
# Given a location with 'path' http://example.s3-amazonaws.com/env/test/
|
|
107
|
+
# Where rel_path = 'path/to/text.txt'
|
|
108
|
+
# The result would be 'env/test/path/to/text.txt'
|
|
109
|
+
# @param rel_path relative path to work with
|
|
110
|
+
# @return the given relative path prefixed with the path portion of the storage location path
|
|
111
|
+
def relative_to_bucket_path(rel_path)
|
|
112
|
+
raise ArgumentError.new("Must provide a non-nil path") if rel_path.nil?
|
|
113
|
+
|
|
114
|
+
if @subpath_prefix.nil?
|
|
115
|
+
return rel_path
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
@subpath_prefix + rel_path
|
|
119
|
+
end
|
|
120
|
+
|
|
121
|
+
# @return the bucket used by this storage location
|
|
122
|
+
def s3_bucket
|
|
123
|
+
if @bucket.nil?
|
|
124
|
+
@s3 = Aws::S3::Resource.new(client: s3_client())
|
|
125
|
+
@bucket = @s3.bucket(@bucket_name)
|
|
126
|
+
end
|
|
127
|
+
@bucket
|
|
128
|
+
end
|
|
129
|
+
|
|
130
|
+
# @return the s3 client used by this storage location
|
|
131
|
+
def s3_client
|
|
132
|
+
if @client.nil?
|
|
133
|
+
@client = Aws::S3::Client.new(**@client_options)
|
|
134
|
+
end
|
|
135
|
+
@client
|
|
136
|
+
end
|
|
137
|
+
end
|
|
138
|
+
end
|