longleaf 0.2.0.pre.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.circleci/config.yml +84 -0
- data/.gitignore +4 -2
- data/.rubocop.yml +42 -2
- data/.rubocop_todo.yml +390 -311
- data/.yardopts +1 -0
- data/Gemfile +16 -1
- data/README.md +67 -13
- data/Rakefile +6 -0
- data/bin/setup +16 -1
- data/docs/aboutlongleaf.md +28 -0
- data/docs/extra.css +32 -0
- data/docs/img/change-file.png +0 -0
- data/docs/img/ll-example-preserved.png +0 -0
- data/docs/index.md +19 -0
- data/docs/install.md +66 -0
- data/docs/ll-example/config-example-relative.yml +33 -0
- data/docs/ll-example/files-dir/LLexample-PDF.pdf +0 -0
- data/docs/ll-example/files-dir/LLexample-TOCHANGE.txt +15 -0
- data/docs/ll-example/files-dir/LLexample-tokeep.txt +10 -0
- data/docs/ll-example/metadata-dir/.gitkeep +0 -0
- data/docs/ll-example/replica-files/.gitkeep +0 -0
- data/docs/ll-example/replica-metadata/.gitkeep +0 -0
- data/docs/quickstart.md +270 -0
- data/docs/rdocs/Longleaf.html +135 -0
- data/docs/rdocs/Longleaf/AppFields.html +178 -0
- data/docs/rdocs/Longleaf/ApplicationConfigDeserializer.html +631 -0
- data/docs/rdocs/Longleaf/ApplicationConfigManager.html +610 -0
- data/docs/rdocs/Longleaf/ApplicationConfigValidator.html +238 -0
- data/docs/rdocs/Longleaf/CLI.html +909 -0
- data/docs/rdocs/Longleaf/ChecksumMismatchError.html +151 -0
- data/docs/rdocs/Longleaf/ConfigBuilder.html +1339 -0
- data/docs/rdocs/Longleaf/ConfigurationError.html +143 -0
- data/docs/rdocs/Longleaf/ConfigurationValidator.html +227 -0
- data/docs/rdocs/Longleaf/DeregisterCommand.html +420 -0
- data/docs/rdocs/Longleaf/DeregisterEvent.html +453 -0
- data/docs/rdocs/Longleaf/DeregistrationError.html +151 -0
- data/docs/rdocs/Longleaf/DigestHelper.html +419 -0
- data/docs/rdocs/Longleaf/EventError.html +147 -0
- data/docs/rdocs/Longleaf/EventNames.html +163 -0
- data/docs/rdocs/Longleaf/EventStatusTracking.html +656 -0
- data/docs/rdocs/Longleaf/FileCheckService.html +540 -0
- data/docs/rdocs/Longleaf/FileHelpers.html +520 -0
- data/docs/rdocs/Longleaf/FileRecord.html +716 -0
- data/docs/rdocs/Longleaf/FileSelector.html +901 -0
- data/docs/rdocs/Longleaf/FixityCheckService.html +691 -0
- data/docs/rdocs/Longleaf/IndexManager.html +1155 -0
- data/docs/rdocs/Longleaf/InvalidDigestAlgorithmError.html +143 -0
- data/docs/rdocs/Longleaf/InvalidStoragePathError.html +143 -0
- data/docs/rdocs/Longleaf/Logging.html +405 -0
- data/docs/rdocs/Longleaf/Logging/RedirectingLogger.html +1213 -0
- data/docs/rdocs/Longleaf/LongleafError.html +139 -0
- data/docs/rdocs/Longleaf/MDFields.html +193 -0
- data/docs/rdocs/Longleaf/MetadataBuilder.html +787 -0
- data/docs/rdocs/Longleaf/MetadataDeserializer.html +537 -0
- data/docs/rdocs/Longleaf/MetadataError.html +143 -0
- data/docs/rdocs/Longleaf/MetadataPersistenceManager.html +539 -0
- data/docs/rdocs/Longleaf/MetadataRecord.html +1411 -0
- data/docs/rdocs/Longleaf/MetadataSerializer.html +786 -0
- data/docs/rdocs/Longleaf/PreservationServiceError.html +147 -0
- data/docs/rdocs/Longleaf/PreserveCommand.html +410 -0
- data/docs/rdocs/Longleaf/PreserveEvent.html +491 -0
- data/docs/rdocs/Longleaf/RegisterCommand.html +428 -0
- data/docs/rdocs/Longleaf/RegisterEvent.html +628 -0
- data/docs/rdocs/Longleaf/RegisteredFileSelector.html +446 -0
- data/docs/rdocs/Longleaf/RegistrationError.html +151 -0
- data/docs/rdocs/Longleaf/ReindexCommand.html +576 -0
- data/docs/rdocs/Longleaf/RsyncReplicationService.html +1180 -0
- data/docs/rdocs/Longleaf/SequelIndexDriver.html +1978 -0
- data/docs/rdocs/Longleaf/ServiceCandidateFilesystemIterator.html +572 -0
- data/docs/rdocs/Longleaf/ServiceCandidateIndexIterator.html +532 -0
- data/docs/rdocs/Longleaf/ServiceCandidateLocator.html +333 -0
- data/docs/rdocs/Longleaf/ServiceClassCache.html +725 -0
- data/docs/rdocs/Longleaf/ServiceDateHelper.html +425 -0
- data/docs/rdocs/Longleaf/ServiceDefinition.html +683 -0
- data/docs/rdocs/Longleaf/ServiceDefinitionManager.html +371 -0
- data/docs/rdocs/Longleaf/ServiceDefinitionValidator.html +269 -0
- data/docs/rdocs/Longleaf/ServiceFields.html +173 -0
- data/docs/rdocs/Longleaf/ServiceManager.html +1229 -0
- data/docs/rdocs/Longleaf/ServiceMappingManager.html +410 -0
- data/docs/rdocs/Longleaf/ServiceMappingValidator.html +347 -0
- data/docs/rdocs/Longleaf/ServiceRecord.html +821 -0
- data/docs/rdocs/Longleaf/StorageLocation.html +985 -0
- data/docs/rdocs/Longleaf/StorageLocationManager.html +729 -0
- data/docs/rdocs/Longleaf/StorageLocationUnavailableError.html +143 -0
- data/docs/rdocs/Longleaf/StorageLocationValidator.html +373 -0
- data/docs/rdocs/Longleaf/StoragePathValidator.html +253 -0
- data/docs/rdocs/Longleaf/SystemConfigBuilder.html +441 -0
- data/docs/rdocs/Longleaf/SystemConfigFields.html +163 -0
- data/docs/rdocs/Longleaf/ValidateConfigCommand.html +451 -0
- data/docs/rdocs/Longleaf/ValidateMetadataCommand.html +408 -0
- data/docs/rdocs/_index.html +660 -0
- data/docs/rdocs/class_list.html +51 -0
- data/docs/rdocs/css/common.css +1 -0
- data/docs/rdocs/css/full_list.css +58 -0
- data/docs/rdocs/css/style.css +496 -0
- data/docs/rdocs/file.README.html +165 -0
- data/docs/rdocs/file_list.html +56 -0
- data/docs/rdocs/frames.html +17 -0
- data/docs/rdocs/index.html +165 -0
- data/docs/rdocs/js/app.js +303 -0
- data/docs/rdocs/js/full_list.js +216 -0
- data/docs/rdocs/js/jquery.js +4 -0
- data/docs/rdocs/method_list.html +2051 -0
- data/docs/rdocs/top-level-namespace.html +110 -0
- data/lib/longleaf/candidates/file_selector.rb +47 -15
- data/lib/longleaf/candidates/registered_file_selector.rb +67 -0
- data/lib/longleaf/candidates/service_candidate_filesystem_iterator.rb +29 -35
- data/lib/longleaf/candidates/service_candidate_index_iterator.rb +84 -0
- data/lib/longleaf/candidates/service_candidate_locator.rb +9 -4
- data/lib/longleaf/cli.rb +162 -80
- data/lib/longleaf/commands/deregister_command.rb +12 -11
- data/lib/longleaf/commands/preserve_command.rb +13 -8
- data/lib/longleaf/commands/register_command.rb +9 -6
- data/lib/longleaf/commands/reindex_command.rb +92 -0
- data/lib/longleaf/commands/validate_config_command.rb +27 -6
- data/lib/longleaf/commands/validate_metadata_command.rb +11 -9
- data/lib/longleaf/errors.rb +12 -12
- data/lib/longleaf/events/deregister_event.rb +13 -15
- data/lib/longleaf/events/event_status_tracking.rb +7 -7
- data/lib/longleaf/events/preserve_event.rb +24 -14
- data/lib/longleaf/events/register_event.rb +21 -35
- data/lib/longleaf/helpers/digest_helper.rb +4 -4
- data/lib/longleaf/helpers/service_date_helper.rb +5 -6
- data/lib/longleaf/indexing/index_manager.rb +101 -0
- data/lib/longleaf/indexing/sequel_index_driver.rb +324 -0
- data/lib/longleaf/logging.rb +4 -4
- data/lib/longleaf/logging/redirecting_logger.rb +20 -20
- data/lib/longleaf/models/app_fields.rb +2 -1
- data/lib/longleaf/models/file_record.rb +10 -6
- data/lib/longleaf/models/md_fields.rb +1 -1
- data/lib/longleaf/models/metadata_record.rb +22 -12
- data/lib/longleaf/models/service_definition.rb +3 -3
- data/lib/longleaf/models/service_fields.rb +1 -1
- data/lib/longleaf/models/service_record.rb +6 -5
- data/lib/longleaf/models/storage_location.rb +26 -7
- data/lib/longleaf/models/system_config_fields.rb +9 -0
- data/lib/longleaf/preservation_services/file_check_service.rb +58 -0
- data/lib/longleaf/preservation_services/fixity_check_service.rb +16 -14
- data/lib/longleaf/preservation_services/rsync_replication_service.rb +32 -31
- data/lib/longleaf/services/application_config_deserializer.rb +55 -18
- data/lib/longleaf/services/application_config_manager.rb +16 -4
- data/lib/longleaf/services/application_config_validator.rb +1 -2
- data/lib/longleaf/services/configuration_validator.rb +6 -4
- data/lib/longleaf/services/metadata_deserializer.rb +40 -38
- data/lib/longleaf/services/metadata_persistence_manager.rb +46 -0
- data/lib/longleaf/services/metadata_serializer.rb +23 -22
- data/lib/longleaf/services/service_class_cache.rb +15 -15
- data/lib/longleaf/services/service_definition_manager.rb +5 -6
- data/lib/longleaf/services/service_definition_validator.rb +5 -6
- data/lib/longleaf/services/service_manager.rb +37 -17
- data/lib/longleaf/services/service_mapping_manager.rb +9 -9
- data/lib/longleaf/services/service_mapping_validator.rb +9 -10
- data/lib/longleaf/services/storage_location_manager.rb +22 -8
- data/lib/longleaf/services/storage_location_validator.rb +11 -8
- data/lib/longleaf/services/storage_path_validator.rb +1 -1
- data/lib/longleaf/specs/config_builder.rb +30 -17
- data/lib/longleaf/specs/custom_matchers.rb +1 -1
- data/lib/longleaf/specs/file_helpers.rb +15 -14
- data/lib/longleaf/specs/metadata_builder.rb +91 -0
- data/lib/longleaf/specs/system_config_builder.rb +27 -0
- data/lib/longleaf/version.rb +1 -1
- data/longleaf.gemspec +17 -7
- data/mkdocs.yml +20 -0
- metadata +233 -22
|
@@ -10,7 +10,7 @@ module Longleaf
|
|
|
10
10
|
# Event to register a file with longleaf
|
|
11
11
|
class RegisterEvent
|
|
12
12
|
include Longleaf::EventStatusTracking
|
|
13
|
-
|
|
13
|
+
|
|
14
14
|
# @param file_rec [FileRecord] file record
|
|
15
15
|
# @param app_manager [ApplicationConfigManager] the application configuration
|
|
16
16
|
# @param force [boolean] if true, then already registered files will be re-registered
|
|
@@ -21,75 +21,61 @@ module Longleaf
|
|
|
21
21
|
raise ArgumentError.new('Must provide an ApplicationConfigManager') if app_manager.nil?
|
|
22
22
|
raise ArgumentError.new('Parameter app_manager must be an ApplicationConfigManager') \
|
|
23
23
|
unless app_manager.is_a?(ApplicationConfigManager)
|
|
24
|
-
|
|
24
|
+
|
|
25
25
|
@app_manager = app_manager
|
|
26
26
|
@file_rec = file_rec
|
|
27
27
|
@force = force
|
|
28
28
|
@checksums = checksums
|
|
29
29
|
end
|
|
30
|
-
|
|
30
|
+
|
|
31
31
|
# Perform a registration event on the given file
|
|
32
|
-
# @raise RegistrationError if a file cannot be registered
|
|
32
|
+
# @raise RegistrationError if a file cannot be registered
|
|
33
33
|
def perform
|
|
34
34
|
begin
|
|
35
35
|
# Only need to re-register file if the force flag is provided
|
|
36
36
|
if @file_rec.metadata_present? && !@force
|
|
37
37
|
raise RegistrationError.new("Unable to register '#{@file_rec.path}', it is already registered.")
|
|
38
38
|
end
|
|
39
|
-
|
|
39
|
+
|
|
40
40
|
# create metadata record
|
|
41
|
-
md_rec = MetadataRecord.new(registered: Time.now.utc.iso8601)
|
|
41
|
+
md_rec = MetadataRecord.new(registered: Time.now.utc.iso8601(3))
|
|
42
42
|
@file_rec.metadata_record = md_rec
|
|
43
|
-
|
|
43
|
+
|
|
44
44
|
# retain significant details from former record
|
|
45
45
|
if @file_rec.metadata_present?
|
|
46
46
|
retain_existing_properties
|
|
47
47
|
end
|
|
48
|
-
|
|
48
|
+
|
|
49
49
|
populate_file_properties
|
|
50
|
-
|
|
50
|
+
|
|
51
51
|
md_rec.checksums.merge!(@checksums) unless @checksums.nil?
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
MetadataSerializer::write(metadata: md_rec,
|
|
57
|
-
file_path: @file_rec.metadata_path,
|
|
58
|
-
digest_algs: @file_rec.storage_location.metadata_digests)
|
|
59
|
-
|
|
52
|
+
|
|
53
|
+
# persist the metadata
|
|
54
|
+
@app_manager.md_manager.persist(@file_rec)
|
|
55
|
+
|
|
60
56
|
record_success(EventNames::REGISTER, @file_rec.path)
|
|
61
57
|
rescue RegistrationError => err
|
|
62
58
|
record_failure(EventNames::REGISTER, @file_rec.path, err.message)
|
|
63
59
|
rescue InvalidStoragePathError => err
|
|
64
60
|
record_failure(EventNames::REGISTER, @file_rec.path, err.message)
|
|
65
61
|
end
|
|
66
|
-
|
|
62
|
+
|
|
67
63
|
return_status
|
|
68
64
|
end
|
|
69
|
-
|
|
65
|
+
|
|
70
66
|
private
|
|
71
67
|
def populate_file_properties
|
|
72
68
|
md_rec = @file_rec.metadata_record
|
|
73
|
-
|
|
69
|
+
|
|
74
70
|
# Set file properties
|
|
75
|
-
md_rec.last_modified = File.mtime(@file_rec.path).utc.iso8601
|
|
71
|
+
md_rec.last_modified = File.mtime(@file_rec.path).utc.iso8601(3)
|
|
76
72
|
md_rec.file_size = File.size(@file_rec.path)
|
|
77
73
|
end
|
|
78
|
-
|
|
79
|
-
def populate_services
|
|
80
|
-
md_rec = @file_rec.metadata_record
|
|
81
|
-
|
|
82
|
-
service_manager = @app_manager.service_manager
|
|
83
|
-
service_names = service_manager.list_services(location: @file_rec.storage_location.name)
|
|
84
|
-
|
|
85
|
-
# Add service section
|
|
86
|
-
service_names.each { |serv_name| md_rec.add_service(serv_name) }
|
|
87
|
-
end
|
|
88
|
-
|
|
74
|
+
|
|
89
75
|
# Copy a subset of properties from an existing metadata record to the new record
|
|
90
76
|
def retain_existing_properties
|
|
91
77
|
md_rec = @file_rec.metadata_record
|
|
92
|
-
|
|
78
|
+
|
|
93
79
|
old_md = MetadataDeserializer.deserialize(file_path: @file_rec.metadata_path,
|
|
94
80
|
digest_algs: @file_rec.storage_location.metadata_digests)
|
|
95
81
|
# Copy custom properties
|
|
@@ -97,7 +83,7 @@ module Longleaf
|
|
|
97
83
|
# Copy stale-replicas flag per service
|
|
98
84
|
old_md.list_services.each do |serv_name|
|
|
99
85
|
serv_rec = old_md.service(serv_name)
|
|
100
|
-
|
|
86
|
+
|
|
101
87
|
stale_replicas = serv_rec.stale_replicas
|
|
102
88
|
if stale_replicas
|
|
103
89
|
new_service = md_rec.service(serv_name)
|
|
@@ -106,4 +92,4 @@ module Longleaf
|
|
|
106
92
|
end
|
|
107
93
|
end
|
|
108
94
|
end
|
|
109
|
-
end
|
|
95
|
+
end
|
|
@@ -5,7 +5,7 @@ module Longleaf
|
|
|
5
5
|
# Helper methods for generating digests
|
|
6
6
|
class DigestHelper
|
|
7
7
|
KNOWN_DIGESTS ||= ['md5', 'sha1', 'sha2', 'sha256', 'sha384', 'sha512', 'rmd160']
|
|
8
|
-
|
|
8
|
+
|
|
9
9
|
# @param algs Either a string containing one or an array containing zero or more digest
|
|
10
10
|
# algorithm names.
|
|
11
11
|
# @raise [InvalidDigestAlgorithmError] thrown if any of the digest algorithms listed are not
|
|
@@ -19,11 +19,11 @@ module Longleaf
|
|
|
19
19
|
else
|
|
20
20
|
unknown = algs.select { |alg| !KNOWN_DIGESTS.include?(alg) }
|
|
21
21
|
unless unknown.empty?
|
|
22
|
-
raise InvalidDigestAlgorithmError.new("Unknown digest algorithm(s): #{unknown
|
|
22
|
+
raise InvalidDigestAlgorithmError.new("Unknown digest algorithm(s): #{unknown}")
|
|
23
23
|
end
|
|
24
24
|
end
|
|
25
25
|
end
|
|
26
|
-
|
|
26
|
+
|
|
27
27
|
# Get a Digest class for the specified algorithm
|
|
28
28
|
# @param alg [String] name of the digest algorithm
|
|
29
29
|
# @return [Digest] A digest class for the requested algorithm
|
|
@@ -47,4 +47,4 @@ module Longleaf
|
|
|
47
47
|
end
|
|
48
48
|
end
|
|
49
49
|
end
|
|
50
|
-
end
|
|
50
|
+
end
|
|
@@ -3,7 +3,6 @@ require 'time'
|
|
|
3
3
|
module Longleaf
|
|
4
4
|
# Helper methods for interacting with dates/timestamps on services
|
|
5
5
|
class ServiceDateHelper
|
|
6
|
-
|
|
7
6
|
# Adds the amount of time from modifier to the provided timestamp
|
|
8
7
|
# @param timestamp [String] ISO-8601 timestamp string
|
|
9
8
|
# @param modifier [String] amount of time to add to the timestamp. It must follow the syntax
|
|
@@ -18,7 +17,7 @@ module Longleaf
|
|
|
18
17
|
else
|
|
19
18
|
raise ArgumentError.new("Cannot parse time modifier #{modifier}")
|
|
20
19
|
end
|
|
21
|
-
|
|
20
|
+
|
|
22
21
|
datetime = Time.iso8601(timestamp)
|
|
23
22
|
case unit
|
|
24
23
|
when 'second'
|
|
@@ -36,16 +35,16 @@ module Longleaf
|
|
|
36
35
|
when 'year'
|
|
37
36
|
unit_modifier = 365 * 24 * 3600
|
|
38
37
|
end
|
|
39
|
-
|
|
38
|
+
|
|
40
39
|
modified_time = datetime + (value * unit_modifier)
|
|
41
40
|
modified_time.iso8601
|
|
42
41
|
end
|
|
43
|
-
|
|
42
|
+
|
|
44
43
|
# Get a timestamp in the format expected for service timestamps.
|
|
45
44
|
# @param timestamp [Time] the time to format. Defaults to now.
|
|
46
45
|
# @return [String] the time formatted as iso8601
|
|
47
46
|
def self.formatted_timestamp(timestamp = Time.now)
|
|
48
|
-
timestamp.iso8601.to_s
|
|
47
|
+
timestamp.utc.iso8601(3).to_s
|
|
49
48
|
end
|
|
50
49
|
end
|
|
51
|
-
end
|
|
50
|
+
end
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
require 'longleaf/models/system_config_fields'
|
|
2
|
+
require 'longleaf/services/metadata_persistence_manager'
|
|
3
|
+
require 'longleaf/errors'
|
|
4
|
+
|
|
5
|
+
module Longleaf
|
|
6
|
+
# Manager configures and provides access to a metadata index if one is specified
|
|
7
|
+
class IndexManager
|
|
8
|
+
SYS_FIELDS ||= Longleaf::SystemConfigFields
|
|
9
|
+
|
|
10
|
+
# @param config [Hash] The system configuration as a hash
|
|
11
|
+
# @param app_config_manager [ApplicationConfigManager] the application config
|
|
12
|
+
def initialize(config, app_config_manager)
|
|
13
|
+
@config = config
|
|
14
|
+
@app_config_manager = app_config_manager
|
|
15
|
+
init_index_driver if @config&.key?(SYS_FIELDS::MD_INDEX)
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
# @return true if the system is configured to use a metadata index
|
|
19
|
+
def using_index?
|
|
20
|
+
!@index_driver.nil?
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
# Index the provided file_rec and its metadata
|
|
24
|
+
#
|
|
25
|
+
# @param file_rec [FileRecord] file record to index
|
|
26
|
+
def index(file_rec)
|
|
27
|
+
@index_driver.index(file_rec)
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
# Remove an entry from the index
|
|
31
|
+
# @param remove_me The record to remove from the index
|
|
32
|
+
def remove(remove_me)
|
|
33
|
+
@index_driver.remove(remove_me)
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
def clear_index(older_than = nil)
|
|
37
|
+
@index_driver.clear_index(older_than)
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
# @return true if the index should be reindexed
|
|
41
|
+
def index_stale?
|
|
42
|
+
@index_driver.is_stale?
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
# Setup initial structure of index implementation
|
|
46
|
+
def setup_index
|
|
47
|
+
@index_driver.setup_index
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
def update_index_state
|
|
51
|
+
@index_driver.update_index_state
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
# Retrieves a set of file paths which have one or more services which need to run.
|
|
55
|
+
#
|
|
56
|
+
# @param file_selector [FileSelector] selector for paths to search for files
|
|
57
|
+
# @param stale_datetime [DateTime] find file_paths with services needing to be run before this value
|
|
58
|
+
# @return [Array] array of file paths that need one or more services run, in ascending order by
|
|
59
|
+
# timestamp.
|
|
60
|
+
def paths_with_stale_services(file_selector, stale_datetime)
|
|
61
|
+
@index_driver.paths_with_stale_services(file_selector, stale_datetime)
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
# Retrieves a page of paths for registered files.
|
|
65
|
+
# @param file_selector [FileSelector] selector for what paths to search for files
|
|
66
|
+
# @return [Array] array of file paths that are registered
|
|
67
|
+
def registered_paths(file_selector)
|
|
68
|
+
@index_driver.registered_paths(file_selector)
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
def each_registered_path(file_selector, older_than: nil, &block)
|
|
72
|
+
@index_driver.each_registered_path(file_selector, older_than: older_than, &block)
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
private
|
|
76
|
+
def init_index_driver
|
|
77
|
+
index_conf = @config[SYS_FIELDS::MD_INDEX]
|
|
78
|
+
adapter = index_conf[SYS_FIELDS::MD_INDEX_ADAPTER]&.downcase
|
|
79
|
+
|
|
80
|
+
raise ConfigurationError.new('Must specify an adapter for the metadata index') if adapter.nil?
|
|
81
|
+
|
|
82
|
+
adapter = adapter.to_sym
|
|
83
|
+
|
|
84
|
+
case adapter
|
|
85
|
+
when :postgres, :mysql, :mysql2, :sqlite, :amalgalite
|
|
86
|
+
page_size = index_conf[SYS_FIELDS::MD_INDEX_PAGE_SIZE]&.to_int
|
|
87
|
+
|
|
88
|
+
connection = index_conf[SYS_FIELDS::MD_INDEX_CONNECTION]
|
|
89
|
+
raise ConfigurationError.new("Must specify connection details for index adapter of type '#{adapter}'") if connection.nil?
|
|
90
|
+
|
|
91
|
+
require 'longleaf/indexing/sequel_index_driver'
|
|
92
|
+
@index_driver = SequelIndexDriver.new(@app_config_manager,
|
|
93
|
+
adapter,
|
|
94
|
+
connection,
|
|
95
|
+
page_size: page_size)
|
|
96
|
+
else
|
|
97
|
+
# Reaching this branch means the adapter is non-nil but not a supported type, so always raise.
raise ConfigurationError.new("Unknown index adapter '#{adapter}' specified.")
|
|
98
|
+
end
|
|
99
|
+
end
|
|
100
|
+
end
|
|
101
|
+
end
|
|
@@ -0,0 +1,324 @@
|
|
|
1
|
+
require 'sequel'
|
|
2
|
+
require 'digest/md5'
|
|
3
|
+
require 'longleaf/events/event_names'
|
|
4
|
+
require 'longleaf/candidates/file_selector'
|
|
5
|
+
require 'longleaf/version'
|
|
6
|
+
require 'longleaf/models/system_config_fields'
|
|
7
|
+
require 'longleaf/logging'
|
|
8
|
+
|
|
9
|
+
module Longleaf
|
|
10
|
+
# Driver for interacting with an RDBMS-based metadata index using the Sequel ORM gem.
|
|
11
|
+
# Users must create the database and credentials for connecting to it in advance,
|
|
12
|
+
# if using a database application that requires creation of databases (ie, not sqlite).
|
|
13
|
+
# The default database name is 'longleaf_metadata_index' but may be overridden.
|
|
14
|
+
#
|
|
15
|
+
# See the Sequel documentation for details about accepted connection parameters:
|
|
16
|
+
# https://github.com/jeremyevans/sequel/blob/master/doc/opening_databases.rdoc
|
|
17
|
+
class SequelIndexDriver
|
|
18
|
+
include Longleaf::Logging
|
|
19
|
+
INDEX_DB_NAME ||= 'longleaf_metadata_index'
|
|
20
|
+
PRESERVE_TBL ||= "preserve_service_times".to_sym
|
|
21
|
+
INDEX_STATE_TBL ||= "index_state".to_sym
|
|
22
|
+
DEFAULT_PAGE_SIZE ||= 1000
|
|
23
|
+
TIMESTAMP_FORMAT ||= '%Y-%m-%d %H:%M:%S.%3N'
|
|
24
|
+
|
|
25
|
+
# Initialize the index driver
|
|
26
|
+
#
|
|
27
|
+
# @param app_config [ApplicationConfigManager] the application configuration manager
|
|
28
|
+
# @param adapter [Symbol] name of the database adapter to use.
|
|
29
|
+
# @param conn_details Details about the configuration and connection to the database used for the index.
|
|
30
|
+
# If a string is provided, it will be used as the connection URL and must identify the adapter.
|
|
31
|
+
# If a hash is provided, it is used as the parameters for the database connection.
|
|
32
|
+
# @param page_size [Integer] number of results to retrieve per query when getting candidates
|
|
33
|
+
def initialize(app_config, adapter, conn_details, page_size: nil)
|
|
34
|
+
Sequel.default_timezone = :utc
|
|
35
|
+
@app_config = app_config
|
|
36
|
+
@adapter = adapter
|
|
37
|
+
@conn_details = conn_details
|
|
38
|
+
# Digest of the app config file so we can tell if it changes
|
|
39
|
+
@config_md5 = app_config.config_md5
|
|
40
|
+
@page_size = page_size.nil? || page_size <= 0 ? DEFAULT_PAGE_SIZE : page_size
|
|
41
|
+
|
|
42
|
+
if @conn_details.is_a?(Hash)
|
|
43
|
+
# Add in the adapter name
|
|
44
|
+
@conn_details['adapter'] = adapter unless @conn_details.key?('adapter')
|
|
45
|
+
# Add in default database name if none was specified
|
|
46
|
+
@conn_details['database'] = INDEX_DB_NAME unless @conn_details.key?('database')
|
|
47
|
+
end
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
# Returns true if the application configuration does not match the configuration used for
|
|
51
|
+
# the last reindex.
|
|
52
|
+
def is_stale?
|
|
53
|
+
db_conn[INDEX_STATE_TBL].where(config_md5: @config_md5).count == 0
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
# Index the provided file_rec and its metadata
|
|
57
|
+
#
|
|
58
|
+
# @param file_rec [FileRecord] file record to index
|
|
59
|
+
def index(file_rec)
|
|
60
|
+
file_path = file_rec.path
|
|
61
|
+
md_rec = file_rec.metadata_record
|
|
62
|
+
storage_loc = file_rec.storage_location
|
|
63
|
+
service_manager = @app_config.service_manager
|
|
64
|
+
|
|
65
|
+
# Produce a list of service definitions which should apply to the file
|
|
66
|
+
expected_services = service_manager.list_service_definitions(
|
|
67
|
+
location: storage_loc.name)
|
|
68
|
+
|
|
69
|
+
first_timestamp = first_service_execution_timestamp(expected_services, md_rec)
|
|
70
|
+
delay_until_timestamp = delay_until_timestamp(md_rec)
|
|
71
|
+
|
|
72
|
+
first_timestamp = convert_iso8601_to_timestamp(first_timestamp)
|
|
73
|
+
delay_until_timestamp = convert_iso8601_to_timestamp(delay_until_timestamp)
|
|
74
|
+
now_stamp = Time.now.utc.strftime(TIMESTAMP_FORMAT)
|
|
75
|
+
|
|
76
|
+
if @adapter == :mysql || @adapter == :mysql2
|
|
77
|
+
preserve_tbl.on_duplicate_key_update
|
|
78
|
+
.insert(file_path: file_path,
|
|
79
|
+
storage_location: storage_loc.name,
|
|
80
|
+
service_time: first_timestamp,
|
|
81
|
+
delay_until_time: delay_until_timestamp,
|
|
82
|
+
updated: now_stamp)
|
|
83
|
+
else
|
|
84
|
+
preserve_tbl.insert_conflict(target: :file_path,
|
|
85
|
+
update: {
|
|
86
|
+
storage_location: storage_loc.name,
|
|
87
|
+
service_time: first_timestamp,
|
|
88
|
+
delay_until_time: delay_until_timestamp,
|
|
89
|
+
updated: now_stamp } )
|
|
90
|
+
.insert(file_path: file_path,
|
|
91
|
+
storage_location: storage_loc.name,
|
|
92
|
+
service_time: first_timestamp,
|
|
93
|
+
delay_until_time: delay_until_timestamp,
|
|
94
|
+
updated: now_stamp)
|
|
95
|
+
end
|
|
96
|
+
end
|
|
97
|
+
|
|
98
|
+
# Find the earliest service execution time for any services expected to be run for the specified file.
|
|
99
|
+
#
|
|
100
|
+
# @param expected_services [Array] list of ServiceDefinition objects expected for specified file.
|
|
101
|
+
# @param md_rec [MetadataRecord] metadata record for the file being evaluated
|
|
102
|
+
# @return The timestamp of the earliest service execution time for the file described by md_rec, in iso8601 format.
|
|
103
|
+
# Returns nil if no services are expected, all services have already run and do not have a next occurrence, or
|
|
104
|
+
# the file is deregistered.
|
|
105
|
+
def first_service_execution_timestamp(expected_services, md_rec)
|
|
106
|
+
current_time = Time.now.utc.iso8601(3)
|
|
107
|
+
if md_rec.deregistered?
|
|
108
|
+
return nil
|
|
109
|
+
end
|
|
110
|
+
|
|
111
|
+
service_times = Array.new
|
|
112
|
+
|
|
113
|
+
present_services = md_rec.list_services
|
|
114
|
+
|
|
115
|
+
expected_services.each do |service_def|
|
|
116
|
+
service_name = service_def.name
|
|
117
|
+
# Service has never run, set execution time to now
|
|
118
|
+
if !present_services.include?(service_name)
|
|
119
|
+
service_times << current_time
|
|
120
|
+
next
|
|
121
|
+
end
|
|
122
|
+
|
|
123
|
+
service_rec = md_rec.service(service_name)
|
|
124
|
+
|
|
125
|
+
# Service either needs a run or has no timestamp, so execution time of now
|
|
126
|
+
if service_rec.run_needed || service_rec.timestamp.nil?
|
|
127
|
+
service_times << current_time
|
|
128
|
+
next
|
|
129
|
+
end
|
|
130
|
+
|
|
131
|
+
# Calculate the next time this service should run based on frequency
|
|
132
|
+
frequency = service_def.frequency
|
|
133
|
+
unless frequency.nil?
|
|
134
|
+
service_timestamp = service_rec.timestamp
|
|
135
|
+
service_times << ServiceDateHelper.add_to_timestamp(service_timestamp, frequency)
|
|
136
|
+
next
|
|
137
|
+
end
|
|
138
|
+
end
|
|
139
|
+
# Return the lowest service execution time
|
|
140
|
+
service_times.min
|
|
141
|
+
end
|
|
142
|
+
|
|
143
|
+
# @return The first failure timestamp found for any service, or the minimum (epoch) timestamp if there were none.
|
|
144
|
+
def delay_until_timestamp(md_rec)
|
|
145
|
+
md_rec.list_services.each do |service_name|
|
|
146
|
+
service_rec = md_rec.service(service_name)
|
|
147
|
+
return service_rec.failure_timestamp unless service_rec.failure_timestamp.nil?
|
|
148
|
+
end
|
|
149
|
+
# return lowest possible date
|
|
150
|
+
return minimum_timestamp
|
|
151
|
+
end
|
|
152
|
+
|
|
153
|
+
# Remove an entry from the index
|
|
154
|
+
# @param remove_me The record to remove from the index. May be a FileRecord or a String.
|
|
155
|
+
def remove(remove_me)
|
|
156
|
+
if remove_me.is_a?(FileRecord)
|
|
157
|
+
path = remove_me.path
|
|
158
|
+
else
|
|
159
|
+
path = remove_me
|
|
160
|
+
end
|
|
161
|
+
|
|
162
|
+
result = preserve_tbl.where(file_path: path).delete
|
|
163
|
+
if result == 0
|
|
164
|
+
logger.warn("Could not remove #{path} from the index, path was not present.")
|
|
165
|
+
end
|
|
166
|
+
end
|
|
167
|
+
|
|
168
|
+
# Remove all entries from the index
|
|
169
|
+
# @param older_than [Time] Optional. If provided, only entries that have not been indexed
|
|
170
|
+
# since the provided time will be deleted.
|
|
171
|
+
def clear_index(older_than = nil)
|
|
172
|
+
if older_than.nil?
|
|
173
|
+
preserve_tbl.delete
|
|
174
|
+
else
|
|
175
|
+
older_than_timestamp = older_than.utc.strftime(TIMESTAMP_FORMAT)
|
|
176
|
+
preserve_tbl.where { updated < older_than_timestamp }.delete
|
|
177
|
+
end
|
|
178
|
+
end
|
|
179
|
+
|
|
180
|
+
# Initialize the index's database using the provided configuration
|
|
181
|
+
def setup_index
|
|
182
|
+
# Create the table for tracking when files will need preservation services run on them.
|
|
183
|
+
case @adapter
|
|
184
|
+
when :mysql, :mysql2
|
|
185
|
+
# mysql does not support 'text' fields as primary keys
|
|
186
|
+
db_conn.create_table!(PRESERVE_TBL) do
|
|
187
|
+
String :file_path, primary_key: true, size: 768
|
|
188
|
+
column :storage_location, 'varchar(128)'
|
|
189
|
+
column :service_time, 'timestamp(3)', { :null => true }
|
|
190
|
+
column :delay_until_time, 'timestamp(3)'
|
|
191
|
+
column :updated, 'timestamp(3)'
|
|
192
|
+
end
|
|
193
|
+
else
|
|
194
|
+
db_conn.create_table!(PRESERVE_TBL) do
|
|
195
|
+
String :file_path, primary_key: true, text: true
|
|
196
|
+
column :storage_location, 'varchar(128)'
|
|
197
|
+
column :service_time, 'timestamp(3)', { :null => true }
|
|
198
|
+
column :delay_until_time, 'timestamp(3)'
|
|
199
|
+
column :updated, 'timestamp(3)'
|
|
200
|
+
end
|
|
201
|
+
end
|
|
202
|
+
|
|
203
|
+
# Setup database indexes
|
|
204
|
+
case @adapter
|
|
205
|
+
when :postgres
|
|
206
|
+
db_conn.run("CREATE INDEX service_times_file_path_text_index ON preserve_service_times (file_path text_pattern_ops)")
|
|
207
|
+
when :sqlite, :amalgalite
|
|
208
|
+
db_conn.run("CREATE INDEX service_times_file_path_text_index ON preserve_service_times (file_path collate nocase)")
|
|
209
|
+
end
|
|
210
|
+
db_conn.run("CREATE INDEX service_times_storage_location_index ON preserve_service_times (storage_location)")
|
|
211
|
+
|
|
212
|
+
# Create table for tracking the state of the index
|
|
213
|
+
db_conn.create_table!(INDEX_STATE_TBL) do
|
|
214
|
+
String :config_md5
|
|
215
|
+
DateTime :last_reindexed
|
|
216
|
+
String :longleaf_version
|
|
217
|
+
end
|
|
218
|
+
|
|
219
|
+
# Prepopulate the index state information
|
|
220
|
+
update_index_state
|
|
221
|
+
end
|
|
222
|
+
|
|
223
|
+
# Updates the state information for the index to indicate that the index has been refreshed
|
|
224
|
+
# or is in sync with the application's configuration.
|
|
225
|
+
def update_index_state
|
|
226
|
+
index_state_tbl = db_conn[INDEX_STATE_TBL]
|
|
227
|
+
index_state_tbl.delete
|
|
228
|
+
index_state_tbl.insert(
|
|
229
|
+
config_md5: @config_md5,
|
|
230
|
+
last_reindexed: Time.now.utc,
|
|
231
|
+
longleaf_version: Longleaf::VERSION)
|
|
232
|
+
end
|
|
233
|
+
|
|
234
|
+
# Retrieves a page of file paths which have one or more services which need to run.
|
|
235
|
+
# @param file_selector [FileSelector] selector for what paths to search for files
|
|
236
|
+
# @param stale_datetime [DateTime] find file_paths with services needing to be run before this value
|
|
237
|
+
# @return [Array] array of file paths that need one or more services run.
|
|
238
|
+
def paths_with_stale_services(file_selector, stale_datetime)
|
|
239
|
+
if @preserve_dataset.nil?
|
|
240
|
+
@preserve_dataset = db_conn
|
|
241
|
+
.from(PRESERVE_TBL)
|
|
242
|
+
.exclude(service_time: nil)
|
|
243
|
+
.limit(@page_size)
|
|
244
|
+
.order(Sequel.asc(:service_time))
|
|
245
|
+
end
|
|
246
|
+
|
|
247
|
+
# retrieve and return a page of results
|
|
248
|
+
ds = add_path_restrictions(@preserve_dataset, file_selector)
|
|
249
|
+
.where { service_time <= stale_datetime }
|
|
250
|
+
.where { delay_until_time < stale_datetime }
|
|
251
|
+
.select_map(:file_path)
|
|
252
|
+
end
|
|
253
|
+
|
|
254
|
+
# Retrieves a page of paths for registered files.
|
|
255
|
+
# @param file_selector [FileSelector] selector for what paths to search for files
|
|
256
|
+
# @return [Array] array of file paths that are registered
|
|
257
|
+
def registered_paths(file_selector)
|
|
258
|
+
# retrieve and return a page of results
|
|
259
|
+
add_path_restrictions(registered_dataset, file_selector)
|
|
260
|
+
.select_map(:file_path)
|
|
261
|
+
end
|
|
262
|
+
|
|
263
|
+
# Calls the provided block once for each registered file path.
|
|
264
|
+
# Must be passed a block.
|
|
265
|
+
# @param file_selector [FileSelector] selector for what paths to search for files
|
|
266
|
+
# @param older_than [Time] Optional. If provided, only files that have not been
|
|
267
|
+
# indexed since this timestamp will be returned.
|
|
268
|
+
def each_registered_path(file_selector, older_than: nil, &block)
|
|
269
|
+
dataset = add_path_restrictions(registered_dataset, file_selector)
|
|
270
|
+
.select(:file_path)
|
|
271
|
+
if !older_than.nil?
|
|
272
|
+
older_than_timestamp = older_than.utc.strftime(TIMESTAMP_FORMAT)
|
|
273
|
+
dataset = dataset.where { updated < older_than_timestamp }
|
|
274
|
+
end
|
|
275
|
+
# Yield to the provided block once per row returned
|
|
276
|
+
dataset.paged_each(:rows_per_fetch => @page_size) do |row|
|
|
277
|
+
block.call(row[:file_path])
|
|
278
|
+
end
|
|
279
|
+
end
|
|
280
|
+
|
|
281
|
+
private
|
|
282
|
+
def db_conn
|
|
283
|
+
@connection = Sequel.connect(@conn_details) if @connection.nil?
|
|
284
|
+
@connection
|
|
285
|
+
end
|
|
286
|
+
|
|
287
|
+
def preserve_tbl
|
|
288
|
+
@preserve_tbl = db_conn[PRESERVE_TBL] if @preserve_tbl.nil?
|
|
289
|
+
@preserve_tbl
|
|
290
|
+
end
|
|
291
|
+
|
|
292
|
+
def add_path_restrictions(dataset, file_selector)
|
|
293
|
+
if file_selector.specificity == FileSelector::SPECIFICITY_STORAGE_LOCATION
|
|
294
|
+
dataset.where(storage_location: file_selector.storage_locations)
|
|
295
|
+
else
|
|
296
|
+
# Reformat all selected paths into LIKE partial string matches
|
|
297
|
+
path_conds = file_selector.target_paths.map { |path| path.end_with?('/') ? path + '%' : path }
|
|
298
|
+
dataset.where(Sequel.like(:file_path, *path_conds))
|
|
299
|
+
end
|
|
300
|
+
end
|
|
301
|
+
|
|
302
|
+
def convert_iso8601_to_timestamp(iso8601)
|
|
303
|
+
return nil if iso8601.nil?
|
|
304
|
+
# Normalize to UTC before formatting: the parsed time keeps the offset of the input string,
# while the index is configured for UTC (Sequel.default_timezone = :utc).
Time.iso8601(iso8601).utc.strftime(TIMESTAMP_FORMAT)
|
|
305
|
+
end
|
|
306
|
+
|
|
307
|
+
def minimum_timestamp
|
|
308
|
+
if @min_timestamp.nil?
|
|
309
|
+
@min_timestamp = ServiceDateHelper.formatted_timestamp(Time.at(0).utc)
|
|
310
|
+
end
|
|
311
|
+
@min_timestamp
|
|
312
|
+
end
|
|
313
|
+
|
|
314
|
+
def registered_dataset
|
|
315
|
+
if @registered_dataset.nil?
|
|
316
|
+
@registered_dataset = db_conn
|
|
317
|
+
.from(PRESERVE_TBL)
|
|
318
|
+
.limit(@page_size)
|
|
319
|
+
.order(Sequel.asc(:service_time))
|
|
320
|
+
end
|
|
321
|
+
@registered_dataset
|
|
322
|
+
end
|
|
323
|
+
end
|
|
324
|
+
end
|