longleaf 0.1.0.pre.2 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.circleci/config.yml +94 -0
- data/.editorconfig +13 -0
- data/.gitignore +4 -1
- data/.rubocop.yml +44 -0
- data/.rubocop_todo.yml +834 -0
- data/.yardopts +1 -0
- data/Gemfile +16 -1
- data/README.md +98 -12
- data/Rakefile +6 -0
- data/bin/setup +16 -1
- data/docs/aboutlongleaf.md +28 -0
- data/docs/extra.css +32 -0
- data/docs/img/change-file.png +0 -0
- data/docs/img/ll-example-preserved.png +0 -0
- data/docs/index.md +19 -0
- data/docs/install.md +66 -0
- data/docs/ll-example/config-example-relative.yml +33 -0
- data/docs/ll-example/files-dir/LLexample-PDF.pdf +0 -0
- data/docs/ll-example/files-dir/LLexample-TOCHANGE.txt +15 -0
- data/docs/ll-example/files-dir/LLexample-tokeep.txt +10 -0
- data/docs/ll-example/metadata-dir/.gitkeep +0 -0
- data/docs/ll-example/replica-files/.gitkeep +0 -0
- data/docs/ll-example/replica-metadata/.gitkeep +0 -0
- data/docs/quickstart.md +270 -0
- data/docs/rdocs/Longleaf.html +135 -0
- data/docs/rdocs/Longleaf/AppFields.html +178 -0
- data/docs/rdocs/Longleaf/ApplicationConfigDeserializer.html +631 -0
- data/docs/rdocs/Longleaf/ApplicationConfigManager.html +610 -0
- data/docs/rdocs/Longleaf/ApplicationConfigValidator.html +238 -0
- data/docs/rdocs/Longleaf/CLI.html +909 -0
- data/docs/rdocs/Longleaf/ChecksumMismatchError.html +151 -0
- data/docs/rdocs/Longleaf/ConfigBuilder.html +1339 -0
- data/docs/rdocs/Longleaf/ConfigurationError.html +143 -0
- data/docs/rdocs/Longleaf/ConfigurationValidator.html +227 -0
- data/docs/rdocs/Longleaf/DeregisterCommand.html +420 -0
- data/docs/rdocs/Longleaf/DeregisterEvent.html +453 -0
- data/docs/rdocs/Longleaf/DeregistrationError.html +151 -0
- data/docs/rdocs/Longleaf/DigestHelper.html +419 -0
- data/docs/rdocs/Longleaf/EventError.html +147 -0
- data/docs/rdocs/Longleaf/EventNames.html +163 -0
- data/docs/rdocs/Longleaf/EventStatusTracking.html +656 -0
- data/docs/rdocs/Longleaf/FileCheckService.html +540 -0
- data/docs/rdocs/Longleaf/FileHelpers.html +520 -0
- data/docs/rdocs/Longleaf/FileRecord.html +716 -0
- data/docs/rdocs/Longleaf/FileSelector.html +901 -0
- data/docs/rdocs/Longleaf/FixityCheckService.html +691 -0
- data/docs/rdocs/Longleaf/IndexManager.html +1155 -0
- data/docs/rdocs/Longleaf/InvalidDigestAlgorithmError.html +143 -0
- data/docs/rdocs/Longleaf/InvalidStoragePathError.html +143 -0
- data/docs/rdocs/Longleaf/Logging.html +405 -0
- data/docs/rdocs/Longleaf/Logging/RedirectingLogger.html +1213 -0
- data/docs/rdocs/Longleaf/LongleafError.html +139 -0
- data/docs/rdocs/Longleaf/MDFields.html +193 -0
- data/docs/rdocs/Longleaf/MetadataBuilder.html +787 -0
- data/docs/rdocs/Longleaf/MetadataDeserializer.html +537 -0
- data/docs/rdocs/Longleaf/MetadataError.html +143 -0
- data/docs/rdocs/Longleaf/MetadataPersistenceManager.html +539 -0
- data/docs/rdocs/Longleaf/MetadataRecord.html +1411 -0
- data/docs/rdocs/Longleaf/MetadataSerializer.html +786 -0
- data/docs/rdocs/Longleaf/PreservationServiceError.html +147 -0
- data/docs/rdocs/Longleaf/PreserveCommand.html +410 -0
- data/docs/rdocs/Longleaf/PreserveEvent.html +491 -0
- data/docs/rdocs/Longleaf/RegisterCommand.html +428 -0
- data/docs/rdocs/Longleaf/RegisterEvent.html +628 -0
- data/docs/rdocs/Longleaf/RegisteredFileSelector.html +446 -0
- data/docs/rdocs/Longleaf/RegistrationError.html +151 -0
- data/docs/rdocs/Longleaf/ReindexCommand.html +576 -0
- data/docs/rdocs/Longleaf/RsyncReplicationService.html +1180 -0
- data/docs/rdocs/Longleaf/SequelIndexDriver.html +1978 -0
- data/docs/rdocs/Longleaf/ServiceCandidateFilesystemIterator.html +572 -0
- data/docs/rdocs/Longleaf/ServiceCandidateIndexIterator.html +532 -0
- data/docs/rdocs/Longleaf/ServiceCandidateLocator.html +333 -0
- data/docs/rdocs/Longleaf/ServiceClassCache.html +725 -0
- data/docs/rdocs/Longleaf/ServiceDateHelper.html +425 -0
- data/docs/rdocs/Longleaf/ServiceDefinition.html +683 -0
- data/docs/rdocs/Longleaf/ServiceDefinitionManager.html +371 -0
- data/docs/rdocs/Longleaf/ServiceDefinitionValidator.html +269 -0
- data/docs/rdocs/Longleaf/ServiceFields.html +173 -0
- data/docs/rdocs/Longleaf/ServiceManager.html +1229 -0
- data/docs/rdocs/Longleaf/ServiceMappingManager.html +410 -0
- data/docs/rdocs/Longleaf/ServiceMappingValidator.html +347 -0
- data/docs/rdocs/Longleaf/ServiceRecord.html +821 -0
- data/docs/rdocs/Longleaf/StorageLocation.html +985 -0
- data/docs/rdocs/Longleaf/StorageLocationManager.html +729 -0
- data/docs/rdocs/Longleaf/StorageLocationUnavailableError.html +143 -0
- data/docs/rdocs/Longleaf/StorageLocationValidator.html +373 -0
- data/docs/rdocs/Longleaf/StoragePathValidator.html +253 -0
- data/docs/rdocs/Longleaf/SystemConfigBuilder.html +441 -0
- data/docs/rdocs/Longleaf/SystemConfigFields.html +163 -0
- data/docs/rdocs/Longleaf/ValidateConfigCommand.html +451 -0
- data/docs/rdocs/Longleaf/ValidateMetadataCommand.html +408 -0
- data/docs/rdocs/_index.html +660 -0
- data/docs/rdocs/class_list.html +51 -0
- data/docs/rdocs/css/common.css +1 -0
- data/docs/rdocs/css/full_list.css +58 -0
- data/docs/rdocs/css/style.css +496 -0
- data/docs/rdocs/file.README.html +165 -0
- data/docs/rdocs/file_list.html +56 -0
- data/docs/rdocs/frames.html +17 -0
- data/docs/rdocs/index.html +165 -0
- data/docs/rdocs/js/app.js +303 -0
- data/docs/rdocs/js/full_list.js +216 -0
- data/docs/rdocs/js/jquery.js +4 -0
- data/docs/rdocs/method_list.html +2051 -0
- data/docs/rdocs/top-level-namespace.html +110 -0
- data/lib/longleaf/candidates/file_selector.rb +139 -0
- data/lib/longleaf/candidates/manifest_digest_provider.rb +17 -0
- data/lib/longleaf/candidates/registered_file_selector.rb +67 -0
- data/lib/longleaf/candidates/service_candidate_filesystem_iterator.rb +93 -0
- data/lib/longleaf/candidates/service_candidate_index_iterator.rb +84 -0
- data/lib/longleaf/candidates/service_candidate_locator.rb +23 -0
- data/lib/longleaf/candidates/single_digest_provider.rb +13 -0
- data/lib/longleaf/cli.rb +237 -46
- data/lib/longleaf/commands/deregister_command.rb +51 -0
- data/lib/longleaf/commands/preserve_command.rb +50 -0
- data/lib/longleaf/commands/register_command.rb +32 -43
- data/lib/longleaf/commands/reindex_command.rb +92 -0
- data/lib/longleaf/commands/validate_config_command.rb +33 -8
- data/lib/longleaf/commands/validate_metadata_command.rb +51 -0
- data/lib/longleaf/errors.rb +26 -7
- data/lib/longleaf/events/deregister_event.rb +53 -0
- data/lib/longleaf/events/event_names.rb +9 -0
- data/lib/longleaf/events/event_status_tracking.rb +59 -0
- data/lib/longleaf/events/preserve_event.rb +81 -0
- data/lib/longleaf/events/register_event.rb +52 -51
- data/lib/longleaf/helpers/case_insensitive_hash.rb +38 -0
- data/lib/longleaf/helpers/digest_helper.rb +56 -0
- data/lib/longleaf/helpers/s3_uri_helper.rb +86 -0
- data/lib/longleaf/helpers/selection_options_parser.rb +189 -0
- data/lib/longleaf/helpers/service_date_helper.rb +78 -0
- data/lib/longleaf/indexing/index_manager.rb +101 -0
- data/lib/longleaf/indexing/sequel_index_driver.rb +306 -0
- data/lib/longleaf/logging.rb +5 -4
- data/lib/longleaf/logging/redirecting_logger.rb +26 -25
- data/lib/longleaf/models/app_fields.rb +7 -2
- data/lib/longleaf/models/file_record.rb +17 -8
- data/lib/longleaf/models/filesystem_metadata_location.rb +56 -0
- data/lib/longleaf/models/filesystem_storage_location.rb +52 -0
- data/lib/longleaf/models/md_fields.rb +2 -1
- data/lib/longleaf/models/metadata_location.rb +47 -0
- data/lib/longleaf/models/metadata_record.rb +39 -15
- data/lib/longleaf/models/s3_storage_location.rb +133 -0
- data/lib/longleaf/models/service_definition.rb +7 -6
- data/lib/longleaf/models/service_fields.rb +7 -1
- data/lib/longleaf/models/service_record.rb +10 -6
- data/lib/longleaf/models/storage_location.rb +24 -19
- data/lib/longleaf/models/storage_types.rb +9 -0
- data/lib/longleaf/models/system_config_fields.rb +9 -0
- data/lib/longleaf/preservation_services/file_check_service.rb +58 -0
- data/lib/longleaf/preservation_services/fixity_check_service.rb +123 -0
- data/lib/longleaf/preservation_services/rsync_replication_service.rb +182 -0
- data/lib/longleaf/preservation_services/s3_replication_service.rb +143 -0
- data/lib/longleaf/services/application_config_deserializer.rb +81 -24
- data/lib/longleaf/services/application_config_manager.rb +20 -6
- data/lib/longleaf/services/application_config_validator.rb +19 -9
- data/lib/longleaf/services/configuration_validator.rb +67 -4
- data/lib/longleaf/services/filesystem_location_validator.rb +16 -0
- data/lib/longleaf/services/metadata_deserializer.rb +113 -42
- data/lib/longleaf/services/metadata_persistence_manager.rb +47 -0
- data/lib/longleaf/services/metadata_serializer.rb +138 -25
- data/lib/longleaf/services/metadata_validator.rb +76 -0
- data/lib/longleaf/services/s3_location_validator.rb +19 -0
- data/lib/longleaf/services/service_class_cache.rb +112 -0
- data/lib/longleaf/services/service_definition_manager.rb +10 -7
- data/lib/longleaf/services/service_definition_validator.rb +25 -18
- data/lib/longleaf/services/service_manager.rb +86 -11
- data/lib/longleaf/services/service_mapping_manager.rb +13 -12
- data/lib/longleaf/services/service_mapping_validator.rb +36 -26
- data/lib/longleaf/services/storage_location_manager.rb +76 -15
- data/lib/longleaf/services/storage_location_validator.rb +49 -35
- data/lib/longleaf/specs/config_builder.rb +47 -23
- data/lib/longleaf/specs/config_validator_helpers.rb +16 -0
- data/lib/longleaf/specs/custom_matchers.rb +9 -0
- data/lib/longleaf/specs/file_helpers.rb +61 -0
- data/lib/longleaf/specs/metadata_builder.rb +92 -0
- data/lib/longleaf/specs/system_config_builder.rb +27 -0
- data/lib/longleaf/version.rb +1 -1
- data/longleaf.gemspec +20 -7
- data/mkdocs.yml +21 -0
- metadata +306 -23
- data/.travis.yml +0 -4
- data/lib/longleaf/commands/abstract_command.rb +0 -37
- data/lib/longleaf/services/storage_path_validator.rb +0 -16
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
require 'time'
|
|
2
|
+
|
|
3
|
+
module Longleaf
  # Helper methods for interacting with dates/timestamps on services
  class ServiceDateHelper
    # Length in seconds of each supported time unit. Months and years are fixed-length
    # approximations (30 and 365 days respectively) rather than calendar-aware values.
    SECONDS_PER_UNIT ||= {
      'second' => 1,
      'minute' => 60,
      'hour' => 3600,
      'day' => 24 * 3600,
      'week' => 7 * 24 * 3600,
      'month' => 30 * 24 * 3600,
      'year' => 365 * 24 * 3600
    }.freeze

    # Syntax of a time modifier: "<quantity> <time unit>", optionally followed by a comma and
    # arbitrary trailing text. Anchored with \A (not ^) so the quantity must begin the string
    # itself rather than merely the start of any line within it.
    MODIFIER_PATTERN ||= /\A(\d+) *(second|minute|hour|day|week|month|year)s?(,.*)?/

    # Adds the amount of time from modifier to the provided timestamp
    # @param timestamp [String] ISO-8601 timestamp string
    # @param modifier [String] amount of time to add to the timestamp. It must follow the syntax
    #   "<quantity> <time unit>", where quantity must be a positive whole number and time unit
    #   must be second, minute, hour, day, week, month or year (unit may be plural).
    #   Any info after a comma will be ignored.
    # @return [String] the original timestamp in ISO-8601 format (millisecond precision) with the
    #   provided amount of time added.
    # @raise [ArgumentError] if the modifier cannot be parsed, or the timestamp is not valid ISO-8601.
    def self.add_to_timestamp(timestamp, modifier)
      # to_s so that a nil modifier is reported as an unparseable modifier rather than a type error
      parsed = MODIFIER_PATTERN.match(modifier.to_s)
      raise ArgumentError, "Cannot parse time modifier #{modifier}" if parsed.nil?

      quantity = parsed[1].to_i
      unit_seconds = SECONDS_PER_UNIT.fetch(parsed[2])

      (Time.iso8601(timestamp) + (quantity * unit_seconds)).iso8601(3)
    end

    # Get a timestamp in the format expected for service timestamps.
    # @param timestamp [Time] the time to format. Defaults to now.
    # @return [String] the time converted to UTC and formatted as iso8601 with millisecond precision
    def self.formatted_timestamp(timestamp = Time.now)
      # getutc returns a new Time; Time#utc would convert the caller's object in place.
      timestamp.getutc.iso8601(3)
    end

    # Get the timestamp for the next time the provided service would need to be run
    # for the object described by md_rec
    # @param md_rec [MetadataRecord] metadata record for the file
    # @param service_def [ServiceDefinition] definition for the service
    # @return [String] iso8601 timestamp for the next time the service will need to run, or
    #   nil if the service does not need to run again.
    # @raise [ArgumentError] if either parameter is nil
    def self.next_run_needed(md_rec, service_def)
      raise ArgumentError, 'Must provide a md_rec parameter' if md_rec.nil?
      raise ArgumentError, 'Must provide a service_def parameter' if service_def.nil?

      service_rec = md_rec.service(service_def.name)

      # Service has never run: it is due at registration time, plus any configured delay.
      if service_rec.nil? || service_rec.timestamp.nil?
        return md_rec.registered if service_def.delay.nil?

        return add_to_timestamp(md_rec.registered, service_def.delay)
      end

      # Service has run before: it only recurs if a frequency is configured.
      return nil if service_def.frequency.nil?

      add_to_timestamp(service_rec.timestamp, service_def.frequency)
    end
  end
end
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
require 'longleaf/models/system_config_fields'
|
|
2
|
+
require 'longleaf/services/metadata_persistence_manager'
|
|
3
|
+
require 'longleaf/errors'
|
|
4
|
+
|
|
5
|
+
module Longleaf
  # Manager configures and provides access to a metadata index if one is specified.
  #
  # An index driver is only created when the system configuration contains a metadata index
  # section. All methods other than {#using_index?} delegate directly to that driver, so callers
  # should check {#using_index?} before invoking them.
  class IndexManager
    SYS_FIELDS ||= Longleaf::SystemConfigFields

    # @param config [Hash] The system configuration as a hash
    # @param app_config_manager [ApplicationConfigManager] the application config
    # @raise [ConfigurationError] if an index is configured but its adapter is missing or unknown,
    #   or its connection details are missing.
    def initialize(config, app_config_manager)
      @config = config
      @app_config_manager = app_config_manager
      init_index_driver if @config&.key?(SYS_FIELDS::MD_INDEX)
    end

    # @return true if the system is configured to use a metadata index
    def using_index?
      !@index_driver.nil?
    end

    # Index the provided file_rec and its metadata
    #
    # @param file_rec [FileRecord] file record to index
    def index(file_rec)
      @index_driver.index(file_rec)
    end

    # Remove an entry from the index
    # @param remove_me The record to remove from the index
    def remove(remove_me)
      @index_driver.remove(remove_me)
    end

    # Remove entries from the index.
    # @param older_than Optional cutoff passed through to the index driver. When nil, the
    #   driver is asked to clear without a cutoff.
    def clear_index(older_than = nil)
      @index_driver.clear_index(older_than)
    end

    # @return true if the index should be reindexed
    def index_stale?
      @index_driver.is_stale?
    end

    # Setup initial structure of index implementation
    def setup_index
      @index_driver.setup_index
    end

    # Ask the index driver to refresh its recorded index state.
    def update_index_state
      @index_driver.update_index_state
    end

    # Retrieves a set of file paths which have one or more services which need to run.
    #
    # @param file_selector [FileSelector] selector for paths to search for files
    # @param stale_datetime [DateTime] find file_paths with services needing to be run before this value
    # @return [Array] array of file paths that need one or more services run, in ascending order by
    #    timestamp.
    def paths_with_stale_services(file_selector, stale_datetime)
      @index_driver.paths_with_stale_services(file_selector, stale_datetime)
    end

    # Retrieves a page of paths for registered files.
    # @param file_selector [FileSelector] selector for what paths to search for files
    # @return [Array] array of file paths that are registered
    def registered_paths(file_selector)
      @index_driver.registered_paths(file_selector)
    end

    # Passes the provided block to the index driver's each_registered_path.
    # @param file_selector [FileSelector] selector for what paths to search for files
    # @param older_than Optional cutoff passed through to the index driver.
    def each_registered_path(file_selector, older_than: nil, &block)
      @index_driver.each_registered_path(file_selector, older_than: older_than, &block)
    end

    private

    # Builds the index driver described by the metadata index section of the system config.
    # @raise [ConfigurationError] if the adapter is missing or not one of the supported adapters,
    #   or if no connection details are provided.
    def init_index_driver
      index_conf = @config[SYS_FIELDS::MD_INDEX]
      adapter = index_conf[SYS_FIELDS::MD_INDEX_ADAPTER]&.downcase

      raise ConfigurationError.new('Must specify an adapter for the metadata index') if adapter.nil?

      adapter = adapter.to_sym

      case adapter
      when :postgres, :mysql, :mysql2, :sqlite, :amalgalite
        page_size = index_conf[SYS_FIELDS::MD_INDEX_PAGE_SIZE]&.to_int

        connection = index_conf[SYS_FIELDS::MD_INDEX_CONNECTION]
        raise ConfigurationError.new("Must specify connection details for index adapter of type '#{adapter}'") if connection.nil?

        # Loaded lazily so the sequel dependency is only needed when an index is configured.
        require 'longleaf/indexing/sequel_index_driver'
        @index_driver = SequelIndexDriver.new(@app_config_manager,
            adapter,
            connection,
            page_size: page_size)
      else
        # adapter is known to be non-nil here, so any value reaching this branch is unsupported.
        raise ConfigurationError.new("Unknown index adapter '#{adapter}' specified.")
      end
    end
  end
end
|
|
@@ -0,0 +1,306 @@
|
|
|
1
|
+
require 'sequel'
|
|
2
|
+
require 'digest/md5'
|
|
3
|
+
require 'longleaf/events/event_names'
|
|
4
|
+
require 'longleaf/candidates/file_selector'
|
|
5
|
+
require 'longleaf/version'
|
|
6
|
+
require 'longleaf/models/system_config_fields'
|
|
7
|
+
require 'longleaf/logging'
|
|
8
|
+
|
|
9
|
+
module Longleaf
  # Driver for interacting with RDBM based metadata index using the Sequel ORM gem.
  # Users must create the database and credentials for connecting to it in advance,
  # if using a database application that requires creation of databases (ie, not sqlite).
  # The default database name is 'longleaf_metadata_index' but may be overridden.
  #
  # See the Sequel documentation for details about accepted connection parameters:
  # https://github.com/jeremyevans/sequel/blob/master/doc/opening_databases.rdoc
  class SequelIndexDriver
    include Longleaf::Logging
    INDEX_DB_NAME ||= 'longleaf_metadata_index'
    PRESERVE_TBL ||= "preserve_service_times".to_sym
    INDEX_STATE_TBL ||= "index_state".to_sym
    DEFAULT_PAGE_SIZE ||= 1000
    TIMESTAMP_FORMAT ||= '%Y-%m-%d %H:%M:%S.%3N'

    # Initialize the index driver
    #
    # @param app_config [ApplicationConfigManager] the application configuration manager
    # @param adapter [String] name of the database adapter to use.
    # @param conn_details Details about the configuration and connection to the database used for the index.
    #   If a string is provided, it will be used as the connection URL and must identify the adapter.
    #   If a hash is provided, it used as the parameters for the database connection. The hash is
    #   copied before defaults are added, so the caller's hash is not modified.
    # @param page_size [Integer] number of results to retrieve per query when getting candidates.
    #   Nil or non-positive values fall back to DEFAULT_PAGE_SIZE.
    def initialize(app_config, adapter, conn_details, page_size: nil)
      Sequel.default_timezone = :utc
      @app_config = app_config
      @adapter = adapter
      # Digest of the app config file so we can tell if it changes
      @config_md5 = app_config.config_md5
      @page_size = page_size.nil? || page_size <= 0 ? DEFAULT_PAGE_SIZE : page_size
      @conn_details = with_connection_defaults(conn_details, adapter)
    end

    # Returns true if the application configuration does not match the configuration used for
    # the last reindex.
    def is_stale?
      db_conn[INDEX_STATE_TBL].where(config_md5: @config_md5).count == 0
    end

    # Index the provided file_rec and its metadata. Inserts a row for the file's path, or
    # updates the existing row if the path is already indexed.
    #
    # @param file_rec [FileRecord] file record to index
    def index(file_rec)
      md_rec = file_rec.metadata_record
      location_name = file_rec.storage_location.name

      # Produce a list of service definitions which should apply to the file
      expected_services = @app_config.service_manager.list_service_definitions(
          location: location_name)

      service_time = convert_iso8601_to_timestamp(
          first_service_execution_timestamp(expected_services, md_rec))
      delay_until = convert_iso8601_to_timestamp(delay_until_timestamp(md_rec))

      # Columns that are overwritten when the path is already present in the index
      updatable = {
        storage_location: location_name,
        service_time: service_time,
        delay_until_time: delay_until,
        updated: Time.now.utc.strftime(TIMESTAMP_FORMAT)
      }
      row = { file_path: file_rec.path }.merge(updatable)

      # The upsert syntax differs between mysql and the other supported adapters
      if @adapter == :mysql || @adapter == :mysql2
        preserve_tbl.on_duplicate_key_update.insert(row)
      else
        preserve_tbl.insert_conflict(target: :file_path, update: updatable).insert(row)
      end
    end

    # Find the earliest service execution time for any services expected to be run for the specified file.
    #
    # @param expected_services [Array] list of ServiceDefinition objects expected for specified file.
    # @param md_rec [MetadataRecord] metadata record for the file being evaluated
    # @return The timestamp of the earliest service execution time for the file described by md_rec, in iso8601 format.
    #   Returns nil if no services are expected, all services have already run and do not have a next occurrence, or
    #   the file is deregistered.
    def first_service_execution_timestamp(expected_services, md_rec)
      return nil if md_rec.deregistered?

      # Return the lowest service execution time, ignoring services with no next occurrence
      expected_services
        .map { |service_def| ServiceDateHelper.next_run_needed(md_rec, service_def) }
        .compact
        .min
    end

    # @param md_rec [MetadataRecord] metadata record for the file being evaluated
    # @return The failure timestamp of the first service (in the order listed by md_rec) which
    #   has one, or the minimum timestamp if no service has a failure timestamp.
    def delay_until_timestamp(md_rec)
      md_rec.list_services.each do |service_name|
        failure_timestamp = md_rec.service(service_name).failure_timestamp
        return failure_timestamp unless failure_timestamp.nil?
      end
      # return lowest possible date
      minimum_timestamp
    end

    # Remove an entry from the index. Logs a warning if the path was not present.
    # @param remove_me The record to remove from the index. May be a FileRecord or a String.
    def remove(remove_me)
      path = remove_me.is_a?(FileRecord) ? remove_me.path : remove_me

      deleted = preserve_tbl.where(file_path: path).delete
      if deleted == 0
        logger.warn("Could not remove #{path} from the index, path was not present.")
      end
    end

    # Remove all entries from the index
    # @param older_than [Time] Optional. If provided, only entries that have not been indexed
    #    since before the provided time will be deleted.
    def clear_index(older_than = nil)
      if older_than.nil?
        preserve_tbl.delete
      else
        older_than_timestamp = older_than.utc.strftime(TIMESTAMP_FORMAT)
        preserve_tbl.where { updated < older_than_timestamp }.delete
      end
    end

    # Initialize the index's database using the provided configuration.
    # Any existing index tables are dropped and recreated.
    def setup_index
      # Create the table for tracking when files will need preservation services run on them.
      # mysql does not support 'text' fields as primary keys, so it gets a sized string instead.
      uses_mysql = @adapter == :mysql || @adapter == :mysql2
      file_path_opts = uses_mysql ? { primary_key: true, size: 768 } : { primary_key: true, text: true }

      db_conn.create_table!(PRESERVE_TBL) do
        String :file_path, file_path_opts
        column :storage_location, 'varchar(128)'
        column :service_time, 'timestamp(3)', { :null => true }
        column :delay_until_time, 'timestamp(3)'
        column :updated, 'timestamp(3)'
      end

      # Setup database indexes
      case @adapter
      when :postgres
        db_conn.run("CREATE INDEX service_times_file_path_text_index ON preserve_service_times (file_path text_pattern_ops)")
      when :sqlite, :amalgalite
        db_conn.run("CREATE INDEX service_times_file_path_text_index ON preserve_service_times (file_path collate nocase)")
      end
      db_conn.run("CREATE INDEX service_times_storage_location_index ON preserve_service_times (storage_location)")

      # Create table for tracking the state of the index
      db_conn.create_table!(INDEX_STATE_TBL) do
        String :config_md5
        DateTime :last_reindexed
        String :longleaf_version
      end

      # Prepopulate the index state information
      update_index_state
    end

    # Updates the state information for the index to indicate that the index has been refreshed
    # or is in sync with the application's configuration.
    def update_index_state
      index_state_tbl = db_conn[INDEX_STATE_TBL]
      # Only a single state row is kept, so clear out any previous one first
      index_state_tbl.delete
      index_state_tbl.insert(
          config_md5: @config_md5,
          last_reindexed: Time.now.utc,
          longleaf_version: Longleaf::VERSION)
    end

    # Retrieves page of file paths which have one or more services which need to run.
    # @param file_selector [FileSelector] selector for what paths to search for files
    # @param stale_datetime [DateTime] find file_paths with services needing to be run before this value
    # @return [Array] array of file paths that need one or more services run.
    def paths_with_stale_services(file_selector, stale_datetime)
      @preserve_dataset ||= db_conn
        .from(PRESERVE_TBL)
        .exclude(service_time: nil)
        .limit(@page_size)
        .order(Sequel.asc(:service_time))

      # retrieve and return a page of results
      add_path_restrictions(@preserve_dataset, file_selector)
        .where { service_time <= stale_datetime }
        .where { delay_until_time < stale_datetime }
        .select_map(:file_path)
    end

    # Retrieves a page of paths for registered files.
    # @param file_selector [FileSelector] selector for what paths to search for files
    # @return [Array] array of file paths that are registered
    def registered_paths(file_selector)
      # retrieve and return a page of results
      add_path_restrictions(registered_dataset, file_selector)
        .select_map(:file_path)
    end

    # Calls the provided block once per each registered file path registered.
    # Must be passed a block.
    # NOTE(review): the underlying dataset carries a LIMIT of the page size; confirm that
    # paged_each iterates past that limit as intended.
    # @param file_selector [FileSelector] selector for what paths to search for files
    # @param older_than [Time] Optional. If provided, only files that have not been
    #    indexed since before this timestamp will be returned.
    def each_registered_path(file_selector, older_than: nil, &block)
      dataset = add_path_restrictions(registered_dataset, file_selector)
        .select(:file_path)
      unless older_than.nil?
        older_than_timestamp = older_than.utc.strftime(TIMESTAMP_FORMAT)
        dataset = dataset.where { updated < older_than_timestamp }
      end
      # Yield to the provided block once per row return
      dataset.paged_each(:rows_per_fetch => @page_size) do |row|
        block.call(row[:file_path])
      end
    end

    private

    # Produce the connection details to use, filling in the adapter and default database name
    # when a hash of details does not already specify them (under either string or symbol keys).
    # Works on a copy so that the hash supplied by the caller is left untouched.
    def with_connection_defaults(conn_details, adapter)
      return conn_details unless conn_details.is_a?(Hash)

      details = conn_details.dup
      # Add in the adapter name
      details['adapter'] = adapter unless details.key?('adapter') || details.key?(:adapter)
      # Add in default database name if none was specified
      details['database'] = INDEX_DB_NAME unless details.key?('database') || details.key?(:database)
      details
    end

    # Lazily opened, memoized database connection
    def db_conn
      @connection ||= Sequel.connect(@conn_details)
    end

    # Memoized dataset for the table of preservation service times
    def preserve_tbl
      @preserve_tbl ||= db_conn[PRESERVE_TBL]
    end

    # Restrict the dataset to the storage locations or paths identified by the file selector.
    def add_path_restrictions(dataset, file_selector)
      if file_selector.specificity == FileSelector::SPECIFICITY_STORAGE_LOCATION
        dataset.where(storage_location: file_selector.storage_locations)
      else
        # Reformat all selected paths into LIKE partial string matches. LIKE metacharacters
        # within the paths (such as '_' and '%') are escaped so they only match literally;
        # directories (trailing slash) additionally match everything beneath them.
        path_conds = file_selector.target_paths.map do |path|
          escaped = dataset.escape_like(path)
          path.end_with?('/') ? escaped + '%' : escaped
        end
        dataset.where(Sequel.like(:file_path, *path_conds))
      end
    end

    # Convert an iso8601 timestamp string into the format stored in the index, expressed in
    # UTC to agree with the other timestamps written by this driver.
    # @return [String] formatted timestamp, or nil if no timestamp was provided
    def convert_iso8601_to_timestamp(iso8601)
      return nil if iso8601.nil?
      Time.iso8601(iso8601).utc.strftime(TIMESTAMP_FORMAT)
    end

    # Memoized iso8601 timestamp of the unix epoch, used as the lowest possible date
    def minimum_timestamp
      @min_timestamp ||= ServiceDateHelper.formatted_timestamp(Time.at(0).utc)
    end

    # Memoized base dataset for querying registered files, one page at a time,
    # in ascending order by service time.
    def registered_dataset
      @registered_dataset ||= db_conn
        .from(PRESERVE_TBL)
        .limit(@page_size)
        .order(Sequel.asc(:service_time))
    end
  end
end
|