longleaf 0.1.0 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.circleci/config.yml +94 -0
- data/.editorconfig +13 -0
- data/.gitignore +4 -1
- data/.rubocop.yml +44 -0
- data/.rubocop_todo.yml +834 -0
- data/.yardopts +1 -0
- data/Gemfile +16 -1
- data/README.md +98 -12
- data/Rakefile +6 -0
- data/bin/setup +16 -1
- data/docs/aboutlongleaf.md +28 -0
- data/docs/extra.css +32 -0
- data/docs/img/change-file.png +0 -0
- data/docs/img/ll-example-preserved.png +0 -0
- data/docs/index.md +19 -0
- data/docs/install.md +66 -0
- data/docs/ll-example/config-example-relative.yml +33 -0
- data/docs/ll-example/files-dir/LLexample-PDF.pdf +0 -0
- data/docs/ll-example/files-dir/LLexample-TOCHANGE.txt +15 -0
- data/docs/ll-example/files-dir/LLexample-tokeep.txt +10 -0
- data/docs/ll-example/metadata-dir/.gitkeep +0 -0
- data/docs/ll-example/replica-files/.gitkeep +0 -0
- data/docs/ll-example/replica-metadata/.gitkeep +0 -0
- data/docs/quickstart.md +270 -0
- data/docs/rdocs/Longleaf.html +135 -0
- data/docs/rdocs/Longleaf/AppFields.html +178 -0
- data/docs/rdocs/Longleaf/ApplicationConfigDeserializer.html +631 -0
- data/docs/rdocs/Longleaf/ApplicationConfigManager.html +610 -0
- data/docs/rdocs/Longleaf/ApplicationConfigValidator.html +238 -0
- data/docs/rdocs/Longleaf/CLI.html +909 -0
- data/docs/rdocs/Longleaf/ChecksumMismatchError.html +151 -0
- data/docs/rdocs/Longleaf/ConfigBuilder.html +1339 -0
- data/docs/rdocs/Longleaf/ConfigurationError.html +143 -0
- data/docs/rdocs/Longleaf/ConfigurationValidator.html +227 -0
- data/docs/rdocs/Longleaf/DeregisterCommand.html +420 -0
- data/docs/rdocs/Longleaf/DeregisterEvent.html +453 -0
- data/docs/rdocs/Longleaf/DeregistrationError.html +151 -0
- data/docs/rdocs/Longleaf/DigestHelper.html +419 -0
- data/docs/rdocs/Longleaf/EventError.html +147 -0
- data/docs/rdocs/Longleaf/EventNames.html +163 -0
- data/docs/rdocs/Longleaf/EventStatusTracking.html +656 -0
- data/docs/rdocs/Longleaf/FileCheckService.html +540 -0
- data/docs/rdocs/Longleaf/FileHelpers.html +520 -0
- data/docs/rdocs/Longleaf/FileRecord.html +716 -0
- data/docs/rdocs/Longleaf/FileSelector.html +901 -0
- data/docs/rdocs/Longleaf/FixityCheckService.html +691 -0
- data/docs/rdocs/Longleaf/IndexManager.html +1155 -0
- data/docs/rdocs/Longleaf/InvalidDigestAlgorithmError.html +143 -0
- data/docs/rdocs/Longleaf/InvalidStoragePathError.html +143 -0
- data/docs/rdocs/Longleaf/Logging.html +405 -0
- data/docs/rdocs/Longleaf/Logging/RedirectingLogger.html +1213 -0
- data/docs/rdocs/Longleaf/LongleafError.html +139 -0
- data/docs/rdocs/Longleaf/MDFields.html +193 -0
- data/docs/rdocs/Longleaf/MetadataBuilder.html +787 -0
- data/docs/rdocs/Longleaf/MetadataDeserializer.html +537 -0
- data/docs/rdocs/Longleaf/MetadataError.html +143 -0
- data/docs/rdocs/Longleaf/MetadataPersistenceManager.html +539 -0
- data/docs/rdocs/Longleaf/MetadataRecord.html +1411 -0
- data/docs/rdocs/Longleaf/MetadataSerializer.html +786 -0
- data/docs/rdocs/Longleaf/PreservationServiceError.html +147 -0
- data/docs/rdocs/Longleaf/PreserveCommand.html +410 -0
- data/docs/rdocs/Longleaf/PreserveEvent.html +491 -0
- data/docs/rdocs/Longleaf/RegisterCommand.html +428 -0
- data/docs/rdocs/Longleaf/RegisterEvent.html +628 -0
- data/docs/rdocs/Longleaf/RegisteredFileSelector.html +446 -0
- data/docs/rdocs/Longleaf/RegistrationError.html +151 -0
- data/docs/rdocs/Longleaf/ReindexCommand.html +576 -0
- data/docs/rdocs/Longleaf/RsyncReplicationService.html +1180 -0
- data/docs/rdocs/Longleaf/SequelIndexDriver.html +1978 -0
- data/docs/rdocs/Longleaf/ServiceCandidateFilesystemIterator.html +572 -0
- data/docs/rdocs/Longleaf/ServiceCandidateIndexIterator.html +532 -0
- data/docs/rdocs/Longleaf/ServiceCandidateLocator.html +333 -0
- data/docs/rdocs/Longleaf/ServiceClassCache.html +725 -0
- data/docs/rdocs/Longleaf/ServiceDateHelper.html +425 -0
- data/docs/rdocs/Longleaf/ServiceDefinition.html +683 -0
- data/docs/rdocs/Longleaf/ServiceDefinitionManager.html +371 -0
- data/docs/rdocs/Longleaf/ServiceDefinitionValidator.html +269 -0
- data/docs/rdocs/Longleaf/ServiceFields.html +173 -0
- data/docs/rdocs/Longleaf/ServiceManager.html +1229 -0
- data/docs/rdocs/Longleaf/ServiceMappingManager.html +410 -0
- data/docs/rdocs/Longleaf/ServiceMappingValidator.html +347 -0
- data/docs/rdocs/Longleaf/ServiceRecord.html +821 -0
- data/docs/rdocs/Longleaf/StorageLocation.html +985 -0
- data/docs/rdocs/Longleaf/StorageLocationManager.html +729 -0
- data/docs/rdocs/Longleaf/StorageLocationUnavailableError.html +143 -0
- data/docs/rdocs/Longleaf/StorageLocationValidator.html +373 -0
- data/docs/rdocs/Longleaf/StoragePathValidator.html +253 -0
- data/docs/rdocs/Longleaf/SystemConfigBuilder.html +441 -0
- data/docs/rdocs/Longleaf/SystemConfigFields.html +163 -0
- data/docs/rdocs/Longleaf/ValidateConfigCommand.html +451 -0
- data/docs/rdocs/Longleaf/ValidateMetadataCommand.html +408 -0
- data/docs/rdocs/_index.html +660 -0
- data/docs/rdocs/class_list.html +51 -0
- data/docs/rdocs/css/common.css +1 -0
- data/docs/rdocs/css/full_list.css +58 -0
- data/docs/rdocs/css/style.css +496 -0
- data/docs/rdocs/file.README.html +165 -0
- data/docs/rdocs/file_list.html +56 -0
- data/docs/rdocs/frames.html +17 -0
- data/docs/rdocs/index.html +165 -0
- data/docs/rdocs/js/app.js +303 -0
- data/docs/rdocs/js/full_list.js +216 -0
- data/docs/rdocs/js/jquery.js +4 -0
- data/docs/rdocs/method_list.html +2051 -0
- data/docs/rdocs/top-level-namespace.html +110 -0
- data/lib/longleaf/candidates/file_selector.rb +150 -0
- data/lib/longleaf/candidates/manifest_digest_provider.rb +17 -0
- data/lib/longleaf/candidates/physical_path_provider.rb +17 -0
- data/lib/longleaf/candidates/registered_file_selector.rb +67 -0
- data/lib/longleaf/candidates/service_candidate_filesystem_iterator.rb +93 -0
- data/lib/longleaf/candidates/service_candidate_index_iterator.rb +84 -0
- data/lib/longleaf/candidates/service_candidate_locator.rb +23 -0
- data/lib/longleaf/candidates/single_digest_provider.rb +13 -0
- data/lib/longleaf/cli.rb +252 -46
- data/lib/longleaf/commands/deregister_command.rb +51 -0
- data/lib/longleaf/commands/preserve_command.rb +50 -0
- data/lib/longleaf/commands/register_command.rb +34 -43
- data/lib/longleaf/commands/reindex_command.rb +92 -0
- data/lib/longleaf/commands/validate_config_command.rb +33 -8
- data/lib/longleaf/commands/validate_metadata_command.rb +51 -0
- data/lib/longleaf/errors.rb +26 -7
- data/lib/longleaf/events/deregister_event.rb +53 -0
- data/lib/longleaf/events/event_names.rb +9 -0
- data/lib/longleaf/events/event_status_tracking.rb +59 -0
- data/lib/longleaf/events/preserve_event.rb +82 -0
- data/lib/longleaf/events/register_event.rb +59 -51
- data/lib/longleaf/helpers/case_insensitive_hash.rb +38 -0
- data/lib/longleaf/helpers/digest_helper.rb +56 -0
- data/lib/longleaf/helpers/s3_uri_helper.rb +86 -0
- data/lib/longleaf/helpers/selection_options_parser.rb +215 -0
- data/lib/longleaf/helpers/service_date_helper.rb +78 -0
- data/lib/longleaf/indexing/index_manager.rb +101 -0
- data/lib/longleaf/indexing/sequel_index_driver.rb +306 -0
- data/lib/longleaf/logging.rb +5 -4
- data/lib/longleaf/logging/redirecting_logger.rb +30 -25
- data/lib/longleaf/models/app_fields.rb +7 -2
- data/lib/longleaf/models/file_record.rb +31 -8
- data/lib/longleaf/models/filesystem_metadata_location.rb +56 -0
- data/lib/longleaf/models/filesystem_storage_location.rb +52 -0
- data/lib/longleaf/models/md_fields.rb +3 -1
- data/lib/longleaf/models/metadata_location.rb +47 -0
- data/lib/longleaf/models/metadata_record.rb +43 -16
- data/lib/longleaf/models/s3_storage_location.rb +138 -0
- data/lib/longleaf/models/service_definition.rb +7 -6
- data/lib/longleaf/models/service_fields.rb +7 -1
- data/lib/longleaf/models/service_record.rb +10 -6
- data/lib/longleaf/models/storage_location.rb +24 -19
- data/lib/longleaf/models/storage_types.rb +9 -0
- data/lib/longleaf/models/system_config_fields.rb +9 -0
- data/lib/longleaf/preservation_services/file_check_service.rb +59 -0
- data/lib/longleaf/preservation_services/fixity_check_service.rb +124 -0
- data/lib/longleaf/preservation_services/rsync_replication_service.rb +198 -0
- data/lib/longleaf/preservation_services/s3_replication_service.rb +131 -0
- data/lib/longleaf/services/application_config_deserializer.rb +81 -24
- data/lib/longleaf/services/application_config_manager.rb +20 -6
- data/lib/longleaf/services/application_config_validator.rb +19 -9
- data/lib/longleaf/services/configuration_validator.rb +67 -4
- data/lib/longleaf/services/filesystem_location_validator.rb +16 -0
- data/lib/longleaf/services/metadata_deserializer.rb +115 -42
- data/lib/longleaf/services/metadata_persistence_manager.rb +47 -0
- data/lib/longleaf/services/metadata_serializer.rb +156 -23
- data/lib/longleaf/services/metadata_validator.rb +76 -0
- data/lib/longleaf/services/s3_location_validator.rb +19 -0
- data/lib/longleaf/services/service_class_cache.rb +112 -0
- data/lib/longleaf/services/service_definition_manager.rb +10 -7
- data/lib/longleaf/services/service_definition_validator.rb +25 -18
- data/lib/longleaf/services/service_manager.rb +86 -11
- data/lib/longleaf/services/service_mapping_manager.rb +13 -12
- data/lib/longleaf/services/service_mapping_validator.rb +36 -26
- data/lib/longleaf/services/storage_location_manager.rb +76 -15
- data/lib/longleaf/services/storage_location_validator.rb +49 -35
- data/lib/longleaf/specs/config_builder.rb +47 -23
- data/lib/longleaf/specs/config_validator_helpers.rb +16 -0
- data/lib/longleaf/specs/custom_matchers.rb +9 -0
- data/lib/longleaf/specs/file_helpers.rb +61 -0
- data/lib/longleaf/specs/metadata_builder.rb +98 -0
- data/lib/longleaf/specs/system_config_builder.rb +27 -0
- data/lib/longleaf/version.rb +1 -1
- data/longleaf.gemspec +20 -7
- data/mkdocs.yml +21 -0
- metadata +308 -24
- data/.travis.yml +0 -4
- data/lib/longleaf/commands/abstract_command.rb +0 -37
- data/lib/longleaf/services/storage_path_validator.rb +0 -16
|
@@ -0,0 +1,306 @@
|
|
|
1
|
+
require 'sequel'
|
|
2
|
+
require 'digest/md5'
|
|
3
|
+
require 'longleaf/events/event_names'
|
|
4
|
+
require 'longleaf/candidates/file_selector'
|
|
5
|
+
require 'longleaf/version'
|
|
6
|
+
require 'longleaf/models/system_config_fields'
|
|
7
|
+
require 'longleaf/logging'
|
|
8
|
+
|
|
9
|
+
module Longleaf
|
|
10
|
+
# Driver for interacting with an RDBMS-based metadata index using the Sequel ORM gem.
|
|
11
|
+
# Users must create the database and credentials for connecting to it in advance,
|
|
12
|
+
# if using a database application that requires creation of databases (ie, not sqlite).
|
|
13
|
+
# The default database name is 'longleaf_metadata_index' but may be overridden.
|
|
14
|
+
#
|
|
15
|
+
# See the Sequel documentation for details about accepted connection parameters:
|
|
16
|
+
# https://github.com/jeremyevans/sequel/blob/master/doc/opening_databases.rdoc
|
|
17
|
+
class SequelIndexDriver
|
|
18
|
+
include Longleaf::Logging
|
|
19
|
+
# Database name used when the connection details do not specify one.
INDEX_DB_NAME ||= 'longleaf_metadata_index'
# Table holding one row per indexed file with its next service time.
PRESERVE_TBL ||= :preserve_service_times
# Table holding the configuration digest and version recorded at the last reindex.
INDEX_STATE_TBL ||= :index_state
# Page size used when the caller does not supply a positive one.
DEFAULT_PAGE_SIZE ||= 1000
# strftime pattern (millisecond precision) for timestamps written to the index.
TIMESTAMP_FORMAT ||= '%Y-%m-%d %H:%M:%S.%3N'
|
|
24
|
+
|
|
25
|
+
# Initialize the index driver.
#
# @param app_config [ApplicationConfigManager] the application configuration manager
# @param adapter name of the database adapter to use.
#   NOTE(review): other methods of this class compare @adapter against symbols
#   (:mysql, :mysql2, :postgres, ...), so callers presumably pass a Symbol -- confirm.
# @param conn_details Details about the configuration and connection to the database used for the index.
#   If a string is provided, it will be used as the connection URL and must identify the adapter.
#   If a hash is provided, it is used as the parameters for the database connection. The hash is
#   copied before defaults are filled in, so the caller's object is never modified.
# @param page_size [Integer] number of results to retrieve per query when getting candidates.
#   nil, zero and negative values fall back to DEFAULT_PAGE_SIZE.
def initialize(app_config, adapter, conn_details, page_size: nil)
  # Global Sequel setting: timestamps are handled as UTC
  Sequel.default_timezone = :utc
  @app_config = app_config
  @adapter = adapter
  # Digest of the app config file so we can tell if it changes
  @config_md5 = app_config.config_md5
  @page_size = page_size.nil? || page_size <= 0 ? DEFAULT_PAGE_SIZE : page_size

  if conn_details.is_a?(Hash)
    # Work on a copy so the defaults below do not leak into the caller's hash
    details = conn_details.dup
    # Fill in the adapter name and default database name only when absent under
    # both the string and the symbol spelling of the key
    details['adapter'] = adapter unless details.key?('adapter') || details.key?(:adapter)
    details['database'] = INDEX_DB_NAME unless details.key?('database') || details.key?(:database)
    @conn_details = details
  else
    @conn_details = conn_details
  end
end
|
|
49
|
+
|
|
50
|
+
# Checks whether the index was last refreshed under a different application configuration.
#
# @return [Boolean] true if the index state table holds no row whose config_md5 equals
#   the digest of the current application configuration.
def is_stale?
  matching_states = db_conn[INDEX_STATE_TBL].where(config_md5: @config_md5)
  matching_states.count.zero?
end
|
|
55
|
+
|
|
56
|
+
# Adds the given file to the index, or refreshes its row if the path is already indexed.
#
# @param file_rec [FileRecord] file record to index; its path, metadata record and
#   storage location are read.
def index(file_rec)
  file_path = file_rec.path
  md_rec = file_rec.metadata_record
  location_name = file_rec.storage_location.name

  # Service definitions which should apply to files in this storage location
  expected_services = @app_config.service_manager.list_service_definitions(location: location_name)

  next_service_at = convert_iso8601_to_timestamp(first_service_execution_timestamp(expected_services, md_rec))
  delayed_until = convert_iso8601_to_timestamp(delay_until_timestamp(md_rec))

  # Columns that get overwritten when the path already has a row
  refreshed = {
    storage_location: location_name,
    service_time: next_service_at,
    delay_until_time: delayed_until,
    updated: Time.now.utc.strftime(TIMESTAMP_FORMAT)
  }
  row = { file_path: file_path }.merge(refreshed)

  # mysql adapters use ON DUPLICATE KEY UPDATE; everything else an insert-conflict clause
  upsert = if @adapter == :mysql || @adapter == :mysql2
             preserve_tbl.on_duplicate_key_update
           else
             preserve_tbl.insert_conflict(target: :file_path, update: refreshed)
           end
  upsert.insert(row)
end
|
|
97
|
+
|
|
98
|
+
# Find the earliest service execution time for any services expected to be run for the specified file.
#
# @param expected_services [Array] list of ServiceDefinition objects expected for the specified file.
# @param md_rec [MetadataRecord] metadata record for the file being evaluated
# @return The smallest non-nil value reported by ServiceDateHelper.next_run_needed across
#   the expected services (the caller treats it as an iso8601 string).
#   Returns nil if no services are expected, none of the services reports a next run,
#   or the file is deregistered.
def first_service_execution_timestamp(expected_services, md_rec)
  return nil if md_rec.deregistered?

  expected_services
    .map { |service_def| ServiceDateHelper.next_run_needed(md_rec, service_def) }
    .compact
    .min
end
|
|
124
|
+
|
|
125
|
+
# Determines the time before which the file should not be picked up again.
#
# @param md_rec [MetadataRecord] metadata record whose service records are inspected
# @return The failure timestamp of the first listed service that has one. If no service
#   has a failure timestamp, the lowest possible timestamp (see minimum_timestamp) is returned.
def delay_until_timestamp(md_rec)
  md_rec.list_services.each do |name|
    failed_at = md_rec.service(name).failure_timestamp
    return failed_at unless failed_at.nil?
  end
  # No failures recorded, so use the lowest possible date
  minimum_timestamp
end
|
|
134
|
+
|
|
135
|
+
# Deletes the row for a single file path from the index. Logs a warning when no row was deleted.
#
# @param remove_me The record to remove from the index. May be a FileRecord (its path is used)
#   or a String path.
def remove(remove_me)
  path = remove_me.is_a?(FileRecord) ? remove_me.path : remove_me

  deleted = preserve_tbl.where(file_path: path).delete
  logger.warn("Could not remove #{path} from the index, path was not present.") if deleted == 0
end
|
|
149
|
+
|
|
150
|
+
# Deletes entries from the index.
#
# @param older_than [Time] Optional. If provided, only rows whose `updated` value is earlier
#   than this time are deleted; otherwise every row is deleted.
def clear_index(older_than = nil)
  return preserve_tbl.delete if older_than.nil?

  cutoff = older_than.utc.strftime(TIMESTAMP_FORMAT)
  preserve_tbl.where { updated < cutoff }.delete
end
|
|
161
|
+
|
|
162
|
+
# Initialize the index's database using the provided configuration: (re)creates the
# service-times table and its indexes, (re)creates the index state table, and records
# the current state.
# NOTE(review): create_table! is expected to replace existing tables -- confirm against Sequel docs.
def setup_index
  # Table tracking when files will need preservation services run on them.
  # mysql does not support 'text' fields as primary keys, so it gets a sized string column.
  uses_mysql = [:mysql, :mysql2].include?(@adapter)
  file_path_opts = { primary_key: true }.merge(uses_mysql ? { size: 768 } : { text: true })

  db_conn.create_table!(PRESERVE_TBL) do
    String :file_path, file_path_opts
    column :storage_location, 'varchar(128)'
    column :service_time, 'timestamp(3)', null: true
    column :delay_until_time, 'timestamp(3)'
    column :updated, 'timestamp(3)'
  end

  # Adapter-specific index on file_path; other adapters get none
  if @adapter == :postgres
    db_conn.run("CREATE INDEX service_times_file_path_text_index ON preserve_service_times (file_path text_pattern_ops)")
  elsif [:sqlite, :amalgalite].include?(@adapter)
    db_conn.run("CREATE INDEX service_times_file_path_text_index ON preserve_service_times (file_path collate nocase)")
  end
  db_conn.run("CREATE INDEX service_times_storage_location_index ON preserve_service_times (storage_location)")

  # Table for tracking the state of the index
  db_conn.create_table!(INDEX_STATE_TBL) do
    String :config_md5
    DateTime :last_reindexed
    String :longleaf_version
  end

  # Prepopulate the index state information
  update_index_state
end
|
|
204
|
+
|
|
205
|
+
# Replaces the contents of the index state table with a single row describing the current
# configuration digest, the current UTC time and the running longleaf version, marking the
# index as refreshed / in sync with the application's configuration.
def update_index_state
  states = db_conn[INDEX_STATE_TBL]
  states.delete

  current_state = {
    config_md5: @config_md5,
    last_reindexed: Time.now.utc,
    longleaf_version: Longleaf::VERSION
  }
  states.insert(current_state)
end
|
|
215
|
+
|
|
216
|
+
# Retrieves one page (at most @page_size rows, ordered by service_time ascending) of file paths
# which have one or more services that need to run.
#
# @param file_selector [FileSelector] selector for what paths to search for files
# @param stale_datetime [DateTime] find file_paths whose service_time is at or before this value
#   and whose delay_until_time is before it
# @return [Array] array of file paths that need one or more services run.
def paths_with_stale_services(file_selector, stale_datetime)
  # Base dataset is built once and reused by later calls
  @preserve_dataset ||= db_conn.from(PRESERVE_TBL)
                               .exclude(service_time: nil)
                               .limit(@page_size)
                               .order(Sequel.asc(:service_time))

  due = add_path_restrictions(@preserve_dataset, file_selector)
  due = due.where { service_time <= stale_datetime }
  due = due.where { delay_until_time < stale_datetime }
  due.select_map(:file_path)
end
|
|
235
|
+
|
|
236
|
+
# Retrieves a page of paths for registered files matching the selector.
#
# @param file_selector [FileSelector] selector for what paths to search for files
# @return [Array] array of file paths that are registered
def registered_paths(file_selector)
  restricted = add_path_restrictions(registered_dataset, file_selector)
  restricted.select_map(:file_path)
end
|
|
244
|
+
|
|
245
|
+
# Calls the provided block once for each registered file path matched by the selector.
# Must be passed a block.
# NOTE(review): the dataset from registered_dataset carries limit(@page_size); confirm whether
# paged_each therefore stops after that many rows rather than visiting every registered path.
#
# @param file_selector [FileSelector] selector for what paths to search for files
# @param older_than [Time] Optional. If provided, only files whose index entry was last
#   updated before this timestamp will be yielded.
def each_registered_path(file_selector, older_than: nil, &block)
  candidates = add_path_restrictions(registered_dataset, file_selector).select(:file_path)

  unless older_than.nil?
    cutoff = older_than.utc.strftime(TIMESTAMP_FORMAT)
    candidates = candidates.where { updated < cutoff }
  end

  # Hand each returned row's path to the caller's block
  candidates.paged_each(rows_per_fetch: @page_size) { |row| block.call(row[:file_path]) }
end
|
|
262
|
+
|
|
263
|
+
private
|
|
264
|
+
# Lazily opens the database connection from @conn_details and reuses it afterwards.
def db_conn
  @connection ||= Sequel.connect(@conn_details)
end
|
|
268
|
+
|
|
269
|
+
# Dataset for the service-times table, looked up on first use and then reused.
def preserve_tbl
  @preserve_tbl ||= db_conn[PRESERVE_TBL]
end
|
|
273
|
+
|
|
274
|
+
# Narrows a dataset to the rows matched by the file selector.
#
# When the selector's specificity is storage location, rows are filtered by storage_location.
# Otherwise rows are filtered by file_path using LIKE: a target path ending in '/' matches
# everything beneath it, any other target path must match exactly. LIKE metacharacters
# occurring in the paths themselves are escaped so they are compared literally.
#
# @param dataset the Sequel dataset to restrict
# @param file_selector [FileSelector] selector describing the storage locations or paths wanted
# @return the restricted dataset
def add_path_restrictions(dataset, file_selector)
  if file_selector.specificity == FileSelector::SPECIFICITY_STORAGE_LOCATION
    dataset.where(storage_location: file_selector.storage_locations)
  else
    # Reformat all selected paths into LIKE patterns; only the trailing '%' added
    # for directories is a wildcard, everything from the path is escaped
    path_conds = file_selector.target_paths.map do |path|
      escaped = dataset.escape_like(path)
      path.end_with?('/') ? escaped + '%' : escaped
    end
    dataset.where(Sequel.like(:file_path, *path_conds))
  end
end
|
|
283
|
+
|
|
284
|
+
# Converts an iso8601 timestamp string into the TIMESTAMP_FORMAT representation stored in the index.
# The value is normalized to UTC first, so an input carrying a non-UTC offset is stored as the
# same instant as the other (UTC) timestamps written by this class.
#
# @param iso8601 [String] iso8601 formatted timestamp, or nil
# @return [String] the timestamp formatted with TIMESTAMP_FORMAT in UTC, or nil if iso8601 is nil
def convert_iso8601_to_timestamp(iso8601)
  return nil if iso8601.nil?
  Time.iso8601(iso8601).utc.strftime(TIMESTAMP_FORMAT)
end
|
|
288
|
+
|
|
289
|
+
# Formatted timestamp for the Unix epoch (Time.at(0) in UTC), used as the lowest possible date.
# Computed once and reused.
def minimum_timestamp
  @min_timestamp ||= ServiceDateHelper.formatted_timestamp(Time.at(0).utc)
end
|
|
295
|
+
|
|
296
|
+
# Base dataset over the service-times table, limited to @page_size rows and ordered by
# service_time ascending. Built on first use and then reused.
def registered_dataset
  @registered_dataset ||= db_conn.from(PRESERVE_TBL)
                                 .limit(@page_size)
                                 .order(Sequel.asc(:service_time))
end
|
|
305
|
+
end
|
|
306
|
+
end
|
data/lib/longleaf/logging.rb
CHANGED
|
@@ -1,21 +1,22 @@
|
|
|
1
1
|
require 'longleaf/logging/redirecting_logger'
|
|
2
2
|
|
|
3
3
|
module Longleaf
|
|
4
|
+
# Module for access logging within longleaf
|
|
4
5
|
module Logging
|
|
5
6
|
# Get the main logger for longleaf
|
|
6
7
|
def logger
|
|
7
8
|
Logging.logger
|
|
8
9
|
end
|
|
9
|
-
|
|
10
|
+
|
|
10
11
|
# Get the main logger for longleaf
|
|
11
12
|
def self.logger
|
|
12
13
|
@logger ||= RedirectingLogger.new
|
|
13
14
|
end
|
|
14
|
-
|
|
15
|
+
|
|
15
16
|
def initialize_logger(failure_only, log_level, log_format, datetime_format)
|
|
16
17
|
Logging.initialize_logger(failure_only, log_level, log_format, datetime_format)
|
|
17
18
|
end
|
|
18
|
-
|
|
19
|
+
|
|
19
20
|
def self.initialize_logger(failure_only, log_level, log_format, datetime_format)
|
|
20
21
|
@logger = RedirectingLogger.new(failure_only: failure_only,
|
|
21
22
|
log_level: log_level,
|
|
@@ -23,4 +24,4 @@ module Longleaf
|
|
|
23
24
|
datetime_format: datetime_format)
|
|
24
25
|
end
|
|
25
26
|
end
|
|
26
|
-
end
|
|
27
|
+
end
|
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
require 'logger'
|
|
2
2
|
|
|
3
|
-
# Logger which directs messages to stdout and/or stderr, depending on the nature of the message.
|
|
4
|
-
# Status logging, which includes standard logger methods, goes to STDERR.
|
|
5
|
-
# Operation success and failure messages go to STDOUT, and to STDERR at info level.
|
|
6
3
|
module Longleaf
|
|
7
4
|
module Logging
|
|
5
|
+
# Logger which directs messages to stdout and/or stderr, depending on the nature of the message.
|
|
6
|
+
# Status logging, which includes standard logger methods, goes to STDERR.
|
|
7
|
+
# Operation success and failure messages go to STDOUT, and to STDERR at info level.
|
|
8
8
|
class RedirectingLogger
|
|
9
|
-
# @param
|
|
9
|
+
# @param [Boolean] failure_only If set to true, only failure messages will be output to STDOUT
|
|
10
10
|
# @param log_level [String] logger level used for output to STDERR
|
|
11
|
-
# @param log_format [
|
|
11
|
+
# @param log_format [String] format string for log entries to STDERR. There are 4 variables available
|
|
12
12
|
# for inclusion in the output: severity, datetime, progname, msg. Variables must be wrapped in %{}.
|
|
13
13
|
# @param datetime_format [String] datetime formatting string used for logger dates appearing in STDERR.
|
|
14
14
|
def initialize(failure_only: false, log_level: 'WARN', log_format: nil, datetime_format: nil)
|
|
@@ -25,12 +25,12 @@ module Longleaf
|
|
|
25
25
|
@stderr_log.formatter = proc do |severity, datetime, progname, msg|
|
|
26
26
|
# Make sure the format ends with a newline
|
|
27
27
|
@log_format = @log_format + "\n" unless @log_format.end_with?("\n")
|
|
28
|
-
|
|
28
|
+
|
|
29
29
|
formatted_date = @stderr_log.datetime_format.nil? ? datetime : datetime.strftime(datetime_format)
|
|
30
30
|
@log_format % { :severity => severity, :datetime => formatted_date, :progname => progname, :msg => msg }
|
|
31
31
|
end
|
|
32
32
|
end
|
|
33
|
-
|
|
33
|
+
|
|
34
34
|
@stdout_log = Logger.new($stdout)
|
|
35
35
|
@stdout_log.formatter = proc do |severity, datetime, progname, msg|
|
|
36
36
|
"#{msg}\n"
|
|
@@ -41,33 +41,38 @@ module Longleaf
|
|
|
41
41
|
@stdout_log.level = 'info'
|
|
42
42
|
end
|
|
43
43
|
end
|
|
44
|
-
|
|
44
|
+
|
|
45
45
|
def debug(progname = nil, &block)
|
|
46
46
|
@stderr_log.debug(progname, &block)
|
|
47
47
|
end
|
|
48
|
-
|
|
48
|
+
|
|
49
49
|
def info(progname = nil, &block)
|
|
50
50
|
@stderr_log.info(progname, &block)
|
|
51
51
|
end
|
|
52
|
-
|
|
52
|
+
|
|
53
53
|
def warn(progname = nil, &block)
|
|
54
54
|
@stderr_log.warn(progname, &block)
|
|
55
55
|
end
|
|
56
|
-
|
|
56
|
+
|
|
57
57
|
def error(progname = nil, &block)
|
|
58
58
|
@stderr_log.error(progname, &block)
|
|
59
59
|
end
|
|
60
|
-
|
|
60
|
+
|
|
61
61
|
def fatal(progname = nil, &block)
|
|
62
62
|
@stderr_log.fatal(progname, &block)
|
|
63
63
|
end
|
|
64
|
-
|
|
64
|
+
|
|
65
65
|
def unknown(progname = nil, &block)
|
|
66
66
|
@stderr_log.unknown(progname, &block)
|
|
67
67
|
end
|
|
68
|
-
|
|
68
|
+
|
|
69
|
+
def <<(msg)
|
|
70
|
+
@stderr_log << msg
|
|
71
|
+
end
|
|
72
|
+
|
|
69
73
|
# Logs a success message to STDOUT, as well as STDERR at info level.
|
|
70
|
-
#
|
|
74
|
+
#
|
|
75
|
+
# @param [String] eventOrMessage name of the preservation event which succeeded,
|
|
71
76
|
# or the message to output if it is the only parameter. Required.
|
|
72
77
|
# @param file_name [String] file name which is the subject of this message.
|
|
73
78
|
# @param message [String] descriptive message to accompany this output
|
|
@@ -75,10 +80,10 @@ module Longleaf
|
|
|
75
80
|
def success(eventOrMessage, file_name = nil, message = nil, service = nil)
|
|
76
81
|
outcome('SUCCESS', eventOrMessage, file_name, message, service)
|
|
77
82
|
end
|
|
78
|
-
|
|
83
|
+
|
|
79
84
|
# Logs a failure message to STDOUT, as well as STDERR at info level.
|
|
80
85
|
# If an error was provided, it is logged to STDERR at error level.
|
|
81
|
-
# @param eventOrMessage [String] name of the preservation event which failed,
|
|
86
|
+
# @param eventOrMessage [String] name of the preservation event which failed,
|
|
82
87
|
# or the message to output if it is the only parameter.
|
|
83
88
|
# @param file_name [String] file name which is the subject of this message.
|
|
84
89
|
# @param message [String] descriptive message to accompany this output
|
|
@@ -87,17 +92,18 @@ module Longleaf
|
|
|
87
92
|
def failure(eventOrMessage, file_name = nil, message = nil, service = nil, error: nil)
|
|
88
93
|
text = outcome_text('FAILURE', eventOrMessage, file_name, message, service, error)
|
|
89
94
|
@stdout_log.warn(text)
|
|
90
|
-
|
|
95
|
+
|
|
91
96
|
@stderr_log.info(text)
|
|
92
97
|
@stderr_log.error("#{error.message}") unless error.nil?
|
|
98
|
+
@stderr_log.error("#{error.backtrace}") unless error.nil? || error.backtrace.nil?
|
|
93
99
|
end
|
|
94
|
-
|
|
100
|
+
|
|
95
101
|
# Logs an outcome message to STDOUT, as well as STDERR at info level.
|
|
96
102
|
# If file_name and message are nil, eventOrMessage will be used as the message.
|
|
97
103
|
#
|
|
98
104
|
# @param outcome [String] The status of the outcome. Required.
|
|
99
|
-
# @param eventOrMessage [String] name of the preservation event which was successful,
|
|
100
|
-
#
|
|
105
|
+
# @param eventOrMessage [String] name of the preservation event which was successful,
|
|
106
|
+
# or the message to output if it is the only parameter. Required.
|
|
101
107
|
# @param file_name [String] file name which is the subject of this message.
|
|
102
108
|
# @param message [String] descriptive message to accompany this output
|
|
103
109
|
# @param service [String] name of the service which executed.
|
|
@@ -107,14 +113,13 @@ module Longleaf
|
|
|
107
113
|
@stdout_log.info(text)
|
|
108
114
|
@stderr_log.info(text)
|
|
109
115
|
end
|
|
110
|
-
|
|
111
|
-
# FAILURE verify[cdr_fixity_check] /path/to/file: Something terrible
|
|
116
|
+
|
|
112
117
|
private
|
|
113
118
|
def outcome_text(outcome, eventOrMessage, file_name = nil, message = nil, service = nil, error = nil)
|
|
114
119
|
message_only = file_name.nil? && message.nil? && error.nil?
|
|
115
|
-
|
|
120
|
+
|
|
116
121
|
text = "#{outcome}"
|
|
117
|
-
|
|
122
|
+
|
|
118
123
|
if message_only
|
|
119
124
|
text << ": #{eventOrMessage}"
|
|
120
125
|
else
|