longleaf 0.1.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185) hide show
  1. checksums.yaml +4 -4
  2. data/.circleci/config.yml +94 -0
  3. data/.editorconfig +13 -0
  4. data/.gitignore +4 -1
  5. data/.rubocop.yml +44 -0
  6. data/.rubocop_todo.yml +834 -0
  7. data/.yardopts +1 -0
  8. data/Gemfile +16 -1
  9. data/README.md +98 -12
  10. data/Rakefile +6 -0
  11. data/bin/setup +16 -1
  12. data/docs/aboutlongleaf.md +28 -0
  13. data/docs/extra.css +32 -0
  14. data/docs/img/change-file.png +0 -0
  15. data/docs/img/ll-example-preserved.png +0 -0
  16. data/docs/index.md +19 -0
  17. data/docs/install.md +66 -0
  18. data/docs/ll-example/config-example-relative.yml +33 -0
  19. data/docs/ll-example/files-dir/LLexample-PDF.pdf +0 -0
  20. data/docs/ll-example/files-dir/LLexample-TOCHANGE.txt +15 -0
  21. data/docs/ll-example/files-dir/LLexample-tokeep.txt +10 -0
  22. data/docs/ll-example/metadata-dir/.gitkeep +0 -0
  23. data/docs/ll-example/replica-files/.gitkeep +0 -0
  24. data/docs/ll-example/replica-metadata/.gitkeep +0 -0
  25. data/docs/quickstart.md +270 -0
  26. data/docs/rdocs/Longleaf.html +135 -0
  27. data/docs/rdocs/Longleaf/AppFields.html +178 -0
  28. data/docs/rdocs/Longleaf/ApplicationConfigDeserializer.html +631 -0
  29. data/docs/rdocs/Longleaf/ApplicationConfigManager.html +610 -0
  30. data/docs/rdocs/Longleaf/ApplicationConfigValidator.html +238 -0
  31. data/docs/rdocs/Longleaf/CLI.html +909 -0
  32. data/docs/rdocs/Longleaf/ChecksumMismatchError.html +151 -0
  33. data/docs/rdocs/Longleaf/ConfigBuilder.html +1339 -0
  34. data/docs/rdocs/Longleaf/ConfigurationError.html +143 -0
  35. data/docs/rdocs/Longleaf/ConfigurationValidator.html +227 -0
  36. data/docs/rdocs/Longleaf/DeregisterCommand.html +420 -0
  37. data/docs/rdocs/Longleaf/DeregisterEvent.html +453 -0
  38. data/docs/rdocs/Longleaf/DeregistrationError.html +151 -0
  39. data/docs/rdocs/Longleaf/DigestHelper.html +419 -0
  40. data/docs/rdocs/Longleaf/EventError.html +147 -0
  41. data/docs/rdocs/Longleaf/EventNames.html +163 -0
  42. data/docs/rdocs/Longleaf/EventStatusTracking.html +656 -0
  43. data/docs/rdocs/Longleaf/FileCheckService.html +540 -0
  44. data/docs/rdocs/Longleaf/FileHelpers.html +520 -0
  45. data/docs/rdocs/Longleaf/FileRecord.html +716 -0
  46. data/docs/rdocs/Longleaf/FileSelector.html +901 -0
  47. data/docs/rdocs/Longleaf/FixityCheckService.html +691 -0
  48. data/docs/rdocs/Longleaf/IndexManager.html +1155 -0
  49. data/docs/rdocs/Longleaf/InvalidDigestAlgorithmError.html +143 -0
  50. data/docs/rdocs/Longleaf/InvalidStoragePathError.html +143 -0
  51. data/docs/rdocs/Longleaf/Logging.html +405 -0
  52. data/docs/rdocs/Longleaf/Logging/RedirectingLogger.html +1213 -0
  53. data/docs/rdocs/Longleaf/LongleafError.html +139 -0
  54. data/docs/rdocs/Longleaf/MDFields.html +193 -0
  55. data/docs/rdocs/Longleaf/MetadataBuilder.html +787 -0
  56. data/docs/rdocs/Longleaf/MetadataDeserializer.html +537 -0
  57. data/docs/rdocs/Longleaf/MetadataError.html +143 -0
  58. data/docs/rdocs/Longleaf/MetadataPersistenceManager.html +539 -0
  59. data/docs/rdocs/Longleaf/MetadataRecord.html +1411 -0
  60. data/docs/rdocs/Longleaf/MetadataSerializer.html +786 -0
  61. data/docs/rdocs/Longleaf/PreservationServiceError.html +147 -0
  62. data/docs/rdocs/Longleaf/PreserveCommand.html +410 -0
  63. data/docs/rdocs/Longleaf/PreserveEvent.html +491 -0
  64. data/docs/rdocs/Longleaf/RegisterCommand.html +428 -0
  65. data/docs/rdocs/Longleaf/RegisterEvent.html +628 -0
  66. data/docs/rdocs/Longleaf/RegisteredFileSelector.html +446 -0
  67. data/docs/rdocs/Longleaf/RegistrationError.html +151 -0
  68. data/docs/rdocs/Longleaf/ReindexCommand.html +576 -0
  69. data/docs/rdocs/Longleaf/RsyncReplicationService.html +1180 -0
  70. data/docs/rdocs/Longleaf/SequelIndexDriver.html +1978 -0
  71. data/docs/rdocs/Longleaf/ServiceCandidateFilesystemIterator.html +572 -0
  72. data/docs/rdocs/Longleaf/ServiceCandidateIndexIterator.html +532 -0
  73. data/docs/rdocs/Longleaf/ServiceCandidateLocator.html +333 -0
  74. data/docs/rdocs/Longleaf/ServiceClassCache.html +725 -0
  75. data/docs/rdocs/Longleaf/ServiceDateHelper.html +425 -0
  76. data/docs/rdocs/Longleaf/ServiceDefinition.html +683 -0
  77. data/docs/rdocs/Longleaf/ServiceDefinitionManager.html +371 -0
  78. data/docs/rdocs/Longleaf/ServiceDefinitionValidator.html +269 -0
  79. data/docs/rdocs/Longleaf/ServiceFields.html +173 -0
  80. data/docs/rdocs/Longleaf/ServiceManager.html +1229 -0
  81. data/docs/rdocs/Longleaf/ServiceMappingManager.html +410 -0
  82. data/docs/rdocs/Longleaf/ServiceMappingValidator.html +347 -0
  83. data/docs/rdocs/Longleaf/ServiceRecord.html +821 -0
  84. data/docs/rdocs/Longleaf/StorageLocation.html +985 -0
  85. data/docs/rdocs/Longleaf/StorageLocationManager.html +729 -0
  86. data/docs/rdocs/Longleaf/StorageLocationUnavailableError.html +143 -0
  87. data/docs/rdocs/Longleaf/StorageLocationValidator.html +373 -0
  88. data/docs/rdocs/Longleaf/StoragePathValidator.html +253 -0
  89. data/docs/rdocs/Longleaf/SystemConfigBuilder.html +441 -0
  90. data/docs/rdocs/Longleaf/SystemConfigFields.html +163 -0
  91. data/docs/rdocs/Longleaf/ValidateConfigCommand.html +451 -0
  92. data/docs/rdocs/Longleaf/ValidateMetadataCommand.html +408 -0
  93. data/docs/rdocs/_index.html +660 -0
  94. data/docs/rdocs/class_list.html +51 -0
  95. data/docs/rdocs/css/common.css +1 -0
  96. data/docs/rdocs/css/full_list.css +58 -0
  97. data/docs/rdocs/css/style.css +496 -0
  98. data/docs/rdocs/file.README.html +165 -0
  99. data/docs/rdocs/file_list.html +56 -0
  100. data/docs/rdocs/frames.html +17 -0
  101. data/docs/rdocs/index.html +165 -0
  102. data/docs/rdocs/js/app.js +303 -0
  103. data/docs/rdocs/js/full_list.js +216 -0
  104. data/docs/rdocs/js/jquery.js +4 -0
  105. data/docs/rdocs/method_list.html +2051 -0
  106. data/docs/rdocs/top-level-namespace.html +110 -0
  107. data/lib/longleaf/candidates/file_selector.rb +150 -0
  108. data/lib/longleaf/candidates/manifest_digest_provider.rb +17 -0
  109. data/lib/longleaf/candidates/physical_path_provider.rb +17 -0
  110. data/lib/longleaf/candidates/registered_file_selector.rb +67 -0
  111. data/lib/longleaf/candidates/service_candidate_filesystem_iterator.rb +93 -0
  112. data/lib/longleaf/candidates/service_candidate_index_iterator.rb +84 -0
  113. data/lib/longleaf/candidates/service_candidate_locator.rb +23 -0
  114. data/lib/longleaf/candidates/single_digest_provider.rb +13 -0
  115. data/lib/longleaf/cli.rb +252 -46
  116. data/lib/longleaf/commands/deregister_command.rb +51 -0
  117. data/lib/longleaf/commands/preserve_command.rb +50 -0
  118. data/lib/longleaf/commands/register_command.rb +34 -43
  119. data/lib/longleaf/commands/reindex_command.rb +92 -0
  120. data/lib/longleaf/commands/validate_config_command.rb +33 -8
  121. data/lib/longleaf/commands/validate_metadata_command.rb +51 -0
  122. data/lib/longleaf/errors.rb +26 -7
  123. data/lib/longleaf/events/deregister_event.rb +53 -0
  124. data/lib/longleaf/events/event_names.rb +9 -0
  125. data/lib/longleaf/events/event_status_tracking.rb +59 -0
  126. data/lib/longleaf/events/preserve_event.rb +82 -0
  127. data/lib/longleaf/events/register_event.rb +59 -51
  128. data/lib/longleaf/helpers/case_insensitive_hash.rb +38 -0
  129. data/lib/longleaf/helpers/digest_helper.rb +56 -0
  130. data/lib/longleaf/helpers/s3_uri_helper.rb +86 -0
  131. data/lib/longleaf/helpers/selection_options_parser.rb +215 -0
  132. data/lib/longleaf/helpers/service_date_helper.rb +78 -0
  133. data/lib/longleaf/indexing/index_manager.rb +101 -0
  134. data/lib/longleaf/indexing/sequel_index_driver.rb +306 -0
  135. data/lib/longleaf/logging.rb +5 -4
  136. data/lib/longleaf/logging/redirecting_logger.rb +30 -25
  137. data/lib/longleaf/models/app_fields.rb +7 -2
  138. data/lib/longleaf/models/file_record.rb +31 -8
  139. data/lib/longleaf/models/filesystem_metadata_location.rb +56 -0
  140. data/lib/longleaf/models/filesystem_storage_location.rb +52 -0
  141. data/lib/longleaf/models/md_fields.rb +3 -1
  142. data/lib/longleaf/models/metadata_location.rb +47 -0
  143. data/lib/longleaf/models/metadata_record.rb +43 -16
  144. data/lib/longleaf/models/s3_storage_location.rb +138 -0
  145. data/lib/longleaf/models/service_definition.rb +7 -6
  146. data/lib/longleaf/models/service_fields.rb +7 -1
  147. data/lib/longleaf/models/service_record.rb +10 -6
  148. data/lib/longleaf/models/storage_location.rb +24 -19
  149. data/lib/longleaf/models/storage_types.rb +9 -0
  150. data/lib/longleaf/models/system_config_fields.rb +9 -0
  151. data/lib/longleaf/preservation_services/file_check_service.rb +59 -0
  152. data/lib/longleaf/preservation_services/fixity_check_service.rb +124 -0
  153. data/lib/longleaf/preservation_services/rsync_replication_service.rb +198 -0
  154. data/lib/longleaf/preservation_services/s3_replication_service.rb +131 -0
  155. data/lib/longleaf/services/application_config_deserializer.rb +81 -24
  156. data/lib/longleaf/services/application_config_manager.rb +20 -6
  157. data/lib/longleaf/services/application_config_validator.rb +19 -9
  158. data/lib/longleaf/services/configuration_validator.rb +67 -4
  159. data/lib/longleaf/services/filesystem_location_validator.rb +16 -0
  160. data/lib/longleaf/services/metadata_deserializer.rb +115 -42
  161. data/lib/longleaf/services/metadata_persistence_manager.rb +47 -0
  162. data/lib/longleaf/services/metadata_serializer.rb +156 -23
  163. data/lib/longleaf/services/metadata_validator.rb +76 -0
  164. data/lib/longleaf/services/s3_location_validator.rb +19 -0
  165. data/lib/longleaf/services/service_class_cache.rb +112 -0
  166. data/lib/longleaf/services/service_definition_manager.rb +10 -7
  167. data/lib/longleaf/services/service_definition_validator.rb +25 -18
  168. data/lib/longleaf/services/service_manager.rb +86 -11
  169. data/lib/longleaf/services/service_mapping_manager.rb +13 -12
  170. data/lib/longleaf/services/service_mapping_validator.rb +36 -26
  171. data/lib/longleaf/services/storage_location_manager.rb +76 -15
  172. data/lib/longleaf/services/storage_location_validator.rb +49 -35
  173. data/lib/longleaf/specs/config_builder.rb +47 -23
  174. data/lib/longleaf/specs/config_validator_helpers.rb +16 -0
  175. data/lib/longleaf/specs/custom_matchers.rb +9 -0
  176. data/lib/longleaf/specs/file_helpers.rb +61 -0
  177. data/lib/longleaf/specs/metadata_builder.rb +98 -0
  178. data/lib/longleaf/specs/system_config_builder.rb +27 -0
  179. data/lib/longleaf/version.rb +1 -1
  180. data/longleaf.gemspec +20 -7
  181. data/mkdocs.yml +21 -0
  182. metadata +308 -24
  183. data/.travis.yml +0 -4
  184. data/lib/longleaf/commands/abstract_command.rb +0 -37
  185. data/lib/longleaf/services/storage_path_validator.rb +0 -16
@@ -0,0 +1,306 @@
1
+ require 'sequel'
2
+ require 'digest/md5'
3
+ require 'longleaf/events/event_names'
4
+ require 'longleaf/candidates/file_selector'
5
+ require 'longleaf/version'
6
+ require 'longleaf/models/system_config_fields'
7
+ require 'longleaf/logging'
8
+
9
+ module Longleaf
10
+ # Driver for interacting with an RDBMS-based metadata index using the Sequel ORM gem.
11
+ # Users must create the database and credentials for connecting to it in advance,
12
+ # if using a database application that requires creation of databases (ie, not sqlite).
13
+ # The default database name is 'longleaf_metadata_index' but may be overridden.
14
+ #
15
+ # See the Sequel documentation for details about accepted connection parameters:
16
+ # https://github.com/jeremyevans/sequel/blob/master/doc/opening_databases.rdoc
17
+ class SequelIndexDriver
18
+ include Longleaf::Logging
19
+ INDEX_DB_NAME ||= 'longleaf_metadata_index'
20
+ PRESERVE_TBL ||= "preserve_service_times".to_sym
21
+ INDEX_STATE_TBL ||= "index_state".to_sym
22
+ DEFAULT_PAGE_SIZE ||= 1000
23
+ TIMESTAMP_FORMAT ||= '%Y-%m-%d %H:%M:%S.%3N'
24
+
25
+ # Initialize the index driver
26
+ #
27
+ # @param app_config [ApplicationConfigManager] the application configuration manager
28
+ # @param adapter [String] name of the database adapter to use.
29
+ # @param conn_details Details about the configuration and connection to the database used for the index.
30
+ # If a string is provided, it will be used as the connection URL and must identify the adapter.
31
+ # If a hash is provided, it is used as the parameters for the database connection.
32
+ # @param page_size [Integer] number of results to retrieve per query when getting candidates
33
+ def initialize(app_config, adapter, conn_details, page_size: nil)
34
+ Sequel.default_timezone = :utc
35
+ @app_config = app_config
36
+ @adapter = adapter
37
+ @conn_details = conn_details
38
+ # Digest of the app config file so we can tell if it changes
39
+ @config_md5 = app_config.config_md5
40
+ @page_size = page_size.nil? || page_size <= 0 ? DEFAULT_PAGE_SIZE : page_size
41
+
42
+ if @conn_details.is_a?(Hash)
43
+ # Add in the adapter name
44
+ @conn_details['adapter'] = adapter unless @conn_details.key?('adapter')
45
+ # Add in default database name if none was specified
46
+ @conn_details['database'] = INDEX_DB_NAME unless @conn_details.key?('database')
47
+ end
48
+ end
49
+
50
+ # Returns true if the application configuration does not match the configuration used for
51
+ # the last reindex.
52
+ def is_stale?
53
+ db_conn[INDEX_STATE_TBL].where(config_md5: @config_md5).count == 0
54
+ end
55
+
56
+ # Index the provided file_rec and its metadata
57
+ #
58
+ # @param file_rec [FileRecord] file record to index
59
+ def index(file_rec)
60
+ file_path = file_rec.path
61
+ md_rec = file_rec.metadata_record
62
+ storage_loc = file_rec.storage_location
63
+ service_manager = @app_config.service_manager
64
+
65
+ # Produce a list of service definitions which should apply to the file
66
+ expected_services = service_manager.list_service_definitions(
67
+ location: storage_loc.name)
68
+
69
+ first_timestamp = first_service_execution_timestamp(expected_services, md_rec)
70
+ delay_until_timestamp = delay_until_timestamp(md_rec)
71
+
72
+ first_timestamp = convert_iso8601_to_timestamp(first_timestamp)
73
+ delay_until_timestamp = convert_iso8601_to_timestamp(delay_until_timestamp)
74
+ now_stamp = Time.now.utc.strftime(TIMESTAMP_FORMAT)
75
+
76
+ if @adapter == :mysql || @adapter == :mysql2
77
+ preserve_tbl.on_duplicate_key_update
78
+ .insert(file_path: file_path,
79
+ storage_location: storage_loc.name,
80
+ service_time: first_timestamp,
81
+ delay_until_time: delay_until_timestamp,
82
+ updated: now_stamp)
83
+ else
84
+ preserve_tbl.insert_conflict(target: :file_path,
85
+ update: {
86
+ storage_location: storage_loc.name,
87
+ service_time: first_timestamp,
88
+ delay_until_time: delay_until_timestamp,
89
+ updated: now_stamp } )
90
+ .insert(file_path: file_path,
91
+ storage_location: storage_loc.name,
92
+ service_time: first_timestamp,
93
+ delay_until_time: delay_until_timestamp,
94
+ updated: now_stamp)
95
+ end
96
+ end
97
+
98
+ # Find the earliest service execution time for any services expected to be run for the specified file.
99
+ #
100
+ # @param expected_services [Array] list of ServiceDefinition objects expected for specified file.
101
+ # @param md_rec [MetadataRecord] metadata record for the file being evaluated
102
+ # @return The timestamp of the earliest service execution time for the file described by md_rec, in iso8601 format.
103
+ # Returns nil if no services are expected, all services have already run and do not have a next occurrence, or
104
+ # the file is deregistered.
105
+ def first_service_execution_timestamp(expected_services, md_rec)
106
+ current_time = Time.now.utc.iso8601(3)
107
+ if md_rec.deregistered?
108
+ return nil
109
+ end
110
+
111
+ service_times = Array.new
112
+
113
+ present_services = md_rec.list_services
114
+
115
+ expected_services.each do |service_def|
116
+ service_name = service_def.name
117
+
118
+ next_run = ServiceDateHelper.next_run_needed(md_rec, service_def)
119
+ service_times << next_run unless next_run.nil?
120
+ end
121
+ # Return the lowest service execution time
122
+ service_times.min
123
+ end
124
+
125
+ # @return The first failure timestamp found for any service, or the minimum timestamp (Unix epoch) if there were none.
126
+ def delay_until_timestamp(md_rec)
127
+ md_rec.list_services.each do |service_name|
128
+ service_rec = md_rec.service(service_name)
129
+ return service_rec.failure_timestamp unless service_rec.failure_timestamp.nil?
130
+ end
131
+ # return lowest possible date
132
+ return minimum_timestamp
133
+ end
134
+
135
+ # Remove an entry from the index
136
+ # @param remove_me The record to remove from the index. May be a FileRecord or a String.
137
+ def remove(remove_me)
138
+ if remove_me.is_a?(FileRecord)
139
+ path = remove_me.path
140
+ else
141
+ path = remove_me
142
+ end
143
+
144
+ result = preserve_tbl.where(file_path: path).delete
145
+ if result == 0
146
+ logger.warn("Could not remove #{path} from the index, path was not present.")
147
+ end
148
+ end
149
+
150
+ # Remove all entries from the index
151
+ # @param older_than [Time] Optional. If provided, only entries that were last indexed
152
+ # before the provided time will be deleted.
153
+ def clear_index(older_than = nil)
154
+ if older_than.nil?
155
+ preserve_tbl.delete
156
+ else
157
+ older_than_timestamp = older_than.utc.strftime(TIMESTAMP_FORMAT)
158
+ preserve_tbl.where { updated < older_than_timestamp }.delete
159
+ end
160
+ end
161
+
162
+ # Initialize the index's database using the provided configuration
163
+ def setup_index
164
+ # Create the table for tracking when files will need preservation services run on them.
165
+ case @adapter
166
+ when :mysql, :mysql2
167
+ # mysql does not support 'text' fields as primary keys
168
+ db_conn.create_table!(PRESERVE_TBL) do
169
+ String :file_path, primary_key: true, size: 768
170
+ column :storage_location, 'varchar(128)'
171
+ column :service_time, 'timestamp(3)', { :null => true }
172
+ column :delay_until_time, 'timestamp(3)'
173
+ column :updated, 'timestamp(3)'
174
+ end
175
+ else
176
+ db_conn.create_table!(PRESERVE_TBL) do
177
+ String :file_path, primary_key: true, text: true
178
+ column :storage_location, 'varchar(128)'
179
+ column :service_time, 'timestamp(3)', { :null => true }
180
+ column :delay_until_time, 'timestamp(3)'
181
+ column :updated, 'timestamp(3)'
182
+ end
183
+ end
184
+
185
+ # Setup database indexes
186
+ case @adapter
187
+ when :postgres
188
+ db_conn.run("CREATE INDEX service_times_file_path_text_index ON preserve_service_times (file_path text_pattern_ops)")
189
+ when :sqlite, :amalgalite
190
+ db_conn.run("CREATE INDEX service_times_file_path_text_index ON preserve_service_times (file_path collate nocase)")
191
+ end
192
+ db_conn.run("CREATE INDEX service_times_storage_location_index ON preserve_service_times (storage_location)")
193
+
194
+ # Create table for tracking the state of the index
195
+ db_conn.create_table!(INDEX_STATE_TBL) do
196
+ String :config_md5
197
+ DateTime :last_reindexed
198
+ String :longleaf_version
199
+ end
200
+
201
+ # Prepopulate the index state information
202
+ update_index_state
203
+ end
204
+
205
+ # Updates the state information for the index to indicate that the index has been refreshed
206
+ # or is in sync with the application's configuration.
207
+ def update_index_state
208
+ index_state_tbl = db_conn[INDEX_STATE_TBL]
209
+ index_state_tbl.delete
210
+ index_state_tbl.insert(
211
+ config_md5: @config_md5,
212
+ last_reindexed: Time.now.utc,
213
+ longleaf_version: Longleaf::VERSION)
214
+ end
215
+
216
+ # Retrieves a page of file paths which have one or more services which need to run.
217
+ # @param file_selector [FileSelector] selector for what paths to search for files
218
+ # @param stale_datetime [DateTime] find file_paths with services needing to be run before this value
219
+ # @return [Array] array of file paths that need one or more services run.
220
+ def paths_with_stale_services(file_selector, stale_datetime)
221
+ if @preserve_dataset.nil?
222
+ @preserve_dataset = db_conn
223
+ .from(PRESERVE_TBL)
224
+ .exclude(service_time: nil)
225
+ .limit(@page_size)
226
+ .order(Sequel.asc(:service_time))
227
+ end
228
+
229
+ # retrieve and return a page of results
230
+ ds = add_path_restrictions(@preserve_dataset, file_selector)
231
+ .where { service_time <= stale_datetime }
232
+ .where { delay_until_time < stale_datetime }
233
+ .select_map(:file_path)
234
+ end
235
+
236
+ # Retrieves a page of paths for registered files.
237
+ # @param file_selector [FileSelector] selector for what paths to search for files
238
+ # @return [Array] array of file paths that are registered
239
+ def registered_paths(file_selector)
240
+ # retrieve and return a page of results
241
+ add_path_restrictions(registered_dataset, file_selector)
242
+ .select_map(:file_path)
243
+ end
244
+
245
+ # Calls the provided block once for each registered file path.
246
+ # Must be passed a block.
247
+ # @param file_selector [FileSelector] selector for what paths to search for files
248
+ # @param older_than [Time] Optional. If provided, only files that have not been
249
+ # indexed since before this timestamp will be returned.
250
+ def each_registered_path(file_selector, older_than: nil, &block)
251
+ dataset = add_path_restrictions(registered_dataset, file_selector)
252
+ .select(:file_path)
253
+ if !older_than.nil?
254
+ older_than_timestamp = older_than.utc.strftime(TIMESTAMP_FORMAT)
255
+ dataset = dataset.where { updated < older_than_timestamp }
256
+ end
257
+ # Yield to the provided block once per row returned
258
+ dataset.paged_each(:rows_per_fetch => @page_size) do |row|
259
+ block.call(row[:file_path])
260
+ end
261
+ end
262
+
263
+ private
264
+ def db_conn
265
+ @connection = Sequel.connect(@conn_details) if @connection.nil?
266
+ @connection
267
+ end
268
+
269
+ def preserve_tbl
270
+ @preserve_tbl = db_conn[PRESERVE_TBL] if @preserve_tbl.nil?
271
+ @preserve_tbl
272
+ end
273
+
274
+ def add_path_restrictions(dataset, file_selector)
275
+ if file_selector.specificity == FileSelector::SPECIFICITY_STORAGE_LOCATION
276
+ dataset.where(storage_location: file_selector.storage_locations)
277
+ else
278
+ # Reformat all selected paths into LIKE partial string matches
279
+ path_conds = file_selector.target_paths.map { |path| path.end_with?('/') ? path + '%' : path }
280
+ dataset.where(Sequel.like(:file_path, *path_conds))
281
+ end
282
+ end
283
+
284
+ def convert_iso8601_to_timestamp(iso8601)
285
+ return nil if iso8601.nil?
286
+ Time.iso8601(iso8601).strftime(TIMESTAMP_FORMAT)
287
+ end
288
+
289
+ def minimum_timestamp
290
+ if @min_timestamp.nil?
291
+ @min_timestamp = ServiceDateHelper.formatted_timestamp(Time.at(0).utc)
292
+ end
293
+ @min_timestamp
294
+ end
295
+
296
+ def registered_dataset
297
+ if @registered_dataset.nil?
298
+ @registered_dataset = db_conn
299
+ .from(PRESERVE_TBL)
300
+ .limit(@page_size)
301
+ .order(Sequel.asc(:service_time))
302
+ end
303
+ @registered_dataset
304
+ end
305
+ end
306
+ end
@@ -1,21 +1,22 @@
1
1
  require 'longleaf/logging/redirecting_logger'
2
2
 
3
3
  module Longleaf
4
+ # Module providing access to logging within longleaf
4
5
  module Logging
5
6
  # Get the main logger for longleaf
6
7
  def logger
7
8
  Logging.logger
8
9
  end
9
-
10
+
10
11
  # Get the main logger for longleaf
11
12
  def self.logger
12
13
  @logger ||= RedirectingLogger.new
13
14
  end
14
-
15
+
15
16
  def initialize_logger(failure_only, log_level, log_format, datetime_format)
16
17
  Logging.initialize_logger(failure_only, log_level, log_format, datetime_format)
17
18
  end
18
-
19
+
19
20
  def self.initialize_logger(failure_only, log_level, log_format, datetime_format)
20
21
  @logger = RedirectingLogger.new(failure_only: failure_only,
21
22
  log_level: log_level,
@@ -23,4 +24,4 @@ module Longleaf
23
24
  datetime_format: datetime_format)
24
25
  end
25
26
  end
26
- end
27
+ end
@@ -1,14 +1,14 @@
1
1
  require 'logger'
2
2
 
3
- # Logger which directs messages to stdout and/or stderr, depending on the nature of the message.
4
- # Status logging, which includes standard logger methods, goes to STDERR.
5
- # Operation success and failure messages go to STDOUT, and to STDERR at info level.
6
3
  module Longleaf
7
4
  module Logging
5
+ # Logger which directs messages to stdout and/or stderr, depending on the nature of the message.
6
+ # Status logging, which includes standard logger methods, goes to STDERR.
7
+ # Operation success and failure messages go to STDOUT, and to STDERR at info level.
8
8
  class RedirectingLogger
9
- # @param failure_only [Boolean] If set to true, only failure messages will be output to STDOUT
9
+ # @param [Boolean] failure_only If set to true, only failure messages will be output to STDOUT
10
10
  # @param log_level [String] logger level used for output to STDERR
11
- # @param log_format [Strfailure_onlying] format string for log entries to STDERR. There are 4 variables available
11
+ # @param log_format [String] format string for log entries to STDERR. There are 4 variables available
12
12
  # for inclusion in the output: severity, datetime, progname, msg. Variables must be wrapped in %{}.
13
13
  # @param datetime_format [String] datetime formatting string used for logger dates appearing in STDERR.
14
14
  def initialize(failure_only: false, log_level: 'WARN', log_format: nil, datetime_format: nil)
@@ -25,12 +25,12 @@ module Longleaf
25
25
  @stderr_log.formatter = proc do |severity, datetime, progname, msg|
26
26
  # Make sure the format ends with a newline
27
27
  @log_format = @log_format + "\n" unless @log_format.end_with?("\n")
28
-
28
+
29
29
  formatted_date = @stderr_log.datetime_format.nil? ? datetime : datetime.strftime(datetime_format)
30
30
  @log_format % { :severity => severity, :datetime => formatted_date, :progname => progname, :msg => msg }
31
31
  end
32
32
  end
33
-
33
+
34
34
  @stdout_log = Logger.new($stdout)
35
35
  @stdout_log.formatter = proc do |severity, datetime, progname, msg|
36
36
  "#{msg}\n"
@@ -41,33 +41,38 @@ module Longleaf
41
41
  @stdout_log.level = 'info'
42
42
  end
43
43
  end
44
-
44
+
45
45
  def debug(progname = nil, &block)
46
46
  @stderr_log.debug(progname, &block)
47
47
  end
48
-
48
+
49
49
  def info(progname = nil, &block)
50
50
  @stderr_log.info(progname, &block)
51
51
  end
52
-
52
+
53
53
  def warn(progname = nil, &block)
54
54
  @stderr_log.warn(progname, &block)
55
55
  end
56
-
56
+
57
57
  def error(progname = nil, &block)
58
58
  @stderr_log.error(progname, &block)
59
59
  end
60
-
60
+
61
61
  def fatal(progname = nil, &block)
62
62
  @stderr_log.fatal(progname, &block)
63
63
  end
64
-
64
+
65
65
  def unknown(progname = nil, &block)
66
66
  @stderr_log.unknown(progname, &block)
67
67
  end
68
-
68
+
69
+ def <<(msg)
70
+ @stderr_log << msg
71
+ end
72
+
69
73
  # Logs a success message to STDOUT, as well as STDERR at info level.
70
- # @param eventOrMessage [String] name of the preservation event which succeeded,
74
+ #
75
+ # @param [String] eventOrMessage name of the preservation event which succeeded,
71
76
  # or the message to output if it is the only parameter. Required.
72
77
  # @param file_name [String] file name which is the subject of this message.
73
78
  # @param message [String] descriptive message to accompany this output
@@ -75,10 +80,10 @@ module Longleaf
75
80
  def success(eventOrMessage, file_name = nil, message = nil, service = nil)
76
81
  outcome('SUCCESS', eventOrMessage, file_name, message, service)
77
82
  end
78
-
83
+
79
84
  # Logs a failure message to STDOUT, as well as STDERR at info level.
80
85
  # If an error was provided, it is logged to STDERR at error level.
81
- # @param eventOrMessage [String] name of the preservation event which failed,
86
+ # @param eventOrMessage [String] name of the preservation event which failed,
82
87
  # or the message to output if it is the only parameter.
83
88
  # @param file_name [String] file name which is the subject of this message.
84
89
  # @param message [String] descriptive message to accompany this output
@@ -87,17 +92,18 @@ module Longleaf
87
92
  def failure(eventOrMessage, file_name = nil, message = nil, service = nil, error: nil)
88
93
  text = outcome_text('FAILURE', eventOrMessage, file_name, message, service, error)
89
94
  @stdout_log.warn(text)
90
-
95
+
91
96
  @stderr_log.info(text)
92
97
  @stderr_log.error("#{error.message}") unless error.nil?
98
+ @stderr_log.error("#{error.backtrace}") unless error.nil? || error.backtrace.nil?
93
99
  end
94
-
100
+
95
101
  # Logs an outcome message to STDOUT, as well as STDERR at info level.
96
102
  # If file_name and message are nil, eventOrMessage will be used as the message.
97
103
  #
98
104
  # @param outcome [String] The status of the outcome. Required.
99
- # @param eventOrMessage [String] name of the preservation event which was successful,
100
- # or the message to output if it is the only parameter. Required.
105
+ # @param eventOrMessage [String] name of the preservation event which was successful,
106
+ # or the message to output if it is the only parameter. Required.
101
107
  # @param file_name [String] file name which is the subject of this message.
102
108
  # @param message [String] descriptive message to accompany this output
103
109
  # @param service [String] name of the service which executed.
@@ -107,14 +113,13 @@ module Longleaf
107
113
  @stdout_log.info(text)
108
114
  @stderr_log.info(text)
109
115
  end
110
-
111
- # FAILURE verify[cdr_fixity_check] /path/to/file: Something terrible
116
+
112
117
  private
113
118
  def outcome_text(outcome, eventOrMessage, file_name = nil, message = nil, service = nil, error = nil)
114
119
  message_only = file_name.nil? && message.nil? && error.nil?
115
-
120
+
116
121
  text = "#{outcome}"
117
-
122
+
118
123
  if message_only
119
124
  text << ": #{eventOrMessage}"
120
125
  else