longleaf 0.1.0 → 1.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (185) hide show
  1. checksums.yaml +4 -4
  2. data/.circleci/config.yml +94 -0
  3. data/.editorconfig +13 -0
  4. data/.gitignore +4 -1
  5. data/.rubocop.yml +44 -0
  6. data/.rubocop_todo.yml +834 -0
  7. data/.yardopts +1 -0
  8. data/Gemfile +16 -1
  9. data/README.md +98 -12
  10. data/Rakefile +6 -0
  11. data/bin/setup +16 -1
  12. data/docs/aboutlongleaf.md +28 -0
  13. data/docs/extra.css +32 -0
  14. data/docs/img/change-file.png +0 -0
  15. data/docs/img/ll-example-preserved.png +0 -0
  16. data/docs/index.md +19 -0
  17. data/docs/install.md +66 -0
  18. data/docs/ll-example/config-example-relative.yml +33 -0
  19. data/docs/ll-example/files-dir/LLexample-PDF.pdf +0 -0
  20. data/docs/ll-example/files-dir/LLexample-TOCHANGE.txt +15 -0
  21. data/docs/ll-example/files-dir/LLexample-tokeep.txt +10 -0
  22. data/docs/ll-example/metadata-dir/.gitkeep +0 -0
  23. data/docs/ll-example/replica-files/.gitkeep +0 -0
  24. data/docs/ll-example/replica-metadata/.gitkeep +0 -0
  25. data/docs/quickstart.md +270 -0
  26. data/docs/rdocs/Longleaf.html +135 -0
  27. data/docs/rdocs/Longleaf/AppFields.html +178 -0
  28. data/docs/rdocs/Longleaf/ApplicationConfigDeserializer.html +631 -0
  29. data/docs/rdocs/Longleaf/ApplicationConfigManager.html +610 -0
  30. data/docs/rdocs/Longleaf/ApplicationConfigValidator.html +238 -0
  31. data/docs/rdocs/Longleaf/CLI.html +909 -0
  32. data/docs/rdocs/Longleaf/ChecksumMismatchError.html +151 -0
  33. data/docs/rdocs/Longleaf/ConfigBuilder.html +1339 -0
  34. data/docs/rdocs/Longleaf/ConfigurationError.html +143 -0
  35. data/docs/rdocs/Longleaf/ConfigurationValidator.html +227 -0
  36. data/docs/rdocs/Longleaf/DeregisterCommand.html +420 -0
  37. data/docs/rdocs/Longleaf/DeregisterEvent.html +453 -0
  38. data/docs/rdocs/Longleaf/DeregistrationError.html +151 -0
  39. data/docs/rdocs/Longleaf/DigestHelper.html +419 -0
  40. data/docs/rdocs/Longleaf/EventError.html +147 -0
  41. data/docs/rdocs/Longleaf/EventNames.html +163 -0
  42. data/docs/rdocs/Longleaf/EventStatusTracking.html +656 -0
  43. data/docs/rdocs/Longleaf/FileCheckService.html +540 -0
  44. data/docs/rdocs/Longleaf/FileHelpers.html +520 -0
  45. data/docs/rdocs/Longleaf/FileRecord.html +716 -0
  46. data/docs/rdocs/Longleaf/FileSelector.html +901 -0
  47. data/docs/rdocs/Longleaf/FixityCheckService.html +691 -0
  48. data/docs/rdocs/Longleaf/IndexManager.html +1155 -0
  49. data/docs/rdocs/Longleaf/InvalidDigestAlgorithmError.html +143 -0
  50. data/docs/rdocs/Longleaf/InvalidStoragePathError.html +143 -0
  51. data/docs/rdocs/Longleaf/Logging.html +405 -0
  52. data/docs/rdocs/Longleaf/Logging/RedirectingLogger.html +1213 -0
  53. data/docs/rdocs/Longleaf/LongleafError.html +139 -0
  54. data/docs/rdocs/Longleaf/MDFields.html +193 -0
  55. data/docs/rdocs/Longleaf/MetadataBuilder.html +787 -0
  56. data/docs/rdocs/Longleaf/MetadataDeserializer.html +537 -0
  57. data/docs/rdocs/Longleaf/MetadataError.html +143 -0
  58. data/docs/rdocs/Longleaf/MetadataPersistenceManager.html +539 -0
  59. data/docs/rdocs/Longleaf/MetadataRecord.html +1411 -0
  60. data/docs/rdocs/Longleaf/MetadataSerializer.html +786 -0
  61. data/docs/rdocs/Longleaf/PreservationServiceError.html +147 -0
  62. data/docs/rdocs/Longleaf/PreserveCommand.html +410 -0
  63. data/docs/rdocs/Longleaf/PreserveEvent.html +491 -0
  64. data/docs/rdocs/Longleaf/RegisterCommand.html +428 -0
  65. data/docs/rdocs/Longleaf/RegisterEvent.html +628 -0
  66. data/docs/rdocs/Longleaf/RegisteredFileSelector.html +446 -0
  67. data/docs/rdocs/Longleaf/RegistrationError.html +151 -0
  68. data/docs/rdocs/Longleaf/ReindexCommand.html +576 -0
  69. data/docs/rdocs/Longleaf/RsyncReplicationService.html +1180 -0
  70. data/docs/rdocs/Longleaf/SequelIndexDriver.html +1978 -0
  71. data/docs/rdocs/Longleaf/ServiceCandidateFilesystemIterator.html +572 -0
  72. data/docs/rdocs/Longleaf/ServiceCandidateIndexIterator.html +532 -0
  73. data/docs/rdocs/Longleaf/ServiceCandidateLocator.html +333 -0
  74. data/docs/rdocs/Longleaf/ServiceClassCache.html +725 -0
  75. data/docs/rdocs/Longleaf/ServiceDateHelper.html +425 -0
  76. data/docs/rdocs/Longleaf/ServiceDefinition.html +683 -0
  77. data/docs/rdocs/Longleaf/ServiceDefinitionManager.html +371 -0
  78. data/docs/rdocs/Longleaf/ServiceDefinitionValidator.html +269 -0
  79. data/docs/rdocs/Longleaf/ServiceFields.html +173 -0
  80. data/docs/rdocs/Longleaf/ServiceManager.html +1229 -0
  81. data/docs/rdocs/Longleaf/ServiceMappingManager.html +410 -0
  82. data/docs/rdocs/Longleaf/ServiceMappingValidator.html +347 -0
  83. data/docs/rdocs/Longleaf/ServiceRecord.html +821 -0
  84. data/docs/rdocs/Longleaf/StorageLocation.html +985 -0
  85. data/docs/rdocs/Longleaf/StorageLocationManager.html +729 -0
  86. data/docs/rdocs/Longleaf/StorageLocationUnavailableError.html +143 -0
  87. data/docs/rdocs/Longleaf/StorageLocationValidator.html +373 -0
  88. data/docs/rdocs/Longleaf/StoragePathValidator.html +253 -0
  89. data/docs/rdocs/Longleaf/SystemConfigBuilder.html +441 -0
  90. data/docs/rdocs/Longleaf/SystemConfigFields.html +163 -0
  91. data/docs/rdocs/Longleaf/ValidateConfigCommand.html +451 -0
  92. data/docs/rdocs/Longleaf/ValidateMetadataCommand.html +408 -0
  93. data/docs/rdocs/_index.html +660 -0
  94. data/docs/rdocs/class_list.html +51 -0
  95. data/docs/rdocs/css/common.css +1 -0
  96. data/docs/rdocs/css/full_list.css +58 -0
  97. data/docs/rdocs/css/style.css +496 -0
  98. data/docs/rdocs/file.README.html +165 -0
  99. data/docs/rdocs/file_list.html +56 -0
  100. data/docs/rdocs/frames.html +17 -0
  101. data/docs/rdocs/index.html +165 -0
  102. data/docs/rdocs/js/app.js +303 -0
  103. data/docs/rdocs/js/full_list.js +216 -0
  104. data/docs/rdocs/js/jquery.js +4 -0
  105. data/docs/rdocs/method_list.html +2051 -0
  106. data/docs/rdocs/top-level-namespace.html +110 -0
  107. data/lib/longleaf/candidates/file_selector.rb +150 -0
  108. data/lib/longleaf/candidates/manifest_digest_provider.rb +17 -0
  109. data/lib/longleaf/candidates/physical_path_provider.rb +17 -0
  110. data/lib/longleaf/candidates/registered_file_selector.rb +67 -0
  111. data/lib/longleaf/candidates/service_candidate_filesystem_iterator.rb +93 -0
  112. data/lib/longleaf/candidates/service_candidate_index_iterator.rb +84 -0
  113. data/lib/longleaf/candidates/service_candidate_locator.rb +23 -0
  114. data/lib/longleaf/candidates/single_digest_provider.rb +13 -0
  115. data/lib/longleaf/cli.rb +252 -46
  116. data/lib/longleaf/commands/deregister_command.rb +51 -0
  117. data/lib/longleaf/commands/preserve_command.rb +50 -0
  118. data/lib/longleaf/commands/register_command.rb +34 -43
  119. data/lib/longleaf/commands/reindex_command.rb +92 -0
  120. data/lib/longleaf/commands/validate_config_command.rb +33 -8
  121. data/lib/longleaf/commands/validate_metadata_command.rb +51 -0
  122. data/lib/longleaf/errors.rb +26 -7
  123. data/lib/longleaf/events/deregister_event.rb +53 -0
  124. data/lib/longleaf/events/event_names.rb +9 -0
  125. data/lib/longleaf/events/event_status_tracking.rb +59 -0
  126. data/lib/longleaf/events/preserve_event.rb +82 -0
  127. data/lib/longleaf/events/register_event.rb +59 -51
  128. data/lib/longleaf/helpers/case_insensitive_hash.rb +38 -0
  129. data/lib/longleaf/helpers/digest_helper.rb +56 -0
  130. data/lib/longleaf/helpers/s3_uri_helper.rb +86 -0
  131. data/lib/longleaf/helpers/selection_options_parser.rb +215 -0
  132. data/lib/longleaf/helpers/service_date_helper.rb +78 -0
  133. data/lib/longleaf/indexing/index_manager.rb +101 -0
  134. data/lib/longleaf/indexing/sequel_index_driver.rb +306 -0
  135. data/lib/longleaf/logging.rb +5 -4
  136. data/lib/longleaf/logging/redirecting_logger.rb +30 -25
  137. data/lib/longleaf/models/app_fields.rb +7 -2
  138. data/lib/longleaf/models/file_record.rb +31 -8
  139. data/lib/longleaf/models/filesystem_metadata_location.rb +56 -0
  140. data/lib/longleaf/models/filesystem_storage_location.rb +52 -0
  141. data/lib/longleaf/models/md_fields.rb +3 -1
  142. data/lib/longleaf/models/metadata_location.rb +47 -0
  143. data/lib/longleaf/models/metadata_record.rb +43 -16
  144. data/lib/longleaf/models/s3_storage_location.rb +138 -0
  145. data/lib/longleaf/models/service_definition.rb +7 -6
  146. data/lib/longleaf/models/service_fields.rb +7 -1
  147. data/lib/longleaf/models/service_record.rb +10 -6
  148. data/lib/longleaf/models/storage_location.rb +24 -19
  149. data/lib/longleaf/models/storage_types.rb +9 -0
  150. data/lib/longleaf/models/system_config_fields.rb +9 -0
  151. data/lib/longleaf/preservation_services/file_check_service.rb +59 -0
  152. data/lib/longleaf/preservation_services/fixity_check_service.rb +124 -0
  153. data/lib/longleaf/preservation_services/rsync_replication_service.rb +198 -0
  154. data/lib/longleaf/preservation_services/s3_replication_service.rb +131 -0
  155. data/lib/longleaf/services/application_config_deserializer.rb +81 -24
  156. data/lib/longleaf/services/application_config_manager.rb +20 -6
  157. data/lib/longleaf/services/application_config_validator.rb +19 -9
  158. data/lib/longleaf/services/configuration_validator.rb +67 -4
  159. data/lib/longleaf/services/filesystem_location_validator.rb +16 -0
  160. data/lib/longleaf/services/metadata_deserializer.rb +115 -42
  161. data/lib/longleaf/services/metadata_persistence_manager.rb +47 -0
  162. data/lib/longleaf/services/metadata_serializer.rb +156 -23
  163. data/lib/longleaf/services/metadata_validator.rb +76 -0
  164. data/lib/longleaf/services/s3_location_validator.rb +19 -0
  165. data/lib/longleaf/services/service_class_cache.rb +112 -0
  166. data/lib/longleaf/services/service_definition_manager.rb +10 -7
  167. data/lib/longleaf/services/service_definition_validator.rb +25 -18
  168. data/lib/longleaf/services/service_manager.rb +86 -11
  169. data/lib/longleaf/services/service_mapping_manager.rb +13 -12
  170. data/lib/longleaf/services/service_mapping_validator.rb +36 -26
  171. data/lib/longleaf/services/storage_location_manager.rb +76 -15
  172. data/lib/longleaf/services/storage_location_validator.rb +49 -35
  173. data/lib/longleaf/specs/config_builder.rb +47 -23
  174. data/lib/longleaf/specs/config_validator_helpers.rb +16 -0
  175. data/lib/longleaf/specs/custom_matchers.rb +9 -0
  176. data/lib/longleaf/specs/file_helpers.rb +61 -0
  177. data/lib/longleaf/specs/metadata_builder.rb +98 -0
  178. data/lib/longleaf/specs/system_config_builder.rb +27 -0
  179. data/lib/longleaf/version.rb +1 -1
  180. data/longleaf.gemspec +20 -7
  181. data/mkdocs.yml +21 -0
  182. metadata +308 -24
  183. data/.travis.yml +0 -4
  184. data/lib/longleaf/commands/abstract_command.rb +0 -37
  185. data/lib/longleaf/services/storage_path_validator.rb +0 -16
@@ -0,0 +1,306 @@
1
+ require 'sequel'
2
+ require 'digest/md5'
3
+ require 'longleaf/events/event_names'
4
+ require 'longleaf/candidates/file_selector'
5
+ require 'longleaf/version'
6
+ require 'longleaf/models/system_config_fields'
7
+ require 'longleaf/logging'
8
+
9
+ module Longleaf
10
+ # Driver for interacting with an RDBMS-based metadata index using the Sequel ORM gem.
11
+ # Users must create the database and credentials for connecting to it in advance,
12
+ # if using a database application that requires creation of databases (ie, not sqlite).
13
+ # The default database name is 'longleaf_metadata_index' but may be overridden.
14
+ #
15
+ # See the Sequel documentation for details about accepted connection parameters:
16
+ # https://github.com/jeremyevans/sequel/blob/master/doc/opening_databases.rdoc
17
+ class SequelIndexDriver
18
+ include Longleaf::Logging
19
+ INDEX_DB_NAME ||= 'longleaf_metadata_index'
20
+ PRESERVE_TBL ||= "preserve_service_times".to_sym
21
+ INDEX_STATE_TBL ||= "index_state".to_sym
22
+ DEFAULT_PAGE_SIZE ||= 1000
23
+ TIMESTAMP_FORMAT ||= '%Y-%m-%d %H:%M:%S.%3N'
24
+
25
+ # Initialize the index driver
26
+ #
27
+ # @param app_config [ApplicationConfigManager] the application configuration manager
28
+ # @param adapter [String] name of the database adapter to use.
29
+ # @param conn_details Details about the configuration and connection to the database used for the index.
30
+ # If a string is provided, it will be used as the connection URL and must identify the adapter.
31
+ # If a hash is provided, it is used as the parameters for the database connection.
32
+ # @param page_size [Integer] number of results to retrieve per query when getting candidates
33
+ def initialize(app_config, adapter, conn_details, page_size: nil)
34
+ Sequel.default_timezone = :utc
35
+ @app_config = app_config
36
+ @adapter = adapter
37
+ @conn_details = conn_details
38
+ # Digest of the app config file so we can tell if it changes
39
+ @config_md5 = app_config.config_md5
40
+ @page_size = page_size.nil? || page_size <= 0 ? DEFAULT_PAGE_SIZE : page_size
41
+
42
+ if @conn_details.is_a?(Hash)
43
+ # Add in the adapter name
44
+ @conn_details['adapter'] = adapter unless @conn_details.key?('adapter')
45
+ # Add in default database name if none was specified
46
+ @conn_details['database'] = INDEX_DB_NAME unless @conn_details.key?('database')
47
+ end
48
+ end
49
+
50
+ # Returns true if the application configuration does not match the configuration used for
51
+ # the last reindex.
52
+ def is_stale?
53
+ db_conn[INDEX_STATE_TBL].where(config_md5: @config_md5).count == 0
54
+ end
55
+
56
+ # Index the provided file_rec and its metadata
57
+ #
58
+ # @param file_rec [FileRecord] file record to index
59
+ def index(file_rec)
60
+ file_path = file_rec.path
61
+ md_rec = file_rec.metadata_record
62
+ storage_loc = file_rec.storage_location
63
+ service_manager = @app_config.service_manager
64
+
65
+ # Produce a list of service definitions which should apply to the file
66
+ expected_services = service_manager.list_service_definitions(
67
+ location: storage_loc.name)
68
+
69
+ first_timestamp = first_service_execution_timestamp(expected_services, md_rec)
70
+ delay_until_timestamp = delay_until_timestamp(md_rec)
71
+
72
+ first_timestamp = convert_iso8601_to_timestamp(first_timestamp)
73
+ delay_until_timestamp = convert_iso8601_to_timestamp(delay_until_timestamp)
74
+ now_stamp = Time.now.utc.strftime(TIMESTAMP_FORMAT)
75
+
76
+ if @adapter == :mysql || @adapter == :mysql2
77
+ preserve_tbl.on_duplicate_key_update
78
+ .insert(file_path: file_path,
79
+ storage_location: storage_loc.name,
80
+ service_time: first_timestamp,
81
+ delay_until_time: delay_until_timestamp,
82
+ updated: now_stamp)
83
+ else
84
+ preserve_tbl.insert_conflict(target: :file_path,
85
+ update: {
86
+ storage_location: storage_loc.name,
87
+ service_time: first_timestamp,
88
+ delay_until_time: delay_until_timestamp,
89
+ updated: now_stamp } )
90
+ .insert(file_path: file_path,
91
+ storage_location: storage_loc.name,
92
+ service_time: first_timestamp,
93
+ delay_until_time: delay_until_timestamp,
94
+ updated: now_stamp)
95
+ end
96
+ end
97
+
98
+ # Find the earliest service execution time for any services expected to be run for the specified file.
99
+ #
100
+ # @param expected_services [Array] list of ServiceDefinition objects expected for specified file.
101
+ # @param md_rec [MetadataRecord] metadata record for the file being evaluated
102
+ # @return The timestamp of the earliest service execution time for the file described by md_rec, in iso8601 format.
103
+ # Returns nil if no services are expected, all services have already run and do not have a next occurrence, or
104
+ # the file is deregistered.
105
+ def first_service_execution_timestamp(expected_services, md_rec)
106
+ current_time = Time.now.utc.iso8601(3)
107
+ if md_rec.deregistered?
108
+ return nil
109
+ end
110
+
111
+ service_times = Array.new
112
+
113
+ present_services = md_rec.list_services
114
+
115
+ expected_services.each do |service_def|
116
+ service_name = service_def.name
117
+
118
+ next_run = ServiceDateHelper.next_run_needed(md_rec, service_def)
119
+ service_times << next_run unless next_run.nil?
120
+ end
121
+ # Return the lowest service execution time
122
+ service_times.min
123
+ end
124
+
125
+ # @return The first failure timestamp found for any service, or the minimum timestamp (formatted Unix epoch) if there were none.
126
+ def delay_until_timestamp(md_rec)
127
+ md_rec.list_services.each do |service_name|
128
+ service_rec = md_rec.service(service_name)
129
+ return service_rec.failure_timestamp unless service_rec.failure_timestamp.nil?
130
+ end
131
+ # return lowest possible date
132
+ return minimum_timestamp
133
+ end
134
+
135
+ # Remove an entry from the index
136
+ # @param remove_me The record to remove from the index. May be a FileRecord or a String.
137
+ def remove(remove_me)
138
+ if remove_me.is_a?(FileRecord)
139
+ path = remove_me.path
140
+ else
141
+ path = remove_me
142
+ end
143
+
144
+ result = preserve_tbl.where(file_path: path).delete
145
+ if result == 0
146
+ logger.warn("Could not remove #{path} from the index, path was not present.")
147
+ end
148
+ end
149
+
150
+ # Remove all entries from the index
151
+ # @param older_than [Time] Optional. If provided, only entries whose last index update
152
+ # occurred before the provided time will be deleted.
153
+ def clear_index(older_than = nil)
154
+ if older_than.nil?
155
+ preserve_tbl.delete
156
+ else
157
+ older_than_timestamp = older_than.utc.strftime(TIMESTAMP_FORMAT)
158
+ preserve_tbl.where { updated < older_than_timestamp }.delete
159
+ end
160
+ end
161
+
162
+ # Initialize the index's database using the provided configuration
163
+ def setup_index
164
+ # Create the table for tracking when files will need preservation services run on them.
165
+ case @adapter
166
+ when :mysql, :mysql2
167
+ # mysql does not support 'text' fields as primary keys
168
+ db_conn.create_table!(PRESERVE_TBL) do
169
+ String :file_path, primary_key: true, size: 768
170
+ column :storage_location, 'varchar(128)'
171
+ column :service_time, 'timestamp(3)', { :null => true }
172
+ column :delay_until_time, 'timestamp(3)'
173
+ column :updated, 'timestamp(3)'
174
+ end
175
+ else
176
+ db_conn.create_table!(PRESERVE_TBL) do
177
+ String :file_path, primary_key: true, text: true
178
+ column :storage_location, 'varchar(128)'
179
+ column :service_time, 'timestamp(3)', { :null => true }
180
+ column :delay_until_time, 'timestamp(3)'
181
+ column :updated, 'timestamp(3)'
182
+ end
183
+ end
184
+
185
+ # Setup database indexes
186
+ case @adapter
187
+ when :postgres
188
+ db_conn.run("CREATE INDEX service_times_file_path_text_index ON preserve_service_times (file_path text_pattern_ops)")
189
+ when :sqlite, :amalgalite
190
+ db_conn.run("CREATE INDEX service_times_file_path_text_index ON preserve_service_times (file_path collate nocase)")
191
+ end
192
+ db_conn.run("CREATE INDEX service_times_storage_location_index ON preserve_service_times (storage_location)")
193
+
194
+ # Create table for tracking the state of the index
195
+ db_conn.create_table!(INDEX_STATE_TBL) do
196
+ String :config_md5
197
+ DateTime :last_reindexed
198
+ String :longleaf_version
199
+ end
200
+
201
+ # Prepopulate the index state information
202
+ update_index_state
203
+ end
204
+
205
+ # Updates the state information for the index to indicate that the index has been refreshed
206
+ # or is in sync with the application's configuration.
207
+ def update_index_state
208
+ index_state_tbl = db_conn[INDEX_STATE_TBL]
209
+ index_state_tbl.delete
210
+ index_state_tbl.insert(
211
+ config_md5: @config_md5,
212
+ last_reindexed: Time.now.utc,
213
+ longleaf_version: Longleaf::VERSION)
214
+ end
215
+
216
+ # Retrieves a page of file paths which have one or more services which need to run.
217
+ # @param file_selector [FileSelector] selector for what paths to search for files
218
+ # @param stale_datetime [DateTime] find file_paths with services needing to be run before this value
219
+ # @return [Array] array of file paths that need one or more services run.
220
+ def paths_with_stale_services(file_selector, stale_datetime)
221
+ if @preserve_dataset.nil?
222
+ @preserve_dataset = db_conn
223
+ .from(PRESERVE_TBL)
224
+ .exclude(service_time: nil)
225
+ .limit(@page_size)
226
+ .order(Sequel.asc(:service_time))
227
+ end
228
+
229
+ # retrieve and return a page of results
230
+ ds = add_path_restrictions(@preserve_dataset, file_selector)
231
+ .where { service_time <= stale_datetime }
232
+ .where { delay_until_time < stale_datetime }
233
+ .select_map(:file_path)
234
+ end
235
+
236
+ # Retrieves a page of paths for registered files.
237
+ # @param file_selector [FileSelector] selector for what paths to search for files
238
+ # @return [Array] array of file paths that are registered
239
+ def registered_paths(file_selector)
240
+ # retrieve and return a page of results
241
+ add_path_restrictions(registered_dataset, file_selector)
242
+ .select_map(:file_path)
243
+ end
244
+
245
+ # Calls the provided block once for each registered file path.
246
+ # Must be passed a block.
247
+ # @param file_selector [FileSelector] selector for what paths to search for files
248
+ # @param older_than [Time] Optional. If provided, only files whose last index update
249
+ # occurred before this timestamp will be returned.
250
+ def each_registered_path(file_selector, older_than: nil, &block)
251
+ dataset = add_path_restrictions(registered_dataset, file_selector)
252
+ .select(:file_path)
253
+ if !older_than.nil?
254
+ older_than_timestamp = older_than.utc.strftime(TIMESTAMP_FORMAT)
255
+ dataset = dataset.where { updated < older_than_timestamp }
256
+ end
257
+ # Yield to the provided block once per row returned
258
+ dataset.paged_each(:rows_per_fetch => @page_size) do |row|
259
+ block.call(row[:file_path])
260
+ end
261
+ end
262
+
263
+ private
264
+ def db_conn
265
+ @connection = Sequel.connect(@conn_details) if @connection.nil?
266
+ @connection
267
+ end
268
+
269
+ def preserve_tbl
270
+ @preserve_tbl = db_conn[PRESERVE_TBL] if @preserve_tbl.nil?
271
+ @preserve_tbl
272
+ end
273
+
274
+ def add_path_restrictions(dataset, file_selector)
275
+ if file_selector.specificity == FileSelector::SPECIFICITY_STORAGE_LOCATION
276
+ dataset.where(storage_location: file_selector.storage_locations)
277
+ else
278
+ # Reformat all selected paths into LIKE partial string matches
279
+ path_conds = file_selector.target_paths.map { |path| path.end_with?('/') ? path + '%' : path }
280
+ dataset.where(Sequel.like(:file_path, *path_conds))
281
+ end
282
+ end
283
+
284
+ def convert_iso8601_to_timestamp(iso8601)
285
+ return nil if iso8601.nil?
286
+ Time.iso8601(iso8601).strftime(TIMESTAMP_FORMAT)
287
+ end
288
+
289
+ def minimum_timestamp
290
+ if @min_timestamp.nil?
291
+ @min_timestamp = ServiceDateHelper.formatted_timestamp(Time.at(0).utc)
292
+ end
293
+ @min_timestamp
294
+ end
295
+
296
+ def registered_dataset
297
+ if @registered_dataset.nil?
298
+ @registered_dataset = db_conn
299
+ .from(PRESERVE_TBL)
300
+ .limit(@page_size)
301
+ .order(Sequel.asc(:service_time))
302
+ end
303
+ @registered_dataset
304
+ end
305
+ end
306
+ end
@@ -1,21 +1,22 @@
1
1
  require 'longleaf/logging/redirecting_logger'
2
2
 
3
3
  module Longleaf
4
+ # Module providing access to logging within longleaf
4
5
  module Logging
5
6
  # Get the main logger for longleaf
6
7
  def logger
7
8
  Logging.logger
8
9
  end
9
-
10
+
10
11
  # Get the main logger for longleaf
11
12
  def self.logger
12
13
  @logger ||= RedirectingLogger.new
13
14
  end
14
-
15
+
15
16
  def initialize_logger(failure_only, log_level, log_format, datetime_format)
16
17
  Logging.initialize_logger(failure_only, log_level, log_format, datetime_format)
17
18
  end
18
-
19
+
19
20
  def self.initialize_logger(failure_only, log_level, log_format, datetime_format)
20
21
  @logger = RedirectingLogger.new(failure_only: failure_only,
21
22
  log_level: log_level,
@@ -23,4 +24,4 @@ module Longleaf
23
24
  datetime_format: datetime_format)
24
25
  end
25
26
  end
26
- end
27
+ end
@@ -1,14 +1,14 @@
1
1
  require 'logger'
2
2
 
3
- # Logger which directs messages to stdout and/or stderr, depending on the nature of the message.
4
- # Status logging, which includes standard logger methods, goes to STDERR.
5
- # Operation success and failure messages go to STDOUT, and to STDERR at info level.
6
3
  module Longleaf
7
4
  module Logging
5
+ # Logger which directs messages to stdout and/or stderr, depending on the nature of the message.
6
+ # Status logging, which includes standard logger methods, goes to STDERR.
7
+ # Operation success and failure messages go to STDOUT, and to STDERR at info level.
8
8
  class RedirectingLogger
9
- # @param failure_only [Boolean] If set to true, only failure messages will be output to STDOUT
9
+ # @param [Boolean] failure_only If set to true, only failure messages will be output to STDOUT
10
10
  # @param log_level [String] logger level used for output to STDERR
11
- # @param log_format [Strfailure_onlying] format string for log entries to STDERR. There are 4 variables available
11
+ # @param log_format [String] format string for log entries to STDERR. There are 4 variables available
12
12
  # for inclusion in the output: severity, datetime, progname, msg. Variables must be wrapped in %{}.
13
13
  # @param datetime_format [String] datetime formatting string used for logger dates appearing in STDERR.
14
14
  def initialize(failure_only: false, log_level: 'WARN', log_format: nil, datetime_format: nil)
@@ -25,12 +25,12 @@ module Longleaf
25
25
  @stderr_log.formatter = proc do |severity, datetime, progname, msg|
26
26
  # Make sure the format ends with a newline
27
27
  @log_format = @log_format + "\n" unless @log_format.end_with?("\n")
28
-
28
+
29
29
  formatted_date = @stderr_log.datetime_format.nil? ? datetime : datetime.strftime(datetime_format)
30
30
  @log_format % { :severity => severity, :datetime => formatted_date, :progname => progname, :msg => msg }
31
31
  end
32
32
  end
33
-
33
+
34
34
  @stdout_log = Logger.new($stdout)
35
35
  @stdout_log.formatter = proc do |severity, datetime, progname, msg|
36
36
  "#{msg}\n"
@@ -41,33 +41,38 @@ module Longleaf
41
41
  @stdout_log.level = 'info'
42
42
  end
43
43
  end
44
-
44
+
45
45
  def debug(progname = nil, &block)
46
46
  @stderr_log.debug(progname, &block)
47
47
  end
48
-
48
+
49
49
  def info(progname = nil, &block)
50
50
  @stderr_log.info(progname, &block)
51
51
  end
52
-
52
+
53
53
  def warn(progname = nil, &block)
54
54
  @stderr_log.warn(progname, &block)
55
55
  end
56
-
56
+
57
57
  def error(progname = nil, &block)
58
58
  @stderr_log.error(progname, &block)
59
59
  end
60
-
60
+
61
61
  def fatal(progname = nil, &block)
62
62
  @stderr_log.fatal(progname, &block)
63
63
  end
64
-
64
+
65
65
  def unknown(progname = nil, &block)
66
66
  @stderr_log.unknown(progname, &block)
67
67
  end
68
-
68
+
69
+ def <<(msg)
70
+ @stderr_log << msg
71
+ end
72
+
69
73
  # Logs a success message to STDOUT, as well as STDERR at info level.
70
- # @param eventOrMessage [String] name of the preservation event which succeeded,
74
+ #
75
+ # @param [String] eventOrMessage name of the preservation event which succeeded,
71
76
  # or the message to output if it is the only parameter. Required.
72
77
  # @param file_name [String] file name which is the subject of this message.
73
78
  # @param message [String] descriptive message to accompany this output
@@ -75,10 +80,10 @@ module Longleaf
75
80
  def success(eventOrMessage, file_name = nil, message = nil, service = nil)
76
81
  outcome('SUCCESS', eventOrMessage, file_name, message, service)
77
82
  end
78
-
83
+
79
84
  # Logs a failure message to STDOUT, as well as STDERR at info level.
80
85
  # If an error was provided, it is logged to STDERR at error level.
81
- # @param eventOrMessage [String] name of the preservation event which failed,
86
+ # @param eventOrMessage [String] name of the preservation event which failed,
82
87
  # or the message to output if it is the only parameter.
83
88
  # @param file_name [String] file name which is the subject of this message.
84
89
  # @param message [String] descriptive message to accompany this output
@@ -87,17 +92,18 @@ module Longleaf
87
92
  def failure(eventOrMessage, file_name = nil, message = nil, service = nil, error: nil)
88
93
  text = outcome_text('FAILURE', eventOrMessage, file_name, message, service, error)
89
94
  @stdout_log.warn(text)
90
-
95
+
91
96
  @stderr_log.info(text)
92
97
  @stderr_log.error("#{error.message}") unless error.nil?
98
+ @stderr_log.error("#{error.backtrace}") unless error.nil? || error.backtrace.nil?
93
99
  end
94
-
100
+
95
101
  # Logs an outcome message to STDOUT, as well as STDERR at info level.
96
102
  # If file_name and message are nil, eventOrMessage will be used as the message.
97
103
  #
98
104
  # @param outcome [String] The status of the outcome. Required.
99
- # @param eventOrMessage [String] name of the preservation event which was successful,
100
- # or the message to output if it is the only parameter. Required.
105
+ # @param eventOrMessage [String] name of the preservation event which was successful,
106
+ # or the message to output if it is the only parameter. Required.
101
107
  # @param file_name [String] file name which is the subject of this message.
102
108
  # @param message [String] descriptive message to accompany this output
103
109
  # @param service [String] name of the service which executed.
@@ -107,14 +113,13 @@ module Longleaf
107
113
  @stdout_log.info(text)
108
114
  @stderr_log.info(text)
109
115
  end
110
-
111
- # FAILURE verify[cdr_fixity_check] /path/to/file: Something terrible
116
+
112
117
  private
113
118
  def outcome_text(outcome, eventOrMessage, file_name = nil, message = nil, service = nil, error = nil)
114
119
  message_only = file_name.nil? && message.nil? && error.nil?
115
-
120
+
116
121
  text = "#{outcome}"
117
-
122
+
118
123
  if message_only
119
124
  text << ": #{eventOrMessage}"
120
125
  else