longleaf 0.2.0.pre.1 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (165) hide show
  1. checksums.yaml +4 -4
  2. data/.circleci/config.yml +84 -0
  3. data/.gitignore +4 -2
  4. data/.rubocop.yml +42 -2
  5. data/.rubocop_todo.yml +390 -311
  6. data/.yardopts +1 -0
  7. data/Gemfile +16 -1
  8. data/README.md +67 -13
  9. data/Rakefile +6 -0
  10. data/bin/setup +16 -1
  11. data/docs/aboutlongleaf.md +28 -0
  12. data/docs/extra.css +32 -0
  13. data/docs/img/change-file.png +0 -0
  14. data/docs/img/ll-example-preserved.png +0 -0
  15. data/docs/index.md +19 -0
  16. data/docs/install.md +66 -0
  17. data/docs/ll-example/config-example-relative.yml +33 -0
  18. data/docs/ll-example/files-dir/LLexample-PDF.pdf +0 -0
  19. data/docs/ll-example/files-dir/LLexample-TOCHANGE.txt +15 -0
  20. data/docs/ll-example/files-dir/LLexample-tokeep.txt +10 -0
  21. data/docs/ll-example/metadata-dir/.gitkeep +0 -0
  22. data/docs/ll-example/replica-files/.gitkeep +0 -0
  23. data/docs/ll-example/replica-metadata/.gitkeep +0 -0
  24. data/docs/quickstart.md +270 -0
  25. data/docs/rdocs/Longleaf.html +135 -0
  26. data/docs/rdocs/Longleaf/AppFields.html +178 -0
  27. data/docs/rdocs/Longleaf/ApplicationConfigDeserializer.html +631 -0
  28. data/docs/rdocs/Longleaf/ApplicationConfigManager.html +610 -0
  29. data/docs/rdocs/Longleaf/ApplicationConfigValidator.html +238 -0
  30. data/docs/rdocs/Longleaf/CLI.html +909 -0
  31. data/docs/rdocs/Longleaf/ChecksumMismatchError.html +151 -0
  32. data/docs/rdocs/Longleaf/ConfigBuilder.html +1339 -0
  33. data/docs/rdocs/Longleaf/ConfigurationError.html +143 -0
  34. data/docs/rdocs/Longleaf/ConfigurationValidator.html +227 -0
  35. data/docs/rdocs/Longleaf/DeregisterCommand.html +420 -0
  36. data/docs/rdocs/Longleaf/DeregisterEvent.html +453 -0
  37. data/docs/rdocs/Longleaf/DeregistrationError.html +151 -0
  38. data/docs/rdocs/Longleaf/DigestHelper.html +419 -0
  39. data/docs/rdocs/Longleaf/EventError.html +147 -0
  40. data/docs/rdocs/Longleaf/EventNames.html +163 -0
  41. data/docs/rdocs/Longleaf/EventStatusTracking.html +656 -0
  42. data/docs/rdocs/Longleaf/FileCheckService.html +540 -0
  43. data/docs/rdocs/Longleaf/FileHelpers.html +520 -0
  44. data/docs/rdocs/Longleaf/FileRecord.html +716 -0
  45. data/docs/rdocs/Longleaf/FileSelector.html +901 -0
  46. data/docs/rdocs/Longleaf/FixityCheckService.html +691 -0
  47. data/docs/rdocs/Longleaf/IndexManager.html +1155 -0
  48. data/docs/rdocs/Longleaf/InvalidDigestAlgorithmError.html +143 -0
  49. data/docs/rdocs/Longleaf/InvalidStoragePathError.html +143 -0
  50. data/docs/rdocs/Longleaf/Logging.html +405 -0
  51. data/docs/rdocs/Longleaf/Logging/RedirectingLogger.html +1213 -0
  52. data/docs/rdocs/Longleaf/LongleafError.html +139 -0
  53. data/docs/rdocs/Longleaf/MDFields.html +193 -0
  54. data/docs/rdocs/Longleaf/MetadataBuilder.html +787 -0
  55. data/docs/rdocs/Longleaf/MetadataDeserializer.html +537 -0
  56. data/docs/rdocs/Longleaf/MetadataError.html +143 -0
  57. data/docs/rdocs/Longleaf/MetadataPersistenceManager.html +539 -0
  58. data/docs/rdocs/Longleaf/MetadataRecord.html +1411 -0
  59. data/docs/rdocs/Longleaf/MetadataSerializer.html +786 -0
  60. data/docs/rdocs/Longleaf/PreservationServiceError.html +147 -0
  61. data/docs/rdocs/Longleaf/PreserveCommand.html +410 -0
  62. data/docs/rdocs/Longleaf/PreserveEvent.html +491 -0
  63. data/docs/rdocs/Longleaf/RegisterCommand.html +428 -0
  64. data/docs/rdocs/Longleaf/RegisterEvent.html +628 -0
  65. data/docs/rdocs/Longleaf/RegisteredFileSelector.html +446 -0
  66. data/docs/rdocs/Longleaf/RegistrationError.html +151 -0
  67. data/docs/rdocs/Longleaf/ReindexCommand.html +576 -0
  68. data/docs/rdocs/Longleaf/RsyncReplicationService.html +1180 -0
  69. data/docs/rdocs/Longleaf/SequelIndexDriver.html +1978 -0
  70. data/docs/rdocs/Longleaf/ServiceCandidateFilesystemIterator.html +572 -0
  71. data/docs/rdocs/Longleaf/ServiceCandidateIndexIterator.html +532 -0
  72. data/docs/rdocs/Longleaf/ServiceCandidateLocator.html +333 -0
  73. data/docs/rdocs/Longleaf/ServiceClassCache.html +725 -0
  74. data/docs/rdocs/Longleaf/ServiceDateHelper.html +425 -0
  75. data/docs/rdocs/Longleaf/ServiceDefinition.html +683 -0
  76. data/docs/rdocs/Longleaf/ServiceDefinitionManager.html +371 -0
  77. data/docs/rdocs/Longleaf/ServiceDefinitionValidator.html +269 -0
  78. data/docs/rdocs/Longleaf/ServiceFields.html +173 -0
  79. data/docs/rdocs/Longleaf/ServiceManager.html +1229 -0
  80. data/docs/rdocs/Longleaf/ServiceMappingManager.html +410 -0
  81. data/docs/rdocs/Longleaf/ServiceMappingValidator.html +347 -0
  82. data/docs/rdocs/Longleaf/ServiceRecord.html +821 -0
  83. data/docs/rdocs/Longleaf/StorageLocation.html +985 -0
  84. data/docs/rdocs/Longleaf/StorageLocationManager.html +729 -0
  85. data/docs/rdocs/Longleaf/StorageLocationUnavailableError.html +143 -0
  86. data/docs/rdocs/Longleaf/StorageLocationValidator.html +373 -0
  87. data/docs/rdocs/Longleaf/StoragePathValidator.html +253 -0
  88. data/docs/rdocs/Longleaf/SystemConfigBuilder.html +441 -0
  89. data/docs/rdocs/Longleaf/SystemConfigFields.html +163 -0
  90. data/docs/rdocs/Longleaf/ValidateConfigCommand.html +451 -0
  91. data/docs/rdocs/Longleaf/ValidateMetadataCommand.html +408 -0
  92. data/docs/rdocs/_index.html +660 -0
  93. data/docs/rdocs/class_list.html +51 -0
  94. data/docs/rdocs/css/common.css +1 -0
  95. data/docs/rdocs/css/full_list.css +58 -0
  96. data/docs/rdocs/css/style.css +496 -0
  97. data/docs/rdocs/file.README.html +165 -0
  98. data/docs/rdocs/file_list.html +56 -0
  99. data/docs/rdocs/frames.html +17 -0
  100. data/docs/rdocs/index.html +165 -0
  101. data/docs/rdocs/js/app.js +303 -0
  102. data/docs/rdocs/js/full_list.js +216 -0
  103. data/docs/rdocs/js/jquery.js +4 -0
  104. data/docs/rdocs/method_list.html +2051 -0
  105. data/docs/rdocs/top-level-namespace.html +110 -0
  106. data/lib/longleaf/candidates/file_selector.rb +47 -15
  107. data/lib/longleaf/candidates/registered_file_selector.rb +67 -0
  108. data/lib/longleaf/candidates/service_candidate_filesystem_iterator.rb +29 -35
  109. data/lib/longleaf/candidates/service_candidate_index_iterator.rb +84 -0
  110. data/lib/longleaf/candidates/service_candidate_locator.rb +9 -4
  111. data/lib/longleaf/cli.rb +162 -80
  112. data/lib/longleaf/commands/deregister_command.rb +12 -11
  113. data/lib/longleaf/commands/preserve_command.rb +13 -8
  114. data/lib/longleaf/commands/register_command.rb +9 -6
  115. data/lib/longleaf/commands/reindex_command.rb +92 -0
  116. data/lib/longleaf/commands/validate_config_command.rb +27 -6
  117. data/lib/longleaf/commands/validate_metadata_command.rb +11 -9
  118. data/lib/longleaf/errors.rb +12 -12
  119. data/lib/longleaf/events/deregister_event.rb +13 -15
  120. data/lib/longleaf/events/event_status_tracking.rb +7 -7
  121. data/lib/longleaf/events/preserve_event.rb +24 -14
  122. data/lib/longleaf/events/register_event.rb +21 -35
  123. data/lib/longleaf/helpers/digest_helper.rb +4 -4
  124. data/lib/longleaf/helpers/service_date_helper.rb +5 -6
  125. data/lib/longleaf/indexing/index_manager.rb +101 -0
  126. data/lib/longleaf/indexing/sequel_index_driver.rb +324 -0
  127. data/lib/longleaf/logging.rb +4 -4
  128. data/lib/longleaf/logging/redirecting_logger.rb +20 -20
  129. data/lib/longleaf/models/app_fields.rb +2 -1
  130. data/lib/longleaf/models/file_record.rb +10 -6
  131. data/lib/longleaf/models/md_fields.rb +1 -1
  132. data/lib/longleaf/models/metadata_record.rb +22 -12
  133. data/lib/longleaf/models/service_definition.rb +3 -3
  134. data/lib/longleaf/models/service_fields.rb +1 -1
  135. data/lib/longleaf/models/service_record.rb +6 -5
  136. data/lib/longleaf/models/storage_location.rb +26 -7
  137. data/lib/longleaf/models/system_config_fields.rb +9 -0
  138. data/lib/longleaf/preservation_services/file_check_service.rb +58 -0
  139. data/lib/longleaf/preservation_services/fixity_check_service.rb +16 -14
  140. data/lib/longleaf/preservation_services/rsync_replication_service.rb +32 -31
  141. data/lib/longleaf/services/application_config_deserializer.rb +55 -18
  142. data/lib/longleaf/services/application_config_manager.rb +16 -4
  143. data/lib/longleaf/services/application_config_validator.rb +1 -2
  144. data/lib/longleaf/services/configuration_validator.rb +6 -4
  145. data/lib/longleaf/services/metadata_deserializer.rb +40 -38
  146. data/lib/longleaf/services/metadata_persistence_manager.rb +46 -0
  147. data/lib/longleaf/services/metadata_serializer.rb +23 -22
  148. data/lib/longleaf/services/service_class_cache.rb +15 -15
  149. data/lib/longleaf/services/service_definition_manager.rb +5 -6
  150. data/lib/longleaf/services/service_definition_validator.rb +5 -6
  151. data/lib/longleaf/services/service_manager.rb +37 -17
  152. data/lib/longleaf/services/service_mapping_manager.rb +9 -9
  153. data/lib/longleaf/services/service_mapping_validator.rb +9 -10
  154. data/lib/longleaf/services/storage_location_manager.rb +22 -8
  155. data/lib/longleaf/services/storage_location_validator.rb +11 -8
  156. data/lib/longleaf/services/storage_path_validator.rb +1 -1
  157. data/lib/longleaf/specs/config_builder.rb +30 -17
  158. data/lib/longleaf/specs/custom_matchers.rb +1 -1
  159. data/lib/longleaf/specs/file_helpers.rb +15 -14
  160. data/lib/longleaf/specs/metadata_builder.rb +91 -0
  161. data/lib/longleaf/specs/system_config_builder.rb +27 -0
  162. data/lib/longleaf/version.rb +1 -1
  163. data/longleaf.gemspec +17 -7
  164. data/mkdocs.yml +20 -0
  165. metadata +233 -22
@@ -10,7 +10,7 @@ module Longleaf
10
10
  # Event to register a file with longleaf
11
11
  class RegisterEvent
12
12
  include Longleaf::EventStatusTracking
13
-
13
+
14
14
  # @param file_rec [FileRecord] file record
15
15
  # @param app_manager [ApplicationConfigManager] the application configuration
16
16
  # @param force [boolean] if true, then already registered files will be re-registered
@@ -21,75 +21,61 @@ module Longleaf
21
21
  raise ArgumentError.new('Must provide an ApplicationConfigManager') if app_manager.nil?
22
22
  raise ArgumentError.new('Parameter app_manager must be an ApplicationConfigManager') \
23
23
  unless app_manager.is_a?(ApplicationConfigManager)
24
-
24
+
25
25
  @app_manager = app_manager
26
26
  @file_rec = file_rec
27
27
  @force = force
28
28
  @checksums = checksums
29
29
  end
30
-
30
+
31
31
  # Perform a registration event on the given file
32
- # @raise RegistrationError if a file cannot be registered
32
+ # @raise RegistrationError if a file cannot be registered
33
33
  def perform
34
34
  begin
35
35
  # Only need to re-register file if the force flag is provided
36
36
  if @file_rec.metadata_present? && !@force
37
37
  raise RegistrationError.new("Unable to register '#{@file_rec.path}', it is already registered.")
38
38
  end
39
-
39
+
40
40
  # create metadata record
41
- md_rec = MetadataRecord.new(registered: Time.now.utc.iso8601)
41
+ md_rec = MetadataRecord.new(registered: Time.now.utc.iso8601(3))
42
42
  @file_rec.metadata_record = md_rec
43
-
43
+
44
44
  # retain significant details from former record
45
45
  if @file_rec.metadata_present?
46
46
  retain_existing_properties
47
47
  end
48
-
48
+
49
49
  populate_file_properties
50
-
50
+
51
51
  md_rec.checksums.merge!(@checksums) unless @checksums.nil?
52
-
53
- populate_services
54
-
55
- # persist the metadata out to file
56
- MetadataSerializer::write(metadata: md_rec,
57
- file_path: @file_rec.metadata_path,
58
- digest_algs: @file_rec.storage_location.metadata_digests)
59
-
52
+
53
+ # persist the metadata
54
+ @app_manager.md_manager.persist(@file_rec)
55
+
60
56
  record_success(EventNames::REGISTER, @file_rec.path)
61
57
  rescue RegistrationError => err
62
58
  record_failure(EventNames::REGISTER, @file_rec.path, err.message)
63
59
  rescue InvalidStoragePathError => err
64
60
  record_failure(EventNames::REGISTER, @file_rec.path, err.message)
65
61
  end
66
-
62
+
67
63
  return_status
68
64
  end
69
-
65
+
70
66
  private
71
67
  def populate_file_properties
72
68
  md_rec = @file_rec.metadata_record
73
-
69
+
74
70
  # Set file properties
75
- md_rec.last_modified = File.mtime(@file_rec.path).utc.iso8601
71
+ md_rec.last_modified = File.mtime(@file_rec.path).utc.iso8601(3)
76
72
  md_rec.file_size = File.size(@file_rec.path)
77
73
  end
78
-
79
- def populate_services
80
- md_rec = @file_rec.metadata_record
81
-
82
- service_manager = @app_manager.service_manager
83
- service_names = service_manager.list_services(location: @file_rec.storage_location.name)
84
-
85
- # Add service section
86
- service_names.each { |serv_name| md_rec.add_service(serv_name) }
87
- end
88
-
74
+
89
75
  # Copy a subset of properties from an existing metadata record to the new record
90
76
  def retain_existing_properties
91
77
  md_rec = @file_rec.metadata_record
92
-
78
+
93
79
  old_md = MetadataDeserializer.deserialize(file_path: @file_rec.metadata_path,
94
80
  digest_algs: @file_rec.storage_location.metadata_digests)
95
81
  # Copy custom properties
@@ -97,7 +83,7 @@ module Longleaf
97
83
  # Copy stale-replicas flag per service
98
84
  old_md.list_services.each do |serv_name|
99
85
  serv_rec = old_md.service(serv_name)
100
-
86
+
101
87
  stale_replicas = serv_rec.stale_replicas
102
88
  if stale_replicas
103
89
  new_service = md_rec.service(serv_name)
@@ -106,4 +92,4 @@ module Longleaf
106
92
  end
107
93
  end
108
94
  end
109
- end
95
+ end
@@ -5,7 +5,7 @@ module Longleaf
5
5
  # Helper methods for generating digests
6
6
  class DigestHelper
7
7
  KNOWN_DIGESTS ||= ['md5', 'sha1', 'sha2', 'sha256', 'sha384', 'sha512', 'rmd160']
8
-
8
+
9
9
  # @param algs Either a string containing one or an array containing zero or more digest
10
10
  # algorithm names.
11
11
  # @raise [InvalidDigestAlgorithmError] thrown if any of the digest algorithms listed are not
@@ -19,11 +19,11 @@ module Longleaf
19
19
  else
20
20
  unknown = algs.select { |alg| !KNOWN_DIGESTS.include?(alg) }
21
21
  unless unknown.empty?
22
- raise InvalidDigestAlgorithmError.new("Unknown digest algorithm(s): #{unknown.to_s}")
22
+ raise InvalidDigestAlgorithmError.new("Unknown digest algorithm(s): #{unknown}")
23
23
  end
24
24
  end
25
25
  end
26
-
26
+
27
27
  # Get a Digest class for the specified algorithm
28
28
  # @param alg [String] name of the digest algorithm
29
29
  # @return [Digest] A digest class for the requested algorithm
@@ -47,4 +47,4 @@ module Longleaf
47
47
  end
48
48
  end
49
49
  end
50
- end
50
+ end
@@ -3,7 +3,6 @@ require 'time'
3
3
  module Longleaf
4
4
  # Helper methods for interacting with dates/timestamps on services
5
5
  class ServiceDateHelper
6
-
7
6
  # Adds the amount of time from modifier to the provided timestamp
8
7
  # @param timestamp [String] ISO-8601 timestamp string
9
8
  # @param modifier [String] amount of time to add to the timestamp. It must follow the syntax
@@ -18,7 +17,7 @@ module Longleaf
18
17
  else
19
18
  raise ArgumentError.new("Cannot parse time modifier #{modifier}")
20
19
  end
21
-
20
+
22
21
  datetime = Time.iso8601(timestamp)
23
22
  case unit
24
23
  when 'second'
@@ -36,16 +35,16 @@ module Longleaf
36
35
  when 'year'
37
36
  unit_modifier = 365 * 24 * 3600
38
37
  end
39
-
38
+
40
39
  modified_time = datetime + (value * unit_modifier)
41
40
  modified_time.iso8601
42
41
  end
43
-
42
+
44
43
  # Get a timestamp in the format expected for service timestamps.
45
44
  # @param timestamp [Time] the time to format. Defaults to now.
46
45
  # @return [String] the time formatted as iso8601
47
46
  def self.formatted_timestamp(timestamp = Time.now)
48
- timestamp.iso8601.to_s
47
+ timestamp.utc.iso8601(3).to_s
49
48
  end
50
49
  end
51
- end
50
+ end
@@ -0,0 +1,101 @@
1
+ require 'longleaf/models/system_config_fields'
2
+ require 'longleaf/services/metadata_persistence_manager'
3
+ require 'longleaf/errors'
4
+
5
+ module Longleaf
6
+ # Manager configures and provides access to a metadata index if one is specified
7
+ class IndexManager
8
+ SYS_FIELDS ||= Longleaf::SystemConfigFields
9
+
10
+ # @param config [Hash] The system configuration as a hash
11
+ # @param app_config_manager [ApplicationConfigManager] the application config
12
+ def initialize(config, app_config_manager)
13
+ @config = config
14
+ @app_config_manager = app_config_manager
15
+ init_index_driver if @config&.key?(SYS_FIELDS::MD_INDEX)
16
+ end
17
+
18
+ # @return true if the system is configured to use a metadata index
19
+ def using_index?
20
+ !@index_driver.nil?
21
+ end
22
+
23
+ # Index the provided file_rec and its metadata
24
+ #
25
+ # @param file_rec [FileRecord] file record to index
26
+ def index(file_rec)
27
+ @index_driver.index(file_rec)
28
+ end
29
+
30
+ # Remove an entry from the index
31
+ # @param remove_me The record to remove from the index
32
+ def remove(remove_me)
33
+ @index_driver.remove(remove_me)
34
+ end
35
+
36
+ def clear_index(older_than = nil)
37
+ @index_driver.clear_index(older_than)
38
+ end
39
+
40
+ # @return true if the index should be reindexed
41
+ def index_stale?
42
+ @index_driver.is_stale?
43
+ end
44
+
45
+ # Setup initial structure of index implementation
46
+ def setup_index
47
+ @index_driver.setup_index
48
+ end
49
+
50
+ def update_index_state
51
+ @index_driver.update_index_state
52
+ end
53
+
54
+ # Retrieves a set of file paths which have one or more services which need to run.
55
+ #
56
+ # @param file_selector [FileSelector] selector for paths to search for files
57
+ # @param stale_datetime [DateTime] find file_paths with services needing to be run before this value
58
+ # @return [Array] array of file paths that need one or more services run, in ascending order by
59
+ # timestamp.
60
+ def paths_with_stale_services(file_selector, stale_datetime)
61
+ @index_driver.paths_with_stale_services(file_selector, stale_datetime)
62
+ end
63
+
64
+ # Retrieves a page of paths for registered files.
65
+ # @param file_selector [FileSelector] selector for what paths to search for files
66
+ # @return [Array] array of file paths that are registered
67
+ def registered_paths(file_selector)
68
+ @index_driver.registered_paths(file_selector)
69
+ end
70
+
71
+ def each_registered_path(file_selector, older_than: nil, &block)
72
+ @index_driver.each_registered_path(file_selector, older_than: older_than, &block)
73
+ end
74
+
75
+ private
76
+ def init_index_driver
77
+ index_conf = @config[SYS_FIELDS::MD_INDEX]
78
+ adapter = index_conf[SYS_FIELDS::MD_INDEX_ADAPTER]&.downcase
79
+
80
+ raise ConfigurationError.new('Must specify an adapter for the metadata index') if adapter.nil?
81
+
82
+ adapter = adapter.to_sym
83
+
84
+ case adapter
85
+ when :postgres, :mysql, :mysql2, :sqlite, :amalgalite
86
+ page_size = index_conf[SYS_FIELDS::MD_INDEX_PAGE_SIZE]&.to_int
87
+
88
+ connection = index_conf[SYS_FIELDS::MD_INDEX_CONNECTION]
89
+ raise ConfigurationError.new("Must specify connection details for index adapter of type '#{adapter}'") if connection.nil?
90
+
91
+ require 'longleaf/indexing/sequel_index_driver'
92
+ @index_driver = SequelIndexDriver.new(@app_config_manager,
93
+ adapter,
94
+ connection,
95
+ page_size: page_size)
96
+ else
97
+ raise ConfigurationError.new("Unknown index adapter '#{adapter}' specified.") if adapter.nil?
98
+ end
99
+ end
100
+ end
101
+ end
@@ -0,0 +1,324 @@
1
+ require 'sequel'
2
+ require 'digest/md5'
3
+ require 'longleaf/events/event_names'
4
+ require 'longleaf/candidates/file_selector'
5
+ require 'longleaf/version'
6
+ require 'longleaf/models/system_config_fields'
7
+ require 'longleaf/logging'
8
+
9
+ module Longleaf
10
+ # Driver for interacting with RDBM based metadata index using the Sequel ORM gem.
11
+ # Users must create the database and credentials for connecting to it in advance,
12
+ # if using a database application that requires creation of databases (ie, not sqlite).
13
+ # The default database name is 'longleaf_metadata_index' but may be overridden.
14
+ #
15
+ # See the Sequel documentation for details about accepted connection parameters:
16
+ # https://github.com/jeremyevans/sequel/blob/master/doc/opening_databases.rdoc
17
+ class SequelIndexDriver
18
+ include Longleaf::Logging
19
+ INDEX_DB_NAME ||= 'longleaf_metadata_index'
20
+ PRESERVE_TBL ||= "preserve_service_times".to_sym
21
+ INDEX_STATE_TBL ||= "index_state".to_sym
22
+ DEFAULT_PAGE_SIZE ||= 1000
23
+ TIMESTAMP_FORMAT ||= '%Y-%m-%d %H:%M:%S.%3N'
24
+
25
+ # Initialize the index driver
26
+ #
27
+ # @param app_config [ApplicationConfigManager] the application configuration manager
28
+ # @param adapter [Symbol] name of the database adapter to use.
29
+ # @param conn_details Details about the configuration and connection to the database used for the index.
30
+ # If a string is provided, it will be used as the connection URL and must identify the adapter.
31
+ # If a hash is provided, it is used as the parameters for the database connection.
32
+ # @param page_size [Integer] number of results to retrieve per query when getting candidates
33
+ def initialize(app_config, adapter, conn_details, page_size: nil)
34
+ Sequel.default_timezone = :utc
35
+ @app_config = app_config
36
+ @adapter = adapter
37
+ @conn_details = conn_details
38
+ # Digest of the app config file so we can tell if it changes
39
+ @config_md5 = app_config.config_md5
40
+ @page_size = page_size.nil? || page_size <= 0 ? DEFAULT_PAGE_SIZE : page_size
41
+
42
+ if @conn_details.is_a?(Hash)
43
+ # Add in the adapter name
44
+ @conn_details['adapter'] = adapter unless @conn_details.key?('adapter')
45
+ # Add in default database name if none was specified
46
+ @conn_details['database'] = INDEX_DB_NAME unless @conn_details.key?('database')
47
+ end
48
+ end
49
+
50
+ # Returns true if the application configuration does not match the configuration used for
51
+ # the last reindex.
52
+ def is_stale?
53
+ db_conn[INDEX_STATE_TBL].where(config_md5: @config_md5).count == 0
54
+ end
55
+
56
+ # Index the provided file_rec and its metadata
57
+ #
58
+ # @param file_rec [FileRecord] file record to index
59
+ def index(file_rec)
60
+ file_path = file_rec.path
61
+ md_rec = file_rec.metadata_record
62
+ storage_loc = file_rec.storage_location
63
+ service_manager = @app_config.service_manager
64
+
65
+ # Produce a list of service definitions which should apply to the file
66
+ expected_services = service_manager.list_service_definitions(
67
+ location: storage_loc.name)
68
+
69
+ first_timestamp = first_service_execution_timestamp(expected_services, md_rec)
70
+ delay_until_timestamp = delay_until_timestamp(md_rec)
71
+
72
+ first_timestamp = convert_iso8601_to_timestamp(first_timestamp)
73
+ delay_until_timestamp = convert_iso8601_to_timestamp(delay_until_timestamp)
74
+ now_stamp = Time.now.utc.strftime(TIMESTAMP_FORMAT)
75
+
76
+ if @adapter == :mysql || @adapter == :mysql2
77
+ preserve_tbl.on_duplicate_key_update
78
+ .insert(file_path: file_path,
79
+ storage_location: storage_loc.name,
80
+ service_time: first_timestamp,
81
+ delay_until_time: delay_until_timestamp,
82
+ updated: now_stamp)
83
+ else
84
+ preserve_tbl.insert_conflict(target: :file_path,
85
+ update: {
86
+ storage_location: storage_loc.name,
87
+ service_time: first_timestamp,
88
+ delay_until_time: delay_until_timestamp,
89
+ updated: now_stamp } )
90
+ .insert(file_path: file_path,
91
+ storage_location: storage_loc.name,
92
+ service_time: first_timestamp,
93
+ delay_until_time: delay_until_timestamp,
94
+ updated: now_stamp)
95
+ end
96
+ end
97
+
98
+ # Find the earliest service execution time for any services expected to be run for the specified file.
99
+ #
100
+ # @param expected_services [Array] list of ServiceDefinition objects expected for specified file.
101
+ # @param md_rec [MetadataRecord] metadata record for the file being evaluated
102
+ # @return The timestamp of the earliest service execution time for the file described by md_rec, in iso8601 format.
103
+ # Returns nil if no services are expected, all services have already run and do not have a next occurrence, or
104
+ # the file is deregistered.
105
+ def first_service_execution_timestamp(expected_services, md_rec)
106
+ current_time = Time.now.utc.iso8601(3)
107
+ if md_rec.deregistered?
108
+ return nil
109
+ end
110
+
111
+ service_times = Array.new
112
+
113
+ present_services = md_rec.list_services
114
+
115
+ expected_services.each do |service_def|
116
+ service_name = service_def.name
117
+ # Service has never run, set execution time to now
118
+ if !present_services.include?(service_name)
119
+ service_times << current_time
120
+ next
121
+ end
122
+
123
+ service_rec = md_rec.service(service_name)
124
+
125
+ # Service either needs a run or has no timestamp, so execution time of now
126
+ if service_rec.run_needed || service_rec.timestamp.nil?
127
+ service_times << current_time
128
+ next
129
+ end
130
+
131
+ # Calculate the next time this service should run based on frequency
132
+ frequency = service_def.frequency
133
+ unless frequency.nil?
134
+ service_timestamp = service_rec.timestamp
135
+ service_times << ServiceDateHelper.add_to_timestamp(service_timestamp, frequency)
136
+ next
137
+ end
138
+ end
139
+ # Return the lowest service execution time
140
+ service_times.min
141
+ end
142
+
143
+ # @return The first failure timestamp found for any service, or the minimum timestamp (Unix epoch) if there were none.
144
+ def delay_until_timestamp(md_rec)
145
+ md_rec.list_services.each do |service_name|
146
+ service_rec = md_rec.service(service_name)
147
+ return service_rec.failure_timestamp unless service_rec.failure_timestamp.nil?
148
+ end
149
+ # return lowest possible date
150
+ return minimum_timestamp
151
+ end
152
+
153
+ # Remove an entry from the index
154
+ # @param remove_me The record to remove from the index. May be a FileRecord or a String.
155
+ def remove(remove_me)
156
+ if remove_me.is_a?(FileRecord)
157
+ path = remove_me.path
158
+ else
159
+ path = remove_me
160
+ end
161
+
162
+ result = preserve_tbl.where(file_path: path).delete
163
+ if result == 0
164
+ logger.warn("Could not remove #{path} from the index, path was not present.")
165
+ end
166
+ end
167
+
168
+ # Remove all entries from the index
169
+ # @param older_than [Time] Optional. If provided, only entries that have not been indexed
170
+ # since before the provided time will be deleted.
171
+ def clear_index(older_than = nil)
172
+ if older_than.nil?
173
+ preserve_tbl.delete
174
+ else
175
+ older_than_timestamp = older_than.utc.strftime(TIMESTAMP_FORMAT)
176
+ preserve_tbl.where { updated < older_than_timestamp }.delete
177
+ end
178
+ end
179
+
180
+ # Initialize the index's database using the provided configuration
181
+ def setup_index
182
+ # Create the table for tracking when files will need preservation services run on them.
183
+ case @adapter
184
+ when :mysql, :mysql2
185
+ # mysql does not support 'text' fields as primary keys
186
+ db_conn.create_table!(PRESERVE_TBL) do
187
+ String :file_path, primary_key: true, size: 768
188
+ column :storage_location, 'varchar(128)'
189
+ column :service_time, 'timestamp(3)', { :null => true }
190
+ column :delay_until_time, 'timestamp(3)'
191
+ column :updated, 'timestamp(3)'
192
+ end
193
+ else
194
+ db_conn.create_table!(PRESERVE_TBL) do
195
+ String :file_path, primary_key: true, text: true
196
+ column :storage_location, 'varchar(128)'
197
+ column :service_time, 'timestamp(3)', { :null => true }
198
+ column :delay_until_time, 'timestamp(3)'
199
+ column :updated, 'timestamp(3)'
200
+ end
201
+ end
202
+
203
+ # Setup database indexes
204
+ case @adapter
205
+ when :postgres
206
+ db_conn.run("CREATE INDEX service_times_file_path_text_index ON preserve_service_times (file_path text_pattern_ops)")
207
+ when :sqlite, :amalgalite
208
+ db_conn.run("CREATE INDEX service_times_file_path_text_index ON preserve_service_times (file_path collate nocase)")
209
+ end
210
+ db_conn.run("CREATE INDEX service_times_storage_location_index ON preserve_service_times (storage_location)")
211
+
212
+ # Create table for tracking the state of the index
213
+ db_conn.create_table!(INDEX_STATE_TBL) do
214
+ String :config_md5
215
+ DateTime :last_reindexed
216
+ String :longleaf_version
217
+ end
218
+
219
+ # Prepopulate the index state information
220
+ update_index_state
221
+ end
222
+
223
+ # Updates the state information for the index to indicate that the index has been refreshed
224
+ # or is in sync with the application's configuration.
225
+ def update_index_state
226
+ index_state_tbl = db_conn[INDEX_STATE_TBL]
227
+ index_state_tbl.delete
228
+ index_state_tbl.insert(
229
+ config_md5: @config_md5,
230
+ last_reindexed: Time.now.utc,
231
+ longleaf_version: Longleaf::VERSION)
232
+ end
233
+
234
+ # Retrieves a page of file paths which have one or more services which need to run.
235
+ # @param file_selector [FileSelector] selector for what paths to search for files
236
+ # @param stale_datetime [DateTime] find file_paths with services needing to be run before this value
237
+ # @return [Array] array of file paths that need one or more services run.
238
+ def paths_with_stale_services(file_selector, stale_datetime)
239
+ if @preserve_dataset.nil?
240
+ @preserve_dataset = db_conn
241
+ .from(PRESERVE_TBL)
242
+ .exclude(service_time: nil)
243
+ .limit(@page_size)
244
+ .order(Sequel.asc(:service_time))
245
+ end
246
+
247
+ # retrieve and return a page of results
248
+ ds = add_path_restrictions(@preserve_dataset, file_selector)
249
+ .where { service_time <= stale_datetime }
250
+ .where { delay_until_time < stale_datetime }
251
+ .select_map(:file_path)
252
+ end
253
+
254
+ # Retrieves a page of paths for registered files.
255
+ # @param file_selector [FileSelector] selector for what paths to search for files
256
+ # @return [Array] array of file paths that are registered
257
+ def registered_paths(file_selector)
258
+ # retrieve and return a page of results
259
+ add_path_restrictions(registered_dataset, file_selector)
260
+ .select_map(:file_path)
261
+ end
262
+
263
+ # Calls the provided block once for each registered file path.
264
+ # Must be passed a block.
265
+ # @param file_selector [FileSelector] selector for what paths to search for files
266
+ # @param older_than [Time] Optional. If provided, only files that have not been
267
+ # indexed since before this timestamp will be returned.
268
+ def each_registered_path(file_selector, older_than: nil, &block)
269
+ dataset = add_path_restrictions(registered_dataset, file_selector)
270
+ .select(:file_path)
271
+ if !older_than.nil?
272
+ older_than_timestamp = older_than.utc.strftime(TIMESTAMP_FORMAT)
273
+ dataset = dataset.where { updated < older_than_timestamp }
274
+ end
275
+ # Yield to the provided block once per row returned
276
+ dataset.paged_each(:rows_per_fetch => @page_size) do |row|
277
+ block.call(row[:file_path])
278
+ end
279
+ end
280
+
281
+ private
282
+ def db_conn
283
+ @connection = Sequel.connect(@conn_details) if @connection.nil?
284
+ @connection
285
+ end
286
+
287
+ def preserve_tbl
288
+ @preserve_tbl = db_conn[PRESERVE_TBL] if @preserve_tbl.nil?
289
+ @preserve_tbl
290
+ end
291
+
292
+ def add_path_restrictions(dataset, file_selector)
293
+ if file_selector.specificity == FileSelector::SPECIFICITY_STORAGE_LOCATION
294
+ dataset.where(storage_location: file_selector.storage_locations)
295
+ else
296
+ # Reformat all selected paths into LIKE partial string matches
297
+ path_conds = file_selector.target_paths.map { |path| path.end_with?('/') ? path + '%' : path }
298
+ dataset.where(Sequel.like(:file_path, *path_conds))
299
+ end
300
+ end
301
+
302
+ def convert_iso8601_to_timestamp(iso8601)
303
+ return nil if iso8601.nil?
304
+ Time.iso8601(iso8601).strftime(TIMESTAMP_FORMAT)
305
+ end
306
+
307
+ def minimum_timestamp
308
+ if @min_timestamp.nil?
309
+ @min_timestamp = ServiceDateHelper.formatted_timestamp(Time.at(0).utc)
310
+ end
311
+ @min_timestamp
312
+ end
313
+
314
+ def registered_dataset
315
+ if @registered_dataset.nil?
316
+ @registered_dataset = db_conn
317
+ .from(PRESERVE_TBL)
318
+ .limit(@page_size)
319
+ .order(Sequel.asc(:service_time))
320
+ end
321
+ @registered_dataset
322
+ end
323
+ end
324
+ end