longleaf 0.1.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185) hide show
  1. checksums.yaml +4 -4
  2. data/.circleci/config.yml +94 -0
  3. data/.editorconfig +13 -0
  4. data/.gitignore +4 -1
  5. data/.rubocop.yml +44 -0
  6. data/.rubocop_todo.yml +834 -0
  7. data/.yardopts +1 -0
  8. data/Gemfile +16 -1
  9. data/README.md +98 -12
  10. data/Rakefile +6 -0
  11. data/bin/setup +16 -1
  12. data/docs/aboutlongleaf.md +28 -0
  13. data/docs/extra.css +32 -0
  14. data/docs/img/change-file.png +0 -0
  15. data/docs/img/ll-example-preserved.png +0 -0
  16. data/docs/index.md +19 -0
  17. data/docs/install.md +66 -0
  18. data/docs/ll-example/config-example-relative.yml +33 -0
  19. data/docs/ll-example/files-dir/LLexample-PDF.pdf +0 -0
  20. data/docs/ll-example/files-dir/LLexample-TOCHANGE.txt +15 -0
  21. data/docs/ll-example/files-dir/LLexample-tokeep.txt +10 -0
  22. data/docs/ll-example/metadata-dir/.gitkeep +0 -0
  23. data/docs/ll-example/replica-files/.gitkeep +0 -0
  24. data/docs/ll-example/replica-metadata/.gitkeep +0 -0
  25. data/docs/quickstart.md +270 -0
  26. data/docs/rdocs/Longleaf.html +135 -0
  27. data/docs/rdocs/Longleaf/AppFields.html +178 -0
  28. data/docs/rdocs/Longleaf/ApplicationConfigDeserializer.html +631 -0
  29. data/docs/rdocs/Longleaf/ApplicationConfigManager.html +610 -0
  30. data/docs/rdocs/Longleaf/ApplicationConfigValidator.html +238 -0
  31. data/docs/rdocs/Longleaf/CLI.html +909 -0
  32. data/docs/rdocs/Longleaf/ChecksumMismatchError.html +151 -0
  33. data/docs/rdocs/Longleaf/ConfigBuilder.html +1339 -0
  34. data/docs/rdocs/Longleaf/ConfigurationError.html +143 -0
  35. data/docs/rdocs/Longleaf/ConfigurationValidator.html +227 -0
  36. data/docs/rdocs/Longleaf/DeregisterCommand.html +420 -0
  37. data/docs/rdocs/Longleaf/DeregisterEvent.html +453 -0
  38. data/docs/rdocs/Longleaf/DeregistrationError.html +151 -0
  39. data/docs/rdocs/Longleaf/DigestHelper.html +419 -0
  40. data/docs/rdocs/Longleaf/EventError.html +147 -0
  41. data/docs/rdocs/Longleaf/EventNames.html +163 -0
  42. data/docs/rdocs/Longleaf/EventStatusTracking.html +656 -0
  43. data/docs/rdocs/Longleaf/FileCheckService.html +540 -0
  44. data/docs/rdocs/Longleaf/FileHelpers.html +520 -0
  45. data/docs/rdocs/Longleaf/FileRecord.html +716 -0
  46. data/docs/rdocs/Longleaf/FileSelector.html +901 -0
  47. data/docs/rdocs/Longleaf/FixityCheckService.html +691 -0
  48. data/docs/rdocs/Longleaf/IndexManager.html +1155 -0
  49. data/docs/rdocs/Longleaf/InvalidDigestAlgorithmError.html +143 -0
  50. data/docs/rdocs/Longleaf/InvalidStoragePathError.html +143 -0
  51. data/docs/rdocs/Longleaf/Logging.html +405 -0
  52. data/docs/rdocs/Longleaf/Logging/RedirectingLogger.html +1213 -0
  53. data/docs/rdocs/Longleaf/LongleafError.html +139 -0
  54. data/docs/rdocs/Longleaf/MDFields.html +193 -0
  55. data/docs/rdocs/Longleaf/MetadataBuilder.html +787 -0
  56. data/docs/rdocs/Longleaf/MetadataDeserializer.html +537 -0
  57. data/docs/rdocs/Longleaf/MetadataError.html +143 -0
  58. data/docs/rdocs/Longleaf/MetadataPersistenceManager.html +539 -0
  59. data/docs/rdocs/Longleaf/MetadataRecord.html +1411 -0
  60. data/docs/rdocs/Longleaf/MetadataSerializer.html +786 -0
  61. data/docs/rdocs/Longleaf/PreservationServiceError.html +147 -0
  62. data/docs/rdocs/Longleaf/PreserveCommand.html +410 -0
  63. data/docs/rdocs/Longleaf/PreserveEvent.html +491 -0
  64. data/docs/rdocs/Longleaf/RegisterCommand.html +428 -0
  65. data/docs/rdocs/Longleaf/RegisterEvent.html +628 -0
  66. data/docs/rdocs/Longleaf/RegisteredFileSelector.html +446 -0
  67. data/docs/rdocs/Longleaf/RegistrationError.html +151 -0
  68. data/docs/rdocs/Longleaf/ReindexCommand.html +576 -0
  69. data/docs/rdocs/Longleaf/RsyncReplicationService.html +1180 -0
  70. data/docs/rdocs/Longleaf/SequelIndexDriver.html +1978 -0
  71. data/docs/rdocs/Longleaf/ServiceCandidateFilesystemIterator.html +572 -0
  72. data/docs/rdocs/Longleaf/ServiceCandidateIndexIterator.html +532 -0
  73. data/docs/rdocs/Longleaf/ServiceCandidateLocator.html +333 -0
  74. data/docs/rdocs/Longleaf/ServiceClassCache.html +725 -0
  75. data/docs/rdocs/Longleaf/ServiceDateHelper.html +425 -0
  76. data/docs/rdocs/Longleaf/ServiceDefinition.html +683 -0
  77. data/docs/rdocs/Longleaf/ServiceDefinitionManager.html +371 -0
  78. data/docs/rdocs/Longleaf/ServiceDefinitionValidator.html +269 -0
  79. data/docs/rdocs/Longleaf/ServiceFields.html +173 -0
  80. data/docs/rdocs/Longleaf/ServiceManager.html +1229 -0
  81. data/docs/rdocs/Longleaf/ServiceMappingManager.html +410 -0
  82. data/docs/rdocs/Longleaf/ServiceMappingValidator.html +347 -0
  83. data/docs/rdocs/Longleaf/ServiceRecord.html +821 -0
  84. data/docs/rdocs/Longleaf/StorageLocation.html +985 -0
  85. data/docs/rdocs/Longleaf/StorageLocationManager.html +729 -0
  86. data/docs/rdocs/Longleaf/StorageLocationUnavailableError.html +143 -0
  87. data/docs/rdocs/Longleaf/StorageLocationValidator.html +373 -0
  88. data/docs/rdocs/Longleaf/StoragePathValidator.html +253 -0
  89. data/docs/rdocs/Longleaf/SystemConfigBuilder.html +441 -0
  90. data/docs/rdocs/Longleaf/SystemConfigFields.html +163 -0
  91. data/docs/rdocs/Longleaf/ValidateConfigCommand.html +451 -0
  92. data/docs/rdocs/Longleaf/ValidateMetadataCommand.html +408 -0
  93. data/docs/rdocs/_index.html +660 -0
  94. data/docs/rdocs/class_list.html +51 -0
  95. data/docs/rdocs/css/common.css +1 -0
  96. data/docs/rdocs/css/full_list.css +58 -0
  97. data/docs/rdocs/css/style.css +496 -0
  98. data/docs/rdocs/file.README.html +165 -0
  99. data/docs/rdocs/file_list.html +56 -0
  100. data/docs/rdocs/frames.html +17 -0
  101. data/docs/rdocs/index.html +165 -0
  102. data/docs/rdocs/js/app.js +303 -0
  103. data/docs/rdocs/js/full_list.js +216 -0
  104. data/docs/rdocs/js/jquery.js +4 -0
  105. data/docs/rdocs/method_list.html +2051 -0
  106. data/docs/rdocs/top-level-namespace.html +110 -0
  107. data/lib/longleaf/candidates/file_selector.rb +150 -0
  108. data/lib/longleaf/candidates/manifest_digest_provider.rb +17 -0
  109. data/lib/longleaf/candidates/physical_path_provider.rb +17 -0
  110. data/lib/longleaf/candidates/registered_file_selector.rb +67 -0
  111. data/lib/longleaf/candidates/service_candidate_filesystem_iterator.rb +93 -0
  112. data/lib/longleaf/candidates/service_candidate_index_iterator.rb +84 -0
  113. data/lib/longleaf/candidates/service_candidate_locator.rb +23 -0
  114. data/lib/longleaf/candidates/single_digest_provider.rb +13 -0
  115. data/lib/longleaf/cli.rb +252 -46
  116. data/lib/longleaf/commands/deregister_command.rb +51 -0
  117. data/lib/longleaf/commands/preserve_command.rb +50 -0
  118. data/lib/longleaf/commands/register_command.rb +34 -43
  119. data/lib/longleaf/commands/reindex_command.rb +92 -0
  120. data/lib/longleaf/commands/validate_config_command.rb +33 -8
  121. data/lib/longleaf/commands/validate_metadata_command.rb +51 -0
  122. data/lib/longleaf/errors.rb +26 -7
  123. data/lib/longleaf/events/deregister_event.rb +53 -0
  124. data/lib/longleaf/events/event_names.rb +9 -0
  125. data/lib/longleaf/events/event_status_tracking.rb +59 -0
  126. data/lib/longleaf/events/preserve_event.rb +82 -0
  127. data/lib/longleaf/events/register_event.rb +59 -51
  128. data/lib/longleaf/helpers/case_insensitive_hash.rb +38 -0
  129. data/lib/longleaf/helpers/digest_helper.rb +56 -0
  130. data/lib/longleaf/helpers/s3_uri_helper.rb +86 -0
  131. data/lib/longleaf/helpers/selection_options_parser.rb +215 -0
  132. data/lib/longleaf/helpers/service_date_helper.rb +78 -0
  133. data/lib/longleaf/indexing/index_manager.rb +101 -0
  134. data/lib/longleaf/indexing/sequel_index_driver.rb +306 -0
  135. data/lib/longleaf/logging.rb +5 -4
  136. data/lib/longleaf/logging/redirecting_logger.rb +30 -25
  137. data/lib/longleaf/models/app_fields.rb +7 -2
  138. data/lib/longleaf/models/file_record.rb +31 -8
  139. data/lib/longleaf/models/filesystem_metadata_location.rb +56 -0
  140. data/lib/longleaf/models/filesystem_storage_location.rb +52 -0
  141. data/lib/longleaf/models/md_fields.rb +3 -1
  142. data/lib/longleaf/models/metadata_location.rb +47 -0
  143. data/lib/longleaf/models/metadata_record.rb +43 -16
  144. data/lib/longleaf/models/s3_storage_location.rb +138 -0
  145. data/lib/longleaf/models/service_definition.rb +7 -6
  146. data/lib/longleaf/models/service_fields.rb +7 -1
  147. data/lib/longleaf/models/service_record.rb +10 -6
  148. data/lib/longleaf/models/storage_location.rb +24 -19
  149. data/lib/longleaf/models/storage_types.rb +9 -0
  150. data/lib/longleaf/models/system_config_fields.rb +9 -0
  151. data/lib/longleaf/preservation_services/file_check_service.rb +59 -0
  152. data/lib/longleaf/preservation_services/fixity_check_service.rb +124 -0
  153. data/lib/longleaf/preservation_services/rsync_replication_service.rb +198 -0
  154. data/lib/longleaf/preservation_services/s3_replication_service.rb +131 -0
  155. data/lib/longleaf/services/application_config_deserializer.rb +81 -24
  156. data/lib/longleaf/services/application_config_manager.rb +20 -6
  157. data/lib/longleaf/services/application_config_validator.rb +19 -9
  158. data/lib/longleaf/services/configuration_validator.rb +67 -4
  159. data/lib/longleaf/services/filesystem_location_validator.rb +16 -0
  160. data/lib/longleaf/services/metadata_deserializer.rb +115 -42
  161. data/lib/longleaf/services/metadata_persistence_manager.rb +47 -0
  162. data/lib/longleaf/services/metadata_serializer.rb +156 -23
  163. data/lib/longleaf/services/metadata_validator.rb +76 -0
  164. data/lib/longleaf/services/s3_location_validator.rb +19 -0
  165. data/lib/longleaf/services/service_class_cache.rb +112 -0
  166. data/lib/longleaf/services/service_definition_manager.rb +10 -7
  167. data/lib/longleaf/services/service_definition_validator.rb +25 -18
  168. data/lib/longleaf/services/service_manager.rb +86 -11
  169. data/lib/longleaf/services/service_mapping_manager.rb +13 -12
  170. data/lib/longleaf/services/service_mapping_validator.rb +36 -26
  171. data/lib/longleaf/services/storage_location_manager.rb +76 -15
  172. data/lib/longleaf/services/storage_location_validator.rb +49 -35
  173. data/lib/longleaf/specs/config_builder.rb +47 -23
  174. data/lib/longleaf/specs/config_validator_helpers.rb +16 -0
  175. data/lib/longleaf/specs/custom_matchers.rb +9 -0
  176. data/lib/longleaf/specs/file_helpers.rb +61 -0
  177. data/lib/longleaf/specs/metadata_builder.rb +98 -0
  178. data/lib/longleaf/specs/system_config_builder.rb +27 -0
  179. data/lib/longleaf/version.rb +1 -1
  180. data/longleaf.gemspec +20 -7
  181. data/mkdocs.yml +21 -0
  182. metadata +308 -24
  183. data/.travis.yml +0 -4
  184. data/lib/longleaf/commands/abstract_command.rb +0 -37
  185. data/lib/longleaf/services/storage_path_validator.rb +0 -16
@@ -0,0 +1,86 @@
1
+ require 'uri'
2
+
3
module Longleaf
  # Helper for interacting with s3 uris
  class S3UriHelper
    # Matches the host portion of an S3 endpoint. Capture 1 is the optional host prefix
    # in front of "s3" (the bucket name plus a trailing dot in virtual-hosted style urls).
    # Capture 2 is the optional region plus its trailing separator.
    ENDPOINT_PATTERN = /^(.+\.)?s3[.\-]([a-z0-9\-]+[\-.])?[a-z0-9]+\./
    ALLOWED_SCHEMES = ['http', 'https', 's3'].freeze

    # Extract the name of the s3 bucket from the provided url
    # @param url s3 url
    # @return the name of the bucket, or nil if the name could not be identified
    # @raise [ArgumentError] if the url is nil, has a disallowed scheme, has no host,
    #    or its host does not look like an S3 endpoint
    def self.extract_bucket(url)
      uri = s3_uri(url)
      prefix = match_endpoint(uri)[1]

      # Virtual-hosted style url: the bucket is the host prefix without its trailing dot
      return prefix[0..-2] unless prefix.nil? || prefix.empty?

      # Path style url: the bucket is the first path segment. Splitting "/" or "" yields
      # an empty array, so index 1 is nil when no bucket is present.
      uri.path.split('/')[1]
    end

    # Extract the object path from the provided url, excluding the bucket name
    # @param url s3 url
    # @return the path within the bucket, or nil if the url has no path
    # @raise [ArgumentError] if the url is nil, has a disallowed scheme, has no host,
    #    or its host does not look like an S3 endpoint
    def self.extract_path(url)
      uri = s3_uri(url)
      prefix = match_endpoint(uri)[1]

      path = uri.path
      return nil if path == '/' || path.empty?

      # trim off the first slash
      path = path.partition('/').last

      # In path style urls the first segment is the bucket name, so trim it off as well
      path = path.partition('/').last if prefix.nil? || prefix.empty?

      path
    end

    # Extract the region from the provided url
    # @param url s3 url
    # @return the region named in the endpoint host, or nil if no region is specified
    # @raise [ArgumentError] if the url is nil, has a disallowed scheme, has no host,
    #    or its host does not look like an S3 endpoint
    def self.extract_region(url)
      region = match_endpoint(s3_uri(url))[2]

      # Drop the trailing separator captured along with the region
      region[0..-2] unless region.nil?
    end

    # Parse the provided url, verifying that it has an allowed scheme and a host
    # @param url s3 url
    # @return [URI] the parsed uri
    # @raise [ArgumentError] if the url is nil, the scheme is not permitted, or there is no host
    def self.s3_uri(url)
      raise ArgumentError, "url cannot be empty" if url.nil?

      uri = URI(url)
      unless ALLOWED_SCHEMES.include?(uri.scheme&.downcase)
        raise ArgumentError, "Invalid scheme for s3 URI #{url}, only http, https and s3 are permitted"
      end
      raise ArgumentError, "Invalid S3 URI, no hostname: #{url}" if uri.host.nil?

      uri
    end

    # Match the host of the uri against the S3 endpoint pattern
    # @param uri [URI] parsed uri with a non-nil host
    # @return [MatchData] the match against ENDPOINT_PATTERN
    # @raise [ArgumentError] if the host does not match the pattern
    def self.match_endpoint(uri)
      matches = ENDPOINT_PATTERN.match(uri.host)
      if matches.nil?
        raise ArgumentError, "Provided URI does not match the expected pattern for an S3 URI"
      end
      matches
    end
    private_class_method :match_endpoint
  end
end
@@ -0,0 +1,215 @@
1
+ require 'longleaf/candidates/file_selector'
2
+ require 'longleaf/candidates/registered_file_selector'
3
+ require 'longleaf/candidates/manifest_digest_provider'
4
+ require 'longleaf/candidates/physical_path_provider'
5
+ require 'longleaf/candidates/single_digest_provider'
6
+
7
+ module Longleaf
8
+ # Helper for parsing manifest inputs used for registration
9
+ class SelectionOptionsParser
10
+ extend Longleaf::Logging
11
+
12
# Parses the provided options to construct a file selector and digest provider for
# use in registration commands.
#
# Files are selected either from manifests (options[:manifest]) or from a comma separated
# list of paths (options[:file]). With options[:file], options[:checksums] may supply a
# comma delimited list of <algorithm>:<checksum> pairs and options[:physical_path] may
# supply one physical path per logical path. On invalid or missing selection options a
# failure is logged and the process exits with status 1.
# @param options [Hash] command options
# @param app_config_manager [ApplicationConfigManager] app config manager
# @return [Array] the file selector, the digest provider (nil when no digests were supplied)
#    and the physical path provider.
def self.parse_registration_selection_options(options, app_config_manager)
  there_can_be_only_one("Only one of the following selection options may be provided: -m, -f, -s",
      options, :file, :manifest, :location)

  if !options[:manifest].nil?
    digests_mapping, logical_phys_mapping = self.parse_manifest(options[:manifest])
    physical_provider = PhysicalPathProvider.new(logical_phys_mapping)
    selector = FileSelector.new(file_paths: digests_mapping.keys,
                                physical_provider: physical_provider,
                                app_config: app_config_manager)
    digest_provider = ManifestDigestProvider.new(digests_mapping)
  elsif !options[:file].nil?
    if options[:checksums]
      checksums = options[:checksums]
      # validate checksum list format, must be a comma delimited list of prefix:checksums.
      # \A and \z anchor the whole value; ^ and $ would accept a multi-line value as soon
      # as any single line of it was well formed.
      if /\A[^:,]+:[^:,]+(,[^:,]+:[^:,]+)*\z/.match(checksums)
        # convert checksum list into hash with prefix as key
        checksums = Hash[*checksums.split(/\s*[:,]\s*/)]
        digest_provider = SingleDigestProvider.new(checksums)
      else
        logger.failure("Invalid checksums parameter format, see `longleaf help <command>` for more information")
        exit 1
      end
    end

    file_paths = self.split_quoted(options[:file], "\\s*,\\s*")
    if !options[:physical_path].nil?
      physical_paths = self.split_quoted(options[:physical_path], "\\s*,\\s*")
      if physical_paths.length != file_paths.length
        logger.failure("Invalid physical paths parameter, number of paths did not match number of logical paths")
        exit 1
      end
      # Pair each logical path with the physical path at the same position
      logical_phys_mapping = Hash[file_paths.zip physical_paths]
      physical_provider = PhysicalPathProvider.new(logical_phys_mapping)
    else
      physical_provider = PhysicalPathProvider.new
    end

    selector = FileSelector.new(file_paths: file_paths,
                                physical_provider: physical_provider,
                                app_config: app_config_manager)
  else
    logger.failure("Must provide one of the following file selection options: -f, l, or -m")
    exit 1
  end

  [selector, digest_provider, physical_provider]
end
65
+
66
# Verifies that no more than one of the named options has a non-nil value. When a second
# populated option is encountered, the failure message is logged and the process exits
# with status 1.
# @param failure_msg [String] message logged if more than one option is populated
# @param options [Hash] command options
# @param names [Array] keys of the mutually exclusive options
def self.there_can_be_only_one(failure_msg, options, *names)
  already_found = false
  names.each do |option_name|
    next if options[option_name].nil?

    if already_found
      logger.failure(failure_msg)
      exit 1
    end
    already_found = true
  end
end
78
+
79
# Parses the provided manifest options, reading the contents of the manifests to produce
# a mapping from files to one or more algorithms.
#
# Each manifest entry is "<digest> <logical path>", optionally followed by a physical path.
# A manifest given without an algorithm must introduce its entries with "<alg_name>:"
# header lines. Any invalid input is reported through fail, which exits the process.
# @param manifest_vals [Array] List of manifest option values. They may be in one of the following formats:
#    <alg_name>:<manifest_path> OR <alg_name>:@-
#    <manifest_path> OR @-
# @return [Array] two hashes. The first contains the aggregated contents of the provided
#    manifests: the keys are paths to manifested files and the values are hashes mapping digest
#    algorithms to digest values. The second maps logical paths to physical paths for the
#    entries which supplied one.
def self.parse_manifest(manifest_vals)
  # interpret option inputs into a list of [algorithm, manifest source] pairs
  alg_manifest_pairs = manifest_vals.map do |manifest_val|
    if manifest_val.include?(':')
      manifest_val.split(':', 2)
    else
      # algorithm not specified in option value
      [nil, manifest_val]
    end
  end
  if alg_manifest_pairs.count { |_alg, source| source == '@-' } > 1
    self.fail("Cannot specify more than one manifest from STDIN")
  end

  # read the provided manifests to build a mapping from file uri to all supplied digests
  digests_mapping = Hash.new { |h, k| h[k] = Hash.new }
  logical_phys_mapping = Hash.new
  alg_manifest_pairs.each do |manifest_alg, source|
    # Determine if reading from a manifest file or stdin
    from_stdin = source == '@-'
    source_stream = from_stdin ? $stdin : File.new(source)

    begin
      current_alg = manifest_alg
      multi_digest_manifest = current_alg.nil?
      source_stream.each_line do |line|
        line = line.strip
        if multi_digest_manifest && /^[a-zA-Z0-9]+:$/ =~ line
          # Found a digest algorithm header, assuming succeeding entries are of this type
          current_alg = line.chomp(':')
          # Verify that the digest algorithm is known to longleaf
          if !DigestHelper.is_known_algorithm?(current_alg)
            self.fail("Manifest specifies unknown digest algorithm: #{current_alg}")
          end
        else
          if current_alg.nil?
            self.fail("Manifest with unknown checksums encountered, an algorithm must be specified")
          end
          entry_parts = self.split_quoted(line)
          if entry_parts.length != 2 && entry_parts.length != 3
            self.fail("Invalid manifest entry: #{line}")
          end

          digests_mapping[entry_parts[1]][current_alg] = entry_parts[0]
          if entry_parts.length == 3
            logical_phys_mapping[entry_parts[1]] = entry_parts[2]
          end
        end
      end
    ensure
      # Release manifest file handles, including when a failure exits mid-read.
      # STDIN is left open since it was not opened here.
      source_stream.close unless from_stdin
    end
  end

  [digests_mapping, logical_phys_mapping]
end
144
+
145
# Splits a string into tokens on the given delimiter, ignoring delimiters which appear
# inside of single or double quoted sections. Empty tokens are discarded, and leading or
# trailing spaces and quote characters are removed from each remaining token.
# @param text [String] the text to split
# @param delimiter [String] regular expression source for the delimiter. Defaults to whitespace.
# @param limit [Integer] limit passed through to String#split
# @return [Array] the cleaned up tokens
def self.split_quoted(text, delimiter = "\\s+", limit = -1)
  # The lookahead only allows a split where the rest of the line has balanced quotes
  boundary = /#{delimiter}(?=(?:[^'"]|'[^']*'|"[^"]*")*$)/
  tokens = text.split(boundary, limit).reject(&:empty?)
  tokens.map { |token| token.gsub(/(^ +)|( +$)|(^["']+)|(["']+$)/, '') }
end
152
+
153
# Builds a selector for registered files from the provided command options. The files are
# chosen by a list read from a file or STDIN (:from_list), a comma separated list of
# paths (:file), or a comma separated list of storage locations (:location). If none of
# these are given, a failure is logged and the process exits with status 1.
# @param options [Hash] command options
# @param app_config_manager [ApplicationConfigManager] app config manager
# @return selector
def self.create_registered_selector(options, app_config_manager)
  there_can_be_only_one("Only one of the following selection options may be provided: -l, -f, -s",
      options, :file, :location, :from_list)

  list_source = options[:from_list]
  unless list_source.nil?
    listed_paths = read_from_list(list_source)
    return RegisteredFileSelector.new(file_paths: listed_paths, app_config: app_config_manager)
  end

  files_value = options[:file]
  unless files_value.nil?
    return RegisteredFileSelector.new(file_paths: files_value.split(/\s*,\s*/),
                                      app_config: app_config_manager)
  end

  locations_value = options[:location]
  unless locations_value.nil?
    return RegisteredFileSelector.new(storage_locations: locations_value.split(/\s*,\s*/),
                                      app_config: app_config_manager)
  end

  logger.failure("Must provide one of the following file selection options: -l, -f, or -s")
  exit 1
end
175
+
176
# Parses the -l from_list option, reading the list of files specified either from the provided
# file path or STDIN. Each line is stripped of surrounding whitespace. If the option value is
# blank, the list file does not exist, or no lines are read, a failure is logged and the
# process exits with status 1.
# @param from_list option value, either a file path or "@-"
# @return list of files from the from_list
def self.read_from_list(from_list)
  from_list = from_list.strip
  if from_list.empty?
    logger.failure("List parameter must not be empty")
    exit 1
  end

  lines =
    if from_list == '@-'
      # STDIN was not opened here, so it is read but deliberately left open
      $stdin.each_line.map(&:strip)
    else
      begin
        # Block form closes the list file as soon as its lines have been read
        File.open(from_list) { |list_file| list_file.each_line.map(&:strip) }
      rescue Errno::ENOENT
        logger.failure("Specified list file does not exist: #{from_list}")
        exit 1
      end
    end

  if lines.empty?
    logger.failure("File list is empty, must provide one or more files for this operation")
    exit 1
  end
  lines
end
209
+
210
# Logs the provided message as a failure and then exits with status 1.
# @param message [String] description of the failure
def self.fail(message)
  logger.failure(message)
  exit(1)
end
214
+ end
215
+ end
@@ -0,0 +1,78 @@
1
+ require 'time'
2
+
3
module Longleaf
  # Helper methods for interacting with dates/timestamps on services
  class ServiceDateHelper
    # Length in seconds of each supported time unit. Months and years are fixed length
    # (30 and 365 days respectively) rather than calendar aware.
    SECONDS_PER_UNIT = {
      'second' => 1,
      'minute' => 60,
      'hour' => 3600,
      'day' => 24 * 3600,
      'week' => 7 * 24 * 3600,
      'month' => 30 * 24 * 3600,
      'year' => 365 * 24 * 3600
    }.freeze

    # Adds the amount of time from modifier to the provided timestamp
    # @param timestamp [String] ISO-8601 timestamp string
    # @param modifier [String] amount of time to add to the timestamp. It must follow the syntax
    #    "<quantity> <time unit>", where quantity must be a positive whole number and time unit
    #    must be second, minute, hour, day, week, month or year (unit may be plural).
    #    Any info after a comma will be ignored.
    # @return [String] the original timestamp in ISO-8601 format with the provided amount of time added.
    # @raise [ArgumentError] if the modifier cannot be parsed
    def self.add_to_timestamp(timestamp, modifier)
      unless modifier =~ /^(\d+) *(second|minute|hour|day|week|month|year)s?(,.*)?/
        raise ArgumentError, "Cannot parse time modifier #{modifier}"
      end
      quantity = Regexp.last_match(1).to_i
      unit = Regexp.last_match(2)

      modified_time = Time.iso8601(timestamp) + (quantity * SECONDS_PER_UNIT.fetch(unit))
      modified_time.iso8601(3)
    end

    # Get a timestamp in the format expected for service timestamps.
    # @param timestamp [Time] the time to format. Defaults to now.
    # @return [String] the time formatted as iso8601
    def self.formatted_timestamp(timestamp = Time.now)
      # getutc returns a new Time, whereas utc would convert the caller's object in place
      timestamp.getutc.iso8601(3)
    end

    # Get the timestamp for the next time the provided service would need to be run
    # for the object described by md_rec
    # @param md_rec [MetadataRecord] metadata record for the file
    # @param service_def [ServiceDefinition] definition for the service
    # @return [String] iso8601 timestamp for the next time the service will need to run, or
    #    nil if the service does not need to run again.
    # @raise [ArgumentError] if md_rec or service_def is nil
    def self.next_run_needed(md_rec, service_def)
      raise ArgumentError, 'Must provide a md_rec parameter' if md_rec.nil?
      raise ArgumentError, 'Must provide a service_def parameter' if service_def.nil?

      service_rec = md_rec.service(service_def.name)

      # Service has no recorded run yet, so it is due relative to the registration time
      if service_rec.nil? || service_rec.timestamp.nil?
        return md_rec.registered if service_def.delay.nil?

        return add_to_timestamp(md_rec.registered, service_def.delay)
      end

      # Service has already run. Without a frequency it does not need to run again.
      return nil if service_def.frequency.nil?

      add_to_timestamp(service_rec.timestamp, service_def.frequency)
    end
  end
end
@@ -0,0 +1,101 @@
1
+ require 'longleaf/models/system_config_fields'
2
+ require 'longleaf/services/metadata_persistence_manager'
3
+ require 'longleaf/errors'
4
+
5
module Longleaf
  # Manager configures and provides access to a metadata index if one is specified.
  #
  # Apart from using_index?, every public method forwards directly to the index driver,
  # so callers should check using_index? first when an index may not be configured.
  class IndexManager
    SYS_FIELDS ||= Longleaf::SystemConfigFields

    # @param config [Hash] The system configuration as a hash
    # @param app_config_manager [ApplicationConfigManager] the application config
    # @raise [ConfigurationError] if an index is configured without an adapter, without
    #    connection details, or with an unsupported adapter
    def initialize(config, app_config_manager)
      @config = config
      @app_config_manager = app_config_manager
      # Only build a driver when the system config contains a metadata index section
      init_index_driver if @config&.key?(SYS_FIELDS::MD_INDEX)
    end

    # @return true if the system is configured to use a metadata index
    def using_index?
      !@index_driver.nil?
    end

    # Index the provided file_rec and its metadata
    #
    # @param file_rec [FileRecord] file record to index
    def index(file_rec)
      @index_driver.index(file_rec)
    end

    # Remove an entry from the index
    # @param remove_me The record to remove from the index
    def remove(remove_me)
      @index_driver.remove(remove_me)
    end

    # Clear entries from the index, forwarding to the index driver.
    # @param older_than passed through to the driver unchanged. Defaults to nil.
    #    NOTE(review): presumably limits clearing to entries older than this value - confirm
    #    against the driver.
    def clear_index(older_than = nil)
      @index_driver.clear_index(older_than)
    end

    # @return true if the index should be reindexed
    def index_stale?
      @index_driver.is_stale?
    end

    # Setup initial structure of index implementation
    def setup_index
      @index_driver.setup_index
    end

    # Forwards to the index driver's update_index_state.
    def update_index_state
      @index_driver.update_index_state
    end

    # Retrieves a set of file paths which have one or more services which need to run.
    #
    # @param file_selector [FileSelector] selector for paths to search for files
    # @param stale_datetime [DateTime] find file_paths with services needing to be run before this value
    # @return [Array] array of file paths that need one or more services run, in ascending order by
    #    timestamp.
    def paths_with_stale_services(file_selector, stale_datetime)
      @index_driver.paths_with_stale_services(file_selector, stale_datetime)
    end

    # Retrieves a page of paths for registered files.
    # @param file_selector [FileSelector] selector for what paths to search for files
    # @return [Array] array of file paths that are registered
    def registered_paths(file_selector)
      @index_driver.registered_paths(file_selector)
    end

    # Forwards to the index driver's each_registered_path, passing along the given block.
    # @param file_selector [FileSelector] selector for what paths to search for files
    # @param older_than passed through to the driver unchanged. Defaults to nil.
    def each_registered_path(file_selector, older_than: nil, &block)
      @index_driver.each_registered_path(file_selector, older_than: older_than, &block)
    end

    private

    # Builds the index driver described by the metadata index section of the system config.
    # @raise [ConfigurationError] if the adapter or connection details are missing, or the
    #    adapter is not one of the supported types
    def init_index_driver
      index_conf = @config[SYS_FIELDS::MD_INDEX]
      adapter = index_conf[SYS_FIELDS::MD_INDEX_ADAPTER]&.downcase

      raise ConfigurationError.new('Must specify an adapter for the metadata index') if adapter.nil?

      adapter = adapter.to_sym

      case adapter
      when :postgres, :mysql, :mysql2, :sqlite, :amalgalite
        page_size = index_conf[SYS_FIELDS::MD_INDEX_PAGE_SIZE]&.to_int

        connection = index_conf[SYS_FIELDS::MD_INDEX_CONNECTION]
        raise ConfigurationError.new("Must specify connection details for index adapter of type '#{adapter}'") if connection.nil?

        # Loaded lazily so the driver is only required when an index is configured
        require 'longleaf/indexing/sequel_index_driver'
        @index_driver = SequelIndexDriver.new(@app_config_manager,
            adapter,
            connection,
            page_size: page_size)
      else
        # adapter is never nil here, so this must be raised unconditionally; otherwise an
        # unsupported adapter is silently accepted and no driver is ever created
        raise ConfigurationError.new("Unknown index adapter '#{adapter}' specified.")
      end
    end
  end
end