longleaf 0.1.0.pre.3 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (185) hide show
  1. checksums.yaml +4 -4
  2. data/.circleci/config.yml +94 -0
  3. data/.editorconfig +13 -0
  4. data/.gitignore +4 -1
  5. data/.rubocop.yml +44 -0
  6. data/.rubocop_todo.yml +834 -0
  7. data/.yardopts +1 -0
  8. data/Gemfile +16 -1
  9. data/README.md +98 -12
  10. data/Rakefile +6 -0
  11. data/bin/setup +16 -1
  12. data/docs/aboutlongleaf.md +28 -0
  13. data/docs/extra.css +32 -0
  14. data/docs/img/change-file.png +0 -0
  15. data/docs/img/ll-example-preserved.png +0 -0
  16. data/docs/index.md +19 -0
  17. data/docs/install.md +66 -0
  18. data/docs/ll-example/config-example-relative.yml +33 -0
  19. data/docs/ll-example/files-dir/LLexample-PDF.pdf +0 -0
  20. data/docs/ll-example/files-dir/LLexample-TOCHANGE.txt +15 -0
  21. data/docs/ll-example/files-dir/LLexample-tokeep.txt +10 -0
  22. data/docs/ll-example/metadata-dir/.gitkeep +0 -0
  23. data/docs/ll-example/replica-files/.gitkeep +0 -0
  24. data/docs/ll-example/replica-metadata/.gitkeep +0 -0
  25. data/docs/quickstart.md +270 -0
  26. data/docs/rdocs/Longleaf.html +135 -0
  27. data/docs/rdocs/Longleaf/AppFields.html +178 -0
  28. data/docs/rdocs/Longleaf/ApplicationConfigDeserializer.html +631 -0
  29. data/docs/rdocs/Longleaf/ApplicationConfigManager.html +610 -0
  30. data/docs/rdocs/Longleaf/ApplicationConfigValidator.html +238 -0
  31. data/docs/rdocs/Longleaf/CLI.html +909 -0
  32. data/docs/rdocs/Longleaf/ChecksumMismatchError.html +151 -0
  33. data/docs/rdocs/Longleaf/ConfigBuilder.html +1339 -0
  34. data/docs/rdocs/Longleaf/ConfigurationError.html +143 -0
  35. data/docs/rdocs/Longleaf/ConfigurationValidator.html +227 -0
  36. data/docs/rdocs/Longleaf/DeregisterCommand.html +420 -0
  37. data/docs/rdocs/Longleaf/DeregisterEvent.html +453 -0
  38. data/docs/rdocs/Longleaf/DeregistrationError.html +151 -0
  39. data/docs/rdocs/Longleaf/DigestHelper.html +419 -0
  40. data/docs/rdocs/Longleaf/EventError.html +147 -0
  41. data/docs/rdocs/Longleaf/EventNames.html +163 -0
  42. data/docs/rdocs/Longleaf/EventStatusTracking.html +656 -0
  43. data/docs/rdocs/Longleaf/FileCheckService.html +540 -0
  44. data/docs/rdocs/Longleaf/FileHelpers.html +520 -0
  45. data/docs/rdocs/Longleaf/FileRecord.html +716 -0
  46. data/docs/rdocs/Longleaf/FileSelector.html +901 -0
  47. data/docs/rdocs/Longleaf/FixityCheckService.html +691 -0
  48. data/docs/rdocs/Longleaf/IndexManager.html +1155 -0
  49. data/docs/rdocs/Longleaf/InvalidDigestAlgorithmError.html +143 -0
  50. data/docs/rdocs/Longleaf/InvalidStoragePathError.html +143 -0
  51. data/docs/rdocs/Longleaf/Logging.html +405 -0
  52. data/docs/rdocs/Longleaf/Logging/RedirectingLogger.html +1213 -0
  53. data/docs/rdocs/Longleaf/LongleafError.html +139 -0
  54. data/docs/rdocs/Longleaf/MDFields.html +193 -0
  55. data/docs/rdocs/Longleaf/MetadataBuilder.html +787 -0
  56. data/docs/rdocs/Longleaf/MetadataDeserializer.html +537 -0
  57. data/docs/rdocs/Longleaf/MetadataError.html +143 -0
  58. data/docs/rdocs/Longleaf/MetadataPersistenceManager.html +539 -0
  59. data/docs/rdocs/Longleaf/MetadataRecord.html +1411 -0
  60. data/docs/rdocs/Longleaf/MetadataSerializer.html +786 -0
  61. data/docs/rdocs/Longleaf/PreservationServiceError.html +147 -0
  62. data/docs/rdocs/Longleaf/PreserveCommand.html +410 -0
  63. data/docs/rdocs/Longleaf/PreserveEvent.html +491 -0
  64. data/docs/rdocs/Longleaf/RegisterCommand.html +428 -0
  65. data/docs/rdocs/Longleaf/RegisterEvent.html +628 -0
  66. data/docs/rdocs/Longleaf/RegisteredFileSelector.html +446 -0
  67. data/docs/rdocs/Longleaf/RegistrationError.html +151 -0
  68. data/docs/rdocs/Longleaf/ReindexCommand.html +576 -0
  69. data/docs/rdocs/Longleaf/RsyncReplicationService.html +1180 -0
  70. data/docs/rdocs/Longleaf/SequelIndexDriver.html +1978 -0
  71. data/docs/rdocs/Longleaf/ServiceCandidateFilesystemIterator.html +572 -0
  72. data/docs/rdocs/Longleaf/ServiceCandidateIndexIterator.html +532 -0
  73. data/docs/rdocs/Longleaf/ServiceCandidateLocator.html +333 -0
  74. data/docs/rdocs/Longleaf/ServiceClassCache.html +725 -0
  75. data/docs/rdocs/Longleaf/ServiceDateHelper.html +425 -0
  76. data/docs/rdocs/Longleaf/ServiceDefinition.html +683 -0
  77. data/docs/rdocs/Longleaf/ServiceDefinitionManager.html +371 -0
  78. data/docs/rdocs/Longleaf/ServiceDefinitionValidator.html +269 -0
  79. data/docs/rdocs/Longleaf/ServiceFields.html +173 -0
  80. data/docs/rdocs/Longleaf/ServiceManager.html +1229 -0
  81. data/docs/rdocs/Longleaf/ServiceMappingManager.html +410 -0
  82. data/docs/rdocs/Longleaf/ServiceMappingValidator.html +347 -0
  83. data/docs/rdocs/Longleaf/ServiceRecord.html +821 -0
  84. data/docs/rdocs/Longleaf/StorageLocation.html +985 -0
  85. data/docs/rdocs/Longleaf/StorageLocationManager.html +729 -0
  86. data/docs/rdocs/Longleaf/StorageLocationUnavailableError.html +143 -0
  87. data/docs/rdocs/Longleaf/StorageLocationValidator.html +373 -0
  88. data/docs/rdocs/Longleaf/StoragePathValidator.html +253 -0
  89. data/docs/rdocs/Longleaf/SystemConfigBuilder.html +441 -0
  90. data/docs/rdocs/Longleaf/SystemConfigFields.html +163 -0
  91. data/docs/rdocs/Longleaf/ValidateConfigCommand.html +451 -0
  92. data/docs/rdocs/Longleaf/ValidateMetadataCommand.html +408 -0
  93. data/docs/rdocs/_index.html +660 -0
  94. data/docs/rdocs/class_list.html +51 -0
  95. data/docs/rdocs/css/common.css +1 -0
  96. data/docs/rdocs/css/full_list.css +58 -0
  97. data/docs/rdocs/css/style.css +496 -0
  98. data/docs/rdocs/file.README.html +165 -0
  99. data/docs/rdocs/file_list.html +56 -0
  100. data/docs/rdocs/frames.html +17 -0
  101. data/docs/rdocs/index.html +165 -0
  102. data/docs/rdocs/js/app.js +303 -0
  103. data/docs/rdocs/js/full_list.js +216 -0
  104. data/docs/rdocs/js/jquery.js +4 -0
  105. data/docs/rdocs/method_list.html +2051 -0
  106. data/docs/rdocs/top-level-namespace.html +110 -0
  107. data/lib/longleaf/candidates/file_selector.rb +150 -0
  108. data/lib/longleaf/candidates/manifest_digest_provider.rb +17 -0
  109. data/lib/longleaf/candidates/physical_path_provider.rb +17 -0
  110. data/lib/longleaf/candidates/registered_file_selector.rb +67 -0
  111. data/lib/longleaf/candidates/service_candidate_filesystem_iterator.rb +93 -0
  112. data/lib/longleaf/candidates/service_candidate_index_iterator.rb +84 -0
  113. data/lib/longleaf/candidates/service_candidate_locator.rb +23 -0
  114. data/lib/longleaf/candidates/single_digest_provider.rb +13 -0
  115. data/lib/longleaf/cli.rb +249 -44
  116. data/lib/longleaf/commands/deregister_command.rb +51 -0
  117. data/lib/longleaf/commands/preserve_command.rb +50 -0
  118. data/lib/longleaf/commands/register_command.rb +34 -43
  119. data/lib/longleaf/commands/reindex_command.rb +92 -0
  120. data/lib/longleaf/commands/validate_config_command.rb +33 -8
  121. data/lib/longleaf/commands/validate_metadata_command.rb +51 -0
  122. data/lib/longleaf/errors.rb +26 -7
  123. data/lib/longleaf/events/deregister_event.rb +53 -0
  124. data/lib/longleaf/events/event_names.rb +9 -0
  125. data/lib/longleaf/events/event_status_tracking.rb +59 -0
  126. data/lib/longleaf/events/preserve_event.rb +82 -0
  127. data/lib/longleaf/events/register_event.rb +59 -51
  128. data/lib/longleaf/helpers/case_insensitive_hash.rb +38 -0
  129. data/lib/longleaf/helpers/digest_helper.rb +56 -0
  130. data/lib/longleaf/helpers/s3_uri_helper.rb +86 -0
  131. data/lib/longleaf/helpers/selection_options_parser.rb +215 -0
  132. data/lib/longleaf/helpers/service_date_helper.rb +78 -0
  133. data/lib/longleaf/indexing/index_manager.rb +101 -0
  134. data/lib/longleaf/indexing/sequel_index_driver.rb +306 -0
  135. data/lib/longleaf/logging.rb +5 -4
  136. data/lib/longleaf/logging/redirecting_logger.rb +30 -25
  137. data/lib/longleaf/models/app_fields.rb +7 -2
  138. data/lib/longleaf/models/file_record.rb +31 -8
  139. data/lib/longleaf/models/filesystem_metadata_location.rb +56 -0
  140. data/lib/longleaf/models/filesystem_storage_location.rb +52 -0
  141. data/lib/longleaf/models/md_fields.rb +3 -1
  142. data/lib/longleaf/models/metadata_location.rb +47 -0
  143. data/lib/longleaf/models/metadata_record.rb +43 -16
  144. data/lib/longleaf/models/s3_storage_location.rb +138 -0
  145. data/lib/longleaf/models/service_definition.rb +7 -6
  146. data/lib/longleaf/models/service_fields.rb +7 -1
  147. data/lib/longleaf/models/service_record.rb +10 -6
  148. data/lib/longleaf/models/storage_location.rb +24 -21
  149. data/lib/longleaf/models/storage_types.rb +9 -0
  150. data/lib/longleaf/models/system_config_fields.rb +9 -0
  151. data/lib/longleaf/preservation_services/file_check_service.rb +59 -0
  152. data/lib/longleaf/preservation_services/fixity_check_service.rb +124 -0
  153. data/lib/longleaf/preservation_services/rsync_replication_service.rb +198 -0
  154. data/lib/longleaf/preservation_services/s3_replication_service.rb +131 -0
  155. data/lib/longleaf/services/application_config_deserializer.rb +80 -21
  156. data/lib/longleaf/services/application_config_manager.rb +20 -6
  157. data/lib/longleaf/services/application_config_validator.rb +19 -9
  158. data/lib/longleaf/services/configuration_validator.rb +67 -4
  159. data/lib/longleaf/services/filesystem_location_validator.rb +16 -0
  160. data/lib/longleaf/services/metadata_deserializer.rb +115 -42
  161. data/lib/longleaf/services/metadata_persistence_manager.rb +47 -0
  162. data/lib/longleaf/services/metadata_serializer.rb +139 -25
  163. data/lib/longleaf/services/metadata_validator.rb +76 -0
  164. data/lib/longleaf/services/s3_location_validator.rb +19 -0
  165. data/lib/longleaf/services/service_class_cache.rb +112 -0
  166. data/lib/longleaf/services/service_definition_manager.rb +10 -7
  167. data/lib/longleaf/services/service_definition_validator.rb +25 -18
  168. data/lib/longleaf/services/service_manager.rb +86 -11
  169. data/lib/longleaf/services/service_mapping_manager.rb +13 -12
  170. data/lib/longleaf/services/service_mapping_validator.rb +36 -26
  171. data/lib/longleaf/services/storage_location_manager.rb +76 -15
  172. data/lib/longleaf/services/storage_location_validator.rb +49 -35
  173. data/lib/longleaf/specs/config_builder.rb +47 -23
  174. data/lib/longleaf/specs/config_validator_helpers.rb +16 -0
  175. data/lib/longleaf/specs/custom_matchers.rb +9 -0
  176. data/lib/longleaf/specs/file_helpers.rb +61 -0
  177. data/lib/longleaf/specs/metadata_builder.rb +98 -0
  178. data/lib/longleaf/specs/system_config_builder.rb +27 -0
  179. data/lib/longleaf/version.rb +1 -1
  180. data/longleaf.gemspec +20 -7
  181. data/mkdocs.yml +21 -0
  182. metadata +310 -26
  183. data/.travis.yml +0 -4
  184. data/lib/longleaf/commands/abstract_command.rb +0 -37
  185. data/lib/longleaf/services/storage_path_validator.rb +0 -16
@@ -0,0 +1,86 @@
1
+ require 'uri'
2
+
3
module Longleaf
  # Helper for interacting with s3 uris
  class S3UriHelper
    # Matches the host of an S3 endpoint. Capture 1 is the optional host prefix that
    # precedes "s3" (including its trailing '.'), which extract_bucket treats as the
    # bucket name. Capture 2 is the optional segment that follows "s3" (including its
    # trailing separator), which extract_region treats as the region.
    ENDPOINT_PATTERN = /^(.+\.)?s3[.\-]([a-z0-9\-]+[\-.])?[a-z0-9]+\./
    ALLOWED_SCHEMES = ['http', 'https', 's3']

    # Extract the name of the s3 bucket from the provided url
    # @param url s3 url
    # @return the name of the bucket, or nil if the name could not be identified
    # @raise [ArgumentError] if the url is nil, has a disallowed scheme, has no host,
    #    or its host does not match ENDPOINT_PATTERN
    def self.extract_bucket(url)
      uri = s3_uri(url)
      prefix = match_endpoint(uri)[1]

      # Host carries a prefix before "s3": use it as the bucket, minus the trailing '.'
      return prefix[0..-2] unless prefix.nil? || prefix.empty?

      # Is a path style url, the bucket is the first segment of the path
      path = uri.path
      return nil if path == '/'

      path_parts = path.split('/')
      return nil if path_parts.empty?
      # path_parts[0] is the empty string in front of the leading slash
      path_parts[1]
    end

    # Extract the path of the object within its bucket from the provided url
    # @param url s3 url
    # @return the path with the leading slash (and, for path style urls, the bucket
    #    name segment) removed, or nil if the url has no path or only '/'
    # @raise [ArgumentError] if the url is nil, has a disallowed scheme, has no host,
    #    or its host does not match ENDPOINT_PATTERN
    def self.extract_path(url)
      uri = s3_uri(url)
      prefix = match_endpoint(uri)[1]

      path = uri.path
      return nil if path == '/' || path.empty?

      # trim off the first slash
      path = path.partition('/').last

      # With no host prefix the first part of the path is the bucket name, trim it off
      path = path.partition('/').last if prefix.nil? || prefix.empty?

      path
    end

    # Extract the region segment from the host of the provided url
    # @param url s3 url
    # @return the region, or nil if the host does not specify one
    # @raise [ArgumentError] if the url is nil, has a disallowed scheme, has no host,
    #    or its host does not match ENDPOINT_PATTERN
    def self.extract_region(url)
      uri = s3_uri(url)
      region = match_endpoint(uri)[2]

      # No region specified
      return nil if region.nil?

      # Drop the trailing separator captured along with the region
      region[0..-2]
    end

    # Parse and validate the provided url
    # @param url s3 url
    # @return [URI] the parsed uri
    # @raise [ArgumentError] if the url is nil, its scheme is not one of
    #    ALLOWED_SCHEMES, or it has no hostname
    def self.s3_uri(url)
      if url.nil?
        raise ArgumentError.new("url cannot be empty")
      end
      uri = URI(url)
      if !ALLOWED_SCHEMES.include?(uri.scheme&.downcase)
        raise ArgumentError.new("Invalid scheme for s3 URI #{url}, only http, https and s3 are permitted")
      end
      if uri.host.nil?
        raise ArgumentError.new("Invalid S3 URI, no hostname: #{url}")
      end
      uri
    end

    # Match the host of the uri against ENDPOINT_PATTERN
    # @param uri [URI] parsed uri
    # @return [MatchData] the match result
    # @raise [ArgumentError] if the host does not match the pattern
    def self.match_endpoint(uri)
      matches = ENDPOINT_PATTERN.match(uri.host)
      if matches.nil?
        raise ArgumentError.new("Provided URI does not match the expected pattern for an S3 URI")
      end
      matches
    end
    private_class_method :match_endpoint
  end
end
@@ -0,0 +1,215 @@
1
+ require 'longleaf/candidates/file_selector'
2
+ require 'longleaf/candidates/registered_file_selector'
3
+ require 'longleaf/candidates/manifest_digest_provider'
4
+ require 'longleaf/candidates/physical_path_provider'
5
+ require 'longleaf/candidates/single_digest_provider'
6
+
7
module Longleaf
  # Helper for parsing manifest inputs used for registration
  class SelectionOptionsParser
    extend Longleaf::Logging

    # Parses the provided options to construct a file selector and digest provider for
    # use in registration commands. Logs a failure and exits with status 1 if the
    # options are invalid.
    # @param options [Hash] command options
    # @param app_config_manager [ApplicationConfigManager] app config manager
    # @return [Array] the file selector, the digest provider (nil when files are given
    #    without checksums) and the physical path provider, in that order.
    def self.parse_registration_selection_options(options, app_config_manager)
      there_can_be_only_one("Only one of the following selection options may be provided: -m, -f, -s",
          options, :file, :manifest, :location)

      if !options[:manifest].nil?
        digests_mapping, logical_phys_mapping = self.parse_manifest(options[:manifest])
        physical_provider = PhysicalPathProvider.new(logical_phys_mapping)
        selector = FileSelector.new(file_paths: digests_mapping.keys,
            physical_provider: physical_provider,
            app_config: app_config_manager)
        digest_provider = ManifestDigestProvider.new(digests_mapping)
      elsif !options[:file].nil?
        if options[:checksums]
          checksums = options[:checksums]
          # validate checksum list format, must be a comma delimited list of prefix:checksums
          if /^[^:,]+:[^:,]+(,[^:,]+:[^:,]+)*$/.match(checksums)
            # convert checksum list into hash with prefix as key
            checksums = Hash[*checksums.split(/\s*[:,]\s*/)]
            digest_provider = SingleDigestProvider.new(checksums)
          else
            logger.failure("Invalid checksums parameter format, see `longleaf help <command>` for more information")
            exit 1
          end
        end

        file_paths = self.split_quoted(options[:file], "\\s*,\\s*")
        if !options[:physical_path].nil?
          physical_paths = self.split_quoted(options[:physical_path], "\\s*,\\s*")
          if physical_paths.length != file_paths.length
            logger.failure("Invalid physical paths parameter, number of paths did not match number of logical paths")
            exit 1
          end
          # Pair each logical path with the physical path at the same position
          logical_phys_mapping = Hash[file_paths.zip physical_paths]
          physical_provider = PhysicalPathProvider.new(logical_phys_mapping)
        else
          physical_provider = PhysicalPathProvider.new
        end

        selector = FileSelector.new(file_paths: file_paths,
            physical_provider: physical_provider,
            app_config: app_config_manager)
      else
        # NOTE(review): the flags listed in this message look inconsistent with the ones
        # named in the "only one of" message above - confirm against the CLI definition.
        logger.failure("Must provide one of the following file selection options: -f, l, or -m")
        exit 1
      end

      [selector, digest_provider, physical_provider]
    end

    # Verifies that at most one of the named options was provided, otherwise logs the
    # failure message and exits with status 1.
    # @param failure_msg [String] message to log if more than one option is set
    # @param options [Hash] command options
    # @param names option keys which are mutually exclusive
    def self.there_can_be_only_one(failure_msg, options, *names)
      got_one = false
      names.each do |name|
        if !options[name].nil?
          if got_one
            logger.failure(failure_msg)
            exit 1
          end
          got_one = true
        end
      end
    end

    # Parses the provided manifest options, reading the contents of the manifests to produce
    # a mapping from files to one or more algorithms.
    # @param manifest_vals [Array] List of manifest option values. They may be in one of the following formats:
    #    <alg_name>:<manifest_path> OR <alg_name>:@-
    #    <manifest_path> OR @-
    # @return [Array] two hashes. The first contains the aggregated contents of the provided
    #    manifests: the keys are paths to manifested files, the values are hashes mapping digest
    #    algorithms to digest values. The second maps the path of each entry which supplied a
    #    third field to the value of that field.
    def self.parse_manifest(manifest_vals)
      alg_manifest_pairs = []
      # interpret option inputs into a list of algorithms to manifest sources
      manifest_vals.each do |manifest_val|
        if manifest_val.include?(':')
          manifest_parts = manifest_val.split(':', 2)
          alg_manifest_pairs << manifest_parts
        else
          # algorithm not specified in option value
          alg_manifest_pairs << [nil, manifest_val]
        end
      end
      if alg_manifest_pairs.count { |mpair| mpair[1] == '@-' } > 1
        self.fail("Cannot specify more than one manifest from STDIN")
      end

      # read the provided manifests to build a mapping from file uri to all supplied digests
      digests_mapping = Hash.new { |h, k| h[k] = Hash.new }
      logical_phys_mapping = Hash.new
      alg_manifest_pairs.each do |mpair|
        current_alg = mpair[0]
        # Without an algorithm in the option value, the manifest must declare its
        # algorithms through "<alg>:" header lines
        multi_digest_manifest = current_alg.nil?
        each_source_line(mpair[1]) do |line|
          line = line.strip
          if multi_digest_manifest && /^[a-zA-Z0-9]+:$/ =~ line
            # Found a digest algorithm header, assuming succeeding entries are of this type
            current_alg = line.chomp(':')
            # Verify that the digest algorithm is known to longleaf
            if !DigestHelper.is_known_algorithm?(current_alg)
              self.fail("Manifest specifies unknown digest algorithm: #{current_alg}")
            end
          else
            if current_alg.nil?
              self.fail("Manifest with unknown checksums encountered, an algorithm must be specified")
            end
            entry_parts = self.split_quoted(line)
            if entry_parts.length != 2 && entry_parts.length != 3
              self.fail("Invalid manifest entry: #{line}")
            end

            # Entry fields are: digest, path, and an optional third value
            digests_mapping[entry_parts[1]][current_alg] = entry_parts[0]
            if entry_parts.length == 3
              logical_phys_mapping[entry_parts[1]] = entry_parts[2]
            end
          end
        end
      end

      [digests_mapping, logical_phys_mapping]
    end

    # Splits a string of quoted or unquoted tokens separated by a delimiter. Delimiters
    # inside of single or double quoted sections do not split the token.
    # @param text [String] the text to split
    # @param delimiter [String] regular expression source for the delimiter. Defaults to whitespace.
    # @param limit [Integer] limit passed through to String#split
    # @return [Array] the non-empty tokens, with surrounding spaces and quotes removed
    def self.split_quoted(text, delimiter = "\\s+", limit = -1)
      text.split(/#{delimiter}(?=(?:[^'"]|'[^']*'|"[^"]*")*$)/, limit)
          .reject(&:empty?)
          .map { |s| s.gsub(/(^ +)|( +$)|(^["']+)|(["']+$)/, '') }
    end

    # Parses the provided options to create a selector for registered files. Logs a
    # failure and exits with status 1 if no selection option was provided.
    # @param options [Hash] command options
    # @param app_config_manager [ApplicationConfigManager] app config manager
    # @return selector
    def self.create_registered_selector(options, app_config_manager)
      there_can_be_only_one("Only one of the following selection options may be provided: -l, -f, -s",
          options, :file, :location, :from_list)

      if !options[:from_list].nil?
        file_paths = read_from_list(options[:from_list])
        return RegisteredFileSelector.new(file_paths: file_paths, app_config: app_config_manager)
      elsif !options[:file].nil?
        file_paths = options[:file].split(/\s*,\s*/)
        return RegisteredFileSelector.new(file_paths: file_paths, app_config: app_config_manager)
      elsif !options[:location].nil?
        storage_locations = options[:location].split(/\s*,\s*/)
        return RegisteredFileSelector.new(storage_locations: storage_locations, app_config: app_config_manager)
      else
        logger.failure("Must provide one of the following file selection options: -l, -f, or -s")
        exit 1
      end
    end

    # Parses the -l from_list option, reading the list of files specified either from the provided
    # file path or STDIN. Logs a failure and exits with status 1 if the option is blank,
    # the file does not exist, or no lines were read.
    # @param from_list option value, either a file path or "@-"
    # @return list of files from the from_list
    def self.read_from_list(from_list)
      from_list = from_list.strip
      if from_list.empty?
        logger.failure("List parameter must not be empty")
        exit 1
      end

      lines = []
      begin
        each_source_line(from_list) { |line| lines << line.strip }
      rescue Errno::ENOENT
        logger.failure("Specified list file does not exist: #{from_list}")
        exit 1
      end

      if lines.empty?
        logger.failure("File list is empty, must provide one or more files for this operation")
        exit 1
      end
      lines
    end

    # Logs the failure message and exits with status 1
    # @param message [String] message to log
    def self.fail(message)
      logger.failure(message)
      exit 1
    end

    # Yields each line read from the given source. The value "@-" reads from STDIN,
    # anything else is opened as a file path. A file opened here is closed again once
    # iteration ends, including when the block exits early.
    # @param source [String] a file path or "@-"
    def self.each_source_line(source, &block)
      if source == '@-'
        $stdin.each_line(&block)
      else
        File.open(source) { |stream| stream.each_line(&block) }
      end
    end
    private_class_method :each_source_line
  end
end
@@ -0,0 +1,78 @@
1
+ require 'time'
2
+
3
module Longleaf
  # Helper methods for interacting with dates/timestamps on services
  class ServiceDateHelper
    # Syntax of a time modifier: "<quantity> <time unit>", optionally pluralized and
    # optionally followed by a comma and ignored trailing text. Anchored with \A so that
    # only the start of the whole string is matched, not the start of any line.
    MODIFIER_PATTERN = /\A(\d+) *(second|minute|hour|day|week|month|year)s?(,.*)?/

    # Number of seconds added per time unit. Months and years are fixed lengths of
    # 30 and 365 days rather than calendar months and years.
    UNIT_SECONDS = {
      'second' => 1,
      'minute' => 60,
      'hour' => 3600,
      'day' => 24 * 3600,
      'week' => 7 * 24 * 3600,
      'month' => 30 * 24 * 3600,
      'year' => 365 * 24 * 3600
    }.freeze

    # Adds the amount of time from modifier to the provided timestamp
    # @param timestamp [String] ISO-8601 timestamp string
    # @param modifier [String] amount of time to add to the timestamp. It must follow the syntax
    #    "<quantity> <time unit>", where quantity must be a positive whole number and time unit
    #    must be second, minute, hour, day, week, month or year (unit may be plural).
    #    Any info after a comma will be ignored.
    # @return [String] the original timestamp in ISO-8601 format with the provided amount of time added.
    # @raise [ArgumentError] if the modifier cannot be parsed
    def self.add_to_timestamp(timestamp, modifier)
      parsed = MODIFIER_PATTERN.match(modifier)
      raise ArgumentError.new("Cannot parse time modifier #{modifier}") if parsed.nil?

      quantity = parsed[1].to_i
      seconds_per_unit = UNIT_SECONDS[parsed[2]]

      modified_time = Time.iso8601(timestamp) + (quantity * seconds_per_unit)
      modified_time.iso8601(3)
    end

    # Get a timestamp in the format expected for service timestamps.
    # @param timestamp [Time] the time to format. Defaults to now.
    # @return [String] the time in UTC, formatted as iso8601 with millisecond precision
    def self.formatted_timestamp(timestamp = Time.now)
      # getutc returns a new Time; Time#utc would convert the caller's object in place
      timestamp.getutc.iso8601(3)
    end

    # Get the timestamp for the next time the provided service would need to be run
    # for the object described by md_rec
    # @param md_rec [MetadataRecord] metadata record for the file
    # @param service_def [ServiceDefinition] definition for the service
    # @return [String] iso8601 timestamp for the next time the service will need to run, or
    #    nil if the service does not need to run again.
    # @raise [ArgumentError] if md_rec or service_def is nil
    def self.next_run_needed(md_rec, service_def)
      raise ArgumentError.new('Must provide a md_rec parameter') if md_rec.nil?
      raise ArgumentError.new('Must provide a service_def parameter') if service_def.nil?

      service_rec = md_rec.service(service_def.name)

      # Service has no recorded run: it is due from registration, plus any delay
      if service_rec.nil? || service_rec.timestamp.nil?
        return md_rec.registered if service_def.delay.nil?
        return add_to_timestamp(md_rec.registered, service_def.delay)
      end

      # Service has a recorded run: it only recurs if a frequency is defined
      return nil if service_def.frequency.nil?
      add_to_timestamp(service_rec.timestamp, service_def.frequency)
    end
  end
end
@@ -0,0 +1,101 @@
1
+ require 'longleaf/models/system_config_fields'
2
+ require 'longleaf/services/metadata_persistence_manager'
3
+ require 'longleaf/errors'
4
+
5
module Longleaf
  # Manager configures and provides access to a metadata index if one is specified
  class IndexManager
    SYS_FIELDS ||= Longleaf::SystemConfigFields

    # @param config [Hash] The system configuration as a hash
    # @param app_config_manager [ApplicationConfigManager] the application config
    # @raise [ConfigurationError] if a metadata index is configured but its adapter is
    #    missing or unknown, or its connection details are missing
    def initialize(config, app_config_manager)
      @config = config
      @app_config_manager = app_config_manager
      # An index driver is only created when the config has a metadata index section
      init_index_driver if @config&.key?(SYS_FIELDS::MD_INDEX)
    end

    # @return true if the system is configured to use a metadata index
    def using_index?
      !@index_driver.nil?
    end

    # Index the provided file_rec and its metadata
    #
    # @param file_rec [FileRecord] file record to index
    def index(file_rec)
      @index_driver.index(file_rec)
    end

    # Remove an entry from the index
    # @param remove_me The record to remove from the index
    def remove(remove_me)
      @index_driver.remove(remove_me)
    end

    # Delegates clearing of the index to the index driver
    # @param older_than optional cutoff passed through to the driver. Defaults to nil.
    def clear_index(older_than = nil)
      @index_driver.clear_index(older_than)
    end

    # @return true if the index should be reindexed
    def index_stale?
      @index_driver.is_stale?
    end

    # Setup initial structure of index implementation
    def setup_index
      @index_driver.setup_index
    end

    # Delegates updating of the index state to the index driver
    def update_index_state
      @index_driver.update_index_state
    end

    # Retrieves a set of file paths which have one or more services which need to run.
    #
    # @param file_selector [FileSelector] selector for paths to search for files
    # @param stale_datetime [DateTime] find file_paths with services needing to be run before this value
    # @return [Array] array of file paths that need one or more services run, in ascending order by
    #    timestamp.
    def paths_with_stale_services(file_selector, stale_datetime)
      @index_driver.paths_with_stale_services(file_selector, stale_datetime)
    end

    # Retrieves a page of paths for registered files.
    # @param file_selector [FileSelector] selector for what paths to search for files
    # @return [Array] array of file paths that are registered
    def registered_paths(file_selector)
      @index_driver.registered_paths(file_selector)
    end

    # Delegates iteration over registered paths to the index driver
    # @param file_selector [FileSelector] selector for what paths to search for files
    # @param older_than optional cutoff passed through to the driver. Defaults to nil.
    # @param block block passed through to the driver
    def each_registered_path(file_selector, older_than: nil, &block)
      @index_driver.each_registered_path(file_selector, older_than: older_than, &block)
    end

    private

    # Creates the index driver described by the metadata index section of the config
    # @raise [ConfigurationError] if the adapter is missing or unknown, or if the
    #    connection details are missing
    def init_index_driver
      index_conf = @config[SYS_FIELDS::MD_INDEX]
      adapter = index_conf[SYS_FIELDS::MD_INDEX_ADAPTER]&.downcase

      raise ConfigurationError.new('Must specify an adapter for the metadata index') if adapter.nil?

      adapter = adapter.to_sym

      case adapter
      when :postgres, :mysql, :mysql2, :sqlite, :amalgalite
        page_size = index_conf[SYS_FIELDS::MD_INDEX_PAGE_SIZE]&.to_int

        connection = index_conf[SYS_FIELDS::MD_INDEX_CONNECTION]
        raise ConfigurationError.new("Must specify connection details for index adapter of type '#{adapter}'") if connection.nil?

        # Loaded here so the driver is only required when an index is configured
        require 'longleaf/indexing/sequel_index_driver'
        @index_driver = SequelIndexDriver.new(@app_config_manager,
            adapter,
            connection,
            page_size: page_size)
      else
        # adapter cannot be nil at this point, so any unmatched adapter must be rejected
        raise ConfigurationError.new("Unknown index adapter '#{adapter}' specified.")
      end
    end
  end
end
+ end