longleaf 0.1.0.pre.2 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184) hide show
  1. checksums.yaml +4 -4
  2. data/.circleci/config.yml +94 -0
  3. data/.editorconfig +13 -0
  4. data/.gitignore +4 -1
  5. data/.rubocop.yml +44 -0
  6. data/.rubocop_todo.yml +834 -0
  7. data/.yardopts +1 -0
  8. data/Gemfile +16 -1
  9. data/README.md +98 -12
  10. data/Rakefile +6 -0
  11. data/bin/setup +16 -1
  12. data/docs/aboutlongleaf.md +28 -0
  13. data/docs/extra.css +32 -0
  14. data/docs/img/change-file.png +0 -0
  15. data/docs/img/ll-example-preserved.png +0 -0
  16. data/docs/index.md +19 -0
  17. data/docs/install.md +66 -0
  18. data/docs/ll-example/config-example-relative.yml +33 -0
  19. data/docs/ll-example/files-dir/LLexample-PDF.pdf +0 -0
  20. data/docs/ll-example/files-dir/LLexample-TOCHANGE.txt +15 -0
  21. data/docs/ll-example/files-dir/LLexample-tokeep.txt +10 -0
  22. data/docs/ll-example/metadata-dir/.gitkeep +0 -0
  23. data/docs/ll-example/replica-files/.gitkeep +0 -0
  24. data/docs/ll-example/replica-metadata/.gitkeep +0 -0
  25. data/docs/quickstart.md +270 -0
  26. data/docs/rdocs/Longleaf.html +135 -0
  27. data/docs/rdocs/Longleaf/AppFields.html +178 -0
  28. data/docs/rdocs/Longleaf/ApplicationConfigDeserializer.html +631 -0
  29. data/docs/rdocs/Longleaf/ApplicationConfigManager.html +610 -0
  30. data/docs/rdocs/Longleaf/ApplicationConfigValidator.html +238 -0
  31. data/docs/rdocs/Longleaf/CLI.html +909 -0
  32. data/docs/rdocs/Longleaf/ChecksumMismatchError.html +151 -0
  33. data/docs/rdocs/Longleaf/ConfigBuilder.html +1339 -0
  34. data/docs/rdocs/Longleaf/ConfigurationError.html +143 -0
  35. data/docs/rdocs/Longleaf/ConfigurationValidator.html +227 -0
  36. data/docs/rdocs/Longleaf/DeregisterCommand.html +420 -0
  37. data/docs/rdocs/Longleaf/DeregisterEvent.html +453 -0
  38. data/docs/rdocs/Longleaf/DeregistrationError.html +151 -0
  39. data/docs/rdocs/Longleaf/DigestHelper.html +419 -0
  40. data/docs/rdocs/Longleaf/EventError.html +147 -0
  41. data/docs/rdocs/Longleaf/EventNames.html +163 -0
  42. data/docs/rdocs/Longleaf/EventStatusTracking.html +656 -0
  43. data/docs/rdocs/Longleaf/FileCheckService.html +540 -0
  44. data/docs/rdocs/Longleaf/FileHelpers.html +520 -0
  45. data/docs/rdocs/Longleaf/FileRecord.html +716 -0
  46. data/docs/rdocs/Longleaf/FileSelector.html +901 -0
  47. data/docs/rdocs/Longleaf/FixityCheckService.html +691 -0
  48. data/docs/rdocs/Longleaf/IndexManager.html +1155 -0
  49. data/docs/rdocs/Longleaf/InvalidDigestAlgorithmError.html +143 -0
  50. data/docs/rdocs/Longleaf/InvalidStoragePathError.html +143 -0
  51. data/docs/rdocs/Longleaf/Logging.html +405 -0
  52. data/docs/rdocs/Longleaf/Logging/RedirectingLogger.html +1213 -0
  53. data/docs/rdocs/Longleaf/LongleafError.html +139 -0
  54. data/docs/rdocs/Longleaf/MDFields.html +193 -0
  55. data/docs/rdocs/Longleaf/MetadataBuilder.html +787 -0
  56. data/docs/rdocs/Longleaf/MetadataDeserializer.html +537 -0
  57. data/docs/rdocs/Longleaf/MetadataError.html +143 -0
  58. data/docs/rdocs/Longleaf/MetadataPersistenceManager.html +539 -0
  59. data/docs/rdocs/Longleaf/MetadataRecord.html +1411 -0
  60. data/docs/rdocs/Longleaf/MetadataSerializer.html +786 -0
  61. data/docs/rdocs/Longleaf/PreservationServiceError.html +147 -0
  62. data/docs/rdocs/Longleaf/PreserveCommand.html +410 -0
  63. data/docs/rdocs/Longleaf/PreserveEvent.html +491 -0
  64. data/docs/rdocs/Longleaf/RegisterCommand.html +428 -0
  65. data/docs/rdocs/Longleaf/RegisterEvent.html +628 -0
  66. data/docs/rdocs/Longleaf/RegisteredFileSelector.html +446 -0
  67. data/docs/rdocs/Longleaf/RegistrationError.html +151 -0
  68. data/docs/rdocs/Longleaf/ReindexCommand.html +576 -0
  69. data/docs/rdocs/Longleaf/RsyncReplicationService.html +1180 -0
  70. data/docs/rdocs/Longleaf/SequelIndexDriver.html +1978 -0
  71. data/docs/rdocs/Longleaf/ServiceCandidateFilesystemIterator.html +572 -0
  72. data/docs/rdocs/Longleaf/ServiceCandidateIndexIterator.html +532 -0
  73. data/docs/rdocs/Longleaf/ServiceCandidateLocator.html +333 -0
  74. data/docs/rdocs/Longleaf/ServiceClassCache.html +725 -0
  75. data/docs/rdocs/Longleaf/ServiceDateHelper.html +425 -0
  76. data/docs/rdocs/Longleaf/ServiceDefinition.html +683 -0
  77. data/docs/rdocs/Longleaf/ServiceDefinitionManager.html +371 -0
  78. data/docs/rdocs/Longleaf/ServiceDefinitionValidator.html +269 -0
  79. data/docs/rdocs/Longleaf/ServiceFields.html +173 -0
  80. data/docs/rdocs/Longleaf/ServiceManager.html +1229 -0
  81. data/docs/rdocs/Longleaf/ServiceMappingManager.html +410 -0
  82. data/docs/rdocs/Longleaf/ServiceMappingValidator.html +347 -0
  83. data/docs/rdocs/Longleaf/ServiceRecord.html +821 -0
  84. data/docs/rdocs/Longleaf/StorageLocation.html +985 -0
  85. data/docs/rdocs/Longleaf/StorageLocationManager.html +729 -0
  86. data/docs/rdocs/Longleaf/StorageLocationUnavailableError.html +143 -0
  87. data/docs/rdocs/Longleaf/StorageLocationValidator.html +373 -0
  88. data/docs/rdocs/Longleaf/StoragePathValidator.html +253 -0
  89. data/docs/rdocs/Longleaf/SystemConfigBuilder.html +441 -0
  90. data/docs/rdocs/Longleaf/SystemConfigFields.html +163 -0
  91. data/docs/rdocs/Longleaf/ValidateConfigCommand.html +451 -0
  92. data/docs/rdocs/Longleaf/ValidateMetadataCommand.html +408 -0
  93. data/docs/rdocs/_index.html +660 -0
  94. data/docs/rdocs/class_list.html +51 -0
  95. data/docs/rdocs/css/common.css +1 -0
  96. data/docs/rdocs/css/full_list.css +58 -0
  97. data/docs/rdocs/css/style.css +496 -0
  98. data/docs/rdocs/file.README.html +165 -0
  99. data/docs/rdocs/file_list.html +56 -0
  100. data/docs/rdocs/frames.html +17 -0
  101. data/docs/rdocs/index.html +165 -0
  102. data/docs/rdocs/js/app.js +303 -0
  103. data/docs/rdocs/js/full_list.js +216 -0
  104. data/docs/rdocs/js/jquery.js +4 -0
  105. data/docs/rdocs/method_list.html +2051 -0
  106. data/docs/rdocs/top-level-namespace.html +110 -0
  107. data/lib/longleaf/candidates/file_selector.rb +139 -0
  108. data/lib/longleaf/candidates/manifest_digest_provider.rb +17 -0
  109. data/lib/longleaf/candidates/registered_file_selector.rb +67 -0
  110. data/lib/longleaf/candidates/service_candidate_filesystem_iterator.rb +93 -0
  111. data/lib/longleaf/candidates/service_candidate_index_iterator.rb +84 -0
  112. data/lib/longleaf/candidates/service_candidate_locator.rb +23 -0
  113. data/lib/longleaf/candidates/single_digest_provider.rb +13 -0
  114. data/lib/longleaf/cli.rb +237 -46
  115. data/lib/longleaf/commands/deregister_command.rb +51 -0
  116. data/lib/longleaf/commands/preserve_command.rb +50 -0
  117. data/lib/longleaf/commands/register_command.rb +32 -43
  118. data/lib/longleaf/commands/reindex_command.rb +92 -0
  119. data/lib/longleaf/commands/validate_config_command.rb +33 -8
  120. data/lib/longleaf/commands/validate_metadata_command.rb +51 -0
  121. data/lib/longleaf/errors.rb +26 -7
  122. data/lib/longleaf/events/deregister_event.rb +53 -0
  123. data/lib/longleaf/events/event_names.rb +9 -0
  124. data/lib/longleaf/events/event_status_tracking.rb +59 -0
  125. data/lib/longleaf/events/preserve_event.rb +81 -0
  126. data/lib/longleaf/events/register_event.rb +52 -51
  127. data/lib/longleaf/helpers/case_insensitive_hash.rb +38 -0
  128. data/lib/longleaf/helpers/digest_helper.rb +56 -0
  129. data/lib/longleaf/helpers/s3_uri_helper.rb +86 -0
  130. data/lib/longleaf/helpers/selection_options_parser.rb +189 -0
  131. data/lib/longleaf/helpers/service_date_helper.rb +78 -0
  132. data/lib/longleaf/indexing/index_manager.rb +101 -0
  133. data/lib/longleaf/indexing/sequel_index_driver.rb +306 -0
  134. data/lib/longleaf/logging.rb +5 -4
  135. data/lib/longleaf/logging/redirecting_logger.rb +26 -25
  136. data/lib/longleaf/models/app_fields.rb +7 -2
  137. data/lib/longleaf/models/file_record.rb +17 -8
  138. data/lib/longleaf/models/filesystem_metadata_location.rb +56 -0
  139. data/lib/longleaf/models/filesystem_storage_location.rb +52 -0
  140. data/lib/longleaf/models/md_fields.rb +2 -1
  141. data/lib/longleaf/models/metadata_location.rb +47 -0
  142. data/lib/longleaf/models/metadata_record.rb +39 -15
  143. data/lib/longleaf/models/s3_storage_location.rb +133 -0
  144. data/lib/longleaf/models/service_definition.rb +7 -6
  145. data/lib/longleaf/models/service_fields.rb +7 -1
  146. data/lib/longleaf/models/service_record.rb +10 -6
  147. data/lib/longleaf/models/storage_location.rb +24 -19
  148. data/lib/longleaf/models/storage_types.rb +9 -0
  149. data/lib/longleaf/models/system_config_fields.rb +9 -0
  150. data/lib/longleaf/preservation_services/file_check_service.rb +58 -0
  151. data/lib/longleaf/preservation_services/fixity_check_service.rb +123 -0
  152. data/lib/longleaf/preservation_services/rsync_replication_service.rb +182 -0
  153. data/lib/longleaf/preservation_services/s3_replication_service.rb +143 -0
  154. data/lib/longleaf/services/application_config_deserializer.rb +81 -24
  155. data/lib/longleaf/services/application_config_manager.rb +20 -6
  156. data/lib/longleaf/services/application_config_validator.rb +19 -9
  157. data/lib/longleaf/services/configuration_validator.rb +67 -4
  158. data/lib/longleaf/services/filesystem_location_validator.rb +16 -0
  159. data/lib/longleaf/services/metadata_deserializer.rb +113 -42
  160. data/lib/longleaf/services/metadata_persistence_manager.rb +47 -0
  161. data/lib/longleaf/services/metadata_serializer.rb +138 -25
  162. data/lib/longleaf/services/metadata_validator.rb +76 -0
  163. data/lib/longleaf/services/s3_location_validator.rb +19 -0
  164. data/lib/longleaf/services/service_class_cache.rb +112 -0
  165. data/lib/longleaf/services/service_definition_manager.rb +10 -7
  166. data/lib/longleaf/services/service_definition_validator.rb +25 -18
  167. data/lib/longleaf/services/service_manager.rb +86 -11
  168. data/lib/longleaf/services/service_mapping_manager.rb +13 -12
  169. data/lib/longleaf/services/service_mapping_validator.rb +36 -26
  170. data/lib/longleaf/services/storage_location_manager.rb +76 -15
  171. data/lib/longleaf/services/storage_location_validator.rb +49 -35
  172. data/lib/longleaf/specs/config_builder.rb +47 -23
  173. data/lib/longleaf/specs/config_validator_helpers.rb +16 -0
  174. data/lib/longleaf/specs/custom_matchers.rb +9 -0
  175. data/lib/longleaf/specs/file_helpers.rb +61 -0
  176. data/lib/longleaf/specs/metadata_builder.rb +92 -0
  177. data/lib/longleaf/specs/system_config_builder.rb +27 -0
  178. data/lib/longleaf/version.rb +1 -1
  179. data/longleaf.gemspec +20 -7
  180. data/mkdocs.yml +21 -0
  181. metadata +306 -23
  182. data/.travis.yml +0 -4
  183. data/lib/longleaf/commands/abstract_command.rb +0 -37
  184. data/lib/longleaf/services/storage_path_validator.rb +0 -16
@@ -0,0 +1,81 @@
1
+ require 'longleaf/services/service_manager'
2
+ require 'longleaf/events/event_names'
3
+ require 'longleaf/events/event_status_tracking'
4
+ require 'longleaf/logging'
5
+
6
module Longleaf
  # Preserve event for a single file. Runs the preservation services which apply to the
  # file's storage location and records the outcome of each one.
  # NOTE(review): record_success, record_failure and return_status are presumably supplied
  # by EventStatusTracking, and logger by Logging - confirm against those modules.
  class PreserveEvent
    include Longleaf::Logging
    include Longleaf::EventStatusTracking

    # @param file_rec [FileRecord] file record
    # @param app_manager [ApplicationConfigManager] the application configuration
    # @param force [boolean] if true, then services run regardless of whether they are flagged as needed
    # @raise [ArgumentError] if file_rec or app_manager is nil
    def initialize(file_rec:, app_manager:, force: false)
      raise ArgumentError, 'Must provide a file_rec parameter' if file_rec.nil?
      raise ArgumentError, 'Must provide an ApplicationConfigManager' if app_manager.nil?

      @app_manager = app_manager
      @file_rec = file_rec
      @force = force
    end

    # Perform a preserve event on the given file, updating its metadata record if any services were executed.
    # @return the value of return_status once processing of the file has finished
    # @raise [StorageLocationUnavailableError] re-raised untouched if a service reports it
    def perform
      storage_loc = @file_rec.storage_location
      service_manager = @app_manager.service_manager
      md_rec = @file_rec.metadata_record
      f_path = @file_rec.path

      logger.info("Performing preserve event on #{@file_rec.path}")

      # Becomes true as soon as there is something worth writing back to the metadata record
      needs_persist = false
      begin
        unless File.exist?(f_path)
          # Need to persist metadata to avoid repeating processing of this file too soon.
          needs_persist = true
          record_failure(EventNames::PRESERVE, f_path, "File is registered but missing.")
          return return_status
        end

        # services applicable to this location and event
        applicable = service_manager.list_services(location: storage_loc.name, event: EventNames::PRESERVE)
        applicable.each do |service_name|
          # The force flag short-circuits the check of whether the service is needed
          should_run = @force || service_manager.service_needed?(service_name, md_rec)
          if !should_run
            logger.debug("Service #{service_name} not needed for file '#{@file_rec.path}', skipping")
            next
          end

          begin
            logger.info("Performing preserve service #{service_name} for #{@file_rec.path}")
            needs_persist = true
            service_manager.perform_service(service_name, @file_rec, EventNames::PRESERVE)

            # The service completed, so note that on the record and in the event status
            @file_rec.metadata_record.update_service_as_performed(service_name)
            record_success(EventNames::PRESERVE, f_path, nil, service_name)
          rescue PreservationServiceError => err
            # Failure reported by the service itself: record it and move on to the next service
            @file_rec.metadata_record.update_service_as_failed(service_name)
            record_failure(EventNames::PRESERVE, f_path, err.message, service_name)
          rescue StorageLocationUnavailableError
            # Passed up to the caller unchanged; the ensure clause below still runs
            raise
          rescue StandardError => err
            # Unexpected error: record it and stop processing further services for this file
            @file_rec.metadata_record.update_service_as_failed(service_name)
            record_failure(EventNames::PRESERVE, f_path, nil, service_name, error: err)
            return return_status
          end
        end
      ensure
        # Write the metadata back out only when a service was attempted or the file was missing
        @app_manager.md_manager.persist(@file_rec) if needs_persist
      end

      return_status
    end
  end
end
@@ -1,92 +1,93 @@
1
1
  require 'longleaf/errors'
2
+ require 'longleaf/events/event_names'
3
+ require 'longleaf/events/event_status_tracking'
2
4
  require 'longleaf/models/metadata_record'
3
5
  require 'longleaf/services/metadata_deserializer'
4
6
  require 'longleaf/services/metadata_serializer'
5
7
  require 'time'
6
8
 
7
- # Event to register a file with longleaf
8
9
  module Longleaf
10
+ # Event to register a file with longleaf
9
11
  class RegisterEvent
10
- EVENT_NAME = 'register'
11
-
12
+ include Longleaf::EventStatusTracking
13
+
12
14
  # @param file_rec [FileRecord] file record
13
15
  # @param app_manager [ApplicationConfigManager] the application configuration
14
16
  # @param force [boolean] if true, then already registered files will be re-registered
15
- def initialize(file_rec:, app_manager:, force: false, checksums: nil)
17
+ # @param digest_provider [#get_digests] object which provides digests for files being registered
18
+ def initialize(file_rec:, app_manager:, force: false, digest_provider: nil)
16
19
  raise ArgumentError.new('Must provide a file_rec parameter') if file_rec.nil?
17
20
  raise ArgumentError.new('Parameter file_rec must be a FileRecord') \
18
21
  unless file_rec.is_a?(FileRecord)
19
22
  raise ArgumentError.new('Must provide an ApplicationConfigManager') if app_manager.nil?
20
23
  raise ArgumentError.new('Parameter app_manager must be an ApplicationConfigManager') \
21
24
  unless app_manager.is_a?(ApplicationConfigManager)
22
-
25
+
23
26
  @app_manager = app_manager
24
27
  @file_rec = file_rec
25
28
  @force = force
26
- @checksums = checksums
29
+ @digest_provider = digest_provider
27
30
  end
28
-
31
+
29
32
  # Perform a registration event on the given file
30
- # @raises RegistrationError if a file cannot be registered
33
+ # @raise RegistrationError if a file cannot be registered
31
34
  def perform
32
- metadata_exists = File.file?(@file_rec.metadata_path)
33
- # If the file's metadata exists, only need to register it if the force flag is provided
34
- if metadata_exists && !@force
35
- raise RegistrationError.new("Unable to register '#{@file_rec.path}', it is already registered.")
36
- end
37
-
38
- # create metadata record
39
- md_rec = MetadataRecord.new(registered: Time.now.utc.iso8601)
40
- @file_rec.metadata_record = md_rec
41
-
42
- # retain significant details from former record
43
- if metadata_exists
44
- retain_existing_properties
35
+ begin
36
+ # Only need to re-register file if the force flag is provided
37
+ if @file_rec.metadata_present? && !@force
38
+ raise RegistrationError.new("Unable to register '#{@file_rec.path}', it is already registered.")
39
+ end
40
+
41
+ # create metadata record
42
+ md_rec = MetadataRecord.new(registered: Time.now.utc.iso8601(3))
43
+ @file_rec.metadata_record = md_rec
44
+
45
+ # retain significant details from former record
46
+ if @file_rec.metadata_present?
47
+ retain_existing_properties
48
+ end
49
+
50
+ populate_file_properties
51
+
52
+ if !@digest_provider.nil?
53
+ checksums = @digest_provider.get_digests(@file_rec.path)
54
+ md_rec.checksums.merge!(checksums) unless checksums.nil?
55
+ end
56
+
57
+ # persist the metadata
58
+ @app_manager.md_manager.persist(@file_rec)
59
+
60
+ record_success(EventNames::REGISTER, @file_rec.path)
61
+ rescue RegistrationError => err
62
+ record_failure(EventNames::REGISTER, @file_rec.path, err.message)
63
+ rescue InvalidStoragePathError => err
64
+ record_failure(EventNames::REGISTER, @file_rec.path, err.message)
45
65
  end
46
-
47
- populate_file_properties
48
-
49
- md_rec.checksums.merge!(@checksums) unless @checksums.nil?
50
-
51
- populate_services
52
-
53
- # persist the metadata out to file
54
- MetadataSerializer::write(metadata: md_rec, file_path: @file_rec.metadata_path)
66
+
67
+ return_status
55
68
  end
56
-
69
+
57
70
  private
58
71
  def populate_file_properties
59
72
  md_rec = @file_rec.metadata_record
60
-
73
+
61
74
  # Set file properties
62
- md_rec.last_modified = File.mtime(@file_rec.path).utc.iso8601
75
+ md_rec.last_modified = File.mtime(@file_rec.path).utc.iso8601(3)
63
76
  md_rec.file_size = File.size(@file_rec.path)
64
77
  end
65
-
66
- def populate_services
67
- md_rec = @file_rec.metadata_record
68
-
69
- service_manager = @app_manager.service_manager
70
- definitions = service_manager.list_service_definitions(location: @file_rec.storage_location.name)
71
-
72
- # Add service section
73
- definitions.each do |serv_def|
74
- serv_name = serv_def.name
75
- md_rec.add_service(serv_name)
76
- end
77
- end
78
-
78
+
79
79
  # Copy a subset of properties from an existing metadata record to the new record
80
80
  def retain_existing_properties
81
81
  md_rec = @file_rec.metadata_record
82
-
83
- old_md = MetadataDeserializer.deserialize(file_path: @file_rec.metadata_path)
82
+
83
+ old_md = MetadataDeserializer.deserialize(file_path: @file_rec.metadata_path,
84
+ digest_algs: @file_rec.storage_location.metadata_location.digests)
84
85
  # Copy custom properties
85
86
  old_md.properties.each { |name, value| md_rec.properties[name] = value }
86
87
  # Copy stale-replicas flag per service
87
88
  old_md.list_services.each do |serv_name|
88
89
  serv_rec = old_md.service(serv_name)
89
-
90
+
90
91
  stale_replicas = serv_rec.stale_replicas
91
92
  if stale_replicas
92
93
  new_service = md_rec.service(serv_name)
@@ -95,4 +96,4 @@ module Longleaf
95
96
  end
96
97
  end
97
98
  end
98
- end
99
+ end
@@ -0,0 +1,38 @@
1
module Longleaf
  # Hash subclass which provides case insensitive keys, where keys are always downcased.
  #
  # Keys which respond to +downcase+ are downcased before being stored or looked up; any
  # other key is used unchanged. The normalization is applied by #[], #[]=, #store, #delete,
  # #fetch, #dig (first key only), #has_key? and its aliases, and #merge / #merge! / #update.
  # Other Hash methods (for example #values_at) are inherited unchanged and so remain
  # case sensitive.
  class CaseInsensitiveHash < Hash
    # @param key key to look up, matched case insensitively
    # @return the value stored for the key, or the hash's default value
    def [](key)
      super _insensitive(key)
    end

    # @param key key to assign, stored in downcased form
    # @param value value to store
    def []=(key, value)
      super _insensitive(key), value
    end
    # Hash#store is a separate builtin entry point, so it must be pointed at the override
    alias_method :store, :[]=

    # @param key key to remove, matched case insensitively
    # @return the removed value, or nil if the key was not present
    def delete(key)
      super _insensitive(key)
    end

    # @param key key to test, matched case insensitively
    # @return [Boolean] true if the key is present
    def has_key?(key)
      super _insensitive(key)
    end
    # The builtin aliases do not follow an overridden has_key?, so re-alias them here
    alias_method :key?, :has_key?
    alias_method :include?, :has_key?
    alias_method :member?, :has_key?

    # Same contract as Hash#fetch, with the key matched case insensitively.
    # @raise [KeyError] if the key is absent and neither a default nor a block is given
    def fetch(key, *args, &block)
      super(_insensitive(key), *args, &block)
    end

    # Same contract as Hash#dig. Only the first key is normalized, since the remaining
    # keys are applied to whatever objects are nested inside this hash.
    def dig(key, *rest)
      super(_insensitive(key), *rest)
    end

    # @param other_hash [Hash] entries to merge in, keys are downcased first
    # @return a new hash containing the entries of this hash and other_hash
    def merge(other_hash)
      super other_hash.map {|k, v| [_insensitive(k), v] }.to_h
    end

    # @param other_hash [Hash] entries to merge into this hash, keys are downcased first
    # @return this hash
    def merge!(other_hash)
      super other_hash.map {|k, v| [_insensitive(k), v] }.to_h
    end
    alias_method :update, :merge!

    # Cause this hash to serialize as a regular hash to avoid deserialization failures
    def encode_with coder
      coder.represent_map nil, self
    end

    protected
    # @return the downcased key when the key supports it, otherwise the key unchanged
    def _insensitive(key)
      key.respond_to?(:downcase) ? key.downcase : key
    end
  end
end
@@ -0,0 +1,56 @@
1
+ require 'longleaf/errors'
2
+ require 'digest'
3
+
4
module Longleaf
  # Helper methods for generating digests
  class DigestHelper
    # Names of the digest algorithms which start_digest is able to produce.
    KNOWN_DIGESTS ||= ['md5', 'sha1', 'sha2', 'sha256', 'sha384', 'sha512', 'rmd160'].freeze

    # Verifies that every named digest algorithm is known. Does nothing when given nil.
    # @param algs Either a string containing one or an array containing zero or more digest
    #    algorithm names. Any other single value is treated as a one element list.
    # @raise [InvalidDigestAlgorithmError] thrown if any of the digest algorithms listed are not
    #    known to the system.
    def self.validate_algorithms(algs)
      return if algs.nil?

      if algs.is_a?(String)
        return if is_known_algorithm?(algs)
        raise InvalidDigestAlgorithmError.new("Unknown digest algorithm #{algs}")
      end

      # Array() wraps a lone non-string value (such as a Symbol) so that it is reported as
      # an unknown algorithm rather than failing with a NoMethodError.
      unknown = Array(algs).reject { |alg| is_known_algorithm?(alg) }
      return if unknown.empty?
      raise InvalidDigestAlgorithmError.new("Unknown digest algorithm(s): #{unknown}")
    end

    # @param alg [String] identifier of digest algorithm
    # @return [Boolean] true if the digest is a valid known algorithm
    def self.is_known_algorithm?(alg)
      KNOWN_DIGESTS.include?(alg)
    end

    # Get a new Digest instance for the specified algorithm
    # @param alg [String] name of the digest algorithm
    # @return [Digest] A new digest instance for the requested algorithm
    # @raise [InvalidDigestAlgorithmError] if an unknown digest algorithm is requested
    def self.start_digest(alg)
      case alg
      when 'md5'
        Digest::MD5.new
      when 'sha1'
        Digest::SHA1.new
      when 'sha2', 'sha256'
        # Digest::SHA2 is constructed here without a bit length for the 256 bit variant
        Digest::SHA2.new
      when 'sha384'
        Digest::SHA2.new(384)
      when 'sha512'
        Digest::SHA2.new(512)
      when 'rmd160'
        Digest::RMD160.new
      else
        raise InvalidDigestAlgorithmError.new("Cannot produce digest for unknown algorithm '#{alg}'.")
      end
    end
  end
end
@@ -0,0 +1,86 @@
1
+ require 'uri'
2
+
3
module Longleaf
  # Helper for interacting with s3 uris
  class S3UriHelper
    # Matched against the host of a URI. Capture 1 is the optional prefix in front of "s3"
    # (including its trailing dot), which is treated as the bucket name of a virtual hosted
    # style URL. Capture 2 is the optional region segment, including its trailing separator.
    ENDPOINT_PATTERN = /^(.+\.)?s3[.\-]([a-z0-9\-]+[\-.])?[a-z0-9]+\./
    ALLOWED_SCHEMES = ['http', 'https', 's3'].freeze

    # Extract the name of the s3 bucket from the provided url
    # @param url s3 url
    # @return the name of the bucket, or nil if the name could not be identified
    # @raise [ArgumentError] if the url is not a valid URI for an S3 endpoint
    def self.extract_bucket(url)
      uri = s3_uri(url)
      prefix = match_endpoint(uri)[1]

      # Virtual hosted style url, the bucket is the host prefix minus its trailing dot
      return prefix[0..-2] unless prefix.nil? || prefix.empty?

      # Path style url, the bucket is the first path segment. Splitting "/bucket/key" gives
      # ["", "bucket", "key"], while "" and "/" give an empty array and therefore nil.
      uri.path.split('/')[1]
    end

    # Extract the path of the object within its bucket from the provided url
    # @param url s3 url
    # @return the path with no leading slash and no bucket name, or nil if the url has no path
    # @raise [ArgumentError] if the url is not a valid URI for an S3 endpoint
    def self.extract_path(url)
      uri = s3_uri(url)
      matches = match_endpoint(uri)

      path = uri.path
      return nil if path == '/' || path.empty?

      # trim off the first slash
      path = path.partition('/').last

      # Determine if the first part of the path is the bucket name
      prefix = matches[1]
      if prefix.nil? || prefix.empty?
        # trim off the bucket name
        path = path.partition('/').last
      end

      path
    end

    # Extract the region from the host of the provided url
    # @param url s3 url
    # @return the region, or nil if the host does not specify one
    # @raise [ArgumentError] if the url is not a valid URI for an S3 endpoint
    def self.extract_region(url)
      uri = s3_uri(url)
      region_segment = match_endpoint(uri)[2]

      # No region specified
      return nil if region_segment.nil?

      # drop the trailing separator captured along with the region
      region_segment[0..-2]
    end

    # Parse and validate the provided url
    # @param url s3 url
    # @return [URI] the parsed URI
    # @raise [ArgumentError] if the url is nil, has a scheme other than http, https or s3,
    #    or has no hostname
    def self.s3_uri(url)
      if url.nil?
        raise ArgumentError.new("url cannot be empty")
      end
      uri = URI(url)
      if !ALLOWED_SCHEMES.include?(uri.scheme&.downcase)
        raise ArgumentError.new("Invalid scheme for s3 URI #{url}, only http, https and s3 are permitted")
      end
      if uri.host.nil?
        raise ArgumentError.new("Invalid S3 URI, no hostname: #{url}")
      end
      uri
    end

    # Match the host of the uri against ENDPOINT_PATTERN
    # @param uri [URI] uri returned by s3_uri
    # @return [MatchData] the match
    # @raise [ArgumentError] if the host does not look like an S3 endpoint
    def self.match_endpoint(uri)
      matches = ENDPOINT_PATTERN.match(uri.host)
      if matches.nil?
        raise ArgumentError.new("Provided URI does not match the expected pattern for an S3 URI")
      end
      matches
    end
    private_class_method :match_endpoint
  end
end
@@ -0,0 +1,189 @@
1
+ require 'longleaf/candidates/file_selector'
2
+ require 'longleaf/candidates/registered_file_selector'
3
+ require 'longleaf/candidates/manifest_digest_provider'
4
+ require 'longleaf/candidates/single_digest_provider'
5
+
6
module Longleaf
  # Helper for parsing the file selection and manifest options passed to commands
  class SelectionOptionsParser
    extend Longleaf::Logging

    # Parses the provided options to construct a file selector and digest provider for
    # use in registration commands. Reports a failure and exits with status 1 if the
    # options are invalid.
    # @param options [Hash] command options
    # @param app_config_manager [ApplicationConfigManager] app config manager
    # @return [Array] the file selector and digest provider. The digest provider is nil when
    #    files are selected with the file option and no checksums option is given.
    def self.parse_registration_selection_options(options, app_config_manager)
      there_can_be_only_one("Only one of the following selection options may be provided: -m, -f, -s",
          options, :file, :manifest, :location)

      digest_provider = nil
      if !options[:manifest].nil?
        digests_mapping = self.manifests_to_digest_mapping(options[:manifest])
        selector = FileSelector.new(file_paths: digests_mapping.keys, app_config: app_config_manager)
        digest_provider = ManifestDigestProvider.new(digests_mapping)
      elsif !options[:file].nil?
        if options[:checksums]
          checksums = options[:checksums]
          # validate checksum list format, must be a comma delimited list of prefix:checksums.
          # Anchored to the whole string so that the split below always yields complete pairs.
          if /\A[^:,]+:[^:,]+(,[^:,]+:[^:,]+)*\z/.match(checksums)
            # convert checksum list into hash with prefix as key
            checksums = Hash[*checksums.split(/\s*[:,]\s*/)]
            digest_provider = SingleDigestProvider.new(checksums)
          else
            logger.failure("Invalid checksums parameter format, see `longleaf help <command>` for more information")
            exit 1
          end
        end

        file_paths = options[:file].split(/\s*,\s*/)
        selector = FileSelector.new(file_paths: file_paths, app_config: app_config_manager)
      elsif !options[:location].nil?
        storage_locations = options[:location].split(/\s*,\s*/)
        selector = FileSelector.new(storage_locations: storage_locations, app_config: app_config_manager)
        digest_provider = SingleDigestProvider.new(nil)
      else
        logger.failure("Must provide one of the following file selection options: -f, -s, or -m")
        exit 1
      end

      [selector, digest_provider]
    end

    # Reports a failure and exits with status 1 if more than one of the named options is set.
    # @param failure_msg [String] message to report if more than one option is set
    # @param options [Hash] command options
    # @param names names of the mutually exclusive options
    def self.there_can_be_only_one(failure_msg, options, *names)
      got_one = false
      names.each do |name|
        next if options[name].nil?

        if got_one
          logger.failure(failure_msg)
          exit 1
        end
        got_one = true
      end
    end

    # Parses the provided manifest options, reading the contents of the manifests to produce
    # a mapping from files to one or more algorithms.
    #
    # Each manifest line is either a "<digest> <path>" entry or, in a manifest given without
    # an algorithm name, an "<alg_name>:" header which sets the algorithm for the entries
    # that follow it.
    # @param manifest_vals [Array] List of manifest option values. They may be in one of the following formats:
    #    <alg_name>:<manifest_path> OR <alg_name>:@-
    #    <manifest_path> OR @-
    #    where @- indicates that the manifest is read from STDIN.
    # @return a hash containing the aggregated contents of the provided manifests. The keys are
    #    paths to manifested files. The values are hashes, mapping digest algorithms to digest values.
    def self.manifests_to_digest_mapping(manifest_vals)
      # interpret option inputs into a list of algorithms to manifest sources
      alg_manifest_pairs = manifest_vals.map do |manifest_val|
        if manifest_val.include?(':')
          manifest_val.split(':', 2)
        else
          # algorithm not specified in option value
          [nil, manifest_val]
        end
      end
      if alg_manifest_pairs.count { |mpair| mpair[1] == '@-' } > 1
        self.fail("Cannot specify more than one manifest from STDIN")
      end

      # read the provided manifests to build a mapping from file uri to all supplied digests
      digests_mapping = Hash.new { |h,k| h[k] = Hash.new }
      alg_manifest_pairs.each do |mpair|
        current_alg = mpair[0]
        # Algorithm headers are only recognized when the option value named no algorithm
        multi_digest_manifest = current_alg.nil?

        with_source_stream(mpair[1]) do |source_stream|
          source_stream.each_line do |line|
            line = line.strip
            if multi_digest_manifest && /^[a-zA-Z0-9]+:$/ =~ line
              # Found a digest algorithm header, assuming succeeding entries are of this type
              current_alg = line.chomp(':')
              # Verify that the digest algorithm is known to longleaf
              if !DigestHelper.is_known_algorithm?(current_alg)
                self.fail("Manifest specifies unknown digest algorithm: #{current_alg}")
              end
            else
              if current_alg.nil?
                self.fail("Manifest with unknown checksums encountered, an algorithm must be specified")
              end
              # digest first, then the path, which may itself contain whitespace
              entry_parts = line.split(' ', 2)
              if entry_parts.length != 2
                self.fail("Invalid manifest entry: #{line}")
              end

              digests_mapping[entry_parts[1]][current_alg] = entry_parts[0]
            end
          end
        end
      end

      digests_mapping
    end

    # Parses the provided options to create a selector for registered files. Reports a
    # failure and exits with status 1 if the options are invalid.
    # @param options [Hash] command options
    # @param app_config_manager [ApplicationConfigManager] app config manager
    # @return selector
    def self.create_registered_selector(options, app_config_manager)
      there_can_be_only_one("Only one of the following selection options may be provided: -l, -f, -s",
          options, :file, :location, :from_list)

      if !options[:from_list].nil?
        file_paths = read_from_list(options[:from_list])
        return RegisteredFileSelector.new(file_paths: file_paths, app_config: app_config_manager)
      elsif !options[:file].nil?
        file_paths = options[:file].split(/\s*,\s*/)
        return RegisteredFileSelector.new(file_paths: file_paths, app_config: app_config_manager)
      elsif !options[:location].nil?
        storage_locations = options[:location].split(/\s*,\s*/)
        return RegisteredFileSelector.new(storage_locations: storage_locations, app_config: app_config_manager)
      else
        logger.failure("Must provide one of the following file selection options: -l, -f, or -s")
        exit 1
      end
    end

    # Parses the -l from_list option, reading the list of files specified either from the provided
    # file path or STDIN. Reports a failure and exits with status 1 if the option is blank, the
    # list file does not exist, or the list contains no lines.
    # @param from_list option value, either a file path or "@-"
    # @return list of files from the from_list, one entry per line with surrounding whitespace removed
    def self.read_from_list(from_list)
      from_list = from_list.strip
      if from_list.empty?
        logger.failure("List parameter must not be empty")
        exit 1
      end

      begin
        lines = with_source_stream(from_list) do |source_stream|
          source_stream.each_line.map(&:strip)
        end
      rescue Errno::ENOENT
        logger.failure("Specified list file does not exist: #{from_list}")
        exit 1
      end

      if lines.empty?
        logger.failure("File list is empty, must provide one or more files for this operation")
        exit 1
      end
      lines
    end

    # Reports the failure message and exits with status 1
    # @param message [String] failure message
    def self.fail(message)
      logger.failure(message)
      exit 1
    end

    # Yields a readable stream for the given source, which is STDIN when the source is "@-"
    # and otherwise the file at that path. A file opened here is closed again once the
    # block finishes, including when the block exits early.
    # @param source [String] either "@-" or a file path
    # @return the result of the block
    def self.with_source_stream(source)
      return yield($stdin) if source == '@-'

      File.open(source) { |stream| yield stream }
    end
    private_class_method :with_source_stream
  end
end