longleaf 0.1.0.pre.2 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (184) hide show
  1. checksums.yaml +4 -4
  2. data/.circleci/config.yml +94 -0
  3. data/.editorconfig +13 -0
  4. data/.gitignore +4 -1
  5. data/.rubocop.yml +44 -0
  6. data/.rubocop_todo.yml +834 -0
  7. data/.yardopts +1 -0
  8. data/Gemfile +16 -1
  9. data/README.md +98 -12
  10. data/Rakefile +6 -0
  11. data/bin/setup +16 -1
  12. data/docs/aboutlongleaf.md +28 -0
  13. data/docs/extra.css +32 -0
  14. data/docs/img/change-file.png +0 -0
  15. data/docs/img/ll-example-preserved.png +0 -0
  16. data/docs/index.md +19 -0
  17. data/docs/install.md +66 -0
  18. data/docs/ll-example/config-example-relative.yml +33 -0
  19. data/docs/ll-example/files-dir/LLexample-PDF.pdf +0 -0
  20. data/docs/ll-example/files-dir/LLexample-TOCHANGE.txt +15 -0
  21. data/docs/ll-example/files-dir/LLexample-tokeep.txt +10 -0
  22. data/docs/ll-example/metadata-dir/.gitkeep +0 -0
  23. data/docs/ll-example/replica-files/.gitkeep +0 -0
  24. data/docs/ll-example/replica-metadata/.gitkeep +0 -0
  25. data/docs/quickstart.md +270 -0
  26. data/docs/rdocs/Longleaf.html +135 -0
  27. data/docs/rdocs/Longleaf/AppFields.html +178 -0
  28. data/docs/rdocs/Longleaf/ApplicationConfigDeserializer.html +631 -0
  29. data/docs/rdocs/Longleaf/ApplicationConfigManager.html +610 -0
  30. data/docs/rdocs/Longleaf/ApplicationConfigValidator.html +238 -0
  31. data/docs/rdocs/Longleaf/CLI.html +909 -0
  32. data/docs/rdocs/Longleaf/ChecksumMismatchError.html +151 -0
  33. data/docs/rdocs/Longleaf/ConfigBuilder.html +1339 -0
  34. data/docs/rdocs/Longleaf/ConfigurationError.html +143 -0
  35. data/docs/rdocs/Longleaf/ConfigurationValidator.html +227 -0
  36. data/docs/rdocs/Longleaf/DeregisterCommand.html +420 -0
  37. data/docs/rdocs/Longleaf/DeregisterEvent.html +453 -0
  38. data/docs/rdocs/Longleaf/DeregistrationError.html +151 -0
  39. data/docs/rdocs/Longleaf/DigestHelper.html +419 -0
  40. data/docs/rdocs/Longleaf/EventError.html +147 -0
  41. data/docs/rdocs/Longleaf/EventNames.html +163 -0
  42. data/docs/rdocs/Longleaf/EventStatusTracking.html +656 -0
  43. data/docs/rdocs/Longleaf/FileCheckService.html +540 -0
  44. data/docs/rdocs/Longleaf/FileHelpers.html +520 -0
  45. data/docs/rdocs/Longleaf/FileRecord.html +716 -0
  46. data/docs/rdocs/Longleaf/FileSelector.html +901 -0
  47. data/docs/rdocs/Longleaf/FixityCheckService.html +691 -0
  48. data/docs/rdocs/Longleaf/IndexManager.html +1155 -0
  49. data/docs/rdocs/Longleaf/InvalidDigestAlgorithmError.html +143 -0
  50. data/docs/rdocs/Longleaf/InvalidStoragePathError.html +143 -0
  51. data/docs/rdocs/Longleaf/Logging.html +405 -0
  52. data/docs/rdocs/Longleaf/Logging/RedirectingLogger.html +1213 -0
  53. data/docs/rdocs/Longleaf/LongleafError.html +139 -0
  54. data/docs/rdocs/Longleaf/MDFields.html +193 -0
  55. data/docs/rdocs/Longleaf/MetadataBuilder.html +787 -0
  56. data/docs/rdocs/Longleaf/MetadataDeserializer.html +537 -0
  57. data/docs/rdocs/Longleaf/MetadataError.html +143 -0
  58. data/docs/rdocs/Longleaf/MetadataPersistenceManager.html +539 -0
  59. data/docs/rdocs/Longleaf/MetadataRecord.html +1411 -0
  60. data/docs/rdocs/Longleaf/MetadataSerializer.html +786 -0
  61. data/docs/rdocs/Longleaf/PreservationServiceError.html +147 -0
  62. data/docs/rdocs/Longleaf/PreserveCommand.html +410 -0
  63. data/docs/rdocs/Longleaf/PreserveEvent.html +491 -0
  64. data/docs/rdocs/Longleaf/RegisterCommand.html +428 -0
  65. data/docs/rdocs/Longleaf/RegisterEvent.html +628 -0
  66. data/docs/rdocs/Longleaf/RegisteredFileSelector.html +446 -0
  67. data/docs/rdocs/Longleaf/RegistrationError.html +151 -0
  68. data/docs/rdocs/Longleaf/ReindexCommand.html +576 -0
  69. data/docs/rdocs/Longleaf/RsyncReplicationService.html +1180 -0
  70. data/docs/rdocs/Longleaf/SequelIndexDriver.html +1978 -0
  71. data/docs/rdocs/Longleaf/ServiceCandidateFilesystemIterator.html +572 -0
  72. data/docs/rdocs/Longleaf/ServiceCandidateIndexIterator.html +532 -0
  73. data/docs/rdocs/Longleaf/ServiceCandidateLocator.html +333 -0
  74. data/docs/rdocs/Longleaf/ServiceClassCache.html +725 -0
  75. data/docs/rdocs/Longleaf/ServiceDateHelper.html +425 -0
  76. data/docs/rdocs/Longleaf/ServiceDefinition.html +683 -0
  77. data/docs/rdocs/Longleaf/ServiceDefinitionManager.html +371 -0
  78. data/docs/rdocs/Longleaf/ServiceDefinitionValidator.html +269 -0
  79. data/docs/rdocs/Longleaf/ServiceFields.html +173 -0
  80. data/docs/rdocs/Longleaf/ServiceManager.html +1229 -0
  81. data/docs/rdocs/Longleaf/ServiceMappingManager.html +410 -0
  82. data/docs/rdocs/Longleaf/ServiceMappingValidator.html +347 -0
  83. data/docs/rdocs/Longleaf/ServiceRecord.html +821 -0
  84. data/docs/rdocs/Longleaf/StorageLocation.html +985 -0
  85. data/docs/rdocs/Longleaf/StorageLocationManager.html +729 -0
  86. data/docs/rdocs/Longleaf/StorageLocationUnavailableError.html +143 -0
  87. data/docs/rdocs/Longleaf/StorageLocationValidator.html +373 -0
  88. data/docs/rdocs/Longleaf/StoragePathValidator.html +253 -0
  89. data/docs/rdocs/Longleaf/SystemConfigBuilder.html +441 -0
  90. data/docs/rdocs/Longleaf/SystemConfigFields.html +163 -0
  91. data/docs/rdocs/Longleaf/ValidateConfigCommand.html +451 -0
  92. data/docs/rdocs/Longleaf/ValidateMetadataCommand.html +408 -0
  93. data/docs/rdocs/_index.html +660 -0
  94. data/docs/rdocs/class_list.html +51 -0
  95. data/docs/rdocs/css/common.css +1 -0
  96. data/docs/rdocs/css/full_list.css +58 -0
  97. data/docs/rdocs/css/style.css +496 -0
  98. data/docs/rdocs/file.README.html +165 -0
  99. data/docs/rdocs/file_list.html +56 -0
  100. data/docs/rdocs/frames.html +17 -0
  101. data/docs/rdocs/index.html +165 -0
  102. data/docs/rdocs/js/app.js +303 -0
  103. data/docs/rdocs/js/full_list.js +216 -0
  104. data/docs/rdocs/js/jquery.js +4 -0
  105. data/docs/rdocs/method_list.html +2051 -0
  106. data/docs/rdocs/top-level-namespace.html +110 -0
  107. data/lib/longleaf/candidates/file_selector.rb +139 -0
  108. data/lib/longleaf/candidates/manifest_digest_provider.rb +17 -0
  109. data/lib/longleaf/candidates/registered_file_selector.rb +67 -0
  110. data/lib/longleaf/candidates/service_candidate_filesystem_iterator.rb +93 -0
  111. data/lib/longleaf/candidates/service_candidate_index_iterator.rb +84 -0
  112. data/lib/longleaf/candidates/service_candidate_locator.rb +23 -0
  113. data/lib/longleaf/candidates/single_digest_provider.rb +13 -0
  114. data/lib/longleaf/cli.rb +237 -46
  115. data/lib/longleaf/commands/deregister_command.rb +51 -0
  116. data/lib/longleaf/commands/preserve_command.rb +50 -0
  117. data/lib/longleaf/commands/register_command.rb +32 -43
  118. data/lib/longleaf/commands/reindex_command.rb +92 -0
  119. data/lib/longleaf/commands/validate_config_command.rb +33 -8
  120. data/lib/longleaf/commands/validate_metadata_command.rb +51 -0
  121. data/lib/longleaf/errors.rb +26 -7
  122. data/lib/longleaf/events/deregister_event.rb +53 -0
  123. data/lib/longleaf/events/event_names.rb +9 -0
  124. data/lib/longleaf/events/event_status_tracking.rb +59 -0
  125. data/lib/longleaf/events/preserve_event.rb +81 -0
  126. data/lib/longleaf/events/register_event.rb +52 -51
  127. data/lib/longleaf/helpers/case_insensitive_hash.rb +38 -0
  128. data/lib/longleaf/helpers/digest_helper.rb +56 -0
  129. data/lib/longleaf/helpers/s3_uri_helper.rb +86 -0
  130. data/lib/longleaf/helpers/selection_options_parser.rb +189 -0
  131. data/lib/longleaf/helpers/service_date_helper.rb +78 -0
  132. data/lib/longleaf/indexing/index_manager.rb +101 -0
  133. data/lib/longleaf/indexing/sequel_index_driver.rb +306 -0
  134. data/lib/longleaf/logging.rb +5 -4
  135. data/lib/longleaf/logging/redirecting_logger.rb +26 -25
  136. data/lib/longleaf/models/app_fields.rb +7 -2
  137. data/lib/longleaf/models/file_record.rb +17 -8
  138. data/lib/longleaf/models/filesystem_metadata_location.rb +56 -0
  139. data/lib/longleaf/models/filesystem_storage_location.rb +52 -0
  140. data/lib/longleaf/models/md_fields.rb +2 -1
  141. data/lib/longleaf/models/metadata_location.rb +47 -0
  142. data/lib/longleaf/models/metadata_record.rb +39 -15
  143. data/lib/longleaf/models/s3_storage_location.rb +133 -0
  144. data/lib/longleaf/models/service_definition.rb +7 -6
  145. data/lib/longleaf/models/service_fields.rb +7 -1
  146. data/lib/longleaf/models/service_record.rb +10 -6
  147. data/lib/longleaf/models/storage_location.rb +24 -19
  148. data/lib/longleaf/models/storage_types.rb +9 -0
  149. data/lib/longleaf/models/system_config_fields.rb +9 -0
  150. data/lib/longleaf/preservation_services/file_check_service.rb +58 -0
  151. data/lib/longleaf/preservation_services/fixity_check_service.rb +123 -0
  152. data/lib/longleaf/preservation_services/rsync_replication_service.rb +182 -0
  153. data/lib/longleaf/preservation_services/s3_replication_service.rb +143 -0
  154. data/lib/longleaf/services/application_config_deserializer.rb +81 -24
  155. data/lib/longleaf/services/application_config_manager.rb +20 -6
  156. data/lib/longleaf/services/application_config_validator.rb +19 -9
  157. data/lib/longleaf/services/configuration_validator.rb +67 -4
  158. data/lib/longleaf/services/filesystem_location_validator.rb +16 -0
  159. data/lib/longleaf/services/metadata_deserializer.rb +113 -42
  160. data/lib/longleaf/services/metadata_persistence_manager.rb +47 -0
  161. data/lib/longleaf/services/metadata_serializer.rb +138 -25
  162. data/lib/longleaf/services/metadata_validator.rb +76 -0
  163. data/lib/longleaf/services/s3_location_validator.rb +19 -0
  164. data/lib/longleaf/services/service_class_cache.rb +112 -0
  165. data/lib/longleaf/services/service_definition_manager.rb +10 -7
  166. data/lib/longleaf/services/service_definition_validator.rb +25 -18
  167. data/lib/longleaf/services/service_manager.rb +86 -11
  168. data/lib/longleaf/services/service_mapping_manager.rb +13 -12
  169. data/lib/longleaf/services/service_mapping_validator.rb +36 -26
  170. data/lib/longleaf/services/storage_location_manager.rb +76 -15
  171. data/lib/longleaf/services/storage_location_validator.rb +49 -35
  172. data/lib/longleaf/specs/config_builder.rb +47 -23
  173. data/lib/longleaf/specs/config_validator_helpers.rb +16 -0
  174. data/lib/longleaf/specs/custom_matchers.rb +9 -0
  175. data/lib/longleaf/specs/file_helpers.rb +61 -0
  176. data/lib/longleaf/specs/metadata_builder.rb +92 -0
  177. data/lib/longleaf/specs/system_config_builder.rb +27 -0
  178. data/lib/longleaf/version.rb +1 -1
  179. data/longleaf.gemspec +20 -7
  180. data/mkdocs.yml +21 -0
  181. metadata +306 -23
  182. data/.travis.yml +0 -4
  183. data/lib/longleaf/commands/abstract_command.rb +0 -37
  184. data/lib/longleaf/services/storage_path_validator.rb +0 -16
@@ -0,0 +1,81 @@
1
+ require 'longleaf/services/service_manager'
2
+ require 'longleaf/events/event_names'
3
+ require 'longleaf/events/event_status_tracking'
4
+ require 'longleaf/logging'
5
+
6
+ module Longleaf
7
+ # Preserve event for a single file
8
+ class PreserveEvent
9
+ include Longleaf::Logging
10
+ include Longleaf::EventStatusTracking
11
+
12
+ # @param file_rec [FileRecord] file record
13
+ # @param app_manager [ApplicationConfigManager] the application configuration
14
+ # @param force [boolean] if true, then services run regardless of whether they are flagged as needed
15
+ def initialize(file_rec:, app_manager:, force: false)
16
+ raise ArgumentError.new('Must provide a file_rec parameter') if file_rec.nil?
17
+ raise ArgumentError.new('Must provide an ApplicationConfigManager') if app_manager.nil?
18
+
19
+ @app_manager = app_manager
20
+ @file_rec = file_rec
21
+ @force = force
22
+ end
23
+
24
+ # Perform a preserve event on the given file, updating its metadata record if any services were executed.
25
+ def perform
26
+ storage_loc = @file_rec.storage_location
27
+ service_manager = @app_manager.service_manager
28
+ md_rec = @file_rec.metadata_record
29
+ f_path = @file_rec.path
30
+
31
+ logger.info("Performing preserve event on #{@file_rec.path}")
32
+
33
+ needs_persist = false
34
+ begin
35
+ if !File.exist?(f_path)
36
+ # Need to persist metadata to avoid repeating processing of this file too soon.
37
+ needs_persist = true
38
+ record_failure(EventNames::PRESERVE, f_path, "File is registered but missing.")
39
+ return return_status
40
+ end
41
+
42
+ # get the list of services applicable to this location and event
43
+ service_manager.list_services(location: storage_loc.name, event: EventNames::PRESERVE).each do |service_name|
44
+ # Skip over this service if it does not need to be run, unless force flag active
45
+ unless @force || service_manager.service_needed?(service_name, md_rec)
46
+ logger.debug("Service #{service_name} not needed for file '#{@file_rec.path}', skipping")
47
+ next
48
+ end
49
+
50
+ begin
51
+ logger.info("Performing preserve service #{service_name} for #{@file_rec.path}")
52
+ needs_persist = true
53
+ # execute the service
54
+ service_manager.perform_service(service_name, @file_rec, EventNames::PRESERVE)
55
+
56
+ # record the outcome
57
+ @file_rec.metadata_record.update_service_as_performed(service_name)
58
+ record_success(EventNames::PRESERVE, f_path, nil, service_name)
59
+ rescue PreservationServiceError => e
60
+ @file_rec.metadata_record.update_service_as_failed(service_name)
61
+ record_failure(EventNames::PRESERVE, f_path, e.message, service_name)
62
+ rescue StorageLocationUnavailableError => e
63
+ raise e
64
+ rescue StandardError => e
65
+ @file_rec.metadata_record.update_service_as_failed(service_name)
66
+ record_failure(EventNames::PRESERVE, f_path, nil, service_name, error: e)
67
+ return return_status
68
+ end
69
+ end
70
+ ensure
71
+ # persist the metadata out to file if any services were executed
72
+ if needs_persist
73
+ # persist the metadata
74
+ @app_manager.md_manager.persist(@file_rec)
75
+ end
76
+ end
77
+
78
+ return_status
79
+ end
80
+ end
81
+ end
@@ -1,92 +1,93 @@
1
1
  require 'longleaf/errors'
2
+ require 'longleaf/events/event_names'
3
+ require 'longleaf/events/event_status_tracking'
2
4
  require 'longleaf/models/metadata_record'
3
5
  require 'longleaf/services/metadata_deserializer'
4
6
  require 'longleaf/services/metadata_serializer'
5
7
  require 'time'
6
8
 
7
- # Event to register a file with longleaf
8
9
  module Longleaf
10
+ # Event to register a file with longleaf
9
11
  class RegisterEvent
10
- EVENT_NAME = 'register'
11
-
12
+ include Longleaf::EventStatusTracking
13
+
12
14
  # @param file_rec [FileRecord] file record
13
15
  # @param app_manager [ApplicationConfigManager] the application configuration
14
16
  # @param force [boolean] if true, then already registered files will be re-registered
15
- def initialize(file_rec:, app_manager:, force: false, checksums: nil)
17
+ # @param digest_provider [#get_digests] object which provides digests for files being registered
18
+ def initialize(file_rec:, app_manager:, force: false, digest_provider: nil)
16
19
  raise ArgumentError.new('Must provide a file_rec parameter') if file_rec.nil?
17
20
  raise ArgumentError.new('Parameter file_rec must be a FileRecord') \
18
21
  unless file_rec.is_a?(FileRecord)
19
22
  raise ArgumentError.new('Must provide an ApplicationConfigManager') if app_manager.nil?
20
23
  raise ArgumentError.new('Parameter app_manager must be an ApplicationConfigManager') \
21
24
  unless app_manager.is_a?(ApplicationConfigManager)
22
-
25
+
23
26
  @app_manager = app_manager
24
27
  @file_rec = file_rec
25
28
  @force = force
26
- @checksums = checksums
29
+ @digest_provider = digest_provider
27
30
  end
28
-
31
+
29
32
  # Perform a registration event on the given file
30
- # @raises RegistrationError if a file cannot be registered
33
+ # @raise RegistrationError if a file cannot be registered
31
34
  def perform
32
- metadata_exists = File.file?(@file_rec.metadata_path)
33
- # If the file's metadata exists, only need to register it if the force flag is provided
34
- if metadata_exists && !@force
35
- raise RegistrationError.new("Unable to register '#{@file_rec.path}', it is already registered.")
36
- end
37
-
38
- # create metadata record
39
- md_rec = MetadataRecord.new(registered: Time.now.utc.iso8601)
40
- @file_rec.metadata_record = md_rec
41
-
42
- # retain significant details from former record
43
- if metadata_exists
44
- retain_existing_properties
35
+ begin
36
+ # Only need to re-register file if the force flag is provided
37
+ if @file_rec.metadata_present? && !@force
38
+ raise RegistrationError.new("Unable to register '#{@file_rec.path}', it is already registered.")
39
+ end
40
+
41
+ # create metadata record
42
+ md_rec = MetadataRecord.new(registered: Time.now.utc.iso8601(3))
43
+ @file_rec.metadata_record = md_rec
44
+
45
+ # retain significant details from former record
46
+ if @file_rec.metadata_present?
47
+ retain_existing_properties
48
+ end
49
+
50
+ populate_file_properties
51
+
52
+ if !@digest_provider.nil?
53
+ checksums = @digest_provider.get_digests(@file_rec.path)
54
+ md_rec.checksums.merge!(checksums) unless checksums.nil?
55
+ end
56
+
57
+ # persist the metadata
58
+ @app_manager.md_manager.persist(@file_rec)
59
+
60
+ record_success(EventNames::REGISTER, @file_rec.path)
61
+ rescue RegistrationError => err
62
+ record_failure(EventNames::REGISTER, @file_rec.path, err.message)
63
+ rescue InvalidStoragePathError => err
64
+ record_failure(EventNames::REGISTER, @file_rec.path, err.message)
45
65
  end
46
-
47
- populate_file_properties
48
-
49
- md_rec.checksums.merge!(@checksums) unless @checksums.nil?
50
-
51
- populate_services
52
-
53
- # persist the metadata out to file
54
- MetadataSerializer::write(metadata: md_rec, file_path: @file_rec.metadata_path)
66
+
67
+ return_status
55
68
  end
56
-
69
+
57
70
  private
58
71
  def populate_file_properties
59
72
  md_rec = @file_rec.metadata_record
60
-
73
+
61
74
  # Set file properties
62
- md_rec.last_modified = File.mtime(@file_rec.path).utc.iso8601
75
+ md_rec.last_modified = File.mtime(@file_rec.path).utc.iso8601(3)
63
76
  md_rec.file_size = File.size(@file_rec.path)
64
77
  end
65
-
66
- def populate_services
67
- md_rec = @file_rec.metadata_record
68
-
69
- service_manager = @app_manager.service_manager
70
- definitions = service_manager.list_service_definitions(location: @file_rec.storage_location.name)
71
-
72
- # Add service section
73
- definitions.each do |serv_def|
74
- serv_name = serv_def.name
75
- md_rec.add_service(serv_name)
76
- end
77
- end
78
-
78
+
79
79
  # Copy a subset of properties from an existing metadata record to the new record
80
80
  def retain_existing_properties
81
81
  md_rec = @file_rec.metadata_record
82
-
83
- old_md = MetadataDeserializer.deserialize(file_path: @file_rec.metadata_path)
82
+
83
+ old_md = MetadataDeserializer.deserialize(file_path: @file_rec.metadata_path,
84
+ digest_algs: @file_rec.storage_location.metadata_location.digests)
84
85
  # Copy custom properties
85
86
  old_md.properties.each { |name, value| md_rec.properties[name] = value }
86
87
  # Copy stale-replicas flag per service
87
88
  old_md.list_services.each do |serv_name|
88
89
  serv_rec = old_md.service(serv_name)
89
-
90
+
90
91
  stale_replicas = serv_rec.stale_replicas
91
92
  if stale_replicas
92
93
  new_service = md_rec.service(serv_name)
@@ -95,4 +96,4 @@ module Longleaf
95
96
  end
96
97
  end
97
98
  end
98
- end
99
+ end
@@ -0,0 +1,38 @@
1
+ module Longleaf
2
+ # Hash subclass which provides case insensitive keys, where keys are always downcased.
3
+ class CaseInsensitiveHash < Hash
4
+ def [](key)
5
+ super _insensitive(key)
6
+ end
7
+
8
+ def []=(key, value)
9
+ super _insensitive(key), value
10
+ end
11
+
12
+ def delete(key)
13
+ super _insensitive(key)
14
+ end
15
+
16
+ def has_key?(key)
17
+ super _insensitive(key)
18
+ end
19
+
20
+ def merge(other_hash)
21
+ super other_hash.map {|k, v| [_insensitive(k), v] }.to_h
22
+ end
23
+
24
+ def merge!(other_hash)
25
+ super other_hash.map {|k, v| [_insensitive(k), v] }.to_h
26
+ end
27
+
28
+ # Cause this hash to serialize as a regular hash to avoid deserialization failures
29
+ def encode_with coder
30
+ coder.represent_map nil, self
31
+ end
32
+
33
+ protected
34
+ def _insensitive(key)
35
+ key.respond_to?(:downcase) ? key.downcase : key
36
+ end
37
+ end
38
+ end
@@ -0,0 +1,56 @@
1
+ require 'longleaf/errors'
2
+ require 'digest'
3
+
4
+ module Longleaf
5
+ # Helper methods for generating digests
6
+ class DigestHelper
7
+ KNOWN_DIGESTS ||= ['md5', 'sha1', 'sha2', 'sha256', 'sha384', 'sha512', 'rmd160']
8
+
9
+ # @param algs Either a string containing one or an array containing zero or more digest
10
+ # algorithm names.
11
+ # @raise [InvalidDigestAlgorithmError] thrown if any of the digest algorithms listed are not
12
+ # known to the system.
13
+ def self.validate_algorithms(algs)
14
+ return if algs.nil?
15
+ if algs.is_a?(String)
16
+ unless self.is_known_algorithm?(algs)
17
+ raise InvalidDigestAlgorithmError.new("Unknown digest algorithm #{algs}")
18
+ end
19
+ else
20
+ unknown = algs.select { |alg| !KNOWN_DIGESTS.include?(alg) }
21
+ unless unknown.empty?
22
+ raise InvalidDigestAlgorithmError.new("Unknown digest algorithm(s): #{unknown}")
23
+ end
24
+ end
25
+ end
26
+
27
+ # @param alg [String] identifier of digest algorithm
28
+ # @return [Boolean] true if the digest is a valid known algorithm
29
+ def self.is_known_algorithm?(alg)
30
+ KNOWN_DIGESTS.include?(alg)
31
+ end
32
+
33
+ # Get a new Digest instance for the specified algorithm
34
+ # @param alg [String] name of the digest algorithm
35
+ # @return [Digest] A new digest instance for the requested algorithm
36
+ # @raise [InvalidDigestAlgorithmError] if an unknown digest algorithm is requested
37
+ def self.start_digest(alg)
38
+ case alg
39
+ when 'md5'
40
+ return Digest::MD5.new
41
+ when 'sha1'
42
+ return Digest::SHA1.new
43
+ when 'sha2', 'sha256'
44
+ return Digest::SHA2.new
45
+ when 'sha384'
46
+ return Digest::SHA2.new(384)
47
+ when 'sha512'
48
+ return Digest::SHA2.new(512)
49
+ when 'rmd160'
50
+ return Digest::RMD160.new
51
+ else
52
+ raise InvalidDigestAlgorithmError.new("Cannot produce digest for unknown algorithm '#{alg}'.")
53
+ end
54
+ end
55
+ end
56
+ end
@@ -0,0 +1,86 @@
1
+ require 'uri'
2
+
3
+ module Longleaf
4
+ # Helper for interacting with s3 uris
5
+ class S3UriHelper
6
+ ENDPOINT_PATTERN = /^(.+\.)?s3[.\-]([a-z0-9\-]+[\-.])?[a-z0-9]+\./
7
+ ALLOWED_SCHEMES = ['http', 'https', 's3']
8
+
9
+ # Extract the name of the s3 bucket from the provided url
10
+ # @param url s3 url
11
+ # @return the name of the bucket, or nil if the name could not be identified
12
+ def self.extract_bucket(url)
13
+ uri = s3_uri(url)
14
+
15
+ matches = ENDPOINT_PATTERN.match(uri.host)
16
+ if matches.nil?
17
+ raise ArgumentError.new("Provided URI does match the expected pattern for an S3 URI")
18
+ end
19
+
20
+ prefix = matches[1]
21
+ if prefix.nil? || prefix.empty?
22
+ # Is a path style url
23
+ path = uri.path
24
+
25
+ return nil if path == '/'
26
+
27
+ path_parts = path.split('/')
28
+ return nil if path_parts.empty?
29
+ return path_parts[1]
30
+ else
31
+ return prefix[0..-2]
32
+ end
33
+ end
34
+
35
+ def self.extract_path(url)
36
+ uri = s3_uri(url)
37
+
38
+ matches = ENDPOINT_PATTERN.match(uri.host)
39
+ if matches.nil?
40
+ raise ArgumentError.new("Provided URI does match the expected pattern for an S3 URI")
41
+ end
42
+
43
+ path = uri.path
44
+ return nil if path == '/' || path.empty?
45
+
46
+ # trim off the first slash
47
+ path = path.partition('/').last
48
+
49
+ # Determine if the first part of the path is the bucket name
50
+ prefix = matches[1]
51
+ if prefix.nil? || prefix.empty?
52
+ # trim off the bucket name
53
+ path = path.partition('/').last
54
+ end
55
+
56
+ path
57
+ end
58
+
59
+ def self.extract_region(url)
60
+ uri = s3_uri(url)
61
+
62
+ matches = ENDPOINT_PATTERN.match(uri.host)
63
+
64
+ if matches[2].nil?
65
+ # No region specified
66
+ nil
67
+ else
68
+ matches[2][0..-2]
69
+ end
70
+ end
71
+
72
+ def self.s3_uri(url)
73
+ if url.nil?
74
+ raise ArgumentError.new("url cannot be empty")
75
+ end
76
+ uri = URI(url)
77
+ if !ALLOWED_SCHEMES.include?(uri.scheme&.downcase)
78
+ raise ArgumentError.new("Invalid scheme for s3 URI #{url}, only http, https and s3 are permitted")
79
+ end
80
+ if uri.host.nil?
81
+ raise ArgumentError.new("Invalid S3 URI, no hostname: #{url}")
82
+ end
83
+ uri
84
+ end
85
+ end
86
+ end
@@ -0,0 +1,189 @@
1
+ require 'longleaf/candidates/file_selector'
2
+ require 'longleaf/candidates/registered_file_selector'
3
+ require 'longleaf/candidates/manifest_digest_provider'
4
+ require 'longleaf/candidates/single_digest_provider'
5
+
6
+ module Longleaf
7
+ # Helper for parsing file selection options, including manifest inputs used for registration
8
+ class SelectionOptionsParser
9
+ extend Longleaf::Logging
10
+
11
+ # Parses the provided options to construct a file selector and digest provider for
12
+ # use in registration commands.
13
+ # @param options [Hash] command options
14
+ # @param app_config_manager [ApplicationConfigManager] app config manager
15
+ # @return The file selector and digest provider.
16
+ def self.parse_registration_selection_options(options, app_config_manager)
17
+ there_can_be_only_one("Only one of the following selection options may be provided: -m, -f, -s",
18
+ options, :file, :manifest, :location)
19
+
20
+ if !options[:manifest].nil?
21
+ digests_mapping = self.manifests_to_digest_mapping(options[:manifest])
22
+ selector = FileSelector.new(file_paths: digests_mapping.keys, app_config: app_config_manager)
23
+ digest_provider = ManifestDigestProvider.new(digests_mapping)
24
+ elsif !options[:file].nil?
25
+ if options[:checksums]
26
+ checksums = options[:checksums]
27
+ # validate checksum list format, must be a comma delimited list of prefix:checksum pairs
28
+ if /^[^:,]+:[^:,]+(,[^:,]+:[^:,]+)*$/.match(checksums)
29
+ # convert checksum list into hash with prefix as key
30
+ checksums = Hash[*checksums.split(/\s*[:,]\s*/)]
31
+ digest_provider = SingleDigestProvider.new(checksums)
32
+ else
33
+ logger.failure("Invalid checksums parameter format, see `longleaf help <command>` for more information")
34
+ exit 1
35
+ end
36
+ end
37
+
38
+ file_paths = options[:file].split(/\s*,\s*/)
39
+ selector = FileSelector.new(file_paths: file_paths, app_config: app_config_manager)
40
+ elsif !options[:location].nil?
41
+ storage_locations = options[:location].split(/\s*,\s*/)
42
+ selector = FileSelector.new(storage_locations: storage_locations, app_config: app_config_manager)
43
+ digest_provider = SingleDigestProvider.new(nil)
44
+ else
45
+ logger.failure("Must provide one of the following file selection options: -f, l, or -m")
46
+ exit 1
47
+ end
48
+
49
+ [selector, digest_provider]
50
+ end
51
+
52
+ def self.there_can_be_only_one(failure_msg, options, *names)
53
+ got_one = false
54
+ names.each do |name|
55
+ if !options[name].nil?
56
+ if got_one
57
+ logger.failure(failure_msg)
58
+ exit 1
59
+ end
60
+ got_one = true
61
+ end
62
+ end
63
+ end
64
+
65
+ # Parses the provided manifest options, reading the contents of the manifests to produce
66
+ # a mapping from files to their digests, keyed by algorithm.
67
+ # @param manifest_vals [Array] List of manifest option values. They may be in one of the following formats:
68
+ # <alg_name>:<manifest_path> OR <alg_name>:@-
69
+ # <manifest_path> OR @-
70
+ # @return a hash containing the aggregated contents of the provided manifests. The keys are
71
+ # paths to manifested files. The values are hashes, mapping digest algorithms to digest values.
72
+ def self.manifests_to_digest_mapping(manifest_vals)
73
+ alg_manifest_pairs = []
74
+ # interpret option inputs into a list of algorithms to manifest sources
75
+ manifest_vals.each do |manifest_val|
76
+ if manifest_val.include?(':')
77
+ manifest_parts = manifest_val.split(':', 2)
78
+ alg_manifest_pairs << manifest_parts
79
+ else
80
+ # algorithm not specified in option value
81
+ alg_manifest_pairs << [nil, manifest_val]
82
+ end
83
+ end
84
+ if alg_manifest_pairs.select { |mpair| mpair[1] == '@-' }.count > 1
85
+ self.fail("Cannot specify more than one manifest from STDIN")
86
+ end
87
+
88
+ # read the provided manifests to build a mapping from file uri to all supplied digests
89
+ digests_mapping = Hash.new { |h,k| h[k] = Hash.new }
90
+ alg_manifest_pairs.each do |mpair|
91
+ source_stream = nil
92
+ # Determine if reading from a manifest file or stdin
93
+ if mpair[1] == '@-'
94
+ source_stream = $stdin
95
+ else
96
+ source_stream = File.new(mpair[1])
97
+ end
98
+
99
+ current_alg = mpair[0]
100
+ multi_digest_manifest = current_alg.nil?
101
+ source_stream.each_line do |line|
102
+ line = line.strip
103
+ if multi_digest_manifest && /^[a-zA-Z0-9]+:$/ =~ line
104
+ # Found a digest algorithm header, assuming succeeding entries are of this type
105
+ current_alg = line.chomp(':')
106
+ # Verify that the digest algorithm is known to longleaf
107
+ if !DigestHelper.is_known_algorithm?(current_alg)
108
+ self.fail("Manifest specifies unknown digest algorithm: #{current_alg}")
109
+ end
110
+ else
111
+ if current_alg.nil?
112
+ self.fail("Manifest with unknown checksums encountered, an algorithm must be specified")
113
+ end
114
+ entry_parts = line.split(' ', 2)
115
+ if entry_parts.length != 2
116
+ self.fail("Invalid manifest entry: #{line}")
117
+ end
118
+
119
+ digests_mapping[entry_parts[1]][current_alg] = entry_parts[0]
120
+ end
121
+ end
122
+ end
123
+
124
+ digests_mapping
125
+ end
126
+
127
+ # Parses the provided options to create a selector for registered files
128
+ # @param options [Hash] command options
129
+ # @param app_config_manager [ApplicationConfigManager] app config manager
130
+ # @return selector
131
+ def self.create_registered_selector(options, app_config_manager)
132
+ there_can_be_only_one("Only one of the following selection options may be provided: -l, -f, -s",
133
+ options, :file, :location, :from_list)
134
+
135
+ if !options[:from_list].nil?
136
+ file_paths = read_from_list(options[:from_list])
137
+ return RegisteredFileSelector.new(file_paths: file_paths, app_config: app_config_manager)
138
+ elsif !options[:file].nil?
139
+ file_paths = options[:file].split(/\s*,\s*/)
140
+ return RegisteredFileSelector.new(file_paths: file_paths, app_config: app_config_manager)
141
+ elsif !options[:location].nil?
142
+ storage_locations = options[:location].split(/\s*,\s*/)
143
+ return RegisteredFileSelector.new(storage_locations: storage_locations, app_config: app_config_manager)
144
+ else
145
+ logger.failure("Must provide one of the following file selection options: -l, -f, or -s")
146
+ exit 1
147
+ end
148
+ end
149
+
150
+ # Parses the -l from_list option, reading the list of files specified either from the provided
151
+ # file path or STDIN
152
+ # @param from_list option value, either a file path or "@-"
153
+ # @return list of files from the from_list
154
+ def self.read_from_list(from_list)
155
+ from_list = from_list.strip
156
+ if from_list.empty?
157
+ logger.failure("List parameter must not be empty")
158
+ exit 1
159
+ end
160
+
161
+ if from_list == '@-'
162
+ source_stream = $stdin
163
+ else
164
+ begin
165
+ source_stream = File.new(from_list)
166
+ rescue Errno::ENOENT
167
+ logger.failure("Specified list file does not exist: #{from_list}")
168
+ exit 1
169
+ end
170
+ end
171
+
172
+ lines = []
173
+ source_stream.each_line do |line|
174
+ lines << line.strip
175
+ end
176
+
177
+ if lines.empty?
178
+ logger.failure("File list is empty, must provide one or more files for this operation")
179
+ exit 1
180
+ end
181
+ lines
182
+ end
183
+
184
+ def self.fail(message)
185
+ logger.failure(message)
186
+ exit 1
187
+ end
188
+ end
189
+ end