longleaf 0.1.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185) hide show
  1. checksums.yaml +4 -4
  2. data/.circleci/config.yml +94 -0
  3. data/.editorconfig +13 -0
  4. data/.gitignore +4 -1
  5. data/.rubocop.yml +44 -0
  6. data/.rubocop_todo.yml +834 -0
  7. data/.yardopts +1 -0
  8. data/Gemfile +16 -1
  9. data/README.md +98 -12
  10. data/Rakefile +6 -0
  11. data/bin/setup +16 -1
  12. data/docs/aboutlongleaf.md +28 -0
  13. data/docs/extra.css +32 -0
  14. data/docs/img/change-file.png +0 -0
  15. data/docs/img/ll-example-preserved.png +0 -0
  16. data/docs/index.md +19 -0
  17. data/docs/install.md +66 -0
  18. data/docs/ll-example/config-example-relative.yml +33 -0
  19. data/docs/ll-example/files-dir/LLexample-PDF.pdf +0 -0
  20. data/docs/ll-example/files-dir/LLexample-TOCHANGE.txt +15 -0
  21. data/docs/ll-example/files-dir/LLexample-tokeep.txt +10 -0
  22. data/docs/ll-example/metadata-dir/.gitkeep +0 -0
  23. data/docs/ll-example/replica-files/.gitkeep +0 -0
  24. data/docs/ll-example/replica-metadata/.gitkeep +0 -0
  25. data/docs/quickstart.md +270 -0
  26. data/docs/rdocs/Longleaf.html +135 -0
  27. data/docs/rdocs/Longleaf/AppFields.html +178 -0
  28. data/docs/rdocs/Longleaf/ApplicationConfigDeserializer.html +631 -0
  29. data/docs/rdocs/Longleaf/ApplicationConfigManager.html +610 -0
  30. data/docs/rdocs/Longleaf/ApplicationConfigValidator.html +238 -0
  31. data/docs/rdocs/Longleaf/CLI.html +909 -0
  32. data/docs/rdocs/Longleaf/ChecksumMismatchError.html +151 -0
  33. data/docs/rdocs/Longleaf/ConfigBuilder.html +1339 -0
  34. data/docs/rdocs/Longleaf/ConfigurationError.html +143 -0
  35. data/docs/rdocs/Longleaf/ConfigurationValidator.html +227 -0
  36. data/docs/rdocs/Longleaf/DeregisterCommand.html +420 -0
  37. data/docs/rdocs/Longleaf/DeregisterEvent.html +453 -0
  38. data/docs/rdocs/Longleaf/DeregistrationError.html +151 -0
  39. data/docs/rdocs/Longleaf/DigestHelper.html +419 -0
  40. data/docs/rdocs/Longleaf/EventError.html +147 -0
  41. data/docs/rdocs/Longleaf/EventNames.html +163 -0
  42. data/docs/rdocs/Longleaf/EventStatusTracking.html +656 -0
  43. data/docs/rdocs/Longleaf/FileCheckService.html +540 -0
  44. data/docs/rdocs/Longleaf/FileHelpers.html +520 -0
  45. data/docs/rdocs/Longleaf/FileRecord.html +716 -0
  46. data/docs/rdocs/Longleaf/FileSelector.html +901 -0
  47. data/docs/rdocs/Longleaf/FixityCheckService.html +691 -0
  48. data/docs/rdocs/Longleaf/IndexManager.html +1155 -0
  49. data/docs/rdocs/Longleaf/InvalidDigestAlgorithmError.html +143 -0
  50. data/docs/rdocs/Longleaf/InvalidStoragePathError.html +143 -0
  51. data/docs/rdocs/Longleaf/Logging.html +405 -0
  52. data/docs/rdocs/Longleaf/Logging/RedirectingLogger.html +1213 -0
  53. data/docs/rdocs/Longleaf/LongleafError.html +139 -0
  54. data/docs/rdocs/Longleaf/MDFields.html +193 -0
  55. data/docs/rdocs/Longleaf/MetadataBuilder.html +787 -0
  56. data/docs/rdocs/Longleaf/MetadataDeserializer.html +537 -0
  57. data/docs/rdocs/Longleaf/MetadataError.html +143 -0
  58. data/docs/rdocs/Longleaf/MetadataPersistenceManager.html +539 -0
  59. data/docs/rdocs/Longleaf/MetadataRecord.html +1411 -0
  60. data/docs/rdocs/Longleaf/MetadataSerializer.html +786 -0
  61. data/docs/rdocs/Longleaf/PreservationServiceError.html +147 -0
  62. data/docs/rdocs/Longleaf/PreserveCommand.html +410 -0
  63. data/docs/rdocs/Longleaf/PreserveEvent.html +491 -0
  64. data/docs/rdocs/Longleaf/RegisterCommand.html +428 -0
  65. data/docs/rdocs/Longleaf/RegisterEvent.html +628 -0
  66. data/docs/rdocs/Longleaf/RegisteredFileSelector.html +446 -0
  67. data/docs/rdocs/Longleaf/RegistrationError.html +151 -0
  68. data/docs/rdocs/Longleaf/ReindexCommand.html +576 -0
  69. data/docs/rdocs/Longleaf/RsyncReplicationService.html +1180 -0
  70. data/docs/rdocs/Longleaf/SequelIndexDriver.html +1978 -0
  71. data/docs/rdocs/Longleaf/ServiceCandidateFilesystemIterator.html +572 -0
  72. data/docs/rdocs/Longleaf/ServiceCandidateIndexIterator.html +532 -0
  73. data/docs/rdocs/Longleaf/ServiceCandidateLocator.html +333 -0
  74. data/docs/rdocs/Longleaf/ServiceClassCache.html +725 -0
  75. data/docs/rdocs/Longleaf/ServiceDateHelper.html +425 -0
  76. data/docs/rdocs/Longleaf/ServiceDefinition.html +683 -0
  77. data/docs/rdocs/Longleaf/ServiceDefinitionManager.html +371 -0
  78. data/docs/rdocs/Longleaf/ServiceDefinitionValidator.html +269 -0
  79. data/docs/rdocs/Longleaf/ServiceFields.html +173 -0
  80. data/docs/rdocs/Longleaf/ServiceManager.html +1229 -0
  81. data/docs/rdocs/Longleaf/ServiceMappingManager.html +410 -0
  82. data/docs/rdocs/Longleaf/ServiceMappingValidator.html +347 -0
  83. data/docs/rdocs/Longleaf/ServiceRecord.html +821 -0
  84. data/docs/rdocs/Longleaf/StorageLocation.html +985 -0
  85. data/docs/rdocs/Longleaf/StorageLocationManager.html +729 -0
  86. data/docs/rdocs/Longleaf/StorageLocationUnavailableError.html +143 -0
  87. data/docs/rdocs/Longleaf/StorageLocationValidator.html +373 -0
  88. data/docs/rdocs/Longleaf/StoragePathValidator.html +253 -0
  89. data/docs/rdocs/Longleaf/SystemConfigBuilder.html +441 -0
  90. data/docs/rdocs/Longleaf/SystemConfigFields.html +163 -0
  91. data/docs/rdocs/Longleaf/ValidateConfigCommand.html +451 -0
  92. data/docs/rdocs/Longleaf/ValidateMetadataCommand.html +408 -0
  93. data/docs/rdocs/_index.html +660 -0
  94. data/docs/rdocs/class_list.html +51 -0
  95. data/docs/rdocs/css/common.css +1 -0
  96. data/docs/rdocs/css/full_list.css +58 -0
  97. data/docs/rdocs/css/style.css +496 -0
  98. data/docs/rdocs/file.README.html +165 -0
  99. data/docs/rdocs/file_list.html +56 -0
  100. data/docs/rdocs/frames.html +17 -0
  101. data/docs/rdocs/index.html +165 -0
  102. data/docs/rdocs/js/app.js +303 -0
  103. data/docs/rdocs/js/full_list.js +216 -0
  104. data/docs/rdocs/js/jquery.js +4 -0
  105. data/docs/rdocs/method_list.html +2051 -0
  106. data/docs/rdocs/top-level-namespace.html +110 -0
  107. data/lib/longleaf/candidates/file_selector.rb +150 -0
  108. data/lib/longleaf/candidates/manifest_digest_provider.rb +17 -0
  109. data/lib/longleaf/candidates/physical_path_provider.rb +17 -0
  110. data/lib/longleaf/candidates/registered_file_selector.rb +67 -0
  111. data/lib/longleaf/candidates/service_candidate_filesystem_iterator.rb +93 -0
  112. data/lib/longleaf/candidates/service_candidate_index_iterator.rb +84 -0
  113. data/lib/longleaf/candidates/service_candidate_locator.rb +23 -0
  114. data/lib/longleaf/candidates/single_digest_provider.rb +13 -0
  115. data/lib/longleaf/cli.rb +252 -46
  116. data/lib/longleaf/commands/deregister_command.rb +51 -0
  117. data/lib/longleaf/commands/preserve_command.rb +50 -0
  118. data/lib/longleaf/commands/register_command.rb +34 -43
  119. data/lib/longleaf/commands/reindex_command.rb +92 -0
  120. data/lib/longleaf/commands/validate_config_command.rb +33 -8
  121. data/lib/longleaf/commands/validate_metadata_command.rb +51 -0
  122. data/lib/longleaf/errors.rb +26 -7
  123. data/lib/longleaf/events/deregister_event.rb +53 -0
  124. data/lib/longleaf/events/event_names.rb +9 -0
  125. data/lib/longleaf/events/event_status_tracking.rb +59 -0
  126. data/lib/longleaf/events/preserve_event.rb +82 -0
  127. data/lib/longleaf/events/register_event.rb +59 -51
  128. data/lib/longleaf/helpers/case_insensitive_hash.rb +38 -0
  129. data/lib/longleaf/helpers/digest_helper.rb +56 -0
  130. data/lib/longleaf/helpers/s3_uri_helper.rb +86 -0
  131. data/lib/longleaf/helpers/selection_options_parser.rb +215 -0
  132. data/lib/longleaf/helpers/service_date_helper.rb +78 -0
  133. data/lib/longleaf/indexing/index_manager.rb +101 -0
  134. data/lib/longleaf/indexing/sequel_index_driver.rb +306 -0
  135. data/lib/longleaf/logging.rb +5 -4
  136. data/lib/longleaf/logging/redirecting_logger.rb +30 -25
  137. data/lib/longleaf/models/app_fields.rb +7 -2
  138. data/lib/longleaf/models/file_record.rb +31 -8
  139. data/lib/longleaf/models/filesystem_metadata_location.rb +56 -0
  140. data/lib/longleaf/models/filesystem_storage_location.rb +52 -0
  141. data/lib/longleaf/models/md_fields.rb +3 -1
  142. data/lib/longleaf/models/metadata_location.rb +47 -0
  143. data/lib/longleaf/models/metadata_record.rb +43 -16
  144. data/lib/longleaf/models/s3_storage_location.rb +138 -0
  145. data/lib/longleaf/models/service_definition.rb +7 -6
  146. data/lib/longleaf/models/service_fields.rb +7 -1
  147. data/lib/longleaf/models/service_record.rb +10 -6
  148. data/lib/longleaf/models/storage_location.rb +24 -19
  149. data/lib/longleaf/models/storage_types.rb +9 -0
  150. data/lib/longleaf/models/system_config_fields.rb +9 -0
  151. data/lib/longleaf/preservation_services/file_check_service.rb +59 -0
  152. data/lib/longleaf/preservation_services/fixity_check_service.rb +124 -0
  153. data/lib/longleaf/preservation_services/rsync_replication_service.rb +198 -0
  154. data/lib/longleaf/preservation_services/s3_replication_service.rb +131 -0
  155. data/lib/longleaf/services/application_config_deserializer.rb +81 -24
  156. data/lib/longleaf/services/application_config_manager.rb +20 -6
  157. data/lib/longleaf/services/application_config_validator.rb +19 -9
  158. data/lib/longleaf/services/configuration_validator.rb +67 -4
  159. data/lib/longleaf/services/filesystem_location_validator.rb +16 -0
  160. data/lib/longleaf/services/metadata_deserializer.rb +115 -42
  161. data/lib/longleaf/services/metadata_persistence_manager.rb +47 -0
  162. data/lib/longleaf/services/metadata_serializer.rb +156 -23
  163. data/lib/longleaf/services/metadata_validator.rb +76 -0
  164. data/lib/longleaf/services/s3_location_validator.rb +19 -0
  165. data/lib/longleaf/services/service_class_cache.rb +112 -0
  166. data/lib/longleaf/services/service_definition_manager.rb +10 -7
  167. data/lib/longleaf/services/service_definition_validator.rb +25 -18
  168. data/lib/longleaf/services/service_manager.rb +86 -11
  169. data/lib/longleaf/services/service_mapping_manager.rb +13 -12
  170. data/lib/longleaf/services/service_mapping_validator.rb +36 -26
  171. data/lib/longleaf/services/storage_location_manager.rb +76 -15
  172. data/lib/longleaf/services/storage_location_validator.rb +49 -35
  173. data/lib/longleaf/specs/config_builder.rb +47 -23
  174. data/lib/longleaf/specs/config_validator_helpers.rb +16 -0
  175. data/lib/longleaf/specs/custom_matchers.rb +9 -0
  176. data/lib/longleaf/specs/file_helpers.rb +61 -0
  177. data/lib/longleaf/specs/metadata_builder.rb +98 -0
  178. data/lib/longleaf/specs/system_config_builder.rb +27 -0
  179. data/lib/longleaf/version.rb +1 -1
  180. data/longleaf.gemspec +20 -7
  181. data/mkdocs.yml +21 -0
  182. metadata +308 -24
  183. data/.travis.yml +0 -4
  184. data/lib/longleaf/commands/abstract_command.rb +0 -37
  185. data/lib/longleaf/services/storage_path_validator.rb +0 -16
@@ -0,0 +1,131 @@
1
+ require 'longleaf/events/event_names'
2
+ require 'longleaf/logging'
3
+ require 'longleaf/errors'
4
+ require 'longleaf/models/file_record'
5
+ require 'longleaf/models/service_fields'
6
+ require 'longleaf/events/register_event'
7
+ require 'longleaf/models/storage_types'
8
+ require 'aws-sdk-s3'
9
+
10
+ module Longleaf
11
+ # Preservation service which performs replication of a file to one or more s3 destinations.
12
+ #
13
+ # The service definition must contain one or more destinations, specified with the "to" property.
14
+ # These destinations must be known s3 storage locations. The s3 client configuration
15
+ # is controlled by the storage location.
16
+ #
17
+ # Optional service configuration properties:
18
+ # * replica_collision_policy = specifies the desired outcome if the service attempts to replicate
19
+ # a file which already exists at a destination. Default: "replace".
20
+ class S3ReplicationService
21
+ include Longleaf::Logging
22
+ ST ||= Longleaf::StorageTypes
23
+ SF ||= Longleaf::ServiceFields
24
+
25
+ attr_reader :collision_policy
26
+
27
+ # Initialize a S3ReplicationService from the given service definition
28
+ #
29
+ # @param service_def [ServiceDefinition] the configuration for this service
30
+ # @param app_manager [ApplicationConfigManager] the application configuration
31
+ def initialize(service_def, app_manager)
32
+ @service_def = service_def
33
+ @app_manager = app_manager
34
+
35
+ # Set and validate the replica collision policy
36
+ @collision_policy = @service_def.properties[SF::COLLISION_PROPERTY] || SF::DEFAULT_COLLISION_POLICY
37
+ if !SF::VALID_COLLISION_POLICIES.include?(@collision_policy)
38
+ raise ArgumentError.new("Service #{service_def.name} received invalid #{SF::COLLISION_PROPERTY}" \
39
+ + " value #{@collision_policy}")
40
+ end
41
+
42
+ # Store and validate destinations
43
+ replicate_to = @service_def.properties[SF::REPLICATE_TO]
44
+ if replicate_to.nil? || replicate_to.empty?
45
+ raise ArgumentError.new("Service #{service_def.name} must provide one or more replication destinations.")
46
+ end
47
+ replicate_to = [replicate_to] if replicate_to.is_a?(String)
48
+
49
+ loc_manager = app_manager.location_manager
50
+ # Build list of destinations, translating to storage locations when relevant
51
+ @destinations = Array.new
52
+ replicate_to.each do |dest|
53
+ if loc_manager.locations.key?(dest)
54
+ location = loc_manager.locations[dest]
55
+ if location.type != ST::S3_STORAGE_TYPE
56
+ raise ArgumentError.new(
57
+ "Service #{service_def.name} specifies destination #{dest} which is not of type 's3'")
58
+ end
59
+ @destinations << loc_manager.locations[dest]
60
+ else
61
+ raise ArgumentError.new("Service #{service_def.name} specifies unknown storage location '#{dest}'" \
62
+ + " as a replication destination")
63
+ end
64
+ end
65
+ end
66
+
67
+ # During a replication event, perform replication of the specified file to all configured destinations
68
+ # as necessary.
69
+ #
70
+ # @param file_rec [FileRecord] record representing the file to perform the service on.
71
+ # @param event [String] name of the event this service is being invoked by.
72
+ # @raise [PreservationServiceError] if the source storage type is unsupported or the transfer to s3 fails
73
+ def perform(file_rec, event)
74
+ if file_rec.storage_location.type == ST::FILESYSTEM_STORAGE_TYPE
75
+ replicate_from_fs(file_rec)
76
+ else
77
+ raise PreservationServiceError.new("Replication from storage location of type " \
78
+ + "#{file_rec.storage_location.type} to s3 is not supported")
79
+ end
80
+ end
81
+
82
+ def replicate_from_fs(file_rec)
83
+ # Determine the path to the file being replicated relative to its storage location
84
+ rel_path = file_rec.storage_location.relativize(file_rec.path)
85
+
86
+ @destinations.each do |destination|
87
+ # Check that the destination is available before attempting to write
88
+ verify_destination_available(destination, file_rec)
89
+
90
+ rel_to_bucket = destination.relative_to_bucket_path(rel_path)
91
+ file_obj = destination.s3_bucket.object(rel_to_bucket)
92
+ begin
93
+ file_obj.upload_file(file_rec.physical_path)
94
+ rescue Aws::S3::Errors::BadDigest => e
95
+ raise ChecksumMismatchError.new("Transfer to bucket '#{destination.s3_bucket.name}' failed, " \
96
+ + "MD5 provided did not match the received content for #{file_rec.path}")
97
+ rescue Aws::Errors::ServiceError => e
98
+ raise PreservationServiceError.new("Failed to transfer #{file_rec.path} to bucket " \
99
+ + "'#{destination.s3_bucket.name}': #{e.message}")
100
+ end
101
+
102
+ logger.info("Replicated #{file_rec.path} to destination #{file_obj.public_url}")
103
+
104
+ # TODO register file in destination
105
+ end
106
+ end
107
+
108
+ # Determine if this service is applicable for the provided event, given the configured service definition
109
+ #
110
+ # @param event [String] name of the event
111
+ # @return [Boolean] returns true if this service is applicable for the provided event
112
+ def is_applicable?(event)
113
+ case event
114
+ when EventNames::PRESERVE
115
+ true
116
+ else
117
+ false
118
+ end
119
+ end
120
+
121
+ private
122
+ def verify_destination_available(destination, file_rec)
123
+ begin
124
+ destination.available?
125
+ rescue StorageLocationUnavailableError => e
126
+ raise StorageLocationUnavailableError.new("Cannot replicate #{file_rec.path} to destination #{destination.name}: " \
127
+ + e.message)
128
+ end
129
+ end
130
+ end
131
+ end
@@ -1,46 +1,103 @@
1
1
  require 'longleaf/services/application_config_validator'
2
2
  require 'longleaf/services/application_config_manager'
3
+ require 'digest/md5'
4
+ require 'pathname'
3
5
 
4
- # Deserializer for application configuration files
5
6
  module Longleaf
7
+ # Deserializer for application configuration files
6
8
  class ApplicationConfigDeserializer
7
-
9
+ AF ||= Longleaf::AppFields
10
+
8
11
  # Deserializes a valid application configuration file as an ApplicationConfigManager object
9
- # @param config_path [String] file path to the application configuration file
12
+ # @param config_path [String] file path to the service and storage mapping configuration file
10
13
  # @param format [String] encoding format of the config file
11
- # return [Longleaf::ApplicationConfigManager] manager for the loaded configuration
14
+ # @return [ApplicationConfigManager] manager for the loaded configuration
12
15
  def self.deserialize(config_path, format: 'yaml')
13
- config = load(config_path, format: format)
14
-
15
- Longleaf::ApplicationConfigValidator.validate(config)
16
- Longleaf::ApplicationConfigManager.new(config)
16
+ content = load_config_file(config_path)
17
+ config = load(content, format)
18
+
19
+ config_md5 = Digest::MD5.hexdigest(content)
20
+
21
+ make_paths_absolute(config_path, config)
22
+
23
+ ApplicationConfigValidator.new(config).validate_config.raise_if_invalid
24
+ ApplicationConfigManager.new(config, config_md5)
17
25
  end
18
-
26
+
27
+ def self.load_config_file(config_path)
28
+ begin
29
+ File.read(config_path)
30
+ rescue Errno::ENOENT
31
+ raise Longleaf::ConfigurationError.new(
32
+ "Configuration file #{config_path} does not exist.")
33
+ end
34
+ end
35
+
19
36
  # Deserialize a configuration file into a hash
20
- # @param config_path [String] file path to the application configuration file
37
+ # @param content [String] the contents of the application configuration file
21
38
  # @param format [String] encoding format of the config file
22
39
  # return [Hash] hash containing the configuration
23
- def self.load(config_path, format: 'yaml')
40
+ def self.load(content, format)
24
41
  case format
25
42
  when 'yaml'
26
- from_yaml(config_path)
43
+ from_yaml(content)
27
44
  else
28
- raise ArgumentError.new('Invalid deserialization format #{format} specified')
45
+ raise ArgumentError.new("Invalid deserialization format #{format} specified")
29
46
  end
30
47
  end
31
-
32
- private
33
- def self.from_yaml(config_path)
48
+
49
+ def self.from_yaml(content)
34
50
  begin
35
- YAML.load_file(config_path)
36
- rescue Errno::ENOENT => err
37
- raise Longleaf::ConfigurationError.new(
38
- "Cannot load application configuration, file #{config_path} does not exist.")
51
+ YAML.safe_load(content, [], [], true)
39
52
  rescue => err
40
- raise Longleaf::ConfigurationError.new(
41
- %Q(Failed to load application configuration due to the following reason:
42
- #{err.message}))
53
+ raise Longleaf::ConfigurationError.new(err)
43
54
  end
44
55
  end
56
+
57
+ def self.make_paths_absolute(config_path, config)
58
+ base_pathname = Pathname.new(config_path).expand_path.parent
59
+
60
+ config[AF::LOCATIONS].each do |name, properties|
61
+ properties[AF::LOCATION_PATH] = make_file_paths_absolute(base_pathname, properties)
62
+
63
+ # Resolve single field metadata location into expanded form
64
+ md_config = properties[AF::METADATA_CONFIG]
65
+ if md_config.nil?
66
+ next
67
+ end
68
+ if md_config.is_a?(String)
69
+ md_config = { AF::LOCATION => m_config }
70
+ end
71
+ md_config[AF::LOCATION_PATH] = make_file_paths_absolute(base_pathname, md_config)
72
+ end
73
+ end
74
+
75
+ def self.make_file_paths_absolute(base_pathname, properties)
76
+ path = properties[AF::LOCATION_PATH]
77
+ return nil if path.nil?
78
+
79
+ uri = URI(path)
80
+
81
+ if uri.scheme.nil? || uri.scheme.casecmp("file") == 0
82
+ absolution(base_pathname, uri.path)
83
+ else
84
+ path
85
+ end
86
+ end
87
+
88
+ def self.absolution(base_pathname, file_path)
89
+ if file_path.nil?
90
+ nil
91
+ else
92
+ path = Pathname.new(file_path)
93
+ if path.absolute?
94
+ path = path.expand_path.to_s
95
+ else
96
+ path = (base_pathname + path).to_s
97
+ end
98
+ end
99
+ end
100
+
101
+ private_class_method :load_config_file
45
102
  end
46
- end
103
+ end
@@ -5,20 +5,34 @@ require_relative 'service_definition_manager'
5
5
  require_relative 'service_mapping_validator'
6
6
  require_relative 'service_mapping_manager'
7
7
  require_relative 'service_manager'
8
+ require_relative 'metadata_persistence_manager'
9
+ require 'longleaf/indexing/index_manager'
10
+ require 'longleaf/models/app_fields'
8
11
 
9
- # Manager which loads and provides access to the configuration of the application
10
12
  module Longleaf
13
+ # Manager which loads and provides access to the configuration of the application
11
14
  class ApplicationConfigManager
15
+ attr_reader :config_md5
12
16
  attr_reader :service_manager
13
17
  attr_reader :location_manager
14
-
15
- def initialize(config)
18
+ attr_reader :index_manager
19
+ attr_reader :md_manager
20
+
21
+ def initialize(config, config_md5 = nil)
22
+ @config_md5 = config_md5
23
+
16
24
  @location_manager = Longleaf::StorageLocationManager.new(config)
17
-
25
+
18
26
  definition_manager = Longleaf::ServiceDefinitionManager.new(config)
19
27
  mapping_manager = Longleaf::ServiceMappingManager.new(config)
20
28
  @service_manager = Longleaf::ServiceManager.new(
21
- definition_manager: definition_manager, mapping_manager: mapping_manager)
29
+ definition_manager: definition_manager,
30
+ mapping_manager: mapping_manager,
31
+ app_manager: self)
32
+
33
+ sys_config = config[AppFields::SYSTEM]
34
+ @index_manager = IndexManager.new(sys_config, self)
35
+ @md_manager = MetadataPersistenceManager.new(@index_manager)
22
36
  end
23
37
  end
24
- end
38
+ end
@@ -2,17 +2,27 @@ require_relative 'storage_location_validator'
2
2
  require_relative 'service_definition_validator'
3
3
  require_relative 'service_mapping_validator'
4
4
 
5
- # Validator for Longleaf application configuration
6
5
  module Longleaf
7
- class ApplicationConfigValidator
8
-
6
+ # Validator for Longleaf application configuration
7
+ class ApplicationConfigValidator < ConfigurationValidator
8
+ # @param config [Hash] hash containing the application configuration
9
+ def initialize(config)
10
+ super(config)
11
+ end
12
+
13
+ protected
9
14
  # Validates the application configuration provided. Will raise ConfigurationError
10
15
  # if any portion of the configuration is not syntactically or semantically valid.
11
- # @param config [Hash] application configuration
12
- def self.validate(config)
13
- Longleaf::StorageLocationValidator::validate_config(config)
14
- Longleaf::ServiceDefinitionValidator::validate_config(config)
15
- Longleaf::ServiceMappingValidator::validate_config(config)
16
+ def validate
17
+ loc_result = StorageLocationValidator.new(@config).validate_config
18
+ defs_result = ServiceDefinitionValidator.new(@config).validate_config
19
+ mapping_result = ServiceMappingValidator.new(@config).validate_config
20
+
21
+ @result.errors.concat(loc_result.errors) unless loc_result.valid?
22
+ @result.errors.concat(defs_result.errors) unless defs_result.valid?
23
+ @result.errors.concat(mapping_result.errors) unless mapping_result.valid?
24
+
25
+ @result
16
26
  end
17
27
  end
18
- end
28
+ end
@@ -1,8 +1,71 @@
1
1
  module Longleaf
2
+ # Abstract configuration validator class
2
3
  class ConfigurationValidator
3
- protected
4
- def self.assert(fail_message, assertion_passed)
5
- raise ConfigurationError.new(fail_message) unless assertion_passed
4
+ attr_reader :result
5
+
6
+ def initialize(config)
7
+ @result = ConfigurationValidationResult.new
8
+ @config = config
9
+ end
10
+
11
+ # Verify that the provided configuration is valid
12
+ # @return [ConfigurationValidationResult] the result of the validation
13
+ def validate_config
14
+ register_on_failure { validate }
15
+
16
+ @result
17
+ end
18
+
19
+ # Asserts that the given conditional is true, raising a ConfigurationError if it is not.
20
+ def assert(fail_message, assertion_passed)
21
+ fail(fail_message) unless assertion_passed
22
+ end
23
+
24
+ # Indicate that validation has failed, throwing a Configuration error with the given message
25
+ def fail(fail_message)
26
+ raise ConfigurationError.new(fail_message)
27
+ end
28
+
29
+ # Registers an error to the result for this validator
30
+ def register_error(error)
31
+ if error.is_a?(StandardError)
32
+ @result.register_error(error.msg)
33
+ else
34
+ @result.register_error(error)
35
+ end
36
+ end
37
+
38
+ # Performs the provided block. If the block produces a ConfigurationError, the error
39
+ # is swallowed and registered to the result
40
+ def register_on_failure
41
+ begin
42
+ yield
43
+ rescue ConfigurationError => err
44
+ register_error(err.message)
45
+ end
46
+ end
47
+ end
48
+
49
+ class ConfigurationValidationResult
50
+ attr_reader :errors
51
+
52
+ def initialize
53
+ @errors = Array.new
54
+ end
55
+
56
+ # Register an error with this validation result
57
+ def register_error(error_message)
58
+ @errors << error_message
59
+ end
60
+
61
+ # @return [boolean] true if validation produced not errors
62
+ def valid?
63
+ @errors.length == 0
64
+ end
65
+
66
+ # @raise [ConfigurationError] if the result is not valid, which lists all failures
67
+ def raise_if_invalid
68
+ raise ConfigurationError.new(@errors.join("\n")) unless valid?
6
69
  end
7
70
  end
8
- end
71
+ end
@@ -0,0 +1,16 @@
1
+ require 'pathname'
2
+ require 'longleaf/errors'
3
+
4
+ module Longleaf
5
+ # Validates the configuration of a filesystem based location
6
+ class FilesystemLocationValidator
7
+
8
+ def self.validate(p_validator, name, path_prop, section_name, path)
9
+ base_msg = "Storage location '#{name}' specifies invalid #{section_name} '#{path_prop}' property: "
10
+ p_validator.assert(base_msg + 'Path must not be empty', !path.nil? && !path.to_s.strip.empty?)
11
+ p_validator.assert(base_msg + 'Path must not contain any relative modifiers (/..)', !path.include?('/..'))
12
+ p_validator.assert(base_msg + 'Path must be absolute', Pathname.new(path).absolute?)
13
+ p_validator.assert(base_msg + 'Path does not exist', Dir.exist?(path))
14
+ end
15
+ end
16
+ end
@@ -1,29 +1,30 @@
1
1
  require 'yaml'
2
- require_relative '../models/metadata_record'
3
- require_relative '../models/md_fields'
4
- require_relative '../errors'
2
+ require 'longleaf/models/metadata_record'
3
+ require 'longleaf/models/md_fields'
4
+ require 'longleaf/services/metadata_validator'
5
+ require 'longleaf/errors'
6
+ require 'longleaf/logging'
5
7
 
6
- # Service which deserializes metadata files into MetadataRecord objects
7
8
  module Longleaf
9
+ # Service which deserializes metadata files into MetadataRecord objects
8
10
  class MetadataDeserializer
9
- MDF = Longleaf::MDFields
10
-
11
+ extend Longleaf::Logging
12
+ MDF ||= MDFields
13
+
11
14
  # Deserialize a file into a MetadataRecord object
12
15
  #
13
16
  # @param file_path [String] path of the file to read. Required.
14
17
  # @param format [String] format the file is stored in. Default is 'yaml'.
15
- def self.deserialize(file_path:, format: 'yaml')
18
+ def self.deserialize(file_path:, format: 'yaml', digest_algs: [])
19
+ file_path = file_path.path if file_path.is_a?(File)
20
+
16
21
  case format
17
22
  when 'yaml'
18
- md = from_yaml(file_path)
23
+ md = from_yaml(file_path, digest_algs)
19
24
  else
20
- raise ArgumentError.new('Invalid deserialization format #{format} specified')
25
+ raise ArgumentError.new("Invalid deserialization format #{format} specified")
21
26
  end
22
-
23
- if !md || !md.key?(MDF::DATA) || !md.key?(MDF::SERVICES)
24
- raise Longleaf::MetadataError.new("Invalid metadata file, did not contain data or services fields: #{file_path}")
25
- end
26
-
27
+
27
28
  data = Hash.new.merge(md[MDF::DATA])
28
29
  # Extract reserved properties for submission as separate parameters
29
30
  registered = data.delete(MDFields::REGISTERED_TIMESTAMP)
@@ -31,38 +32,110 @@ module Longleaf
31
32
  checksums = data.delete(MDFields::CHECKSUMS)
32
33
  file_size = data.delete(MDFields::FILE_SIZE)
33
34
  last_modified = data.delete(MDFields::LAST_MODIFIED)
34
-
35
+ physical_path = data.delete(MDFields::PHYSICAL_PATH)
36
+
35
37
  services = md[MDF::SERVICES]
36
38
  service_records = Hash.new
37
- unless services.nil?
38
- services.each do |name, props|
39
- raise Longleaf::MetadataError.new("Value of service #{name} must be a hash") unless props.class == Hash
40
-
41
- service_props = Hash.new.merge(props)
42
-
43
- stale_replicas = service_props.delete(MDFields::STALE_REPLICAS)
44
- timestamp = service_props.delete(MDFields::SERVICE_TIMESTAMP)
45
- run_needed = service_props.delete(MDFields::RUN_NEEDED)
46
-
47
- service_records[name] = ServiceRecord.new(
48
- properties: service_props,
49
- stale_replicas: stale_replicas,
50
- timestamp: timestamp,
51
- run_needed: run_needed)
52
- end
39
+ services&.each do |name, props|
40
+ raise MetadataError.new("Value of service #{name} must be a hash") unless props.class == Hash
41
+
42
+ service_props = Hash.new.merge(props)
43
+
44
+ stale_replicas = service_props.delete(MDFields::STALE_REPLICAS)
45
+ timestamp = service_props.delete(MDFields::SERVICE_TIMESTAMP)
46
+ run_needed = service_props.delete(MDFields::RUN_NEEDED)
47
+
48
+ service_records[name] = ServiceRecord.new(
49
+ properties: service_props,
50
+ stale_replicas: stale_replicas,
51
+ timestamp: timestamp,
52
+ run_needed: run_needed)
53
53
  end
54
-
54
+
55
55
  MetadataRecord.new(properties: data,
56
- services: service_records,
57
- registered: registered,
58
- deregistered: deregistered,
59
- checksums: checksums,
60
- file_size: file_size,
61
- last_modified: last_modified)
56
+ services: service_records,
57
+ registered: registered,
58
+ deregistered: deregistered,
59
+ checksums: checksums,
60
+ file_size: file_size,
61
+ last_modified: last_modified,
62
+ physical_path: physical_path)
62
63
  end
63
-
64
- def self.from_yaml(file_path)
65
- YAML.load_file(file_path)
64
+
65
+ # Load configuration a yaml encoded configuration file
66
+ def self.from_yaml(file_path, digest_algs)
67
+ File.open(file_path, 'r:bom|utf-8') do |f|
68
+ contents = f.read
69
+
70
+ checksum_error = nil
71
+ begin
72
+ verify_digests(file_path, contents, digest_algs)
73
+ rescue ChecksumMismatchError => err
74
+ # Hold onto the checksum error, in case we can identify the underlying cause
75
+ checksum_error = err
76
+ end
77
+
78
+ begin
79
+ md = nil
80
+ begin
81
+ md = YAML.safe_load(contents, [], [], true)
82
+ rescue => err
83
+ raise MetadataError.new("Failed to parse metadata file #{file_path}: #{err.message}")
84
+ end
85
+
86
+ validation_result = MetadataValidator.new(md).validate_config
87
+ if !validation_result.valid?
88
+ if checksum_error.nil?
89
+ raise MetadataError.new("Invalid metadata file #{file_path.to_s}:\n#{validation_result.errors.join("\n")}")
90
+ else
91
+ raise MetadataError.new(validation_result.errors.join("\n"))
92
+ end
93
+ end
94
+
95
+ # Either return the valid metadata, or raise the checksum error as is
96
+ if checksum_error.nil?
97
+ md
98
+ else
99
+ raise checksum_error
100
+ end
101
+ rescue MetadataError => err
102
+ if checksum_error.nil?
103
+ raise err
104
+ else
105
+ # Add underlying cause from the metadata error to the checksum mismatch error
106
+ msg = checksum_error.message + "\nWith related issue(s):\n#{err.message}"
107
+ raise ChecksumMismatchError.new(msg)
108
+ end
109
+ end
110
+ end
111
+ end
112
+
113
+ def self.verify_digests(file_path, contents, digest_algs)
114
+ return if digest_algs.nil? || digest_algs.empty?
115
+
116
+ digest_algs.each do |alg|
117
+ if file_path.respond_to?(:path)
118
+ path = file_path.path
119
+ else
120
+ path = file_path
121
+ end
122
+ digest_path = "#{path}.#{alg}"
123
+ unless File.exist?(digest_path)
124
+ logger.warn("Missing expected #{alg} digest for #{path}")
125
+ next
126
+ end
127
+
128
+ digest = DigestHelper::start_digest(alg)
129
+ result = digest.hexdigest(contents)
130
+ existing_digest = IO.read(digest_path)
131
+
132
+ if result == existing_digest
133
+ logger.info("Metadata fixity check using algorithm '#{alg}' succeeded for file #{path}")
134
+ else
135
+ raise ChecksumMismatchError.new("Metadata digest of type #{alg} did not match the contents of #{path}:" \
136
+ + " expected #{existing_digest}, calculated #{result}")
137
+ end
138
+ end
66
139
  end
67
140
  end
68
- end
141
+ end