longleaf 0.1.0.pre.2 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (184) hide show
  1. checksums.yaml +4 -4
  2. data/.circleci/config.yml +94 -0
  3. data/.editorconfig +13 -0
  4. data/.gitignore +4 -1
  5. data/.rubocop.yml +44 -0
  6. data/.rubocop_todo.yml +834 -0
  7. data/.yardopts +1 -0
  8. data/Gemfile +16 -1
  9. data/README.md +98 -12
  10. data/Rakefile +6 -0
  11. data/bin/setup +16 -1
  12. data/docs/aboutlongleaf.md +28 -0
  13. data/docs/extra.css +32 -0
  14. data/docs/img/change-file.png +0 -0
  15. data/docs/img/ll-example-preserved.png +0 -0
  16. data/docs/index.md +19 -0
  17. data/docs/install.md +66 -0
  18. data/docs/ll-example/config-example-relative.yml +33 -0
  19. data/docs/ll-example/files-dir/LLexample-PDF.pdf +0 -0
  20. data/docs/ll-example/files-dir/LLexample-TOCHANGE.txt +15 -0
  21. data/docs/ll-example/files-dir/LLexample-tokeep.txt +10 -0
  22. data/docs/ll-example/metadata-dir/.gitkeep +0 -0
  23. data/docs/ll-example/replica-files/.gitkeep +0 -0
  24. data/docs/ll-example/replica-metadata/.gitkeep +0 -0
  25. data/docs/quickstart.md +270 -0
  26. data/docs/rdocs/Longleaf.html +135 -0
  27. data/docs/rdocs/Longleaf/AppFields.html +178 -0
  28. data/docs/rdocs/Longleaf/ApplicationConfigDeserializer.html +631 -0
  29. data/docs/rdocs/Longleaf/ApplicationConfigManager.html +610 -0
  30. data/docs/rdocs/Longleaf/ApplicationConfigValidator.html +238 -0
  31. data/docs/rdocs/Longleaf/CLI.html +909 -0
  32. data/docs/rdocs/Longleaf/ChecksumMismatchError.html +151 -0
  33. data/docs/rdocs/Longleaf/ConfigBuilder.html +1339 -0
  34. data/docs/rdocs/Longleaf/ConfigurationError.html +143 -0
  35. data/docs/rdocs/Longleaf/ConfigurationValidator.html +227 -0
  36. data/docs/rdocs/Longleaf/DeregisterCommand.html +420 -0
  37. data/docs/rdocs/Longleaf/DeregisterEvent.html +453 -0
  38. data/docs/rdocs/Longleaf/DeregistrationError.html +151 -0
  39. data/docs/rdocs/Longleaf/DigestHelper.html +419 -0
  40. data/docs/rdocs/Longleaf/EventError.html +147 -0
  41. data/docs/rdocs/Longleaf/EventNames.html +163 -0
  42. data/docs/rdocs/Longleaf/EventStatusTracking.html +656 -0
  43. data/docs/rdocs/Longleaf/FileCheckService.html +540 -0
  44. data/docs/rdocs/Longleaf/FileHelpers.html +520 -0
  45. data/docs/rdocs/Longleaf/FileRecord.html +716 -0
  46. data/docs/rdocs/Longleaf/FileSelector.html +901 -0
  47. data/docs/rdocs/Longleaf/FixityCheckService.html +691 -0
  48. data/docs/rdocs/Longleaf/IndexManager.html +1155 -0
  49. data/docs/rdocs/Longleaf/InvalidDigestAlgorithmError.html +143 -0
  50. data/docs/rdocs/Longleaf/InvalidStoragePathError.html +143 -0
  51. data/docs/rdocs/Longleaf/Logging.html +405 -0
  52. data/docs/rdocs/Longleaf/Logging/RedirectingLogger.html +1213 -0
  53. data/docs/rdocs/Longleaf/LongleafError.html +139 -0
  54. data/docs/rdocs/Longleaf/MDFields.html +193 -0
  55. data/docs/rdocs/Longleaf/MetadataBuilder.html +787 -0
  56. data/docs/rdocs/Longleaf/MetadataDeserializer.html +537 -0
  57. data/docs/rdocs/Longleaf/MetadataError.html +143 -0
  58. data/docs/rdocs/Longleaf/MetadataPersistenceManager.html +539 -0
  59. data/docs/rdocs/Longleaf/MetadataRecord.html +1411 -0
  60. data/docs/rdocs/Longleaf/MetadataSerializer.html +786 -0
  61. data/docs/rdocs/Longleaf/PreservationServiceError.html +147 -0
  62. data/docs/rdocs/Longleaf/PreserveCommand.html +410 -0
  63. data/docs/rdocs/Longleaf/PreserveEvent.html +491 -0
  64. data/docs/rdocs/Longleaf/RegisterCommand.html +428 -0
  65. data/docs/rdocs/Longleaf/RegisterEvent.html +628 -0
  66. data/docs/rdocs/Longleaf/RegisteredFileSelector.html +446 -0
  67. data/docs/rdocs/Longleaf/RegistrationError.html +151 -0
  68. data/docs/rdocs/Longleaf/ReindexCommand.html +576 -0
  69. data/docs/rdocs/Longleaf/RsyncReplicationService.html +1180 -0
  70. data/docs/rdocs/Longleaf/SequelIndexDriver.html +1978 -0
  71. data/docs/rdocs/Longleaf/ServiceCandidateFilesystemIterator.html +572 -0
  72. data/docs/rdocs/Longleaf/ServiceCandidateIndexIterator.html +532 -0
  73. data/docs/rdocs/Longleaf/ServiceCandidateLocator.html +333 -0
  74. data/docs/rdocs/Longleaf/ServiceClassCache.html +725 -0
  75. data/docs/rdocs/Longleaf/ServiceDateHelper.html +425 -0
  76. data/docs/rdocs/Longleaf/ServiceDefinition.html +683 -0
  77. data/docs/rdocs/Longleaf/ServiceDefinitionManager.html +371 -0
  78. data/docs/rdocs/Longleaf/ServiceDefinitionValidator.html +269 -0
  79. data/docs/rdocs/Longleaf/ServiceFields.html +173 -0
  80. data/docs/rdocs/Longleaf/ServiceManager.html +1229 -0
  81. data/docs/rdocs/Longleaf/ServiceMappingManager.html +410 -0
  82. data/docs/rdocs/Longleaf/ServiceMappingValidator.html +347 -0
  83. data/docs/rdocs/Longleaf/ServiceRecord.html +821 -0
  84. data/docs/rdocs/Longleaf/StorageLocation.html +985 -0
  85. data/docs/rdocs/Longleaf/StorageLocationManager.html +729 -0
  86. data/docs/rdocs/Longleaf/StorageLocationUnavailableError.html +143 -0
  87. data/docs/rdocs/Longleaf/StorageLocationValidator.html +373 -0
  88. data/docs/rdocs/Longleaf/StoragePathValidator.html +253 -0
  89. data/docs/rdocs/Longleaf/SystemConfigBuilder.html +441 -0
  90. data/docs/rdocs/Longleaf/SystemConfigFields.html +163 -0
  91. data/docs/rdocs/Longleaf/ValidateConfigCommand.html +451 -0
  92. data/docs/rdocs/Longleaf/ValidateMetadataCommand.html +408 -0
  93. data/docs/rdocs/_index.html +660 -0
  94. data/docs/rdocs/class_list.html +51 -0
  95. data/docs/rdocs/css/common.css +1 -0
  96. data/docs/rdocs/css/full_list.css +58 -0
  97. data/docs/rdocs/css/style.css +496 -0
  98. data/docs/rdocs/file.README.html +165 -0
  99. data/docs/rdocs/file_list.html +56 -0
  100. data/docs/rdocs/frames.html +17 -0
  101. data/docs/rdocs/index.html +165 -0
  102. data/docs/rdocs/js/app.js +303 -0
  103. data/docs/rdocs/js/full_list.js +216 -0
  104. data/docs/rdocs/js/jquery.js +4 -0
  105. data/docs/rdocs/method_list.html +2051 -0
  106. data/docs/rdocs/top-level-namespace.html +110 -0
  107. data/lib/longleaf/candidates/file_selector.rb +139 -0
  108. data/lib/longleaf/candidates/manifest_digest_provider.rb +17 -0
  109. data/lib/longleaf/candidates/registered_file_selector.rb +67 -0
  110. data/lib/longleaf/candidates/service_candidate_filesystem_iterator.rb +93 -0
  111. data/lib/longleaf/candidates/service_candidate_index_iterator.rb +84 -0
  112. data/lib/longleaf/candidates/service_candidate_locator.rb +23 -0
  113. data/lib/longleaf/candidates/single_digest_provider.rb +13 -0
  114. data/lib/longleaf/cli.rb +237 -46
  115. data/lib/longleaf/commands/deregister_command.rb +51 -0
  116. data/lib/longleaf/commands/preserve_command.rb +50 -0
  117. data/lib/longleaf/commands/register_command.rb +32 -43
  118. data/lib/longleaf/commands/reindex_command.rb +92 -0
  119. data/lib/longleaf/commands/validate_config_command.rb +33 -8
  120. data/lib/longleaf/commands/validate_metadata_command.rb +51 -0
  121. data/lib/longleaf/errors.rb +26 -7
  122. data/lib/longleaf/events/deregister_event.rb +53 -0
  123. data/lib/longleaf/events/event_names.rb +9 -0
  124. data/lib/longleaf/events/event_status_tracking.rb +59 -0
  125. data/lib/longleaf/events/preserve_event.rb +81 -0
  126. data/lib/longleaf/events/register_event.rb +52 -51
  127. data/lib/longleaf/helpers/case_insensitive_hash.rb +38 -0
  128. data/lib/longleaf/helpers/digest_helper.rb +56 -0
  129. data/lib/longleaf/helpers/s3_uri_helper.rb +86 -0
  130. data/lib/longleaf/helpers/selection_options_parser.rb +189 -0
  131. data/lib/longleaf/helpers/service_date_helper.rb +78 -0
  132. data/lib/longleaf/indexing/index_manager.rb +101 -0
  133. data/lib/longleaf/indexing/sequel_index_driver.rb +306 -0
  134. data/lib/longleaf/logging.rb +5 -4
  135. data/lib/longleaf/logging/redirecting_logger.rb +26 -25
  136. data/lib/longleaf/models/app_fields.rb +7 -2
  137. data/lib/longleaf/models/file_record.rb +17 -8
  138. data/lib/longleaf/models/filesystem_metadata_location.rb +56 -0
  139. data/lib/longleaf/models/filesystem_storage_location.rb +52 -0
  140. data/lib/longleaf/models/md_fields.rb +2 -1
  141. data/lib/longleaf/models/metadata_location.rb +47 -0
  142. data/lib/longleaf/models/metadata_record.rb +39 -15
  143. data/lib/longleaf/models/s3_storage_location.rb +133 -0
  144. data/lib/longleaf/models/service_definition.rb +7 -6
  145. data/lib/longleaf/models/service_fields.rb +7 -1
  146. data/lib/longleaf/models/service_record.rb +10 -6
  147. data/lib/longleaf/models/storage_location.rb +24 -19
  148. data/lib/longleaf/models/storage_types.rb +9 -0
  149. data/lib/longleaf/models/system_config_fields.rb +9 -0
  150. data/lib/longleaf/preservation_services/file_check_service.rb +58 -0
  151. data/lib/longleaf/preservation_services/fixity_check_service.rb +123 -0
  152. data/lib/longleaf/preservation_services/rsync_replication_service.rb +182 -0
  153. data/lib/longleaf/preservation_services/s3_replication_service.rb +143 -0
  154. data/lib/longleaf/services/application_config_deserializer.rb +81 -24
  155. data/lib/longleaf/services/application_config_manager.rb +20 -6
  156. data/lib/longleaf/services/application_config_validator.rb +19 -9
  157. data/lib/longleaf/services/configuration_validator.rb +67 -4
  158. data/lib/longleaf/services/filesystem_location_validator.rb +16 -0
  159. data/lib/longleaf/services/metadata_deserializer.rb +113 -42
  160. data/lib/longleaf/services/metadata_persistence_manager.rb +47 -0
  161. data/lib/longleaf/services/metadata_serializer.rb +138 -25
  162. data/lib/longleaf/services/metadata_validator.rb +76 -0
  163. data/lib/longleaf/services/s3_location_validator.rb +19 -0
  164. data/lib/longleaf/services/service_class_cache.rb +112 -0
  165. data/lib/longleaf/services/service_definition_manager.rb +10 -7
  166. data/lib/longleaf/services/service_definition_validator.rb +25 -18
  167. data/lib/longleaf/services/service_manager.rb +86 -11
  168. data/lib/longleaf/services/service_mapping_manager.rb +13 -12
  169. data/lib/longleaf/services/service_mapping_validator.rb +36 -26
  170. data/lib/longleaf/services/storage_location_manager.rb +76 -15
  171. data/lib/longleaf/services/storage_location_validator.rb +49 -35
  172. data/lib/longleaf/specs/config_builder.rb +47 -23
  173. data/lib/longleaf/specs/config_validator_helpers.rb +16 -0
  174. data/lib/longleaf/specs/custom_matchers.rb +9 -0
  175. data/lib/longleaf/specs/file_helpers.rb +61 -0
  176. data/lib/longleaf/specs/metadata_builder.rb +92 -0
  177. data/lib/longleaf/specs/system_config_builder.rb +27 -0
  178. data/lib/longleaf/version.rb +1 -1
  179. data/longleaf.gemspec +20 -7
  180. data/mkdocs.yml +21 -0
  181. metadata +306 -23
  182. data/.travis.yml +0 -4
  183. data/lib/longleaf/commands/abstract_command.rb +0 -37
  184. data/lib/longleaf/services/storage_path_validator.rb +0 -16
@@ -5,20 +5,34 @@ require_relative 'service_definition_manager'
5
5
  require_relative 'service_mapping_validator'
6
6
  require_relative 'service_mapping_manager'
7
7
  require_relative 'service_manager'
8
+ require_relative 'metadata_persistence_manager'
9
+ require 'longleaf/indexing/index_manager'
10
+ require 'longleaf/models/app_fields'
8
11
 
9
- # Manager which loads and provides access to the configuration of the application
10
12
  module Longleaf
13
+ # Manager which loads and provides access to the configuration of the application
11
14
  class ApplicationConfigManager
15
+ attr_reader :config_md5
12
16
  attr_reader :service_manager
13
17
  attr_reader :location_manager
14
-
15
- def initialize(config)
18
+ attr_reader :index_manager
19
+ attr_reader :md_manager
20
+
21
+ def initialize(config, config_md5 = nil)
22
+ @config_md5 = config_md5
23
+
16
24
  @location_manager = Longleaf::StorageLocationManager.new(config)
17
-
25
+
18
26
  definition_manager = Longleaf::ServiceDefinitionManager.new(config)
19
27
  mapping_manager = Longleaf::ServiceMappingManager.new(config)
20
28
  @service_manager = Longleaf::ServiceManager.new(
21
- definition_manager: definition_manager, mapping_manager: mapping_manager)
29
+ definition_manager: definition_manager,
30
+ mapping_manager: mapping_manager,
31
+ app_manager: self)
32
+
33
+ sys_config = config[AppFields::SYSTEM]
34
+ @index_manager = IndexManager.new(sys_config, self)
35
+ @md_manager = MetadataPersistenceManager.new(@index_manager)
22
36
  end
23
37
  end
24
- end
38
+ end
@@ -2,17 +2,27 @@ require_relative 'storage_location_validator'
2
2
  require_relative 'service_definition_validator'
3
3
  require_relative 'service_mapping_validator'
4
4
 
5
- # Validator for Longleaf application configuration
6
5
  module Longleaf
7
- class ApplicationConfigValidator
8
-
6
+ # Validator for Longleaf application configuration
7
+ class ApplicationConfigValidator < ConfigurationValidator
8
+ # @param config [Hash] hash containing the application configuration
9
+ def initialize(config)
10
+ super(config)
11
+ end
12
+
13
+ protected
9
14
  # Validates the application configuration provided. Will raise ConfigurationError
10
15
  # if any portion of the configuration is not syntactically or semantically valid.
11
- # @param config [Hash] application configuration
12
- def self.validate(config)
13
- Longleaf::StorageLocationValidator::validate_config(config)
14
- Longleaf::ServiceDefinitionValidator::validate_config(config)
15
- Longleaf::ServiceMappingValidator::validate_config(config)
16
+ def validate
17
+ loc_result = StorageLocationValidator.new(@config).validate_config
18
+ defs_result = ServiceDefinitionValidator.new(@config).validate_config
19
+ mapping_result = ServiceMappingValidator.new(@config).validate_config
20
+
21
+ @result.errors.concat(loc_result.errors) unless loc_result.valid?
22
+ @result.errors.concat(defs_result.errors) unless defs_result.valid?
23
+ @result.errors.concat(mapping_result.errors) unless mapping_result.valid?
24
+
25
+ @result
16
26
  end
17
27
  end
18
- end
28
+ end
@@ -1,8 +1,71 @@
1
1
  module Longleaf
2
+ # Abstract configuration validator class
2
3
  class ConfigurationValidator
3
- protected
4
- def self.assert(fail_message, assertion_passed)
5
- raise ConfigurationError.new(fail_message) unless assertion_passed
4
+ attr_reader :result
5
+
6
+ def initialize(config)
7
+ @result = ConfigurationValidationResult.new
8
+ @config = config
9
+ end
10
+
11
+ # Verify that the provided configuration is valid
12
+ # @return [ConfigurationValidationResult] the result of the validation
13
+ def validate_config
14
+ register_on_failure { validate }
15
+
16
+ @result
17
+ end
18
+
19
+ # Asserts that the given conditional is true, raising a ConfigurationError if it is not.
20
+ def assert(fail_message, assertion_passed)
21
+ fail(fail_message) unless assertion_passed
22
+ end
23
+
24
+ # Indicate that validation has failed, raising a ConfigurationError with the given message
25
+ def fail(fail_message)
26
+ raise ConfigurationError.new(fail_message)
27
+ end
28
+
29
+ # Registers an error to the result for this validator
30
+ def register_error(error)
31
+ if error.is_a?(StandardError)
32
+ @result.register_error(error.msg)
33
+ else
34
+ @result.register_error(error)
35
+ end
36
+ end
37
+
38
+ # Performs the provided block. If the block produces a ConfigurationError, the error
39
+ # is swallowed and registered to the result
40
+ def register_on_failure
41
+ begin
42
+ yield
43
+ rescue ConfigurationError => err
44
+ register_error(err.message)
45
+ end
46
+ end
47
+ end
48
+
49
+ class ConfigurationValidationResult
50
+ attr_reader :errors
51
+
52
+ def initialize
53
+ @errors = Array.new
54
+ end
55
+
56
+ # Register an error with this validation result
57
+ def register_error(error_message)
58
+ @errors << error_message
59
+ end
60
+
61
+ # @return [boolean] true if validation produced no errors
62
+ def valid?
63
+ @errors.length == 0
64
+ end
65
+
66
+ # @raise [ConfigurationError] if the result is not valid, which lists all failures
67
+ def raise_if_invalid
68
+ raise ConfigurationError.new(@errors.join("\n")) unless valid?
6
69
  end
7
70
  end
8
- end
71
+ end
@@ -0,0 +1,16 @@
1
+ require 'pathname'
2
+ require 'longleaf/errors'
3
+
4
+ module Longleaf
5
+ # Validates the configuration of a filesystem based location
6
+ class FilesystemLocationValidator
7
+
8
+ def self.validate(p_validator, name, path_prop, section_name, path)
9
+ base_msg = "Storage location '#{name}' specifies invalid #{section_name} '#{path_prop}' property: "
10
+ p_validator.assert(base_msg + 'Path must not be empty', !path.nil? && !path.to_s.strip.empty?)
11
+ p_validator.assert(base_msg + 'Path must not contain any relative modifiers (/..)', !path.include?('/..'))
12
+ p_validator.assert(base_msg + 'Path must be absolute', Pathname.new(path).absolute?)
13
+ p_validator.assert(base_msg + 'Path does not exist', Dir.exist?(path))
14
+ end
15
+ end
16
+ end
@@ -1,29 +1,30 @@
1
1
  require 'yaml'
2
- require_relative '../models/metadata_record'
3
- require_relative '../models/md_fields'
4
- require_relative '../errors'
2
+ require 'longleaf/models/metadata_record'
3
+ require 'longleaf/models/md_fields'
4
+ require 'longleaf/services/metadata_validator'
5
+ require 'longleaf/errors'
6
+ require 'longleaf/logging'
5
7
 
6
- # Service which deserializes metadata files into MetadataRecord objects
7
8
  module Longleaf
9
+ # Service which deserializes metadata files into MetadataRecord objects
8
10
  class MetadataDeserializer
9
- MDF = Longleaf::MDFields
10
-
11
+ extend Longleaf::Logging
12
+ MDF ||= MDFields
13
+
11
14
  # Deserialize a file into a MetadataRecord object
12
15
  #
13
16
  # @param file_path [String] path of the file to read. Required.
14
17
  # @param format [String] format the file is stored in. Default is 'yaml'.
15
- def self.deserialize(file_path:, format: 'yaml')
18
+ def self.deserialize(file_path:, format: 'yaml', digest_algs: [])
19
+ file_path = file_path.path if file_path.is_a?(File)
20
+
16
21
  case format
17
22
  when 'yaml'
18
- md = from_yaml(file_path)
23
+ md = from_yaml(file_path, digest_algs)
19
24
  else
20
- raise ArgumentError.new('Invalid deserialization format #{format} specified')
25
+ raise ArgumentError.new("Invalid deserialization format #{format} specified")
21
26
  end
22
-
23
- if !md || !md.key?(MDF::DATA) || !md.key?(MDF::SERVICES)
24
- raise Longleaf::MetadataError.new("Invalid metadata file, did not contain data or services fields: #{file_path}")
25
- end
26
-
27
+
27
28
  data = Hash.new.merge(md[MDF::DATA])
28
29
  # Extract reserved properties for submission as separate parameters
29
30
  registered = data.delete(MDFields::REGISTERED_TIMESTAMP)
@@ -31,38 +32,108 @@ module Longleaf
31
32
  checksums = data.delete(MDFields::CHECKSUMS)
32
33
  file_size = data.delete(MDFields::FILE_SIZE)
33
34
  last_modified = data.delete(MDFields::LAST_MODIFIED)
34
-
35
+
35
36
  services = md[MDF::SERVICES]
36
37
  service_records = Hash.new
37
- unless services.nil?
38
- services.each do |name, props|
39
- raise Longleaf::MetadataError.new("Value of service #{name} must be a hash") unless props.class == Hash
40
-
41
- service_props = Hash.new.merge(props)
42
-
43
- stale_replicas = service_props.delete(MDFields::STALE_REPLICAS)
44
- timestamp = service_props.delete(MDFields::SERVICE_TIMESTAMP)
45
- run_needed = service_props.delete(MDFields::RUN_NEEDED)
46
-
47
- service_records[name] = ServiceRecord.new(
48
- properties: service_props,
49
- stale_replicas: stale_replicas,
50
- timestamp: timestamp,
51
- run_needed: run_needed)
52
- end
38
+ services&.each do |name, props|
39
+ raise MetadataError.new("Value of service #{name} must be a hash") unless props.class == Hash
40
+
41
+ service_props = Hash.new.merge(props)
42
+
43
+ stale_replicas = service_props.delete(MDFields::STALE_REPLICAS)
44
+ timestamp = service_props.delete(MDFields::SERVICE_TIMESTAMP)
45
+ run_needed = service_props.delete(MDFields::RUN_NEEDED)
46
+
47
+ service_records[name] = ServiceRecord.new(
48
+ properties: service_props,
49
+ stale_replicas: stale_replicas,
50
+ timestamp: timestamp,
51
+ run_needed: run_needed)
53
52
  end
54
-
53
+
55
54
  MetadataRecord.new(properties: data,
56
- services: service_records,
57
- registered: registered,
58
- deregistered: deregistered,
59
- checksums: checksums,
60
- file_size: file_size,
61
- last_modified: last_modified)
55
+ services: service_records,
56
+ registered: registered,
57
+ deregistered: deregistered,
58
+ checksums: checksums,
59
+ file_size: file_size,
60
+ last_modified: last_modified)
62
61
  end
63
-
64
- def self.from_yaml(file_path)
65
- YAML.load_file(file_path)
62
+
63
+ # Load metadata from a yaml encoded metadata file
64
+ def self.from_yaml(file_path, digest_algs)
65
+ File.open(file_path, 'r:bom|utf-8') do |f|
66
+ contents = f.read
67
+
68
+ checksum_error = nil
69
+ begin
70
+ verify_digests(file_path, contents, digest_algs)
71
+ rescue ChecksumMismatchError => err
72
+ # Hold onto the checksum error, in case we can identify the underlying cause
73
+ checksum_error = err
74
+ end
75
+
76
+ begin
77
+ md = nil
78
+ begin
79
+ md = YAML.safe_load(contents, [], [], true)
80
+ rescue => err
81
+ raise MetadataError.new("Failed to parse metadata file #{file_path}: #{err.message}")
82
+ end
83
+
84
+ validation_result = MetadataValidator.new(md).validate_config
85
+ if !validation_result.valid?
86
+ if checksum_error.nil?
87
+ raise MetadataError.new("Invalid metadata file #{file_path.to_s}:\n#{validation_result.errors.join("\n")}")
88
+ else
89
+ raise MetadataError.new(validation_result.errors.join("\n"))
90
+ end
91
+ end
92
+
93
+ # Either return the valid metadata, or raise the checksum error as is
94
+ if checksum_error.nil?
95
+ md
96
+ else
97
+ raise checksum_error
98
+ end
99
+ rescue MetadataError => err
100
+ if checksum_error.nil?
101
+ raise err
102
+ else
103
+ # Add underlying cause from the metadata error to the checksum mismatch error
104
+ msg = checksum_error.message + "\nWith related issue(s):\n#{err.message}"
105
+ raise ChecksumMismatchError.new(msg)
106
+ end
107
+ end
108
+ end
109
+ end
110
+
111
+ def self.verify_digests(file_path, contents, digest_algs)
112
+ return if digest_algs.nil? || digest_algs.empty?
113
+
114
+ digest_algs.each do |alg|
115
+ if file_path.respond_to?(:path)
116
+ path = file_path.path
117
+ else
118
+ path = file_path
119
+ end
120
+ digest_path = "#{path}.#{alg}"
121
+ unless File.exist?(digest_path)
122
+ logger.warn("Missing expected #{alg} digest for #{path}")
123
+ next
124
+ end
125
+
126
+ digest = DigestHelper::start_digest(alg)
127
+ result = digest.hexdigest(contents)
128
+ existing_digest = IO.read(digest_path)
129
+
130
+ if result == existing_digest
131
+ logger.info("Metadata fixity check using algorithm '#{alg}' succeeded for file #{path}")
132
+ else
133
+ raise ChecksumMismatchError.new("Metadata digest of type #{alg} did not match the contents of #{path}:" \
134
+ + " expected #{existing_digest}, calculated #{result}")
135
+ end
136
+ end
66
137
  end
67
138
  end
68
- end
139
+ end
@@ -0,0 +1,47 @@
1
+ require 'longleaf/services/metadata_serializer'
2
+ require 'longleaf/services/metadata_deserializer'
3
+ require 'longleaf/errors'
4
+
5
+ module Longleaf
6
+ # Handles the persistence of metadata records
7
+ class MetadataPersistenceManager
8
+ # Initialize the MetadataPersistenceManager
9
+ # @param index_manager [IndexManager] index manager used to index persisted metadata
10
+ def initialize(index_manager)
11
+ @index_manager = index_manager
12
+ end
13
+
14
+ # Persist the metadata for the provided file record to all configured destinations.
15
+ # This may include to disk as well as to an index.
16
+ # @param file_rec [FileRecord] file record
17
+ def persist(file_rec)
18
+ if file_rec.metadata_record.nil?
19
+ raise MetadataError.new("No metadata record provided, cannot persist metadata for #{file_rec.path}")
20
+ end
21
+
22
+ MetadataSerializer::write(metadata: file_rec.metadata_record,
23
+ file_path: file_rec.metadata_path,
24
+ digest_algs: file_rec.storage_location.metadata_location.digests)
25
+
26
+ index(file_rec)
27
+ end
28
+
29
+ # Index metadata for the provided file record
30
+ # @param file_rec [FileRecord] file record
31
+ def index(file_rec)
32
+ if @index_manager.using_index?
33
+ @index_manager.index(file_rec)
34
+ end
35
+ end
36
+
37
+ # Load the metadata record for the provided file record
38
+ # @param file_rec [FileRecord] file record
39
+ # @return [MetadataRecord] the metadata record for the file record
40
+ def load(file_rec)
41
+ md_rec = MetadataDeserializer.deserialize(file_path: file_rec.metadata_path,
42
+ digest_algs: file_rec.storage_location.metadata_location.digests)
43
+ file_rec.metadata_record = md_rec
44
+ md_rec
45
+ end
46
+ end
47
+ end
@@ -1,76 +1,189 @@
1
1
  require 'yaml'
2
2
  require 'longleaf/models/metadata_record'
3
3
  require 'longleaf/models/md_fields'
4
+ require 'longleaf/helpers/digest_helper'
5
+ require 'longleaf/errors'
6
+ require 'longleaf/logging'
4
7
  require 'pathname'
8
+ require "tempfile"
5
9
 
6
- # Service which serializes MetadataRecord objects
7
10
  module Longleaf
11
+ # Service which serializes MetadataRecord objects
8
12
  class MetadataSerializer
9
- MDF = Longleaf::MDFields
10
-
13
+ extend Longleaf::Logging
14
+ MDF ||= MDFields
15
+
11
16
  # Serialize the contents of the provided metadata record to the specified path
12
17
  #
13
18
  # @param metadata [MetadataRecord] metadata record to serialize. Required.
14
19
  # @param file_path [String] path to write the file to. Required.
15
20
  # @param format [String] format to serialize the metadata in. Default is 'yaml'.
16
- def self.write(metadata:, file_path:, format: 'yaml')
21
+ # @param digest_algs [Array] if provided, sidecar digest files for the metadata file
22
+ # will be generated for each algorithm.
23
+ def self.write(metadata:, file_path:, format: 'yaml', digest_algs: [])
17
24
  raise ArgumentError.new('metadata parameter must be a MetadataRecord') \
18
- unless metadata.class == Longleaf::MetadataRecord
19
-
25
+ unless metadata.class == MetadataRecord
26
+
20
27
  case format
21
28
  when 'yaml'
22
29
  content = to_yaml(metadata)
23
30
  else
24
- raise ArgumentError.new('Invalid serialization format #{format} specified')
31
+ raise ArgumentError.new("Invalid serialization format #{format} specified")
25
32
  end
26
-
27
- # Fill in parent directories if they do not exist
28
- parent_dir = Pathname(file_path).parent
29
- parent_dir.mkpath unless parent_dir.exist?
30
-
31
- File.write(file_path, content)
33
+
34
+ atomic_write(file_path, content, digest_algs)
32
35
  end
33
-
36
+
34
37
  # @param metadata [MetadataRecord] metadata record to transform
35
38
  # @return [String] a yaml representation of the provided MetadataRecord
36
39
  def self.to_yaml(metadata)
37
40
  props = to_hash(metadata)
38
41
  props.to_yaml
39
42
  end
40
-
43
+
44
+ # Create a hash representation of the given MetadataRecord file
45
+ # @param metadata [MetadataRecord] metadata record to transform into a hash
41
46
  def self.to_hash(metadata)
42
47
  props = Hash.new
43
-
48
+
44
49
  data = Hash.new.merge(metadata.properties)
45
50
  data[MDF::REGISTERED_TIMESTAMP] = metadata.registered if metadata.registered
46
51
  data[MDF::DEREGISTERED_TIMESTAMP] = metadata.deregistered if metadata.deregistered
47
- data[MDF::CHECKSUMS] = metadata.checksums unless metadata.checksums&.empty?
52
+ data[MDF::CHECKSUMS] = metadata.checksums unless metadata.checksums && metadata.checksums.empty?
48
53
  data[MDF::FILE_SIZE] = metadata.file_size unless metadata.file_size.nil?
49
54
  data[MDF::LAST_MODIFIED] = metadata.last_modified if metadata.last_modified
50
-
55
+
51
56
  props[MDF::DATA] = data
52
-
57
+
53
58
  services = Hash.new
54
59
  metadata.list_services.each do |name|
55
60
  service = metadata.service(name)
56
61
  service[MDF::STALE_REPLICAS] = service.stale_replicas if service.stale_replicas
57
62
  service[MDF::SERVICE_TIMESTAMP] = service.timestamp unless service.timestamp.nil?
58
63
  service[MDF::RUN_NEEDED] = service.run_needed if service.run_needed
59
- services[name] = service.properties
64
+ services[name] = service.properties unless service.properties.empty?
60
65
  end
61
-
66
+
62
67
  props[MDF::SERVICES] = services
63
-
68
+
64
69
  props
65
70
  end
66
-
71
+
72
+ # @param format [String] encoding format used for metadata file
73
+ # @return [String] the suffix used to indicate that a file is a metadata file in the provided encoding
74
+ # @raise [ArgumentError] raised if the provided format is not a supported metadata encoding format
67
75
  def self.metadata_suffix(format: 'yaml')
68
76
  case format
69
77
  when 'yaml'
70
78
  '-llmd.yaml'
71
79
  else
72
- raise ArgumentError.new('Invalid serialization format #{format} specified')
80
+ raise ArgumentError.new("Invalid serialization format #{format} specified")
81
+ end
82
+ end
83
+
84
+ # Safely writes the new metadata file and its digests.
85
+ # It does so by first writing the content and its digests to temp files,
86
+ # then making the temp files the current version of the file.
87
+ # Attempts to clean up new data in the case of failure.
88
+ def self.atomic_write(file_path, content, digest_algs)
89
+ # Fill in parent directories if they do not exist
90
+ parent_dir = Pathname(file_path).parent
91
+ parent_dir.mkpath unless parent_dir.exist?
92
+
93
+ file_path = file_path.path if file_path.respond_to?(:path)
94
+
95
+ # If file does not already exist, then simply write it
96
+ if !File.exist?(file_path)
97
+ File.write(file_path, content)
98
+ write_digests(file_path, content, digest_algs)
99
+ return
100
+ end
101
+
102
+ # Updating file, use safe atomic write
103
+ File.open(file_path) do |original_file|
104
+ original_file.flock(File::LOCK_EX)
105
+
106
+ base_name = File.basename(file_path)
107
+ Tempfile.open(base_name, parent_dir) do |temp_file|
108
+ begin
109
+ # Write content to temp file
110
+ temp_file.write(content)
111
+ temp_file.close
112
+
113
+ temp_path = temp_file.path
114
+
115
+ # Set permissions of new file to match old if it exists
116
+ old_stat = File.stat(file_path)
117
+ set_perms(temp_path, old_stat)
118
+
119
+ begin
120
+ digest_paths = write_digests(temp_path, content, digest_algs)
121
+
122
+ File.rename(temp_path, file_path)
123
+ rescue => e
124
+ cleanup_digests(temp_path)
125
+ raise e
126
+ end
127
+ rescue => e
128
+ temp_file.delete
129
+ raise e
130
+ end
131
+
132
+ # Cleanup all existing digest files, in case the set of algorithms has changed
133
+ cleanup_digests(file_path)
134
+ # Move new digests into place
135
+ digest_paths.each do |digest_path|
136
+ File.rename(digest_path, digest_path.sub(temp_path, file_path))
137
+ end
138
+ end
139
+ end
140
+ end
141
+
142
+ def self.set_perms(file_path, stat_info)
143
+ if stat_info
144
+ # Set correct permissions on new file
145
+ begin
146
+ File.chown(stat_info.uid, stat_info.gid, file_path)
147
+ # This operation will affect filesystem ACL's
148
+ File.chmod(stat_info.mode, file_path)
149
+ rescue Errno::EPERM, Errno::EACCES
150
+ # Changing file ownership failed, moving on.
151
+ return false
152
+ end
73
153
  end
154
+ true
74
155
  end
156
+
157
+ # Deletes all known digest files for the provided file path
158
+ def self.cleanup_digests(file_path)
159
+ DigestHelper::KNOWN_DIGESTS.each do |alg|
160
+ digest_path = "#{file_path}.#{alg}"
161
+ File.delete(digest_path) if File.exist?(digest_path)
162
+ end
163
+ end
164
+
165
+ def self.write_digests(file_path, content, digests)
166
+ return [] if digests.nil? || digests.empty?
167
+
168
+ digest_paths = Array.new
169
+
170
+ digests.each do |alg|
171
+ digest_class = DigestHelper::start_digest(alg)
172
+ result = digest_class.hexdigest(content)
173
+ digest_path = "#{file_path}.#{alg}"
174
+
175
+ File.write(digest_path, result)
176
+
177
+ digest_paths.push(digest_path)
178
+
179
+ self.logger.debug("Generated #{alg} digest for metadata file #{file_path}: #{digest_path} #{result}")
180
+ end
181
+
182
+ digest_paths
183
+ end
184
+
185
+ private_class_method :cleanup_digests
186
+ private_class_method :write_digests
187
+ private_class_method :atomic_write
75
188
  end
76
- end
189
+ end