longleaf 0.1.0.pre.2 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184) hide show
  1. checksums.yaml +4 -4
  2. data/.circleci/config.yml +94 -0
  3. data/.editorconfig +13 -0
  4. data/.gitignore +4 -1
  5. data/.rubocop.yml +44 -0
  6. data/.rubocop_todo.yml +834 -0
  7. data/.yardopts +1 -0
  8. data/Gemfile +16 -1
  9. data/README.md +98 -12
  10. data/Rakefile +6 -0
  11. data/bin/setup +16 -1
  12. data/docs/aboutlongleaf.md +28 -0
  13. data/docs/extra.css +32 -0
  14. data/docs/img/change-file.png +0 -0
  15. data/docs/img/ll-example-preserved.png +0 -0
  16. data/docs/index.md +19 -0
  17. data/docs/install.md +66 -0
  18. data/docs/ll-example/config-example-relative.yml +33 -0
  19. data/docs/ll-example/files-dir/LLexample-PDF.pdf +0 -0
  20. data/docs/ll-example/files-dir/LLexample-TOCHANGE.txt +15 -0
  21. data/docs/ll-example/files-dir/LLexample-tokeep.txt +10 -0
  22. data/docs/ll-example/metadata-dir/.gitkeep +0 -0
  23. data/docs/ll-example/replica-files/.gitkeep +0 -0
  24. data/docs/ll-example/replica-metadata/.gitkeep +0 -0
  25. data/docs/quickstart.md +270 -0
  26. data/docs/rdocs/Longleaf.html +135 -0
  27. data/docs/rdocs/Longleaf/AppFields.html +178 -0
  28. data/docs/rdocs/Longleaf/ApplicationConfigDeserializer.html +631 -0
  29. data/docs/rdocs/Longleaf/ApplicationConfigManager.html +610 -0
  30. data/docs/rdocs/Longleaf/ApplicationConfigValidator.html +238 -0
  31. data/docs/rdocs/Longleaf/CLI.html +909 -0
  32. data/docs/rdocs/Longleaf/ChecksumMismatchError.html +151 -0
  33. data/docs/rdocs/Longleaf/ConfigBuilder.html +1339 -0
  34. data/docs/rdocs/Longleaf/ConfigurationError.html +143 -0
  35. data/docs/rdocs/Longleaf/ConfigurationValidator.html +227 -0
  36. data/docs/rdocs/Longleaf/DeregisterCommand.html +420 -0
  37. data/docs/rdocs/Longleaf/DeregisterEvent.html +453 -0
  38. data/docs/rdocs/Longleaf/DeregistrationError.html +151 -0
  39. data/docs/rdocs/Longleaf/DigestHelper.html +419 -0
  40. data/docs/rdocs/Longleaf/EventError.html +147 -0
  41. data/docs/rdocs/Longleaf/EventNames.html +163 -0
  42. data/docs/rdocs/Longleaf/EventStatusTracking.html +656 -0
  43. data/docs/rdocs/Longleaf/FileCheckService.html +540 -0
  44. data/docs/rdocs/Longleaf/FileHelpers.html +520 -0
  45. data/docs/rdocs/Longleaf/FileRecord.html +716 -0
  46. data/docs/rdocs/Longleaf/FileSelector.html +901 -0
  47. data/docs/rdocs/Longleaf/FixityCheckService.html +691 -0
  48. data/docs/rdocs/Longleaf/IndexManager.html +1155 -0
  49. data/docs/rdocs/Longleaf/InvalidDigestAlgorithmError.html +143 -0
  50. data/docs/rdocs/Longleaf/InvalidStoragePathError.html +143 -0
  51. data/docs/rdocs/Longleaf/Logging.html +405 -0
  52. data/docs/rdocs/Longleaf/Logging/RedirectingLogger.html +1213 -0
  53. data/docs/rdocs/Longleaf/LongleafError.html +139 -0
  54. data/docs/rdocs/Longleaf/MDFields.html +193 -0
  55. data/docs/rdocs/Longleaf/MetadataBuilder.html +787 -0
  56. data/docs/rdocs/Longleaf/MetadataDeserializer.html +537 -0
  57. data/docs/rdocs/Longleaf/MetadataError.html +143 -0
  58. data/docs/rdocs/Longleaf/MetadataPersistenceManager.html +539 -0
  59. data/docs/rdocs/Longleaf/MetadataRecord.html +1411 -0
  60. data/docs/rdocs/Longleaf/MetadataSerializer.html +786 -0
  61. data/docs/rdocs/Longleaf/PreservationServiceError.html +147 -0
  62. data/docs/rdocs/Longleaf/PreserveCommand.html +410 -0
  63. data/docs/rdocs/Longleaf/PreserveEvent.html +491 -0
  64. data/docs/rdocs/Longleaf/RegisterCommand.html +428 -0
  65. data/docs/rdocs/Longleaf/RegisterEvent.html +628 -0
  66. data/docs/rdocs/Longleaf/RegisteredFileSelector.html +446 -0
  67. data/docs/rdocs/Longleaf/RegistrationError.html +151 -0
  68. data/docs/rdocs/Longleaf/ReindexCommand.html +576 -0
  69. data/docs/rdocs/Longleaf/RsyncReplicationService.html +1180 -0
  70. data/docs/rdocs/Longleaf/SequelIndexDriver.html +1978 -0
  71. data/docs/rdocs/Longleaf/ServiceCandidateFilesystemIterator.html +572 -0
  72. data/docs/rdocs/Longleaf/ServiceCandidateIndexIterator.html +532 -0
  73. data/docs/rdocs/Longleaf/ServiceCandidateLocator.html +333 -0
  74. data/docs/rdocs/Longleaf/ServiceClassCache.html +725 -0
  75. data/docs/rdocs/Longleaf/ServiceDateHelper.html +425 -0
  76. data/docs/rdocs/Longleaf/ServiceDefinition.html +683 -0
  77. data/docs/rdocs/Longleaf/ServiceDefinitionManager.html +371 -0
  78. data/docs/rdocs/Longleaf/ServiceDefinitionValidator.html +269 -0
  79. data/docs/rdocs/Longleaf/ServiceFields.html +173 -0
  80. data/docs/rdocs/Longleaf/ServiceManager.html +1229 -0
  81. data/docs/rdocs/Longleaf/ServiceMappingManager.html +410 -0
  82. data/docs/rdocs/Longleaf/ServiceMappingValidator.html +347 -0
  83. data/docs/rdocs/Longleaf/ServiceRecord.html +821 -0
  84. data/docs/rdocs/Longleaf/StorageLocation.html +985 -0
  85. data/docs/rdocs/Longleaf/StorageLocationManager.html +729 -0
  86. data/docs/rdocs/Longleaf/StorageLocationUnavailableError.html +143 -0
  87. data/docs/rdocs/Longleaf/StorageLocationValidator.html +373 -0
  88. data/docs/rdocs/Longleaf/StoragePathValidator.html +253 -0
  89. data/docs/rdocs/Longleaf/SystemConfigBuilder.html +441 -0
  90. data/docs/rdocs/Longleaf/SystemConfigFields.html +163 -0
  91. data/docs/rdocs/Longleaf/ValidateConfigCommand.html +451 -0
  92. data/docs/rdocs/Longleaf/ValidateMetadataCommand.html +408 -0
  93. data/docs/rdocs/_index.html +660 -0
  94. data/docs/rdocs/class_list.html +51 -0
  95. data/docs/rdocs/css/common.css +1 -0
  96. data/docs/rdocs/css/full_list.css +58 -0
  97. data/docs/rdocs/css/style.css +496 -0
  98. data/docs/rdocs/file.README.html +165 -0
  99. data/docs/rdocs/file_list.html +56 -0
  100. data/docs/rdocs/frames.html +17 -0
  101. data/docs/rdocs/index.html +165 -0
  102. data/docs/rdocs/js/app.js +303 -0
  103. data/docs/rdocs/js/full_list.js +216 -0
  104. data/docs/rdocs/js/jquery.js +4 -0
  105. data/docs/rdocs/method_list.html +2051 -0
  106. data/docs/rdocs/top-level-namespace.html +110 -0
  107. data/lib/longleaf/candidates/file_selector.rb +139 -0
  108. data/lib/longleaf/candidates/manifest_digest_provider.rb +17 -0
  109. data/lib/longleaf/candidates/registered_file_selector.rb +67 -0
  110. data/lib/longleaf/candidates/service_candidate_filesystem_iterator.rb +93 -0
  111. data/lib/longleaf/candidates/service_candidate_index_iterator.rb +84 -0
  112. data/lib/longleaf/candidates/service_candidate_locator.rb +23 -0
  113. data/lib/longleaf/candidates/single_digest_provider.rb +13 -0
  114. data/lib/longleaf/cli.rb +237 -46
  115. data/lib/longleaf/commands/deregister_command.rb +51 -0
  116. data/lib/longleaf/commands/preserve_command.rb +50 -0
  117. data/lib/longleaf/commands/register_command.rb +32 -43
  118. data/lib/longleaf/commands/reindex_command.rb +92 -0
  119. data/lib/longleaf/commands/validate_config_command.rb +33 -8
  120. data/lib/longleaf/commands/validate_metadata_command.rb +51 -0
  121. data/lib/longleaf/errors.rb +26 -7
  122. data/lib/longleaf/events/deregister_event.rb +53 -0
  123. data/lib/longleaf/events/event_names.rb +9 -0
  124. data/lib/longleaf/events/event_status_tracking.rb +59 -0
  125. data/lib/longleaf/events/preserve_event.rb +81 -0
  126. data/lib/longleaf/events/register_event.rb +52 -51
  127. data/lib/longleaf/helpers/case_insensitive_hash.rb +38 -0
  128. data/lib/longleaf/helpers/digest_helper.rb +56 -0
  129. data/lib/longleaf/helpers/s3_uri_helper.rb +86 -0
  130. data/lib/longleaf/helpers/selection_options_parser.rb +189 -0
  131. data/lib/longleaf/helpers/service_date_helper.rb +78 -0
  132. data/lib/longleaf/indexing/index_manager.rb +101 -0
  133. data/lib/longleaf/indexing/sequel_index_driver.rb +306 -0
  134. data/lib/longleaf/logging.rb +5 -4
  135. data/lib/longleaf/logging/redirecting_logger.rb +26 -25
  136. data/lib/longleaf/models/app_fields.rb +7 -2
  137. data/lib/longleaf/models/file_record.rb +17 -8
  138. data/lib/longleaf/models/filesystem_metadata_location.rb +56 -0
  139. data/lib/longleaf/models/filesystem_storage_location.rb +52 -0
  140. data/lib/longleaf/models/md_fields.rb +2 -1
  141. data/lib/longleaf/models/metadata_location.rb +47 -0
  142. data/lib/longleaf/models/metadata_record.rb +39 -15
  143. data/lib/longleaf/models/s3_storage_location.rb +133 -0
  144. data/lib/longleaf/models/service_definition.rb +7 -6
  145. data/lib/longleaf/models/service_fields.rb +7 -1
  146. data/lib/longleaf/models/service_record.rb +10 -6
  147. data/lib/longleaf/models/storage_location.rb +24 -19
  148. data/lib/longleaf/models/storage_types.rb +9 -0
  149. data/lib/longleaf/models/system_config_fields.rb +9 -0
  150. data/lib/longleaf/preservation_services/file_check_service.rb +58 -0
  151. data/lib/longleaf/preservation_services/fixity_check_service.rb +123 -0
  152. data/lib/longleaf/preservation_services/rsync_replication_service.rb +182 -0
  153. data/lib/longleaf/preservation_services/s3_replication_service.rb +143 -0
  154. data/lib/longleaf/services/application_config_deserializer.rb +81 -24
  155. data/lib/longleaf/services/application_config_manager.rb +20 -6
  156. data/lib/longleaf/services/application_config_validator.rb +19 -9
  157. data/lib/longleaf/services/configuration_validator.rb +67 -4
  158. data/lib/longleaf/services/filesystem_location_validator.rb +16 -0
  159. data/lib/longleaf/services/metadata_deserializer.rb +113 -42
  160. data/lib/longleaf/services/metadata_persistence_manager.rb +47 -0
  161. data/lib/longleaf/services/metadata_serializer.rb +138 -25
  162. data/lib/longleaf/services/metadata_validator.rb +76 -0
  163. data/lib/longleaf/services/s3_location_validator.rb +19 -0
  164. data/lib/longleaf/services/service_class_cache.rb +112 -0
  165. data/lib/longleaf/services/service_definition_manager.rb +10 -7
  166. data/lib/longleaf/services/service_definition_validator.rb +25 -18
  167. data/lib/longleaf/services/service_manager.rb +86 -11
  168. data/lib/longleaf/services/service_mapping_manager.rb +13 -12
  169. data/lib/longleaf/services/service_mapping_validator.rb +36 -26
  170. data/lib/longleaf/services/storage_location_manager.rb +76 -15
  171. data/lib/longleaf/services/storage_location_validator.rb +49 -35
  172. data/lib/longleaf/specs/config_builder.rb +47 -23
  173. data/lib/longleaf/specs/config_validator_helpers.rb +16 -0
  174. data/lib/longleaf/specs/custom_matchers.rb +9 -0
  175. data/lib/longleaf/specs/file_helpers.rb +61 -0
  176. data/lib/longleaf/specs/metadata_builder.rb +92 -0
  177. data/lib/longleaf/specs/system_config_builder.rb +27 -0
  178. data/lib/longleaf/version.rb +1 -1
  179. data/longleaf.gemspec +20 -7
  180. data/mkdocs.yml +21 -0
  181. metadata +306 -23
  182. data/.travis.yml +0 -4
  183. data/lib/longleaf/commands/abstract_command.rb +0 -37
  184. data/lib/longleaf/services/storage_path_validator.rb +0 -16
@@ -5,20 +5,34 @@ require_relative 'service_definition_manager'
5
5
  require_relative 'service_mapping_validator'
6
6
  require_relative 'service_mapping_manager'
7
7
  require_relative 'service_manager'
8
+ require_relative 'metadata_persistence_manager'
9
+ require 'longleaf/indexing/index_manager'
10
+ require 'longleaf/models/app_fields'
8
11
 
9
- # Manager which loads and provides access to the configuration of the application
10
12
  module Longleaf
13
+ # Manager which loads and provides access to the configuration of the application
11
14
  class ApplicationConfigManager
15
+ attr_reader :config_md5
12
16
  attr_reader :service_manager
13
17
  attr_reader :location_manager
14
-
15
- def initialize(config)
18
+ attr_reader :index_manager
19
+ attr_reader :md_manager
20
+
21
+ def initialize(config, config_md5 = nil)
22
+ @config_md5 = config_md5
23
+
16
24
  @location_manager = Longleaf::StorageLocationManager.new(config)
17
-
25
+
18
26
  definition_manager = Longleaf::ServiceDefinitionManager.new(config)
19
27
  mapping_manager = Longleaf::ServiceMappingManager.new(config)
20
28
  @service_manager = Longleaf::ServiceManager.new(
21
- definition_manager: definition_manager, mapping_manager: mapping_manager)
29
+ definition_manager: definition_manager,
30
+ mapping_manager: mapping_manager,
31
+ app_manager: self)
32
+
33
+ sys_config = config[AppFields::SYSTEM]
34
+ @index_manager = IndexManager.new(sys_config, self)
35
+ @md_manager = MetadataPersistenceManager.new(@index_manager)
22
36
  end
23
37
  end
24
- end
38
+ end
@@ -2,17 +2,27 @@ require_relative 'storage_location_validator'
2
2
  require_relative 'service_definition_validator'
3
3
  require_relative 'service_mapping_validator'
4
4
 
5
- # Validator for Longleaf application configuration
6
5
  module Longleaf
7
- class ApplicationConfigValidator
8
-
6
+ # Validator for Longleaf application configuration
7
+ class ApplicationConfigValidator < ConfigurationValidator
8
+ # @param config [Hash] hash containing the application configuration
9
+ def initialize(config)
10
+ super(config)
11
+ end
12
+
13
+ protected
9
14
  # Validates the application configuration provided. Will raise ConfigurationError
10
15
  # if any portion of the configuration is not syntactically or semantically valid.
11
- # @param config [Hash] application configuration
12
- def self.validate(config)
13
- Longleaf::StorageLocationValidator::validate_config(config)
14
- Longleaf::ServiceDefinitionValidator::validate_config(config)
15
- Longleaf::ServiceMappingValidator::validate_config(config)
16
+ def validate
17
+ loc_result = StorageLocationValidator.new(@config).validate_config
18
+ defs_result = ServiceDefinitionValidator.new(@config).validate_config
19
+ mapping_result = ServiceMappingValidator.new(@config).validate_config
20
+
21
+ @result.errors.concat(loc_result.errors) unless loc_result.valid?
22
+ @result.errors.concat(defs_result.errors) unless defs_result.valid?
23
+ @result.errors.concat(mapping_result.errors) unless mapping_result.valid?
24
+
25
+ @result
16
26
  end
17
27
  end
18
- end
28
+ end
@@ -1,8 +1,71 @@
1
1
  module Longleaf
2
+ # Abstract configuration validator class
2
3
  class ConfigurationValidator
3
- protected
4
- def self.assert(fail_message, assertion_passed)
5
- raise ConfigurationError.new(fail_message) unless assertion_passed
4
+ attr_reader :result
5
+
6
+ def initialize(config)
7
+ @result = ConfigurationValidationResult.new
8
+ @config = config
9
+ end
10
+
11
+ # Verify that the provided configuration is valid
12
+ # @return [ConfigurationValidationResult] the result of the validation
13
+ def validate_config
14
+ register_on_failure { validate }
15
+
16
+ @result
17
+ end
18
+
19
+ # Asserts that the given conditional is true, raising a ConfigurationError if it is not.
20
+ def assert(fail_message, assertion_passed)
21
+ fail(fail_message) unless assertion_passed
22
+ end
23
+
24
+ # Indicate that validation has failed, raising a ConfigurationError with the given message
25
+ def fail(fail_message)
26
+ raise ConfigurationError.new(fail_message)
27
+ end
28
+
29
+ # Registers an error to the result for this validator
30
+ def register_error(error)
31
+ if error.is_a?(StandardError)
32
+ @result.register_error(error.msg)
33
+ else
34
+ @result.register_error(error)
35
+ end
36
+ end
37
+
38
+ # Performs the provided block. If the block produces a ConfigurationError, the error
39
+ # is swallowed and registered to the result
40
+ def register_on_failure
41
+ begin
42
+ yield
43
+ rescue ConfigurationError => err
44
+ register_error(err.message)
45
+ end
46
+ end
47
+ end
48
+
49
+ class ConfigurationValidationResult
50
+ attr_reader :errors
51
+
52
+ def initialize
53
+ @errors = Array.new
54
+ end
55
+
56
+ # Register an error with this validation result
57
+ def register_error(error_message)
58
+ @errors << error_message
59
+ end
60
+
61
+ # @return [boolean] true if validation produced no errors
62
+ def valid?
63
+ @errors.length == 0
64
+ end
65
+
66
+ # @raise [ConfigurationError] if the result is not valid, which lists all failures
67
+ def raise_if_invalid
68
+ raise ConfigurationError.new(@errors.join("\n")) unless valid?
6
69
  end
7
70
  end
8
- end
71
+ end
@@ -0,0 +1,16 @@
1
+ require 'pathname'
2
+ require 'longleaf/errors'
3
+
4
+ module Longleaf
5
+ # Validates the configuration of a filesystem based location
6
+ class FilesystemLocationValidator
7
+
8
+ def self.validate(p_validator, name, path_prop, section_name, path)
9
+ base_msg = "Storage location '#{name}' specifies invalid #{section_name} '#{path_prop}' property: "
10
+ p_validator.assert(base_msg + 'Path must not be empty', !path.nil? && !path.to_s.strip.empty?)
11
+ p_validator.assert(base_msg + 'Path must not contain any relative modifiers (/..)', !path.include?('/..'))
12
+ p_validator.assert(base_msg + 'Path must be absolute', Pathname.new(path).absolute?)
13
+ p_validator.assert(base_msg + 'Path does not exist', Dir.exist?(path))
14
+ end
15
+ end
16
+ end
@@ -1,29 +1,30 @@
1
1
  require 'yaml'
2
- require_relative '../models/metadata_record'
3
- require_relative '../models/md_fields'
4
- require_relative '../errors'
2
+ require 'longleaf/models/metadata_record'
3
+ require 'longleaf/models/md_fields'
4
+ require 'longleaf/services/metadata_validator'
5
+ require 'longleaf/errors'
6
+ require 'longleaf/logging'
5
7
 
6
- # Service which deserializes metadata files into MetadataRecord objects
7
8
  module Longleaf
9
+ # Service which deserializes metadata files into MetadataRecord objects
8
10
  class MetadataDeserializer
9
- MDF = Longleaf::MDFields
10
-
11
+ extend Longleaf::Logging
12
+ MDF ||= MDFields
13
+
11
14
  # Deserialize a file into a MetadataRecord object
12
15
  #
13
16
  # @param file_path [String] path of the file to read. Required.
14
17
  # @param format [String] format the file is stored in. Default is 'yaml'.
15
- def self.deserialize(file_path:, format: 'yaml')
18
+ def self.deserialize(file_path:, format: 'yaml', digest_algs: [])
19
+ file_path = file_path.path if file_path.is_a?(File)
20
+
16
21
  case format
17
22
  when 'yaml'
18
- md = from_yaml(file_path)
23
+ md = from_yaml(file_path, digest_algs)
19
24
  else
20
- raise ArgumentError.new('Invalid deserialization format #{format} specified')
25
+ raise ArgumentError.new("Invalid deserialization format #{format} specified")
21
26
  end
22
-
23
- if !md || !md.key?(MDF::DATA) || !md.key?(MDF::SERVICES)
24
- raise Longleaf::MetadataError.new("Invalid metadata file, did not contain data or services fields: #{file_path}")
25
- end
26
-
27
+
27
28
  data = Hash.new.merge(md[MDF::DATA])
28
29
  # Extract reserved properties for submission as separate parameters
29
30
  registered = data.delete(MDFields::REGISTERED_TIMESTAMP)
@@ -31,38 +32,108 @@ module Longleaf
31
32
  checksums = data.delete(MDFields::CHECKSUMS)
32
33
  file_size = data.delete(MDFields::FILE_SIZE)
33
34
  last_modified = data.delete(MDFields::LAST_MODIFIED)
34
-
35
+
35
36
  services = md[MDF::SERVICES]
36
37
  service_records = Hash.new
37
- unless services.nil?
38
- services.each do |name, props|
39
- raise Longleaf::MetadataError.new("Value of service #{name} must be a hash") unless props.class == Hash
40
-
41
- service_props = Hash.new.merge(props)
42
-
43
- stale_replicas = service_props.delete(MDFields::STALE_REPLICAS)
44
- timestamp = service_props.delete(MDFields::SERVICE_TIMESTAMP)
45
- run_needed = service_props.delete(MDFields::RUN_NEEDED)
46
-
47
- service_records[name] = ServiceRecord.new(
48
- properties: service_props,
49
- stale_replicas: stale_replicas,
50
- timestamp: timestamp,
51
- run_needed: run_needed)
52
- end
38
+ services&.each do |name, props|
39
+ raise MetadataError.new("Value of service #{name} must be a hash") unless props.class == Hash
40
+
41
+ service_props = Hash.new.merge(props)
42
+
43
+ stale_replicas = service_props.delete(MDFields::STALE_REPLICAS)
44
+ timestamp = service_props.delete(MDFields::SERVICE_TIMESTAMP)
45
+ run_needed = service_props.delete(MDFields::RUN_NEEDED)
46
+
47
+ service_records[name] = ServiceRecord.new(
48
+ properties: service_props,
49
+ stale_replicas: stale_replicas,
50
+ timestamp: timestamp,
51
+ run_needed: run_needed)
53
52
  end
54
-
53
+
55
54
  MetadataRecord.new(properties: data,
56
- services: service_records,
57
- registered: registered,
58
- deregistered: deregistered,
59
- checksums: checksums,
60
- file_size: file_size,
61
- last_modified: last_modified)
55
+ services: service_records,
56
+ registered: registered,
57
+ deregistered: deregistered,
58
+ checksums: checksums,
59
+ file_size: file_size,
60
+ last_modified: last_modified)
62
61
  end
63
-
64
- def self.from_yaml(file_path)
65
- YAML.load_file(file_path)
62
+
63
+ # Load and validate metadata from a yaml encoded metadata file
64
+ def self.from_yaml(file_path, digest_algs)
65
+ File.open(file_path, 'r:bom|utf-8') do |f|
66
+ contents = f.read
67
+
68
+ checksum_error = nil
69
+ begin
70
+ verify_digests(file_path, contents, digest_algs)
71
+ rescue ChecksumMismatchError => err
72
+ # Hold onto the checksum error, in case we can identify the underlying cause
73
+ checksum_error = err
74
+ end
75
+
76
+ begin
77
+ md = nil
78
+ begin
79
+ md = YAML.safe_load(contents, [], [], true)
80
+ rescue => err
81
+ raise MetadataError.new("Failed to parse metadata file #{file_path}: #{err.message}")
82
+ end
83
+
84
+ validation_result = MetadataValidator.new(md).validate_config
85
+ if !validation_result.valid?
86
+ if checksum_error.nil?
87
+ raise MetadataError.new("Invalid metadata file #{file_path.to_s}:\n#{validation_result.errors.join("\n")}")
88
+ else
89
+ raise MetadataError.new(validation_result.errors.join("\n"))
90
+ end
91
+ end
92
+
93
+ # Either return the valid metadata, or raise the checksum error as is
94
+ if checksum_error.nil?
95
+ md
96
+ else
97
+ raise checksum_error
98
+ end
99
+ rescue MetadataError => err
100
+ if checksum_error.nil?
101
+ raise err
102
+ else
103
+ # Add underlying cause from the metadata error to the checksum mismatch error
104
+ msg = checksum_error.message + "\nWith related issue(s):\n#{err.message}"
105
+ raise ChecksumMismatchError.new(msg)
106
+ end
107
+ end
108
+ end
109
+ end
110
+
111
+ def self.verify_digests(file_path, contents, digest_algs)
112
+ return if digest_algs.nil? || digest_algs.empty?
113
+
114
+ digest_algs.each do |alg|
115
+ if file_path.respond_to?(:path)
116
+ path = file_path.path
117
+ else
118
+ path = file_path
119
+ end
120
+ digest_path = "#{path}.#{alg}"
121
+ unless File.exist?(digest_path)
122
+ logger.warn("Missing expected #{alg} digest for #{path}")
123
+ next
124
+ end
125
+
126
+ digest = DigestHelper::start_digest(alg)
127
+ result = digest.hexdigest(contents)
128
+ existing_digest = IO.read(digest_path)
129
+
130
+ if result == existing_digest
131
+ logger.info("Metadata fixity check using algorithm '#{alg}' succeeded for file #{path}")
132
+ else
133
+ raise ChecksumMismatchError.new("Metadata digest of type #{alg} did not match the contents of #{path}:" \
134
+ + " expected #{existing_digest}, calculated #{result}")
135
+ end
136
+ end
66
137
  end
67
138
  end
68
- end
139
+ end
@@ -0,0 +1,47 @@
1
+ require 'longleaf/services/metadata_serializer'
2
+ require 'longleaf/services/metadata_deserializer'
3
+ require 'longleaf/errors'
4
+
5
+ module Longleaf
6
+ # Handles the persistence of metadata records
7
+ class MetadataPersistenceManager
8
+ # Initialize the MetadataPersistenceManager
9
+ # @param index_manager [IndexManager] system config manager
10
+ def initialize(index_manager)
11
+ @index_manager = index_manager
12
+ end
13
+
14
+ # Persist the metadata for the provided file record to all configured destinations.
15
+ # This may include to disk as well as to an index.
16
+ # @param file_rec [FileRecord] file record
17
+ def persist(file_rec)
18
+ if file_rec.metadata_record.nil?
19
+ raise MetadataError.new("No metadata record provided, cannot persist metadata for #{file_rec.path}")
20
+ end
21
+
22
+ MetadataSerializer::write(metadata: file_rec.metadata_record,
23
+ file_path: file_rec.metadata_path,
24
+ digest_algs: file_rec.storage_location.metadata_location.digests)
25
+
26
+ index(file_rec)
27
+ end
28
+
29
+ # Index metadata for the provided file record
30
+ # @param file_rec [FileRecord] file record
31
+ def index(file_rec)
32
+ if @index_manager.using_index?
33
+ @index_manager.index(file_rec)
34
+ end
35
+ end
36
+
37
+ # Load the metadata record for the provided file record
38
+ # @param file_rec [FileRecord] file record
39
+ # @return [MetadataRecord] the metadata record for the file record
40
+ def load(file_rec)
41
+ md_rec = MetadataDeserializer.deserialize(file_path: file_rec.metadata_path,
42
+ digest_algs: file_rec.storage_location.metadata_location.digests)
43
+ file_rec.metadata_record = md_rec
44
+ md_rec
45
+ end
46
+ end
47
+ end
@@ -1,76 +1,189 @@
1
1
  require 'yaml'
2
2
  require 'longleaf/models/metadata_record'
3
3
  require 'longleaf/models/md_fields'
4
+ require 'longleaf/helpers/digest_helper'
5
+ require 'longleaf/errors'
6
+ require 'longleaf/logging'
4
7
  require 'pathname'
8
+ require "tempfile"
5
9
 
6
- # Service which serializes MetadataRecord objects
7
10
  module Longleaf
11
+ # Service which serializes MetadataRecord objects
8
12
  class MetadataSerializer
9
- MDF = Longleaf::MDFields
10
-
13
+ extend Longleaf::Logging
14
+ MDF ||= MDFields
15
+
11
16
  # Serialize the contents of the provided metadata record to the specified path
12
17
  #
13
18
  # @param metadata [MetadataRecord] metadata record to serialize. Required.
14
19
  # @param file_path [String] path to write the file to. Required.
15
20
  # @param format [String] format to serialize the metadata in. Default is 'yaml'.
16
- def self.write(metadata:, file_path:, format: 'yaml')
21
+ # @param digest_algs [Array] if provided, sidecar digest files for the metadata file
22
+ # will be generated for each algorithm.
23
+ def self.write(metadata:, file_path:, format: 'yaml', digest_algs: [])
17
24
  raise ArgumentError.new('metadata parameter must be a MetadataRecord') \
18
- unless metadata.class == Longleaf::MetadataRecord
19
-
25
+ unless metadata.class == MetadataRecord
26
+
20
27
  case format
21
28
  when 'yaml'
22
29
  content = to_yaml(metadata)
23
30
  else
24
- raise ArgumentError.new('Invalid serialization format #{format} specified')
31
+ raise ArgumentError.new("Invalid serialization format #{format} specified")
25
32
  end
26
-
27
- # Fill in parent directories if they do not exist
28
- parent_dir = Pathname(file_path).parent
29
- parent_dir.mkpath unless parent_dir.exist?
30
-
31
- File.write(file_path, content)
33
+
34
+ atomic_write(file_path, content, digest_algs)
32
35
  end
33
-
36
+
34
37
  # @param metadata [MetadataRecord] metadata record to transform
35
38
  # @return [String] a yaml representation of the provided MetadataRecord
36
39
  def self.to_yaml(metadata)
37
40
  props = to_hash(metadata)
38
41
  props.to_yaml
39
42
  end
40
-
43
+
44
+ # Create a hash representation of the given MetadataRecord file
45
+ # @param metadata [MetadataRecord] metadata record to transform into a hash
41
46
  def self.to_hash(metadata)
42
47
  props = Hash.new
43
-
48
+
44
49
  data = Hash.new.merge(metadata.properties)
45
50
  data[MDF::REGISTERED_TIMESTAMP] = metadata.registered if metadata.registered
46
51
  data[MDF::DEREGISTERED_TIMESTAMP] = metadata.deregistered if metadata.deregistered
47
- data[MDF::CHECKSUMS] = metadata.checksums unless metadata.checksums&.empty?
52
+ data[MDF::CHECKSUMS] = metadata.checksums unless metadata.checksums && metadata.checksums.empty?
48
53
  data[MDF::FILE_SIZE] = metadata.file_size unless metadata.file_size.nil?
49
54
  data[MDF::LAST_MODIFIED] = metadata.last_modified if metadata.last_modified
50
-
55
+
51
56
  props[MDF::DATA] = data
52
-
57
+
53
58
  services = Hash.new
54
59
  metadata.list_services.each do |name|
55
60
  service = metadata.service(name)
56
61
  service[MDF::STALE_REPLICAS] = service.stale_replicas if service.stale_replicas
57
62
  service[MDF::SERVICE_TIMESTAMP] = service.timestamp unless service.timestamp.nil?
58
63
  service[MDF::RUN_NEEDED] = service.run_needed if service.run_needed
59
- services[name] = service.properties
64
+ services[name] = service.properties unless service.properties.empty?
60
65
  end
61
-
66
+
62
67
  props[MDF::SERVICES] = services
63
-
68
+
64
69
  props
65
70
  end
66
-
71
+
72
+ # @param format [String] encoding format used for metadata file
73
+ # @return [String] the suffix used to indicate that a file is a metadata file in the provided encoding
74
+ # @raise [ArgumentError] raised if the provided format is not a supported metadata encoding format
67
75
  def self.metadata_suffix(format: 'yaml')
68
76
  case format
69
77
  when 'yaml'
70
78
  '-llmd.yaml'
71
79
  else
72
- raise ArgumentError.new('Invalid serialization format #{format} specified')
80
+ raise ArgumentError.new("Invalid serialization format #{format} specified")
81
+ end
82
+ end
83
+
84
+ # Safely writes the new metadata file and its digests.
85
+ # It does so by first writing the content and its digests to temp files,
86
+ # then making the temp files the current version of the file.
87
+ # Attempts to clean up new data in the case of failure.
88
+ def self.atomic_write(file_path, content, digest_algs)
89
+ # Fill in parent directories if they do not exist
90
+ parent_dir = Pathname(file_path).parent
91
+ parent_dir.mkpath unless parent_dir.exist?
92
+
93
+ file_path = file_path.path if file_path.respond_to?(:path)
94
+
95
+ # If file does not already exist, then simply write it
96
+ if !File.exist?(file_path)
97
+ File.write(file_path, content)
98
+ write_digests(file_path, content, digest_algs)
99
+ return
100
+ end
101
+
102
+ # Updating file, use safe atomic write
103
+ File.open(file_path) do |original_file|
104
+ original_file.flock(File::LOCK_EX)
105
+
106
+ base_name = File.basename(file_path)
107
+ Tempfile.open(base_name, parent_dir) do |temp_file|
108
+ begin
109
+ # Write content to temp file
110
+ temp_file.write(content)
111
+ temp_file.close
112
+
113
+ temp_path = temp_file.path
114
+
115
+ # Set permissions of new file to match old if it exists
116
+ old_stat = File.stat(file_path)
117
+ set_perms(temp_path, old_stat)
118
+
119
+ begin
120
+ digest_paths = write_digests(temp_path, content, digest_algs)
121
+
122
+ File.rename(temp_path, file_path)
123
+ rescue => e
124
+ cleanup_digests(temp_path)
125
+ raise e
126
+ end
127
+ rescue => e
128
+ temp_file.delete
129
+ raise e
130
+ end
131
+
132
+ # Cleanup all existing digest files, in case the set of algorithms has changed
133
+ cleanup_digests(file_path)
134
+ # Move new digests into place
135
+ digest_paths.each do |digest_path|
136
+ File.rename(digest_path, digest_path.sub(temp_path, file_path))
137
+ end
138
+ end
139
+ end
140
+ end
141
+
142
+ def self.set_perms(file_path, stat_info)
143
+ if stat_info
144
+ # Set correct permissions on new file
145
+ begin
146
+ File.chown(stat_info.uid, stat_info.gid, file_path)
147
+ # This operation will affect filesystem ACL's
148
+ File.chmod(stat_info.mode, file_path)
149
+ rescue Errno::EPERM, Errno::EACCES
150
+ # Changing file ownership failed, moving on.
151
+ return false
152
+ end
73
153
  end
154
+ true
74
155
  end
156
+
157
+ # Deletes all known digest files for the provided file path
158
+ def self.cleanup_digests(file_path)
159
+ DigestHelper::KNOWN_DIGESTS.each do |alg|
160
+ digest_path = "#{file_path}.#{alg}"
161
+ File.delete(digest_path) if File.exist?(digest_path)
162
+ end
163
+ end
164
+
165
+ def self.write_digests(file_path, content, digests)
166
+ return [] if digests.nil? || digests.empty?
167
+
168
+ digest_paths = Array.new
169
+
170
+ digests.each do |alg|
171
+ digest_class = DigestHelper::start_digest(alg)
172
+ result = digest_class.hexdigest(content)
173
+ digest_path = "#{file_path}.#{alg}"
174
+
175
+ File.write(digest_path, result)
176
+
177
+ digest_paths.push(digest_path)
178
+
179
+ self.logger.debug("Generated #{alg} digest for metadata file #{file_path}: #{digest_path} #{result}")
180
+ end
181
+
182
+ digest_paths
183
+ end
184
+
185
+ private_class_method :cleanup_digests
186
+ private_class_method :write_digests
187
+ private_class_method :atomic_write
75
188
  end
76
- end
189
+ end