longleaf 0.1.0.pre.2 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184) hide show
  1. checksums.yaml +4 -4
  2. data/.circleci/config.yml +94 -0
  3. data/.editorconfig +13 -0
  4. data/.gitignore +4 -1
  5. data/.rubocop.yml +44 -0
  6. data/.rubocop_todo.yml +834 -0
  7. data/.yardopts +1 -0
  8. data/Gemfile +16 -1
  9. data/README.md +98 -12
  10. data/Rakefile +6 -0
  11. data/bin/setup +16 -1
  12. data/docs/aboutlongleaf.md +28 -0
  13. data/docs/extra.css +32 -0
  14. data/docs/img/change-file.png +0 -0
  15. data/docs/img/ll-example-preserved.png +0 -0
  16. data/docs/index.md +19 -0
  17. data/docs/install.md +66 -0
  18. data/docs/ll-example/config-example-relative.yml +33 -0
  19. data/docs/ll-example/files-dir/LLexample-PDF.pdf +0 -0
  20. data/docs/ll-example/files-dir/LLexample-TOCHANGE.txt +15 -0
  21. data/docs/ll-example/files-dir/LLexample-tokeep.txt +10 -0
  22. data/docs/ll-example/metadata-dir/.gitkeep +0 -0
  23. data/docs/ll-example/replica-files/.gitkeep +0 -0
  24. data/docs/ll-example/replica-metadata/.gitkeep +0 -0
  25. data/docs/quickstart.md +270 -0
  26. data/docs/rdocs/Longleaf.html +135 -0
  27. data/docs/rdocs/Longleaf/AppFields.html +178 -0
  28. data/docs/rdocs/Longleaf/ApplicationConfigDeserializer.html +631 -0
  29. data/docs/rdocs/Longleaf/ApplicationConfigManager.html +610 -0
  30. data/docs/rdocs/Longleaf/ApplicationConfigValidator.html +238 -0
  31. data/docs/rdocs/Longleaf/CLI.html +909 -0
  32. data/docs/rdocs/Longleaf/ChecksumMismatchError.html +151 -0
  33. data/docs/rdocs/Longleaf/ConfigBuilder.html +1339 -0
  34. data/docs/rdocs/Longleaf/ConfigurationError.html +143 -0
  35. data/docs/rdocs/Longleaf/ConfigurationValidator.html +227 -0
  36. data/docs/rdocs/Longleaf/DeregisterCommand.html +420 -0
  37. data/docs/rdocs/Longleaf/DeregisterEvent.html +453 -0
  38. data/docs/rdocs/Longleaf/DeregistrationError.html +151 -0
  39. data/docs/rdocs/Longleaf/DigestHelper.html +419 -0
  40. data/docs/rdocs/Longleaf/EventError.html +147 -0
  41. data/docs/rdocs/Longleaf/EventNames.html +163 -0
  42. data/docs/rdocs/Longleaf/EventStatusTracking.html +656 -0
  43. data/docs/rdocs/Longleaf/FileCheckService.html +540 -0
  44. data/docs/rdocs/Longleaf/FileHelpers.html +520 -0
  45. data/docs/rdocs/Longleaf/FileRecord.html +716 -0
  46. data/docs/rdocs/Longleaf/FileSelector.html +901 -0
  47. data/docs/rdocs/Longleaf/FixityCheckService.html +691 -0
  48. data/docs/rdocs/Longleaf/IndexManager.html +1155 -0
  49. data/docs/rdocs/Longleaf/InvalidDigestAlgorithmError.html +143 -0
  50. data/docs/rdocs/Longleaf/InvalidStoragePathError.html +143 -0
  51. data/docs/rdocs/Longleaf/Logging.html +405 -0
  52. data/docs/rdocs/Longleaf/Logging/RedirectingLogger.html +1213 -0
  53. data/docs/rdocs/Longleaf/LongleafError.html +139 -0
  54. data/docs/rdocs/Longleaf/MDFields.html +193 -0
  55. data/docs/rdocs/Longleaf/MetadataBuilder.html +787 -0
  56. data/docs/rdocs/Longleaf/MetadataDeserializer.html +537 -0
  57. data/docs/rdocs/Longleaf/MetadataError.html +143 -0
  58. data/docs/rdocs/Longleaf/MetadataPersistenceManager.html +539 -0
  59. data/docs/rdocs/Longleaf/MetadataRecord.html +1411 -0
  60. data/docs/rdocs/Longleaf/MetadataSerializer.html +786 -0
  61. data/docs/rdocs/Longleaf/PreservationServiceError.html +147 -0
  62. data/docs/rdocs/Longleaf/PreserveCommand.html +410 -0
  63. data/docs/rdocs/Longleaf/PreserveEvent.html +491 -0
  64. data/docs/rdocs/Longleaf/RegisterCommand.html +428 -0
  65. data/docs/rdocs/Longleaf/RegisterEvent.html +628 -0
  66. data/docs/rdocs/Longleaf/RegisteredFileSelector.html +446 -0
  67. data/docs/rdocs/Longleaf/RegistrationError.html +151 -0
  68. data/docs/rdocs/Longleaf/ReindexCommand.html +576 -0
  69. data/docs/rdocs/Longleaf/RsyncReplicationService.html +1180 -0
  70. data/docs/rdocs/Longleaf/SequelIndexDriver.html +1978 -0
  71. data/docs/rdocs/Longleaf/ServiceCandidateFilesystemIterator.html +572 -0
  72. data/docs/rdocs/Longleaf/ServiceCandidateIndexIterator.html +532 -0
  73. data/docs/rdocs/Longleaf/ServiceCandidateLocator.html +333 -0
  74. data/docs/rdocs/Longleaf/ServiceClassCache.html +725 -0
  75. data/docs/rdocs/Longleaf/ServiceDateHelper.html +425 -0
  76. data/docs/rdocs/Longleaf/ServiceDefinition.html +683 -0
  77. data/docs/rdocs/Longleaf/ServiceDefinitionManager.html +371 -0
  78. data/docs/rdocs/Longleaf/ServiceDefinitionValidator.html +269 -0
  79. data/docs/rdocs/Longleaf/ServiceFields.html +173 -0
  80. data/docs/rdocs/Longleaf/ServiceManager.html +1229 -0
  81. data/docs/rdocs/Longleaf/ServiceMappingManager.html +410 -0
  82. data/docs/rdocs/Longleaf/ServiceMappingValidator.html +347 -0
  83. data/docs/rdocs/Longleaf/ServiceRecord.html +821 -0
  84. data/docs/rdocs/Longleaf/StorageLocation.html +985 -0
  85. data/docs/rdocs/Longleaf/StorageLocationManager.html +729 -0
  86. data/docs/rdocs/Longleaf/StorageLocationUnavailableError.html +143 -0
  87. data/docs/rdocs/Longleaf/StorageLocationValidator.html +373 -0
  88. data/docs/rdocs/Longleaf/StoragePathValidator.html +253 -0
  89. data/docs/rdocs/Longleaf/SystemConfigBuilder.html +441 -0
  90. data/docs/rdocs/Longleaf/SystemConfigFields.html +163 -0
  91. data/docs/rdocs/Longleaf/ValidateConfigCommand.html +451 -0
  92. data/docs/rdocs/Longleaf/ValidateMetadataCommand.html +408 -0
  93. data/docs/rdocs/_index.html +660 -0
  94. data/docs/rdocs/class_list.html +51 -0
  95. data/docs/rdocs/css/common.css +1 -0
  96. data/docs/rdocs/css/full_list.css +58 -0
  97. data/docs/rdocs/css/style.css +496 -0
  98. data/docs/rdocs/file.README.html +165 -0
  99. data/docs/rdocs/file_list.html +56 -0
  100. data/docs/rdocs/frames.html +17 -0
  101. data/docs/rdocs/index.html +165 -0
  102. data/docs/rdocs/js/app.js +303 -0
  103. data/docs/rdocs/js/full_list.js +216 -0
  104. data/docs/rdocs/js/jquery.js +4 -0
  105. data/docs/rdocs/method_list.html +2051 -0
  106. data/docs/rdocs/top-level-namespace.html +110 -0
  107. data/lib/longleaf/candidates/file_selector.rb +139 -0
  108. data/lib/longleaf/candidates/manifest_digest_provider.rb +17 -0
  109. data/lib/longleaf/candidates/registered_file_selector.rb +67 -0
  110. data/lib/longleaf/candidates/service_candidate_filesystem_iterator.rb +93 -0
  111. data/lib/longleaf/candidates/service_candidate_index_iterator.rb +84 -0
  112. data/lib/longleaf/candidates/service_candidate_locator.rb +23 -0
  113. data/lib/longleaf/candidates/single_digest_provider.rb +13 -0
  114. data/lib/longleaf/cli.rb +237 -46
  115. data/lib/longleaf/commands/deregister_command.rb +51 -0
  116. data/lib/longleaf/commands/preserve_command.rb +50 -0
  117. data/lib/longleaf/commands/register_command.rb +32 -43
  118. data/lib/longleaf/commands/reindex_command.rb +92 -0
  119. data/lib/longleaf/commands/validate_config_command.rb +33 -8
  120. data/lib/longleaf/commands/validate_metadata_command.rb +51 -0
  121. data/lib/longleaf/errors.rb +26 -7
  122. data/lib/longleaf/events/deregister_event.rb +53 -0
  123. data/lib/longleaf/events/event_names.rb +9 -0
  124. data/lib/longleaf/events/event_status_tracking.rb +59 -0
  125. data/lib/longleaf/events/preserve_event.rb +81 -0
  126. data/lib/longleaf/events/register_event.rb +52 -51
  127. data/lib/longleaf/helpers/case_insensitive_hash.rb +38 -0
  128. data/lib/longleaf/helpers/digest_helper.rb +56 -0
  129. data/lib/longleaf/helpers/s3_uri_helper.rb +86 -0
  130. data/lib/longleaf/helpers/selection_options_parser.rb +189 -0
  131. data/lib/longleaf/helpers/service_date_helper.rb +78 -0
  132. data/lib/longleaf/indexing/index_manager.rb +101 -0
  133. data/lib/longleaf/indexing/sequel_index_driver.rb +306 -0
  134. data/lib/longleaf/logging.rb +5 -4
  135. data/lib/longleaf/logging/redirecting_logger.rb +26 -25
  136. data/lib/longleaf/models/app_fields.rb +7 -2
  137. data/lib/longleaf/models/file_record.rb +17 -8
  138. data/lib/longleaf/models/filesystem_metadata_location.rb +56 -0
  139. data/lib/longleaf/models/filesystem_storage_location.rb +52 -0
  140. data/lib/longleaf/models/md_fields.rb +2 -1
  141. data/lib/longleaf/models/metadata_location.rb +47 -0
  142. data/lib/longleaf/models/metadata_record.rb +39 -15
  143. data/lib/longleaf/models/s3_storage_location.rb +133 -0
  144. data/lib/longleaf/models/service_definition.rb +7 -6
  145. data/lib/longleaf/models/service_fields.rb +7 -1
  146. data/lib/longleaf/models/service_record.rb +10 -6
  147. data/lib/longleaf/models/storage_location.rb +24 -19
  148. data/lib/longleaf/models/storage_types.rb +9 -0
  149. data/lib/longleaf/models/system_config_fields.rb +9 -0
  150. data/lib/longleaf/preservation_services/file_check_service.rb +58 -0
  151. data/lib/longleaf/preservation_services/fixity_check_service.rb +123 -0
  152. data/lib/longleaf/preservation_services/rsync_replication_service.rb +182 -0
  153. data/lib/longleaf/preservation_services/s3_replication_service.rb +143 -0
  154. data/lib/longleaf/services/application_config_deserializer.rb +81 -24
  155. data/lib/longleaf/services/application_config_manager.rb +20 -6
  156. data/lib/longleaf/services/application_config_validator.rb +19 -9
  157. data/lib/longleaf/services/configuration_validator.rb +67 -4
  158. data/lib/longleaf/services/filesystem_location_validator.rb +16 -0
  159. data/lib/longleaf/services/metadata_deserializer.rb +113 -42
  160. data/lib/longleaf/services/metadata_persistence_manager.rb +47 -0
  161. data/lib/longleaf/services/metadata_serializer.rb +138 -25
  162. data/lib/longleaf/services/metadata_validator.rb +76 -0
  163. data/lib/longleaf/services/s3_location_validator.rb +19 -0
  164. data/lib/longleaf/services/service_class_cache.rb +112 -0
  165. data/lib/longleaf/services/service_definition_manager.rb +10 -7
  166. data/lib/longleaf/services/service_definition_validator.rb +25 -18
  167. data/lib/longleaf/services/service_manager.rb +86 -11
  168. data/lib/longleaf/services/service_mapping_manager.rb +13 -12
  169. data/lib/longleaf/services/service_mapping_validator.rb +36 -26
  170. data/lib/longleaf/services/storage_location_manager.rb +76 -15
  171. data/lib/longleaf/services/storage_location_validator.rb +49 -35
  172. data/lib/longleaf/specs/config_builder.rb +47 -23
  173. data/lib/longleaf/specs/config_validator_helpers.rb +16 -0
  174. data/lib/longleaf/specs/custom_matchers.rb +9 -0
  175. data/lib/longleaf/specs/file_helpers.rb +61 -0
  176. data/lib/longleaf/specs/metadata_builder.rb +92 -0
  177. data/lib/longleaf/specs/system_config_builder.rb +27 -0
  178. data/lib/longleaf/version.rb +1 -1
  179. data/longleaf.gemspec +20 -7
  180. data/mkdocs.yml +21 -0
  181. metadata +306 -23
  182. data/.travis.yml +0 -4
  183. data/lib/longleaf/commands/abstract_command.rb +0 -37
  184. data/lib/longleaf/services/storage_path_validator.rb +0 -16
@@ -0,0 +1,133 @@
1
+ require 'longleaf/models/storage_location'
2
+ require 'longleaf/models/storage_types'
3
+ require 'longleaf/helpers/s3_uri_helper'
4
+ require 'uri'
5
+ require 'aws-sdk-s3'
6
+
7
+ module Longleaf
8
+ # A storage location in an S3 bucket
9
+ #
10
+ # Optionally, the location configuration may include an "options" sub-hash in order to provide
11
+ # any of the S3 client options accepted by the Aws::S3::Client initializer:
12
+ # https://docs.aws.amazon.com/sdk-for-ruby/v3/api/Aws/S3/Client.html#constructor_details
13
+
14
+ class S3StorageLocation < StorageLocation
15
+
16
+ IS_URI_REGEX = /\A#{URI::regexp}\z/
17
+
18
+ CLIENT_OPTIONS_FIELD = 'options'
19
+
20
+ # @param name [String] the name of this storage location
21
+ # @param config [Hash] hash containing the configuration options for this location
22
+ # @param md_loc [MetadataLocation] metadata location associated with this storage location
23
+ def initialize(name, config, md_loc)
24
+ super(name, config, md_loc)
25
+
26
+ @bucket_name = S3UriHelper.extract_bucket(@path)
27
+ if @bucket_name.nil?
28
+ raise ArgumentError.new("Unable to identify bucket for location #{@name} from path #{@path}")
29
+ end
30
+
31
+ # Force path to always end with a slash
32
+ @path += '/' unless @path.end_with?('/')
33
+
34
+ custom_options = config[CLIENT_OPTIONS_FIELD]
35
+ if custom_options.nil?
36
+ @client_options = Hash.new
37
+ else
38
+ # Clone options and convert keys to symbols
39
+ @client_options = Hash[custom_options.map { |(k,v)| [k.to_sym,v] } ]
40
+ end
41
+ # If no region directly configured, use region from path
42
+ if !@client_options.key?(:region)
43
+ region = S3UriHelper.extract_region(@path)
44
+ @client_options[:region] = region unless region.nil?
45
+ end
46
+
47
+ @subpath_prefix = S3UriHelper.extract_path(@path)
48
+ end
49
+
50
+ # @return the storage type for this location
51
+ def type
52
+ StorageTypes::S3_STORAGE_TYPE
53
+ end
54
+
55
+ # Get the absolute path to the file associated with the provided metadata path
56
+ # @param md_path [String] metadata file path
57
+ # @raise [ArgumentError] if the md_path is not in this storage location
58
+ # @return [String] the path for the file associated with this metadata
59
+ def get_path_from_metadata_path(md_path)
60
+ raise ArgumentError.new("A file_path parameter is required") if md_path.nil? || md_path.empty?
61
+
62
+ rel_path = @metadata_location.relative_file_path_for(md_path)
63
+
64
+ URI.join(@path, rel_path).to_s
65
+ end
66
+
67
+ # Checks that the bucket and the metadata location defined for this location are available
68
+ # @raise [StorageLocationUnavailableError] if the storage location is not available
69
+ def available?
70
+ begin
71
+ s3_client().head_bucket({ bucket: @bucket_name, use_accelerate_endpoint: false })
72
+ rescue StandardError => e
73
+ raise StorageLocationUnavailableError.new("Destination bucket #{@bucket_name} does not exist " \
74
+ + "or is not accessible: #{e.message}")
75
+ end
76
+ @metadata_location.available?
77
+ end
78
+
79
+ # Get the file path relative to this location
80
+ # @param file_path [String] file path
81
+ # @return the file path relative to this location
82
+ # @raise [ArgumentError] if the file path is not contained by this location
83
+ def relativize(file_path)
84
+ raise ArgumentError.new("Must provide a non-nil path to relativize") if file_path.nil?
85
+
86
+ if file_path.start_with?(@path)
87
+ file_path[@path.length..-1]
88
+ else
89
+ if file_path =~ IS_URI_REGEX
90
+ raise ArgumentError.new("Path #{file_path} is not contained by #{@name}")
91
+ else
92
+ # path already relative
93
+ file_path
94
+ end
95
+ end
96
+ end
97
+
98
+ # Prefixes the provided path with the path portion of the location's path that comes
99
+ # after the bucket URI, so that relative paths are placed under the same sub-URL of the bucket.
100
+ # For example:
101
+ # Given a location with 'path' http://example.s3-amazonaws.com/env/test/
102
+ # Where rel_path = 'path/to/text.txt'
103
+ # The result would be 'env/test/path/to/text.txt'
104
+ # @param rel_path relative path to work with
105
+ # @return the given relative path prefixed with the path portion of the storage location path
106
+ def relative_to_bucket_path(rel_path)
107
+ raise ArgumentError.new("Must provide a non-nil path") if rel_path.nil?
108
+
109
+ if @subpath_prefix.nil?
110
+ return rel_path
111
+ end
112
+
113
+ @subpath_prefix + rel_path
114
+ end
115
+
116
+ # @return the bucket used by this storage location
117
+ def s3_bucket
118
+ if @bucket.nil?
119
+ @s3 = Aws::S3::Resource.new(client: s3_client())
120
+ @bucket = @s3.bucket(@bucket_name)
121
+ end
122
+ @bucket
123
+ end
124
+
125
+ # @return the s3 client used by this storage location
126
+ def s3_client
127
+ if @client.nil?
128
+ @client = Aws::S3::Client.new(**@client_options)
129
+ end
130
+ @client
131
+ end
132
+ end
133
+ end
@@ -1,21 +1,22 @@
1
1
  require_relative 'service_fields'
2
2
 
3
- # Definition of a preservation service
4
3
  module Longleaf
4
+ # Definition of a configured preservation service
5
5
  class ServiceDefinition
6
6
  attr_reader :name
7
- attr_reader :work_script
7
+ attr_reader :work_script, :work_class
8
8
  attr_reader :frequency, :delay
9
9
  attr_reader :properties
10
-
11
- def initialize(name:, work_script:, frequency: nil, delay: nil, properties: Hash.new)
10
+
11
+ def initialize(name:, work_script:, work_class: nil, frequency: nil, delay: nil, properties: Hash.new)
12
12
  raise ArgumentError.new("Parameters name and work_script are required") unless name && work_script
13
-
13
+
14
14
  @properties = properties
15
15
  @name = name
16
16
  @work_script = work_script
17
+ @work_class = work_class
17
18
  @frequency = frequency
18
19
  @delay = delay
19
20
  end
20
21
  end
21
- end
22
+ end
@@ -1,10 +1,16 @@
1
1
  module Longleaf
2
+ # Constants for common configuration fields for preservation service definitions
2
3
  class ServiceFields
3
4
  WORK_SCRIPT = 'work_script'
5
+ WORK_CLASS = 'work_class'
4
6
  FREQUENCY = 'frequency'
5
7
  DELAY = 'delay'
6
-
8
+
7
9
  REPLICATE_TO = 'to'
8
10
  DIGEST_ALGORITHMS = 'algorithms'
11
+
12
+ COLLISION_PROPERTY = "replica_collision_policy"
13
+ DEFAULT_COLLISION_POLICY = "replace"
14
+ VALID_COLLISION_POLICIES = ["replace"]
9
15
  end
10
16
  end
@@ -1,27 +1,31 @@
1
- # Record for an individual service in a file's metadata record.
2
1
  module Longleaf
2
+ # Record for an individual service in a file's metadata record.
3
3
  class ServiceRecord
4
4
  attr_reader :properties
5
5
  attr_accessor :stale_replicas, :timestamp, :run_needed
6
-
6
+ attr_accessor :failure_timestamp
7
+
7
8
  # @param properties [Hash] initial properties for this service record
9
+ # @param stale_replicas [Boolean] whether there are any stale replicas from this service
10
+ # @param timestamp [String] timestamp when this service last ran or was initialized
11
+ # @param run_needed [Boolean] flag indicating that this service should be run at the next available opportunity
8
12
  def initialize(properties: Hash.new, stale_replicas: false, timestamp: nil, run_needed: false)
9
13
  raise ArgumentError.new("Service properties must be a hash") if properties.class != Hash
10
-
14
+
11
15
  @properties = properties
12
16
  @timestamp = timestamp
13
17
  @stale_replicas = stale_replicas
14
18
  @run_needed = run_needed
15
19
  end
16
-
20
+
17
21
  # @return the value of a service property identified by key
18
22
  def [](key)
19
23
  @properties[key]
20
24
  end
21
-
25
+
22
26
  # set the value of a service property identified by key
23
27
  def []=(key, value)
24
28
  @properties[key] = value
25
29
  end
26
30
  end
27
- end
31
+ end
@@ -1,19 +1,25 @@
1
- require 'longleaf/services/metadata_serializer'
1
+ require 'longleaf/models/app_fields'
2
2
 
3
3
  module Longleaf
4
+ # Representation of a configured storage location
4
5
  class StorageLocation
6
+ AF ||= Longleaf::AppFields
7
+
5
8
  attr_reader :name
6
9
  attr_reader :path
7
- attr_reader :metadata_path
8
-
9
- def initialize(name:, path:, metadata_path:)
10
- raise ArgumentError.new("Parameters name, path and metadata_path are required") unless name && path && metadata_path
11
-
12
- @path = path
10
+ attr_reader :metadata_location
11
+
12
+ # @param name [String] the name of this storage location
13
+ # @param config [Hash] hash containing the configuration options for this location
14
+ # @param md_loc [MetadataLocation] metadata location associated with this storage location
15
+ def initialize(name, config, md_loc)
16
+ raise ArgumentError.new("Config parameter is required") unless config
17
+ @path = config[AF::LOCATION_PATH]
13
18
  @name = name
14
- @metadata_path = metadata_path
19
+ raise ArgumentError.new("Parameters name, path and metadata location are required") unless @name && @path && md_loc
20
+ @metadata_location = md_loc
15
21
  end
16
-
22
+
17
23
  # Get the path for the metadata file for the given file path located in this storage location.
18
24
  # @param file_path [String] path of the file
19
25
  # @raise [ArgumentError] if the file_path is not provided or is not in this storage location.
@@ -22,16 +28,15 @@ module Longleaf
22
28
  raise ArgumentError.new("Provided file path is not contained by storage location #{@name}: #{file_path}") \
23
29
  unless file_path.start_with?(@path)
24
30
 
25
- file_path.sub(/^#{@path}/, metadata_path) + MetadataSerializer::metadata_suffix
31
+ rel_file_path = relativize(file_path)
32
+
33
+ @metadata_location.metadata_path_for(rel_file_path)
26
34
  end
27
-
28
- # Checks that the path and metadata path defined in this location are available
29
- # @raise [StorageLocationUnavailableError] if the storage location is not available
30
- def available?
31
- raise StorageLocationUnavailableError.new("Path does not exist or is not a directory: #{@path}")\
32
- unless Dir.exist?(@path)
33
- raise StorageLocationUnavailableError.new("Metadata path does not exist or is not a directory: #{@metadata_path}")\
34
- unless Dir.exist?(@metadata_path)
35
+
36
+ # @param file_path [String] path to check
37
+ # @return true if the file path is contained by the path for this location
38
+ def contains?(file_path)
39
+ file_path.start_with?(@path)
35
40
  end
36
41
  end
37
- end
42
+ end
@@ -0,0 +1,9 @@
1
+ module Longleaf
2
+ # Storage type constants
3
+ class StorageTypes
4
+ FILESYSTEM_STORAGE_TYPE = 'filesystem'
5
+ S3_STORAGE_TYPE = 's3'
6
+
7
+ DEFAULT_STORAGE_TYPE = FILESYSTEM_STORAGE_TYPE
8
+ end
9
+ end
@@ -0,0 +1,9 @@
1
+ module Longleaf
2
+ # System configuration field names
3
+ class SystemConfigFields
4
+ MD_INDEX = 'index'
5
+ MD_INDEX_ADAPTER = 'adapter'
6
+ MD_INDEX_CONNECTION = 'connection'
7
+ MD_INDEX_PAGE_SIZE = 'page_size'
8
+ end
9
+ end
@@ -0,0 +1,58 @@
1
+ require 'longleaf/events/event_names'
2
+ require 'longleaf/logging'
3
+
4
+ module Longleaf
5
+ # Preservation service which validates a file using current filesystem information compared against the
6
+ # last registered details for that file. Checks that the file exists and that its size and last modified timestamp match.
7
+ class FileCheckService
8
+ include Longleaf::Logging
9
+
10
+ # Initialize a FileCheckService from the given service definition
11
+ #
12
+ # @param service_def [ServiceDefinition] the configuration for this service
13
+ # @param app_manager [ApplicationConfigManager] manager for configured storage locations
14
+ def initialize(service_def, app_manager)
15
+ @service_def = service_def
16
+ @app_manager = app_manager
17
+ end
18
+
19
+ # Perform file information check.
20
+ #
21
+ # @param file_rec [FileRecord] record representing the file to perform the service on.
22
+ # @param event [String] name of the event this service is being invoked by.
23
+ # @raise [PreservationServiceError] if the file system information does not match the stored details
24
+ def perform(file_rec, event)
25
+ file_path = file_rec.path
26
+ md_rec = file_rec.metadata_record
27
+
28
+ logger.debug("Performing file information check of #{file_path}")
29
+
30
+ if !File.exist?(file_path)
31
+ raise PreservationServiceError.new("File does not exist: #{file_path}")
32
+ end
33
+
34
+ file_size = File.size(file_rec.path)
35
+ if file_size != md_rec.file_size
36
+ raise PreservationServiceError.new("File size for #{file_path} does not match the expected value: registered = #{md_rec.file_size} bytes, actual = #{file_size} bytes")
37
+ end
38
+
39
+ last_modified = File.mtime(file_rec.path).utc.iso8601(3)
40
+ if last_modified != md_rec.last_modified
41
+ raise PreservationServiceError.new("Last modified timestamp for #{file_path} does not match the expected value: registered = #{md_rec.last_modified}, actual = #{last_modified}")
42
+ end
43
+ end
44
+
45
+ # Determine if this service is applicable for the provided event, given the configured service definition
46
+ #
47
+ # @param event [String] name of the event
48
+ # @return [Boolean] returns true if this service is applicable for the provided event
49
+ def is_applicable?(event)
50
+ case event
51
+ when EventNames::PRESERVE
52
+ true
53
+ else
54
+ false
55
+ end
56
+ end
57
+ end
58
+ end
@@ -0,0 +1,123 @@
1
+ require 'longleaf/events/event_names'
2
+ require 'longleaf/models/service_fields'
3
+ require 'longleaf/logging'
4
+ require 'longleaf/helpers/digest_helper'
5
+ require 'set'
6
+
7
+ module Longleaf
8
+ # Preservation service which performs one or more fixity checks on a file based on the configured list
9
+ # of digest algorithms. It currently supports 'md5', 'sha1', 'sha2', 'sha256', 'sha384', 'sha512' and 'rmd160'.
10
+ #
11
+ # If the service encounters a file which is missing any of the digest algorithms the service is configured
12
+ # to check, the outcome may be controlled with the 'absent_digest' property via the following values:
13
+ # * 'fail' - the service will raise a ChecksumMismatchError for the missing algorithm. This is the default.
14
+ # * 'ignore' - the service will skip calculating any algorithms not already present for the file.
15
+ # * 'generate' - the service will generate and store any missing digests from the set of configured algorithms.
16
+ class FixityCheckService
17
+ include Longleaf::Logging
18
+
19
+ SUPPORTED_ALGORITHMS = ['md5', 'sha1', 'sha2', 'sha256', 'sha384', 'sha512', 'rmd160']
20
+
21
+ # service configuration property indicating how to handle situations where a file does not
22
+ # have a digest for one of the expected algorithms on record.
23
+ ABSENT_DIGEST_PROPERTY = 'absent_digest'
24
+ FAIL_IF_ABSENT = 'fail'
25
+ GENERATE_IF_ABSENT = 'generate'
26
+ IGNORE_IF_ABSENT = 'ignore'
27
+ ABSENT_DIGEST_OPTIONS = [FAIL_IF_ABSENT, GENERATE_IF_ABSENT, IGNORE_IF_ABSENT]
28
+
29
+ # Initialize a FixityCheckService from the given service definition
30
+ #
31
+ # @param service_def [ServiceDefinition] the configuration for this service
32
+ # @param app_manager [ApplicationConfigManager] manager for configured storage locations
33
+ def initialize(service_def, app_manager)
34
+ @service_def = service_def
35
+ @absent_digest_behavior = @service_def.properties[ABSENT_DIGEST_PROPERTY] || FAIL_IF_ABSENT
36
+ unless ABSENT_DIGEST_OPTIONS.include?(@absent_digest_behavior)
37
+ raise ArgumentError.new("Invalid option '#{@absent_digest_behavior}' for property #{ABSENT_DIGEST_PROPERTY} in service #{service_def.name}")
38
+ end
39
+
40
+ service_algs = service_def.properties[ServiceFields::DIGEST_ALGORITHMS]
41
+ if service_algs.nil? || service_algs.empty?
42
+ raise ArgumentError.new("FixityCheckService from definition #{service_def.name} requires a list of one or more digest algorithms")
43
+ end
44
+
45
+ service_algs = [service_algs] if service_algs.is_a?(String)
46
+
47
+ # Store the list of digest algorithms to verify, using normalized algorithm names.
48
+ @digest_algs = Set.new
49
+ service_algs.each do |alg|
50
+ normalized_alg = alg.downcase.delete('-')
51
+ if SUPPORTED_ALGORITHMS.include?(normalized_alg)
52
+ @digest_algs << normalized_alg
53
+ else
54
+ raise ArgumentError.new("Unsupported checksum algorithm '#{alg}' in definition #{service_def.name}. Supported algorithms are: #{SUPPORTED_ALGORITHMS}")
55
+ end
56
+ end
57
+ end
58
+
59
+ # Perform all configured fixity checks on the provided file
60
+ #
61
+ # @param file_rec [FileRecord] record representing the file to perform the service on.
62
+ # @param event [String] name of the event this service is being invoked by.
63
+ # @raise [ChecksumMismatchError] if a generated checksum does not match the one on record, or if a digest is absent and the 'fail' behavior is in effect
64
+ def perform(file_rec, event)
65
+ path = file_rec.path
66
+ md_rec = file_rec.metadata_record
67
+
68
+ # Get the list of existing checksums for the file and normalize algorithm names
69
+ file_digests = Hash.new
70
+ md_rec.checksums&.each do |alg, digest|
71
+ normalized_alg = alg.downcase.delete('-')
72
+ if @digest_algs.include?(normalized_alg)
73
+ file_digests[normalized_alg] = digest
74
+ else
75
+ logger.debug("Metadata for file #{path} contains unexpected '#{alg}' digest, it will be ignored.")
76
+ end
77
+ end
78
+
79
+ @digest_algs.each do |alg|
80
+ existing_digest = file_digests[alg]
81
+
82
+ if existing_digest.nil?
83
+ if @absent_digest_behavior == FAIL_IF_ABSENT
84
+ raise ChecksumMismatchError.new("Fixity check using algorithm '#{alg}' failed for file #{path}: no existing digest of type '#{alg}' on record.")
85
+ elsif @absent_digest_behavior == IGNORE_IF_ABSENT
86
+ logger.debug("Skipping check of algorithm '#{alg}' for file #{path}: no digest on record.")
87
+ next
88
+ end
89
+ end
90
+
91
+ digest = DigestHelper::start_digest(alg)
92
+ digest.file(path)
93
+ generated_digest = digest.hexdigest
94
+
95
+ # Store the missing checksum if using the 'generate' behavior
96
+ if existing_digest.nil? && @absent_digest_behavior == GENERATE_IF_ABSENT
97
+ md_rec.checksums[alg] = generated_digest
98
+ logger.info("Generated and stored digest using algorithm '#{alg}' for file #{path}")
99
+ else
100
+ # Compare the new digest to the one on record
101
+ if existing_digest == generated_digest
102
+ logger.info("Fixity check using algorithm '#{alg}' succeeded for file #{path}")
103
+ else
104
+ raise ChecksumMismatchError.new("Fixity check using algorithm '#{alg}' failed for file #{path}: expected '#{existing_digest}', calculated '#{generated_digest}.'")
105
+ end
106
+ end
107
+ end
108
+ end
109
+
110
+ # Determine if this service is applicable for the provided event, given the configured service definition
111
+ #
112
+ # @param event [String] name of the event
113
+ # @return [Boolean] returns true if this service is applicable for the provided event
114
+ def is_applicable?(event)
115
+ case event
116
+ when EventNames::PRESERVE
117
+ true
118
+ else
119
+ false
120
+ end
121
+ end
122
+ end
123
+ end