longleaf 0.1.0.pre.2 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (184) hide show
  1. checksums.yaml +4 -4
  2. data/.circleci/config.yml +94 -0
  3. data/.editorconfig +13 -0
  4. data/.gitignore +4 -1
  5. data/.rubocop.yml +44 -0
  6. data/.rubocop_todo.yml +834 -0
  7. data/.yardopts +1 -0
  8. data/Gemfile +16 -1
  9. data/README.md +98 -12
  10. data/Rakefile +6 -0
  11. data/bin/setup +16 -1
  12. data/docs/aboutlongleaf.md +28 -0
  13. data/docs/extra.css +32 -0
  14. data/docs/img/change-file.png +0 -0
  15. data/docs/img/ll-example-preserved.png +0 -0
  16. data/docs/index.md +19 -0
  17. data/docs/install.md +66 -0
  18. data/docs/ll-example/config-example-relative.yml +33 -0
  19. data/docs/ll-example/files-dir/LLexample-PDF.pdf +0 -0
  20. data/docs/ll-example/files-dir/LLexample-TOCHANGE.txt +15 -0
  21. data/docs/ll-example/files-dir/LLexample-tokeep.txt +10 -0
  22. data/docs/ll-example/metadata-dir/.gitkeep +0 -0
  23. data/docs/ll-example/replica-files/.gitkeep +0 -0
  24. data/docs/ll-example/replica-metadata/.gitkeep +0 -0
  25. data/docs/quickstart.md +270 -0
  26. data/docs/rdocs/Longleaf.html +135 -0
  27. data/docs/rdocs/Longleaf/AppFields.html +178 -0
  28. data/docs/rdocs/Longleaf/ApplicationConfigDeserializer.html +631 -0
  29. data/docs/rdocs/Longleaf/ApplicationConfigManager.html +610 -0
  30. data/docs/rdocs/Longleaf/ApplicationConfigValidator.html +238 -0
  31. data/docs/rdocs/Longleaf/CLI.html +909 -0
  32. data/docs/rdocs/Longleaf/ChecksumMismatchError.html +151 -0
  33. data/docs/rdocs/Longleaf/ConfigBuilder.html +1339 -0
  34. data/docs/rdocs/Longleaf/ConfigurationError.html +143 -0
  35. data/docs/rdocs/Longleaf/ConfigurationValidator.html +227 -0
  36. data/docs/rdocs/Longleaf/DeregisterCommand.html +420 -0
  37. data/docs/rdocs/Longleaf/DeregisterEvent.html +453 -0
  38. data/docs/rdocs/Longleaf/DeregistrationError.html +151 -0
  39. data/docs/rdocs/Longleaf/DigestHelper.html +419 -0
  40. data/docs/rdocs/Longleaf/EventError.html +147 -0
  41. data/docs/rdocs/Longleaf/EventNames.html +163 -0
  42. data/docs/rdocs/Longleaf/EventStatusTracking.html +656 -0
  43. data/docs/rdocs/Longleaf/FileCheckService.html +540 -0
  44. data/docs/rdocs/Longleaf/FileHelpers.html +520 -0
  45. data/docs/rdocs/Longleaf/FileRecord.html +716 -0
  46. data/docs/rdocs/Longleaf/FileSelector.html +901 -0
  47. data/docs/rdocs/Longleaf/FixityCheckService.html +691 -0
  48. data/docs/rdocs/Longleaf/IndexManager.html +1155 -0
  49. data/docs/rdocs/Longleaf/InvalidDigestAlgorithmError.html +143 -0
  50. data/docs/rdocs/Longleaf/InvalidStoragePathError.html +143 -0
  51. data/docs/rdocs/Longleaf/Logging.html +405 -0
  52. data/docs/rdocs/Longleaf/Logging/RedirectingLogger.html +1213 -0
  53. data/docs/rdocs/Longleaf/LongleafError.html +139 -0
  54. data/docs/rdocs/Longleaf/MDFields.html +193 -0
  55. data/docs/rdocs/Longleaf/MetadataBuilder.html +787 -0
  56. data/docs/rdocs/Longleaf/MetadataDeserializer.html +537 -0
  57. data/docs/rdocs/Longleaf/MetadataError.html +143 -0
  58. data/docs/rdocs/Longleaf/MetadataPersistenceManager.html +539 -0
  59. data/docs/rdocs/Longleaf/MetadataRecord.html +1411 -0
  60. data/docs/rdocs/Longleaf/MetadataSerializer.html +786 -0
  61. data/docs/rdocs/Longleaf/PreservationServiceError.html +147 -0
  62. data/docs/rdocs/Longleaf/PreserveCommand.html +410 -0
  63. data/docs/rdocs/Longleaf/PreserveEvent.html +491 -0
  64. data/docs/rdocs/Longleaf/RegisterCommand.html +428 -0
  65. data/docs/rdocs/Longleaf/RegisterEvent.html +628 -0
  66. data/docs/rdocs/Longleaf/RegisteredFileSelector.html +446 -0
  67. data/docs/rdocs/Longleaf/RegistrationError.html +151 -0
  68. data/docs/rdocs/Longleaf/ReindexCommand.html +576 -0
  69. data/docs/rdocs/Longleaf/RsyncReplicationService.html +1180 -0
  70. data/docs/rdocs/Longleaf/SequelIndexDriver.html +1978 -0
  71. data/docs/rdocs/Longleaf/ServiceCandidateFilesystemIterator.html +572 -0
  72. data/docs/rdocs/Longleaf/ServiceCandidateIndexIterator.html +532 -0
  73. data/docs/rdocs/Longleaf/ServiceCandidateLocator.html +333 -0
  74. data/docs/rdocs/Longleaf/ServiceClassCache.html +725 -0
  75. data/docs/rdocs/Longleaf/ServiceDateHelper.html +425 -0
  76. data/docs/rdocs/Longleaf/ServiceDefinition.html +683 -0
  77. data/docs/rdocs/Longleaf/ServiceDefinitionManager.html +371 -0
  78. data/docs/rdocs/Longleaf/ServiceDefinitionValidator.html +269 -0
  79. data/docs/rdocs/Longleaf/ServiceFields.html +173 -0
  80. data/docs/rdocs/Longleaf/ServiceManager.html +1229 -0
  81. data/docs/rdocs/Longleaf/ServiceMappingManager.html +410 -0
  82. data/docs/rdocs/Longleaf/ServiceMappingValidator.html +347 -0
  83. data/docs/rdocs/Longleaf/ServiceRecord.html +821 -0
  84. data/docs/rdocs/Longleaf/StorageLocation.html +985 -0
  85. data/docs/rdocs/Longleaf/StorageLocationManager.html +729 -0
  86. data/docs/rdocs/Longleaf/StorageLocationUnavailableError.html +143 -0
  87. data/docs/rdocs/Longleaf/StorageLocationValidator.html +373 -0
  88. data/docs/rdocs/Longleaf/StoragePathValidator.html +253 -0
  89. data/docs/rdocs/Longleaf/SystemConfigBuilder.html +441 -0
  90. data/docs/rdocs/Longleaf/SystemConfigFields.html +163 -0
  91. data/docs/rdocs/Longleaf/ValidateConfigCommand.html +451 -0
  92. data/docs/rdocs/Longleaf/ValidateMetadataCommand.html +408 -0
  93. data/docs/rdocs/_index.html +660 -0
  94. data/docs/rdocs/class_list.html +51 -0
  95. data/docs/rdocs/css/common.css +1 -0
  96. data/docs/rdocs/css/full_list.css +58 -0
  97. data/docs/rdocs/css/style.css +496 -0
  98. data/docs/rdocs/file.README.html +165 -0
  99. data/docs/rdocs/file_list.html +56 -0
  100. data/docs/rdocs/frames.html +17 -0
  101. data/docs/rdocs/index.html +165 -0
  102. data/docs/rdocs/js/app.js +303 -0
  103. data/docs/rdocs/js/full_list.js +216 -0
  104. data/docs/rdocs/js/jquery.js +4 -0
  105. data/docs/rdocs/method_list.html +2051 -0
  106. data/docs/rdocs/top-level-namespace.html +110 -0
  107. data/lib/longleaf/candidates/file_selector.rb +139 -0
  108. data/lib/longleaf/candidates/manifest_digest_provider.rb +17 -0
  109. data/lib/longleaf/candidates/registered_file_selector.rb +67 -0
  110. data/lib/longleaf/candidates/service_candidate_filesystem_iterator.rb +93 -0
  111. data/lib/longleaf/candidates/service_candidate_index_iterator.rb +84 -0
  112. data/lib/longleaf/candidates/service_candidate_locator.rb +23 -0
  113. data/lib/longleaf/candidates/single_digest_provider.rb +13 -0
  114. data/lib/longleaf/cli.rb +237 -46
  115. data/lib/longleaf/commands/deregister_command.rb +51 -0
  116. data/lib/longleaf/commands/preserve_command.rb +50 -0
  117. data/lib/longleaf/commands/register_command.rb +32 -43
  118. data/lib/longleaf/commands/reindex_command.rb +92 -0
  119. data/lib/longleaf/commands/validate_config_command.rb +33 -8
  120. data/lib/longleaf/commands/validate_metadata_command.rb +51 -0
  121. data/lib/longleaf/errors.rb +26 -7
  122. data/lib/longleaf/events/deregister_event.rb +53 -0
  123. data/lib/longleaf/events/event_names.rb +9 -0
  124. data/lib/longleaf/events/event_status_tracking.rb +59 -0
  125. data/lib/longleaf/events/preserve_event.rb +81 -0
  126. data/lib/longleaf/events/register_event.rb +52 -51
  127. data/lib/longleaf/helpers/case_insensitive_hash.rb +38 -0
  128. data/lib/longleaf/helpers/digest_helper.rb +56 -0
  129. data/lib/longleaf/helpers/s3_uri_helper.rb +86 -0
  130. data/lib/longleaf/helpers/selection_options_parser.rb +189 -0
  131. data/lib/longleaf/helpers/service_date_helper.rb +78 -0
  132. data/lib/longleaf/indexing/index_manager.rb +101 -0
  133. data/lib/longleaf/indexing/sequel_index_driver.rb +306 -0
  134. data/lib/longleaf/logging.rb +5 -4
  135. data/lib/longleaf/logging/redirecting_logger.rb +26 -25
  136. data/lib/longleaf/models/app_fields.rb +7 -2
  137. data/lib/longleaf/models/file_record.rb +17 -8
  138. data/lib/longleaf/models/filesystem_metadata_location.rb +56 -0
  139. data/lib/longleaf/models/filesystem_storage_location.rb +52 -0
  140. data/lib/longleaf/models/md_fields.rb +2 -1
  141. data/lib/longleaf/models/metadata_location.rb +47 -0
  142. data/lib/longleaf/models/metadata_record.rb +39 -15
  143. data/lib/longleaf/models/s3_storage_location.rb +133 -0
  144. data/lib/longleaf/models/service_definition.rb +7 -6
  145. data/lib/longleaf/models/service_fields.rb +7 -1
  146. data/lib/longleaf/models/service_record.rb +10 -6
  147. data/lib/longleaf/models/storage_location.rb +24 -19
  148. data/lib/longleaf/models/storage_types.rb +9 -0
  149. data/lib/longleaf/models/system_config_fields.rb +9 -0
  150. data/lib/longleaf/preservation_services/file_check_service.rb +58 -0
  151. data/lib/longleaf/preservation_services/fixity_check_service.rb +123 -0
  152. data/lib/longleaf/preservation_services/rsync_replication_service.rb +182 -0
  153. data/lib/longleaf/preservation_services/s3_replication_service.rb +143 -0
  154. data/lib/longleaf/services/application_config_deserializer.rb +81 -24
  155. data/lib/longleaf/services/application_config_manager.rb +20 -6
  156. data/lib/longleaf/services/application_config_validator.rb +19 -9
  157. data/lib/longleaf/services/configuration_validator.rb +67 -4
  158. data/lib/longleaf/services/filesystem_location_validator.rb +16 -0
  159. data/lib/longleaf/services/metadata_deserializer.rb +113 -42
  160. data/lib/longleaf/services/metadata_persistence_manager.rb +47 -0
  161. data/lib/longleaf/services/metadata_serializer.rb +138 -25
  162. data/lib/longleaf/services/metadata_validator.rb +76 -0
  163. data/lib/longleaf/services/s3_location_validator.rb +19 -0
  164. data/lib/longleaf/services/service_class_cache.rb +112 -0
  165. data/lib/longleaf/services/service_definition_manager.rb +10 -7
  166. data/lib/longleaf/services/service_definition_validator.rb +25 -18
  167. data/lib/longleaf/services/service_manager.rb +86 -11
  168. data/lib/longleaf/services/service_mapping_manager.rb +13 -12
  169. data/lib/longleaf/services/service_mapping_validator.rb +36 -26
  170. data/lib/longleaf/services/storage_location_manager.rb +76 -15
  171. data/lib/longleaf/services/storage_location_validator.rb +49 -35
  172. data/lib/longleaf/specs/config_builder.rb +47 -23
  173. data/lib/longleaf/specs/config_validator_helpers.rb +16 -0
  174. data/lib/longleaf/specs/custom_matchers.rb +9 -0
  175. data/lib/longleaf/specs/file_helpers.rb +61 -0
  176. data/lib/longleaf/specs/metadata_builder.rb +92 -0
  177. data/lib/longleaf/specs/system_config_builder.rb +27 -0
  178. data/lib/longleaf/version.rb +1 -1
  179. data/longleaf.gemspec +20 -7
  180. data/mkdocs.yml +21 -0
  181. metadata +306 -23
  182. data/.travis.yml +0 -4
  183. data/lib/longleaf/commands/abstract_command.rb +0 -37
  184. data/lib/longleaf/services/storage_path_validator.rb +0 -16
@@ -0,0 +1,133 @@
1
+ require 'longleaf/models/storage_location'
2
+ require 'longleaf/models/storage_types'
3
+ require 'longleaf/helpers/s3_uri_helper'
4
+ require 'uri'
5
+ require 'aws-sdk-s3'
6
+
7
+ module Longleaf
8
+ # A storage location in an S3 bucket
9
+ #
10
+ # Optionally, the location configuration may include an "options" sub-hash in order to provide
11
+ # any of the S3 client options accepted by the Aws::S3::Client initializer:
12
+ # https://docs.aws.amazon.com/sdk-for-ruby/v3/api/Aws/S3/Client.html#constructor_details
13
+
14
+ class S3StorageLocation < StorageLocation
15
+
16
+ IS_URI_REGEX = /\A#{URI::regexp}\z/
17
+
18
+ CLIENT_OPTIONS_FIELD = 'options'
19
+
20
+ # @param name [String] the name of this storage location
21
+ # @param config [Hash] hash containing the configuration options for this location
22
+ # @param md_loc [MetadataLocation] metadata location associated with this storage location
23
+ def initialize(name, config, md_loc)
24
+ super(name, config, md_loc)
25
+
26
+ @bucket_name = S3UriHelper.extract_bucket(@path)
27
+ if @bucket_name.nil?
28
+ raise ArgumentError.new("Unable to identify bucket for location #{@name} from path #{@path}")
29
+ end
30
+
31
+ # Force path to always end with a slash
32
+ @path += '/' unless @path.end_with?('/')
33
+
34
+ custom_options = config[CLIENT_OPTIONS_FIELD]
35
+ if custom_options.nil?
36
+ @client_options = Hash.new
37
+ else
38
+ # Clone options and convert keys to symbols
39
+ @client_options = Hash[custom_options.map { |(k,v)| [k.to_sym,v] } ]
40
+ end
41
+ # If no region directly configured, use region from path
42
+ if !@client_options.key?(:region)
43
+ region = S3UriHelper.extract_region(@path)
44
+ @client_options[:region] = region unless region.nil?
45
+ end
46
+
47
+ @subpath_prefix = S3UriHelper.extract_path(@path)
48
+ end
49
+
50
+ # @return the storage type for this location
51
+ def type
52
+ StorageTypes::S3_STORAGE_TYPE
53
+ end
54
+
55
+ # Get the absolute path to the file associated with the provided metadata path
56
+ # @param md_path [String] metadata file path
57
+ # @raise [ArgumentError] if the md_path is not in this storage location
58
+ # @return [String] the path for the file associated with this metadata
59
+ def get_path_from_metadata_path(md_path)
60
+ raise ArgumentError.new("A file_path parameter is required") if md_path.nil? || md_path.empty?
61
+
62
+ rel_path = @metadata_location.relative_file_path_for(md_path)
63
+
64
+ URI.join(@path, rel_path).to_s
65
+ end
66
+
67
+ # Checks that the path and metadata path defined in this location are available
68
+ # @raise [StorageLocationUnavailableError] if the storage location is not available
69
+ def available?
70
+ begin
71
+ s3_client().head_bucket({ bucket: @bucket_name, use_accelerate_endpoint: false })
72
+ rescue StandardError => e
73
+ raise StorageLocationUnavailableError.new("Destination bucket #{@bucket_name} does not exist " \
74
+ + "or is not accessible: #{e.message}")
75
+ end
76
+ @metadata_location.available?
77
+ end
78
+
79
+ # Get the file path relative to this location
80
+ # @param file_path [String] file path
81
+ # @return the file path relative to this location
82
+ # @raise [ArgumentError] if the file path is not contained by this location
83
+ def relativize(file_path)
84
+ raise ArgumentError.new("Must provide a non-nil path to relativize") if file_path.nil?
85
+
86
+ if file_path.start_with?(@path)
87
+ file_path[@path.length..-1]
88
+ else
89
+ if file_path =~ IS_URI_REGEX
90
+ raise ArgumentError.new("Path #{file_path} is not contained by #{@name}")
91
+ else
92
+ # path already relative
93
+ file_path
94
+ end
95
+ end
96
+ end
97
+
98
+ # Prefixes the provided path with the query path portion of the location's path
99
+ # after the bucket uri, used to place relative paths into the same sub-URL of a bucket.
100
+ # For example:
101
+ # Given a location with 'path' http://example.s3-amazonaws.com/env/test/
102
+ # Where rel_path = 'path/to/text.txt'
103
+ # The result would be 'env/test/path/to/text.txt'
104
+ # @param rel_path relative path to work with
105
+ # @return the given relative path prefixed with the path portion of the storage location path
106
+ def relative_to_bucket_path(rel_path)
107
+ raise ArgumentError.new("Must provide a non-nil path") if rel_path.nil?
108
+
109
+ if @subpath_prefix.nil?
110
+ return rel_path
111
+ end
112
+
113
+ @subpath_prefix + rel_path
114
+ end
115
+
116
+ # @return the bucket used by this storage location
117
+ def s3_bucket
118
+ if @bucket.nil?
119
+ @s3 = Aws::S3::Resource.new(client: s3_client())
120
+ @bucket = @s3.bucket(@bucket_name)
121
+ end
122
+ @bucket
123
+ end
124
+
125
+ # @return the s3 client used by this storage location
126
+ def s3_client
127
+ if @client.nil?
128
+ @client = Aws::S3::Client.new(**@client_options)
129
+ end
130
+ @client
131
+ end
132
+ end
133
+ end
@@ -1,21 +1,22 @@
1
1
  require_relative 'service_fields'
2
2
 
3
- # Definition of a preservation service
4
3
  module Longleaf
4
+ # Definition of a configured preservation service
5
5
  class ServiceDefinition
6
6
  attr_reader :name
7
- attr_reader :work_script
7
+ attr_reader :work_script, :work_class
8
8
  attr_reader :frequency, :delay
9
9
  attr_reader :properties
10
-
11
- def initialize(name:, work_script:, frequency: nil, delay: nil, properties: Hash.new)
10
+
11
+ def initialize(name:, work_script:, work_class: nil, frequency: nil, delay: nil, properties: Hash.new)
12
12
  raise ArgumentError.new("Parameters name and work_script are required") unless name && work_script
13
-
13
+
14
14
  @properties = properties
15
15
  @name = name
16
16
  @work_script = work_script
17
+ @work_class = work_class
17
18
  @frequency = frequency
18
19
  @delay = delay
19
20
  end
20
21
  end
21
- end
22
+ end
@@ -1,10 +1,16 @@
1
1
  module Longleaf
2
+ # Constants for common configuration fields for preservation service definitions
2
3
  class ServiceFields
3
4
  WORK_SCRIPT = 'work_script'
5
+ WORK_CLASS = 'work_class'
4
6
  FREQUENCY = 'frequency'
5
7
  DELAY = 'delay'
6
-
8
+
7
9
  REPLICATE_TO = 'to'
8
10
  DIGEST_ALGORITHMS = 'algorithms'
11
+
12
+ COLLISION_PROPERTY = "replica_collision_policy"
13
+ DEFAULT_COLLISION_POLICY = "replace"
14
+ VALID_COLLISION_POLICIES = ["replace"]
9
15
  end
10
16
  end
@@ -1,27 +1,31 @@
1
- # Record for an individual service in a file's metadata record.
2
1
  module Longleaf
2
+ # Record for an individual service in a file's metadata record.
3
3
  class ServiceRecord
4
4
  attr_reader :properties
5
5
  attr_accessor :stale_replicas, :timestamp, :run_needed
6
-
6
+ attr_accessor :failure_timestamp
7
+
7
8
  # @param properties [Hash] initial properties for this service record
9
+ # @param stale_replicas [Boolean] whether there are any stale replicas from this service
10
+ # @param timestamp [String] timestamp when this service last ran or was initialized
11
+ # @param run_needed [Boolean] flag indicating that this service should be run at the next available opportunity
8
12
  def initialize(properties: Hash.new, stale_replicas: false, timestamp: nil, run_needed: false)
9
13
  raise ArgumentError.new("Service properties must be a hash") if properties.class != Hash
10
-
14
+
11
15
  @properties = properties
12
16
  @timestamp = timestamp
13
17
  @stale_replicas = stale_replicas
14
18
  @run_needed = run_needed
15
19
  end
16
-
20
+
17
21
  # @return the value of a service property identified by key
18
22
  def [](key)
19
23
  @properties[key]
20
24
  end
21
-
25
+
22
26
  # set the value of a service property identified by key
23
27
  def []=(key, value)
24
28
  @properties[key] = value
25
29
  end
26
30
  end
27
- end
31
+ end
@@ -1,19 +1,25 @@
1
- require 'longleaf/services/metadata_serializer'
1
+ require 'longleaf/models/app_fields'
2
2
 
3
3
  module Longleaf
4
+ # Representation of a configured storage location
4
5
  class StorageLocation
6
+ AF ||= Longleaf::AppFields
7
+
5
8
  attr_reader :name
6
9
  attr_reader :path
7
- attr_reader :metadata_path
8
-
9
- def initialize(name:, path:, metadata_path:)
10
- raise ArgumentError.new("Parameters name, path and metadata_path are required") unless name && path && metadata_path
11
-
12
- @path = path
10
+ attr_reader :metadata_location
11
+
12
+ # @param name [String] the name of this storage location
13
+ # @param config [Hash] hash containing the configuration options for this location
14
+ # @param md_loc [MetadataLocation] metadata location associated with this storage location
15
+ def initialize(name, config, md_loc)
16
+ raise ArgumentError.new("Config parameter is required") unless config
17
+ @path = config[AF::LOCATION_PATH]
13
18
  @name = name
14
- @metadata_path = metadata_path
19
+ raise ArgumentError.new("Parameters name, path and metadata location are required") unless @name && @path && md_loc
20
+ @metadata_location = md_loc
15
21
  end
16
-
22
+
17
23
  # Get the path for the metadata file for the given file path located in this storage location.
18
24
  # @param file_path [String] path of the file
19
25
  # @raise [ArgumentError] if the file_path is not provided or is not in this storage location.
@@ -22,16 +28,15 @@ module Longleaf
22
28
  raise ArgumentError.new("Provided file path is not contained by storage location #{@name}: #{file_path}") \
23
29
  unless file_path.start_with?(@path)
24
30
 
25
- file_path.sub(/^#{@path}/, metadata_path) + MetadataSerializer::metadata_suffix
31
+ rel_file_path = relativize(file_path)
32
+
33
+ @metadata_location.metadata_path_for(rel_file_path)
26
34
  end
27
-
28
- # Checks that the path and metadata path defined in this location are available
29
- # @raise [StorageLocationUnavailableError] if the storage location is not available
30
- def available?
31
- raise StorageLocationUnavailableError.new("Path does not exist or is not a directory: #{@path}")\
32
- unless Dir.exist?(@path)
33
- raise StorageLocationUnavailableError.new("Metadata path does not exist or is not a directory: #{@metadata_path}")\
34
- unless Dir.exist?(@metadata_path)
35
+
36
+ # @param file_path [String] path to check
37
+ # @return true if the file path is contained by the path for this location
38
+ def contains?(file_path)
39
+ file_path.start_with?(@path)
35
40
  end
36
41
  end
37
- end
42
+ end
@@ -0,0 +1,9 @@
1
+ module Longleaf
2
+ # Storage type constants
3
+ class StorageTypes
4
+ FILESYSTEM_STORAGE_TYPE = 'filesystem'
5
+ S3_STORAGE_TYPE = 's3'
6
+
7
+ DEFAULT_STORAGE_TYPE = FILESYSTEM_STORAGE_TYPE
8
+ end
9
+ end
@@ -0,0 +1,9 @@
1
+ module Longleaf
2
+ # System configuration field names
3
+ class SystemConfigFields
4
+ MD_INDEX = 'index'
5
+ MD_INDEX_ADAPTER = 'adapter'
6
+ MD_INDEX_CONNECTION = 'connection'
7
+ MD_INDEX_PAGE_SIZE = 'page_size'
8
+ end
9
+ end
@@ -0,0 +1,58 @@
1
+ require 'longleaf/events/event_names'
2
+ require 'longleaf/logging'
3
+
4
+ module Longleaf
5
+ # Preservation service which validates a file using current filesystem information compared against the
6
+ # last registered details for that file. Checks using file name, size and last modified timestamp.
7
+ class FileCheckService
8
+ include Longleaf::Logging
9
+
10
+ # Initialize a FileCheckService from the given service definition
11
+ #
12
+ # @param service_def [ServiceDefinition] the configuration for this service
13
+ # @param app_manager [ApplicationConfigManager] manager for configured storage locations
14
+ def initialize(service_def, app_manager)
15
+ @service_def = service_def
16
+ @app_manager = app_manager
17
+ end
18
+
19
+ # Perform file information check.
20
+ #
21
+ # @param file_rec [FileRecord] record representing the file to perform the service on.
22
+ # @param event [String] name of the event this service is being invoked by.
23
+ # @raise [PreservationServiceError] if the file system information does not match the stored details
24
+ def perform(file_rec, event)
25
+ file_path = file_rec.path
26
+ md_rec = file_rec.metadata_record
27
+
28
+ logger.debug("Performing file information check of #{file_path}")
29
+
30
+ if !File.exist?(file_path)
31
+ raise PreservationServiceError.new("File does not exist: #{file_path}")
32
+ end
33
+
34
+ file_size = File.size(file_rec.path)
35
+ if file_size != md_rec.file_size
36
+ raise PreservationServiceError.new("File size for #{file_path} does not match the expected value: registered = #{md_rec.file_size} bytes, actual = #{file_size} bytes")
37
+ end
38
+
39
+ last_modified = File.mtime(file_rec.path).utc.iso8601(3)
40
+ if last_modified != md_rec.last_modified
41
+ raise PreservationServiceError.new("Last modified timestamp for #{file_path} does not match the expected value: registered = #{md_rec.last_modified}, actual = #{last_modified}")
42
+ end
43
+ end
44
+
45
+ # Determine if this service is applicable for the provided event, given the configured service definition
46
+ #
47
+ # @param event [String] name of the event
48
+ # @return [Boolean] returns true if this service is applicable for the provided event
49
+ def is_applicable?(event)
50
+ case event
51
+ when EventNames::PRESERVE
52
+ true
53
+ else
54
+ false
55
+ end
56
+ end
57
+ end
58
+ end
@@ -0,0 +1,123 @@
1
+ require 'longleaf/events/event_names'
2
+ require 'longleaf/models/service_fields'
3
+ require 'longleaf/logging'
4
+ require 'longleaf/helpers/digest_helper'
5
+ require 'set'
6
+
7
+ module Longleaf
8
+ # Preservation service which performs one or more fixity checks on a file based on the configured list
9
+ # of digest algorithms. It currently supports 'md5', 'sha1', 'sha2', 'sha256', 'sha384', 'sha512' and 'rmd160'.
10
+ #
11
+ # If the service encounters a file which is missing any of the digest algorithms the service is configured
12
+ # to check, the outcome may be controlled with the 'absent_digest' property via the following values:
13
+ # * 'fail' - the service will raise a ChecksumMismatchError for the missing algorithm. This is the default.
14
+ # * 'ignore' - the service will skip calculating any algorithms not already present for the file.
15
+ # * 'generate' - the service will generate and store any missing digests from the set of configured algorithms.
16
+ class FixityCheckService
17
+ include Longleaf::Logging
18
+
19
+ SUPPORTED_ALGORITHMS = ['md5', 'sha1', 'sha2', 'sha256', 'sha384', 'sha512', 'rmd160']
20
+
21
+ # service configuration property indicating how to handle situations where a file does not
22
+ # have a digest for one of the expected algorithms on record.
23
+ ABSENT_DIGEST_PROPERTY = 'absent_digest'
24
+ FAIL_IF_ABSENT = 'fail'
25
+ GENERATE_IF_ABSENT = 'generate'
26
+ IGNORE_IF_ABSENT = 'ignore'
27
+ ABSENT_DIGEST_OPTIONS = [FAIL_IF_ABSENT, GENERATE_IF_ABSENT, IGNORE_IF_ABSENT]
28
+
29
+ # Initialize a FixityCheckService from the given service definition
30
+ #
31
+ # @param service_def [ServiceDefinition] the configuration for this service
32
+ # @param app_manager [ApplicationConfigManager] manager for configured storage locations
33
+ def initialize(service_def, app_manager)
34
+ @service_def = service_def
35
+ @absent_digest_behavior = @service_def.properties[ABSENT_DIGEST_PROPERTY] || FAIL_IF_ABSENT
36
+ unless ABSENT_DIGEST_OPTIONS.include?(@absent_digest_behavior)
37
+ raise ArgumentError.new("Invalid option '#{@absent_digest_behavior}' for property #{ABSENT_DIGEST_PROPERTY} in service #{service_def.name}")
38
+ end
39
+
40
+ service_algs = service_def.properties[ServiceFields::DIGEST_ALGORITHMS]
41
+ if service_algs.nil? || service_algs.empty?
42
+ raise ArgumentError.new("FixityCheckService from definition #{service_def.name} requires a list of one or more digest algorithms")
43
+ end
44
+
45
+ service_algs = [service_algs] if service_algs.is_a?(String)
46
+
47
+ # Store the list of digest algorithms to verify, using normalized algorithm names.
48
+ @digest_algs = Set.new
49
+ service_algs.each do |alg|
50
+ normalized_alg = alg.downcase.delete('-')
51
+ if SUPPORTED_ALGORITHMS.include?(normalized_alg)
52
+ @digest_algs << normalized_alg
53
+ else
54
+ raise ArgumentError.new("Unsupported checksum algorithm '#{alg}' in definition #{service_def.name}. Supported algorithms are: #{SUPPORTED_ALGORITHMS}")
55
+ end
56
+ end
57
+ end
58
+
59
+ # Perform all configured fixity checks on the provided file
60
+ #
61
+ # @param file_rec [FileRecord] record representing the file to perform the service on.
62
+ # @param event [String] name of the event this service is being invoked by.
63
+ # @raise [ChecksumMismatchError] if the checksum on record does not match the generated checksum
64
+ def perform(file_rec, event)
65
+ path = file_rec.path
66
+ md_rec = file_rec.metadata_record
67
+
68
+ # Get the list of existing checksums for the file and normalize algorithm names
69
+ file_digests = Hash.new
70
+ md_rec.checksums&.each do |alg, digest|
71
+ normalized_alg = alg.downcase.delete('-')
72
+ if @digest_algs.include?(normalized_alg)
73
+ file_digests[normalized_alg] = digest
74
+ else
75
+ logger.debug("Metadata for file #{path} contains unexpected '#{alg}' digest, it will be ignored.")
76
+ end
77
+ end
78
+
79
+ @digest_algs.each do |alg|
80
+ existing_digest = file_digests[alg]
81
+
82
+ if existing_digest.nil?
83
+ if @absent_digest_behavior == FAIL_IF_ABSENT
84
+ raise ChecksumMismatchError.new("Fixity check using algorithm '#{alg}' failed for file #{path}: no existing digest of type '#{alg}' on record.")
85
+ elsif @absent_digest_behavior == IGNORE_IF_ABSENT
86
+ logger.debug("Skipping check of algorithm '#{alg}' for file #{path}: no digest on record.")
87
+ next
88
+ end
89
+ end
90
+
91
+ digest = DigestHelper::start_digest(alg)
92
+ digest.file(path)
93
+ generated_digest = digest.hexdigest
94
+
95
+ # Store the missing checksum if using the 'generate' behavior
96
+ if existing_digest.nil? && @absent_digest_behavior == GENERATE_IF_ABSENT
97
+ md_rec.checksums[alg] = generated_digest
98
+ logger.info("Generated and stored digest using algorithm '#{alg}' for file #{path}")
99
+ else
100
+ # Compare the new digest to the one on record
101
+ if existing_digest == generated_digest
102
+ logger.info("Fixity check using algorithm '#{alg}' succeeded for file #{path}")
103
+ else
104
+ raise ChecksumMismatchError.new("Fixity check using algorithm '#{alg}' failed for file #{path}: expected '#{existing_digest}', calculated '#{generated_digest}.'")
105
+ end
106
+ end
107
+ end
108
+ end
109
+
110
+ # Determine if this service is applicable for the provided event, given the configured service definition
111
+ #
112
+ # @param event [String] name of the event
113
+ # @return [Boolean] returns true if this service is applicable for the provided event
114
+ def is_applicable?(event)
115
+ case event
116
+ when EventNames::PRESERVE
117
+ true
118
+ else
119
+ false
120
+ end
121
+ end
122
+ end
123
+ end