longleaf 0.1.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185) hide show
  1. checksums.yaml +4 -4
  2. data/.circleci/config.yml +94 -0
  3. data/.editorconfig +13 -0
  4. data/.gitignore +4 -1
  5. data/.rubocop.yml +44 -0
  6. data/.rubocop_todo.yml +834 -0
  7. data/.yardopts +1 -0
  8. data/Gemfile +16 -1
  9. data/README.md +98 -12
  10. data/Rakefile +6 -0
  11. data/bin/setup +16 -1
  12. data/docs/aboutlongleaf.md +28 -0
  13. data/docs/extra.css +32 -0
  14. data/docs/img/change-file.png +0 -0
  15. data/docs/img/ll-example-preserved.png +0 -0
  16. data/docs/index.md +19 -0
  17. data/docs/install.md +66 -0
  18. data/docs/ll-example/config-example-relative.yml +33 -0
  19. data/docs/ll-example/files-dir/LLexample-PDF.pdf +0 -0
  20. data/docs/ll-example/files-dir/LLexample-TOCHANGE.txt +15 -0
  21. data/docs/ll-example/files-dir/LLexample-tokeep.txt +10 -0
  22. data/docs/ll-example/metadata-dir/.gitkeep +0 -0
  23. data/docs/ll-example/replica-files/.gitkeep +0 -0
  24. data/docs/ll-example/replica-metadata/.gitkeep +0 -0
  25. data/docs/quickstart.md +270 -0
  26. data/docs/rdocs/Longleaf.html +135 -0
  27. data/docs/rdocs/Longleaf/AppFields.html +178 -0
  28. data/docs/rdocs/Longleaf/ApplicationConfigDeserializer.html +631 -0
  29. data/docs/rdocs/Longleaf/ApplicationConfigManager.html +610 -0
  30. data/docs/rdocs/Longleaf/ApplicationConfigValidator.html +238 -0
  31. data/docs/rdocs/Longleaf/CLI.html +909 -0
  32. data/docs/rdocs/Longleaf/ChecksumMismatchError.html +151 -0
  33. data/docs/rdocs/Longleaf/ConfigBuilder.html +1339 -0
  34. data/docs/rdocs/Longleaf/ConfigurationError.html +143 -0
  35. data/docs/rdocs/Longleaf/ConfigurationValidator.html +227 -0
  36. data/docs/rdocs/Longleaf/DeregisterCommand.html +420 -0
  37. data/docs/rdocs/Longleaf/DeregisterEvent.html +453 -0
  38. data/docs/rdocs/Longleaf/DeregistrationError.html +151 -0
  39. data/docs/rdocs/Longleaf/DigestHelper.html +419 -0
  40. data/docs/rdocs/Longleaf/EventError.html +147 -0
  41. data/docs/rdocs/Longleaf/EventNames.html +163 -0
  42. data/docs/rdocs/Longleaf/EventStatusTracking.html +656 -0
  43. data/docs/rdocs/Longleaf/FileCheckService.html +540 -0
  44. data/docs/rdocs/Longleaf/FileHelpers.html +520 -0
  45. data/docs/rdocs/Longleaf/FileRecord.html +716 -0
  46. data/docs/rdocs/Longleaf/FileSelector.html +901 -0
  47. data/docs/rdocs/Longleaf/FixityCheckService.html +691 -0
  48. data/docs/rdocs/Longleaf/IndexManager.html +1155 -0
  49. data/docs/rdocs/Longleaf/InvalidDigestAlgorithmError.html +143 -0
  50. data/docs/rdocs/Longleaf/InvalidStoragePathError.html +143 -0
  51. data/docs/rdocs/Longleaf/Logging.html +405 -0
  52. data/docs/rdocs/Longleaf/Logging/RedirectingLogger.html +1213 -0
  53. data/docs/rdocs/Longleaf/LongleafError.html +139 -0
  54. data/docs/rdocs/Longleaf/MDFields.html +193 -0
  55. data/docs/rdocs/Longleaf/MetadataBuilder.html +787 -0
  56. data/docs/rdocs/Longleaf/MetadataDeserializer.html +537 -0
  57. data/docs/rdocs/Longleaf/MetadataError.html +143 -0
  58. data/docs/rdocs/Longleaf/MetadataPersistenceManager.html +539 -0
  59. data/docs/rdocs/Longleaf/MetadataRecord.html +1411 -0
  60. data/docs/rdocs/Longleaf/MetadataSerializer.html +786 -0
  61. data/docs/rdocs/Longleaf/PreservationServiceError.html +147 -0
  62. data/docs/rdocs/Longleaf/PreserveCommand.html +410 -0
  63. data/docs/rdocs/Longleaf/PreserveEvent.html +491 -0
  64. data/docs/rdocs/Longleaf/RegisterCommand.html +428 -0
  65. data/docs/rdocs/Longleaf/RegisterEvent.html +628 -0
  66. data/docs/rdocs/Longleaf/RegisteredFileSelector.html +446 -0
  67. data/docs/rdocs/Longleaf/RegistrationError.html +151 -0
  68. data/docs/rdocs/Longleaf/ReindexCommand.html +576 -0
  69. data/docs/rdocs/Longleaf/RsyncReplicationService.html +1180 -0
  70. data/docs/rdocs/Longleaf/SequelIndexDriver.html +1978 -0
  71. data/docs/rdocs/Longleaf/ServiceCandidateFilesystemIterator.html +572 -0
  72. data/docs/rdocs/Longleaf/ServiceCandidateIndexIterator.html +532 -0
  73. data/docs/rdocs/Longleaf/ServiceCandidateLocator.html +333 -0
  74. data/docs/rdocs/Longleaf/ServiceClassCache.html +725 -0
  75. data/docs/rdocs/Longleaf/ServiceDateHelper.html +425 -0
  76. data/docs/rdocs/Longleaf/ServiceDefinition.html +683 -0
  77. data/docs/rdocs/Longleaf/ServiceDefinitionManager.html +371 -0
  78. data/docs/rdocs/Longleaf/ServiceDefinitionValidator.html +269 -0
  79. data/docs/rdocs/Longleaf/ServiceFields.html +173 -0
  80. data/docs/rdocs/Longleaf/ServiceManager.html +1229 -0
  81. data/docs/rdocs/Longleaf/ServiceMappingManager.html +410 -0
  82. data/docs/rdocs/Longleaf/ServiceMappingValidator.html +347 -0
  83. data/docs/rdocs/Longleaf/ServiceRecord.html +821 -0
  84. data/docs/rdocs/Longleaf/StorageLocation.html +985 -0
  85. data/docs/rdocs/Longleaf/StorageLocationManager.html +729 -0
  86. data/docs/rdocs/Longleaf/StorageLocationUnavailableError.html +143 -0
  87. data/docs/rdocs/Longleaf/StorageLocationValidator.html +373 -0
  88. data/docs/rdocs/Longleaf/StoragePathValidator.html +253 -0
  89. data/docs/rdocs/Longleaf/SystemConfigBuilder.html +441 -0
  90. data/docs/rdocs/Longleaf/SystemConfigFields.html +163 -0
  91. data/docs/rdocs/Longleaf/ValidateConfigCommand.html +451 -0
  92. data/docs/rdocs/Longleaf/ValidateMetadataCommand.html +408 -0
  93. data/docs/rdocs/_index.html +660 -0
  94. data/docs/rdocs/class_list.html +51 -0
  95. data/docs/rdocs/css/common.css +1 -0
  96. data/docs/rdocs/css/full_list.css +58 -0
  97. data/docs/rdocs/css/style.css +496 -0
  98. data/docs/rdocs/file.README.html +165 -0
  99. data/docs/rdocs/file_list.html +56 -0
  100. data/docs/rdocs/frames.html +17 -0
  101. data/docs/rdocs/index.html +165 -0
  102. data/docs/rdocs/js/app.js +303 -0
  103. data/docs/rdocs/js/full_list.js +216 -0
  104. data/docs/rdocs/js/jquery.js +4 -0
  105. data/docs/rdocs/method_list.html +2051 -0
  106. data/docs/rdocs/top-level-namespace.html +110 -0
  107. data/lib/longleaf/candidates/file_selector.rb +150 -0
  108. data/lib/longleaf/candidates/manifest_digest_provider.rb +17 -0
  109. data/lib/longleaf/candidates/physical_path_provider.rb +17 -0
  110. data/lib/longleaf/candidates/registered_file_selector.rb +67 -0
  111. data/lib/longleaf/candidates/service_candidate_filesystem_iterator.rb +93 -0
  112. data/lib/longleaf/candidates/service_candidate_index_iterator.rb +84 -0
  113. data/lib/longleaf/candidates/service_candidate_locator.rb +23 -0
  114. data/lib/longleaf/candidates/single_digest_provider.rb +13 -0
  115. data/lib/longleaf/cli.rb +252 -46
  116. data/lib/longleaf/commands/deregister_command.rb +51 -0
  117. data/lib/longleaf/commands/preserve_command.rb +50 -0
  118. data/lib/longleaf/commands/register_command.rb +34 -43
  119. data/lib/longleaf/commands/reindex_command.rb +92 -0
  120. data/lib/longleaf/commands/validate_config_command.rb +33 -8
  121. data/lib/longleaf/commands/validate_metadata_command.rb +51 -0
  122. data/lib/longleaf/errors.rb +26 -7
  123. data/lib/longleaf/events/deregister_event.rb +53 -0
  124. data/lib/longleaf/events/event_names.rb +9 -0
  125. data/lib/longleaf/events/event_status_tracking.rb +59 -0
  126. data/lib/longleaf/events/preserve_event.rb +82 -0
  127. data/lib/longleaf/events/register_event.rb +59 -51
  128. data/lib/longleaf/helpers/case_insensitive_hash.rb +38 -0
  129. data/lib/longleaf/helpers/digest_helper.rb +56 -0
  130. data/lib/longleaf/helpers/s3_uri_helper.rb +86 -0
  131. data/lib/longleaf/helpers/selection_options_parser.rb +215 -0
  132. data/lib/longleaf/helpers/service_date_helper.rb +78 -0
  133. data/lib/longleaf/indexing/index_manager.rb +101 -0
  134. data/lib/longleaf/indexing/sequel_index_driver.rb +306 -0
  135. data/lib/longleaf/logging.rb +5 -4
  136. data/lib/longleaf/logging/redirecting_logger.rb +30 -25
  137. data/lib/longleaf/models/app_fields.rb +7 -2
  138. data/lib/longleaf/models/file_record.rb +31 -8
  139. data/lib/longleaf/models/filesystem_metadata_location.rb +56 -0
  140. data/lib/longleaf/models/filesystem_storage_location.rb +52 -0
  141. data/lib/longleaf/models/md_fields.rb +3 -1
  142. data/lib/longleaf/models/metadata_location.rb +47 -0
  143. data/lib/longleaf/models/metadata_record.rb +43 -16
  144. data/lib/longleaf/models/s3_storage_location.rb +138 -0
  145. data/lib/longleaf/models/service_definition.rb +7 -6
  146. data/lib/longleaf/models/service_fields.rb +7 -1
  147. data/lib/longleaf/models/service_record.rb +10 -6
  148. data/lib/longleaf/models/storage_location.rb +24 -19
  149. data/lib/longleaf/models/storage_types.rb +9 -0
  150. data/lib/longleaf/models/system_config_fields.rb +9 -0
  151. data/lib/longleaf/preservation_services/file_check_service.rb +59 -0
  152. data/lib/longleaf/preservation_services/fixity_check_service.rb +124 -0
  153. data/lib/longleaf/preservation_services/rsync_replication_service.rb +198 -0
  154. data/lib/longleaf/preservation_services/s3_replication_service.rb +131 -0
  155. data/lib/longleaf/services/application_config_deserializer.rb +81 -24
  156. data/lib/longleaf/services/application_config_manager.rb +20 -6
  157. data/lib/longleaf/services/application_config_validator.rb +19 -9
  158. data/lib/longleaf/services/configuration_validator.rb +67 -4
  159. data/lib/longleaf/services/filesystem_location_validator.rb +16 -0
  160. data/lib/longleaf/services/metadata_deserializer.rb +115 -42
  161. data/lib/longleaf/services/metadata_persistence_manager.rb +47 -0
  162. data/lib/longleaf/services/metadata_serializer.rb +156 -23
  163. data/lib/longleaf/services/metadata_validator.rb +76 -0
  164. data/lib/longleaf/services/s3_location_validator.rb +19 -0
  165. data/lib/longleaf/services/service_class_cache.rb +112 -0
  166. data/lib/longleaf/services/service_definition_manager.rb +10 -7
  167. data/lib/longleaf/services/service_definition_validator.rb +25 -18
  168. data/lib/longleaf/services/service_manager.rb +86 -11
  169. data/lib/longleaf/services/service_mapping_manager.rb +13 -12
  170. data/lib/longleaf/services/service_mapping_validator.rb +36 -26
  171. data/lib/longleaf/services/storage_location_manager.rb +76 -15
  172. data/lib/longleaf/services/storage_location_validator.rb +49 -35
  173. data/lib/longleaf/specs/config_builder.rb +47 -23
  174. data/lib/longleaf/specs/config_validator_helpers.rb +16 -0
  175. data/lib/longleaf/specs/custom_matchers.rb +9 -0
  176. data/lib/longleaf/specs/file_helpers.rb +61 -0
  177. data/lib/longleaf/specs/metadata_builder.rb +98 -0
  178. data/lib/longleaf/specs/system_config_builder.rb +27 -0
  179. data/lib/longleaf/version.rb +1 -1
  180. data/longleaf.gemspec +20 -7
  181. data/mkdocs.yml +21 -0
  182. metadata +308 -24
  183. data/.travis.yml +0 -4
  184. data/lib/longleaf/commands/abstract_command.rb +0 -37
  185. data/lib/longleaf/services/storage_path_validator.rb +0 -16
@@ -0,0 +1,47 @@
1
+ require 'longleaf/services/metadata_serializer'
2
+ require 'longleaf/services/metadata_deserializer'
3
+ require 'longleaf/errors'
4
+
5
module Longleaf
  # Handles the persistence of metadata records: writing them to their metadata
  # file, indexing them when an index is in use, and loading them back.
  class MetadataPersistenceManager
    # Initialize the MetadataPersistenceManager
    # @param index_manager [IndexManager] index manager, consulted to decide
    #   whether persisted metadata must also be indexed
    def initialize(index_manager)
      @index_manager = index_manager
    end

    # Persist the metadata for the provided file record to all configured destinations.
    # The metadata file is always written; the record is additionally indexed when
    # the index manager reports that an index is in use.
    # @param file_rec [FileRecord] file record whose metadata_record is persisted
    # @raise [MetadataError] if the file record has no metadata record
    def persist(file_rec)
      md_rec = file_rec.metadata_record
      if md_rec.nil?
        raise MetadataError, "No metadata record provided, cannot persist metadata for #{file_rec.path}"
      end

      MetadataSerializer.write(metadata: md_rec,
                               file_path: file_rec.metadata_path,
                               digest_algs: file_rec.storage_location.metadata_location.digests)

      index(file_rec)
    end

    # Index metadata for the provided file record. Does nothing when no index is in use.
    # @param file_rec [FileRecord] file record
    def index(file_rec)
      @index_manager.index(file_rec) if @index_manager.using_index?
    end

    # Load the metadata record for the provided file record from its metadata path
    # and attach it to the file record.
    # @param file_rec [FileRecord] file record
    # @return [MetadataRecord] the metadata record for the file record
    def load(file_rec)
      md_rec = MetadataDeserializer.deserialize(
        file_path: file_rec.metadata_path,
        digest_algs: file_rec.storage_location.metadata_location.digests
      )
      file_rec.metadata_record = md_rec
      md_rec
    end
  end
end
@@ -1,71 +1,204 @@
1
1
  require 'yaml'
2
- require_relative '../models/metadata_record'
3
- require_relative '../models/md_fields'
2
+ require 'longleaf/models/metadata_record'
3
+ require 'longleaf/models/md_fields'
4
+ require 'longleaf/helpers/digest_helper'
5
+ require 'longleaf/errors'
6
+ require 'longleaf/logging'
7
+ require 'pathname'
8
+ require "tempfile"
4
9
 
5
- # Service which serializes MetadataRecord objects
6
10
  module Longleaf
11
+ # Service which serializes MetadataRecord objects
7
12
  class MetadataSerializer
8
- MDF = Longleaf::MDFields
9
-
13
+ extend Longleaf::Logging
14
+ MDF ||= MDFields
15
+
10
16
  # Serialize the contents of the provided metadata record to the specified path
11
17
  #
12
18
  # @param metadata [MetadataRecord] metadata record to serialize. Required.
13
19
  # @param file_path [String] path to write the file to. Required.
14
20
  # @param format [String] format to serialize the metadata in. Default is 'yaml'.
15
- def self.write(metadata:, file_path:, format: 'yaml')
21
+ # @param digest_algs [Array] if provided, sidecar digest files for the metadata file
22
+ # will be generated for each algorithm.
23
+ def self.write(metadata:, file_path:, format: 'yaml', digest_algs: [])
16
24
  raise ArgumentError.new('metadata parameter must be a MetadataRecord') \
17
- unless metadata.class == Longleaf::MetadataRecord
18
-
25
+ unless metadata.class == MetadataRecord
26
+
19
27
  case format
20
28
  when 'yaml'
21
29
  content = to_yaml(metadata)
22
30
  else
23
- raise ArgumentError.new('Invalid serialization format #{format} specified')
31
+ raise ArgumentError.new("Invalid serialization format #{format} specified")
24
32
  end
25
-
26
- File.write(file_path, content)
33
+
34
+ atomic_write(file_path, content, digest_algs)
27
35
  end
28
-
36
+
29
37
  # @param metadata [MetadataRecord] metadata record to transform
30
38
  # @return [String] a yaml representation of the provided MetadataRecord
31
39
  def self.to_yaml(metadata)
32
40
  props = to_hash(metadata)
33
41
  props.to_yaml
34
42
  end
35
-
43
+
44
+ # Create a hash representation of the given MetadataRecord file
45
+ # @param metadata [MetadataRecord] metadata record to transform into a hash
36
46
  def self.to_hash(metadata)
37
47
  props = Hash.new
38
-
48
+
39
49
  data = Hash.new.merge(metadata.properties)
40
50
  data[MDF::REGISTERED_TIMESTAMP] = metadata.registered if metadata.registered
41
51
  data[MDF::DEREGISTERED_TIMESTAMP] = metadata.deregistered if metadata.deregistered
42
- data[MDF::CHECKSUMS] = metadata.checksums unless metadata.checksums&.empty?
52
+ data[MDF::CHECKSUMS] = metadata.checksums unless metadata.checksums && metadata.checksums.empty?
43
53
  data[MDF::FILE_SIZE] = metadata.file_size unless metadata.file_size.nil?
44
54
  data[MDF::LAST_MODIFIED] = metadata.last_modified if metadata.last_modified
45
-
55
+ data[MDF::PHYSICAL_PATH] = metadata.physical_path if metadata.physical_path
56
+
46
57
  props[MDF::DATA] = data
47
-
58
+
48
59
  services = Hash.new
49
60
  metadata.list_services.each do |name|
50
61
  service = metadata.service(name)
51
62
  service[MDF::STALE_REPLICAS] = service.stale_replicas if service.stale_replicas
52
63
  service[MDF::SERVICE_TIMESTAMP] = service.timestamp unless service.timestamp.nil?
53
64
  service[MDF::RUN_NEEDED] = service.run_needed if service.run_needed
54
- services[name] = service.properties
65
+ services[name] = service.properties unless service.properties.empty?
55
66
  end
56
-
67
+
57
68
  props[MDF::SERVICES] = services
58
-
69
+
59
70
  props
60
71
  end
61
-
72
+
73
+ # @param format [String] encoding format used for metadata file
74
+ # @return [String] the suffix used to indicate that a file is a metadata file in the provided encoding
75
+ # @raise [ArgumentError] raised if the provided format is not a supported metadata encoding format
62
76
  def self.metadata_suffix(format: 'yaml')
63
77
  case format
64
78
  when 'yaml'
65
79
  '-llmd.yaml'
66
80
  else
67
- raise ArgumentError.new('Invalid serialization format #{format} specified')
81
+ raise ArgumentError.new("Invalid serialization format #{format} specified")
68
82
  end
69
83
  end
84
+
85
+ # Safely writes the new metadata file and its digests.
86
+ # It does so by first writing the content and its digests to temp files,
87
+ # then making the temp files the current version of the file.
88
+ # Attempts to clean up new data in the case of failure.
89
+ def self.atomic_write(file_path, content, digest_algs)
90
+ # Fill in parent directories if they do not exist
91
+ parent_dir = Pathname(file_path).parent
92
+ parent_dir.mkpath unless parent_dir.exist?
93
+
94
+ file_path = file_path.path if file_path.respond_to?(:path)
95
+
96
+ # If file does not already exist, then simply write it
97
+ if !File.exist?(file_path)
98
+ File.write(file_path, content)
99
+ write_digests(file_path, content, digest_algs)
100
+ return
101
+ end
102
+
103
+ # Updating file, use safe atomic write
104
+ File.open(file_path) do |original_file|
105
+ original_file.flock(File::LOCK_EX)
106
+
107
+ base_name = File.basename(file_path)
108
+ old_renamed = nil
109
+ Tempfile.open(base_name, parent_dir) do |temp_file|
110
+ begin
111
+ # Write content to temp file
112
+ temp_file.write(content)
113
+ temp_file.close
114
+
115
+ temp_path = temp_file.path
116
+
117
+ # Set permissions of new file to match old if it exists
118
+ old_stat = File.stat(file_path)
119
+ set_perms(temp_path, old_stat)
120
+
121
+ # Produce digest files for the temp file
122
+ digest_paths = write_digests(temp_path, content, digest_algs)
123
+
124
+ # Move the old file to a temp path in case it needs to be restored
125
+ old_renamed = temp_path + ".old"
126
+ File.rename(file_path, old_renamed)
127
+
128
+ # Move move the new file into place as the new main file
129
+ File.rename(temp_path, file_path)
130
+ rescue => e
131
+ # Attempt to restore old file if it had already been moved
132
+ if !old_renamed.nil? && !File.exist?(file_path)
133
+ File.rename(old_renamed, file_path)
134
+ end
135
+ # Cleanup the temp file and any digest files written for it
136
+ temp_file.delete if File.exist?(temp_file.path)
137
+ unless digest_paths.nil?
138
+ digest_paths.each do |digest_path|
139
+ File.delete(digest_path)
140
+ end
141
+ end
142
+ raise e
143
+ end
144
+
145
+ # Cleanup all existing digest files, in case the set of algorithms has changed
146
+ cleanup_digests(file_path)
147
+ # Move new digests into place
148
+ digest_paths.each do |digest_path|
149
+ File.rename(digest_path, digest_path.sub(temp_path, file_path))
150
+ end
151
+ # Cleanup the old file
152
+ File.delete(old_renamed)
153
+ end
154
+ end
155
+ end
156
+
157
+ def self.set_perms(file_path, stat_info)
158
+ if stat_info
159
+ # Set correct permissions on new file
160
+ begin
161
+ File.chown(stat_info.uid, stat_info.gid, file_path)
162
+ # This operation will affect filesystem ACL's
163
+ File.chmod(stat_info.mode, file_path)
164
+ rescue Errno::EPERM, Errno::EACCES
165
+ # Changing file ownership failed, moving on.
166
+ return false
167
+ end
168
+ end
169
+ true
170
+ end
171
+
172
+ # Deletes all known digest files for the provided file path
173
+ def self.cleanup_digests(file_path)
174
+ DigestHelper::KNOWN_DIGESTS.each do |alg|
175
+ digest_path = "#{file_path}.#{alg}"
176
+ File.delete(digest_path) if File.exist?(digest_path)
177
+ end
178
+ end
179
+
180
+ def self.write_digests(file_path, content, digests)
181
+ return [] if digests.nil? || digests.empty?
182
+
183
+ digest_paths = Array.new
184
+
185
+ digests.each do |alg|
186
+ digest_class = DigestHelper::start_digest(alg)
187
+ result = digest_class.hexdigest(content)
188
+ digest_path = "#{file_path}.#{alg}"
189
+
190
+ File.write(digest_path, result)
191
+
192
+ digest_paths.push(digest_path)
193
+
194
+ self.logger.debug("Generated #{alg} digest for metadata file #{file_path}: #{digest_path} #{result}")
195
+ end
196
+
197
+ digest_paths
198
+ end
199
+
200
+ private_class_method :cleanup_digests
201
+ private_class_method :write_digests
202
+ private_class_method :atomic_write
70
203
  end
71
- end
204
+ end
@@ -0,0 +1,76 @@
1
+ require 'pathname'
2
+ require 'longleaf/models/md_fields'
3
+ require 'longleaf/errors'
4
+ require_relative 'configuration_validator'
5
+
6
# Time.iso8601 is provided by the 'time' standard library extension
require 'time'

module Longleaf
  # Validator for file metadata
  class MetadataValidator < ConfigurationValidator
    MDF ||= MDFields

    # @param config [Hash] hash containing the metadata of a file, which is expected
    #   to have MDF::DATA and MDF::SERVICES sections
    def initialize(config)
      super(config)
    end

    protected

    # Validates the provided metadata for a file to ensure that it is syntactically correct
    # and that field types are valid.
    #
    # Structural problems are reported through assert, individual field problems through
    # fail inside register_on_failure blocks. Both helpers come from ConfigurationValidator;
    # NOTE(review): this assumes assert stops validation when its condition is false - confirm.
    def validate
      assert("Metadata must be a hash, but a #{@config.class} was provided", @config.class == Hash)
      assert("Metadata must contain a '#{MDF::DATA}' key", @config.key?(MDF::DATA))
      assert("Metadata must contain a '#{MDF::SERVICES}' key", @config.key?(MDF::SERVICES))

      data = @config[MDF::DATA]
      # The key may be present without a usable value, e.g. an empty section in the file
      assert("Field '#{MDF::DATA}' must be a hash, but a #{data.class} was provided", data.is_a?(Hash))

      register_on_failure { validate_date_field(data, MDF::REGISTERED_TIMESTAMP) }
      register_on_failure { validate_date_field(data, MDF::DEREGISTERED_TIMESTAMP, required: false) }
      register_on_failure { validate_date_field(data, MDF::LAST_MODIFIED) }

      register_on_failure { validate_positive_integer(data, MDF::FILE_SIZE) }

      checksums = data[MDF::CHECKSUMS]
      register_on_failure do
        if !checksums.nil? && !checksums.is_a?(Hash)
          fail("Field '#{MDF::CHECKSUMS}' must be a map of algorithms to digests, but was a #{checksums.class}")
        end
      end

      # Ensure that any service timestamps present are valid dates.
      # An empty services section is treated as having no services.
      services = @config[MDF::SERVICES] || {}
      services.each do |_service_name, service_rec|
        # A service listed without any properties has nothing to check
        next if service_rec.nil?

        register_on_failure { validate_date_field(service_rec, MDF::SERVICE_TIMESTAMP, required: false) }
      end
    end

    # Fails validation unless the field holds a String containing an ISO8601 timestamp.
    # @param section [Hash] section of the metadata containing the field
    # @param field_key [String] key of the field to check
    # @param required [Boolean] if true, a missing field is a failure as well
    def validate_date_field(section, field_key, required: true)
      field_val = section[field_key]

      if field_val
        # Only strings can be parsed; other types would raise from within the parser
        valid = field_val.is_a?(String)
        if valid
          begin
            Time.iso8601(field_val)
          rescue ArgumentError
            valid = false
          end
        end

        unless valid
          fail("Field '#{field_key}' must be a valid ISO8601 timestamp, but contained value '#{field_val}'")
        end
      elsif required
        fail("Metadata must contain a '#{field_key}' field")
      end
    end

    # Fails validation unless the field holds an Integer, or a base 10 integer string,
    # which is not negative. Zero is accepted.
    # @param section [Hash] section of the metadata containing the field
    # @param field_key [String] key of the field to check
    # @param required [Boolean] if true, a missing field is a failure as well
    def validate_positive_integer(section, field_key, required: true)
      field_val = section[field_key]

      if field_val
        begin
          val = field_val.is_a?(Integer) ? field_val : Integer(field_val, 10)
          if val < 0
            fail("Field '#{field_key}' must be a positive integer")
          end
        rescue ArgumentError, TypeError
          fail("Field '#{field_key}' must be a positive integer")
        end
      elsif required
        fail("Metadata must contain a '#{field_key}' field")
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
+ require 'pathname'
2
+ require 'longleaf/errors'
3
+ require 'longleaf/helpers/s3_uri_helper'
4
+
5
module Longleaf
  # Validates the configuration of a s3 based location
  class S3LocationValidator
    # Checks that the path of an s3 location is present and names a bucket, reporting
    # problems through the provided validator.
    # @param p_validator [#assert, #fail] validator which failures are reported to
    # @param name [String] name of the storage location, used in messages
    # @param path_prop [String] name of the property holding the path, used in messages
    # @param section_name [String] name of the section being validated, used in messages
    # @param path [String] the s3 path to validate
    def self.validate(p_validator, name, path_prop, section_name, path)
      prefix = "Storage location '#{name}' specifies invalid #{section_name} '#{path_prop}' property: "

      path_given = !(path.nil? || path.to_s.strip.empty?)
      p_validator.assert("#{prefix}Path must not be empty", path_given)

      begin
        bucket = S3UriHelper.extract_bucket(path)
        p_validator.assert("#{prefix}Path must specify a bucket", !bucket.nil?)
      rescue ArgumentError => e
        # Problems extracting the bucket are reported with the extractor's own message
        p_validator.fail("#{prefix}#{e.message}")
      end
    end
  end
end
@@ -0,0 +1,112 @@
1
+ require 'pathname'
2
+
3
module Longleaf
  # Cache for loading and retrieving preservation service classes
  class ServiceClassCache
    STD_PRESERVATION_SERVICE_PATH = 'longleaf/preservation_services/'

    # @param app_manager passed as the second argument to every service class instantiated
    def initialize(app_manager)
      @app_manager = app_manager
      # Cache storing per service definition instances of service classes
      @service_instance_cache = {}
      # Cache storing the class resolved for each script path and class name
      @class_cache = {}
    end

    # Returns an instance of the preservation service defined for the provided service definition,
    # based on the work_script and work_class properties provided. Instances are cached
    # by service name, so repeated calls for the same service return the same object.
    #
    # @param service_def [ServiceDefinition] definition of service to instantiate
    # @return [PreservationService] Instance of the preservation service class for the definition.
    def service_instance(service_def)
      service_name = service_def.name
      # Return the cached instance of the service
      return @service_instance_cache[service_name] if @service_instance_cache.key?(service_name)

      clazz = service_class(service_def)
      # Cache and return the class instance
      @service_instance_cache[service_name] = clazz.new(service_def, @app_manager)
    end

    # Load and return the PreservationService class assigned to the provided service definition,
    # based on the work_script and work_class properties provided.
    #
    # A work_script containing a '/' is treated as a path, and must be located within
    # one of the directories in $LOAD_PATH. Any other work_script is loaded from the
    # standard preservation service path. When no work_class is configured, the class
    # name is derived from the script's file name.
    #
    # @param service_def [ServiceDefinition] definition of service to retrieve class for
    # @return [Class] class of work_script
    # @raise [ConfigurationError] if the script is outside of the permitted paths, cannot
    #   be loaded, or does not define the expected class
    def service_class(service_def)
      service_name = service_def.name
      work_script = service_def.work_script

      if work_script.include?('/')
        expanded_path = Pathname.new(work_script).expand_path.to_s
        unless from_permitted_path?(expanded_path)
          raise ConfigurationError, "Unable to load work_script for service #{service_name}, #{work_script} is not in a known library path."
        end

        last_slash_index = work_script.rindex('/')
        script_path = work_script[0..last_slash_index]
        script_name = work_script[(last_slash_index + 1)..-1]
      else
        script_path = STD_PRESERVATION_SERVICE_PATH
        script_name = work_script
      end

      # Strip off the extension, without modifying the string held by the service definition
      script_name = script_name.sub(/\.rb\z/, '')
      require_path = File.join(script_path, script_name)

      # Generate the class name, either configured or from file naming convention if possible
      class_name = service_def.work_class
      unless class_name
        class_name = script_name.split('_').map(&:capitalize).join
        # Assume the longleaf module for classes in the standard path
        class_name = 'Longleaf::' + class_name if script_path == STD_PRESERVATION_SERVICE_PATH
      end

      # Return the cached Class if this script and class have been encountered before.
      # The class name is part of the key since one script may provide several classes.
      cache_key = [require_path, class_name]
      return @class_cache[cache_key] if @class_cache.key?(cache_key)

      # Load the script
      begin
        require require_path
      rescue LoadError
        raise ConfigurationError, "Failed to load work_script '#{script_name}' for service #{service_name}"
      end

      begin
        # cache the class for this work_script and return it
        @class_cache[cache_key] = constantize(class_name)
      rescue NameError
        raise ConfigurationError, "Failed to load work_script '#{script_name}' for service #{service_name}, class name #{class_name} was not found."
      end
    end

    private

    # Resolve a possibly namespaced class name into its constant.
    # Borrowed from sidekiq implementation
    # @param str [String] name of the constant, with namespaces separated by '::'
    # @raise [NameError] if the constant is not defined
    def constantize(str)
      names = str.split('::')
      # Drop the empty leading segment of an absolute name such as '::Foo'
      names.shift if names.empty? || names.first.empty?

      names.inject(Object) do |constant, name|
        # the false flag limits search for name to under the constant namespace
        # which mimics Rails' behaviour
        constant.const_defined?(name, false) ? constant.const_get(name, false) : constant.const_missing(name)
      end
    end

    # @param script_path [String] expanded path of a script
    # @return [Boolean] true if the script is located inside one of the $LOAD_PATH directories
    def from_permitted_path?(script_path)
      $LOAD_PATH.any? do |lib_path|
        lib_dir = lib_path.to_s
        # An empty entry would otherwise match every path
        next false if lib_dir.empty?

        # Compare against the directory including its separator, so that a sibling
        # which merely shares the directory's name as a prefix is not accepted
        lib_dir += '/' unless lib_dir.end_with?('/')
        script_path.start_with?(lib_dir)
      end
    end
  end
end