longleaf 0.1.0.pre.2 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184) hide show
  1. checksums.yaml +4 -4
  2. data/.circleci/config.yml +94 -0
  3. data/.editorconfig +13 -0
  4. data/.gitignore +4 -1
  5. data/.rubocop.yml +44 -0
  6. data/.rubocop_todo.yml +834 -0
  7. data/.yardopts +1 -0
  8. data/Gemfile +16 -1
  9. data/README.md +98 -12
  10. data/Rakefile +6 -0
  11. data/bin/setup +16 -1
  12. data/docs/aboutlongleaf.md +28 -0
  13. data/docs/extra.css +32 -0
  14. data/docs/img/change-file.png +0 -0
  15. data/docs/img/ll-example-preserved.png +0 -0
  16. data/docs/index.md +19 -0
  17. data/docs/install.md +66 -0
  18. data/docs/ll-example/config-example-relative.yml +33 -0
  19. data/docs/ll-example/files-dir/LLexample-PDF.pdf +0 -0
  20. data/docs/ll-example/files-dir/LLexample-TOCHANGE.txt +15 -0
  21. data/docs/ll-example/files-dir/LLexample-tokeep.txt +10 -0
  22. data/docs/ll-example/metadata-dir/.gitkeep +0 -0
  23. data/docs/ll-example/replica-files/.gitkeep +0 -0
  24. data/docs/ll-example/replica-metadata/.gitkeep +0 -0
  25. data/docs/quickstart.md +270 -0
  26. data/docs/rdocs/Longleaf.html +135 -0
  27. data/docs/rdocs/Longleaf/AppFields.html +178 -0
  28. data/docs/rdocs/Longleaf/ApplicationConfigDeserializer.html +631 -0
  29. data/docs/rdocs/Longleaf/ApplicationConfigManager.html +610 -0
  30. data/docs/rdocs/Longleaf/ApplicationConfigValidator.html +238 -0
  31. data/docs/rdocs/Longleaf/CLI.html +909 -0
  32. data/docs/rdocs/Longleaf/ChecksumMismatchError.html +151 -0
  33. data/docs/rdocs/Longleaf/ConfigBuilder.html +1339 -0
  34. data/docs/rdocs/Longleaf/ConfigurationError.html +143 -0
  35. data/docs/rdocs/Longleaf/ConfigurationValidator.html +227 -0
  36. data/docs/rdocs/Longleaf/DeregisterCommand.html +420 -0
  37. data/docs/rdocs/Longleaf/DeregisterEvent.html +453 -0
  38. data/docs/rdocs/Longleaf/DeregistrationError.html +151 -0
  39. data/docs/rdocs/Longleaf/DigestHelper.html +419 -0
  40. data/docs/rdocs/Longleaf/EventError.html +147 -0
  41. data/docs/rdocs/Longleaf/EventNames.html +163 -0
  42. data/docs/rdocs/Longleaf/EventStatusTracking.html +656 -0
  43. data/docs/rdocs/Longleaf/FileCheckService.html +540 -0
  44. data/docs/rdocs/Longleaf/FileHelpers.html +520 -0
  45. data/docs/rdocs/Longleaf/FileRecord.html +716 -0
  46. data/docs/rdocs/Longleaf/FileSelector.html +901 -0
  47. data/docs/rdocs/Longleaf/FixityCheckService.html +691 -0
  48. data/docs/rdocs/Longleaf/IndexManager.html +1155 -0
  49. data/docs/rdocs/Longleaf/InvalidDigestAlgorithmError.html +143 -0
  50. data/docs/rdocs/Longleaf/InvalidStoragePathError.html +143 -0
  51. data/docs/rdocs/Longleaf/Logging.html +405 -0
  52. data/docs/rdocs/Longleaf/Logging/RedirectingLogger.html +1213 -0
  53. data/docs/rdocs/Longleaf/LongleafError.html +139 -0
  54. data/docs/rdocs/Longleaf/MDFields.html +193 -0
  55. data/docs/rdocs/Longleaf/MetadataBuilder.html +787 -0
  56. data/docs/rdocs/Longleaf/MetadataDeserializer.html +537 -0
  57. data/docs/rdocs/Longleaf/MetadataError.html +143 -0
  58. data/docs/rdocs/Longleaf/MetadataPersistenceManager.html +539 -0
  59. data/docs/rdocs/Longleaf/MetadataRecord.html +1411 -0
  60. data/docs/rdocs/Longleaf/MetadataSerializer.html +786 -0
  61. data/docs/rdocs/Longleaf/PreservationServiceError.html +147 -0
  62. data/docs/rdocs/Longleaf/PreserveCommand.html +410 -0
  63. data/docs/rdocs/Longleaf/PreserveEvent.html +491 -0
  64. data/docs/rdocs/Longleaf/RegisterCommand.html +428 -0
  65. data/docs/rdocs/Longleaf/RegisterEvent.html +628 -0
  66. data/docs/rdocs/Longleaf/RegisteredFileSelector.html +446 -0
  67. data/docs/rdocs/Longleaf/RegistrationError.html +151 -0
  68. data/docs/rdocs/Longleaf/ReindexCommand.html +576 -0
  69. data/docs/rdocs/Longleaf/RsyncReplicationService.html +1180 -0
  70. data/docs/rdocs/Longleaf/SequelIndexDriver.html +1978 -0
  71. data/docs/rdocs/Longleaf/ServiceCandidateFilesystemIterator.html +572 -0
  72. data/docs/rdocs/Longleaf/ServiceCandidateIndexIterator.html +532 -0
  73. data/docs/rdocs/Longleaf/ServiceCandidateLocator.html +333 -0
  74. data/docs/rdocs/Longleaf/ServiceClassCache.html +725 -0
  75. data/docs/rdocs/Longleaf/ServiceDateHelper.html +425 -0
  76. data/docs/rdocs/Longleaf/ServiceDefinition.html +683 -0
  77. data/docs/rdocs/Longleaf/ServiceDefinitionManager.html +371 -0
  78. data/docs/rdocs/Longleaf/ServiceDefinitionValidator.html +269 -0
  79. data/docs/rdocs/Longleaf/ServiceFields.html +173 -0
  80. data/docs/rdocs/Longleaf/ServiceManager.html +1229 -0
  81. data/docs/rdocs/Longleaf/ServiceMappingManager.html +410 -0
  82. data/docs/rdocs/Longleaf/ServiceMappingValidator.html +347 -0
  83. data/docs/rdocs/Longleaf/ServiceRecord.html +821 -0
  84. data/docs/rdocs/Longleaf/StorageLocation.html +985 -0
  85. data/docs/rdocs/Longleaf/StorageLocationManager.html +729 -0
  86. data/docs/rdocs/Longleaf/StorageLocationUnavailableError.html +143 -0
  87. data/docs/rdocs/Longleaf/StorageLocationValidator.html +373 -0
  88. data/docs/rdocs/Longleaf/StoragePathValidator.html +253 -0
  89. data/docs/rdocs/Longleaf/SystemConfigBuilder.html +441 -0
  90. data/docs/rdocs/Longleaf/SystemConfigFields.html +163 -0
  91. data/docs/rdocs/Longleaf/ValidateConfigCommand.html +451 -0
  92. data/docs/rdocs/Longleaf/ValidateMetadataCommand.html +408 -0
  93. data/docs/rdocs/_index.html +660 -0
  94. data/docs/rdocs/class_list.html +51 -0
  95. data/docs/rdocs/css/common.css +1 -0
  96. data/docs/rdocs/css/full_list.css +58 -0
  97. data/docs/rdocs/css/style.css +496 -0
  98. data/docs/rdocs/file.README.html +165 -0
  99. data/docs/rdocs/file_list.html +56 -0
  100. data/docs/rdocs/frames.html +17 -0
  101. data/docs/rdocs/index.html +165 -0
  102. data/docs/rdocs/js/app.js +303 -0
  103. data/docs/rdocs/js/full_list.js +216 -0
  104. data/docs/rdocs/js/jquery.js +4 -0
  105. data/docs/rdocs/method_list.html +2051 -0
  106. data/docs/rdocs/top-level-namespace.html +110 -0
  107. data/lib/longleaf/candidates/file_selector.rb +139 -0
  108. data/lib/longleaf/candidates/manifest_digest_provider.rb +17 -0
  109. data/lib/longleaf/candidates/registered_file_selector.rb +67 -0
  110. data/lib/longleaf/candidates/service_candidate_filesystem_iterator.rb +93 -0
  111. data/lib/longleaf/candidates/service_candidate_index_iterator.rb +84 -0
  112. data/lib/longleaf/candidates/service_candidate_locator.rb +23 -0
  113. data/lib/longleaf/candidates/single_digest_provider.rb +13 -0
  114. data/lib/longleaf/cli.rb +237 -46
  115. data/lib/longleaf/commands/deregister_command.rb +51 -0
  116. data/lib/longleaf/commands/preserve_command.rb +50 -0
  117. data/lib/longleaf/commands/register_command.rb +32 -43
  118. data/lib/longleaf/commands/reindex_command.rb +92 -0
  119. data/lib/longleaf/commands/validate_config_command.rb +33 -8
  120. data/lib/longleaf/commands/validate_metadata_command.rb +51 -0
  121. data/lib/longleaf/errors.rb +26 -7
  122. data/lib/longleaf/events/deregister_event.rb +53 -0
  123. data/lib/longleaf/events/event_names.rb +9 -0
  124. data/lib/longleaf/events/event_status_tracking.rb +59 -0
  125. data/lib/longleaf/events/preserve_event.rb +81 -0
  126. data/lib/longleaf/events/register_event.rb +52 -51
  127. data/lib/longleaf/helpers/case_insensitive_hash.rb +38 -0
  128. data/lib/longleaf/helpers/digest_helper.rb +56 -0
  129. data/lib/longleaf/helpers/s3_uri_helper.rb +86 -0
  130. data/lib/longleaf/helpers/selection_options_parser.rb +189 -0
  131. data/lib/longleaf/helpers/service_date_helper.rb +78 -0
  132. data/lib/longleaf/indexing/index_manager.rb +101 -0
  133. data/lib/longleaf/indexing/sequel_index_driver.rb +306 -0
  134. data/lib/longleaf/logging.rb +5 -4
  135. data/lib/longleaf/logging/redirecting_logger.rb +26 -25
  136. data/lib/longleaf/models/app_fields.rb +7 -2
  137. data/lib/longleaf/models/file_record.rb +17 -8
  138. data/lib/longleaf/models/filesystem_metadata_location.rb +56 -0
  139. data/lib/longleaf/models/filesystem_storage_location.rb +52 -0
  140. data/lib/longleaf/models/md_fields.rb +2 -1
  141. data/lib/longleaf/models/metadata_location.rb +47 -0
  142. data/lib/longleaf/models/metadata_record.rb +39 -15
  143. data/lib/longleaf/models/s3_storage_location.rb +133 -0
  144. data/lib/longleaf/models/service_definition.rb +7 -6
  145. data/lib/longleaf/models/service_fields.rb +7 -1
  146. data/lib/longleaf/models/service_record.rb +10 -6
  147. data/lib/longleaf/models/storage_location.rb +24 -19
  148. data/lib/longleaf/models/storage_types.rb +9 -0
  149. data/lib/longleaf/models/system_config_fields.rb +9 -0
  150. data/lib/longleaf/preservation_services/file_check_service.rb +58 -0
  151. data/lib/longleaf/preservation_services/fixity_check_service.rb +123 -0
  152. data/lib/longleaf/preservation_services/rsync_replication_service.rb +182 -0
  153. data/lib/longleaf/preservation_services/s3_replication_service.rb +143 -0
  154. data/lib/longleaf/services/application_config_deserializer.rb +81 -24
  155. data/lib/longleaf/services/application_config_manager.rb +20 -6
  156. data/lib/longleaf/services/application_config_validator.rb +19 -9
  157. data/lib/longleaf/services/configuration_validator.rb +67 -4
  158. data/lib/longleaf/services/filesystem_location_validator.rb +16 -0
  159. data/lib/longleaf/services/metadata_deserializer.rb +113 -42
  160. data/lib/longleaf/services/metadata_persistence_manager.rb +47 -0
  161. data/lib/longleaf/services/metadata_serializer.rb +138 -25
  162. data/lib/longleaf/services/metadata_validator.rb +76 -0
  163. data/lib/longleaf/services/s3_location_validator.rb +19 -0
  164. data/lib/longleaf/services/service_class_cache.rb +112 -0
  165. data/lib/longleaf/services/service_definition_manager.rb +10 -7
  166. data/lib/longleaf/services/service_definition_validator.rb +25 -18
  167. data/lib/longleaf/services/service_manager.rb +86 -11
  168. data/lib/longleaf/services/service_mapping_manager.rb +13 -12
  169. data/lib/longleaf/services/service_mapping_validator.rb +36 -26
  170. data/lib/longleaf/services/storage_location_manager.rb +76 -15
  171. data/lib/longleaf/services/storage_location_validator.rb +49 -35
  172. data/lib/longleaf/specs/config_builder.rb +47 -23
  173. data/lib/longleaf/specs/config_validator_helpers.rb +16 -0
  174. data/lib/longleaf/specs/custom_matchers.rb +9 -0
  175. data/lib/longleaf/specs/file_helpers.rb +61 -0
  176. data/lib/longleaf/specs/metadata_builder.rb +92 -0
  177. data/lib/longleaf/specs/system_config_builder.rb +27 -0
  178. data/lib/longleaf/version.rb +1 -1
  179. data/longleaf.gemspec +20 -7
  180. data/mkdocs.yml +21 -0
  181. metadata +306 -23
  182. data/.travis.yml +0 -4
  183. data/lib/longleaf/commands/abstract_command.rb +0 -37
  184. data/lib/longleaf/services/storage_path_validator.rb +0 -16
@@ -0,0 +1,182 @@
1
require 'longleaf/events/event_names'
require 'longleaf/logging'
require 'longleaf/errors'
require 'longleaf/models/file_record'
require 'longleaf/models/service_fields'
require 'longleaf/events/register_event'
require 'longleaf/candidates/single_digest_provider'
require 'open3'
require 'shellwords'
9
+
10
+ module Longleaf
11
+ # Preservation service which performs replication of a file to one or more destinations using rsync.
12
+ #
13
+ # The service definition must contain one or more destinations, specified with the "to" property.
14
+ # These destinations must be either a known storage location name, a remote path, or absolute path.
15
+ #
16
+ # Optional service configuration properties:
17
+ # * replica_collision_policy = specifies the desired outcome if the service attempts to replicate
18
+ # a file which already exists at a destination. Default: "replace".
19
+ # * rsync_command = the command to invoke in order to execute rsync. Default: "rsync"
20
+ # * rsync_options = additional parameters that will be passed along to rsync. Cannot include options
21
+ # which change the target of the command or prevent its execution, such as "files-from", "dry-run",
22
+ # "help", etc. Command will always include "-R". Default "-a".
23
+ class RsyncReplicationService
24
+ include Longleaf::Logging
25
+ SF ||= Longleaf::ServiceFields
26
+
27
+ RSYNC_COMMAND_PROPERTY = "rsync_command"
28
+ DEFAULT_COMMAND = "rsync"
29
+
30
+ RSYNC_OPTIONS_PROPERTY = "rsync_options"
31
+ DEFAULT_OPTIONS = "-a"
32
+ DISALLOWED_OPTIONS = ["files-from", "n", "dry-run", "exclude", "exclude-from", "cvs-exclude",
33
+ "h", "help", "f", "F", "filter"]
34
+
35
+ attr_reader :command, :options, :collision_policy
36
+
37
+ # Initialize a RsyncReplicationService from the given service definition
38
+ #
39
+ # @param service_def [ServiceDefinition] the configuration for this service
40
+ # @param app_manager [ApplicationConfigManager] the application configuration
41
def initialize(service_def, app_manager)
  @service_def = service_def
  @app_manager = app_manager
  properties = @service_def.properties

  @command = properties[RSYNC_COMMAND_PROPERTY] || DEFAULT_COMMAND

  # Validate the configured rsync options before "-R" is appended, so only the configured value is checked
  @options = properties[RSYNC_OPTIONS_PROPERTY] || DEFAULT_OPTIONS
  if contains_disallowed_option?(@options)
    raise ArgumentError.new("Service #{service_def.name} specifies a disallowed rsync parameter," \
        " rsync_options may not include the following: #{DISALLOWED_OPTIONS.join(' ')}")
  end

  # Add -R (--relative) to the command options to ensure the full path gets replicated
  @options = @options + " -R"

  # Set and validate the replica collision policy
  @collision_policy = properties[SF::COLLISION_PROPERTY] || SF::DEFAULT_COLLISION_POLICY
  unless SF::VALID_COLLISION_POLICIES.include?(@collision_policy)
    raise ArgumentError.new("Service #{service_def.name} received invalid #{SF::COLLISION_PROPERTY}" \
        " value #{@collision_policy}")
  end

  # A single destination may be given as a bare string, normalize it to a list
  replicate_to = properties[SF::REPLICATE_TO]
  if replicate_to.nil? || replicate_to.empty?
    raise ArgumentError.new("Service #{service_def.name} must provide one or more replication destinations.")
  end
  replicate_to = [replicate_to] if replicate_to.is_a?(String)

  # Build the list of destinations, translating names into storage locations when relevant
  locations = app_manager.location_manager.locations
  @destinations = replicate_to.map do |dest|
    # A destination containing ":" or "/" is assumed to be a path rather than a storage location name
    next dest if dest =~ /[:\/]/

    unless locations.key?(dest)
      raise ArgumentError.new("Service #{service_def.name} specifies unknown storage location '#{dest}'" \
          " as a replication destination")
    end
    locations[dest]
  end
end
88
+
89
+ # During a replication event, perform replication of the specified file to all configured destinations
90
+ # as necessary.
91
+ #
92
+ # @param file_rec [FileRecord] record representing the file to perform the service on.
93
+ # @param event [String] name of the event this service is being invoked by.
94
+ # @raise [PreservationServiceError] if the rsync replication fails
95
def perform(file_rec, event)
  # Split the configured command and options into discrete arguments. The command is executed
  # without a shell, so characters such as quotes, "$" or backticks in file paths are passed to
  # rsync literally instead of being interpreted.
  command_args = Shellwords.split(@command)
  option_args = Shellwords.split(@options)

  # Determine the path to the file being replicated relative to its storage location
  rel_path = file_rec.storage_location.relativize(file_rec.path)
  # source path with . so that rsync will only create destination directories starting from that point
  source_path = File.join(file_rec.storage_location.path, "./#{rel_path}")

  @destinations.each do |destination|
    dest_is_storage_loc = destination.is_a?(Longleaf::StorageLocation)
    dest_path = dest_is_storage_loc ? destination.path : destination

    # Check that the destination is available before attempting to write
    verify_destination_available(destination, file_rec)

    rsync_args = command_args + [source_path, dest_path] + option_args
    logger.debug("Invoking rsync with command: #{Shellwords.join(rsync_args)}")
    begin
      _stdout, stderr, status = Open3.capture3(*rsync_args)
    rescue SystemCallError => e
      # Without a shell, a missing or non-executable command raises instead of returning a failure status
      raise PreservationServiceError.new("Failed to replicate #{file_rec.path} to #{dest_path}: #{e.message}")
    end
    raise PreservationServiceError.new("Failed to replicate #{file_rec.path} to #{dest_path}: #{stderr}") \
        unless status.success?

    logger.info("Replicated #{file_rec.path} to destination #{dest_path}")

    # For destinations which are storage locations, register the replica with longleaf
    register_replica(destination, rel_path, file_rec) if dest_is_storage_loc
  end
end
126
+
127
+ # Determine if this service is applicable for the provided event, given the configured service definition
128
+ #
129
+ # @param event [String] name of the event
130
+ # @return [Boolean] returns true if this service is applicable for the provided event
131
def is_applicable?(event)
  # Only the preserve event triggers this service
  EventNames::PRESERVE == event
end
139
+
140
+ private
141
# Determine whether an rsync options string includes any entry from DISALLOWED_OPTIONS.
# Single character entries are matched as short flags, including when bundled with other
# flags (for example "-an"); longer entries are matched as "--name" long options.
#
# @param options [String] rsync options to inspect
# @return [Boolean] true if any disallowed option is present
def contains_disallowed_option?(options)
  DISALLOWED_OPTIONS.any? do |name|
    pattern = if name.length == 1
                /(\A| )-[a-zA-Z0-9]*#{name}[a-zA-Z0-9]*( |=|\z)/
              else
                /(\A| )--#{name}( |=|\z)/
              end
    options =~ pattern
  end
end
156
+
157
# Verify that a replication destination can be written to.
# Storage locations are asked whether they are available; absolute path destinations
# must be existing directories. Other destinations are not checked.
#
# @param destination [StorageLocation, String] storage location or path being replicated to
# @param file_rec [FileRecord] record of the file being replicated, used in error messages
# @raise [StorageLocationUnavailableError] if the destination is not available
def verify_destination_available(destination, file_rec)
  unless destination.is_a?(Longleaf::StorageLocation)
    if destination.start_with?("/") && !Dir.exist?(destination)
      raise StorageLocationUnavailableError.new("Cannot replicate #{file_rec.path} to destination" \
          " #{destination}, path does not exist.")
    end
    return
  end

  begin
    destination.available?
  rescue StorageLocationUnavailableError => e
    # Re-raise with the file and destination name added for context
    raise StorageLocationUnavailableError.new(
      "Cannot replicate #{file_rec.path} to destination #{destination.name}: #{e.message}")
  end
end
170
+
171
# Register a replicated file at its destination storage location by performing a forced
# RegisterEvent, supplying the checksums from the source file's metadata record.
#
# @param destination [StorageLocation] storage location the file was replicated to
# @param rel_path [String] path of the file relative to its storage location
# @param file_rec [FileRecord] record of the source file
def register_replica(destination, rel_path, file_rec)
  replica_rec = FileRecord.new(File.join(destination.path, rel_path), destination)
  digest_provider = SingleDigestProvider.new(file_rec.metadata_record.checksums)

  RegisterEvent.new(file_rec: replica_rec,
                    app_manager: @app_manager,
                    force: true,
                    digest_provider: digest_provider).perform
end
181
+ end
182
+ end
@@ -0,0 +1,143 @@
1
+ require 'longleaf/events/event_names'
2
+ require 'longleaf/logging'
3
+ require 'longleaf/errors'
4
+ require 'longleaf/models/file_record'
5
+ require 'longleaf/models/service_fields'
6
+ require 'longleaf/events/register_event'
7
+ require 'longleaf/models/storage_types'
8
+ require 'aws-sdk-s3'
9
+
10
+ module Longleaf
11
+ # Preservation service which performs replication of a file to one or more s3 destinations.
12
+ #
13
+ # The service definition must contain one or more destinations, specified with the "to" property.
14
+ # Each destination must be a known s3 storage location. The s3 client configuration
15
+ # is controlled by the storage location.
16
+ #
17
+ # Optional service configuration properties:
18
+ # * replica_collision_policy = specifies the desired outcome if the service attempts to replicate
19
+ # a file which already exists at a destination. Default: "replace".
20
+ class S3ReplicationService
21
+ include Longleaf::Logging
22
+ ST ||= Longleaf::StorageTypes
23
+ SF ||= Longleaf::ServiceFields
24
+
25
+ attr_reader :collision_policy
26
+
27
+ # Initialize a S3ReplicationService from the given service definition
28
+ #
29
+ # @param service_def [ServiceDefinition] the configuration for this service
30
+ # @param app_manager [ApplicationConfigManager] the application configuration
31
def initialize(service_def, app_manager)
  @service_def = service_def
  @app_manager = app_manager

  # Resolve the replica collision policy, falling back to the default, and reject unknown values
  @collision_policy = @service_def.properties[SF::COLLISION_PROPERTY] || SF::DEFAULT_COLLISION_POLICY
  unless SF::VALID_COLLISION_POLICIES.include?(@collision_policy)
    raise ArgumentError.new("Service #{service_def.name} received invalid #{SF::COLLISION_PROPERTY}" \
        " value #{@collision_policy}")
  end

  # A single destination may be given as a bare string, normalize it to a list
  replicate_to = @service_def.properties[SF::REPLICATE_TO]
  if replicate_to.nil? || replicate_to.empty?
    raise ArgumentError.new("Service #{service_def.name} must provide one or more replication destinations.")
  end
  replicate_to = [replicate_to] if replicate_to.is_a?(String)

  # Every destination must name a configured storage location of type s3
  loc_manager = app_manager.location_manager
  @destinations = replicate_to.map do |dest|
    unless loc_manager.locations.key?(dest)
      raise ArgumentError.new("Service #{service_def.name} specifies unknown storage location '#{dest}'" \
          " as a replication destination")
    end

    location = loc_manager.locations[dest]
    if location.type != ST::S3_STORAGE_TYPE
      raise ArgumentError.new(
        "Service #{service_def.name} specifies destination #{dest} which is not of type 's3'")
    end
    loc_manager.locations[dest]
  end
end
66
+
67
+ # During a replication event, perform replication of the specified file to all configured destinations
68
+ # as necessary.
69
+ #
70
+ # @param file_rec [FileRecord] record representing the file to perform the service on.
71
+ # @param event [String] name of the event this service is being invoked by.
72
+ # @raise [PreservationServiceError] if the s3 replication fails or the source storage type is unsupported
73
def perform(file_rec, event)
  # Only files held in filesystem storage locations can be replicated to s3
  unless file_rec.storage_location.type == ST::FILESYSTEM_STORAGE_TYPE
    raise PreservationServiceError.new("Replication from storage location of type " \
        "#{file_rec.storage_location.type} to s3 is not supported")
  end

  replicate_from_fs(file_rec)
end
81
+
82
# Upload a file from a filesystem storage location to every configured s3 destination.
#
# @param file_rec [FileRecord] record representing the file to replicate
# @raise [ChecksumMismatchError] if s3 reports that the content did not match the provided MD5
# @raise [PreservationServiceError] if the transfer fails with any other AWS service error
def replicate_from_fs(file_rec)
  # Path of the file relative to its storage location, used to derive the object key
  rel_path = file_rec.storage_location.relativize(file_rec.path)
  content_md5 = get_content_md5(file_rec)

  @destinations.each do |destination|
    # Check that the destination is available before attempting to write
    verify_destination_available(destination, file_rec)

    s3_object = destination.s3_bucket.object(destination.relative_to_bucket_path(rel_path))
    begin
      s3_object.upload_file(file_rec.path, { content_md5: content_md5 })
    rescue Aws::S3::Errors::BadDigest
      raise ChecksumMismatchError.new("Transfer to bucket '#{destination.s3_bucket.name}' failed, " \
          "MD5 provided did not match the received content for #{file_rec.path}")
    rescue Aws::Errors::ServiceError => e
      raise PreservationServiceError.new("Failed to transfer #{file_rec.path} to bucket " \
          "'#{destination.s3_bucket.name}': #{e.message}")
    end

    logger.info("Replicated #{file_rec.path} to destination #{s3_object.public_url}")

    # TODO register file in destination
  end
end
109
+
110
+ # Determine if this service is applicable for the provided event, given the configured service definition
111
+ #
112
+ # @param event [String] name of the event
113
+ # @return [Boolean] returns true if this service is applicable for the provided event
114
def is_applicable?(event)
  # Only the preserve event triggers this service
  EventNames::PRESERVE == event
end
122
+
123
+ private
124
# Verify that the destination storage location is available, adding the file and
# destination name to the error message when it is not.
#
# @param destination [StorageLocation] storage location being replicated to
# @param file_rec [FileRecord] record of the file being replicated, used in the error message
# @raise [StorageLocationUnavailableError] if the destination reports it is unavailable
def verify_destination_available(destination, file_rec)
  destination.available?
rescue StorageLocationUnavailableError => e
  raise StorageLocationUnavailableError.new(
    "Cannot replicate #{file_rec.path} to destination #{destination.name}: #{e.message}")
end
132
+
133
# Build the value for the Content-MD5 header from the md5 checksum recorded for the file.
#
# @param file_rec [FileRecord] record whose metadata record supplies the checksums
# @return [String, nil] base64 encoding of the binary md5 digest, or nil if no md5 is recorded
def get_content_md5(file_rec)
  checksums = file_rec.metadata_record.checksums
  return nil unless checksums.key?('md5')

  # Convert the hex digest to raw bytes, then base64 encode without line breaks
  raw_digest = [checksums['md5']].pack("H*")
  [raw_digest].pack("m0")
end
142
+ end
143
+ end
@@ -1,46 +1,103 @@
1
1
  require 'longleaf/services/application_config_validator'
2
2
  require 'longleaf/services/application_config_manager'
3
+ require 'digest/md5'
4
+ require 'pathname'
3
5
 
4
- # Deserializer for application configuration files
5
6
  module Longleaf
7
+ # Deserializer for application configuration files
6
8
  class ApplicationConfigDeserializer
7
-
9
+ AF ||= Longleaf::AppFields
10
+
8
11
  # Deserializes a valid application configuration file as a ApplicationConfigManager option
9
- # @param config_path [String] file path to the application configuration file
12
+ # @param config_path [String] file path to the service and storage mapping configuration file
10
13
  # @param format [String] encoding format of the config file
11
- # return [Longleaf::ApplicationConfigManager] manager for the loaded configuration
14
+ # @return [ApplicationConfigManager] manager for the loaded configuration
12
15
def self.deserialize(config_path, format: 'yaml')
  raw_content = load_config_file(config_path)
  config = load(raw_content, format)

  # Paths are made absolute before validation runs
  make_paths_absolute(config_path, config)
  ApplicationConfigValidator.new(config).validate_config.raise_if_invalid

  # The MD5 of the raw file contents is handed to the manager along with the configuration
  ApplicationConfigManager.new(config, Digest::MD5.hexdigest(raw_content))
end
18
-
26
+
27
# Read the raw contents of the configuration file.
#
# @param config_path [String] file path to the configuration file
# @return [String] contents of the file
# @raise [Longleaf::ConfigurationError] if the file does not exist
def self.load_config_file(config_path)
  File.read(config_path)
rescue Errno::ENOENT
  raise Longleaf::ConfigurationError.new(
    "Configuration file #{config_path} does not exist.")
end
35
+
19
36
  # Deserialize a configuration file into a hash
20
- # @param config_path [String] file path to the application configuration file
37
+ # @param content [String] the contents of the application configuration file
21
38
  # @param format [String] encoding format of the config file
22
39
  # @return [Hash] hash containing the configuration
23
def self.load(content, format)
  # yaml is the only supported encoding
  unless 'yaml' == format
    raise ArgumentError.new("Invalid deserialization format #{format} specified")
  end

  from_yaml(content)
end
31
-
32
- private
33
# Parse YAML configuration content into a hash.
#
# @param content [String] the YAML contents of the application configuration file
# @return [Hash] hash containing the configuration
# @raise [Longleaf::ConfigurationError] if the content cannot be parsed as safe YAML
def self.from_yaml(content)
  # Keyword arguments are required here: the positional form
  # safe_load(yaml, classes, symbols, aliases) is rejected by Psych 4 (Ruby 3.1+).
  # No extra classes or symbols are permitted; aliases stay enabled as before.
  YAML.safe_load(content, permitted_classes: [], permitted_symbols: [], aliases: true)
rescue => err
  raise Longleaf::ConfigurationError.new(err)
end
56
+
57
# Rewrite the path of every storage location, and of its metadata configuration, as an
# absolute path resolved against the directory containing the configuration file.
# A metadata configuration given as a single string is expanded into a hash holding that path.
#
# @param config_path [String] file path to the configuration file
# @param config [Hash] the deserialized configuration, modified in place
def self.make_paths_absolute(config_path, config)
  base_pathname = Pathname.new(config_path).expand_path.parent

  # Leave a missing or malformed configuration untouched rather than failing with NoMethodError
  # here. NOTE(review): assumes the validation which runs afterwards reports it; confirm.
  return unless config.is_a?(Hash)
  locations = config[AF::LOCATIONS]
  return unless locations.is_a?(Hash)

  locations.each do |_name, properties|
    next unless properties.is_a?(Hash)

    properties[AF::LOCATION_PATH] = make_file_paths_absolute(base_pathname, properties)

    md_config = properties[AF::METADATA_CONFIG]
    next if md_config.nil?

    # Resolve a single field metadata location into its expanded form, storing the expanded
    # hash back so that the absolute path is not lost
    if md_config.is_a?(String)
      md_config = { AF::LOCATION_PATH => md_config }
      properties[AF::METADATA_CONFIG] = md_config
    end
    next unless md_config.is_a?(Hash)

    md_config[AF::LOCATION_PATH] = make_file_paths_absolute(base_pathname, md_config)
  end
end
74
+
75
# Resolve the path property of a location or metadata configuration to an absolute path.
# Values with no scheme or a "file" scheme are treated as filesystem paths; values with
# any other scheme are returned unchanged.
#
# @param base_pathname [Pathname] directory that relative paths are resolved against
# @param properties [Hash] configuration properties containing the path
# @return [String, nil] the resolved path, or nil if no path is configured
def self.make_file_paths_absolute(base_pathname, properties)
  path = properties[AF::LOCATION_PATH]
  return nil if path.nil?

  begin
    uri = URI(path)
  rescue URI::InvalidURIError
    # Ordinary file paths may contain characters which are not legal in a URI, such as
    # spaces. Decide based on the scheme prefix alone instead of failing.
    scheme = path[/\A([a-zA-Z][a-zA-Z0-9+.\-]*):\/\//, 1]
    return path unless scheme.nil? || scheme.casecmp("file") == 0
    return absolution(base_pathname, path.sub(/\Afile:\/\//i, ''))
  end

  if uri.scheme.nil? || uri.scheme.casecmp("file") == 0
    absolution(base_pathname, uri.path)
  else
    path
  end
end
87
+
88
# Convert a file path into an absolute path string.
#
# @param base_pathname [Pathname] directory that a relative path is resolved against
# @param file_path [String, nil] path to convert
# @return [String, nil] the absolute path, or nil if no path was given
def self.absolution(base_pathname, file_path)
  return nil if file_path.nil?

  candidate = Pathname.new(file_path)
  resolved = candidate.absolute? ? candidate.expand_path : base_pathname + candidate
  resolved.to_s
end
100
+
101
+ private_class_method :load_config_file
45
102
  end
46
- end
103
+ end