longleaf 0.1.0.pre.2 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (184) hide show
  1. checksums.yaml +4 -4
  2. data/.circleci/config.yml +94 -0
  3. data/.editorconfig +13 -0
  4. data/.gitignore +4 -1
  5. data/.rubocop.yml +44 -0
  6. data/.rubocop_todo.yml +834 -0
  7. data/.yardopts +1 -0
  8. data/Gemfile +16 -1
  9. data/README.md +98 -12
  10. data/Rakefile +6 -0
  11. data/bin/setup +16 -1
  12. data/docs/aboutlongleaf.md +28 -0
  13. data/docs/extra.css +32 -0
  14. data/docs/img/change-file.png +0 -0
  15. data/docs/img/ll-example-preserved.png +0 -0
  16. data/docs/index.md +19 -0
  17. data/docs/install.md +66 -0
  18. data/docs/ll-example/config-example-relative.yml +33 -0
  19. data/docs/ll-example/files-dir/LLexample-PDF.pdf +0 -0
  20. data/docs/ll-example/files-dir/LLexample-TOCHANGE.txt +15 -0
  21. data/docs/ll-example/files-dir/LLexample-tokeep.txt +10 -0
  22. data/docs/ll-example/metadata-dir/.gitkeep +0 -0
  23. data/docs/ll-example/replica-files/.gitkeep +0 -0
  24. data/docs/ll-example/replica-metadata/.gitkeep +0 -0
  25. data/docs/quickstart.md +270 -0
  26. data/docs/rdocs/Longleaf.html +135 -0
  27. data/docs/rdocs/Longleaf/AppFields.html +178 -0
  28. data/docs/rdocs/Longleaf/ApplicationConfigDeserializer.html +631 -0
  29. data/docs/rdocs/Longleaf/ApplicationConfigManager.html +610 -0
  30. data/docs/rdocs/Longleaf/ApplicationConfigValidator.html +238 -0
  31. data/docs/rdocs/Longleaf/CLI.html +909 -0
  32. data/docs/rdocs/Longleaf/ChecksumMismatchError.html +151 -0
  33. data/docs/rdocs/Longleaf/ConfigBuilder.html +1339 -0
  34. data/docs/rdocs/Longleaf/ConfigurationError.html +143 -0
  35. data/docs/rdocs/Longleaf/ConfigurationValidator.html +227 -0
  36. data/docs/rdocs/Longleaf/DeregisterCommand.html +420 -0
  37. data/docs/rdocs/Longleaf/DeregisterEvent.html +453 -0
  38. data/docs/rdocs/Longleaf/DeregistrationError.html +151 -0
  39. data/docs/rdocs/Longleaf/DigestHelper.html +419 -0
  40. data/docs/rdocs/Longleaf/EventError.html +147 -0
  41. data/docs/rdocs/Longleaf/EventNames.html +163 -0
  42. data/docs/rdocs/Longleaf/EventStatusTracking.html +656 -0
  43. data/docs/rdocs/Longleaf/FileCheckService.html +540 -0
  44. data/docs/rdocs/Longleaf/FileHelpers.html +520 -0
  45. data/docs/rdocs/Longleaf/FileRecord.html +716 -0
  46. data/docs/rdocs/Longleaf/FileSelector.html +901 -0
  47. data/docs/rdocs/Longleaf/FixityCheckService.html +691 -0
  48. data/docs/rdocs/Longleaf/IndexManager.html +1155 -0
  49. data/docs/rdocs/Longleaf/InvalidDigestAlgorithmError.html +143 -0
  50. data/docs/rdocs/Longleaf/InvalidStoragePathError.html +143 -0
  51. data/docs/rdocs/Longleaf/Logging.html +405 -0
  52. data/docs/rdocs/Longleaf/Logging/RedirectingLogger.html +1213 -0
  53. data/docs/rdocs/Longleaf/LongleafError.html +139 -0
  54. data/docs/rdocs/Longleaf/MDFields.html +193 -0
  55. data/docs/rdocs/Longleaf/MetadataBuilder.html +787 -0
  56. data/docs/rdocs/Longleaf/MetadataDeserializer.html +537 -0
  57. data/docs/rdocs/Longleaf/MetadataError.html +143 -0
  58. data/docs/rdocs/Longleaf/MetadataPersistenceManager.html +539 -0
  59. data/docs/rdocs/Longleaf/MetadataRecord.html +1411 -0
  60. data/docs/rdocs/Longleaf/MetadataSerializer.html +786 -0
  61. data/docs/rdocs/Longleaf/PreservationServiceError.html +147 -0
  62. data/docs/rdocs/Longleaf/PreserveCommand.html +410 -0
  63. data/docs/rdocs/Longleaf/PreserveEvent.html +491 -0
  64. data/docs/rdocs/Longleaf/RegisterCommand.html +428 -0
  65. data/docs/rdocs/Longleaf/RegisterEvent.html +628 -0
  66. data/docs/rdocs/Longleaf/RegisteredFileSelector.html +446 -0
  67. data/docs/rdocs/Longleaf/RegistrationError.html +151 -0
  68. data/docs/rdocs/Longleaf/ReindexCommand.html +576 -0
  69. data/docs/rdocs/Longleaf/RsyncReplicationService.html +1180 -0
  70. data/docs/rdocs/Longleaf/SequelIndexDriver.html +1978 -0
  71. data/docs/rdocs/Longleaf/ServiceCandidateFilesystemIterator.html +572 -0
  72. data/docs/rdocs/Longleaf/ServiceCandidateIndexIterator.html +532 -0
  73. data/docs/rdocs/Longleaf/ServiceCandidateLocator.html +333 -0
  74. data/docs/rdocs/Longleaf/ServiceClassCache.html +725 -0
  75. data/docs/rdocs/Longleaf/ServiceDateHelper.html +425 -0
  76. data/docs/rdocs/Longleaf/ServiceDefinition.html +683 -0
  77. data/docs/rdocs/Longleaf/ServiceDefinitionManager.html +371 -0
  78. data/docs/rdocs/Longleaf/ServiceDefinitionValidator.html +269 -0
  79. data/docs/rdocs/Longleaf/ServiceFields.html +173 -0
  80. data/docs/rdocs/Longleaf/ServiceManager.html +1229 -0
  81. data/docs/rdocs/Longleaf/ServiceMappingManager.html +410 -0
  82. data/docs/rdocs/Longleaf/ServiceMappingValidator.html +347 -0
  83. data/docs/rdocs/Longleaf/ServiceRecord.html +821 -0
  84. data/docs/rdocs/Longleaf/StorageLocation.html +985 -0
  85. data/docs/rdocs/Longleaf/StorageLocationManager.html +729 -0
  86. data/docs/rdocs/Longleaf/StorageLocationUnavailableError.html +143 -0
  87. data/docs/rdocs/Longleaf/StorageLocationValidator.html +373 -0
  88. data/docs/rdocs/Longleaf/StoragePathValidator.html +253 -0
  89. data/docs/rdocs/Longleaf/SystemConfigBuilder.html +441 -0
  90. data/docs/rdocs/Longleaf/SystemConfigFields.html +163 -0
  91. data/docs/rdocs/Longleaf/ValidateConfigCommand.html +451 -0
  92. data/docs/rdocs/Longleaf/ValidateMetadataCommand.html +408 -0
  93. data/docs/rdocs/_index.html +660 -0
  94. data/docs/rdocs/class_list.html +51 -0
  95. data/docs/rdocs/css/common.css +1 -0
  96. data/docs/rdocs/css/full_list.css +58 -0
  97. data/docs/rdocs/css/style.css +496 -0
  98. data/docs/rdocs/file.README.html +165 -0
  99. data/docs/rdocs/file_list.html +56 -0
  100. data/docs/rdocs/frames.html +17 -0
  101. data/docs/rdocs/index.html +165 -0
  102. data/docs/rdocs/js/app.js +303 -0
  103. data/docs/rdocs/js/full_list.js +216 -0
  104. data/docs/rdocs/js/jquery.js +4 -0
  105. data/docs/rdocs/method_list.html +2051 -0
  106. data/docs/rdocs/top-level-namespace.html +110 -0
  107. data/lib/longleaf/candidates/file_selector.rb +139 -0
  108. data/lib/longleaf/candidates/manifest_digest_provider.rb +17 -0
  109. data/lib/longleaf/candidates/registered_file_selector.rb +67 -0
  110. data/lib/longleaf/candidates/service_candidate_filesystem_iterator.rb +93 -0
  111. data/lib/longleaf/candidates/service_candidate_index_iterator.rb +84 -0
  112. data/lib/longleaf/candidates/service_candidate_locator.rb +23 -0
  113. data/lib/longleaf/candidates/single_digest_provider.rb +13 -0
  114. data/lib/longleaf/cli.rb +237 -46
  115. data/lib/longleaf/commands/deregister_command.rb +51 -0
  116. data/lib/longleaf/commands/preserve_command.rb +50 -0
  117. data/lib/longleaf/commands/register_command.rb +32 -43
  118. data/lib/longleaf/commands/reindex_command.rb +92 -0
  119. data/lib/longleaf/commands/validate_config_command.rb +33 -8
  120. data/lib/longleaf/commands/validate_metadata_command.rb +51 -0
  121. data/lib/longleaf/errors.rb +26 -7
  122. data/lib/longleaf/events/deregister_event.rb +53 -0
  123. data/lib/longleaf/events/event_names.rb +9 -0
  124. data/lib/longleaf/events/event_status_tracking.rb +59 -0
  125. data/lib/longleaf/events/preserve_event.rb +81 -0
  126. data/lib/longleaf/events/register_event.rb +52 -51
  127. data/lib/longleaf/helpers/case_insensitive_hash.rb +38 -0
  128. data/lib/longleaf/helpers/digest_helper.rb +56 -0
  129. data/lib/longleaf/helpers/s3_uri_helper.rb +86 -0
  130. data/lib/longleaf/helpers/selection_options_parser.rb +189 -0
  131. data/lib/longleaf/helpers/service_date_helper.rb +78 -0
  132. data/lib/longleaf/indexing/index_manager.rb +101 -0
  133. data/lib/longleaf/indexing/sequel_index_driver.rb +306 -0
  134. data/lib/longleaf/logging.rb +5 -4
  135. data/lib/longleaf/logging/redirecting_logger.rb +26 -25
  136. data/lib/longleaf/models/app_fields.rb +7 -2
  137. data/lib/longleaf/models/file_record.rb +17 -8
  138. data/lib/longleaf/models/filesystem_metadata_location.rb +56 -0
  139. data/lib/longleaf/models/filesystem_storage_location.rb +52 -0
  140. data/lib/longleaf/models/md_fields.rb +2 -1
  141. data/lib/longleaf/models/metadata_location.rb +47 -0
  142. data/lib/longleaf/models/metadata_record.rb +39 -15
  143. data/lib/longleaf/models/s3_storage_location.rb +133 -0
  144. data/lib/longleaf/models/service_definition.rb +7 -6
  145. data/lib/longleaf/models/service_fields.rb +7 -1
  146. data/lib/longleaf/models/service_record.rb +10 -6
  147. data/lib/longleaf/models/storage_location.rb +24 -19
  148. data/lib/longleaf/models/storage_types.rb +9 -0
  149. data/lib/longleaf/models/system_config_fields.rb +9 -0
  150. data/lib/longleaf/preservation_services/file_check_service.rb +58 -0
  151. data/lib/longleaf/preservation_services/fixity_check_service.rb +123 -0
  152. data/lib/longleaf/preservation_services/rsync_replication_service.rb +182 -0
  153. data/lib/longleaf/preservation_services/s3_replication_service.rb +143 -0
  154. data/lib/longleaf/services/application_config_deserializer.rb +81 -24
  155. data/lib/longleaf/services/application_config_manager.rb +20 -6
  156. data/lib/longleaf/services/application_config_validator.rb +19 -9
  157. data/lib/longleaf/services/configuration_validator.rb +67 -4
  158. data/lib/longleaf/services/filesystem_location_validator.rb +16 -0
  159. data/lib/longleaf/services/metadata_deserializer.rb +113 -42
  160. data/lib/longleaf/services/metadata_persistence_manager.rb +47 -0
  161. data/lib/longleaf/services/metadata_serializer.rb +138 -25
  162. data/lib/longleaf/services/metadata_validator.rb +76 -0
  163. data/lib/longleaf/services/s3_location_validator.rb +19 -0
  164. data/lib/longleaf/services/service_class_cache.rb +112 -0
  165. data/lib/longleaf/services/service_definition_manager.rb +10 -7
  166. data/lib/longleaf/services/service_definition_validator.rb +25 -18
  167. data/lib/longleaf/services/service_manager.rb +86 -11
  168. data/lib/longleaf/services/service_mapping_manager.rb +13 -12
  169. data/lib/longleaf/services/service_mapping_validator.rb +36 -26
  170. data/lib/longleaf/services/storage_location_manager.rb +76 -15
  171. data/lib/longleaf/services/storage_location_validator.rb +49 -35
  172. data/lib/longleaf/specs/config_builder.rb +47 -23
  173. data/lib/longleaf/specs/config_validator_helpers.rb +16 -0
  174. data/lib/longleaf/specs/custom_matchers.rb +9 -0
  175. data/lib/longleaf/specs/file_helpers.rb +61 -0
  176. data/lib/longleaf/specs/metadata_builder.rb +92 -0
  177. data/lib/longleaf/specs/system_config_builder.rb +27 -0
  178. data/lib/longleaf/version.rb +1 -1
  179. data/longleaf.gemspec +20 -7
  180. data/mkdocs.yml +21 -0
  181. metadata +306 -23
  182. data/.travis.yml +0 -4
  183. data/lib/longleaf/commands/abstract_command.rb +0 -37
  184. data/lib/longleaf/services/storage_path_validator.rb +0 -16
@@ -0,0 +1,182 @@
1
require 'longleaf/events/event_names'
require 'longleaf/logging'
require 'longleaf/errors'
require 'longleaf/models/file_record'
require 'longleaf/models/service_fields'
require 'longleaf/events/register_event'
require 'longleaf/candidates/single_digest_provider'
require 'open3'
require 'shellwords'
9
+
10
+ module Longleaf
11
+ # Preservation service which performs replication of a file to one or more destinations using rsync.
12
+ #
13
+ # The service definition must contain one or more destinations, specified with the "to" property.
14
+ # These destinations must be either a known storage location name, a remote path, or absolute path.
15
+ #
16
+ # Optional service configuration properties:
17
+ # * replica_collision_policy = specifies the desired outcome if the service attempts to replicate
18
+ # a file which already exists at a destination. Default: "replace".
19
+ # * rsync_command = the command to invoke in order to execute rsync. Default: "rsync"
20
+ # * rsync_options = additional parameters that will be passed along to rsync. Cannot include options
21
+ # which change the target of the command or prevent its execution, such as "files-from", "dry-run",
22
+ # "help", etc. Command will always include "-R". Default "-a".
23
+ class RsyncReplicationService
24
+ include Longleaf::Logging
25
+ SF ||= Longleaf::ServiceFields
26
+
27
+ RSYNC_COMMAND_PROPERTY = "rsync_command"
28
+ DEFAULT_COMMAND = "rsync"
29
+
30
+ RSYNC_OPTIONS_PROPERTY = "rsync_options"
31
+ DEFAULT_OPTIONS = "-a"
32
+ DISALLOWED_OPTIONS = ["files-from", "n", "dry-run", "exclude", "exclude-from", "cvs-exclude",
33
+ "h", "help", "f", "F", "filter"]
34
+
35
+ attr_reader :command, :options, :collision_policy
36
+
37
+ # Initialize a RsyncReplicationService from the given service definition
38
+ #
39
+ # @param service_def [ServiceDefinition] the configuration for this service
40
+ # @param app_manager [ApplicationConfigManager] the application configuration
41
# @raise [ArgumentError] if rsync_options contains a disallowed option, the collision policy is
#   not one of the valid policies, no destinations are given, or a destination names an unknown
#   storage location
def initialize(service_def, app_manager)
  @service_def = service_def
  @app_manager = app_manager

  @command = @service_def.properties[RSYNC_COMMAND_PROPERTY] || DEFAULT_COMMAND

  # Validate rsync parameters
  @options = @service_def.properties[RSYNC_OPTIONS_PROPERTY] || DEFAULT_OPTIONS
  if contains_disallowed_option?(@options)
    raise ArgumentError.new("Service #{service_def.name} specifies a disallowed rsync parameter," \
        " rsync_options may not include the following: #{DISALLOWED_OPTIONS.join(' ')}")
  end

  # Add -R (--relative) in to command options to ensure full path gets replicated
  @options = @options + " -R"

  # Set and validate the replica collision policy
  @collision_policy = @service_def.properties[SF::COLLISION_PROPERTY] || SF::DEFAULT_COLLISION_POLICY
  unless SF::VALID_COLLISION_POLICIES.include?(@collision_policy)
    raise ArgumentError.new("Service #{service_def.name} received invalid #{SF::COLLISION_PROPERTY}" \
        " value #{@collision_policy}")
  end

  # Store and validate destinations
  replicate_to = @service_def.properties[SF::REPLICATE_TO]
  if replicate_to.nil? || replicate_to.empty?
    raise ArgumentError.new("Service #{service_def.name} must provide one or more replication destinations.")
  end
  replicate_to = [replicate_to] if replicate_to.is_a?(String)

  loc_manager = app_manager.location_manager
  # Build list of destinations, translating to storage locations when relevant
  @destinations = []
  replicate_to.each do |dest|
    # Assume that if destination contains a : or / it is a path rather than storage location
    if dest =~ /[:\/]/
      @destinations << dest
    elsif loc_manager.locations.key?(dest)
      @destinations << loc_manager.locations[dest]
    else
      raise ArgumentError.new("Service #{service_def.name} specifies unknown storage location '#{dest}'" \
          " as a replication destination")
    end
  end
end
88
+
89
+ # During a replication event, perform replication of the specified file to all configured destinations
90
+ # as necessary.
91
+ #
92
+ # @param file_rec [FileRecord] record representing the file to perform the service on.
93
+ # @param event [String] name of the event this service is being invoked by.
94
+ # @raise [PreservationServiceError] if the rsync replication fails
95
def perform(file_rec, event)
  # Determine the path to the file being replicated relative to its storage location.
  # This does not depend on the destination, so it is computed once.
  rel_path = file_rec.storage_location.relativize(file_rec.path)
  # source path with . so that rsync will only create destination directories starting from that point
  source_path = File.join(file_rec.storage_location.path, "./#{rel_path}")

  @destinations.each do |destination|
    dest_is_storage_loc = destination.is_a?(Longleaf::StorageLocation)
    dest_path = dest_is_storage_loc ? destination.path : destination

    # Check that the destination is available before attempting to write
    verify_destination_available(destination, file_rec)

    logger.debug("Invoking rsync with command: #{@command} \"#{source_path}\" \"#{dest_path}\" #{@options}")
    begin
      # Run rsync with an argument vector rather than a shell string, so that quotes, "$" or
      # backticks inside file paths are passed through literally instead of being interpreted.
      # The configured command and options are still split into words using shell quoting rules.
      rsync_argv = Shellwords.split(@command) + [source_path, dest_path] + Shellwords.split(@options)
      _stdout, stderr, status = Open3.capture3(*rsync_argv)
    rescue SystemCallError, ArgumentError => e
      # Command could not be started (e.g. executable missing) or its configuration could not be
      # parsed (e.g. unbalanced quotes); report it the same way as a failed rsync run.
      raise PreservationServiceError.new("Failed to replicate #{file_rec.path} to #{dest_path}: #{e.message}")
    end
    raise PreservationServiceError.new("Failed to replicate #{file_rec.path} to #{dest_path}: #{stderr}") \
      unless status.success?

    logger.info("Replicated #{file_rec.path} to destination #{dest_path}")

    # For destinations which are storage locations, register the replica with longleaf
    register_replica(destination, rel_path, file_rec) if dest_is_storage_loc
  end
end
126
+
127
+ # Determine if this service is applicable for the provided event, given the configured service definition
128
+ #
129
+ # @param event [String] name of the event
130
+ # @return [Boolean] returns true if this service is applicable for the provided event
131
def is_applicable?(event)
  # Only the preserve event triggers replication. The comparison uses === with the
  # event name on the left, which is the same match a case/when performs.
  EventNames::PRESERVE === event ? true : false
end
139
+
140
+ private
141
# Check an rsync options string against DISALLOWED_OPTIONS.
#
# @param options [String] the configured rsync options
# @return [Boolean] true if any disallowed option is present
def contains_disallowed_option?(options)
  DISALLOWED_OPTIONS.any? do |disallowed|
    pattern =
      if disallowed.length == 1
        # Short flag: may appear anywhere inside a bundle of single-dash flags, e.g. "-avn"
        /(\A| )-[a-zA-Z0-9]*#{disallowed}[a-zA-Z0-9]*( |=|\z)/
      else
        # Long flag: must match the whole "--name", optionally followed by "=" or a space
        /(\A| )--#{disallowed}( |=|\z)/
      end
    options =~ pattern
  end
end
156
+
157
# Raise if the destination can be determined to be unavailable.
# Storage locations are asked via available?; plain paths starting with "/" must be
# existing directories; any other destination string is not checked here.
#
# @param destination [StorageLocation, String] the replication destination
# @param file_rec [FileRecord] the file being replicated, used for the error message
# @raise [StorageLocationUnavailableError] if the destination is not available
def verify_destination_available(destination, file_rec)
  unless destination.is_a?(Longleaf::StorageLocation)
    return nil unless destination.start_with?("/")
    return nil if Dir.exist?(destination)

    raise StorageLocationUnavailableError.new(
      "Cannot replicate #{file_rec.path} to destination #{destination}, path does not exist.")
  end

  begin
    destination.available?
  rescue StorageLocationUnavailableError => unavailable
    # Re-raise with the file and destination named, keeping the original reason
    raise StorageLocationUnavailableError.new(
      "Cannot replicate #{file_rec.path} to destination #{destination.name}: #{unavailable.message}")
  end
end
170
+
171
# Register the newly written replica in its destination storage location.
#
# @param destination [StorageLocation] the location the replica was written to
# @param rel_path [String] path of the file relative to its storage location
# @param file_rec [FileRecord] record of the source file that was replicated
def register_replica(destination, rel_path, file_rec)
  replica_path = File.join(destination.path, rel_path)
  replica_rec = FileRecord.new(replica_path, destination)

  # The replica is registered with the checksums recorded for the source file
  digest_provider = SingleDigestProvider.new(file_rec.metadata_record.checksums)

  RegisterEvent.new(file_rec: replica_rec,
                    app_manager: @app_manager,
                    force: true,
                    digest_provider: digest_provider).perform
end
181
+ end
182
+ end
@@ -0,0 +1,143 @@
1
+ require 'longleaf/events/event_names'
2
+ require 'longleaf/logging'
3
+ require 'longleaf/errors'
4
+ require 'longleaf/models/file_record'
5
+ require 'longleaf/models/service_fields'
6
+ require 'longleaf/events/register_event'
7
+ require 'longleaf/models/storage_types'
8
+ require 'aws-sdk-s3'
9
+
10
+ module Longleaf
11
+ # Preservation service which performs replication of a file to one or more s3 destinations.
12
+ #
13
+ # The service definition must contain one or more destinations, specified with the "to" property.
14
+ # These destinations must be known s3 storage locations. The s3 client configuration
15
+ # is controlled by the storage location.
16
+ #
17
+ # Optional service configuration properties:
18
+ # * replica_collision_policy = specifies the desired outcome if the service attempts to replicate
19
+ # a file which already exists at a destination. Default: "replace".
20
+ class S3ReplicationService
21
+ include Longleaf::Logging
22
+ ST ||= Longleaf::StorageTypes
23
+ SF ||= Longleaf::ServiceFields
24
+
25
+ attr_reader :collision_policy
26
+
27
+ # Initialize a S3ReplicationService from the given service definition
28
+ #
29
+ # @param service_def [ServiceDefinition] the configuration for this service
30
+ # @param app_manager [ApplicationConfigManager] the application configuration
31
# @raise [ArgumentError] if the collision policy is invalid, no destinations are given, or a
#   destination is unknown or is not an s3 storage location
def initialize(service_def, app_manager)
  @service_def = service_def
  @app_manager = app_manager

  # Collision policy falls back to the default when not configured
  @collision_policy = @service_def.properties[SF::COLLISION_PROPERTY] || SF::DEFAULT_COLLISION_POLICY
  unless SF::VALID_COLLISION_POLICIES.include?(@collision_policy)
    raise ArgumentError.new("Service #{service_def.name} received invalid #{SF::COLLISION_PROPERTY}" \
        " value #{@collision_policy}")
  end

  # A single destination may be given as a bare string
  replicate_to = @service_def.properties[SF::REPLICATE_TO]
  if replicate_to.nil? || replicate_to.empty?
    raise ArgumentError.new("Service #{service_def.name} must provide one or more replication destinations.")
  end
  replicate_to = [replicate_to] if replicate_to.is_a?(String)

  # Resolve each destination name to its storage location, which must be of type s3
  known_locations = app_manager.location_manager.locations
  @destinations = replicate_to.map do |dest|
    unless known_locations.key?(dest)
      raise ArgumentError.new("Service #{service_def.name} specifies unknown storage location '#{dest}'" \
          " as a replication destination")
    end

    location = known_locations[dest]
    unless location.type == ST::S3_STORAGE_TYPE
      raise ArgumentError.new(
        "Service #{service_def.name} specifies destination #{dest} which is not of type 's3'")
    end
    location
  end
end
66
+
67
+ # During a replication event, perform replication of the specified file to all configured destinations
68
+ # as necessary.
69
+ #
70
+ # @param file_rec [FileRecord] record representing the file to perform the service on.
71
+ # @param event [String] name of the event this service is being invoked by.
72
+ # @raise [PreservationServiceError] if replication to s3 fails or the source location type is unsupported
73
def perform(file_rec, event)
  # Only files held in filesystem storage locations can be replicated to s3
  source_type = file_rec.storage_location.type
  unless source_type == ST::FILESYSTEM_STORAGE_TYPE
    raise PreservationServiceError.new("Replication from storage location of type " \
        "#{source_type} to s3 is not supported")
  end

  replicate_from_fs(file_rec)
end
81
+
82
# Upload a file from a filesystem storage location to every configured s3 destination.
#
# @param file_rec [FileRecord] record representing the file to replicate
# @raise [ChecksumMismatchError] if s3 reports the supplied MD5 does not match the content
# @raise [PreservationServiceError] if the transfer fails with any other AWS service error
def replicate_from_fs(file_rec)
  # Path of the file relative to its storage location
  rel_path = file_rec.storage_location.relativize(file_rec.path)
  # nil when the file's metadata has no md5 checksum
  content_md5 = get_content_md5(file_rec)

  @destinations.each do |destination|
    # Fail early if the destination cannot be reached
    verify_destination_available(destination, file_rec)

    object_key = destination.relative_to_bucket_path(rel_path)
    s3_object = destination.s3_bucket.object(object_key)
    begin
      s3_object.upload_file(file_rec.path, { :content_md5 => content_md5 })
    rescue Aws::S3::Errors::BadDigest
      raise ChecksumMismatchError.new("Transfer to bucket '#{destination.s3_bucket.name}' failed, " \
          "MD5 provided did not match the received content for #{file_rec.path}")
    rescue Aws::Errors::ServiceError => transfer_error
      raise PreservationServiceError.new("Failed to transfer #{file_rec.path} to bucket " \
          "'#{destination.s3_bucket.name}': #{transfer_error.message}")
    end

    logger.info("Replicated #{file_rec.path} to destination #{s3_object.public_url}")

    # TODO register file in destination
  end
end
109
+
110
+ # Determine if this service is applicable for the provided event, given the configured service definition
111
+ #
112
+ # @param event [String] name of the event
113
+ # @return [Boolean] returns true if this service is applicable for the provided event
114
def is_applicable?(event)
  # Only the preserve event triggers replication. The comparison uses === with the
  # event name on the left, which is the same match a case/when performs.
  EventNames::PRESERVE === event ? true : false
end
122
+
123
+ private
124
# Ask the destination whether it is available, adding the file and destination name to
# the error if it is not.
#
# @param destination [StorageLocation] the replication destination
# @param file_rec [FileRecord] the file being replicated, used for the error message
# @raise [StorageLocationUnavailableError] if the destination reports it is unavailable
def verify_destination_available(destination, file_rec)
  destination.available?
rescue StorageLocationUnavailableError => unavailable
  raise StorageLocationUnavailableError.new(
    "Cannot replicate #{file_rec.path} to destination #{destination.name}: #{unavailable.message}")
end
132
+
133
# Build the value for the Content-MD5 header from the file's recorded md5 checksum.
#
# @param file_rec [FileRecord] record whose metadata holds the checksums
# @return [String, nil] base64 encoding of the binary md5 digest, or nil if no md5 is recorded
def get_content_md5(file_rec)
  checksums = file_rec.metadata_record.checksums
  return nil unless checksums.key?('md5')

  # hex string -> raw bytes -> base64 without line breaks, as is required by the Content-Md5 header
  raw_digest = [checksums['md5']].pack("H*")
  [raw_digest].pack("m0")
end
142
+ end
143
+ end
@@ -1,46 +1,103 @@
1
1
  require 'longleaf/services/application_config_validator'
2
2
  require 'longleaf/services/application_config_manager'
3
+ require 'digest/md5'
4
+ require 'pathname'
3
5
 
4
- # Deserializer for application configuration files
5
6
  module Longleaf
7
+ # Deserializer for application configuration files
6
8
  class ApplicationConfigDeserializer
7
-
9
+ AF ||= Longleaf::AppFields
10
+
8
11
  # Deserializes a valid application configuration file as a ApplicationConfigManager option
9
- # @param config_path [String] file path to the application configuration file
12
+ # @param config_path [String] file path to the service and storage mapping configuration file
10
13
  # @param format [String] encoding format of the config file
11
- # return [Longleaf::ApplicationConfigManager] manager for the loaded configuration
14
+ # @return [ApplicationConfigManager] manager for the loaded configuration
12
15
def self.deserialize(config_path, format: 'yaml')
  raw_config = load_config_file(config_path)
  settings = load(raw_config, format)

  # Digest of the raw file content, passed to the manager alongside the parsed config
  content_digest = Digest::MD5.hexdigest(raw_config)

  # Resolve location paths relative to the config file before validating
  make_paths_absolute(config_path, settings)

  validation_result = ApplicationConfigValidator.new(settings).validate_config
  validation_result.raise_if_invalid

  ApplicationConfigManager.new(settings, content_digest)
end
18
-
26
+
27
# Read the raw contents of the configuration file.
#
# @param config_path [String] file path to the configuration file
# @return [String] contents of the file
# @raise [Longleaf::ConfigurationError] if the file does not exist
def self.load_config_file(config_path)
  File.read(config_path)
rescue Errno::ENOENT
  raise Longleaf::ConfigurationError.new(
    "Configuration file #{config_path} does not exist.")
end
35
+
19
36
  # Deserialize a configuration file into a hash
20
- # @param config_path [String] file path to the application configuration file
37
+ # @param content [String] the contents of the application configuration file
21
38
  # @param format [String] encoding format of the config file
22
39
  # return [Hash] hash containing the configuration
23
- def self.load(config_path, format: 'yaml')
40
def self.load(content, format)
  # yaml is the only supported encoding
  return from_yaml(content) if 'yaml' == format

  raise ArgumentError.new("Invalid deserialization format #{format} specified")
end
31
-
32
- private
33
- def self.from_yaml(config_path)
48
+
49
# Parse YAML content into plain Ruby data, allowing aliases but no custom classes or symbols.
#
# @param content [String] YAML text of the configuration
# @return [Object] the parsed configuration
# @raise [Longleaf::ConfigurationError] if the content cannot be parsed
def self.from_yaml(content)
  begin
    # Newer Psych versions accept these options only as keywords and reject the positional
    # form; older versions only understand the positional form. Detect which one is available.
    if YAML.method(:safe_load).parameters.include?([:key, :aliases])
      YAML.safe_load(content, permitted_classes: [], permitted_symbols: [], aliases: true)
    else
      YAML.safe_load(content, [], [], true)
    end
  rescue => err
    raise Longleaf::ConfigurationError.new(err)
  end
end
56
+
57
# Rewrite the path of every location, and of its metadata configuration, as an absolute path
# resolved against the directory containing the configuration file. The config is modified
# in place. A metadata configuration given as a bare string is expanded into hash form.
#
# @param config_path [String] file path to the configuration file
# @param config [Hash] the parsed configuration
def self.make_paths_absolute(config_path, config)
  base_pathname = Pathname.new(config_path).expand_path.parent

  # Leave missing or malformed sections untouched rather than failing here
  locations = config.is_a?(Hash) ? config[AF::LOCATIONS] : nil
  return unless locations.is_a?(Hash)

  locations.each do |_name, properties|
    next unless properties.is_a?(Hash)

    properties[AF::LOCATION_PATH] = make_file_paths_absolute(base_pathname, properties)

    # Resolve single field metadata location into expanded form
    md_config = properties[AF::METADATA_CONFIG]
    next if md_config.nil?

    if md_config.is_a?(String)
      # Key the string by the path field, since that is the field resolved below, and
      # store the expanded hash back so the result is not discarded.
      md_config = { AF::LOCATION_PATH => md_config }
      properties[AF::METADATA_CONFIG] = md_config
    end
    next unless md_config.is_a?(Hash)

    md_config[AF::LOCATION_PATH] = make_file_paths_absolute(base_pathname, md_config)
  end
end
74
+
75
# Resolve the path field of a location or metadata configuration to an absolute path.
# Values with no scheme or a "file" scheme are treated as filesystem paths; values with any
# other scheme are returned unchanged.
#
# @param base_pathname [Pathname] directory that relative paths are resolved against
# @param properties [Hash] configuration containing the path field
# @return [String, nil] the resolved path, or nil if no path is set
def self.make_file_paths_absolute(base_pathname, properties)
  path = properties[AF::LOCATION_PATH]
  return nil if path.nil?
  # Non-string values cannot be parsed; return them untouched
  return path unless path.is_a?(String)

  begin
    uri = URI(path)
  rescue URI::InvalidURIError
    # Ordinary filesystem paths (for example ones containing spaces) are not valid URIs;
    # treat them as plain file paths instead of failing.
    return absolution(base_pathname, path)
  end

  if uri.scheme.nil? || uri.scheme.casecmp("file") == 0
    absolution(base_pathname, uri.path)
  else
    path
  end
end
87
+
88
# Turn a file path into an absolute path string.
#
# @param base_pathname [Pathname] directory that relative paths are resolved against
# @param file_path [String, nil] the path to resolve
# @return [String, nil] the absolute path, or nil if file_path is nil
def self.absolution(base_pathname, file_path)
  return nil if file_path.nil?

  candidate = Pathname.new(file_path)
  # Absolute paths are only normalized; relative ones are appended to the base directory
  resolved = candidate.absolute? ? candidate.expand_path : base_pathname + candidate
  resolved.to_s
end
100
+
101
+ private_class_method :load_config_file
45
102
  end
46
- end
103
+ end