longleaf 0.2.0.pre.1 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (165)
  1. checksums.yaml +4 -4
  2. data/.circleci/config.yml +84 -0
  3. data/.gitignore +4 -2
  4. data/.rubocop.yml +42 -2
  5. data/.rubocop_todo.yml +390 -311
  6. data/.yardopts +1 -0
  7. data/Gemfile +16 -1
  8. data/README.md +67 -13
  9. data/Rakefile +6 -0
  10. data/bin/setup +16 -1
  11. data/docs/aboutlongleaf.md +28 -0
  12. data/docs/extra.css +32 -0
  13. data/docs/img/change-file.png +0 -0
  14. data/docs/img/ll-example-preserved.png +0 -0
  15. data/docs/index.md +19 -0
  16. data/docs/install.md +66 -0
  17. data/docs/ll-example/config-example-relative.yml +33 -0
  18. data/docs/ll-example/files-dir/LLexample-PDF.pdf +0 -0
  19. data/docs/ll-example/files-dir/LLexample-TOCHANGE.txt +15 -0
  20. data/docs/ll-example/files-dir/LLexample-tokeep.txt +10 -0
  21. data/docs/ll-example/metadata-dir/.gitkeep +0 -0
  22. data/docs/ll-example/replica-files/.gitkeep +0 -0
  23. data/docs/ll-example/replica-metadata/.gitkeep +0 -0
  24. data/docs/quickstart.md +270 -0
  25. data/docs/rdocs/Longleaf.html +135 -0
  26. data/docs/rdocs/Longleaf/AppFields.html +178 -0
  27. data/docs/rdocs/Longleaf/ApplicationConfigDeserializer.html +631 -0
  28. data/docs/rdocs/Longleaf/ApplicationConfigManager.html +610 -0
  29. data/docs/rdocs/Longleaf/ApplicationConfigValidator.html +238 -0
  30. data/docs/rdocs/Longleaf/CLI.html +909 -0
  31. data/docs/rdocs/Longleaf/ChecksumMismatchError.html +151 -0
  32. data/docs/rdocs/Longleaf/ConfigBuilder.html +1339 -0
  33. data/docs/rdocs/Longleaf/ConfigurationError.html +143 -0
  34. data/docs/rdocs/Longleaf/ConfigurationValidator.html +227 -0
  35. data/docs/rdocs/Longleaf/DeregisterCommand.html +420 -0
  36. data/docs/rdocs/Longleaf/DeregisterEvent.html +453 -0
  37. data/docs/rdocs/Longleaf/DeregistrationError.html +151 -0
  38. data/docs/rdocs/Longleaf/DigestHelper.html +419 -0
  39. data/docs/rdocs/Longleaf/EventError.html +147 -0
  40. data/docs/rdocs/Longleaf/EventNames.html +163 -0
  41. data/docs/rdocs/Longleaf/EventStatusTracking.html +656 -0
  42. data/docs/rdocs/Longleaf/FileCheckService.html +540 -0
  43. data/docs/rdocs/Longleaf/FileHelpers.html +520 -0
  44. data/docs/rdocs/Longleaf/FileRecord.html +716 -0
  45. data/docs/rdocs/Longleaf/FileSelector.html +901 -0
  46. data/docs/rdocs/Longleaf/FixityCheckService.html +691 -0
  47. data/docs/rdocs/Longleaf/IndexManager.html +1155 -0
  48. data/docs/rdocs/Longleaf/InvalidDigestAlgorithmError.html +143 -0
  49. data/docs/rdocs/Longleaf/InvalidStoragePathError.html +143 -0
  50. data/docs/rdocs/Longleaf/Logging.html +405 -0
  51. data/docs/rdocs/Longleaf/Logging/RedirectingLogger.html +1213 -0
  52. data/docs/rdocs/Longleaf/LongleafError.html +139 -0
  53. data/docs/rdocs/Longleaf/MDFields.html +193 -0
  54. data/docs/rdocs/Longleaf/MetadataBuilder.html +787 -0
  55. data/docs/rdocs/Longleaf/MetadataDeserializer.html +537 -0
  56. data/docs/rdocs/Longleaf/MetadataError.html +143 -0
  57. data/docs/rdocs/Longleaf/MetadataPersistenceManager.html +539 -0
  58. data/docs/rdocs/Longleaf/MetadataRecord.html +1411 -0
  59. data/docs/rdocs/Longleaf/MetadataSerializer.html +786 -0
  60. data/docs/rdocs/Longleaf/PreservationServiceError.html +147 -0
  61. data/docs/rdocs/Longleaf/PreserveCommand.html +410 -0
  62. data/docs/rdocs/Longleaf/PreserveEvent.html +491 -0
  63. data/docs/rdocs/Longleaf/RegisterCommand.html +428 -0
  64. data/docs/rdocs/Longleaf/RegisterEvent.html +628 -0
  65. data/docs/rdocs/Longleaf/RegisteredFileSelector.html +446 -0
  66. data/docs/rdocs/Longleaf/RegistrationError.html +151 -0
  67. data/docs/rdocs/Longleaf/ReindexCommand.html +576 -0
  68. data/docs/rdocs/Longleaf/RsyncReplicationService.html +1180 -0
  69. data/docs/rdocs/Longleaf/SequelIndexDriver.html +1978 -0
  70. data/docs/rdocs/Longleaf/ServiceCandidateFilesystemIterator.html +572 -0
  71. data/docs/rdocs/Longleaf/ServiceCandidateIndexIterator.html +532 -0
  72. data/docs/rdocs/Longleaf/ServiceCandidateLocator.html +333 -0
  73. data/docs/rdocs/Longleaf/ServiceClassCache.html +725 -0
  74. data/docs/rdocs/Longleaf/ServiceDateHelper.html +425 -0
  75. data/docs/rdocs/Longleaf/ServiceDefinition.html +683 -0
  76. data/docs/rdocs/Longleaf/ServiceDefinitionManager.html +371 -0
  77. data/docs/rdocs/Longleaf/ServiceDefinitionValidator.html +269 -0
  78. data/docs/rdocs/Longleaf/ServiceFields.html +173 -0
  79. data/docs/rdocs/Longleaf/ServiceManager.html +1229 -0
  80. data/docs/rdocs/Longleaf/ServiceMappingManager.html +410 -0
  81. data/docs/rdocs/Longleaf/ServiceMappingValidator.html +347 -0
  82. data/docs/rdocs/Longleaf/ServiceRecord.html +821 -0
  83. data/docs/rdocs/Longleaf/StorageLocation.html +985 -0
  84. data/docs/rdocs/Longleaf/StorageLocationManager.html +729 -0
  85. data/docs/rdocs/Longleaf/StorageLocationUnavailableError.html +143 -0
  86. data/docs/rdocs/Longleaf/StorageLocationValidator.html +373 -0
  87. data/docs/rdocs/Longleaf/StoragePathValidator.html +253 -0
  88. data/docs/rdocs/Longleaf/SystemConfigBuilder.html +441 -0
  89. data/docs/rdocs/Longleaf/SystemConfigFields.html +163 -0
  90. data/docs/rdocs/Longleaf/ValidateConfigCommand.html +451 -0
  91. data/docs/rdocs/Longleaf/ValidateMetadataCommand.html +408 -0
  92. data/docs/rdocs/_index.html +660 -0
  93. data/docs/rdocs/class_list.html +51 -0
  94. data/docs/rdocs/css/common.css +1 -0
  95. data/docs/rdocs/css/full_list.css +58 -0
  96. data/docs/rdocs/css/style.css +496 -0
  97. data/docs/rdocs/file.README.html +165 -0
  98. data/docs/rdocs/file_list.html +56 -0
  99. data/docs/rdocs/frames.html +17 -0
  100. data/docs/rdocs/index.html +165 -0
  101. data/docs/rdocs/js/app.js +303 -0
  102. data/docs/rdocs/js/full_list.js +216 -0
  103. data/docs/rdocs/js/jquery.js +4 -0
  104. data/docs/rdocs/method_list.html +2051 -0
  105. data/docs/rdocs/top-level-namespace.html +110 -0
  106. data/lib/longleaf/candidates/file_selector.rb +47 -15
  107. data/lib/longleaf/candidates/registered_file_selector.rb +67 -0
  108. data/lib/longleaf/candidates/service_candidate_filesystem_iterator.rb +29 -35
  109. data/lib/longleaf/candidates/service_candidate_index_iterator.rb +84 -0
  110. data/lib/longleaf/candidates/service_candidate_locator.rb +9 -4
  111. data/lib/longleaf/cli.rb +162 -80
  112. data/lib/longleaf/commands/deregister_command.rb +12 -11
  113. data/lib/longleaf/commands/preserve_command.rb +13 -8
  114. data/lib/longleaf/commands/register_command.rb +9 -6
  115. data/lib/longleaf/commands/reindex_command.rb +92 -0
  116. data/lib/longleaf/commands/validate_config_command.rb +27 -6
  117. data/lib/longleaf/commands/validate_metadata_command.rb +11 -9
  118. data/lib/longleaf/errors.rb +12 -12
  119. data/lib/longleaf/events/deregister_event.rb +13 -15
  120. data/lib/longleaf/events/event_status_tracking.rb +7 -7
  121. data/lib/longleaf/events/preserve_event.rb +24 -14
  122. data/lib/longleaf/events/register_event.rb +21 -35
  123. data/lib/longleaf/helpers/digest_helper.rb +4 -4
  124. data/lib/longleaf/helpers/service_date_helper.rb +5 -6
  125. data/lib/longleaf/indexing/index_manager.rb +101 -0
  126. data/lib/longleaf/indexing/sequel_index_driver.rb +324 -0
  127. data/lib/longleaf/logging.rb +4 -4
  128. data/lib/longleaf/logging/redirecting_logger.rb +20 -20
  129. data/lib/longleaf/models/app_fields.rb +2 -1
  130. data/lib/longleaf/models/file_record.rb +10 -6
  131. data/lib/longleaf/models/md_fields.rb +1 -1
  132. data/lib/longleaf/models/metadata_record.rb +22 -12
  133. data/lib/longleaf/models/service_definition.rb +3 -3
  134. data/lib/longleaf/models/service_fields.rb +1 -1
  135. data/lib/longleaf/models/service_record.rb +6 -5
  136. data/lib/longleaf/models/storage_location.rb +26 -7
  137. data/lib/longleaf/models/system_config_fields.rb +9 -0
  138. data/lib/longleaf/preservation_services/file_check_service.rb +58 -0
  139. data/lib/longleaf/preservation_services/fixity_check_service.rb +16 -14
  140. data/lib/longleaf/preservation_services/rsync_replication_service.rb +32 -31
  141. data/lib/longleaf/services/application_config_deserializer.rb +55 -18
  142. data/lib/longleaf/services/application_config_manager.rb +16 -4
  143. data/lib/longleaf/services/application_config_validator.rb +1 -2
  144. data/lib/longleaf/services/configuration_validator.rb +6 -4
  145. data/lib/longleaf/services/metadata_deserializer.rb +40 -38
  146. data/lib/longleaf/services/metadata_persistence_manager.rb +46 -0
  147. data/lib/longleaf/services/metadata_serializer.rb +23 -22
  148. data/lib/longleaf/services/service_class_cache.rb +15 -15
  149. data/lib/longleaf/services/service_definition_manager.rb +5 -6
  150. data/lib/longleaf/services/service_definition_validator.rb +5 -6
  151. data/lib/longleaf/services/service_manager.rb +37 -17
  152. data/lib/longleaf/services/service_mapping_manager.rb +9 -9
  153. data/lib/longleaf/services/service_mapping_validator.rb +9 -10
  154. data/lib/longleaf/services/storage_location_manager.rb +22 -8
  155. data/lib/longleaf/services/storage_location_validator.rb +11 -8
  156. data/lib/longleaf/services/storage_path_validator.rb +1 -1
  157. data/lib/longleaf/specs/config_builder.rb +30 -17
  158. data/lib/longleaf/specs/custom_matchers.rb +1 -1
  159. data/lib/longleaf/specs/file_helpers.rb +15 -14
  160. data/lib/longleaf/specs/metadata_builder.rb +91 -0
  161. data/lib/longleaf/specs/system_config_builder.rb +27 -0
  162. data/lib/longleaf/version.rb +1 -1
  163. data/longleaf.gemspec +17 -7
  164. data/mkdocs.yml +20 -0
  165. metadata +233 -22
@@ -15,9 +15,9 @@ module Longleaf
15
15
  # * 'generate' - the service will generate and store any missing digests from the set of configured algorithms.
16
16
  class FixityCheckService
17
17
  include Longleaf::Logging
18
-
18
+
19
19
  SUPPORTED_ALGORITHMS = ['md5', 'sha1', 'sha2', 'sha256', 'sha384', 'sha512', 'rmd160']
20
-
20
+
21
21
  # service configuration property indicating how to handle situations where a file does not
22
22
  # have a digest for one of the expected algorithms on record.
23
23
  ABSENT_DIGEST_PROPERTY = 'absent_digest'
@@ -25,7 +25,7 @@ module Longleaf
25
25
  GENERATE_IF_ABSENT = 'generate'
26
26
  IGNORE_IF_ABSENT = 'ignore'
27
27
  ABSENT_DIGEST_OPTIONS = [FAIL_IF_ABSENT, GENERATE_IF_ABSENT, IGNORE_IF_ABSENT]
28
-
28
+
29
29
  # Initialize a FixityCheckService from the given service definition
30
30
  #
31
31
  # @param service_def [ServiceDefinition] the configuration for this service
@@ -36,12 +36,14 @@ module Longleaf
36
36
  unless ABSENT_DIGEST_OPTIONS.include?(@absent_digest_behavior)
37
37
  raise ArgumentError.new("Invalid option '#{@absent_digest_behavior}' for property #{ABSENT_DIGEST_PROPERTY} in service #{service_def.name}")
38
38
  end
39
-
39
+
40
40
  service_algs = service_def.properties[ServiceFields::DIGEST_ALGORITHMS]
41
41
  if service_algs.nil? || service_algs.empty?
42
42
  raise ArgumentError.new("FixityCheckService from definition #{service_def.name} requires a list of one or more digest algorithms")
43
43
  end
44
-
44
+
45
+ service_algs = [service_algs] if service_algs.is_a?(String)
46
+
45
47
  # Store the list of digest algorithms to verify, using normalized algorithm names.
46
48
  @digest_algs = Set.new
47
49
  service_algs.each do |alg|
@@ -49,11 +51,11 @@ module Longleaf
49
51
  if SUPPORTED_ALGORITHMS.include?(normalized_alg)
50
52
  @digest_algs << normalized_alg
51
53
  else
52
- raise ArgumentError.new("Unsupported checksum algorithm '#{alg}' in definition #{service_def.name}. Supported algorithms are: #{SUPPORTED_ALGORITHMS.to_s}")
54
+ raise ArgumentError.new("Unsupported checksum algorithm '#{alg}' in definition #{service_def.name}. Supported algorithms are: #{SUPPORTED_ALGORITHMS}")
53
55
  end
54
56
  end
55
57
  end
56
-
58
+
57
59
  # Perform all configured fixity checks on the provided file
58
60
  #
59
61
  # @param file_rec [FileRecord] record representing the file to perform the service on.
@@ -62,7 +64,7 @@ module Longleaf
62
64
  def perform(file_rec, event)
63
65
  path = file_rec.path
64
66
  md_rec = file_rec.metadata_record
65
-
67
+
66
68
  # Get the list of existing checksums for the file and normalize algorithm names
67
69
  file_digests = Hash.new
68
70
  md_rec.checksums&.each do |alg, digest|
@@ -73,10 +75,10 @@ module Longleaf
73
75
  logger.debug("Metadata for file #{path} contains unexpected '#{alg}' digest, it will be ignored.")
74
76
  end
75
77
  end
76
-
78
+
77
79
  @digest_algs.each do |alg|
78
80
  existing_digest = file_digests[alg]
79
-
81
+
80
82
  if existing_digest.nil?
81
83
  if @absent_digest_behavior == FAIL_IF_ABSENT
82
84
  raise ChecksumMismatchError.new("Fixity check using algorithm '#{alg}' failed for file #{path}: no existing digest of type '#{alg}' on record.")
@@ -85,11 +87,11 @@ module Longleaf
85
87
  next
86
88
  end
87
89
  end
88
-
90
+
89
91
  digest = DigestHelper::start_digest(alg)
90
92
  digest.file(path)
91
93
  generated_digest = digest.hexdigest
92
-
94
+
93
95
  # Store the missing checksum if using the 'generate' behavior
94
96
  if existing_digest.nil? && @absent_digest_behavior == GENERATE_IF_ABSENT
95
97
  md_rec.checksums[alg] = generated_digest
@@ -104,7 +106,7 @@ module Longleaf
104
106
  end
105
107
  end
106
108
  end
107
-
109
+
108
110
  # Determine if this service is applicable for the provided event, given the configured service definition
109
111
  #
110
112
  # @param event [String] name of the event
@@ -118,4 +120,4 @@ module Longleaf
118
120
  end
119
121
  end
120
122
  end
121
- end
123
+ end
@@ -17,58 +17,59 @@ module Longleaf
17
17
  # a file which already exists at a destination. Default: "replace".
18
18
  # * rsync_command = the command to invoke in order to execute rsync. Default: "rsync"
19
19
  # * rsync_options = additional parameters that will be passed along to rsync. Cannot include options
20
- # which change the target of the command or prevent its execution, such as "files-from", "dry-run",
20
+ # which change the target of the command or prevent its execution, such as "files-from", "dry-run",
21
21
  # "help", etc. Command will always include "-R". Default "-a".
22
22
  class RsyncReplicationService
23
23
  include Longleaf::Logging
24
-
24
+
25
25
  COLLISION_PROPERTY = "replica_collision_policy"
26
26
  DEFAULT_COLLISION_POLICY = "replace"
27
27
  VALID_COLLISION_POLICIES = ["replace"]
28
-
28
+
29
29
  RSYNC_COMMAND_PROPERTY = "rsync_command"
30
30
  DEFAULT_COMMAND = "rsync"
31
-
31
+
32
32
  RSYNC_OPTIONS_PROPERTY = "rsync_options"
33
33
  DEFAULT_OPTIONS = "-a"
34
34
  DISALLOWED_OPTIONS = ["files-from", "n", "dry-run", "exclude", "exclude-from", "cvs-exclude",
35
35
  "h", "help", "f", "F", "filter"]
36
-
36
+
37
37
  attr_reader :command, :options, :collision_policy
38
-
38
+
39
39
  # Initialize a RsyncReplicationService from the given service definition
40
40
  #
41
41
  # @param service_def [ServiceDefinition] the configuration for this service
42
- # @param location_manager [StorageLocationManager] manager for configured storage locations
42
+ # @param app_manager [ApplicationConfigManager] the application configuration
43
43
  def initialize(service_def, app_manager)
44
44
  @service_def = service_def
45
45
  @app_manager = app_manager
46
-
46
+
47
47
  @command = @service_def.properties[RSYNC_COMMAND_PROPERTY] || DEFAULT_COMMAND
48
-
48
+
49
49
  # Validate rsync parameters
50
50
  @options = @service_def.properties[RSYNC_OPTIONS_PROPERTY] || DEFAULT_OPTIONS
51
51
  if contains_disallowed_option?(@options)
52
52
  raise ArgumentError.new("Service #{service_def.name} specifies a disallowed rsync paramter," \
53
- + " rsync_options may not include the following: #{DISALLOWED_OPTIONS.join(" ")}")
53
+ + " rsync_options may not include the following: #{DISALLOWED_OPTIONS.join(' ')}")
54
54
  end
55
-
55
+
56
56
  # Add -R (--relative) in to command options to ensure full path gets replicated
57
57
  @options = @options + " -R"
58
-
58
+
59
59
  # Set and validate the replica collision policy
60
60
  @collision_policy = @service_def.properties[COLLISION_PROPERTY] || DEFAULT_COLLISION_POLICY
61
61
  if !VALID_COLLISION_POLICIES.include?(@collision_policy)
62
62
  raise ArgumentError.new("Service #{service_def.name} received invalid #{COLLISION_PROPERTY}" \
63
63
  + " value #{collision_policy}")
64
64
  end
65
-
65
+
66
66
  # Store and validate destinations
67
67
  replicate_to = @service_def.properties[ServiceFields::REPLICATE_TO]
68
68
  if replicate_to.nil? || replicate_to.empty?
69
69
  raise ArgumentError.new("Service #{service_def.name} must provide one or more replication destinations.")
70
70
  end
71
-
71
+ replicate_to = [replicate_to] if replicate_to.is_a?(String)
72
+
72
73
  loc_manager = app_manager.location_manager
73
74
  # Build list of destinations, translating to storage locations when relevant
74
75
  @destinations = Array.new
@@ -86,7 +87,7 @@ module Longleaf
86
87
  end
87
88
  end
88
89
  end
89
-
90
+
90
91
  # During a replication event, perform replication of the specified file to all configured destinations
91
92
  # as necessary.
92
93
  #
@@ -96,35 +97,35 @@ module Longleaf
96
97
  def perform(file_rec, event)
97
98
  @destinations.each do |destination|
98
99
  dest_is_storage_loc = destination.is_a?(Longleaf::StorageLocation)
99
-
100
+
100
101
  if dest_is_storage_loc
101
102
  dest_path = destination.path
102
103
  else
103
104
  dest_path = destination
104
105
  end
105
-
106
+
106
107
  # Determine the path to the file being replicated relative to its storage location
107
108
  rel_path = file_rec.path.sub(/\A#{file_rec.storage_location.path}/, "")
108
109
  # source path with . so that rsync will only create destination directories starting from that point
109
110
  source_path = File.join(file_rec.storage_location.path, "./#{rel_path}")
110
-
111
+
111
112
  # Check that the destination is available before attempting to write
112
113
  verify_destination_available(destination, file_rec)
113
-
114
+
114
115
  logger.debug("Invoking rsync with command: #{@command} \"#{source_path}\" \"#{dest_path}\" #{@options}")
115
116
  stdout, stderr, status = Open3.capture3("#{@command} \"#{source_path}\" \"#{dest_path}\" #{@options}")
116
117
  raise PreservationServiceError.new("Failed to replicate #{file_rec.path} to #{dest_path}: #{stderr}") \
117
118
  unless status.success?
118
-
119
+
119
120
  logger.info("Replicated #{file_rec.path} to destination #{dest_path}")
120
-
121
+
121
122
  # For destinations which are storage locations, register the replica with longleaf
122
123
  if dest_is_storage_loc
123
124
  register_replica(destination, rel_path, file_rec)
124
125
  end
125
126
  end
126
127
  end
127
-
128
+
128
129
  # Determine if this service is applicable for the provided event, given the configured service definition
129
130
  #
130
131
  # @param event [String] name of the event
@@ -137,7 +138,7 @@ module Longleaf
137
138
  false
138
139
  end
139
140
  end
140
-
141
+
141
142
  private
142
143
  def contains_disallowed_option?(options)
143
144
  DISALLOWED_OPTIONS.each do |disallowed|
@@ -151,33 +152,33 @@ module Longleaf
151
152
  end
152
153
  end
153
154
  end
154
-
155
+
155
156
  false
156
157
  end
157
-
158
+
158
159
  def verify_destination_available(destination, file_rec)
159
160
  if destination.is_a?(Longleaf::StorageLocation)
160
161
  begin
161
162
  destination.available?
162
163
  rescue StorageLocationUnavailableError => e
163
- raise StorageLocationUnavailableError.new("Cannot replicate #{file_rec.path} to destination" \
164
- + " storage location #{destination.name}, it is unavailable.") unless destination.available?
164
+ raise StorageLocationUnavailableError.new("Cannot replicate #{file_rec.path} to destination #{destination.name}: " \
165
+ + e.message)
165
166
  end
166
167
  elsif destination.start_with?("/")
167
168
  raise StorageLocationUnavailableError.new("Cannot replicate #{file_rec.path} to destination" \
168
169
  + " #{destination}, path does not exist.") unless Dir.exist?(destination)
169
170
  end
170
171
  end
171
-
172
+
172
173
  def register_replica(destination, rel_path, file_rec)
173
174
  dest_file_path = File.join(destination.path, rel_path)
174
175
  dest_file_rec = FileRecord.new(dest_file_path, destination)
175
-
176
+
176
177
  register_event = RegisterEvent.new(file_rec: dest_file_rec,
177
178
  app_manager: @app_manager,
178
- force: true,
179
+ force: true,
179
180
  checksums: file_rec.metadata_record.checksums)
180
181
  register_event.perform
181
182
  end
182
183
  end
183
- end
184
+ end
@@ -1,44 +1,81 @@
1
1
  require 'longleaf/services/application_config_validator'
2
2
  require 'longleaf/services/application_config_manager'
3
+ require 'digest/md5'
4
+ require 'pathname'
3
5
 
4
6
  module Longleaf
5
7
  # Deserializer for application configuration files
6
8
  class ApplicationConfigDeserializer
7
-
9
+ AF ||= Longleaf::AppFields
10
+
8
11
  # Deserializes a valid application configuration file as a ApplicationConfigManager option
9
- # @param config_path [String] file path to the application configuration file
12
+ # @param config_path [String] file path to the service and storage mapping configuration file
10
13
  # @param format [String] encoding format of the config file
11
14
  # return [ApplicationConfigManager] manager for the loaded configuration
12
15
  def self.deserialize(config_path, format: 'yaml')
13
- config = load(config_path, format: format)
14
-
16
+ content = load_config_file(config_path)
17
+ config = load(content, format)
18
+
19
+ config_md5 = Digest::MD5.hexdigest(content)
20
+
21
+ make_paths_absolute(config_path, config)
15
22
  Longleaf::ApplicationConfigValidator.validate(config)
16
- Longleaf::ApplicationConfigManager.new(config)
23
+ Longleaf::ApplicationConfigManager.new(config, config_md5)
24
+ end
25
+
26
+ def self.load_config_file(config_path)
27
+ begin
28
+ File.read(config_path)
29
+ rescue Errno::ENOENT
30
+ raise Longleaf::ConfigurationError.new(
31
+ "Configuration file #{config_path} does not exist.")
32
+ end
17
33
  end
18
-
34
+
19
35
  # Deserialize a configuration file into a hash
20
- # @param config_path [String] file path to the application configuration file
36
+ # @param content [String] the contents of the application configuration file
21
37
  # @param format [String] encoding format of the config file
22
38
  # return [Hash] hash containing the configuration
23
- def self.load(config_path, format: 'yaml')
39
+ def self.load(content, format)
24
40
  case format
25
41
  when 'yaml'
26
- from_yaml(config_path)
42
+ from_yaml(content)
27
43
  else
28
- raise ArgumentError.new('Invalid deserialization format #{format} specified')
44
+ raise ArgumentError.new("Invalid deserialization format #{format} specified")
29
45
  end
30
46
  end
31
-
32
- private
33
- def self.from_yaml(config_path)
47
+
48
+ def self.from_yaml(content)
34
49
  begin
35
- YAML.load_file(config_path)
36
- rescue Errno::ENOENT => err
37
- raise Longleaf::ConfigurationError.new(
38
- "Configuration file #{config_path} does not exist.")
50
+ YAML.safe_load(content, [], [], true)
39
51
  rescue => err
40
52
  raise Longleaf::ConfigurationError.new(err)
41
53
  end
42
54
  end
55
+
56
+ def self.make_paths_absolute(config_path, config)
57
+ base_pathname = Pathname.new(config_path).expand_path.parent
58
+
59
+ config[AF::LOCATIONS].each do |name, properties|
60
+ properties[AF::LOCATION_PATH] = absolution(base_pathname, properties[AF::LOCATION_PATH])
61
+
62
+ properties[AF::METADATA_PATH] = absolution(base_pathname, properties[AF::METADATA_PATH])
63
+ end
64
+ end
65
+
66
+ def self.absolution(base_pathname, file_path)
67
+ if file_path.nil?
68
+ nil
69
+ else
70
+ path = Pathname.new(file_path)
71
+ if path.absolute?
72
+ path = path.expand_path.to_s
73
+ else
74
+ path = (base_pathname + path).to_s
75
+ end
76
+ end
77
+ end
78
+
79
+ private_class_method :load_config_file
43
80
  end
44
- end
81
+ end
@@ -5,22 +5,34 @@ require_relative 'service_definition_manager'
5
5
  require_relative 'service_mapping_validator'
6
6
  require_relative 'service_mapping_manager'
7
7
  require_relative 'service_manager'
8
+ require_relative 'metadata_persistence_manager'
9
+ require 'longleaf/indexing/index_manager'
10
+ require 'longleaf/models/app_fields'
8
11
 
9
12
  module Longleaf
10
13
  # Manager which loads and provides access to the configuration of the application
11
14
  class ApplicationConfigManager
15
+ attr_reader :config_md5
12
16
  attr_reader :service_manager
13
17
  attr_reader :location_manager
14
-
15
- def initialize(config)
18
+ attr_reader :index_manager
19
+ attr_reader :md_manager
20
+
21
+ def initialize(config, config_md5 = nil)
22
+ @config_md5 = config_md5
23
+
16
24
  @location_manager = Longleaf::StorageLocationManager.new(config)
17
-
25
+
18
26
  definition_manager = Longleaf::ServiceDefinitionManager.new(config)
19
27
  mapping_manager = Longleaf::ServiceMappingManager.new(config)
20
28
  @service_manager = Longleaf::ServiceManager.new(
21
29
  definition_manager: definition_manager,
22
30
  mapping_manager: mapping_manager,
23
31
  app_manager: self)
32
+
33
+ sys_config = config[AppFields::SYSTEM]
34
+ @index_manager = IndexManager.new(sys_config, self)
35
+ @md_manager = MetadataPersistenceManager.new(@index_manager)
24
36
  end
25
37
  end
26
- end
38
+ end
@@ -5,7 +5,6 @@ require_relative 'service_mapping_validator'
5
5
  module Longleaf
6
6
  # Validator for Longleaf application configuration
7
7
  class ApplicationConfigValidator
8
-
9
8
  # Validates the application configuration provided. Will raise ConfigurationError
10
9
  # if any portion of the configuration is not syntactically or semantically valid.
11
10
  # @param config [Hash] application configuration
@@ -15,4 +14,4 @@ module Longleaf
15
14
  Longleaf::ServiceMappingValidator::validate_config(config)
16
15
  end
17
16
  end
18
- end
17
+ end
@@ -1,9 +1,11 @@
1
1
  module Longleaf
2
2
  # Abstract configuration validator class
3
3
  class ConfigurationValidator
4
- protected
5
- def self.assert(fail_message, assertion_passed)
6
- raise ConfigurationError.new(fail_message) unless assertion_passed
4
+ class << self
5
+ protected
6
+ def assert(fail_message, assertion_passed)
7
+ raise ConfigurationError.new(fail_message) unless assertion_passed
8
+ end
7
9
  end
8
10
  end
9
- end
11
+ end
@@ -9,7 +9,7 @@ module Longleaf
9
9
  class MetadataDeserializer
10
10
  extend Longleaf::Logging
11
11
  MDF ||= MDFields
12
-
12
+
13
13
  # Deserialize a file into a MetadataRecord object
14
14
  #
15
15
  # @param file_path [String] path of the file to read. Required.
@@ -19,13 +19,13 @@ module Longleaf
19
19
  when 'yaml'
20
20
  md = from_yaml(file_path, digest_algs)
21
21
  else
22
- raise ArgumentError.new('Invalid deserialization format #{format} specified')
22
+ raise ArgumentError.new("Invalid deserialization format #{format} specified")
23
23
  end
24
-
24
+
25
25
  if !md || !md.is_a?(Hash) || !md.key?(MDF::DATA) || !md.key?(MDF::SERVICES)
26
26
  raise Longleaf::MetadataError.new("Invalid metadata file, did not contain data or services fields: #{file_path}")
27
27
  end
28
-
28
+
29
29
  data = Hash.new.merge(md[MDF::DATA])
30
30
  # Extract reserved properties for submission as separate parameters
31
31
  registered = data.delete(MDFields::REGISTERED_TIMESTAMP)
@@ -33,50 +33,52 @@ module Longleaf
33
33
  checksums = data.delete(MDFields::CHECKSUMS)
34
34
  file_size = data.delete(MDFields::FILE_SIZE)
35
35
  last_modified = data.delete(MDFields::LAST_MODIFIED)
36
-
36
+
37
37
  services = md[MDF::SERVICES]
38
38
  service_records = Hash.new
39
- unless services.nil?
40
- services.each do |name, props|
41
- raise Longleaf::MetadataError.new("Value of service #{name} must be a hash") unless props.class == Hash
42
-
43
- service_props = Hash.new.merge(props)
44
-
45
- stale_replicas = service_props.delete(MDFields::STALE_REPLICAS)
46
- timestamp = service_props.delete(MDFields::SERVICE_TIMESTAMP)
47
- run_needed = service_props.delete(MDFields::RUN_NEEDED)
48
-
49
- service_records[name] = ServiceRecord.new(
50
- properties: service_props,
51
- stale_replicas: stale_replicas,
52
- timestamp: timestamp,
53
- run_needed: run_needed)
54
- end
39
+ services&.each do |name, props|
40
+ raise Longleaf::MetadataError.new("Value of service #{name} must be a hash") unless props.class == Hash
41
+
42
+ service_props = Hash.new.merge(props)
43
+
44
+ stale_replicas = service_props.delete(MDFields::STALE_REPLICAS)
45
+ timestamp = service_props.delete(MDFields::SERVICE_TIMESTAMP)
46
+ run_needed = service_props.delete(MDFields::RUN_NEEDED)
47
+
48
+ service_records[name] = ServiceRecord.new(
49
+ properties: service_props,
50
+ stale_replicas: stale_replicas,
51
+ timestamp: timestamp,
52
+ run_needed: run_needed)
55
53
  end
56
-
54
+
57
55
  MetadataRecord.new(properties: data,
58
- services: service_records,
59
- registered: registered,
60
- deregistered: deregistered,
61
- checksums: checksums,
62
- file_size: file_size,
63
- last_modified: last_modified)
56
+ services: service_records,
57
+ registered: registered,
58
+ deregistered: deregistered,
59
+ checksums: checksums,
60
+ file_size: file_size,
61
+ last_modified: last_modified)
64
62
  end
65
-
63
+
66
64
  # Load configuration from a yaml encoded configuration file
67
65
  def self.from_yaml(file_path, digest_algs)
68
66
  File.open(file_path, 'r:bom|utf-8') do |f|
69
67
  contents = f.read
70
-
68
+
71
69
  verify_digests(file_path, contents, digest_algs)
72
-
73
- YAML.load(contents)
70
+
71
+ begin
72
+ YAML.safe_load(contents, [], [], true)
73
+ rescue => err
74
+ raise Longleaf::MetadataError.new("Failed to parse metadata file #{file_path}: #{err.message}")
75
+ end
74
76
  end
75
77
  end
76
-
78
+
77
79
  def self.verify_digests(file_path, contents, digest_algs)
78
80
  return if digest_algs.nil? || digest_algs.empty?
79
-
81
+
80
82
  digest_algs.each do |alg|
81
83
  if file_path.respond_to?(:path)
82
84
  path = file_path.path
@@ -88,18 +90,18 @@ module Longleaf
88
90
  logger.warn("Missing expected #{alg} digest for #{path}")
89
91
  next
90
92
  end
91
-
93
+
92
94
  digest = DigestHelper::start_digest(alg)
93
95
  result = digest.hexdigest(contents)
94
96
  existing_digest = IO.read(digest_path)
95
-
97
+
96
98
  if result == existing_digest
97
99
  logger.info("Metadata fixity check using algorithm '#{alg}' succeeded for file #{path}")
98
100
  else
99
101
  raise ChecksumMismatchError.new("Metadata digest of type #{alg} did not match the contents of #{path}:" \
100
- + " expected #{existing_digest}, calculated #{result}")
102
+ + " expected #{existing_digest}, calculated #{result}")
101
103
  end
102
104
  end
103
105
  end
104
106
  end
105
- end
107
+ end