longleaf 0.3.0 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49) hide show
  1. checksums.yaml +4 -4
  2. data/.circleci/config.yml +12 -2
  3. data/README.md +11 -1
  4. data/lib/longleaf/candidates/manifest_digest_provider.rb +17 -0
  5. data/lib/longleaf/candidates/single_digest_provider.rb +13 -0
  6. data/lib/longleaf/cli.rb +49 -36
  7. data/lib/longleaf/commands/register_command.rb +3 -3
  8. data/lib/longleaf/commands/validate_config_command.rb +1 -1
  9. data/lib/longleaf/events/register_event.rb +8 -4
  10. data/lib/longleaf/helpers/case_insensitive_hash.rb +38 -0
  11. data/lib/longleaf/helpers/digest_helper.rb +7 -1
  12. data/lib/longleaf/helpers/s3_uri_helper.rb +86 -0
  13. data/lib/longleaf/helpers/selection_options_parser.rb +189 -0
  14. data/lib/longleaf/helpers/service_date_helper.rb +29 -1
  15. data/lib/longleaf/indexing/sequel_index_driver.rb +2 -20
  16. data/lib/longleaf/models/app_fields.rb +4 -2
  17. data/lib/longleaf/models/filesystem_metadata_location.rb +56 -0
  18. data/lib/longleaf/models/filesystem_storage_location.rb +52 -0
  19. data/lib/longleaf/models/metadata_location.rb +47 -0
  20. data/lib/longleaf/models/metadata_record.rb +3 -1
  21. data/lib/longleaf/models/s3_storage_location.rb +133 -0
  22. data/lib/longleaf/models/service_fields.rb +4 -0
  23. data/lib/longleaf/models/storage_location.rb +17 -48
  24. data/lib/longleaf/models/storage_types.rb +9 -0
  25. data/lib/longleaf/preservation_services/rsync_replication_service.rb +9 -11
  26. data/lib/longleaf/preservation_services/s3_replication_service.rb +143 -0
  27. data/lib/longleaf/services/application_config_deserializer.rb +26 -4
  28. data/lib/longleaf/services/application_config_validator.rb +17 -6
  29. data/lib/longleaf/services/configuration_validator.rb +64 -4
  30. data/lib/longleaf/services/filesystem_location_validator.rb +16 -0
  31. data/lib/longleaf/services/metadata_deserializer.rb +41 -9
  32. data/lib/longleaf/services/metadata_persistence_manager.rb +3 -2
  33. data/lib/longleaf/services/metadata_serializer.rb +94 -13
  34. data/lib/longleaf/services/metadata_validator.rb +76 -0
  35. data/lib/longleaf/services/s3_location_validator.rb +19 -0
  36. data/lib/longleaf/services/service_definition_validator.rb +16 -8
  37. data/lib/longleaf/services/service_manager.rb +7 -15
  38. data/lib/longleaf/services/service_mapping_validator.rb +26 -15
  39. data/lib/longleaf/services/storage_location_manager.rb +38 -12
  40. data/lib/longleaf/services/storage_location_validator.rb +41 -30
  41. data/lib/longleaf/specs/config_builder.rb +10 -3
  42. data/lib/longleaf/specs/config_validator_helpers.rb +16 -0
  43. data/lib/longleaf/specs/metadata_builder.rb +1 -0
  44. data/lib/longleaf/version.rb +1 -1
  45. data/longleaf.gemspec +3 -1
  46. data/mkdocs.yml +2 -1
  47. metadata +48 -8
  48. data/.travis.yml +0 -4
  49. data/lib/longleaf/services/storage_path_validator.rb +0 -16
@@ -0,0 +1,189 @@
1
+ require 'longleaf/candidates/file_selector'
2
+ require 'longleaf/candidates/registered_file_selector'
3
+ require 'longleaf/candidates/manifest_digest_provider'
4
+ require 'longleaf/candidates/single_digest_provider'
5
+
6
+ module Longleaf
7
+ # Helper for parsing manifest inputs used for registration
8
+ class SelectionOptionsParser
9
+ extend Longleaf::Logging
10
+
11
+ # Parses the provided options to construct a file selector and digest provider for
12
+ # use in registration commands.
13
+ # @param options [Hash] command options
14
+ # @param app_config_manager [ApplicationConfigManager] app config manager
15
+ # @return The file selector and digest provider.
16
+ def self.parse_registration_selection_options(options, app_config_manager)
17
+ there_can_be_only_one("Only one of the following selection options may be provided: -m, -f, -s",
18
+ options, :file, :manifest, :location)
19
+
20
+ if !options[:manifest].nil?
21
+ digests_mapping = self.manifests_to_digest_mapping(options[:manifest])
22
+ selector = FileSelector.new(file_paths: digests_mapping.keys, app_config: app_config_manager)
23
+ digest_provider = ManifestDigestProvider.new(digests_mapping)
24
+ elsif !options[:file].nil?
25
+ if options[:checksums]
26
+ checksums = options[:checksums]
27
+ # validate checksum list format, must be a comma delimited list of prefix:checksum pairs
28
+ if /^[^:,]+:[^:,]+(,[^:,]+:[^:,]+)*$/.match(checksums)
29
+ # convert checksum list into hash with prefix as key
30
+ checksums = Hash[*checksums.split(/\s*[:,]\s*/)]
31
+ digest_provider = SingleDigestProvider.new(checksums)
32
+ else
33
+ logger.failure("Invalid checksums parameter format, see `longleaf help <command>` for more information")
34
+ exit 1
35
+ end
36
+ end
37
+
38
+ file_paths = options[:file].split(/\s*,\s*/)
39
+ selector = FileSelector.new(file_paths: file_paths, app_config: app_config_manager)
40
+ elsif !options[:location].nil?
41
+ storage_locations = options[:location].split(/\s*,\s*/)
42
+ selector = FileSelector.new(storage_locations: storage_locations, app_config: app_config_manager)
43
+ digest_provider = SingleDigestProvider.new(nil)
44
+ else
45
+ logger.failure("Must provide one of the following file selection options: -f, l, or -m")
46
+ exit 1
47
+ end
48
+
49
+ [selector, digest_provider]
50
+ end
51
+
52
+ def self.there_can_be_only_one(failure_msg, options, *names)
53
+ got_one = false
54
+ names.each do |name|
55
+ if !options[name].nil?
56
+ if got_one
57
+ logger.failure(failure_msg)
58
+ exit 1
59
+ end
60
+ got_one = true
61
+ end
62
+ end
63
+ end
64
+
65
+ # Parses the provided manifest options, reading the contents of the manifests to produce
66
+ # a mapping from files to their digest values for one or more algorithms.
67
+ # @param manifest_vals [Array] List of manifest option values. They may be in one of the following formats:
68
+ # <alg_name>:<manifest_path> OR <alg_name>:@-
69
+ # <manifest_path> OR @-
70
+ # @return a hash containing the aggregated contents of the provided manifests. The keys are
71
+ # paths to manifested files. The values are hashes, mapping digest algorithms to digest values.
72
+ def self.manifests_to_digest_mapping(manifest_vals)
73
+ alg_manifest_pairs = []
74
+ # interpret option inputs into a list of algorithms to manifest sources
75
+ manifest_vals.each do |manifest_val|
76
+ if manifest_val.include?(':')
77
+ manifest_parts = manifest_val.split(':', 2)
78
+ alg_manifest_pairs << manifest_parts
79
+ else
80
+ # algorithm not specified in option value
81
+ alg_manifest_pairs << [nil, manifest_val]
82
+ end
83
+ end
84
+ if alg_manifest_pairs.select { |mpair| mpair[1] == '@-' }.count > 1
85
+ self.fail("Cannot specify more than one manifest from STDIN")
86
+ end
87
+
88
+ # read the provided manifests to build a mapping from file uri to all supplied digests
89
+ digests_mapping = Hash.new { |h,k| h[k] = Hash.new }
90
+ alg_manifest_pairs.each do |mpair|
91
+ source_stream = nil
92
+ # Determine if reading from a manifest file or stdin
93
+ if mpair[1] == '@-'
94
+ source_stream = $stdin
95
+ else
96
+ source_stream = File.new(mpair[1])
97
+ end
98
+
99
+ current_alg = mpair[0]
100
+ multi_digest_manifest = current_alg.nil?
101
+ source_stream.each_line do |line|
102
+ line = line.strip
103
+ if multi_digest_manifest && /^[a-zA-Z0-9]+:$/ =~ line
104
+ # Found a digest algorithm header, assuming succeeding entries are of this type
105
+ current_alg = line.chomp(':')
106
+ # Verify that the digest algorithm is known to longleaf
107
+ if !DigestHelper.is_known_algorithm?(current_alg)
108
+ self.fail("Manifest specifies unknown digest algorithm: #{current_alg}")
109
+ end
110
+ else
111
+ if current_alg.nil?
112
+ self.fail("Manifest with unknown checksums encountered, an algorithm must be specified")
113
+ end
114
+ entry_parts = line.split(' ', 2)
115
+ if entry_parts.length != 2
116
+ self.fail("Invalid manifest entry: #{line}")
117
+ end
118
+
119
+ digests_mapping[entry_parts[1]][current_alg] = entry_parts[0]
120
+ end
121
+ end
122
+ end
123
+
124
+ digests_mapping
125
+ end
126
+
127
+ # Parses the provided options to create a selector for registered files
128
+ # @param options [Hash] command options
129
+ # @param app_config_manager [ApplicationConfigManager] app config manager
130
+ # @return selector
131
+ def self.create_registered_selector(options, app_config_manager)
132
+ there_can_be_only_one("Only one of the following selection options may be provided: -l, -f, -s",
133
+ options, :file, :location, :from_list)
134
+
135
+ if !options[:from_list].nil?
136
+ file_paths = read_from_list(options[:from_list])
137
+ return RegisteredFileSelector.new(file_paths: file_paths, app_config: app_config_manager)
138
+ elsif !options[:file].nil?
139
+ file_paths = options[:file].split(/\s*,\s*/)
140
+ return RegisteredFileSelector.new(file_paths: file_paths, app_config: app_config_manager)
141
+ elsif !options[:location].nil?
142
+ storage_locations = options[:location].split(/\s*,\s*/)
143
+ return RegisteredFileSelector.new(storage_locations: storage_locations, app_config: app_config_manager)
144
+ else
145
+ logger.failure("Must provide one of the following file selection options: -l, -f, or -s")
146
+ exit 1
147
+ end
148
+ end
149
+
150
+ # Parses the -l from_list option, reading the list of files specified either from the provided
151
+ # file path or STDIN
152
+ # @param from_list option value, either a file path or "@-"
153
+ # @return list of files from the from_list
154
+ def self.read_from_list(from_list)
155
+ from_list = from_list.strip
156
+ if from_list.empty?
157
+ logger.failure("List parameter must not be empty")
158
+ exit 1
159
+ end
160
+
161
+ if from_list == '@-'
162
+ source_stream = $stdin
163
+ else
164
+ begin
165
+ source_stream = File.new(from_list)
166
+ rescue Errno::ENOENT
167
+ logger.failure("Specified list file does not exist: #{from_list}")
168
+ exit 1
169
+ end
170
+ end
171
+
172
+ lines = []
173
+ source_stream.each_line do |line|
174
+ lines << line.strip
175
+ end
176
+
177
+ if lines.empty?
178
+ logger.failure("File list is empty, must provide one or more files for this operation")
179
+ exit 1
180
+ end
181
+ lines
182
+ end
183
+
184
+ def self.fail(message)
185
+ logger.failure(message)
186
+ exit 1
187
+ end
188
+ end
189
+ end
@@ -37,7 +37,7 @@ module Longleaf
37
37
  end
38
38
 
39
39
  modified_time = datetime + (value * unit_modifier)
40
- modified_time.iso8601
40
+ modified_time.iso8601(3)
41
41
  end
42
42
 
43
43
  # Get a timestamp in the format expected for service timestamps.
@@ -46,5 +46,33 @@ module Longleaf
46
46
  def self.formatted_timestamp(timestamp = Time.now)
47
47
  timestamp.utc.iso8601(3).to_s
48
48
  end
49
+
50
+ # Get the timestamp for the next time the provided service would need to be run
51
+ # for the object described by md_rec
52
+ # @param md_rec [MetadataRecord] metadata record for the file
53
+ # @param service_def [ServiceDefinition] definition for the service
54
+ # @return [String] iso8601 timestamp for the next time the service will need to run, or
55
+ # nil if the service does not need to run again.
56
+ def self.next_run_needed(md_rec, service_def)
57
+ raise ArgumentError.new('Must provide a md_rec parameter') if md_rec.nil?
58
+ raise ArgumentError.new('Must provide a service_def parameter') if service_def.nil?
59
+
60
+ service_name = service_def.name
61
+ service_rec = md_rec.service(service_name)
62
+
63
+ if service_rec.nil? || service_rec.timestamp.nil?
64
+ if service_def.delay.nil?
65
+ return md_rec.registered
66
+ else
67
+ return ServiceDateHelper.add_to_timestamp(md_rec.registered, service_def.delay)
68
+ end
69
+ end
70
+
71
+ if service_def.frequency.nil?
72
+ return nil
73
+ else
74
+ return ServiceDateHelper.add_to_timestamp(service_rec.timestamp, service_def.frequency)
75
+ end
76
+ end
49
77
  end
50
78
  end
@@ -114,27 +114,9 @@ module Longleaf
114
114
 
115
115
  expected_services.each do |service_def|
116
116
  service_name = service_def.name
117
- # Service has never run, set execution time to now
118
- if !present_services.include?(service_name)
119
- service_times << current_time
120
- next
121
- end
122
-
123
- service_rec = md_rec.service(service_name)
124
117
 
125
- # Service either needs a run or has no timestamp, so execution time of now
126
- if service_rec.run_needed || service_rec.timestamp.nil?
127
- service_times << current_time
128
- next
129
- end
130
-
131
- # Calculate the next time this service should run based on frequency
132
- frequency = service_def.frequency
133
- unless frequency.nil?
134
- service_timestamp = service_rec.timestamp
135
- service_times << ServiceDateHelper.add_to_timestamp(service_timestamp, frequency)
136
- next
137
- end
118
+ next_run = ServiceDateHelper.next_run_needed(md_rec, service_def)
119
+ service_times << next_run unless next_run.nil?
138
120
  end
139
121
  # Return the lowest service execution time
140
122
  service_times.min
@@ -7,7 +7,9 @@ module Longleaf
7
7
  SYSTEM = 'system'
8
8
 
9
9
  LOCATION_PATH = 'path'
10
- METADATA_PATH = 'metadata_path'
11
- METADATA_DIGESTS = 'metadata_digests'
10
+ METADATA_CONFIG = 'metadata'
11
+ METADATA_DIGESTS = 'digests'
12
+
13
+ STORAGE_TYPE = 'type'
12
14
  end
13
15
  end
@@ -0,0 +1,56 @@
1
+ require 'longleaf/services/metadata_serializer'
2
+ require 'longleaf/models/metadata_location'
3
+ require 'longleaf/models/storage_types'
4
+
5
+ module Longleaf
6
+ # A filesystem based location in which metadata associated with registered files is stored.
7
+ class FilesystemMetadataLocation < MetadataLocation
8
+ AF ||= Longleaf::AppFields
9
+
10
+ def initialize(config)
11
+ super(config)
12
+ end
13
+
14
+ # @return the storage type for this location
15
+ def type
16
+ StorageTypes::FILESYSTEM_STORAGE_TYPE
17
+ end
18
+
19
+ # Get the absolute path for the metadata file for the given file path located in this storage location.
20
+ # @param file_path [String] path of the file relative to its storage location
21
+ # @return absolute path to the metadata
22
+ # @raise [ArgumentError] if the file_path is not provided.
23
+ def metadata_path_for(file_path)
24
+ raise ArgumentError.new("A file_path parameter is required") if file_path.nil?
25
+ raise ArgumentError.new("File path must be relative") if Pathname.new(file_path).absolute?
26
+
27
+ md_path = File.join(@path, file_path)
28
+ # If the file_path is to a file, then add metadata suffix.
29
+ if md_path.end_with?('/')
30
+ md_path
31
+ else
32
+ md_path + MetadataSerializer::metadata_suffix
33
+ end
34
+ end
35
+
36
+ # Get the metadata path relative to this location
37
+ # @param md_path [String] metadata file path
38
+ # @return the metadata path relative to this location
39
+ # @raise [ArgumentError] if the metadata path is not contained by this location
40
+ def relativize(md_path)
41
+ return md_path if Pathname.new(md_path).relative?
42
+
43
+ raise ArgumentError.new("Metadata path must be contained by this location") if !md_path.start_with?(@path)
44
+
45
+ md_path.sub(@path, "")
46
+ end
47
+
48
+
49
+ # Checks that the path defined in this metadata location is available
50
+ # @raise [StorageLocationUnavailableError] if the metadata location is not available
51
+ def available?
52
+ raise StorageLocationUnavailableError.new("Metadata path does not exist or is not a directory: #{@path}")\
53
+ unless Dir.exist?(@path)
54
+ end
55
+ end
56
+ end
@@ -0,0 +1,52 @@
1
+ require 'longleaf/models/storage_location'
2
+ require 'longleaf/models/storage_types'
3
+
4
+ module Longleaf
5
+ # A storage location in a local filesystem
6
+ class FilesystemStorageLocation < StorageLocation
7
+ # @param name [String] the name of this storage location
8
+ # @param config [Hash] hash containing the configuration options for this location
9
+ # @param md_loc [MetadataLocation] metadata location associated with this storage location
10
+ def initialize(name, config, md_loc)
11
+ super(name, config, md_loc)
12
+ @path += File::SEPARATOR unless @path.end_with?(File::SEPARATOR)
13
+ end
14
+
15
+ # @return the storage type for this location
16
+ def type
17
+ StorageTypes::FILESYSTEM_STORAGE_TYPE
18
+ end
19
+
20
+ # Get the absolute path to the file associated with the provided metadata path
21
+ # @param md_path [String] metadata file path
22
+ # @raise [ArgumentError] if the md_path is not in this storage location
23
+ # @return [String] the path for the file associated with this metadata
24
+ def get_path_from_metadata_path(md_path)
25
+ raise ArgumentError.new("A file_path parameter is required") if md_path.nil? || md_path.empty?
26
+
27
+ rel_path = @metadata_location.relative_file_path_for(md_path)
28
+
29
+ File.join(@path, rel_path)
30
+ end
31
+
32
+ # Checks that the path and metadata path defined in this location are available
33
+ # @raise [StorageLocationUnavailableError] if the storage location is not available
34
+ def available?
35
+ raise StorageLocationUnavailableError.new("Path does not exist or is not a directory: #{@path}")\
36
+ unless Dir.exist?(@path)
37
+ @metadata_location.available?
38
+ end
39
+
40
+ # Get the file path relative to this location
41
+ # @param file_path [String] file path
42
+ # @return the file path relative to this location
43
+ # @raise [ArgumentError] if the file path is not contained by this location
44
+ def relativize(file_path)
45
+ return file_path if Pathname.new(file_path).relative?
46
+
47
+ raise ArgumentError.new("Metadata path must be contained by this location") if !file_path.start_with?(@path)
48
+
49
+ file_path.sub(@path, "")
50
+ end
51
+ end
52
+ end
@@ -0,0 +1,47 @@
1
+ require 'longleaf/models/app_fields'
2
+
3
+ module Longleaf
4
+ # A location in which metadata associated with registered files is stored.
5
+ class MetadataLocation
6
+ AF ||= Longleaf::AppFields
7
+
8
+ attr_reader :path
9
+ attr_reader :digests
10
+
11
+ def initialize(config)
12
+ raise ArgumentError.new("Config parameter is required") unless config
13
+ @path = config[AF::LOCATION_PATH]
14
+ raise ArgumentError.new("Parameter path is required") unless @path
15
+ @path += '/' unless @path.end_with?('/')
16
+
17
+ digests = config[AF::METADATA_DIGESTS]
18
+ if digests.nil?
19
+ @digests = []
20
+ elsif digests.is_a?(String)
21
+ @digests = [digests.downcase]
22
+ else
23
+ @digests = digests.map(&:downcase)
24
+ end
25
+ DigestHelper::validate_algorithms(@digests)
26
+ end
27
+
28
+ # Transforms the given metadata path into a relative storage location path
29
+ # @param md_path [String] path of the metadata file or directory to compute file path for.
30
+ # @return [String] the path relative to this location, with the metadata suffix removed if present
31
+ def relative_file_path_for(md_path)
32
+ rel_md_path = relativize(md_path)
33
+
34
+ if rel_md_path.end_with?(MetadataSerializer::metadata_suffix)
35
+ rel_md_path[0..-MetadataSerializer::metadata_suffix.length - 1]
36
+ else
37
+ rel_md_path
38
+ end
39
+ end
40
+
41
+ # @param md_path [String] metadata path to check
42
+ # @return true if the metadata path is contained by the path for this location
43
+ def contains?(md_path)
44
+ md_path.start_with?(@path)
45
+ end
46
+ end
47
+ end
@@ -1,5 +1,6 @@
1
1
  require_relative 'md_fields'
2
2
  require_relative 'service_record'
3
+ require 'longleaf/helpers/case_insensitive_hash'
3
4
 
4
5
  module Longleaf
5
6
  # Metadata record for a single file
@@ -22,7 +23,8 @@ module Longleaf
22
23
  @properties = properties || Hash.new
23
24
  @registered = registered
24
25
  @deregistered = deregistered
25
- @checksums = checksums || Hash.new
26
+ @checksums = CaseInsensitiveHash.new
27
+ @checksums.merge!(checksums) unless checksums.nil?
26
28
  @services = services || Hash.new
27
29
  @file_size = file_size
28
30
  @last_modified = last_modified