longleaf 0.3.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. checksums.yaml +4 -4
  2. data/.circleci/config.yml +12 -2
  3. data/README.md +11 -1
  4. data/lib/longleaf/candidates/manifest_digest_provider.rb +17 -0
  5. data/lib/longleaf/candidates/single_digest_provider.rb +13 -0
  6. data/lib/longleaf/cli.rb +49 -36
  7. data/lib/longleaf/commands/register_command.rb +3 -3
  8. data/lib/longleaf/commands/validate_config_command.rb +1 -1
  9. data/lib/longleaf/events/register_event.rb +8 -4
  10. data/lib/longleaf/helpers/case_insensitive_hash.rb +38 -0
  11. data/lib/longleaf/helpers/digest_helper.rb +7 -1
  12. data/lib/longleaf/helpers/s3_uri_helper.rb +86 -0
  13. data/lib/longleaf/helpers/selection_options_parser.rb +189 -0
  14. data/lib/longleaf/helpers/service_date_helper.rb +29 -1
  15. data/lib/longleaf/indexing/sequel_index_driver.rb +2 -20
  16. data/lib/longleaf/models/app_fields.rb +4 -2
  17. data/lib/longleaf/models/filesystem_metadata_location.rb +56 -0
  18. data/lib/longleaf/models/filesystem_storage_location.rb +52 -0
  19. data/lib/longleaf/models/metadata_location.rb +47 -0
  20. data/lib/longleaf/models/metadata_record.rb +3 -1
  21. data/lib/longleaf/models/s3_storage_location.rb +133 -0
  22. data/lib/longleaf/models/service_fields.rb +4 -0
  23. data/lib/longleaf/models/storage_location.rb +17 -48
  24. data/lib/longleaf/models/storage_types.rb +9 -0
  25. data/lib/longleaf/preservation_services/rsync_replication_service.rb +9 -11
  26. data/lib/longleaf/preservation_services/s3_replication_service.rb +143 -0
  27. data/lib/longleaf/services/application_config_deserializer.rb +26 -4
  28. data/lib/longleaf/services/application_config_validator.rb +17 -6
  29. data/lib/longleaf/services/configuration_validator.rb +64 -4
  30. data/lib/longleaf/services/filesystem_location_validator.rb +16 -0
  31. data/lib/longleaf/services/metadata_deserializer.rb +41 -9
  32. data/lib/longleaf/services/metadata_persistence_manager.rb +3 -2
  33. data/lib/longleaf/services/metadata_serializer.rb +94 -13
  34. data/lib/longleaf/services/metadata_validator.rb +76 -0
  35. data/lib/longleaf/services/s3_location_validator.rb +19 -0
  36. data/lib/longleaf/services/service_definition_validator.rb +16 -8
  37. data/lib/longleaf/services/service_manager.rb +7 -15
  38. data/lib/longleaf/services/service_mapping_validator.rb +26 -15
  39. data/lib/longleaf/services/storage_location_manager.rb +38 -12
  40. data/lib/longleaf/services/storage_location_validator.rb +41 -30
  41. data/lib/longleaf/specs/config_builder.rb +10 -3
  42. data/lib/longleaf/specs/config_validator_helpers.rb +16 -0
  43. data/lib/longleaf/specs/metadata_builder.rb +1 -0
  44. data/lib/longleaf/version.rb +1 -1
  45. data/longleaf.gemspec +3 -1
  46. data/mkdocs.yml +2 -1
  47. metadata +48 -8
  48. data/.travis.yml +0 -4
  49. data/lib/longleaf/services/storage_path_validator.rb +0 -16
@@ -0,0 +1,189 @@
1
+ require 'longleaf/candidates/file_selector'
2
+ require 'longleaf/candidates/registered_file_selector'
3
+ require 'longleaf/candidates/manifest_digest_provider'
4
+ require 'longleaf/candidates/single_digest_provider'
5
+
6
+ module Longleaf
7
+ # Helper for parsing manifest inputs used for registration
8
+ class SelectionOptionsParser
9
+ extend Longleaf::Logging
10
+
11
+ # Parses the provided options to construct a file selector and digest provider for
12
+ # use in registration commands.
13
+ # @param options [Hash] command options
14
+ # @param app_config_manager [ApplicationConfigManager] app config manager
15
+ # @return The file selector and digest provider.
16
+ def self.parse_registration_selection_options(options, app_config_manager)
17
+ there_can_be_only_one("Only one of the following selection options may be provided: -m, -f, -s",
18
+ options, :file, :manifest, :location)
19
+
20
+ if !options[:manifest].nil?
21
+ digests_mapping = self.manifests_to_digest_mapping(options[:manifest])
22
+ selector = FileSelector.new(file_paths: digests_mapping.keys, app_config: app_config_manager)
23
+ digest_provider = ManifestDigestProvider.new(digests_mapping)
24
+ elsif !options[:file].nil?
25
+ if options[:checksums]
26
+ checksums = options[:checksums]
27
+ # validate checksum list format, must be a comma-delimited list of prefix:checksum pairs
28
+ if /^[^:,]+:[^:,]+(,[^:,]+:[^:,]+)*$/.match(checksums)
29
+ # convert checksum list into hash with prefix as key
30
+ checksums = Hash[*checksums.split(/\s*[:,]\s*/)]
31
+ digest_provider = SingleDigestProvider.new(checksums)
32
+ else
33
+ logger.failure("Invalid checksums parameter format, see `longleaf help <command>` for more information")
34
+ exit 1
35
+ end
36
+ end
37
+
38
+ file_paths = options[:file].split(/\s*,\s*/)
39
+ selector = FileSelector.new(file_paths: file_paths, app_config: app_config_manager)
40
+ elsif !options[:location].nil?
41
+ storage_locations = options[:location].split(/\s*,\s*/)
42
+ selector = FileSelector.new(storage_locations: storage_locations, app_config: app_config_manager)
43
+ digest_provider = SingleDigestProvider.new(nil)
44
+ else
45
+ logger.failure("Must provide one of the following file selection options: -f, l, or -m")
46
+ exit 1
47
+ end
48
+
49
+ [selector, digest_provider]
50
+ end
51
+
52
+ def self.there_can_be_only_one(failure_msg, options, *names)
53
+ got_one = false
54
+ names.each do |name|
55
+ if !options[name].nil?
56
+ if got_one
57
+ logger.failure(failure_msg)
58
+ exit 1
59
+ end
60
+ got_one = true
61
+ end
62
+ end
63
+ end
64
+
65
+ # Parses the provided manifest options, reading the contents of the manifests to produce
66
+ # a mapping from files to one or more algorithms.
67
+ # @param manifest_vals [Array] List of manifest option values. They may be in one of the following formats:
68
+ # <alg_name>:<manifest_path> OR <alg_name>:@-
69
+ # <manifest_path> OR @-
70
+ # @return a hash containing the aggregated contents of the provided manifests. The keys are
71
+ # paths to manifested files. The values are hashes, mapping digest algorithms to digest values.
72
+ def self.manifests_to_digest_mapping(manifest_vals)
73
+ alg_manifest_pairs = []
74
+ # interpret option inputs into a list of algorithms to manifest sources
75
+ manifest_vals.each do |manifest_val|
76
+ if manifest_val.include?(':')
77
+ manifest_parts = manifest_val.split(':', 2)
78
+ alg_manifest_pairs << manifest_parts
79
+ else
80
+ # algorithm not specified in option value
81
+ alg_manifest_pairs << [nil, manifest_val]
82
+ end
83
+ end
84
+ if alg_manifest_pairs.select { |mpair| mpair[1] == '@-' }.count > 1
85
+ self.fail("Cannot specify more than one manifest from STDIN")
86
+ end
87
+
88
+ # read the provided manifests to build a mapping from file uri to all supplied digests
89
+ digests_mapping = Hash.new { |h,k| h[k] = Hash.new }
90
+ alg_manifest_pairs.each do |mpair|
91
+ source_stream = nil
92
+ # Determine if reading from a manifest file or stdin
93
+ if mpair[1] == '@-'
94
+ source_stream = $stdin
95
+ else
96
+ source_stream = File.new(mpair[1])
97
+ end
98
+
99
+ current_alg = mpair[0]
100
+ multi_digest_manifest = current_alg.nil?
101
+ source_stream.each_line do |line|
102
+ line = line.strip
103
+ if multi_digest_manifest && /^[a-zA-Z0-9]+:$/ =~ line
104
+ # Found a digest algorithm header, assuming succeeding entries are of this type
105
+ current_alg = line.chomp(':')
106
+ # Verify that the digest algorithm is known to longleaf
107
+ if !DigestHelper.is_known_algorithm?(current_alg)
108
+ self.fail("Manifest specifies unknown digest algorithm: #{current_alg}")
109
+ end
110
+ else
111
+ if current_alg.nil?
112
+ self.fail("Manifest with unknown checksums encountered, an algorithm must be specified")
113
+ end
114
+ entry_parts = line.split(' ', 2)
115
+ if entry_parts.length != 2
116
+ self.fail("Invalid manifest entry: #{line}")
117
+ end
118
+
119
+ digests_mapping[entry_parts[1]][current_alg] = entry_parts[0]
120
+ end
121
+ end
122
+ end
123
+
124
+ digests_mapping
125
+ end
126
+
127
+ # Parses the provided options to create a selector for registered files
128
+ # @param options [Hash] command options
129
+ # @param app_config_manager [ApplicationConfigManager] app config manager
130
+ # @return selector
131
+ def self.create_registered_selector(options, app_config_manager)
132
+ there_can_be_only_one("Only one of the following selection options may be provided: -l, -f, -s",
133
+ options, :file, :location, :from_list)
134
+
135
+ if !options[:from_list].nil?
136
+ file_paths = read_from_list(options[:from_list])
137
+ return RegisteredFileSelector.new(file_paths: file_paths, app_config: app_config_manager)
138
+ elsif !options[:file].nil?
139
+ file_paths = options[:file].split(/\s*,\s*/)
140
+ return RegisteredFileSelector.new(file_paths: file_paths, app_config: app_config_manager)
141
+ elsif !options[:location].nil?
142
+ storage_locations = options[:location].split(/\s*,\s*/)
143
+ return RegisteredFileSelector.new(storage_locations: storage_locations, app_config: app_config_manager)
144
+ else
145
+ logger.failure("Must provide one of the following file selection options: -l, -f, or -s")
146
+ exit 1
147
+ end
148
+ end
149
+
150
+ # Parses the -l from_list option, reading the list of files specified either from the provided
151
+ # file path or STDIN
152
+ # @param from_list option value, either a file path or "@-"
153
+ # @return list of files from the from_list
154
+ def self.read_from_list(from_list)
155
+ from_list = from_list.strip
156
+ if from_list.empty?
157
+ logger.failure("List parameter must not be empty")
158
+ exit 1
159
+ end
160
+
161
+ if from_list == '@-'
162
+ source_stream = $stdin
163
+ else
164
+ begin
165
+ source_stream = File.new(from_list)
166
+ rescue Errno::ENOENT
167
+ logger.failure("Specified list file does not exist: #{from_list}")
168
+ exit 1
169
+ end
170
+ end
171
+
172
+ lines = []
173
+ source_stream.each_line do |line|
174
+ lines << line.strip
175
+ end
176
+
177
+ if lines.empty?
178
+ logger.failure("File list is empty, must provide one or more files for this operation")
179
+ exit 1
180
+ end
181
+ lines
182
+ end
183
+
184
+ def self.fail(message)
185
+ logger.failure(message)
186
+ exit 1
187
+ end
188
+ end
189
+ end
@@ -37,7 +37,7 @@ module Longleaf
37
37
  end
38
38
 
39
39
  modified_time = datetime + (value * unit_modifier)
40
- modified_time.iso8601
40
+ modified_time.iso8601(3)
41
41
  end
42
42
 
43
43
  # Get a timestamp in the format expected for service timestamps.
@@ -46,5 +46,33 @@ module Longleaf
46
46
  def self.formatted_timestamp(timestamp = Time.now)
47
47
  timestamp.utc.iso8601(3).to_s
48
48
  end
49
+
50
+ # Get the timestamp for the next time the provided service would need to be run
51
+ # for the object described by md_rec
52
+ # @param md_rec [MetadataRecord] metadata record for the file
53
+ # @param service_def [ServiceDefinition] definition for the service
54
+ # @return [String] iso8601 timestamp for the next time the service will need to run, or
55
+ # nil if the service does not need to run again.
56
+ def self.next_run_needed(md_rec, service_def)
57
+ raise ArgumentError.new('Must provide a md_rec parameter') if md_rec.nil?
58
+ raise ArgumentError.new('Must provide a service_def parameter') if service_def.nil?
59
+
60
+ service_name = service_def.name
61
+ service_rec = md_rec.service(service_name)
62
+
63
+ if service_rec.nil? || service_rec.timestamp.nil?
64
+ if service_def.delay.nil?
65
+ return md_rec.registered
66
+ else
67
+ return ServiceDateHelper.add_to_timestamp(md_rec.registered, service_def.delay)
68
+ end
69
+ end
70
+
71
+ if service_def.frequency.nil?
72
+ return nil
73
+ else
74
+ return ServiceDateHelper.add_to_timestamp(service_rec.timestamp, service_def.frequency)
75
+ end
76
+ end
49
77
  end
50
78
  end
@@ -114,27 +114,9 @@ module Longleaf
114
114
 
115
115
  expected_services.each do |service_def|
116
116
  service_name = service_def.name
117
- # Service has never run, set execution time to now
118
- if !present_services.include?(service_name)
119
- service_times << current_time
120
- next
121
- end
122
-
123
- service_rec = md_rec.service(service_name)
124
117
 
125
- # Service either needs a run or has no timestamp, so execution time of now
126
- if service_rec.run_needed || service_rec.timestamp.nil?
127
- service_times << current_time
128
- next
129
- end
130
-
131
- # Calculate the next time this service should run based on frequency
132
- frequency = service_def.frequency
133
- unless frequency.nil?
134
- service_timestamp = service_rec.timestamp
135
- service_times << ServiceDateHelper.add_to_timestamp(service_timestamp, frequency)
136
- next
137
- end
118
+ next_run = ServiceDateHelper.next_run_needed(md_rec, service_def)
119
+ service_times << next_run unless next_run.nil?
138
120
  end
139
121
  # Return the lowest service execution time
140
122
  service_times.min
@@ -7,7 +7,9 @@ module Longleaf
7
7
  SYSTEM = 'system'
8
8
 
9
9
  LOCATION_PATH = 'path'
10
- METADATA_PATH = 'metadata_path'
11
- METADATA_DIGESTS = 'metadata_digests'
10
+ METADATA_CONFIG = 'metadata'
11
+ METADATA_DIGESTS = 'digests'
12
+
13
+ STORAGE_TYPE = 'type'
12
14
  end
13
15
  end
@@ -0,0 +1,56 @@
1
+ require 'longleaf/services/metadata_serializer'
2
+ require 'longleaf/models/metadata_location'
3
+ require 'longleaf/models/storage_types'
4
+
5
+ module Longleaf
6
+ # A filesystem based location in which metadata associated with registered files is stored.
7
+ class FilesystemMetadataLocation < MetadataLocation
8
+ AF ||= Longleaf::AppFields
9
+
10
+ def initialize(config)
11
+ super(config)
12
+ end
13
+
14
+ # @return the storage type for this location
15
+ def type
16
+ StorageTypes::FILESYSTEM_STORAGE_TYPE
17
+ end
18
+
19
+ # Get the absolute path for the metadata file for the given file path located in this storage location.
20
+ # @param file_path [String] path of the file relative to its storage location
21
+ # @return absolute path to the metadata
22
+ # @raise [ArgumentError] if the file_path is not provided.
23
+ def metadata_path_for(file_path)
24
+ raise ArgumentError.new("A file_path parameter is required") if file_path.nil?
25
+ raise ArgumentError.new("File path must be relative") if Pathname.new(file_path).absolute?
26
+
27
+ md_path = File.join(@path, file_path)
28
+ # If the file_path is to a file, then add metadata suffix.
29
+ if md_path.end_with?('/')
30
+ md_path
31
+ else
32
+ md_path + MetadataSerializer::metadata_suffix
33
+ end
34
+ end
35
+
36
+ # Get the metadata path relative to this location
37
+ # @param md_path [String] metadata file path
38
+ # @return the metadata path relative to this location
39
+ # @raise [ArgumentError] if the metadata path is not contained by this location
40
+ def relativize(md_path)
41
+ return md_path if Pathname.new(md_path).relative?
42
+
43
+ raise ArgumentError.new("Metadata path must be contained by this location") if !md_path.start_with?(@path)
44
+
45
+ md_path.sub(@path, "")
46
+ end
47
+
48
+
49
+ # Checks that the path defined in this metadata location is available
50
+ # @raise [StorageLocationUnavailableError] if the metadata location is not available
51
+ def available?
52
+ raise StorageLocationUnavailableError.new("Metadata path does not exist or is not a directory: #{@path}")\
53
+ unless Dir.exist?(@path)
54
+ end
55
+ end
56
+ end
@@ -0,0 +1,52 @@
1
+ require 'longleaf/models/storage_location'
2
+ require 'longleaf/models/storage_types'
3
+
4
+ module Longleaf
5
+ # A storage location in a local filesystem
6
+ class FilesystemStorageLocation < StorageLocation
7
+ # @param name [String] the name of this storage location
8
+ # @param config [Hash] hash containing the configuration options for this location
9
+ # @param md_loc [MetadataLocation] metadata location associated with this storage location
10
+ def initialize(name, config, md_loc)
11
+ super(name, config, md_loc)
12
+ @path += File::SEPARATOR unless @path.end_with?(File::SEPARATOR)
13
+ end
14
+
15
+ # @return the storage type for this location
16
+ def type
17
+ StorageTypes::FILESYSTEM_STORAGE_TYPE
18
+ end
19
+
20
+ # Get the absolute path to the file associated with the provided metadata path
21
+ # @param md_path [String] metadata file path
22
+ # @raise [ArgumentError] if the md_path is not in this storage location
23
+ # @return [String] the path for the file associated with this metadata
24
+ def get_path_from_metadata_path(md_path)
25
+ raise ArgumentError.new("A file_path parameter is required") if md_path.nil? || md_path.empty?
26
+
27
+ rel_path = @metadata_location.relative_file_path_for(md_path)
28
+
29
+ File.join(@path, rel_path)
30
+ end
31
+
32
+ # Checks that the path and metadata path defined in this location are available
33
+ # @raise [StorageLocationUnavailableError] if the storage location is not available
34
+ def available?
35
+ raise StorageLocationUnavailableError.new("Path does not exist or is not a directory: #{@path}")\
36
+ unless Dir.exist?(@path)
37
+ @metadata_location.available?
38
+ end
39
+
40
+ # Get the file path relative to this location
41
+ # @param file_path [String] file path
42
+ # @return the file path relative to this location
43
+ # @raise [ArgumentError] if the file path is not contained by this location
44
+ def relativize(file_path)
45
+ return file_path if Pathname.new(file_path).relative?
46
+
47
+ raise ArgumentError.new("Metadata path must be contained by this location") if !file_path.start_with?(@path)
48
+
49
+ file_path.sub(@path, "")
50
+ end
51
+ end
52
+ end
@@ -0,0 +1,47 @@
1
+ require 'longleaf/models/app_fields'
2
+
3
+ module Longleaf
4
+ # A location in which metadata associated with registered files is stored.
5
+ class MetadataLocation
6
+ AF ||= Longleaf::AppFields
7
+
8
+ attr_reader :path
9
+ attr_reader :digests
10
+
11
+ def initialize(config)
12
+ raise ArgumentError.new("Config parameter is required") unless config
13
+ @path = config[AF::LOCATION_PATH]
14
+ raise ArgumentError.new("Parameter path is required") unless @path
15
+ @path += '/' unless @path.end_with?('/')
16
+
17
+ digests = config[AF::METADATA_DIGESTS]
18
+ if digests.nil?
19
+ @digests = []
20
+ elsif digests.is_a?(String)
21
+ @digests = [digests.downcase]
22
+ else
23
+ @digests = digests.map(&:downcase)
24
+ end
25
+ DigestHelper::validate_algorithms(@digests)
26
+ end
27
+
28
+ # Transforms the given metadata path into a relative storage location path
29
+ # @param md_path [String] path of the metadata file or directory to compute file path for.
30
+ # @return [String] the path relative to this location, with the metadata suffix removed if present
31
+ def relative_file_path_for(md_path)
32
+ rel_md_path = relativize(md_path)
33
+
34
+ if rel_md_path.end_with?(MetadataSerializer::metadata_suffix)
35
+ rel_md_path[0..-MetadataSerializer::metadata_suffix.length - 1]
36
+ else
37
+ rel_md_path
38
+ end
39
+ end
40
+
41
+ # @param md_path [String] metadata path to check
42
+ # @return true if the metadata path is contained by the path for this location
43
+ def contains?(md_path)
44
+ md_path.start_with?(@path)
45
+ end
46
+ end
47
+ end
@@ -1,5 +1,6 @@
1
1
  require_relative 'md_fields'
2
2
  require_relative 'service_record'
3
+ require 'longleaf/helpers/case_insensitive_hash'
3
4
 
4
5
  module Longleaf
5
6
  # Metadata record for a single file
@@ -22,7 +23,8 @@ module Longleaf
22
23
  @properties = properties || Hash.new
23
24
  @registered = registered
24
25
  @deregistered = deregistered
25
- @checksums = checksums || Hash.new
26
+ @checksums = CaseInsensitiveHash.new
27
+ @checksums.merge!(checksums) unless checksums.nil?
26
28
  @services = services || Hash.new
27
29
  @file_size = file_size
28
30
  @last_modified = last_modified