longleaf 1.0.0 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a5f659d06bd9b1094ed6dd0e10d108282bacde2522e4f07f5a2118dcbffb030e
4
- data.tar.gz: '0368bb1228372a7b617aa148844f8cc88fbd453b3a704aa0c66d2d1b0abd21ef'
3
+ metadata.gz: 71c3c9e78a968f153c457d2a59ae17e8d45b4f6e905deeb787172844cee3733f
4
+ data.tar.gz: d354dba30939cdf1917b09fa7fc874654135f311dd1d5c60453cb42fe2b2a736
5
5
  SHA512:
6
- metadata.gz: 12917813fba8f517585d9e20d22044fce229451b5a22b10edd44f3accaea807a4049d867980fb8b301202f6d0bd4f047b0a06df78d46d054e8e7cc4eeeda9cb6
7
- data.tar.gz: fe2f94a2f0bdbe5b2095c63275adca26a5c0f8a6d8dd013bdfa9a1465073085f80a8f2e32b0d5aa7b53c19cce58e48bb1fa9b16479db9b3bcd0af4c649128188
6
+ metadata.gz: af2433108df902de025137d67c4b3d089b8766f7856d10d5d93d6bd449b82227c7cf3e987e50199c83a0869056842cb34d55f710102fa8a654282170c215882a
7
+ data.tar.gz: 93deda52a5f3de34ebc29baef3f531db099ea76521389dc223f33c2922da0e1fb81d775c8178370ade67ea397761553bf9ec34c03c12eec1e0a77d12174e07de
data/.circleci/config.yml CHANGED
@@ -78,7 +78,7 @@ jobs:
78
78
 
79
79
  test_with_ruby_latest:
80
80
  docker:
81
- - image: circleci/ruby:latest
81
+ - image: circleci/ruby:2.7
82
82
  environment:
83
83
  BUNDLER_VERSION: 2.1.4
84
84
 
@@ -1,4 +1,5 @@
1
1
  require 'longleaf/logging'
2
+ require 'longleaf/candidates/physical_path_provider'
2
3
 
3
4
  module Longleaf
4
5
  # Selects and allows for iteration over files which match a provided set of selection criteria
@@ -10,7 +11,8 @@ module Longleaf
10
11
  attr_reader :specificity
11
12
 
12
13
  # May only provide either file_paths or storage_locations
13
- def initialize(file_paths: nil, storage_locations: nil, app_config:)
14
+ def initialize(file_paths: nil, storage_locations: nil, physical_provider: Longleaf::PhysicalPathProvider.new,
15
+ app_config:)
14
16
  if nil_or_empty?(file_paths) && nil_or_empty?(storage_locations)
15
17
  raise ArgumentError.new("Must provide either file paths or storage locations")
16
18
  end
@@ -36,6 +38,7 @@ module Longleaf
36
38
  end
37
39
  # The set of storage locations to select file paths from
38
40
  @storage_locations = storage_locations
41
+ @physical_provider = physical_provider
39
42
  # Validate that the selected storage locations are known
40
43
  if @storage_locations.nil?
41
44
  @specificity = SPECIFICITY_PATH
@@ -63,7 +66,7 @@ module Longleaf
63
66
  @target_paths
64
67
  end
65
68
 
66
- # Get the next file path for this selector.
69
+ # Get the next logical file path for this selector.
67
70
  # @return [String] an absolute path to the next file targeted by this selector,
68
71
  # or nil if no more files selected
69
72
  def next_path
@@ -80,9 +83,17 @@ module Longleaf
80
83
  path = @paths.pop
81
84
  until path.nil? do
82
85
  @app_config.location_manager.verify_path_in_location(path)
86
+ physical_path = @physical_provider.get_physical_path(path)
87
+ separate_logical = physical_path != path
88
+ if separate_logical
89
+ @app_config.location_manager.verify_path_in_location(physical_path)
90
+ end
83
91
 
84
- if File.exist?(path)
85
- if File.directory?(path)
92
+ if File.exist?(physical_path)
93
+ if File.directory?(physical_path)
94
+ if separate_logical
95
+ raise InvalidStoragePathError.new("Cannot specify physical path to a directory: #{physical_path}")
96
+ end
86
97
  logger.debug("Expanding directory #{path}")
87
98
  # For a directory, add all children to file_paths
88
99
  Dir.entries(path).sort.reverse_each do |child|
@@ -93,7 +104,7 @@ module Longleaf
93
104
  return path
94
105
  end
95
106
  else
96
- raise InvalidStoragePathError.new("File #{path} does not exist.")
107
+ raise InvalidStoragePathError.new("File #{physical_path} does not exist.")
97
108
  end
98
109
 
99
110
  # Returned path was not a suitable file, try the next path
@@ -1,7 +1,7 @@
1
1
  module Longleaf
2
2
  # Provides digests for files from a manifest
3
3
  class ManifestDigestProvider
4
- # @param hash which maps file paths to hashs of digests
4
+ # @param digests_mapping hash which maps file paths to hashes of digests
5
5
  def initialize(digests_mapping)
6
6
  @digests_mapping = digests_mapping
7
7
  end
@@ -0,0 +1,17 @@
1
+ module Longleaf
2
+ # Provides physical paths for logical paths from a mapping
3
+ class PhysicalPathProvider
4
+ # @param phys_mapping hash with logical paths as keys, physical paths as values
5
+ def initialize(phys_mapping = Hash.new)
6
+ @phys_mapping = phys_mapping
7
+ end
8
+
9
+ # @param logical_path [String] logical path of file
10
+ # @return physical path of the file
11
+ def get_physical_path(logical_path)
12
+ # return the logical path itself if no physical path is mapped
13
+ return logical_path unless @phys_mapping.key?(logical_path)
14
+ @phys_mapping[logical_path]
15
+ end
16
+ end
17
+ end
data/lib/longleaf/cli.rb CHANGED
@@ -65,12 +65,12 @@ module Longleaf
65
65
  :aliases => "-f",
66
66
  :required => false,
67
67
  :desc => 'File or files to perform this operation on. If multiple files are provided, they must be comma separated.' })
68
+
68
69
  add_shared_option(
69
- :location, :file_selection, {
70
+ :location, :registered_selection, {
70
71
  :aliases => "-s",
71
72
  :required => false,
72
73
  :desc => 'Name or comma separated names of storage locations to perform this operation over.' })
73
-
74
74
  add_shared_option(
75
75
  :from_list, :registered_selection, {
76
76
  :aliases => "-l",
@@ -115,7 +115,21 @@ module Longleaf
115
115
  ...
116
116
  md5:
117
117
  <digest> <path>
118
- ...})
118
+ ...
119
+
120
+ To provide separate logical and physical paths, add a physical path column:
121
+ '-m sha1:@-'
122
+ Where the content in STDIN adheres to the format:
123
+ <digest> <logical path> <physical path>
124
+ ...
125
+ })
126
+ method_option(:physical_path,
127
+ :aliases => "-p",
128
+ :required => false,
129
+ :desc => %q{Comma separated list of physical paths of files to register. Only needed
130
+ if the physical and logical paths of the files differ, otherwise they will be assumed to be the same.
131
+ Only applicable when used with the -f option, and only for individual files, not directories.
132
+ Must be provided in the same order as the logical paths.})
119
133
  method_option(:force,
120
134
  :type => :boolean,
121
135
  :default => false,
@@ -132,11 +146,12 @@ module Longleaf
132
146
 
133
147
  app_config_manager = load_application_config(options)
134
148
 
135
- file_selector, digest_provider = SelectionOptionsParser.parse_registration_selection_options(
136
- options, app_config_manager)
149
+ file_selector, digest_provider, physical_provider = SelectionOptionsParser
150
+ .parse_registration_selection_options(options, app_config_manager)
137
151
 
138
152
  command = RegisterCommand.new(app_config_manager)
139
- exit command.execute(file_selector: file_selector, force: options[:force], digest_provider: digest_provider)
153
+ exit command.execute(file_selector: file_selector, force: options[:force], digest_provider: digest_provider,
154
+ physical_provider: physical_provider)
140
155
  end
141
156
 
142
157
  desc "deregister", "Deregister files with Longleaf"
@@ -16,9 +16,10 @@ module Longleaf
16
16
  # Execute the register command on the given parameters
17
17
  # @param file_selector [FileSelector] selector for files to register
18
18
  # @param force [Boolean] force flag
19
- # @param digest_provider [DigestProvider] object which provides digests for files being registered
19
+ # @param digest_provider [ManifestDigestProvider] object which provides digests for files being registered
20
+ # @param physical_provider [PhysicalPathProvider] object which provides physical paths for files being registered
20
21
  # @return [Integer] status code
21
- def execute(file_selector:, force: false, digest_provider: nil)
22
+ def execute(file_selector:, force: false, digest_provider: nil, physical_provider: nil)
22
23
  start_time = Time.now
23
24
  logger.info('Performing register command')
24
25
  begin
@@ -29,7 +30,8 @@ module Longleaf
29
30
 
30
31
  storage_location = @app_manager.location_manager.get_location_by_path(f_path)
31
32
 
32
- file_rec = FileRecord.new(f_path, storage_location)
33
+ phys_path = physical_provider.get_physical_path(f_path)
34
+ file_rec = FileRecord.new(f_path, storage_location, nil, phys_path)
33
35
 
34
36
  register_event = RegisterEvent.new(file_rec: file_rec, force: force, app_manager: @app_manager,
35
37
  digest_provider: digest_provider)
@@ -27,12 +27,13 @@ module Longleaf
27
27
  service_manager = @app_manager.service_manager
28
28
  md_rec = @file_rec.metadata_record
29
29
  f_path = @file_rec.path
30
+ phys_path = @file_rec.physical_path
30
31
 
31
- logger.info("Performing preserve event on #{@file_rec.path}")
32
+ logger.info("Performing preserve event on #{f_path}")
32
33
 
33
34
  needs_persist = false
34
35
  begin
35
- if !File.exist?(f_path)
36
+ if !File.exist?(phys_path)
36
37
  # Need to persist metadata to avoid repeating processing of this file too soon.
37
38
  needs_persist = true
38
39
  record_failure(EventNames::PRESERVE, f_path, "File is registered but missing.")
@@ -70,10 +70,17 @@ module Longleaf
70
70
  private
71
71
  def populate_file_properties
72
72
  md_rec = @file_rec.metadata_record
73
+ physical_path = @file_rec.physical_path
73
74
 
74
75
  # Set file properties
75
- md_rec.last_modified = File.mtime(@file_rec.path).utc.iso8601(3)
76
- md_rec.file_size = File.size(@file_rec.path)
76
+ md_rec.last_modified = File.mtime(physical_path).utc.iso8601(3)
77
+ md_rec.file_size = File.size(physical_path)
78
+
79
+ if physical_path != @file_rec.path
80
+ md_rec.physical_path = physical_path
81
+ else
82
+ md_rec.physical_path = nil
83
+ end
77
84
  end
78
85
 
79
86
  # Copy a subset of properties from an existing metadata record to the new record
@@ -1,6 +1,7 @@
1
1
  require 'longleaf/candidates/file_selector'
2
2
  require 'longleaf/candidates/registered_file_selector'
3
3
  require 'longleaf/candidates/manifest_digest_provider'
4
+ require 'longleaf/candidates/physical_path_provider'
4
5
  require 'longleaf/candidates/single_digest_provider'
5
6
 
6
7
  module Longleaf
@@ -18,8 +19,11 @@ module Longleaf
18
19
  options, :file, :manifest, :location)
19
20
 
20
21
  if !options[:manifest].nil?
21
- digests_mapping = self.manifests_to_digest_mapping(options[:manifest])
22
- selector = FileSelector.new(file_paths: digests_mapping.keys, app_config: app_config_manager)
22
+ digests_mapping, logical_phys_mapping = self.parse_manifest(options[:manifest])
23
+ physical_provider = PhysicalPathProvider.new(logical_phys_mapping)
24
+ selector = FileSelector.new(file_paths: digests_mapping.keys,
25
+ physical_provider: physical_provider,
26
+ app_config: app_config_manager)
23
27
  digest_provider = ManifestDigestProvider.new(digests_mapping)
24
28
  elsif !options[:file].nil?
25
29
  if options[:checksums]
@@ -35,18 +39,28 @@ module Longleaf
35
39
  end
36
40
  end
37
41
 
38
- file_paths = options[:file].split(/\s*,\s*/)
39
- selector = FileSelector.new(file_paths: file_paths, app_config: app_config_manager)
40
- elsif !options[:location].nil?
41
- storage_locations = options[:location].split(/\s*,\s*/)
42
- selector = FileSelector.new(storage_locations: storage_locations, app_config: app_config_manager)
43
- digest_provider = SingleDigestProvider.new(nil)
42
+ file_paths = self.split_quoted(options[:file], "\\s*,\\s*")
43
+ if !options[:physical_path].nil?
44
+ physical_paths = self.split_quoted(options[:physical_path], "\\s*,\\s*")
45
+ if physical_paths.length != file_paths.length
46
+ logger.failure("Invalid physical paths parameter, number of paths did not match number of logical paths")
47
+ exit 1
48
+ end
49
+ logical_phys_mapping = Hash[file_paths.zip physical_paths]
50
+ physical_provider = PhysicalPathProvider.new(logical_phys_mapping)
51
+ else
52
+ physical_provider = PhysicalPathProvider.new
53
+ end
54
+
55
+ selector = FileSelector.new(file_paths: file_paths,
56
+ physical_provider: physical_provider,
57
+ app_config: app_config_manager)
44
58
  else
45
59
  logger.failure("Must provide one of the following file selection options: -f, l, or -m")
46
60
  exit 1
47
61
  end
48
62
 
49
- [selector, digest_provider]
63
+ [selector, digest_provider, physical_provider]
50
64
  end
51
65
 
52
66
  def self.there_can_be_only_one(failure_msg, options, *names)
@@ -69,7 +83,7 @@ module Longleaf
69
83
  #. <manifest_path> OR @-
70
84
  # @return a hash containing the aggregated contents of the provided manifests. The keys are
71
85
  # paths to manifested files. The values are hashes, mapping digest algorithms to digest values.
72
- def self.manifests_to_digest_mapping(manifest_vals)
86
+ def self.parse_manifest(manifest_vals)
73
87
  alg_manifest_pairs = []
74
88
  # interpret option inputs into a list of algorithms to manifest sources
75
89
  manifest_vals.each do |manifest_val|
@@ -87,6 +101,7 @@ module Longleaf
87
101
 
88
102
  # read the provided manifests to build a mapping from file uri to all supplied digests
89
103
  digests_mapping = Hash.new { |h,k| h[k] = Hash.new }
104
+ logical_phys_mapping = Hash.new
90
105
  alg_manifest_pairs.each do |mpair|
91
106
  source_stream = nil
92
107
  # Determine if reading from a manifest file or stdin
@@ -111,17 +126,28 @@ module Longleaf
111
126
  if current_alg.nil?
112
127
  self.fail("Manifest with unknown checksums encountered, an algorithm must be specified")
113
128
  end
114
- entry_parts = line.split(' ', 2)
115
- if entry_parts.length != 2
129
+ entry_parts = self.split_quoted(line)
130
+ if entry_parts.length != 2 && entry_parts.length != 3
116
131
  self.fail("Invalid manifest entry: #{line}")
117
132
  end
118
133
 
119
134
  digests_mapping[entry_parts[1]][current_alg] = entry_parts[0]
135
+ if (entry_parts.length == 3)
136
+ logical_phys_mapping[entry_parts[1]] = entry_parts[2]
137
+ end
120
138
  end
121
139
  end
122
140
  end
123
141
 
124
- digests_mapping
142
+ [digests_mapping, logical_phys_mapping]
143
+ end
144
+
145
+ # Splits a string of quoted or unquoted tokens separated by spaces
146
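+ # @param text [String] text to split on delimiter (regex source, default whitespace) outside of quotes; limit is passed to String#split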
147
+ def self.split_quoted(text, delimiter = "\\s+", limit = -1)
148
+ text.split(/#{delimiter}(?=(?:[^'"]|'[^']*'|"[^"]*")*$)/, limit)
149
+ .select {|s| not s.empty? }
150
+ .map {|s| s.gsub(/(^ +)|( +$)|(^["']+)|(["']+$)/, '')}
125
151
  end
126
152
 
127
153
  # Parses the provided options to create a selector for registered files
@@ -65,6 +65,10 @@ module Longleaf
65
65
  def unknown(progname = nil, &block)
66
66
  @stderr_log.unknown(progname, &block)
67
67
  end
68
+
69
+ def <<(msg)
70
+ @stderr_log << msg
71
+ end
68
72
 
69
73
  # Logs a success message to STDOUT, as well as STDERR at info level.
70
74
  #
@@ -7,13 +7,16 @@ module Longleaf
7
7
 
8
8
  # @param file_path [String] path to the file
9
9
  # @param storage_location [StorageLocation] storage location containing the file
10
- def initialize(file_path, storage_location, metadata_record = nil)
10
+ # @param metadata_record [MetadataRecord] metadata record for this file object. Optional.
11
+ # @param physical_path [String] physical path where the file is located. Defaults to the file_path.
12
+ def initialize(file_path, storage_location, metadata_record = nil, physical_path = nil)
11
13
  raise ArgumentError.new("FileRecord requires a path") if file_path.nil?
12
14
  raise ArgumentError.new("FileRecord requires a storage_location") if storage_location.nil?
13
15
 
14
16
  @path = file_path
15
17
  @storage_location = storage_location
16
18
  @metadata_record = metadata_record
19
+ @physical_path = physical_path
17
20
  end
18
21
 
19
22
  # @return [String] path for the metadata file for this file
@@ -21,6 +24,17 @@ module Longleaf
21
24
  @metadata_path = @storage_location.get_metadata_path_for(path) if @metadata_path.nil?
22
25
  @metadata_path
23
26
  end
27
+
28
+ def physical_path
29
+ if @physical_path.nil?
30
+ if @metadata_record.nil? || @metadata_record.physical_path.nil?
31
+ @physical_path = @path
32
+ else
33
+ @physical_path = @metadata_record.physical_path
34
+ end
35
+ end
36
+ @physical_path
37
+ end
24
38
 
25
39
  def metadata_present?
26
40
  File.exist?(metadata_path)
@@ -9,6 +9,7 @@ module Longleaf
9
9
 
10
10
  LAST_MODIFIED = 'last-modified'
11
11
  FILE_SIZE = 'size'
12
+ PHYSICAL_PATH = 'physical-path'
12
13
 
13
14
  CHECKSUMS = 'checksums'
14
15
 
@@ -10,6 +10,7 @@ module Longleaf
10
10
  attr_reader :checksums
11
11
  attr_reader :properties
12
12
  attr_accessor :file_size, :last_modified
13
+ attr_accessor :physical_path
13
14
 
14
15
  # @param properties [Hash] initial data properties for this record
15
16
  # @param services [Hash] initial service property tree
@@ -18,8 +19,9 @@ module Longleaf
18
19
  # @param checksums [Hash] hash of checksum values
19
20
  # @param file_size [Integer] size of file in bytes
20
21
  # @param last_modified [String] iso8601 representation of the last modified date of file
22
+ # @param physical_path [String] physical path where the file is located
21
23
  def initialize(properties: nil, services: nil, deregistered: nil, registered: nil, checksums: nil,
22
- file_size: nil, last_modified: nil)
24
+ file_size: nil, last_modified: nil, physical_path: nil)
23
25
  @properties = properties || Hash.new
24
26
  @registered = registered
25
27
  @deregistered = deregistered
@@ -28,6 +30,7 @@ module Longleaf
28
30
  @services = services || Hash.new
29
31
  @file_size = file_size
30
32
  @last_modified = last_modified
33
+ @physical_path = physical_path
31
34
  end
32
35
 
33
36
  # @return [Boolean] true if the record is deregistered
@@ -1,6 +1,7 @@
1
1
  require 'longleaf/models/storage_location'
2
2
  require 'longleaf/models/storage_types'
3
3
  require 'longleaf/helpers/s3_uri_helper'
4
+ require 'longleaf/logging'
4
5
  require 'uri'
5
6
  require 'aws-sdk-s3'
6
7
 
@@ -12,6 +13,7 @@ module Longleaf
12
13
  # https://docs.aws.amazon.com/sdk-for-ruby/v3/api/Aws/S3/Client.html#constructor_details
13
14
 
14
15
  class S3StorageLocation < StorageLocation
16
+ include Longleaf::Logging
15
17
 
16
18
  IS_URI_REGEX = /\A#{URI::regexp}\z/
17
19
 
@@ -38,6 +40,9 @@ module Longleaf
38
40
  # Clone options and convert keys to symbols
39
41
  @client_options = Hash[custom_options.map { |(k,v)| [k.to_sym,v] } ]
40
42
  end
43
+ @client_options[:logger] = logger
44
+ @client_options[:log_level] = :debug if @client_options[:log_level].nil?
45
+
41
46
  # If no region directly configured, use region from path
42
47
  if !@client_options.key?(:region)
43
48
  region = S3UriHelper.extract_region(@path)
@@ -23,22 +23,23 @@ module Longleaf
23
23
  # @raise [PreservationServiceError] if the file system information does not match the stored details
24
24
  def perform(file_rec, event)
25
25
  file_path = file_rec.path
26
+ phys_path = file_rec.physical_path
26
27
  md_rec = file_rec.metadata_record
27
28
 
28
29
  logger.debug("Performing file information check of #{file_path}")
29
30
 
30
- if !File.exist?(file_path)
31
- raise PreservationServiceError.new("File does not exist: #{file_path}")
31
+ if !File.exist?(phys_path)
32
+ raise PreservationServiceError.new("File does not exist: #{phys_path}")
32
33
  end
33
34
 
34
- file_size = File.size(file_rec.path)
35
+ file_size = File.size(phys_path)
35
36
  if file_size != md_rec.file_size
36
- raise PreservationServiceError.new("File size for #{file_path} does not match the expected value: registered = #{md_rec.file_size} bytes, actual = #{file_size} bytes")
37
+ raise PreservationServiceError.new("File size for #{phys_path} does not match the expected value: registered = #{md_rec.file_size} bytes, actual = #{file_size} bytes")
37
38
  end
38
39
 
39
- last_modified = File.mtime(file_rec.path).utc.iso8601(3)
40
+ last_modified = File.mtime(phys_path).utc.iso8601(3)
40
41
  if last_modified != md_rec.last_modified
41
- raise PreservationServiceError.new("Last modified timestamp for #{file_path} does not match the expected value: registered = #{md_rec.last_modified}, actual = #{last_modified}")
42
+ raise PreservationServiceError.new("Last modified timestamp for #{phys_path} does not match the expected value: registered = #{md_rec.last_modified}, actual = #{last_modified}")
42
43
  end
43
44
  end
44
45
 
@@ -63,6 +63,7 @@ module Longleaf
63
63
  # @raise [ChecksumMismatchError] if the checksum on record does not match the generated checksum
64
64
  def perform(file_rec, event)
65
65
  path = file_rec.path
66
+ phys_path = file_rec.physical_path
66
67
  md_rec = file_rec.metadata_record
67
68
 
68
69
  # Get the list of existing checksums for the file and normalize algorithm names
@@ -89,19 +90,19 @@ module Longleaf
89
90
  end
90
91
 
91
92
  digest = DigestHelper::start_digest(alg)
92
- digest.file(path)
93
+ digest.file(phys_path)
93
94
  generated_digest = digest.hexdigest
94
95
 
95
96
  # Store the missing checksum if using the 'generate' behavior
96
97
  if existing_digest.nil? && @absent_digest_behavior == GENERATE_IF_ABSENT
97
98
  md_rec.checksums[alg] = generated_digest
98
- logger.info("Generated and stored digest using algorithm '#{alg}' for file #{path}")
99
+ logger.info("Generated and stored digest using algorithm '#{alg}' for file #{phys_path}")
99
100
  else
100
101
  # Compare the new digest to the one on record
101
102
  if existing_digest == generated_digest
102
- logger.info("Fixity check using algorithm '#{alg}' succeeded for file #{path}")
103
+ logger.info("Fixity check using algorithm '#{alg}' succeeded for file #{phys_path}")
103
104
  else
104
- raise ChecksumMismatchError.new("Fixity check using algorithm '#{alg}' failed for file #{path}: expected '#{existing_digest}', calculated '#{generated_digest}.'")
105
+ raise ChecksumMismatchError.new("Fixity check using algorithm '#{alg}' failed for file #{phys_path}: expected '#{existing_digest}', calculated '#{generated_digest}.'")
105
106
  end
106
107
  end
107
108
  end
@@ -51,9 +51,6 @@ module Longleaf
51
51
  + " rsync_options may not include the following: #{DISALLOWED_OPTIONS.join(' ')}")
52
52
  end
53
53
 
54
- # Add -R (--relative) in to command options to ensure full path gets replicated
55
- @options = @options + " -R"
56
-
57
54
  # Set and validate the replica collision policy
58
55
  @collision_policy = @service_def.properties[SF::COLLISION_PROPERTY] || SF::DEFAULT_COLLISION_POLICY
59
56
  if !SF::VALID_COLLISION_POLICIES.include?(@collision_policy)
@@ -101,17 +98,36 @@ module Longleaf
101
98
  else
102
99
  dest_path = destination
103
100
  end
104
-
101
+
102
+ logical_physical_same = file_rec.path == file_rec.physical_path
105
103
  # Determine the path to the file being replicated relative to its storage location
106
104
  rel_path = file_rec.storage_location.relativize(file_rec.path)
107
- # source path with . so that rsync will only create destination directories starting from that point
108
- source_path = File.join(file_rec.storage_location.path, "./#{rel_path}")
105
+
106
+ options = @options
107
+ if logical_physical_same
108
+ options = options + " -R"
109
+ # source path with . so that rsync will only create destination directories starting from that point
110
+ source_path = File.join(file_rec.storage_location.path, "./#{rel_path}")
111
+ else
112
+ options = options + " --no-relative"
113
+ source_path = file_rec.physical_path
114
+ dest_path = File.join(dest_path, rel_path)
115
+ if (dest_is_storage_loc && destination.is_a?(Longleaf::FilesystemStorageLocation)) || !dest_is_storage_loc
116
+ # Fill in missing parent directories, as rsync cannot do so when specifying a different source and dest filename
117
+ dirname = File.dirname(dest_path)
118
+ logger.debug("Creating parent dirs #{dirname} for #{file_rec.path}")
119
+ FileUtils.mkdir_p(dirname)
120
+ else
121
+ raise PreservationServiceError.new(
122
+ "Destination #{destination.name} does not currently support separate physical and logical paths")
123
+ end
124
+ end
109
125
 
110
126
  # Check that the destination is available because attempting to write
111
127
  verify_destination_available(destination, file_rec)
112
128
 
113
- logger.debug("Invoking rsync with command: #{@command} \"#{source_path}\" \"#{dest_path}\" #{@options}")
114
- stdout, stderr, status = Open3.capture3("#{@command} \"#{source_path}\" \"#{dest_path}\" #{@options}")
129
+ logger.debug("Invoking rsync with command: #{@command} \"#{source_path}\" \"#{dest_path}\" #{options}")
130
+ stdout, stderr, status = Open3.capture3("#{@command} \"#{source_path}\" \"#{dest_path}\" #{options}")
115
131
  raise PreservationServiceError.new("Failed to replicate #{file_rec.path} to #{dest_path}: #{stderr}") \
116
132
  unless status.success?
117
133
 
@@ -83,8 +83,6 @@ module Longleaf
83
83
  # Determine the path to the file being replicated relative to its storage location
84
84
  rel_path = file_rec.storage_location.relativize(file_rec.path)
85
85
 
86
- content_md5 = get_content_md5(file_rec)
87
-
88
86
  @destinations.each do |destination|
89
87
  # Check that the destination is available before attempting to write
90
88
  verify_destination_available(destination, file_rec)
@@ -92,7 +90,7 @@ module Longleaf
92
90
  rel_to_bucket = destination.relative_to_bucket_path(rel_path)
93
91
  file_obj = destination.s3_bucket.object(rel_to_bucket)
94
92
  begin
95
- file_obj.upload_file(file_rec.path, { :content_md5 => content_md5 })
93
+ file_obj.upload_file(file_rec.physical_path)
96
94
  rescue Aws::S3::Errors::BadDigest => e
97
95
  raise ChecksumMismatchError.new("Transfer to bucket '#{destination.s3_bucket.name}' failed, " \
98
96
  + "MD5 provided did not match the received content for #{file_rec.path}")
@@ -129,15 +127,5 @@ module Longleaf
129
127
  + e.message)
130
128
  end
131
129
  end
132
-
133
- def get_content_md5(file_rec)
134
- md_rec = file_rec.metadata_record
135
- if md_rec.checksums.key?('md5')
136
- # base 64 encode the digest, as is required by the Content-Md5 header
137
- [[md_rec.checksums['md5']].pack("H*")].pack("m0")
138
- else
139
- nil
140
- end
141
- end
142
130
  end
143
131
  end
@@ -32,6 +32,7 @@ module Longleaf
32
32
  checksums = data.delete(MDFields::CHECKSUMS)
33
33
  file_size = data.delete(MDFields::FILE_SIZE)
34
34
  last_modified = data.delete(MDFields::LAST_MODIFIED)
35
+ physical_path = data.delete(MDFields::PHYSICAL_PATH)
35
36
 
36
37
  services = md[MDF::SERVICES]
37
38
  service_records = Hash.new
@@ -57,7 +58,8 @@ module Longleaf
57
58
  deregistered: deregistered,
58
59
  checksums: checksums,
59
60
  file_size: file_size,
60
- last_modified: last_modified)
61
+ last_modified: last_modified,
62
+ physical_path: physical_path)
61
63
  end
62
64
 
63
65
  # Load configuration from a yaml encoded configuration file
@@ -52,6 +52,7 @@ module Longleaf
52
52
  data[MDF::CHECKSUMS] = metadata.checksums unless metadata.checksums && metadata.checksums.empty?
53
53
  data[MDF::FILE_SIZE] = metadata.file_size unless metadata.file_size.nil?
54
54
  data[MDF::LAST_MODIFIED] = metadata.last_modified if metadata.last_modified
55
+ data[MDF::PHYSICAL_PATH] = metadata.physical_path if metadata.physical_path
55
56
 
56
57
  props[MDF::DATA] = data
57
58
 
@@ -29,6 +29,11 @@ module Longleaf
29
29
  @checksums[alg] = value
30
30
  self
31
31
  end
32
+
33
+ def with_physical_path(phys_path)
34
+ @physical_path = phys_path
35
+ self
36
+ end
32
37
 
33
38
  def with_service(name, timestamp: ServiceDateHelper::formatted_timestamp, run_needed: false, properties: nil,
34
39
  failure_timestamp: nil)
@@ -56,7 +61,8 @@ module Longleaf
56
61
  registered: @registered,
57
62
  checksums: @checksums,
58
63
  file_size: @file_size,
59
- last_modified: @last_modified)
64
+ last_modified: @last_modified,
65
+ physical_path: @physical_path)
60
66
  end
61
67
 
62
68
  # Add the generated metadata record to the given file record
@@ -1,3 +1,3 @@
1
1
  module Longleaf
2
- VERSION = "1.0.0"
2
+ VERSION = "1.1.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: longleaf
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ben Pennell
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-10-14 00:00:00.000000000 Z
11
+ date: 2021-03-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thor
@@ -372,6 +372,7 @@ files:
372
372
  - lib/longleaf.rb
373
373
  - lib/longleaf/candidates/file_selector.rb
374
374
  - lib/longleaf/candidates/manifest_digest_provider.rb
375
+ - lib/longleaf/candidates/physical_path_provider.rb
375
376
  - lib/longleaf/candidates/registered_file_selector.rb
376
377
  - lib/longleaf/candidates/service_candidate_filesystem_iterator.rb
377
378
  - lib/longleaf/candidates/service_candidate_index_iterator.rb
@@ -449,7 +450,7 @@ licenses:
449
450
  - Apache-2.0
450
451
  metadata:
451
452
  source_code_uri: https://github.com/UNC-Libraries/longleaf-preservation
452
- post_install_message:
453
+ post_install_message:
453
454
  rdoc_options: []
454
455
  require_paths:
455
456
  - lib
@@ -464,8 +465,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
464
465
  - !ruby/object:Gem::Version
465
466
  version: '0'
466
467
  requirements: []
467
- rubygems_version: 3.1.2
468
- signing_key:
468
+ rubygems_version: 3.1.4
469
+ signing_key:
469
470
  specification_version: 4
470
471
  summary: Longleaf preservation services tool
471
472
  test_files: []