longleaf 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a5f659d06bd9b1094ed6dd0e10d108282bacde2522e4f07f5a2118dcbffb030e
4
- data.tar.gz: '0368bb1228372a7b617aa148844f8cc88fbd453b3a704aa0c66d2d1b0abd21ef'
3
+ metadata.gz: 71c3c9e78a968f153c457d2a59ae17e8d45b4f6e905deeb787172844cee3733f
4
+ data.tar.gz: d354dba30939cdf1917b09fa7fc874654135f311dd1d5c60453cb42fe2b2a736
5
5
  SHA512:
6
- metadata.gz: 12917813fba8f517585d9e20d22044fce229451b5a22b10edd44f3accaea807a4049d867980fb8b301202f6d0bd4f047b0a06df78d46d054e8e7cc4eeeda9cb6
7
- data.tar.gz: fe2f94a2f0bdbe5b2095c63275adca26a5c0f8a6d8dd013bdfa9a1465073085f80a8f2e32b0d5aa7b53c19cce58e48bb1fa9b16479db9b3bcd0af4c649128188
6
+ metadata.gz: af2433108df902de025137d67c4b3d089b8766f7856d10d5d93d6bd449b82227c7cf3e987e50199c83a0869056842cb34d55f710102fa8a654282170c215882a
7
+ data.tar.gz: 93deda52a5f3de34ebc29baef3f531db099ea76521389dc223f33c2922da0e1fb81d775c8178370ade67ea397761553bf9ec34c03c12eec1e0a77d12174e07de
data/.circleci/config.yml CHANGED
@@ -78,7 +78,7 @@ jobs:
78
78
 
79
79
  test_with_ruby_latest:
80
80
  docker:
81
- - image: circleci/ruby:latest
81
+ - image: circleci/ruby:2.7
82
82
  environment:
83
83
  BUNDLER_VERSION: 2.1.4
84
84
 
@@ -1,4 +1,5 @@
1
1
  require 'longleaf/logging'
2
+ require 'longleaf/candidates/physical_path_provider'
2
3
 
3
4
  module Longleaf
4
5
  # Selects and allows for iteration over files which match a provided set of selection criteria
@@ -10,7 +11,8 @@ module Longleaf
10
11
  attr_reader :specificity
11
12
 
12
13
  # May only provide either file_paths or storage_locations
13
- def initialize(file_paths: nil, storage_locations: nil, app_config:)
14
+ def initialize(file_paths: nil, storage_locations: nil, physical_provider: Longleaf::PhysicalPathProvider.new,
15
+ app_config:)
14
16
  if nil_or_empty?(file_paths) && nil_or_empty?(storage_locations)
15
17
  raise ArgumentError.new("Must provide either file paths or storage locations")
16
18
  end
@@ -36,6 +38,7 @@ module Longleaf
36
38
  end
37
39
  # The set of storage locations to select file paths from
38
40
  @storage_locations = storage_locations
41
+ @physical_provider = physical_provider
39
42
  # Validate that the selected storage locations are known
40
43
  if @storage_locations.nil?
41
44
  @specificity = SPECIFICITY_PATH
@@ -63,7 +66,7 @@ module Longleaf
63
66
  @target_paths
64
67
  end
65
68
 
66
- # Get the next file path for this selector.
69
+ # Get the next logical file path for this selector.
67
70
  # @return [String] an absolute path to the next file targeted by this selector,
68
71
  # or nil if no more files selected
69
72
  def next_path
@@ -80,9 +83,17 @@ module Longleaf
80
83
  path = @paths.pop
81
84
  until path.nil? do
82
85
  @app_config.location_manager.verify_path_in_location(path)
86
+ physical_path = @physical_provider.get_physical_path(path)
87
+ separate_logical = physical_path != path
88
+ if separate_logical
89
+ @app_config.location_manager.verify_path_in_location(physical_path)
90
+ end
83
91
 
84
- if File.exist?(path)
85
- if File.directory?(path)
92
+ if File.exist?(physical_path)
93
+ if File.directory?(physical_path)
94
+ if separate_logical
95
+ raise InvalidStoragePathError.new("Cannot specify physical path to a directory: #{physical_path}")
96
+ end
86
97
  logger.debug("Expanding directory #{path}")
87
98
  # For a directory, add all children to file_paths
88
99
  Dir.entries(path).sort.reverse_each do |child|
@@ -93,7 +104,7 @@ module Longleaf
93
104
  return path
94
105
  end
95
106
  else
96
- raise InvalidStoragePathError.new("File #{path} does not exist.")
107
+ raise InvalidStoragePathError.new("File #{physical_path} does not exist.")
97
108
  end
98
109
 
99
110
  # Returned path was not a suitable file, try the next path
@@ -1,7 +1,7 @@
1
1
  module Longleaf
2
2
  # Provides digests for files from a manifest
3
3
  class ManifestDigestProvider
4
- # @param hash which maps file paths to hashs of digests
4
+ # @param digests_mapping hash which maps file paths to hashes of digests
5
5
  def initialize(digests_mapping)
6
6
  @digests_mapping = digests_mapping
7
7
  end
@@ -0,0 +1,17 @@
1
+ module Longleaf
2
+ # Provides physical paths for logical paths from a mapping
3
+ class PhysicalPathProvider
4
+ # @param phys_mapping hash with logical paths as keys, physical paths as values
5
+ def initialize(phys_mapping = Hash.new)
6
+ @phys_mapping = phys_mapping
7
+ end
8
+
9
+ # @param logical_path [String] logical path of file
10
+ # @return physical path of the file
11
+ def get_physical_path(logical_path)
12
+ # return the logical path itself if no physical path is mapped
13
+ return logical_path unless @phys_mapping.key?(logical_path)
14
+ @phys_mapping[logical_path]
15
+ end
16
+ end
17
+ end
data/lib/longleaf/cli.rb CHANGED
@@ -65,12 +65,12 @@ module Longleaf
65
65
  :aliases => "-f",
66
66
  :required => false,
67
67
  :desc => 'File or files to perform this operation on. If multiple files are provided, they must be comma separated.' })
68
+
68
69
  add_shared_option(
69
- :location, :file_selection, {
70
+ :location, :registered_selection, {
70
71
  :aliases => "-s",
71
72
  :required => false,
72
73
  :desc => 'Name or comma separated names of storage locations to perform this operation over.' })
73
-
74
74
  add_shared_option(
75
75
  :from_list, :registered_selection, {
76
76
  :aliases => "-l",
@@ -115,7 +115,21 @@ module Longleaf
115
115
  ...
116
116
  md5:
117
117
  <digest> <path>
118
- ...})
118
+ ...
119
+
120
+ To provide separate logical and physical paths, add a physical path column:
121
+ '-m sha1:@-'
122
+ Where the content in STDIN adheres to the format:
123
+ <digest> <logical path> <physical path>
124
+ ...
125
+ })
126
+ method_option(:physical_path,
127
+ :aliases => "-p",
128
+ :required => false,
129
+ :desc => %q{Comma separated list of physical paths of files to register. Only needed
130
+ if the physical and logical paths of the files differ, otherwise they will be assumed to be the same.
131
+ Only applicable when used with the -f option, and only for individual files, not directories.
132
+ Must be provided in the same order as the logical paths.})
119
133
  method_option(:force,
120
134
  :type => :boolean,
121
135
  :default => false,
@@ -132,11 +146,12 @@ module Longleaf
132
146
 
133
147
  app_config_manager = load_application_config(options)
134
148
 
135
- file_selector, digest_provider = SelectionOptionsParser.parse_registration_selection_options(
136
- options, app_config_manager)
149
+ file_selector, digest_provider, physical_provider = SelectionOptionsParser
150
+ .parse_registration_selection_options(options, app_config_manager)
137
151
 
138
152
  command = RegisterCommand.new(app_config_manager)
139
- exit command.execute(file_selector: file_selector, force: options[:force], digest_provider: digest_provider)
153
+ exit command.execute(file_selector: file_selector, force: options[:force], digest_provider: digest_provider,
154
+ physical_provider: physical_provider)
140
155
  end
141
156
 
142
157
  desc "deregister", "Deregister files with Longleaf"
@@ -16,9 +16,10 @@ module Longleaf
16
16
  # Execute the register command on the given parameters
17
17
  # @param file_selector [FileSelector] selector for files to register
18
18
  # @param force [Boolean] force flag
19
- # @param digest_provider [DigestProvider] object which provides digests for files being registered
19
+ # @param digest_provider [ManifestDigestProvider] object which provides digests for files being registered
20
+ # @param physical_provider [PhysicalPathProvider] object which provides physical paths for files being registered
20
21
  # @return [Integer] status code
21
- def execute(file_selector:, force: false, digest_provider: nil)
22
+ def execute(file_selector:, force: false, digest_provider: nil, physical_provider: nil)
22
23
  start_time = Time.now
23
24
  logger.info('Performing register command')
24
25
  begin
@@ -29,7 +30,8 @@ module Longleaf
29
30
 
30
31
  storage_location = @app_manager.location_manager.get_location_by_path(f_path)
31
32
 
32
- file_rec = FileRecord.new(f_path, storage_location)
33
+ phys_path = physical_provider.get_physical_path(f_path)
34
+ file_rec = FileRecord.new(f_path, storage_location, nil, phys_path)
33
35
 
34
36
  register_event = RegisterEvent.new(file_rec: file_rec, force: force, app_manager: @app_manager,
35
37
  digest_provider: digest_provider)
@@ -27,12 +27,13 @@ module Longleaf
27
27
  service_manager = @app_manager.service_manager
28
28
  md_rec = @file_rec.metadata_record
29
29
  f_path = @file_rec.path
30
+ phys_path = @file_rec.physical_path
30
31
 
31
- logger.info("Performing preserve event on #{@file_rec.path}")
32
+ logger.info("Performing preserve event on #{f_path}")
32
33
 
33
34
  needs_persist = false
34
35
  begin
35
- if !File.exist?(f_path)
36
+ if !File.exist?(phys_path)
36
37
  # Need to persist metadata to avoid repeating processing of this file too soon.
37
38
  needs_persist = true
38
39
  record_failure(EventNames::PRESERVE, f_path, "File is registered but missing.")
@@ -70,10 +70,17 @@ module Longleaf
70
70
  private
71
71
  def populate_file_properties
72
72
  md_rec = @file_rec.metadata_record
73
+ physical_path = @file_rec.physical_path
73
74
 
74
75
  # Set file properties
75
- md_rec.last_modified = File.mtime(@file_rec.path).utc.iso8601(3)
76
- md_rec.file_size = File.size(@file_rec.path)
76
+ md_rec.last_modified = File.mtime(physical_path).utc.iso8601(3)
77
+ md_rec.file_size = File.size(physical_path)
78
+
79
+ if physical_path != @file_rec.path
80
+ md_rec.physical_path = physical_path
81
+ else
82
+ md_rec.physical_path = nil
83
+ end
77
84
  end
78
85
 
79
86
  # Copy a subset of properties from an existing metadata record to the new record
@@ -1,6 +1,7 @@
1
1
  require 'longleaf/candidates/file_selector'
2
2
  require 'longleaf/candidates/registered_file_selector'
3
3
  require 'longleaf/candidates/manifest_digest_provider'
4
+ require 'longleaf/candidates/physical_path_provider'
4
5
  require 'longleaf/candidates/single_digest_provider'
5
6
 
6
7
  module Longleaf
@@ -18,8 +19,11 @@ module Longleaf
18
19
  options, :file, :manifest, :location)
19
20
 
20
21
  if !options[:manifest].nil?
21
- digests_mapping = self.manifests_to_digest_mapping(options[:manifest])
22
- selector = FileSelector.new(file_paths: digests_mapping.keys, app_config: app_config_manager)
22
+ digests_mapping, logical_phys_mapping = self.parse_manifest(options[:manifest])
23
+ physical_provider = PhysicalPathProvider.new(logical_phys_mapping)
24
+ selector = FileSelector.new(file_paths: digests_mapping.keys,
25
+ physical_provider: physical_provider,
26
+ app_config: app_config_manager)
23
27
  digest_provider = ManifestDigestProvider.new(digests_mapping)
24
28
  elsif !options[:file].nil?
25
29
  if options[:checksums]
@@ -35,18 +39,28 @@ module Longleaf
35
39
  end
36
40
  end
37
41
 
38
- file_paths = options[:file].split(/\s*,\s*/)
39
- selector = FileSelector.new(file_paths: file_paths, app_config: app_config_manager)
40
- elsif !options[:location].nil?
41
- storage_locations = options[:location].split(/\s*,\s*/)
42
- selector = FileSelector.new(storage_locations: storage_locations, app_config: app_config_manager)
43
- digest_provider = SingleDigestProvider.new(nil)
42
+ file_paths = self.split_quoted(options[:file], "\\s*,\\s*")
43
+ if !options[:physical_path].nil?
44
+ physical_paths = self.split_quoted(options[:physical_path], "\\s*,\\s*")
45
+ if physical_paths.length != file_paths.length
46
+ logger.failure("Invalid physical paths parameter, number of paths did not match number of logical paths")
47
+ exit 1
48
+ end
49
+ logical_phys_mapping = Hash[file_paths.zip physical_paths]
50
+ physical_provider = PhysicalPathProvider.new(logical_phys_mapping)
51
+ else
52
+ physical_provider = PhysicalPathProvider.new
53
+ end
54
+
55
+ selector = FileSelector.new(file_paths: file_paths,
56
+ physical_provider: physical_provider,
57
+ app_config: app_config_manager)
44
58
  else
45
59
  logger.failure("Must provide one of the following file selection options: -f, l, or -m")
46
60
  exit 1
47
61
  end
48
62
 
49
- [selector, digest_provider]
63
+ [selector, digest_provider, physical_provider]
50
64
  end
51
65
 
52
66
  def self.there_can_be_only_one(failure_msg, options, *names)
@@ -69,7 +83,7 @@ module Longleaf
69
83
  #. <manifest_path> OR @-
70
84
  # @return a hash containing the aggregated contents of the provided manifests. The keys are
71
85
  # paths to manifested files. The values are hashes, mapping digest algorithms to digest values.
72
- def self.manifests_to_digest_mapping(manifest_vals)
86
+ def self.parse_manifest(manifest_vals)
73
87
  alg_manifest_pairs = []
74
88
  # interpret option inputs into a list of algorithms to manifest sources
75
89
  manifest_vals.each do |manifest_val|
@@ -87,6 +101,7 @@ module Longleaf
87
101
 
88
102
  # read the provided manifests to build a mapping from file uri to all supplied digests
89
103
  digests_mapping = Hash.new { |h,k| h[k] = Hash.new }
104
+ logical_phys_mapping = Hash.new
90
105
  alg_manifest_pairs.each do |mpair|
91
106
  source_stream = nil
92
107
  # Determine if reading from a manifest file or stdin
@@ -111,17 +126,28 @@ module Longleaf
111
126
  if current_alg.nil?
112
127
  self.fail("Manifest with unknown checksums encountered, an algorithm must be specified")
113
128
  end
114
- entry_parts = line.split(' ', 2)
115
- if entry_parts.length != 2
129
+ entry_parts = self.split_quoted(line)
130
+ if entry_parts.length != 2 && entry_parts.length != 3
116
131
  self.fail("Invalid manifest entry: #{line}")
117
132
  end
118
133
 
119
134
  digests_mapping[entry_parts[1]][current_alg] = entry_parts[0]
135
+ if (entry_parts.length == 3)
136
+ logical_phys_mapping[entry_parts[1]] = entry_parts[2]
137
+ end
120
138
  end
121
139
  end
122
140
  end
123
141
 
124
- digests_mapping
142
+ [digests_mapping, logical_phys_mapping]
143
+ end
144
+
145
+ # Splits a string of quoted or unquoted tokens separated by spaces
146
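+ # @param text [String] the text to split on delimiter (a regex source string), ignoring delimiters inside quotes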
147
+ def self.split_quoted(text, delimiter = "\\s+", limit = -1)
148
+ text.split(/#{delimiter}(?=(?:[^'"]|'[^']*'|"[^"]*")*$)/, limit)
149
+ .select {|s| not s.empty? }
150
+ .map {|s| s.gsub(/(^ +)|( +$)|(^["']+)|(["']+$)/, '')}
125
151
  end
126
152
 
127
153
  # Parses the provided options to create a selector for registered files
@@ -65,6 +65,10 @@ module Longleaf
65
65
  def unknown(progname = nil, &block)
66
66
  @stderr_log.unknown(progname, &block)
67
67
  end
68
+
69
+ def <<(msg)
70
+ @stderr_log << msg
71
+ end
68
72
 
69
73
  # Logs a success message to STDOUT, as well as STDERR at info level.
70
74
  #
@@ -7,13 +7,16 @@ module Longleaf
7
7
 
8
8
  # @param file_path [String] path to the file
9
9
  # @param storage_location [StorageLocation] storage location containing the file
10
- def initialize(file_path, storage_location, metadata_record = nil)
10
+ # @param metadata_record [MetadataRecord] metadata record for this file object. Optional.
11
+ # @param physical_path [String] physical path where the file is located. Defaults to the file_path.
12
+ def initialize(file_path, storage_location, metadata_record = nil, physical_path = nil)
11
13
  raise ArgumentError.new("FileRecord requires a path") if file_path.nil?
12
14
  raise ArgumentError.new("FileRecord requires a storage_location") if storage_location.nil?
13
15
 
14
16
  @path = file_path
15
17
  @storage_location = storage_location
16
18
  @metadata_record = metadata_record
19
+ @physical_path = physical_path
17
20
  end
18
21
 
19
22
  # @return [String] path for the metadata file for this file
@@ -21,6 +24,17 @@ module Longleaf
21
24
  @metadata_path = @storage_location.get_metadata_path_for(path) if @metadata_path.nil?
22
25
  @metadata_path
23
26
  end
27
+
28
+ def physical_path
29
+ if @physical_path.nil?
30
+ if @metadata_record.nil? || @metadata_record.physical_path.nil?
31
+ @physical_path = @path
32
+ else
33
+ @physical_path = @metadata_record.physical_path
34
+ end
35
+ end
36
+ @physical_path
37
+ end
24
38
 
25
39
  def metadata_present?
26
40
  File.exist?(metadata_path)
@@ -9,6 +9,7 @@ module Longleaf
9
9
 
10
10
  LAST_MODIFIED = 'last-modified'
11
11
  FILE_SIZE = 'size'
12
+ PHYSICAL_PATH = 'physical-path'
12
13
 
13
14
  CHECKSUMS = 'checksums'
14
15
 
@@ -10,6 +10,7 @@ module Longleaf
10
10
  attr_reader :checksums
11
11
  attr_reader :properties
12
12
  attr_accessor :file_size, :last_modified
13
+ attr_accessor :physical_path
13
14
 
14
15
  # @param properties [Hash] initial data properties for this record
15
16
  # @param services [Hash] initial service property tree
@@ -18,8 +19,9 @@ module Longleaf
18
19
  # @param checksums [Hash] hash of checksum values
19
20
  # @param file_size [Integer] size of file in bytes
20
21
  # @param last_modified [String] iso8601 representation of the last modified date of file
22
+ # @param physical_path [String] physical path where the file is located
21
23
  def initialize(properties: nil, services: nil, deregistered: nil, registered: nil, checksums: nil,
22
- file_size: nil, last_modified: nil)
24
+ file_size: nil, last_modified: nil, physical_path: nil)
23
25
  @properties = properties || Hash.new
24
26
  @registered = registered
25
27
  @deregistered = deregistered
@@ -28,6 +30,7 @@ module Longleaf
28
30
  @services = services || Hash.new
29
31
  @file_size = file_size
30
32
  @last_modified = last_modified
33
+ @physical_path = physical_path
31
34
  end
32
35
 
33
36
  # @return [Boolean] true if the record is deregistered
@@ -1,6 +1,7 @@
1
1
  require 'longleaf/models/storage_location'
2
2
  require 'longleaf/models/storage_types'
3
3
  require 'longleaf/helpers/s3_uri_helper'
4
+ require 'longleaf/logging'
4
5
  require 'uri'
5
6
  require 'aws-sdk-s3'
6
7
 
@@ -12,6 +13,7 @@ module Longleaf
12
13
  # https://docs.aws.amazon.com/sdk-for-ruby/v3/api/Aws/S3/Client.html#constructor_details
13
14
 
14
15
  class S3StorageLocation < StorageLocation
16
+ include Longleaf::Logging
15
17
 
16
18
  IS_URI_REGEX = /\A#{URI::regexp}\z/
17
19
 
@@ -38,6 +40,9 @@ module Longleaf
38
40
  # Clone options and convert keys to symbols
39
41
  @client_options = Hash[custom_options.map { |(k,v)| [k.to_sym,v] } ]
40
42
  end
43
+ @client_options[:logger] = logger
44
+ @client_options[:log_level] = :debug if @client_options[:log_level].nil?
45
+
41
46
  # If no region directly configured, use region from path
42
47
  if !@client_options.key?(:region)
43
48
  region = S3UriHelper.extract_region(@path)
@@ -23,22 +23,23 @@ module Longleaf
23
23
  # @raise [PreservationServiceError] if the file system information does not match the stored details
24
24
  def perform(file_rec, event)
25
25
  file_path = file_rec.path
26
+ phys_path = file_rec.physical_path
26
27
  md_rec = file_rec.metadata_record
27
28
 
28
29
  logger.debug("Performing file information check of #{file_path}")
29
30
 
30
- if !File.exist?(file_path)
31
- raise PreservationServiceError.new("File does not exist: #{file_path}")
31
+ if !File.exist?(phys_path)
32
+ raise PreservationServiceError.new("File does not exist: #{phys_path}")
32
33
  end
33
34
 
34
- file_size = File.size(file_rec.path)
35
+ file_size = File.size(phys_path)
35
36
  if file_size != md_rec.file_size
36
- raise PreservationServiceError.new("File size for #{file_path} does not match the expected value: registered = #{md_rec.file_size} bytes, actual = #{file_size} bytes")
37
+ raise PreservationServiceError.new("File size for #{phys_path} does not match the expected value: registered = #{md_rec.file_size} bytes, actual = #{file_size} bytes")
37
38
  end
38
39
 
39
- last_modified = File.mtime(file_rec.path).utc.iso8601(3)
40
+ last_modified = File.mtime(phys_path).utc.iso8601(3)
40
41
  if last_modified != md_rec.last_modified
41
- raise PreservationServiceError.new("Last modified timestamp for #{file_path} does not match the expected value: registered = #{md_rec.last_modified}, actual = #{last_modified}")
42
+ raise PreservationServiceError.new("Last modified timestamp for #{phys_path} does not match the expected value: registered = #{md_rec.last_modified}, actual = #{last_modified}")
42
43
  end
43
44
  end
44
45
 
@@ -63,6 +63,7 @@ module Longleaf
63
63
  # @raise [ChecksumMismatchError] if the checksum on record does not match the generated checksum
64
64
  def perform(file_rec, event)
65
65
  path = file_rec.path
66
+ phys_path = file_rec.physical_path
66
67
  md_rec = file_rec.metadata_record
67
68
 
68
69
  # Get the list of existing checksums for the file and normalize algorithm names
@@ -89,19 +90,19 @@ module Longleaf
89
90
  end
90
91
 
91
92
  digest = DigestHelper::start_digest(alg)
92
- digest.file(path)
93
+ digest.file(phys_path)
93
94
  generated_digest = digest.hexdigest
94
95
 
95
96
  # Store the missing checksum if using the 'generate' behavior
96
97
  if existing_digest.nil? && @absent_digest_behavior == GENERATE_IF_ABSENT
97
98
  md_rec.checksums[alg] = generated_digest
98
- logger.info("Generated and stored digest using algorithm '#{alg}' for file #{path}")
99
+ logger.info("Generated and stored digest using algorithm '#{alg}' for file #{phys_path}")
99
100
  else
100
101
  # Compare the new digest to the one on record
101
102
  if existing_digest == generated_digest
102
- logger.info("Fixity check using algorithm '#{alg}' succeeded for file #{path}")
103
+ logger.info("Fixity check using algorithm '#{alg}' succeeded for file #{phys_path}")
103
104
  else
104
- raise ChecksumMismatchError.new("Fixity check using algorithm '#{alg}' failed for file #{path}: expected '#{existing_digest}', calculated '#{generated_digest}.'")
105
+ raise ChecksumMismatchError.new("Fixity check using algorithm '#{alg}' failed for file #{phys_path}: expected '#{existing_digest}', calculated '#{generated_digest}.'")
105
106
  end
106
107
  end
107
108
  end
@@ -51,9 +51,6 @@ module Longleaf
51
51
  + " rsync_options may not include the following: #{DISALLOWED_OPTIONS.join(' ')}")
52
52
  end
53
53
 
54
- # Add -R (--relative) in to command options to ensure full path gets replicated
55
- @options = @options + " -R"
56
-
57
54
  # Set and validate the replica collision policy
58
55
  @collision_policy = @service_def.properties[SF::COLLISION_PROPERTY] || SF::DEFAULT_COLLISION_POLICY
59
56
  if !SF::VALID_COLLISION_POLICIES.include?(@collision_policy)
@@ -101,17 +98,36 @@ module Longleaf
101
98
  else
102
99
  dest_path = destination
103
100
  end
104
-
101
+
102
+ logical_physical_same = file_rec.path == file_rec.physical_path
105
103
  # Determine the path to the file being replicated relative to its storage location
106
104
  rel_path = file_rec.storage_location.relativize(file_rec.path)
107
- # source path with . so that rsync will only create destination directories starting from that point
108
- source_path = File.join(file_rec.storage_location.path, "./#{rel_path}")
105
+
106
+ options = @options
107
+ if logical_physical_same
108
+ options = options + " -R"
109
+ # source path with . so that rsync will only create destination directories starting from that point
110
+ source_path = File.join(file_rec.storage_location.path, "./#{rel_path}")
111
+ else
112
+ options = options + " --no-relative"
113
+ source_path = file_rec.physical_path
114
+ dest_path = File.join(dest_path, rel_path)
115
+ if (dest_is_storage_loc && destination.is_a?(Longleaf::FilesystemStorageLocation)) || !dest_is_storage_loc
116
+ # Fill in missing parent directories, as rsync cannot do so when specifying a different source and dest filename
117
+ dirname = File.dirname(dest_path)
118
+ logger.debug("Creating parent dirs #{dirname} for #{file_rec.path}")
119
+ FileUtils.mkdir_p(dirname)
120
+ else
121
+ raise PreservationServiceError.new(
122
+ "Destination #{destination.name} does not currently support separate physical and logical paths")
123
+ end
124
+ end
109
125
 
110
126
  # Check that the destination is available because attempting to write
111
127
  verify_destination_available(destination, file_rec)
112
128
 
113
- logger.debug("Invoking rsync with command: #{@command} \"#{source_path}\" \"#{dest_path}\" #{@options}")
114
- stdout, stderr, status = Open3.capture3("#{@command} \"#{source_path}\" \"#{dest_path}\" #{@options}")
129
+ logger.debug("Invoking rsync with command: #{@command} \"#{source_path}\" \"#{dest_path}\" #{options}")
130
+ stdout, stderr, status = Open3.capture3("#{@command} \"#{source_path}\" \"#{dest_path}\" #{options}")
115
131
  raise PreservationServiceError.new("Failed to replicate #{file_rec.path} to #{dest_path}: #{stderr}") \
116
132
  unless status.success?
117
133
 
@@ -83,8 +83,6 @@ module Longleaf
83
83
  # Determine the path to the file being replicated relative to its storage location
84
84
  rel_path = file_rec.storage_location.relativize(file_rec.path)
85
85
 
86
- content_md5 = get_content_md5(file_rec)
87
-
88
86
  @destinations.each do |destination|
89
87
  # Check that the destination is available before attempting to write
90
88
  verify_destination_available(destination, file_rec)
@@ -92,7 +90,7 @@ module Longleaf
92
90
  rel_to_bucket = destination.relative_to_bucket_path(rel_path)
93
91
  file_obj = destination.s3_bucket.object(rel_to_bucket)
94
92
  begin
95
- file_obj.upload_file(file_rec.path, { :content_md5 => content_md5 })
93
+ file_obj.upload_file(file_rec.physical_path)
96
94
  rescue Aws::S3::Errors::BadDigest => e
97
95
  raise ChecksumMismatchError.new("Transfer to bucket '#{destination.s3_bucket.name}' failed, " \
98
96
  + "MD5 provided did not match the received content for #{file_rec.path}")
@@ -129,15 +127,5 @@ module Longleaf
129
127
  + e.message)
130
128
  end
131
129
  end
132
-
133
- def get_content_md5(file_rec)
134
- md_rec = file_rec.metadata_record
135
- if md_rec.checksums.key?('md5')
136
- # base 64 encode the digest, as is required by the Content-Md5 header
137
- [[md_rec.checksums['md5']].pack("H*")].pack("m0")
138
- else
139
- nil
140
- end
141
- end
142
130
  end
143
131
  end
@@ -32,6 +32,7 @@ module Longleaf
32
32
  checksums = data.delete(MDFields::CHECKSUMS)
33
33
  file_size = data.delete(MDFields::FILE_SIZE)
34
34
  last_modified = data.delete(MDFields::LAST_MODIFIED)
35
+ physical_path = data.delete(MDFields::PHYSICAL_PATH)
35
36
 
36
37
  services = md[MDF::SERVICES]
37
38
  service_records = Hash.new
@@ -57,7 +58,8 @@ module Longleaf
57
58
  deregistered: deregistered,
58
59
  checksums: checksums,
59
60
  file_size: file_size,
60
- last_modified: last_modified)
61
+ last_modified: last_modified,
62
+ physical_path: physical_path)
61
63
  end
62
64
 
63
65
  # Load configuration from a yaml encoded configuration file
@@ -52,6 +52,7 @@ module Longleaf
52
52
  data[MDF::CHECKSUMS] = metadata.checksums unless metadata.checksums && metadata.checksums.empty?
53
53
  data[MDF::FILE_SIZE] = metadata.file_size unless metadata.file_size.nil?
54
54
  data[MDF::LAST_MODIFIED] = metadata.last_modified if metadata.last_modified
55
+ data[MDF::PHYSICAL_PATH] = metadata.physical_path if metadata.physical_path
55
56
 
56
57
  props[MDF::DATA] = data
57
58
 
@@ -29,6 +29,11 @@ module Longleaf
29
29
  @checksums[alg] = value
30
30
  self
31
31
  end
32
+
33
+ def with_physical_path(phys_path)
34
+ @physical_path = phys_path
35
+ self
36
+ end
32
37
 
33
38
  def with_service(name, timestamp: ServiceDateHelper::formatted_timestamp, run_needed: false, properties: nil,
34
39
  failure_timestamp: nil)
@@ -56,7 +61,8 @@ module Longleaf
56
61
  registered: @registered,
57
62
  checksums: @checksums,
58
63
  file_size: @file_size,
59
- last_modified: @last_modified)
64
+ last_modified: @last_modified,
65
+ physical_path: @physical_path)
60
66
  end
61
67
 
62
68
  # Add the generated metadata record to the given file record
@@ -1,3 +1,3 @@
1
1
  module Longleaf
2
- VERSION = "1.0.0"
2
+ VERSION = "1.1.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: longleaf
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ben Pennell
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-10-14 00:00:00.000000000 Z
11
+ date: 2021-03-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: thor
@@ -372,6 +372,7 @@ files:
372
372
  - lib/longleaf.rb
373
373
  - lib/longleaf/candidates/file_selector.rb
374
374
  - lib/longleaf/candidates/manifest_digest_provider.rb
375
+ - lib/longleaf/candidates/physical_path_provider.rb
375
376
  - lib/longleaf/candidates/registered_file_selector.rb
376
377
  - lib/longleaf/candidates/service_candidate_filesystem_iterator.rb
377
378
  - lib/longleaf/candidates/service_candidate_index_iterator.rb
@@ -449,7 +450,7 @@ licenses:
449
450
  - Apache-2.0
450
451
  metadata:
451
452
  source_code_uri: https://github.com/UNC-Libraries/longleaf-preservation
452
- post_install_message:
453
+ post_install_message:
453
454
  rdoc_options: []
454
455
  require_paths:
455
456
  - lib
@@ -464,8 +465,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
464
465
  - !ruby/object:Gem::Version
465
466
  version: '0'
466
467
  requirements: []
467
- rubygems_version: 3.1.2
468
- signing_key:
468
+ rubygems_version: 3.1.4
469
+ signing_key:
469
470
  specification_version: 4
470
471
  summary: Longleaf preservation services tool
471
472
  test_files: []