longleaf 0.3.0 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.circleci/config.yml +12 -2
- data/README.md +11 -1
- data/lib/longleaf/candidates/manifest_digest_provider.rb +17 -0
- data/lib/longleaf/candidates/single_digest_provider.rb +13 -0
- data/lib/longleaf/cli.rb +49 -36
- data/lib/longleaf/commands/register_command.rb +3 -3
- data/lib/longleaf/commands/validate_config_command.rb +1 -1
- data/lib/longleaf/events/register_event.rb +8 -4
- data/lib/longleaf/helpers/case_insensitive_hash.rb +38 -0
- data/lib/longleaf/helpers/digest_helper.rb +7 -1
- data/lib/longleaf/helpers/s3_uri_helper.rb +86 -0
- data/lib/longleaf/helpers/selection_options_parser.rb +189 -0
- data/lib/longleaf/helpers/service_date_helper.rb +29 -1
- data/lib/longleaf/indexing/sequel_index_driver.rb +2 -20
- data/lib/longleaf/models/app_fields.rb +4 -2
- data/lib/longleaf/models/filesystem_metadata_location.rb +56 -0
- data/lib/longleaf/models/filesystem_storage_location.rb +52 -0
- data/lib/longleaf/models/metadata_location.rb +47 -0
- data/lib/longleaf/models/metadata_record.rb +3 -1
- data/lib/longleaf/models/s3_storage_location.rb +133 -0
- data/lib/longleaf/models/service_fields.rb +4 -0
- data/lib/longleaf/models/storage_location.rb +17 -48
- data/lib/longleaf/models/storage_types.rb +9 -0
- data/lib/longleaf/preservation_services/rsync_replication_service.rb +9 -11
- data/lib/longleaf/preservation_services/s3_replication_service.rb +143 -0
- data/lib/longleaf/services/application_config_deserializer.rb +26 -4
- data/lib/longleaf/services/application_config_validator.rb +17 -6
- data/lib/longleaf/services/configuration_validator.rb +64 -4
- data/lib/longleaf/services/filesystem_location_validator.rb +16 -0
- data/lib/longleaf/services/metadata_deserializer.rb +41 -9
- data/lib/longleaf/services/metadata_persistence_manager.rb +3 -2
- data/lib/longleaf/services/metadata_serializer.rb +94 -13
- data/lib/longleaf/services/metadata_validator.rb +76 -0
- data/lib/longleaf/services/s3_location_validator.rb +19 -0
- data/lib/longleaf/services/service_definition_validator.rb +16 -8
- data/lib/longleaf/services/service_manager.rb +7 -15
- data/lib/longleaf/services/service_mapping_validator.rb +26 -15
- data/lib/longleaf/services/storage_location_manager.rb +38 -12
- data/lib/longleaf/services/storage_location_validator.rb +41 -30
- data/lib/longleaf/specs/config_builder.rb +10 -3
- data/lib/longleaf/specs/config_validator_helpers.rb +16 -0
- data/lib/longleaf/specs/metadata_builder.rb +1 -0
- data/lib/longleaf/version.rb +1 -1
- data/longleaf.gemspec +3 -1
- data/mkdocs.yml +2 -1
- metadata +48 -8
- data/.travis.yml +0 -4
- data/lib/longleaf/services/storage_path_validator.rb +0 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a5f659d06bd9b1094ed6dd0e10d108282bacde2522e4f07f5a2118dcbffb030e
|
4
|
+
data.tar.gz: '0368bb1228372a7b617aa148844f8cc88fbd453b3a704aa0c66d2d1b0abd21ef'
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 12917813fba8f517585d9e20d22044fce229451b5a22b10edd44f3accaea807a4049d867980fb8b301202f6d0bd4f047b0a06df78d46d054e8e7cc4eeeda9cb6
|
7
|
+
data.tar.gz: fe2f94a2f0bdbe5b2095c63275adca26a5c0f8a6d8dd013bdfa9a1465073085f80a8f2e32b0d5aa7b53c19cce58e48bb1fa9b16479db9b3bcd0af4c649128188
|
data/.circleci/config.yml
CHANGED
@@ -53,24 +53,34 @@ commands:
|
|
53
53
|
|
54
54
|
- store_artifacts:
|
55
55
|
path: coverage
|
56
|
+
install_new_bundler:
|
57
|
+
steps:
|
58
|
+
- run:
|
59
|
+
name: upgrade bundler
|
60
|
+
command: |
|
61
|
+
sudo gem update --system
|
62
|
+
sudo gem uninstall bundler
|
63
|
+
sudo rm /usr/local/bin/bundle
|
64
|
+
sudo gem install bundler
|
56
65
|
|
57
66
|
jobs:
|
58
67
|
test_with_ruby_23:
|
59
68
|
docker:
|
60
69
|
- image: circleci/ruby:2.3
|
61
70
|
environment:
|
62
|
-
BUNDLER_VERSION: 1.
|
71
|
+
BUNDLER_VERSION: 2.1.4
|
63
72
|
|
64
73
|
working_directory: ~/repo
|
65
74
|
|
66
75
|
steps:
|
76
|
+
- install_new_bundler
|
67
77
|
- build_and_run_tests
|
68
78
|
|
69
79
|
test_with_ruby_latest:
|
70
80
|
docker:
|
71
81
|
- image: circleci/ruby:latest
|
72
82
|
environment:
|
73
|
-
BUNDLER_VERSION: 1.
|
83
|
+
BUNDLER_VERSION: 2.1.4
|
74
84
|
|
75
85
|
working_directory: ~/repo
|
76
86
|
|
data/README.md
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# Longleaf
|
2
2
|
Code: [![CircleCI](https://circleci.com/gh/UNC-Libraries/longleaf-preservation.svg?style=svg)](https://circleci.com/gh/UNC-Libraries/longleaf-preservation)
|
3
3
|
|
4
|
-
Longleaf is a command-line tool which allows users to configure a set of storage locations and define custom sets of preservation services to run on their contents. These services are executed in response to applicable preservation events issued by clients. Its primary goal is to provide tools to create a simple and customizable preservation environment.
|
4
|
+
Longleaf is a command-line tool which allows users to configure a set of storage locations and define custom sets of preservation services to run on their contents. These services are executed in response to applicable preservation events issued by clients. Its primary goal is to provide tools to create a simple and customizable preservation environment. Longleaf:
|
5
5
|
|
6
6
|
* Offers a predictable command-line interface and integrates with standard command-line tools.
|
7
7
|
* Offers configurable and customizable criteria based preservation workflows.
|
@@ -108,6 +108,16 @@ bundle install --with postgres
|
|
108
108
|
|
109
109
|
Options include: postgres, mysql2, mysql, sqlite, amalgalite
|
110
110
|
|
111
|
+
To set up an index, you will need to add a `system > index` section to your configuration with the details of the database to use for the index. Then, to set up the database, run:
|
112
|
+
|
113
|
+
```
|
114
|
+
longleaf setup_index -c <config_file>
|
115
|
+
```
|
116
|
+
And for a one-time indexing:
|
117
|
+
```
|
118
|
+
longleaf reindex -c <config_file>
|
119
|
+
```
|
120
|
+
|
111
121
|
## Contributing
|
112
122
|
|
113
123
|
Bug reports and pull requests are welcome on GitHub at https://github.com/UNC-Libraries/longleaf-preservation.
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module Longleaf
|
2
|
+
# Provides digests for files from a manifest
|
3
|
+
class ManifestDigestProvider
|
4
|
+
# @param digests_mapping [Hash] hash which maps file paths to hashes of digests
|
5
|
+
def initialize(digests_mapping)
|
6
|
+
@digests_mapping = digests_mapping
|
7
|
+
end
|
8
|
+
|
9
|
+
# @param file_path [String] path of file
|
10
|
+
# @return hash containing all the manifested digests for the given path, or nil
|
11
|
+
def get_digests(file_path)
|
12
|
+
# return nil if key not found, in case the hash has default values
|
13
|
+
return nil unless @digests_mapping.key?(file_path)
|
14
|
+
@digests_mapping[file_path]
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
data/lib/longleaf/cli.rb
CHANGED
@@ -9,8 +9,7 @@ require 'longleaf/commands/validate_metadata_command'
|
|
9
9
|
require 'longleaf/commands/register_command'
|
10
10
|
require 'longleaf/commands/reindex_command'
|
11
11
|
require 'longleaf/commands/preserve_command'
|
12
|
-
require 'longleaf/
|
13
|
-
require 'longleaf/candidates/registered_file_selector'
|
12
|
+
require 'longleaf/helpers/selection_options_parser'
|
14
13
|
|
15
14
|
module Longleaf
|
16
15
|
# Main commandline interface setup for Longleaf using Thor.
|
@@ -72,6 +71,16 @@ module Longleaf
|
|
72
71
|
:required => false,
|
73
72
|
:desc => 'Name or comma separated names of storage locations to perform this operation over.' })
|
74
73
|
|
74
|
+
add_shared_option(
|
75
|
+
:from_list, :registered_selection, {
|
76
|
+
:aliases => "-l",
|
77
|
+
:required => false,
|
78
|
+
:desc => %q{Provide a list of files to perform this operation on. The list must be new line separated, one file per line.
|
79
|
+
To provide a list from a file:
|
80
|
+
'-l /path/to/file_list.txt'
|
81
|
+
To provide a list from STDIN:
|
82
|
+
'-l @-'}})
|
83
|
+
|
75
84
|
# Commands
|
76
85
|
map %w[--version] => :__print_version
|
77
86
|
desc "--version", "Prints the Longleaf version number."
|
@@ -81,12 +90,39 @@ module Longleaf
|
|
81
90
|
|
82
91
|
desc "register", "Register files with Longleaf"
|
83
92
|
shared_options_group(:file_selection)
|
93
|
+
method_option(:manifest,
|
94
|
+
:aliases => "-m",
|
95
|
+
:type => :array,
|
96
|
+
:desc => %q{Checksum manifests of files to register. Supports the following formats:
|
97
|
+
To submit a md5 manifest from a file
|
98
|
+
'-m md5:/path/to/manifest.txt'
|
99
|
+
|
100
|
+
To provide a sha1 manifest from STDIN
|
101
|
+
'-m sha1:@-'
|
102
|
+
Where the content in STDIN adheres to the format:
|
103
|
+
<digest> <path>
|
104
|
+
<digest> <path>
|
105
|
+
...
|
106
|
+
|
107
|
+
To submit multiple manifests from files
|
108
|
+
'-m md5:/path/to/manifest1.txt sha1:/path/to/manifest2.txt'
|
109
|
+
|
110
|
+
To provide multiple digests via STDIN
|
111
|
+
'-m @-'
|
112
|
+
Where the content in STDIN adheres to the following format:
|
113
|
+
sha1:
|
114
|
+
<digest> <path>
|
115
|
+
...
|
116
|
+
md5:
|
117
|
+
<digest> <path>
|
118
|
+
...})
|
84
119
|
method_option(:force,
|
85
120
|
:type => :boolean,
|
86
121
|
:default => false,
|
87
122
|
:desc => 'Force the registration of already registered files.')
|
88
123
|
method_option(:checksums,
|
89
|
-
:desc => %q{Checksums for the submitted file.
|
124
|
+
:desc => %q{Checksums for the submitted file. Only applicable with the -f option.
|
125
|
+
Each checksum must be prefaced with an algorithm prefix. Multiple checksums must be comma separated. If multiple files were submitted, they will be provided with the same checksums. For example:
|
90
126
|
'--checksums "md5:d8e8fca2dc0f896fd7cb4cb0031ba249,sha1:4e1243bd22c66e76c2ba9eddc1f91394e57f9f83"'})
|
91
127
|
shared_options_group(:common)
|
92
128
|
# Register event command
|
@@ -96,25 +132,16 @@ module Longleaf
|
|
96
132
|
|
97
133
|
app_config_manager = load_application_config(options)
|
98
134
|
|
99
|
-
file_selector =
|
100
|
-
|
101
|
-
checksums = options[:checksums]
|
102
|
-
# validate checksum list format, must a comma delimited list of prefix:checksums
|
103
|
-
if /^[^:,]+:[^:,]+(,[^:,]+:[^:,]+)*$/.match(checksums)
|
104
|
-
# convert checksum list into hash with prefix as key
|
105
|
-
checksums = Hash[*checksums.split(/\s*[:,]\s*/)]
|
106
|
-
else
|
107
|
-
logger.failure("Invalid checksums parameter format, see `longleaf help <command>` for more information")
|
108
|
-
exit 1
|
109
|
-
end
|
110
|
-
end
|
135
|
+
file_selector, digest_provider = SelectionOptionsParser.parse_registration_selection_options(
|
136
|
+
options, app_config_manager)
|
111
137
|
|
112
138
|
command = RegisterCommand.new(app_config_manager)
|
113
|
-
exit command.execute(file_selector: file_selector, force: options[:force],
|
139
|
+
exit command.execute(file_selector: file_selector, force: options[:force], digest_provider: digest_provider)
|
114
140
|
end
|
115
141
|
|
116
142
|
desc "deregister", "Deregister files with Longleaf"
|
117
143
|
shared_options_group(:file_selection)
|
144
|
+
shared_options_group(:registered_selection)
|
118
145
|
method_option(:force,
|
119
146
|
:type => :boolean,
|
120
147
|
:default => false,
|
@@ -126,7 +153,7 @@ module Longleaf
|
|
126
153
|
setup_logger(options)
|
127
154
|
|
128
155
|
app_config_manager = load_application_config(options)
|
129
|
-
file_selector = create_registered_selector(options, app_config_manager)
|
156
|
+
file_selector = SelectionOptionsParser.create_registered_selector(options, app_config_manager)
|
130
157
|
|
131
158
|
command = DeregisterCommand.new(app_config_manager)
|
132
159
|
exit command.execute(file_selector: file_selector, force: options[:force])
|
@@ -134,6 +161,7 @@ module Longleaf
|
|
134
161
|
|
135
162
|
desc "preserve", "Perform preservation services on files with Longleaf"
|
136
163
|
shared_options_group(:file_selection)
|
164
|
+
shared_options_group(:registered_selection)
|
137
165
|
method_option(:force,
|
138
166
|
:type => :boolean,
|
139
167
|
:default => false,
|
@@ -145,7 +173,7 @@ module Longleaf
|
|
145
173
|
|
146
174
|
extend_load_path(options[:load_path])
|
147
175
|
app_config_manager = load_application_config(options)
|
148
|
-
file_selector = create_registered_selector(options, app_config_manager)
|
176
|
+
file_selector = SelectionOptionsParser.create_registered_selector(options, app_config_manager)
|
149
177
|
|
150
178
|
command = PreserveCommand.new(app_config_manager)
|
151
179
|
exit command.execute(file_selector: file_selector, force: options[:force])
|
@@ -164,6 +192,7 @@ module Longleaf
|
|
164
192
|
|
165
193
|
desc "validate_metadata", "Validate metadata files."
|
166
194
|
shared_options_group(:file_selection)
|
195
|
+
shared_options_group(:registered_selection)
|
167
196
|
shared_options_group(:common)
|
168
197
|
# File metadata validation command
|
169
198
|
def validate_metadata
|
@@ -171,7 +200,7 @@ module Longleaf
|
|
171
200
|
setup_logger(options)
|
172
201
|
|
173
202
|
app_config_manager = load_application_config(options)
|
174
|
-
file_selector = create_registered_selector(options, app_config_manager)
|
203
|
+
file_selector = SelectionOptionsParser.create_registered_selector(options, app_config_manager)
|
175
204
|
|
176
205
|
exit Longleaf::ValidateMetadataCommand.new(app_config_manager).execute(file_selector: file_selector)
|
177
206
|
end
|
@@ -220,7 +249,7 @@ module Longleaf
|
|
220
249
|
begin
|
221
250
|
app_manager = ApplicationConfigDeserializer.deserialize(options[:config])
|
222
251
|
rescue ConfigurationError => err
|
223
|
-
logger.failure("Failed to load application configuration due to the following issue:\n#{err.message}")
|
252
|
+
logger.failure("Failed to load application configuration due to the following issue(s):\n#{err.message}")
|
224
253
|
exit 1
|
225
254
|
end
|
226
255
|
end
|
@@ -231,22 +260,6 @@ module Longleaf
|
|
231
260
|
end
|
232
261
|
end
|
233
262
|
|
234
|
-
def create_file_selector(options, app_config_manager, selector_class: FileSelector)
|
235
|
-
file_paths = options[:file]&.split(/\s*,\s*/)
|
236
|
-
storage_locations = options[:location]&.split(/\s*,\s*/)
|
237
|
-
|
238
|
-
begin
|
239
|
-
selector_class.new(file_paths: file_paths, storage_locations: storage_locations, app_config: app_config_manager)
|
240
|
-
rescue ArgumentError => e
|
241
|
-
logger.failure(e.message)
|
242
|
-
exit 1
|
243
|
-
end
|
244
|
-
end
|
245
|
-
|
246
|
-
def create_registered_selector(options, app_config_manager)
|
247
|
-
create_file_selector(options, app_config_manager, selector_class: RegisteredFileSelector)
|
248
|
-
end
|
249
|
-
|
250
263
|
def extend_load_path(load_paths)
|
251
264
|
load_paths = load_paths&.split(/\s*,\s*/)
|
252
265
|
load_paths&.each { |path| $LOAD_PATH.unshift(path) }
|
@@ -16,9 +16,9 @@ module Longleaf
|
|
16
16
|
# Execute the register command on the given parameters
|
17
17
|
# @param file_selector [FileSelector] selector for files to register
|
18
18
|
# @param force [Boolean] force flag
|
19
|
-
# @param
|
19
|
+
# @param digest_provider [DigestProvider] object which provides digests for files being registered
|
20
20
|
# @return [Integer] status code
|
21
|
-
def execute(file_selector:, force: false,
|
21
|
+
def execute(file_selector:, force: false, digest_provider: nil)
|
22
22
|
start_time = Time.now
|
23
23
|
logger.info('Performing register command')
|
24
24
|
begin
|
@@ -32,7 +32,7 @@ module Longleaf
|
|
32
32
|
file_rec = FileRecord.new(f_path, storage_location)
|
33
33
|
|
34
34
|
register_event = RegisterEvent.new(file_rec: file_rec, force: force, app_manager: @app_manager,
|
35
|
-
|
35
|
+
digest_provider: digest_provider)
|
36
36
|
track_status(register_event.perform)
|
37
37
|
end
|
38
38
|
rescue InvalidStoragePathError, StorageLocationUnavailableError => err
|
@@ -26,7 +26,7 @@ module Longleaf
|
|
26
26
|
|
27
27
|
record_success("Application configuration passed validation: #{@config_path}")
|
28
28
|
rescue Longleaf::ConfigurationError, Longleaf::StorageLocationUnavailableError => err
|
29
|
-
record_failure("Application configuration invalid due to the following issue:\n#{err.message}")
|
29
|
+
record_failure("Application configuration invalid due to the following issue(s):\n#{err.message}")
|
30
30
|
rescue => err
|
31
31
|
record_failure("Failed to validate application configuration", error: err)
|
32
32
|
end
|
@@ -14,7 +14,8 @@ module Longleaf
|
|
14
14
|
# @param file_rec [FileRecord] file record
|
15
15
|
# @param app_manager [ApplicationConfigManager] the application configuration
|
16
16
|
# @param force [boolean] if true, then already registered files will be re-registered
|
17
|
-
|
17
|
+
# @param digest_provider [#get_digests] object which provides digests for files being registered
|
18
|
+
def initialize(file_rec:, app_manager:, force: false, digest_provider: nil)
|
18
19
|
raise ArgumentError.new('Must provide a file_rec parameter') if file_rec.nil?
|
19
20
|
raise ArgumentError.new('Parameter file_rec must be a FileRecord') \
|
20
21
|
unless file_rec.is_a?(FileRecord)
|
@@ -25,7 +26,7 @@ module Longleaf
|
|
25
26
|
@app_manager = app_manager
|
26
27
|
@file_rec = file_rec
|
27
28
|
@force = force
|
28
|
-
@
|
29
|
+
@digest_provider = digest_provider
|
29
30
|
end
|
30
31
|
|
31
32
|
# Perform a registration event on the given file
|
@@ -48,7 +49,10 @@ module Longleaf
|
|
48
49
|
|
49
50
|
populate_file_properties
|
50
51
|
|
51
|
-
|
52
|
+
if !@digest_provider.nil?
|
53
|
+
checksums = @digest_provider.get_digests(@file_rec.path)
|
54
|
+
md_rec.checksums.merge!(checksums) unless checksums.nil?
|
55
|
+
end
|
52
56
|
|
53
57
|
# persist the metadata
|
54
58
|
@app_manager.md_manager.persist(@file_rec)
|
@@ -77,7 +81,7 @@ module Longleaf
|
|
77
81
|
md_rec = @file_rec.metadata_record
|
78
82
|
|
79
83
|
old_md = MetadataDeserializer.deserialize(file_path: @file_rec.metadata_path,
|
80
|
-
digest_algs: @file_rec.storage_location.
|
84
|
+
digest_algs: @file_rec.storage_location.metadata_location.digests)
|
81
85
|
# Copy custom properties
|
82
86
|
old_md.properties.each { |name, value| md_rec.properties[name] = value }
|
83
87
|
# Copy stale-replicas flag per service
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module Longleaf
|
2
|
+
# Hash subclass which provides case insensitive keys, where keys are always downcased.
|
3
|
+
class CaseInsensitiveHash < Hash
|
4
|
+
def [](key)
|
5
|
+
super _insensitive(key)
|
6
|
+
end
|
7
|
+
|
8
|
+
def []=(key, value)
|
9
|
+
super _insensitive(key), value
|
10
|
+
end
|
11
|
+
|
12
|
+
def delete(key)
|
13
|
+
super _insensitive(key)
|
14
|
+
end
|
15
|
+
|
16
|
+
def has_key?(key)
|
17
|
+
super _insensitive(key)
|
18
|
+
end
|
19
|
+
|
20
|
+
def merge(other_hash)
|
21
|
+
super other_hash.map {|k, v| [_insensitive(k), v] }.to_h
|
22
|
+
end
|
23
|
+
|
24
|
+
def merge!(other_hash)
|
25
|
+
super other_hash.map {|k, v| [_insensitive(k), v] }.to_h
|
26
|
+
end
|
27
|
+
|
28
|
+
# Cause this hash to serialize as a regular hash to avoid deserialization failures
|
29
|
+
def encode_with coder
|
30
|
+
coder.represent_map nil, self
|
31
|
+
end
|
32
|
+
|
33
|
+
protected
|
34
|
+
def _insensitive(key)
|
35
|
+
key.respond_to?(:downcase) ? key.downcase : key
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
@@ -13,7 +13,7 @@ module Longleaf
|
|
13
13
|
def self.validate_algorithms(algs)
|
14
14
|
return if algs.nil?
|
15
15
|
if algs.is_a?(String)
|
16
|
-
unless
|
16
|
+
unless self.is_known_algorithm?(algs)
|
17
17
|
raise InvalidDigestAlgorithmError.new("Unknown digest algorithm #{algs}")
|
18
18
|
end
|
19
19
|
else
|
@@ -24,6 +24,12 @@ module Longleaf
|
|
24
24
|
end
|
25
25
|
end
|
26
26
|
|
27
|
+
# @param alg [String] identifier of digest algorithm
|
28
|
+
# @return [Boolean] true if the digest is a valid known algorithm
|
29
|
+
def self.is_known_algorithm?(alg)
|
30
|
+
KNOWN_DIGESTS.include?(alg)
|
31
|
+
end
|
32
|
+
|
27
33
|
# Get a Digest class for the specified algorithm
|
28
34
|
# @param alg [String] name of the digest algorithm
|
29
35
|
# @return [Digest] A digest class for the requested algorithm
|
@@ -0,0 +1,86 @@
|
|
1
|
+
require 'uri'
|
2
|
+
|
3
|
+
module Longleaf
|
4
|
+
# Helper for interacting with s3 uris
|
5
|
+
class S3UriHelper
|
6
|
+
ENDPOINT_PATTERN = /^(.+\.)?s3[.\-]([a-z0-9\-]+[\-.])?[a-z0-9]+\./
|
7
|
+
ALLOWED_SCHEMES = ['http', 'https', 's3']
|
8
|
+
|
9
|
+
# Extract the name of the s3 bucket from the provided url
|
10
|
+
# @param url s3 url
|
11
|
+
# @return the name of the bucket, or nil if the name could not be identified
|
12
|
+
def self.extract_bucket(url)
|
13
|
+
uri = s3_uri(url)
|
14
|
+
|
15
|
+
matches = ENDPOINT_PATTERN.match(uri.host)
|
16
|
+
if matches.nil?
|
17
|
+
raise ArgumentError.new("Provided URI does match the expected pattern for an S3 URI")
|
18
|
+
end
|
19
|
+
|
20
|
+
prefix = matches[1]
|
21
|
+
if prefix.nil? || prefix.empty?
|
22
|
+
# Is a path style url
|
23
|
+
path = uri.path
|
24
|
+
|
25
|
+
return nil if path == '/'
|
26
|
+
|
27
|
+
path_parts = path.split('/')
|
28
|
+
return nil if path_parts.empty?
|
29
|
+
return path_parts[1]
|
30
|
+
else
|
31
|
+
return prefix[0..-2]
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
def self.extract_path(url)
|
36
|
+
uri = s3_uri(url)
|
37
|
+
|
38
|
+
matches = ENDPOINT_PATTERN.match(uri.host)
|
39
|
+
if matches.nil?
|
40
|
+
raise ArgumentError.new("Provided URI does match the expected pattern for an S3 URI")
|
41
|
+
end
|
42
|
+
|
43
|
+
path = uri.path
|
44
|
+
return nil if path == '/' || path.empty?
|
45
|
+
|
46
|
+
# trim off the first slash
|
47
|
+
path = path.partition('/').last
|
48
|
+
|
49
|
+
# Determine if the first part of the path is the bucket name
|
50
|
+
prefix = matches[1]
|
51
|
+
if prefix.nil? || prefix.empty?
|
52
|
+
# trim off the bucket name
|
53
|
+
path = path.partition('/').last
|
54
|
+
end
|
55
|
+
|
56
|
+
path
|
57
|
+
end
|
58
|
+
|
59
|
+
def self.extract_region(url)
|
60
|
+
uri = s3_uri(url)
|
61
|
+
|
62
|
+
matches = ENDPOINT_PATTERN.match(uri.host)
|
63
|
+
|
64
|
+
if matches[2].nil?
|
65
|
+
# No region specified
|
66
|
+
nil
|
67
|
+
else
|
68
|
+
matches[2][0..-2]
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
72
|
+
def self.s3_uri(url)
|
73
|
+
if url.nil?
|
74
|
+
raise ArgumentError.new("url cannot be empty")
|
75
|
+
end
|
76
|
+
uri = URI(url)
|
77
|
+
if !ALLOWED_SCHEMES.include?(uri.scheme&.downcase)
|
78
|
+
raise ArgumentError.new("Invalid scheme for s3 URI #{url}, only http, https and s3 are permitted")
|
79
|
+
end
|
80
|
+
if uri.host.nil?
|
81
|
+
raise ArgumentError.new("Invalid S3 URI, no hostname: #{url}")
|
82
|
+
end
|
83
|
+
uri
|
84
|
+
end
|
85
|
+
end
|
86
|
+
end
|