cul-preservation_utils 0.1.6 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +24 -10
- data/lib/cul/preservation_utils/file_path.rb +9 -9
- data/lib/cul/preservation_utils/version.rb +1 -1
- metadata +13 -13
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6726052ab3c57e5a2a4e2a3efad429007d1f53b76d0814a38041ec50991c0594
|
4
|
+
data.tar.gz: 3c2c35d368aaadaef863fbbbbf39f273a1feed861de69a69660a693cc787ff26
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5efa81994783bd57cfdda961a3b36b7cac3bd7b9af8f3702bd78e4df48b54a6811bdc8e6cc98fb89eeb6dc617de3e879bf1b6ab58ebccc51ae0fbb936051fe0c
|
7
|
+
data.tar.gz: 7b2a2df9bc67d82d98c3f961b9736bc0ac3d27d9cd3383071fcdab278bc8f41d828c141d0848d42c5de9a28b7783a33e3ccd0f294b513ad3b8cca3c0607f8b85
|
data/README.md
CHANGED
@@ -1,28 +1,42 @@
|
|
1
1
|
# Cul::PreservationUtils
|
2
2
|
|
3
|
-
|
3
|
+
A collection of modules with Utility Methods related to working with Preservation objects.
|
4
4
|
|
5
|
-
|
5
|
+
Includes:
|
6
|
+
- **FilePath**: a module for standardizing filepaths for objects used in our Preservation services.
|
6
7
|
|
7
|
-
|
8
|
-
|
9
|
-
TODO: Replace `UPDATE_WITH_YOUR_GEM_NAME_IMMEDIATELY_AFTER_RELEASE_TO_RUBYGEMS_ORG` with your gem name right after releasing it to RubyGems.org. Please do not do it earlier due to security reasons. Alternatively, replace this section with instructions to install your gem from git if you don't plan to release to RubyGems.org.
|
8
|
+
(More modules will be added in the future)
|
10
9
|
|
11
|
-
|
10
|
+
Preservation objects are put in cloud storage, either with Google Cloud or Amazon Web Services. Validation rules for object file paths were implemented based on the character sets allowed in object names in Google Cloud and AWS.
|
12
11
|
|
12
|
+
## Installation
|
13
13
|
```bash
|
14
|
-
bundle add
|
14
|
+
bundle add cul-preservation_utils
|
15
15
|
```
|
16
16
|
|
17
17
|
If bundler is not being used to manage dependencies, install the gem by executing:
|
18
18
|
|
19
19
|
```bash
|
20
|
-
gem install
|
20
|
+
gem install cul-preservation_utils
|
21
21
|
```
|
22
22
|
|
23
23
|
## Usage
|
24
24
|
|
25
|
-
|
25
|
+
You can access the FilePath module via namespace constants:
|
26
|
+
```
|
27
|
+
require 'cul/preservation_utils'
|
28
|
+
|
29
|
+
# Determine if a file path is valid or not (invalid means it contains unsupported characters)
|
30
|
+
|
31
|
+
test_file_path = 'top_dir/!a$b%c%/我能.我能'
|
32
|
+
Cul::PreservationUtils::FilePath.valid_file_path?(test_file_path)
|
33
|
+
#=> false
|
34
|
+
|
35
|
+
# Remediate a file path
|
36
|
+
|
37
|
+
remediated_file_path = Cul::PreservationUtils::FilePath.remediate_file_path(test_file_path)
|
38
|
+
puts remediated_file_path #=> 'top_dir/_a_b_c_/Wo_Neng_.Wo_Neng_'
|
39
|
+
```
|
26
40
|
|
27
41
|
## Development
|
28
42
|
|
@@ -32,4 +46,4 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
|
|
32
46
|
|
33
47
|
## Contributing
|
34
48
|
|
35
|
-
Bug reports and pull requests are welcome on GitHub at https://github.com/
|
49
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/cul/cul-preservation_utils.
|
@@ -24,7 +24,7 @@ module Cul
|
|
24
24
|
|
25
25
|
DISALLOWED_ASCII_REGEX = '[^-a-zA-Z0-9_.()]'
|
26
26
|
|
27
|
-
def
|
27
|
+
def valid_file_path?(path_filename)
|
28
28
|
return false if ['', '.', '..', '/'].include? path_filename
|
29
29
|
|
30
30
|
pathname = Pathname.new(path_filename)
|
@@ -47,10 +47,10 @@ module Cul
|
|
47
47
|
true
|
48
48
|
end
|
49
49
|
|
50
|
-
def
|
51
|
-
return filepath if !unavailable_file_paths.include?(filepath) &&
|
50
|
+
def remediate_file_path(filepath, unavailable_file_paths = []) # rubocop:disable Metrics/AbcSize
|
51
|
+
return filepath if !unavailable_file_paths.include?(filepath) && valid_file_path?(filepath)
|
52
52
|
|
53
|
-
|
53
|
+
argument_check(filepath)
|
54
54
|
|
55
55
|
pathname = Pathname.new(filepath)
|
56
56
|
|
@@ -60,21 +60,21 @@ module Cul
|
|
60
60
|
filename_valid_ascii =
|
61
61
|
Stringex::Unidecoder.decode(filename.to_s).gsub(/#{DISALLOWED_ASCII_REGEX}/, '_').gsub(/\.$/, '_')
|
62
62
|
|
63
|
-
remediated_key_name =
|
63
|
+
remediated_key_name = remediate_path(path_to_file, remediated_pathname).join(filename_valid_ascii).to_s
|
64
64
|
|
65
65
|
# no collisions
|
66
66
|
return remediated_key_name unless unavailable_file_paths.include? remediated_key_name
|
67
67
|
|
68
68
|
# handle collisions
|
69
|
-
|
69
|
+
handle_collision(remediated_key_name, unavailable_file_paths)
|
70
70
|
end
|
71
71
|
|
72
|
-
def
|
72
|
+
def argument_check(filepath_key)
|
73
73
|
raise ArgumentError, "Bad argument: '#{filepath_key}'" if ['', '.', '..', '/'].include? filepath_key
|
74
74
|
raise ArgumentError, 'Bad argument: absolute path' if filepath_key.start_with?('/')
|
75
75
|
end
|
76
76
|
|
77
|
-
def
|
77
|
+
def remediate_path(path_to_file, remediated_pathname)
|
78
78
|
# remediate each component in the path to the file
|
79
79
|
path_to_file.each_filename do |path_segment|
|
80
80
|
remediated_path_segment = Stringex::Unidecoder.decode(path_segment).gsub(/#{DISALLOWED_ASCII_REGEX}/, '_')
|
@@ -83,7 +83,7 @@ module Cul
|
|
83
83
|
remediated_pathname
|
84
84
|
end
|
85
85
|
|
86
|
-
def
|
86
|
+
def handle_collision(remediated_file_path, unavailable_file_path)
|
87
87
|
pathname = Pathname.new(remediated_file_path)
|
88
88
|
base = pathname.to_s.delete_suffix(pathname.extname)
|
89
89
|
new_remediated_file_path = "#{base}_1#{pathname.extname}"
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cul-preservation_utils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Bradley Goldsmith
|
@@ -10,39 +10,39 @@ cert_chain: []
|
|
10
10
|
date: 1980-01-02 00:00:00.000000000 Z
|
11
11
|
dependencies:
|
12
12
|
- !ruby/object:Gem::Dependency
|
13
|
-
name:
|
13
|
+
name: ostruct
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
15
15
|
requirements:
|
16
16
|
- - "~>"
|
17
17
|
- !ruby/object:Gem::Version
|
18
|
-
version:
|
19
|
-
- - ">="
|
20
|
-
- !ruby/object:Gem::Version
|
21
|
-
version: 2.8.6
|
18
|
+
version: 0.1.0
|
22
19
|
type: :runtime
|
23
20
|
prerelease: false
|
24
21
|
version_requirements: !ruby/object:Gem::Requirement
|
25
22
|
requirements:
|
26
23
|
- - "~>"
|
27
24
|
- !ruby/object:Gem::Version
|
28
|
-
version:
|
29
|
-
- - ">="
|
30
|
-
- !ruby/object:Gem::Version
|
31
|
-
version: 2.8.6
|
25
|
+
version: 0.1.0
|
32
26
|
- !ruby/object:Gem::Dependency
|
33
|
-
name:
|
27
|
+
name: stringex
|
34
28
|
requirement: !ruby/object:Gem::Requirement
|
35
29
|
requirements:
|
36
30
|
- - "~>"
|
37
31
|
- !ruby/object:Gem::Version
|
38
|
-
version:
|
32
|
+
version: '2.8'
|
33
|
+
- - ">="
|
34
|
+
- !ruby/object:Gem::Version
|
35
|
+
version: 2.8.6
|
39
36
|
type: :runtime
|
40
37
|
prerelease: false
|
41
38
|
version_requirements: !ruby/object:Gem::Requirement
|
42
39
|
requirements:
|
43
40
|
- - "~>"
|
44
41
|
- !ruby/object:Gem::Version
|
45
|
-
version:
|
42
|
+
version: '2.8'
|
43
|
+
- - ">="
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: 2.8.6
|
46
46
|
description: PreservationUtils provides the FilePath module for standardizing filepaths
|
47
47
|
for objects used in our Preservation services.
|
48
48
|
email:
|