cul-preservation_utils 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +5 -0
- data/CHANGELOG.md +5 -0
- data/LICENSE.txt +13 -0
- data/README.md +35 -0
- data/Rakefile +12 -0
- data/lib/cul/preservation_utils/file_path.rb +99 -0
- data/lib/cul/preservation_utils/version.rb +7 -0
- data/lib/cul/preservation_utils.rb +13 -0
- data/sig/cul/preservation_utils.rbs +6 -0
- metadata +75 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: e40bd6ef52afe2b4a3e501191ba1da9a63b984c1c192f59f79fbd88f502c7171
|
4
|
+
data.tar.gz: 8b8057c5e56fc06c1e8540b599975e8e10927a9ea823505b1d74c9462662afbf
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 89f2499f64cd91e9c7acfc1e417466b7947d770b3874d1ee745d775da185f1fbe477a32a27882b3321114b702666fcae33ee352b4c6561d325d342e777b8821e
|
7
|
+
data.tar.gz: 3ffdd1f6ab8e17f84edd968c24ab6bb564038e7bc422e96781b7f210b868797516581ecb275399c09b89aee123fa02097417d48485d7730b96fe549b83b2836e
|
data/.rspec
ADDED
data/.rubocop.yml
ADDED
data/CHANGELOG.md
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
Copyright 2025 The Trustees of Columbia University in the City of New York
|
2
|
+
|
3
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
you may not use this file except in compliance with the License.
|
5
|
+
You may obtain a copy of the License at
|
6
|
+
|
7
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
|
9
|
+
Unless required by applicable law or agreed to in writing, software
|
10
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
See the License for the specific language governing permissions and
|
13
|
+
limitations under the License.
|
data/README.md
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
# Cul::PreservationUtils
|
2
|
+
|
3
|
+
TODO: Delete this and the text below, and describe your gem
|
4
|
+
|
5
|
+
Welcome to your new gem! In this directory, you'll find the files you need to be able to package up your Ruby library into a gem. Put your Ruby code in the file `lib/cul/preservation_utils`. To experiment with that code, run `bin/console` for an interactive prompt.
|
6
|
+
|
7
|
+
## Installation
|
8
|
+
|
9
|
+
TODO: Replace `UPDATE_WITH_YOUR_GEM_NAME_IMMEDIATELY_AFTER_RELEASE_TO_RUBYGEMS_ORG` with your gem name right after releasing it to RubyGems.org. Please do not do it earlier due to security reasons. Alternatively, replace this section with instructions to install your gem from git if you don't plan to release to RubyGems.org.
|
10
|
+
|
11
|
+
Install the gem and add to the application's Gemfile by executing:
|
12
|
+
|
13
|
+
```bash
|
14
|
+
bundle add cul-preservation_utils
|
15
|
+
```
|
16
|
+
|
17
|
+
If bundler is not being used to manage dependencies, install the gem by executing:
|
18
|
+
|
19
|
+
```bash
|
20
|
+
gem install cul-preservation_utils
|
21
|
+
```
|
22
|
+
|
23
|
+
## Usage
|
24
|
+
|
25
|
+
TODO: Write usage instructions here
|
26
|
+
|
27
|
+
## Development
|
28
|
+
|
29
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
30
|
+
|
31
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and the created tag, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
32
|
+
|
33
|
+
## Contributing
|
34
|
+
|
35
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/cul/cul-preservation_utils.
|
data/Rakefile
ADDED
@@ -0,0 +1,99 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'pathname'
|
4
|
+
require 'stringex'
|
5
|
+
|
6
|
+
# Cul::PreservationUtils::FilePath
|
7
|
+
# The FilePath module provides utilities for validating that file names and file paths do not
|
8
|
+
# include any characters that may be problematic for preservation objects that
|
9
|
+
# will be stored in Google or Amazon cloud services.
|
10
|
+
# Additionally, this has the benefit of having matching names in both local
|
11
|
+
# copies of such Preservation objects and copies stored on the cloud.
|
12
|
+
module Cul
  module PreservationUtils
    # Validation and remediation of relative file paths so that every path
    # segment uses only a conservative set of ASCII characters.
    module FilePath
      # The following code was taken from the ATC app's Atc::Utils::ObjectKeyNameUtils module
      # Credit to fcd1

      # About Cloud Storage objects: https://cloud.google.com/storage/docs/objects
      # According to the above (and quite probably most Google Cloud Storage documentation),
      # objects have names
      # AWS - Creating object key names:
      # https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html
      # As seen in the title for the above page, an object in AWS S3 has a key name (or key)

      # Regex source (a character class) matching any single character that is
      # NOT allowed in a path segment. Kept as a String for backward compatibility.
      DISALLOWED_ASCII_REGEX = '[^-a-zA-Z0-9_.()]'.freeze

      # Compiled once here instead of being rebuilt from the string on every
      # call and on every path segment.
      DISALLOWED_ASCII_PATTERN = Regexp.new(DISALLOWED_ASCII_REGEX).freeze
      private_constant :DISALLOWED_ASCII_PATTERN

      # Inputs that are rejected outright by both validation and remediation.
      REJECTED_PATHS = ['', '.', '..', '/'].freeze
      private_constant :REJECTED_PATHS

      # Reports whether +path_filename+ is an acceptable relative file path.
      #
      # A path is rejected when it is one of '', '.', '..', '/', when it is
      # absolute, when its final component ends with '.', or when any
      # component contains a character outside -a-zA-Z0-9_.()
      #
      # @param path_filename [String] the path to check
      # @return [Boolean] true when the path passes every check
      def self.valid_file_path?(path_filename)
        return false if REJECTED_PATHS.include?(path_filename)

        pathname = Pathname.new(path_filename)

        # Only relative paths are accepted: an absolute path is invalid.
        return false if pathname.absolute?

        path_to_file, filename = pathname.split

        # validate filename
        return false if filename.to_s.end_with?('.') || DISALLOWED_ASCII_PATTERN.match?(filename.to_s)
        # if the valid filename is at the top level, there is nothing else to check
        return true if pathname == pathname.basename

        # every component in the path to the file must also be clean
        path_to_file.each_filename.none? { |path_segment| DISALLOWED_ASCII_PATTERN.match?(path_segment) }
      end

      # Returns a valid, available version of +filepath+.
      #
      # The path is returned unchanged when it is already valid and not listed
      # in +unavailable_file_paths+. Otherwise every component is transliterated
      # with Stringex::Unidecoder, disallowed characters become '_', and a
      # trailing '.' on the file name becomes '_'. If the result is listed in
      # +unavailable_file_paths+, a numeric suffix is added (see handle_collision).
      #
      # @param filepath [String] relative path to remediate
      # @param unavailable_file_paths [Array<String>] paths that must not be returned
      # @return [String] the original or remediated path
      # @raise [ArgumentError] if +filepath+ is '', '.', '..', '/' or starts with '/'
      def self.remediate_file_path(filepath, unavailable_file_paths = []) # rubocop:disable Metrics/AbcSize
        return filepath if !unavailable_file_paths.include?(filepath) && valid_file_path?(filepath)

        argument_check(filepath)

        path_to_file, filename = Pathname.new(filepath).split

        # Every disallowed character (newlines included) is already '_' by the
        # time the trailing dot is handled, so \z names the intended position.
        filename_valid_ascii =
          Stringex::Unidecoder.decode(filename.to_s).gsub(DISALLOWED_ASCII_PATTERN, '_').sub(/\.\z/, '_')

        remediated_key_name = remediate_path(path_to_file, Pathname.new('')).join(filename_valid_ascii).to_s

        # no collisions
        return remediated_key_name unless unavailable_file_paths.include?(remediated_key_name)

        # handle collisions
        handle_collision(remediated_key_name, unavailable_file_paths)
      end

      # Raises when +filepath_key+ cannot be remediated at all.
      #
      # @param filepath_key [String] the path to check
      # @return [nil] when the argument is acceptable
      # @raise [ArgumentError] for '', '.', '..', '/' or any path starting with '/'
      def self.argument_check(filepath_key)
        raise ArgumentError, "Bad argument: '#{filepath_key}'" if REJECTED_PATHS.include?(filepath_key)
        raise ArgumentError, 'Bad argument: absolute path' if filepath_key.start_with?('/')
      end

      # Appends the remediated form of each component of +path_to_file+ to
      # +remediated_pathname+ and returns the resulting Pathname.
      #
      # @param path_to_file [Pathname] directory portion of the original path
      # @param remediated_pathname [Pathname] starting point the components are added to
      # @return [Pathname] the accumulated, remediated directory path
      def self.remediate_path(path_to_file, remediated_pathname)
        path_to_file.each_filename.reduce(remediated_pathname) do |remediated, path_segment|
          remediated + Stringex::Unidecoder.decode(path_segment).gsub(DISALLOWED_ASCII_PATTERN, '_')
        end
      end

      # Finds a free variant of +remediated_file_path+ by inserting "_1", "_2",
      # ... before the file extension until the candidate is not listed in
      # +unavailable_file_path+. At least "_1" is always appended.
      #
      # @param remediated_file_path [String] the colliding path
      # @param unavailable_file_path [Array<String>] paths that are already taken
      # @return [String] the first suffixed path that is not taken
      def self.handle_collision(remediated_file_path, unavailable_file_path)
        pathname = Pathname.new(remediated_file_path)
        extension = pathname.extname
        base = pathname.to_s.delete_suffix(extension)

        (1..).each do |suffix_num|
          candidate = "#{base}_#{suffix_num}#{extension}"
          return candidate unless unavailable_file_path.include?(candidate)
        end
      end
    end
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# require_relative 'preservation_utils/file_path'
|
4
|
+
require_relative 'preservation_utils/file_path'
|
5
|
+
|
6
|
+
module Cul
  # Top-level namespace of the gem; the working code lives in its submodules.
  module PreservationUtils
    # StandardError subclass declared in this namespace.
    class Error < StandardError
    end

    # PreservationUtils is implemented through submodules:
    #   Cul::PreservationUtils::FilePath
    #     - See /lib/cul/preservation_utils/file_path.rb
  end
end
|
metadata
ADDED
@@ -0,0 +1,75 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: cul-preservation_utils
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.4
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Bradley Goldsmith
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2025-05-05 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: stringex
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '2.8'
|
20
|
+
- - ">="
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: 2.8.6
|
23
|
+
type: :runtime
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: !ruby/object:Gem::Requirement
|
26
|
+
requirements:
|
27
|
+
- - "~>"
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '2.8'
|
30
|
+
- - ">="
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: 2.8.6
|
33
|
+
description: PreservationUtils provides the FilePath module for standardizing filepaths
|
34
|
+
for objects used in our Preservation services.
|
35
|
+
email:
|
36
|
+
- bg2918@columbia.edu
|
37
|
+
executables: []
|
38
|
+
extensions: []
|
39
|
+
extra_rdoc_files: []
|
40
|
+
files:
|
41
|
+
- ".rspec"
|
42
|
+
- ".rubocop.yml"
|
43
|
+
- CHANGELOG.md
|
44
|
+
- LICENSE.txt
|
45
|
+
- README.md
|
46
|
+
- Rakefile
|
47
|
+
- lib/cul/preservation_utils.rb
|
48
|
+
- lib/cul/preservation_utils/file_path.rb
|
49
|
+
- lib/cul/preservation_utils/version.rb
|
50
|
+
- sig/cul/preservation_utils.rbs
|
51
|
+
homepage: https://github.com/cul/cul-preservation_utils
|
52
|
+
licenses:
|
53
|
+
- Apache-2.0
|
54
|
+
metadata:
|
55
|
+
homepage_uri: https://github.com/cul/cul-preservation_utils
|
56
|
+
post_install_message:
|
57
|
+
rdoc_options: []
|
58
|
+
require_paths:
|
59
|
+
- lib
|
60
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
61
|
+
requirements:
|
62
|
+
- - ">="
|
63
|
+
- !ruby/object:Gem::Version
|
64
|
+
version: 3.1.0
|
65
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - ">="
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
70
|
+
requirements: []
|
71
|
+
rubygems_version: 3.5.16
|
72
|
+
signing_key:
|
73
|
+
specification_version: 4
|
74
|
+
summary: Utilities related to Preservation workflows at Columbia University Library.
|
75
|
+
test_files: []
|