cul-preservation_utils 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: e40bd6ef52afe2b4a3e501191ba1da9a63b984c1c192f59f79fbd88f502c7171
4
+ data.tar.gz: 8b8057c5e56fc06c1e8540b599975e8e10927a9ea823505b1d74c9462662afbf
5
+ SHA512:
6
+ metadata.gz: 89f2499f64cd91e9c7acfc1e417466b7947d770b3874d1ee745d775da185f1fbe477a32a27882b3321114b702666fcae33ee352b4c6561d325d342e777b8821e
7
+ data.tar.gz: 3ffdd1f6ab8e17f84edd968c24ab6bb564038e7bc422e96781b7f210b868797516581ecb275399c09b89aee123fa02097417d48485d7730b96fe549b83b2836e
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --format documentation
2
+ --color
3
+ --require spec_helper
data/.rubocop.yml ADDED
@@ -0,0 +1,5 @@
1
+ inherit_gem:
2
+ rubocul: rubocul_default.yml
3
+
4
+ AllCops:
5
+ TargetRubyVersion: 3.1 # TODO change to lower ruby version (match folio client)?
data/CHANGELOG.md ADDED
@@ -0,0 +1,5 @@
1
+ ## [Unreleased]
2
+
3
+ ## [0.1.0] - 2025-05-01
4
+
5
+ - Initial release
data/LICENSE.txt ADDED
@@ -0,0 +1,13 @@
1
+ Copyright 2025 The Trustees of Columbia University in the City of New York
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
data/README.md ADDED
@@ -0,0 +1,35 @@
1
+ # Cul::PreservationUtils
2
+
3
+ PreservationUtils provides the FilePath module for standardizing filepaths for objects used in our Preservation services.
4
+
5
+ The library code lives in `lib/cul/preservation_utils/file_path.rb` and is loaded through `lib/cul/preservation_utils.rb`. To experiment with that code, run `bin/console` for an interactive prompt.
6
+
7
+ ## Installation
8
+
9
+ The gem is released under the name `cul-preservation_utils`.
10
+
11
+ Install the gem and add to the application's Gemfile by executing:
12
+
13
+ ```bash
14
+ bundle add cul-preservation_utils
15
+ ```
16
+
17
+ If bundler is not being used to manage dependencies, install the gem by executing:
18
+
19
+ ```bash
20
+ gem install cul-preservation_utils
21
+ ```
21
+ ```
22
+
23
+ ## Usage
24
+
25
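+ After `require 'cul/preservation_utils'`, call `Cul::PreservationUtils::FilePath.valid_file_path?(path)` to check whether a relative file path contains only the allowed characters, and `Cul::PreservationUtils::FilePath.remediate_file_path(path, unavailable_file_paths)` to get a remediated path that does not collide with any path listed in `unavailable_file_paths`.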
26
+
27
+ ## Development
28
+
29
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
30
+
31
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and the created tag, and push the `.gem` file to [rubygems.org](https://rubygems.org).
32
+
33
+ ## Contributing
34
+
35
+ Bug reports and pull requests are welcome on GitHub at https://github.com/cul/cul-preservation_utils.
data/Rakefile ADDED
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'bundler/gem_tasks'
4
+ require 'rspec/core/rake_task'
5
+
6
+ RSpec::Core::RakeTask.new(:spec)
7
+
8
+ require 'rubocop/rake_task'
9
+
10
+ RuboCop::RakeTask.new
11
+
12
+ task default: %i[spec rubocop]
@@ -0,0 +1,99 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'pathname'
4
+ require 'stringex'
5
+
6
+ # Cul::PreservationUtils::FilePath
7
+ # The FilePath module provides utilities for validating that file names and file paths do not
8
+ # include any characters that may be problematic for preservation objects that
9
+ # will be stored in Google or Amazon cloud services.
10
+ # Additionally, this has the benefit of having matching names in both local
11
+ # copies of such Preservation objects and copies stored on the cloud.
12
+ module Cul
13
+ module PreservationUtils
14
+ module FilePath
15
+ # The following code was taken from the ATC app's Atc::Utils::ObjectKeyNameUtils module
16
+ # Credit to fcd1
17
+
18
+ # About Cloud Storage objects: https://cloud.google.com/storage/docs/objects
19
+ # According to the above (and quite probably most Google Cloud Storage documentation),
20
+ # objects have names
21
+ # AWS - Creating object key names:
22
+ # https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html
23
+ # As seen in the title for the above page, an object in AWS S3 has a key name (or key)
24
+
25
+ DISALLOWED_ASCII_REGEX = '[^-a-zA-Z0-9_.()]'
26
+
27
+ def self.valid_file_path?(path_filename)
28
+ return false if ['', '.', '..', '/'].include? path_filename
29
+
30
+ pathname = Pathname.new(path_filename)
31
+
32
+ # a relative path is invalid
33
+ # todo : doesn't this code do the opposite of that?
34
+ return false if pathname.absolute?
35
+
36
+ path_to_file, filename = pathname.split
37
+
38
+ # validate filename
39
+ return false if filename.to_s.end_with?('.') || /#{DISALLOWED_ASCII_REGEX}/.match?(filename.to_s)
40
+ # if the valid filename is at the top level, return true
41
+ return true if pathname == pathname.basename
42
+
43
+ # check each component in the path to the file
44
+ path_to_file.each_filename do |path_segment|
45
+ return false if /#{DISALLOWED_ASCII_REGEX}/.match? path_segment
46
+ end
47
+ true
48
+ end
49
+
50
+ def self.remediate_file_path(filepath, unavailable_file_paths = []) # rubocop:disable Metrics/AbcSize
51
+ return filepath if !unavailable_file_paths.include?(filepath) && self.valid_file_path?(filepath)
52
+
53
+ self.argument_check(filepath)
54
+
55
+ pathname = Pathname.new(filepath)
56
+
57
+ remediated_pathname = Pathname.new('')
58
+ path_to_file, filename = pathname.split
59
+
60
+ filename_valid_ascii =
61
+ Stringex::Unidecoder.decode(filename.to_s).gsub(/#{DISALLOWED_ASCII_REGEX}/, '_').gsub(/\.$/, '_')
62
+
63
+ remediated_key_name = self.remediate_path(path_to_file, remediated_pathname).join(filename_valid_ascii).to_s
64
+
65
+ # no collisions
66
+ return remediated_key_name unless unavailable_file_paths.include? remediated_key_name
67
+
68
+ # handle collisions
69
+ self.handle_collision(remediated_key_name, unavailable_file_paths)
70
+ end
71
+
72
+ def self.argument_check(filepath_key)
73
+ raise ArgumentError, "Bad argument: '#{filepath_key}'" if ['', '.', '..', '/'].include? filepath_key
74
+ raise ArgumentError, 'Bad argument: absolute path' if filepath_key.start_with?('/')
75
+ end
76
+
77
+ def self.remediate_path(path_to_file, remediated_pathname)
78
+ # remediate each component in the path to the file
79
+ path_to_file.each_filename do |path_segment|
80
+ remediated_path_segment = Stringex::Unidecoder.decode(path_segment).gsub(/#{DISALLOWED_ASCII_REGEX}/, '_')
81
+ remediated_pathname += remediated_path_segment
82
+ end
83
+ remediated_pathname
84
+ end
85
+
86
+ def self.handle_collision(remediated_file_path, unavailable_file_path)
87
+ pathname = Pathname.new(remediated_file_path)
88
+ base = pathname.to_s.delete_suffix(pathname.extname)
89
+ new_remediated_file_path = "#{base}_1#{pathname.extname}"
90
+ suffix_num = 1
91
+ while unavailable_file_path.include? new_remediated_file_path
92
+ suffix_num += 1
93
+ new_remediated_file_path = "#{base}_#{suffix_num}#{pathname.extname}"
94
+ end
95
+ new_remediated_file_path
96
+ end
97
+ end
98
+ end
99
+ end
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Cul
4
+ module PreservationUtils
5
+ VERSION = '0.1.4'
6
+ end
7
+ end
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ # require_relative 'preservation_utils/file_path'
4
+ require_relative 'preservation_utils/file_path'
5
+
6
+ module Cul
7
+ module PreservationUtils
8
+ class Error < StandardError; end
9
+ # PreservationUtils is implemented through submodules:
10
+ # Cul::PreservationUtils::FilePath
11
+ # - See /lib/cul/preservation_utils/file_path.rb
12
+ end
13
+ end
@@ -0,0 +1,6 @@
1
+ module Cul
2
+ module PreservationUtils
3
+ VERSION: String
4
+ # See the writing guide of rbs: https://github.com/ruby/rbs#guides
5
+ end
6
+ end
metadata ADDED
@@ -0,0 +1,75 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: cul-preservation_utils
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.4
5
+ platform: ruby
6
+ authors:
7
+ - Bradley Goldsmith
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2025-05-05 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: stringex
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '2.8'
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: 2.8.6
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - "~>"
28
+ - !ruby/object:Gem::Version
29
+ version: '2.8'
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: 2.8.6
33
+ description: PreservationUtils provides the FilePath module for standardizing filepaths
34
+ for objects used in our Preservation services.
35
+ email:
36
+ - bg2918@columbia.edu
37
+ executables: []
38
+ extensions: []
39
+ extra_rdoc_files: []
40
+ files:
41
+ - ".rspec"
42
+ - ".rubocop.yml"
43
+ - CHANGELOG.md
44
+ - LICENSE.txt
45
+ - README.md
46
+ - Rakefile
47
+ - lib/cul/preservation_utils.rb
48
+ - lib/cul/preservation_utils/file_path.rb
49
+ - lib/cul/preservation_utils/version.rb
50
+ - sig/cul/preservation_utils.rbs
51
+ homepage: https://github.com/cul/cul-preservation_utils
52
+ licenses:
53
+ - Apache-2.0
54
+ metadata:
55
+ homepage_uri: https://github.com/cul/cul-preservation_utils
56
+ post_install_message:
57
+ rdoc_options: []
58
+ require_paths:
59
+ - lib
60
+ required_ruby_version: !ruby/object:Gem::Requirement
61
+ requirements:
62
+ - - ">="
63
+ - !ruby/object:Gem::Version
64
+ version: 3.1.0
65
+ required_rubygems_version: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ requirements: []
71
+ rubygems_version: 3.5.16
72
+ signing_key:
73
+ specification_version: 4
74
+ summary: Utilities related to Preservation workflows at Columbia University Library.
75
+ test_files: []