pdfh 0.1.5 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +39 -10
- data/.rubocop.yml +22 -13
- data/.ruby-version +1 -1
- data/CHANGELOG.md +13 -0
- data/Gemfile +15 -3
- data/Gemfile.lock +79 -30
- data/README.md +26 -7
- data/Rakefile +22 -2
- data/bin/console +9 -6
- data/bin/run +8 -0
- data/exe/pdfh +17 -24
- data/lib/ext/string.rb +5 -9
- data/lib/pdfh.rb +115 -77
- data/lib/pdfh/document.rb +60 -146
- data/lib/pdfh/document_period.rb +32 -0
- data/lib/pdfh/document_processor.rb +163 -0
- data/lib/pdfh/document_type.rb +43 -0
- data/lib/pdfh/month.rb +42 -32
- data/lib/pdfh/opt_parser.rb +41 -0
- data/lib/pdfh/pdf_handler.rb +19 -18
- data/lib/pdfh/settings.rb +32 -40
- data/lib/pdfh/settings_template.rb +21 -0
- data/lib/pdfh/version.rb +1 -1
- data/pdfh.gemspec +26 -35
- metadata +19 -85
- data/.ruby-gemset +0 -1
- data/.travis.yml +0 -7
- data/lib/pdfh/utils.rb +0 -42
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 375df17076824594d8622b7dd9474ab53400297c5ff4598463b5dac3fea352d2
|
|
4
|
+
data.tar.gz: 5174dd71598e65630e78b394f3aa46d2562e9de7d682dcb30cd5695af7a75012
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 68bed2cab1ef5346e7774d03d66538fcdbdd0ccbc72168359d4e3422a7d39c306de4483dd074483aec6f63d197d7275b072cce080178547b94c6adf2853e81e9
|
|
7
|
+
data.tar.gz: 35087b577052e468a97183af2305e3cef5d03fa42d9942d4528701280894dbe290759bc01b124ce54ba5835e7af53a67cbff5438c568cc1d7e0ce4f50bcdaccd
|
data/.gitignore
CHANGED
|
@@ -1,13 +1,42 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
/
|
|
4
|
-
/
|
|
5
|
-
/
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
/
|
|
1
|
+
*.yml
|
|
2
|
+
|
|
3
|
+
.yardoc/
|
|
4
|
+
_yardoc/
|
|
5
|
+
coverage/
|
|
6
|
+
pkg/
|
|
7
|
+
spec/reports/
|
|
8
|
+
spec/coverage/
|
|
9
|
+
tmp/
|
|
9
10
|
|
|
10
11
|
# rspec failure tracking
|
|
11
12
|
.rspec_status
|
|
12
|
-
|
|
13
|
-
|
|
13
|
+
|
|
14
|
+
### macOS
|
|
15
|
+
# General
|
|
16
|
+
.DS_Store
|
|
17
|
+
.AppleDouble
|
|
18
|
+
.LSOverride
|
|
19
|
+
|
|
20
|
+
# Icon must end with two \r
|
|
21
|
+
Icon
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
# Thumbnails
|
|
25
|
+
._*
|
|
26
|
+
|
|
27
|
+
# Files that might appear in the root of a volume
|
|
28
|
+
.DocumentRevisions-V100
|
|
29
|
+
.fseventsd
|
|
30
|
+
.Spotlight-V100
|
|
31
|
+
.TemporaryItems
|
|
32
|
+
.Trashes
|
|
33
|
+
.VolumeIcon.icns
|
|
34
|
+
.com.apple.timemachine.donotpresent
|
|
35
|
+
|
|
36
|
+
# Directories potentially created on remote AFP share
|
|
37
|
+
.AppleDB
|
|
38
|
+
.AppleDesktop
|
|
39
|
+
Network Trash Folder
|
|
40
|
+
Temporary Items
|
|
41
|
+
.apdisk
|
|
42
|
+
|
data/.rubocop.yml
CHANGED
|
@@ -1,18 +1,27 @@
|
|
|
1
|
-
|
|
2
|
-
AllCops:
|
|
3
|
-
Exclude:
|
|
4
|
-
- '.git/**/*'
|
|
5
|
-
- 'spec/**/*'
|
|
1
|
+
inherit_from: .rubocop_todo.yml
|
|
6
2
|
|
|
7
|
-
|
|
8
|
-
|
|
3
|
+
require:
|
|
4
|
+
- rubocop-performance
|
|
5
|
+
- rubocop-rake
|
|
6
|
+
- rubocop-rspec
|
|
9
7
|
|
|
10
|
-
|
|
8
|
+
AllCops:
|
|
9
|
+
NewCops: enable
|
|
10
|
+
TargetRubyVersion: 2.5
|
|
11
11
|
Exclude:
|
|
12
|
-
-
|
|
12
|
+
- doc/**/*
|
|
13
|
+
- pkg/**/*
|
|
14
|
+
- tmp/**/*
|
|
15
|
+
- vendor/**/*
|
|
16
|
+
|
|
17
|
+
Layout/LineLength:
|
|
18
|
+
Max: 120
|
|
19
|
+
IgnoredPatterns: ['^\s*#']
|
|
13
20
|
|
|
14
|
-
|
|
15
|
-
|
|
21
|
+
Style/StringLiterals:
|
|
22
|
+
Enabled: true
|
|
23
|
+
EnforcedStyle: double_quotes
|
|
16
24
|
|
|
17
|
-
|
|
18
|
-
|
|
25
|
+
Style/StringLiteralsInInterpolation:
|
|
26
|
+
Enabled: true
|
|
27
|
+
EnforcedStyle: double_quotes
|
data/.ruby-version
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
ruby-
|
|
1
|
+
ruby-3.0.1
|
data/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,16 @@
|
|
|
1
|
+
## v0.2.0
|
|
2
|
+
* Major gem refactoring
|
|
3
|
+
* Changed setting `base_path` to `destination_base_path`
|
|
4
|
+
* Add DocumentType listing option on executable file
|
|
5
|
+
* Add processing of individual documents by providing the type and files
|
|
6
|
+
```bash
|
|
7
|
+
pdfh -t document_type_id path/to_files.pdf
|
|
8
|
+
```
|
|
9
|
+
* Add settings.yml template in order to create a sample file
|
|
10
|
+
|
|
11
|
+
## v0.1.9
|
|
12
|
+
* Add dependencies validation at run
|
|
13
|
+
|
|
1
14
|
## v0.1.5
|
|
2
15
|
* Add print_cmd field in config file for information purposes
|
|
3
16
|
* Settings now validates a non-existing directory
|
data/Gemfile
CHANGED
|
@@ -1,8 +1,20 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
source
|
|
4
|
-
|
|
5
|
-
git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }
|
|
3
|
+
source "https://rubygems.org"
|
|
6
4
|
|
|
7
5
|
# Specify your gem's dependencies in pdfh.gemspec
|
|
8
6
|
gemspec
|
|
7
|
+
|
|
8
|
+
gem "bundler", "~> 2.0"
|
|
9
|
+
gem "code-scanning-rubocop", "~> 0.5"
|
|
10
|
+
gem "pry", "~> 0.14"
|
|
11
|
+
gem "rake", "~> 13.0"
|
|
12
|
+
gem "rspec", "~> 3.9"
|
|
13
|
+
gem "rspec_junit_formatter", "~> 0.4"
|
|
14
|
+
gem "rubocop", "~> 1.0"
|
|
15
|
+
gem "rubocop-performance", "~> 1.9"
|
|
16
|
+
gem "rubocop-rake", "~> 0.5"
|
|
17
|
+
gem "rubocop-rspec", "~> 2.2"
|
|
18
|
+
gem "simplecov", "~> 0.21"
|
|
19
|
+
gem "simplecov-console", "~> 0.9"
|
|
20
|
+
gem "versionomy", "~> 0.5"
|
data/Gemfile.lock
CHANGED
|
@@ -1,52 +1,101 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
pdfh (0.
|
|
5
|
-
colorize (~> 0.8.
|
|
4
|
+
pdfh (0.2.0)
|
|
5
|
+
colorize (~> 0.8.0)
|
|
6
6
|
|
|
7
7
|
GEM
|
|
8
8
|
remote: https://rubygems.org/
|
|
9
9
|
specs:
|
|
10
10
|
ansi (1.5.0)
|
|
11
|
+
ast (2.4.2)
|
|
12
|
+
blockenspiel (0.5.0)
|
|
13
|
+
code-scanning-rubocop (0.5.0)
|
|
14
|
+
rubocop (~> 1.0)
|
|
15
|
+
coderay (1.1.3)
|
|
11
16
|
colorize (0.8.1)
|
|
12
|
-
diff-lcs (1.
|
|
13
|
-
docile (1.
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
17
|
+
diff-lcs (1.4.4)
|
|
18
|
+
docile (1.4.0)
|
|
19
|
+
method_source (1.0.0)
|
|
20
|
+
parallel (1.20.1)
|
|
21
|
+
parser (3.0.1.1)
|
|
22
|
+
ast (~> 2.4.1)
|
|
23
|
+
pry (0.14.1)
|
|
24
|
+
coderay (~> 1.1)
|
|
25
|
+
method_source (~> 1.0)
|
|
26
|
+
rainbow (3.0.0)
|
|
27
|
+
rake (13.0.3)
|
|
28
|
+
regexp_parser (2.1.1)
|
|
29
|
+
rexml (3.2.5)
|
|
30
|
+
rspec (3.10.0)
|
|
31
|
+
rspec-core (~> 3.10.0)
|
|
32
|
+
rspec-expectations (~> 3.10.0)
|
|
33
|
+
rspec-mocks (~> 3.10.0)
|
|
34
|
+
rspec-core (3.10.1)
|
|
35
|
+
rspec-support (~> 3.10.0)
|
|
36
|
+
rspec-expectations (3.10.1)
|
|
24
37
|
diff-lcs (>= 1.2.0, < 2.0)
|
|
25
|
-
rspec-support (~> 3.
|
|
26
|
-
rspec-mocks (3.
|
|
38
|
+
rspec-support (~> 3.10.0)
|
|
39
|
+
rspec-mocks (3.10.2)
|
|
27
40
|
diff-lcs (>= 1.2.0, < 2.0)
|
|
28
|
-
rspec-support (~> 3.
|
|
29
|
-
rspec-support (3.
|
|
30
|
-
|
|
41
|
+
rspec-support (~> 3.10.0)
|
|
42
|
+
rspec-support (3.10.2)
|
|
43
|
+
rspec_junit_formatter (0.4.1)
|
|
44
|
+
rspec-core (>= 2, < 4, != 2.12.0)
|
|
45
|
+
rubocop (1.14.0)
|
|
46
|
+
parallel (~> 1.10)
|
|
47
|
+
parser (>= 3.0.0.0)
|
|
48
|
+
rainbow (>= 2.2.2, < 4.0)
|
|
49
|
+
regexp_parser (>= 1.8, < 3.0)
|
|
50
|
+
rexml
|
|
51
|
+
rubocop-ast (>= 1.5.0, < 2.0)
|
|
52
|
+
ruby-progressbar (~> 1.7)
|
|
53
|
+
unicode-display_width (>= 1.4.0, < 3.0)
|
|
54
|
+
rubocop-ast (1.5.0)
|
|
55
|
+
parser (>= 3.0.1.1)
|
|
56
|
+
rubocop-performance (1.11.3)
|
|
57
|
+
rubocop (>= 1.7.0, < 2.0)
|
|
58
|
+
rubocop-ast (>= 0.4.0)
|
|
59
|
+
rubocop-rake (0.5.1)
|
|
60
|
+
rubocop
|
|
61
|
+
rubocop-rspec (2.3.0)
|
|
62
|
+
rubocop (~> 1.0)
|
|
63
|
+
rubocop-ast (>= 1.1.0)
|
|
64
|
+
ruby-progressbar (1.11.0)
|
|
65
|
+
simplecov (0.21.2)
|
|
31
66
|
docile (~> 1.1)
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
simplecov-console (0.
|
|
67
|
+
simplecov-html (~> 0.11)
|
|
68
|
+
simplecov_json_formatter (~> 0.1)
|
|
69
|
+
simplecov-console (0.9.1)
|
|
35
70
|
ansi
|
|
36
|
-
hirb
|
|
37
71
|
simplecov
|
|
38
|
-
|
|
72
|
+
terminal-table
|
|
73
|
+
simplecov-html (0.12.3)
|
|
74
|
+
simplecov_json_formatter (0.1.3)
|
|
75
|
+
terminal-table (3.0.1)
|
|
76
|
+
unicode-display_width (>= 1.1.1, < 3)
|
|
77
|
+
unicode-display_width (2.0.0)
|
|
78
|
+
versionomy (0.5.0)
|
|
79
|
+
blockenspiel (~> 0.5)
|
|
39
80
|
|
|
40
81
|
PLATFORMS
|
|
41
82
|
ruby
|
|
42
83
|
|
|
43
84
|
DEPENDENCIES
|
|
44
|
-
bundler (~>
|
|
85
|
+
bundler (~> 2.0)
|
|
86
|
+
code-scanning-rubocop (~> 0.5)
|
|
45
87
|
pdfh!
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
88
|
+
pry (~> 0.14)
|
|
89
|
+
rake (~> 13.0)
|
|
90
|
+
rspec (~> 3.9)
|
|
91
|
+
rspec_junit_formatter (~> 0.4)
|
|
92
|
+
rubocop (~> 1.0)
|
|
93
|
+
rubocop-performance (~> 1.9)
|
|
94
|
+
rubocop-rake (~> 0.5)
|
|
95
|
+
rubocop-rspec (~> 2.2)
|
|
96
|
+
simplecov (~> 0.21)
|
|
97
|
+
simplecov-console (~> 0.9)
|
|
98
|
+
versionomy (~> 0.5)
|
|
50
99
|
|
|
51
100
|
BUNDLED WITH
|
|
52
|
-
|
|
101
|
+
2.2.17
|
data/README.md
CHANGED
|
@@ -1,6 +1,9 @@
|
|
|
1
|
-
#
|
|
1
|
+
# PDF Handler (pdfh)
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
[![Rubocop analysis](https://github.com/iax7/pdfh/actions/workflows/rubocop-analysis.yml/badge.svg)](https://github.com/iax7/pdfh/actions/workflows/rubocop-analysis.yml)
|
|
4
|
+
[![Ruby][ruby-badge]][ruby-url]
|
|
5
|
+
|
|
6
|
+
Examine all PDF files in the lookup directories, remove the password (if there is one), then rename and copy them to a new directory using regular expressions.
|
|
4
7
|
|
|
5
8
|
## Installation
|
|
6
9
|
|
|
@@ -8,15 +11,24 @@ Examine all PDF files in scrape directories, remove password (if has one), renam
|
|
|
8
11
|
gem install pdfh
|
|
9
12
|
```
|
|
10
13
|
|
|
14
|
+
### Dependencies
|
|
15
|
+
|
|
16
|
+
You need to install pdf handling dependencies in order to use this gem. (I have only tested it on macOS)
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
brew install qpdf
|
|
20
|
+
brew install xpdf
|
|
21
|
+
```
|
|
22
|
+
|
|
11
23
|
## Usage
|
|
12
24
|
|
|
13
25
|
After installing this gem you need to create your configuration file in your home folder.
|
|
14
26
|
`pdfh.yml`
|
|
15
27
|
```yaml
|
|
16
28
|
---
|
|
17
|
-
|
|
29
|
+
lookup_dirs: # Directories where all pdf's are going to be analyzed
|
|
18
30
|
- ~/Downloads
|
|
19
|
-
|
|
31
|
+
destination_base_path: ~/PDFs # Directory where all matching documents will be copied (MUST exist)
|
|
20
32
|
document_types:
|
|
21
33
|
- name: Document From Bank # Description
|
|
22
34
|
re_file: '.*MyBankReg\.pdf' # Regular expression to match its filename
|
|
@@ -24,18 +36,21 @@ document_types:
|
|
|
24
36
|
pwd: base64string # [OPTIONAL] Password if the document is protected
|
|
25
37
|
store_path: "{YEAR}/bank_docs" # Relative path to copy this document
|
|
26
38
|
name_template: '{period} {subtype}' # Template for new filename when copied
|
|
27
|
-
sub_types: # [OPTIONAL] In case your need an extra category
|
|
39
|
+
sub_types: # [OPTIONAL] In case you need an extra category
|
|
28
40
|
- name: Account1 # Regular expression to match this subtype
|
|
29
|
-
month_offset: -1 # [OPTIONAL] Integer value to adjust month
|
|
41
|
+
month_offset: -1 # [OPTIONAL] Integer (signed) value to adjust month
|
|
30
42
|
```
|
|
31
43
|
|
|
32
44
|
## Development
|
|
33
45
|
|
|
34
46
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
|
35
47
|
|
|
36
|
-
To install this gem onto your local machine, run `
|
|
48
|
+
To install this gem onto your local machine, run `rake install`. To release a new version, run `rake bump`, and then run `rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
|
37
49
|
|
|
38
50
|
```bash
|
|
51
|
+
rake install
|
|
52
|
+
|
|
53
|
+
# step by step
|
|
39
54
|
build pdfh.gemspec
|
|
40
55
|
gem install pdfh-*
|
|
41
56
|
```
|
|
@@ -51,3 +66,7 @@ The gem is available as open source under the terms of the [MIT License](https:/
|
|
|
51
66
|
## Code of Conduct
|
|
52
67
|
|
|
53
68
|
Everyone interacting in the Pdfh project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/iax7/pdfh/blob/master/CODE_OF_CONDUCT.md).
|
|
69
|
+
|
|
70
|
+
<!-- Links -->
|
|
71
|
+
[ruby-badge]: https://img.shields.io/badge/ruby-3.0.1-blue?style=flat&logo=ruby&logoColor=CC342D&labelColor=white
|
|
72
|
+
[ruby-url]: https://www.ruby-lang.org/en/
|
data/Rakefile
CHANGED
|
@@ -1,8 +1,28 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require
|
|
4
|
-
require
|
|
3
|
+
require "colorize"
|
|
4
|
+
require "bundler/gem_tasks"
|
|
5
|
+
require "rspec/core/rake_task"
|
|
6
|
+
require "versionomy"
|
|
5
7
|
|
|
6
8
|
RSpec::Core::RakeTask.new(:spec)
|
|
7
9
|
|
|
8
10
|
task default: :spec
|
|
11
|
+
|
|
12
|
+
desc "Bump gem version number (tiny|minor|major)"
|
|
13
|
+
task :bump, :type do |_t, args|
|
|
14
|
+
args.with_defaults(type: :tiny)
|
|
15
|
+
|
|
16
|
+
version_file = File.join(__dir__, "lib", "pdfh", "version.rb")
|
|
17
|
+
content = File.read(version_file)
|
|
18
|
+
|
|
19
|
+
version_pattern = /(?<major>\d+)\.(?<minor>\d+)\.(?<tiny>\d+)/
|
|
20
|
+
current_version = content.match(version_pattern)
|
|
21
|
+
next_version = Versionomy.parse(current_version.to_s).bump(args.type).to_s
|
|
22
|
+
|
|
23
|
+
File.write(version_file, content.gsub(version_pattern, "\\1#{next_version}\\3"))
|
|
24
|
+
|
|
25
|
+
puts "Successfully bumped from #{current_version.to_s.red} to #{next_version.green}"
|
|
26
|
+
puts "\n> Building v#{next_version.green}..."
|
|
27
|
+
puts `rake build`
|
|
28
|
+
end
|
data/bin/console
CHANGED
|
@@ -1,15 +1,18 @@
|
|
|
1
1
|
#!/usr/bin/env ruby
|
|
2
2
|
# frozen_string_literal: true
|
|
3
3
|
|
|
4
|
-
require
|
|
5
|
-
require
|
|
4
|
+
require "bundler/setup"
|
|
5
|
+
require "pdfh"
|
|
6
6
|
|
|
7
7
|
# You can add fixtures and/or initialization code here to make experimenting
|
|
8
8
|
# with your gem easier. You can also use a different console, if you like.
|
|
9
9
|
|
|
10
10
|
# (If you use this, don't forget to add pry to your Gemfile!)
|
|
11
|
-
|
|
12
|
-
# Pry.start
|
|
11
|
+
require "pry"
|
|
13
12
|
|
|
14
|
-
|
|
15
|
-
|
|
13
|
+
p Pdfh.parse_argv
|
|
14
|
+
|
|
15
|
+
Pry.start
|
|
16
|
+
|
|
17
|
+
# require "irb"
|
|
18
|
+
# IRB.start(__FILE__)
|
data/bin/run
ADDED
data/exe/pdfh
CHANGED
|
@@ -1,33 +1,26 @@
|
|
|
1
1
|
#!/usr/bin/env ruby
|
|
2
2
|
# frozen_string_literal: true
|
|
3
3
|
|
|
4
|
-
require
|
|
5
|
-
require
|
|
6
|
-
require 'pdfh/version'
|
|
7
|
-
require 'pdfh/utils'
|
|
4
|
+
require "open3"
|
|
5
|
+
require "pdfh"
|
|
8
6
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
exit
|
|
18
|
-
end
|
|
19
|
-
opts.on_tail('-h', '--help', 'Show this message') do
|
|
20
|
-
puts opts
|
|
21
|
-
exit
|
|
7
|
+
# @param apps [Array]
|
|
8
|
+
# @return [Boolean]
|
|
9
|
+
def validate_installed(*apps)
|
|
10
|
+
found_app = []
|
|
11
|
+
apps.each_with_object(found_app) do |app, result|
|
|
12
|
+
_stdout, _stderr, status = Open3.capture3("command -v #{app}")
|
|
13
|
+
puts "Missing #{app} command." unless status.success?
|
|
14
|
+
result << status.success?
|
|
22
15
|
end
|
|
16
|
+
|
|
17
|
+
found_app.all?
|
|
23
18
|
end
|
|
24
19
|
|
|
20
|
+
exit(1) unless validate_installed("qpdf", "pdftotext")
|
|
21
|
+
|
|
25
22
|
begin
|
|
26
|
-
|
|
27
|
-
rescue
|
|
28
|
-
Pdfh.
|
|
29
|
-
puts opt
|
|
30
|
-
exit 1
|
|
23
|
+
Pdfh::DocumentProcessor.new.start
|
|
24
|
+
rescue StandardError => e
|
|
25
|
+
Pdfh.error_print e.message
|
|
31
26
|
end
|
|
32
|
-
|
|
33
|
-
Pdfh.main(options)
|