pdfh 3.0.0 → 3.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +1 -1
- data/.rubocop_todo.yml +24 -16
- data/.tool-versions +1 -1
- data/CHANGELOG.md +1 -1
- data/Gemfile +5 -4
- data/Gemfile.lock +75 -39
- data/README.md +21 -7
- data/bin/console +1 -1
- data/bin/run +1 -1
- data/exe/pdfh +1 -1
- data/lib/pdfh/main.rb +93 -0
- data/lib/pdfh/{document.rb → models/document.rb} +34 -17
- data/lib/pdfh/{document_type.rb → models/document_type.rb} +18 -5
- data/lib/pdfh/{settings.rb → models/settings.rb} +23 -15
- data/lib/pdfh/settings_template.rb +11 -11
- data/lib/pdfh/utils/console.rb +101 -0
- data/lib/pdfh/utils/opt_parser.rb +56 -0
- data/lib/pdfh/utils/options.rb +38 -0
- data/lib/pdfh/utils/pdf_file_handler.rb +121 -0
- data/lib/pdfh/utils/settings_builder.rb +57 -0
- data/lib/pdfh/version.rb +1 -1
- data/lib/pdfh.rb +29 -118
- data/pdfh.gemspec +1 -1
- metadata +15 -13
- data/.ruby-version +0 -1
- data/lib/pdfh/document_processor.rb +0 -164
- data/lib/pdfh/opt_parser.rb +0 -41
- data/lib/pdfh/pdf_handler.rb +0 -55
- /data/lib/pdfh/{document_period.rb → models/document_period.rb} +0 -0
- /data/lib/pdfh/{month.rb → utils/month.rb} +0 -0
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: e56c6dcb8915785a475e34a5120d44556fd6630d3738849161697b76ded58fa6
|
|
4
|
+
data.tar.gz: 335b684595f23f61607d3ff3e0c6ed106da1e52842090c4ddc89381252f5771a
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 45bbfee057aeda92595c687b67a3e2f34c8ed4f36f28dbc5b3880fafb05f7a9e170b70e1ec61bc21709ba7ed3a1401667fca03cadf35593fb8843b93ee7fdfdb
|
|
7
|
+
data.tar.gz: 930644f381d3e338ec3161d8346179f3268c336bf3407ae64ed256eb9c30b0bd2b0d0bcf6c0ab31c2d0a57a25d56f05fd0f99bf89f7978be77a0d78f985da101
|
data/.rubocop.yml
CHANGED
data/.rubocop_todo.yml
CHANGED
|
@@ -1,36 +1,31 @@
|
|
|
1
1
|
# This configuration was generated by
|
|
2
2
|
# `rubocop --auto-gen-config`
|
|
3
|
-
# on
|
|
3
|
+
# on 2023-07-13 05:43:39 UTC using RuboCop version 1.54.1.
|
|
4
4
|
# The point is for the user to remove these configuration records
|
|
5
5
|
# one by one as the offenses are removed from the code base.
|
|
6
6
|
# Note that changes in the inspected code, or installation of new
|
|
7
7
|
# versions of RuboCop, may require this file to be generated again.
|
|
8
8
|
|
|
9
|
-
# Offense count:
|
|
10
|
-
# Configuration parameters:
|
|
9
|
+
# Offense count: 7
|
|
10
|
+
# Configuration parameters: AllowedMethods, AllowedPatterns, CountRepeatedAttributes.
|
|
11
11
|
Metrics/AbcSize:
|
|
12
|
-
Max:
|
|
13
|
-
|
|
14
|
-
# Offense count: 6
|
|
15
|
-
# Configuration parameters: CountComments, CountAsOne, ExcludedMethods, IgnoredMethods.
|
|
16
|
-
# IgnoredMethods: refine
|
|
17
|
-
Metrics/BlockLength:
|
|
18
|
-
Max: 87
|
|
12
|
+
Max: 25
|
|
19
13
|
|
|
20
14
|
# Offense count: 1
|
|
21
|
-
# Configuration parameters: CountComments, CountAsOne.
|
|
22
|
-
|
|
23
|
-
|
|
15
|
+
# Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns, inherit_mode.
|
|
16
|
+
# AllowedMethods: refine
|
|
17
|
+
Metrics/BlockLength:
|
|
18
|
+
Max: 26
|
|
24
19
|
|
|
25
20
|
# Offense count: 6
|
|
26
|
-
# Configuration parameters: CountComments, CountAsOne,
|
|
21
|
+
# Configuration parameters: CountComments, CountAsOne, AllowedMethods, AllowedPatterns.
|
|
27
22
|
Metrics/MethodLength:
|
|
28
23
|
Max: 17
|
|
29
24
|
|
|
30
25
|
# Offense count: 2
|
|
31
26
|
RSpec/AnyInstance:
|
|
32
27
|
Exclude:
|
|
33
|
-
- 'spec/pdfh/
|
|
28
|
+
- 'spec/pdfh/main_spec.rb'
|
|
34
29
|
|
|
35
30
|
# Offense count: 1
|
|
36
31
|
# Configuration parameters: CountAsOne.
|
|
@@ -41,7 +36,20 @@ RSpec/ExampleLength:
|
|
|
41
36
|
RSpec/MultipleExpectations:
|
|
42
37
|
Max: 2
|
|
43
38
|
|
|
39
|
+
# Offense count: 1
|
|
40
|
+
# Configuration parameters: AllowedPatterns.
|
|
41
|
+
# AllowedPatterns: ^expect_, ^assert_
|
|
42
|
+
RSpec/NoExpectationExample:
|
|
43
|
+
Exclude:
|
|
44
|
+
- 'spec/pdfh/pdf_handler_spec.rb'
|
|
45
|
+
|
|
44
46
|
# Offense count: 1
|
|
45
47
|
RSpec/SubjectStub:
|
|
46
48
|
Exclude:
|
|
47
|
-
- 'spec/pdfh/
|
|
49
|
+
- 'spec/pdfh/main_spec.rb'
|
|
50
|
+
|
|
51
|
+
# Offense count: 3
|
|
52
|
+
# This cop supports safe autocorrection (--autocorrect).
|
|
53
|
+
Style/RedundantStringEscape:
|
|
54
|
+
Exclude:
|
|
55
|
+
- 'lib/pdfh/settings_template.rb'
|
data/.tool-versions
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
ruby 3.
|
|
1
|
+
ruby 3.3.0
|
data/CHANGELOG.md
CHANGED
data/Gemfile
CHANGED
|
@@ -7,14 +7,15 @@ gemspec
|
|
|
7
7
|
|
|
8
8
|
gem "bundler", "~> 2.0"
|
|
9
9
|
gem "code-scanning-rubocop", "~> 0.5"
|
|
10
|
+
gem "factory_bot", "~> 6.2"
|
|
10
11
|
gem "pry", "~> 0.14"
|
|
11
12
|
gem "rake", "~> 13.0"
|
|
12
|
-
gem "rspec", "~> 3.
|
|
13
|
+
gem "rspec", "~> 3.12"
|
|
13
14
|
gem "rspec_junit_formatter", "~> 0.4"
|
|
14
|
-
gem "rubocop", "~> 1.
|
|
15
|
-
gem "rubocop-performance", "~> 1.
|
|
15
|
+
gem "rubocop", "~> 1.50"
|
|
16
|
+
gem "rubocop-performance", "~> 1.18"
|
|
16
17
|
gem "rubocop-rake", "~> 0.5"
|
|
17
18
|
gem "rubocop-rspec", "~> 2.2"
|
|
18
|
-
gem "simplecov", "~> 0.
|
|
19
|
+
gem "simplecov", "~> 0.22"
|
|
19
20
|
gem "simplecov-console", "~> 0.9"
|
|
20
21
|
gem "versionomy", "~> 0.5"
|
data/Gemfile.lock
CHANGED
|
@@ -1,69 +1,102 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
pdfh (3.0.
|
|
4
|
+
pdfh (3.0.1)
|
|
5
5
|
colorize (~> 0.8.0)
|
|
6
6
|
|
|
7
7
|
GEM
|
|
8
8
|
remote: https://rubygems.org/
|
|
9
9
|
specs:
|
|
10
|
+
activesupport (7.1.2)
|
|
11
|
+
base64
|
|
12
|
+
bigdecimal
|
|
13
|
+
concurrent-ruby (~> 1.0, >= 1.0.2)
|
|
14
|
+
connection_pool (>= 2.2.5)
|
|
15
|
+
drb
|
|
16
|
+
i18n (>= 1.6, < 2)
|
|
17
|
+
minitest (>= 5.1)
|
|
18
|
+
mutex_m
|
|
19
|
+
tzinfo (~> 2.0)
|
|
10
20
|
ansi (1.5.0)
|
|
11
21
|
ast (2.4.2)
|
|
22
|
+
base64 (0.2.0)
|
|
23
|
+
bigdecimal (3.1.5)
|
|
12
24
|
blockenspiel (0.5.0)
|
|
13
25
|
code-scanning-rubocop (0.6.1)
|
|
14
26
|
rubocop (~> 1.0)
|
|
15
27
|
coderay (1.1.3)
|
|
16
28
|
colorize (0.8.1)
|
|
29
|
+
concurrent-ruby (1.2.2)
|
|
30
|
+
connection_pool (2.4.1)
|
|
17
31
|
diff-lcs (1.5.0)
|
|
18
32
|
docile (1.4.0)
|
|
19
|
-
|
|
33
|
+
drb (2.2.0)
|
|
34
|
+
ruby2_keywords
|
|
35
|
+
factory_bot (6.4.5)
|
|
36
|
+
activesupport (>= 5.0.0)
|
|
37
|
+
i18n (1.14.1)
|
|
38
|
+
concurrent-ruby (~> 1.0)
|
|
39
|
+
json (2.7.1)
|
|
40
|
+
language_server-protocol (3.17.0.3)
|
|
20
41
|
method_source (1.0.0)
|
|
21
|
-
|
|
22
|
-
|
|
42
|
+
minitest (5.20.0)
|
|
43
|
+
mutex_m (0.2.0)
|
|
44
|
+
parallel (1.24.0)
|
|
45
|
+
parser (3.3.0.2)
|
|
23
46
|
ast (~> 2.4.1)
|
|
24
|
-
|
|
47
|
+
racc
|
|
48
|
+
pry (0.14.2)
|
|
25
49
|
coderay (~> 1.1)
|
|
26
50
|
method_source (~> 1.0)
|
|
51
|
+
racc (1.7.3)
|
|
27
52
|
rainbow (3.1.1)
|
|
28
|
-
rake (13.0
|
|
29
|
-
regexp_parser (2.
|
|
30
|
-
rexml (3.2.
|
|
31
|
-
rspec (3.
|
|
32
|
-
rspec-core (~> 3.
|
|
33
|
-
rspec-expectations (~> 3.
|
|
34
|
-
rspec-mocks (~> 3.
|
|
35
|
-
rspec-core (3.
|
|
36
|
-
rspec-support (~> 3.
|
|
37
|
-
rspec-expectations (3.
|
|
53
|
+
rake (13.1.0)
|
|
54
|
+
regexp_parser (2.9.0)
|
|
55
|
+
rexml (3.2.6)
|
|
56
|
+
rspec (3.12.0)
|
|
57
|
+
rspec-core (~> 3.12.0)
|
|
58
|
+
rspec-expectations (~> 3.12.0)
|
|
59
|
+
rspec-mocks (~> 3.12.0)
|
|
60
|
+
rspec-core (3.12.2)
|
|
61
|
+
rspec-support (~> 3.12.0)
|
|
62
|
+
rspec-expectations (3.12.3)
|
|
38
63
|
diff-lcs (>= 1.2.0, < 2.0)
|
|
39
|
-
rspec-support (~> 3.
|
|
40
|
-
rspec-mocks (3.
|
|
64
|
+
rspec-support (~> 3.12.0)
|
|
65
|
+
rspec-mocks (3.12.6)
|
|
41
66
|
diff-lcs (>= 1.2.0, < 2.0)
|
|
42
|
-
rspec-support (~> 3.
|
|
43
|
-
rspec-support (3.
|
|
44
|
-
rspec_junit_formatter (0.
|
|
67
|
+
rspec-support (~> 3.12.0)
|
|
68
|
+
rspec-support (3.12.1)
|
|
69
|
+
rspec_junit_formatter (0.6.0)
|
|
45
70
|
rspec-core (>= 2, < 4, != 2.12.0)
|
|
46
|
-
rubocop (1.
|
|
71
|
+
rubocop (1.59.0)
|
|
47
72
|
json (~> 2.3)
|
|
73
|
+
language_server-protocol (>= 3.17.0)
|
|
48
74
|
parallel (~> 1.10)
|
|
49
|
-
parser (>= 3.
|
|
75
|
+
parser (>= 3.2.2.4)
|
|
50
76
|
rainbow (>= 2.2.2, < 4.0)
|
|
51
77
|
regexp_parser (>= 1.8, < 3.0)
|
|
52
78
|
rexml (>= 3.2.5, < 4.0)
|
|
53
|
-
rubocop-ast (>= 1.
|
|
79
|
+
rubocop-ast (>= 1.30.0, < 2.0)
|
|
54
80
|
ruby-progressbar (~> 1.7)
|
|
55
|
-
unicode-display_width (>=
|
|
56
|
-
rubocop-ast (1.
|
|
57
|
-
parser (>= 3.
|
|
58
|
-
rubocop-
|
|
59
|
-
rubocop (
|
|
60
|
-
|
|
81
|
+
unicode-display_width (>= 2.4.0, < 3.0)
|
|
82
|
+
rubocop-ast (1.30.0)
|
|
83
|
+
parser (>= 3.2.1.0)
|
|
84
|
+
rubocop-capybara (2.20.0)
|
|
85
|
+
rubocop (~> 1.41)
|
|
86
|
+
rubocop-factory_bot (2.25.1)
|
|
87
|
+
rubocop (~> 1.41)
|
|
88
|
+
rubocop-performance (1.20.2)
|
|
89
|
+
rubocop (>= 1.48.1, < 2.0)
|
|
90
|
+
rubocop-ast (>= 1.30.0, < 2.0)
|
|
61
91
|
rubocop-rake (0.6.0)
|
|
62
92
|
rubocop (~> 1.0)
|
|
63
|
-
rubocop-rspec (2.
|
|
64
|
-
rubocop (~> 1.
|
|
65
|
-
|
|
66
|
-
|
|
93
|
+
rubocop-rspec (2.26.1)
|
|
94
|
+
rubocop (~> 1.40)
|
|
95
|
+
rubocop-capybara (~> 2.17)
|
|
96
|
+
rubocop-factory_bot (~> 2.22)
|
|
97
|
+
ruby-progressbar (1.13.0)
|
|
98
|
+
ruby2_keywords (0.0.5)
|
|
99
|
+
simplecov (0.22.0)
|
|
67
100
|
docile (~> 1.1)
|
|
68
101
|
simplecov-html (~> 0.11)
|
|
69
102
|
simplecov_json_formatter (~> 0.1)
|
|
@@ -75,7 +108,9 @@ GEM
|
|
|
75
108
|
simplecov_json_formatter (0.1.4)
|
|
76
109
|
terminal-table (3.0.2)
|
|
77
110
|
unicode-display_width (>= 1.1.1, < 3)
|
|
78
|
-
|
|
111
|
+
tzinfo (2.0.6)
|
|
112
|
+
concurrent-ruby (~> 1.0)
|
|
113
|
+
unicode-display_width (2.5.0)
|
|
79
114
|
versionomy (0.5.0)
|
|
80
115
|
blockenspiel (~> 0.5)
|
|
81
116
|
|
|
@@ -85,18 +120,19 @@ PLATFORMS
|
|
|
85
120
|
DEPENDENCIES
|
|
86
121
|
bundler (~> 2.0)
|
|
87
122
|
code-scanning-rubocop (~> 0.5)
|
|
123
|
+
factory_bot (~> 6.2)
|
|
88
124
|
pdfh!
|
|
89
125
|
pry (~> 0.14)
|
|
90
126
|
rake (~> 13.0)
|
|
91
|
-
rspec (~> 3.
|
|
127
|
+
rspec (~> 3.12)
|
|
92
128
|
rspec_junit_formatter (~> 0.4)
|
|
93
|
-
rubocop (~> 1.
|
|
94
|
-
rubocop-performance (~> 1.
|
|
129
|
+
rubocop (~> 1.50)
|
|
130
|
+
rubocop-performance (~> 1.18)
|
|
95
131
|
rubocop-rake (~> 0.5)
|
|
96
132
|
rubocop-rspec (~> 2.2)
|
|
97
|
-
simplecov (~> 0.
|
|
133
|
+
simplecov (~> 0.22)
|
|
98
134
|
simplecov-console (~> 0.9)
|
|
99
135
|
versionomy (~> 0.5)
|
|
100
136
|
|
|
101
137
|
BUNDLED WITH
|
|
102
|
-
2.
|
|
138
|
+
2.5.4
|
data/README.md
CHANGED
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
# PDF Handler (pdfh)
|
|
2
2
|
|
|
3
|
-
[![Rubocop]
|
|
4
|
-
[![Ruby][ruby-
|
|
3
|
+
[![Rubocop][rubocop-img]][rubocop-url]
|
|
4
|
+
[![Ruby][ruby-img]][ruby-url]
|
|
5
|
+
[![Conventional Commits][cc-img]][cc-url]
|
|
6
|
+
[![Current version][gem-img]][gem-url]
|
|
5
7
|
|
|
6
|
-
Examine all PDF files in Look up directories, remove password (if has one), rename and copy to a new directory using regular
|
|
8
|
+
Examine all PDF files in the look-up directories, remove the password (if a file has one), and rename and copy them to a new directory using regular expressions.
|
|
7
9
|
|
|
8
10
|
## Installation
|
|
9
11
|
|
|
@@ -13,11 +15,17 @@ gem install pdfh
|
|
|
13
15
|
|
|
14
16
|
### Dependencies
|
|
15
17
|
|
|
16
|
-
You need to install pdf handling dependencies in order to use this gem.
|
|
18
|
+
You need to install pdf handling dependencies in order to use this gem.
|
|
17
19
|
|
|
20
|
+
#### macOS
|
|
18
21
|
```bash
|
|
19
|
-
brew install qpdf
|
|
20
|
-
brew install xpdf
|
|
22
|
+
brew install qpdf # for qpdf
|
|
23
|
+
brew install xpdf # for pdftotext
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
#### Fedora
|
|
27
|
+
```bash
|
|
28
|
+
sudo dnf install -y qpdf poppler-utils
|
|
21
29
|
```
|
|
22
30
|
|
|
23
31
|
## Usage
|
|
@@ -81,5 +89,11 @@ The gem is available as open source under the terms of the [MIT License](https:/
|
|
|
81
89
|
Everyone interacting in the Pdfh project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/iax7/pdfh/blob/master/CODE_OF_CONDUCT.md).
|
|
82
90
|
|
|
83
91
|
<!-- Links -->
|
|
84
|
-
[
|
|
92
|
+
[rubocop-img]: https://github.com/iax7/pdfh/actions/workflows/rubocop-analysis.yml/badge.svg
|
|
93
|
+
[rubocop-url]: https://github.com/iax7/pdfh/actions/workflows/rubocop-analysis.yml
|
|
94
|
+
[ruby-img]: https://img.shields.io/badge/ruby-3.1-blue?style=flat&logo=ruby&logoColor=CC342D&labelColor=white
|
|
85
95
|
[ruby-url]: https://www.ruby-lang.org/en/
|
|
96
|
+
[cc-img]: https://img.shields.io/badge/Conventional%20Commits-1.0.0-%23FE5196?logo=conventionalcommits&logoColor=00&labelColor=fff
|
|
97
|
+
[cc-url]: https://conventionalcommits.org
|
|
98
|
+
[gem-img]: https://img.shields.io/gem/v/pdfh?labelColor=fff&label=version
|
|
99
|
+
[gem-url]: https://rubygems.org/gems/pdfh
|
data/bin/console
CHANGED
data/bin/run
CHANGED
data/exe/pdfh
CHANGED
data/lib/pdfh/main.rb
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Pdfh
|
|
4
|
+
# Main functionality. This class is intended to manage the pdf documents
|
|
5
|
+
class Main
|
|
6
|
+
class << self
|
|
7
|
+
# @return [void]
|
|
8
|
+
def start
|
|
9
|
+
arg_options = Pdfh::OptParser.parse_argv
|
|
10
|
+
@options = Options.new(arg_options)
|
|
11
|
+
|
|
12
|
+
Pdfh.instance_variable_set(:@options, options)
|
|
13
|
+
Pdfh.instance_variable_set(:@console, Console.new(options.verbose?))
|
|
14
|
+
Pdfh.print_options(arg_options)
|
|
15
|
+
|
|
16
|
+
@settings = SettingsBuilder.build
|
|
17
|
+
Pdfh.debug "Destination path: #{settings.base_path.colorize(:light_blue)}"
|
|
18
|
+
|
|
19
|
+
options.file_mode? ? process_provided_files : process_lookup_dirs
|
|
20
|
+
rescue SettingsIOError => e
|
|
21
|
+
Pdfh.error_print(e.message, exit_app: false)
|
|
22
|
+
Pdfh.create_settings_file
|
|
23
|
+
exit(1)
|
|
24
|
+
rescue StandardError => e
|
|
25
|
+
Pdfh.error_print(e.message)
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
private
|
|
29
|
+
|
|
30
|
+
attr_reader :options, :settings
|
|
31
|
+
|
|
32
|
+
# @param file_name [String]
|
|
33
|
+
# @return [DocumentType, nil] first type whose re_file matches file_name, or nil when none does
|
|
34
|
+
def match_doc_type(file_name)
|
|
35
|
+
settings.document_types.each do |type|
|
|
36
|
+
match = type.re_file.match(file_name)
|
|
37
|
+
return type if match
|
|
38
|
+
end
|
|
39
|
+
nil
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
# @return [void]
|
|
43
|
+
def process_provided_files
|
|
44
|
+
type_id = options.type
|
|
45
|
+
raise ArgumentError, "No files provided to process #{type_id.inspect} type." unless options.files?
|
|
46
|
+
|
|
47
|
+
type = settings.document_type(type_id)
|
|
48
|
+
Pdfh.error_print "Type #{type_id.inspect} was not found." if type.nil?
|
|
49
|
+
options.files.each do |file|
|
|
50
|
+
next Pdfh.warn_print "File #{file.inspect} does not exist." unless File.exist?(file)
|
|
51
|
+
next Pdfh.warn_print "File #{file.inspect} is not a pdf." unless File.extname(file) == ".pdf"
|
|
52
|
+
|
|
53
|
+
PdfFileHandler.new(file, type).process_document(settings.base_path)
|
|
54
|
+
end
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
# @return [void]
|
|
58
|
+
def process_lookup_dirs
|
|
59
|
+
settings.lookup_dirs.each do |work_directory|
|
|
60
|
+
process_directory(work_directory)
|
|
61
|
+
end
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
# @param work_directory [String]
|
|
65
|
+
# @return [void]
|
|
66
|
+
def process_directory(work_directory)
|
|
67
|
+
Pdfh.headline(work_directory)
|
|
68
|
+
processed_count = 0
|
|
69
|
+
ignored_files = []
|
|
70
|
+
files = Dir["#{work_directory}/*.pdf"]
|
|
71
|
+
files.each do |pdf_file|
|
|
72
|
+
type = match_doc_type(pdf_file)
|
|
73
|
+
if type
|
|
74
|
+
processed_count += 1
|
|
75
|
+
PdfFileHandler.new(pdf_file, type).process_document(settings.base_path)
|
|
76
|
+
else
|
|
77
|
+
ignored_files << base_name_no_ext(pdf_file)
|
|
78
|
+
end
|
|
79
|
+
end
|
|
80
|
+
puts " (No files processed)".colorize(:light_black) if processed_count.zero?
|
|
81
|
+
return unless Pdfh.verbose?
|
|
82
|
+
|
|
83
|
+
puts "\n No document type found for these PDF files:" if ignored_files.any?
|
|
84
|
+
ignored_files.each.with_index(1) { |file, index| Pdfh.ident_print index, file, color: :magenta }
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
# @return [String]
|
|
88
|
+
def base_name_no_ext(file)
|
|
89
|
+
File.basename(file, File.extname(file))
|
|
90
|
+
end
|
|
91
|
+
end
|
|
92
|
+
end
|
|
93
|
+
end
|
|
@@ -3,27 +3,44 @@
|
|
|
3
3
|
module Pdfh
|
|
4
4
|
# Handles the PDF detected by the rules
|
|
5
5
|
class Document
|
|
6
|
-
|
|
6
|
+
IDENT = 12
|
|
7
|
+
|
|
8
|
+
attr_reader :text, :type, :file, :extra, :period
|
|
7
9
|
|
|
8
10
|
# @param file [String]
|
|
9
11
|
# @param type [DocumentType]
|
|
12
|
+
# @param text [String]
|
|
10
13
|
# @return [self]
|
|
11
|
-
def initialize(file, type)
|
|
12
|
-
raise IOError, "File #{file} not found" unless File.exist?(file)
|
|
13
|
-
|
|
14
|
+
def initialize(file, type, text)
|
|
14
15
|
@file = file
|
|
15
16
|
@type = type
|
|
16
|
-
Pdfh.
|
|
17
|
-
@
|
|
18
|
-
|
|
19
|
-
Pdfh.verbose_print "~~~~~~~~~~~~~~~~~~ Finding a subtype"
|
|
17
|
+
Pdfh.debug "=== Document Type: #{type.name} =============================="
|
|
18
|
+
@text = text
|
|
19
|
+
Pdfh.debug "~~~~~~~~~~~~~~~~~~ Finding a subtype"
|
|
20
20
|
@sub_type = type.sub_type(@text)
|
|
21
|
-
Pdfh.
|
|
21
|
+
Pdfh.debug " SubType: #{@sub_type}"
|
|
22
22
|
@companion = search_companion_files
|
|
23
23
|
|
|
24
24
|
month, year, @extra = match_data
|
|
25
25
|
@period = DocumentPeriod.new(day: extra, month: month, month_offset: @sub_type&.month_offset, year: year)
|
|
26
|
-
Pdfh.
|
|
26
|
+
Pdfh.debug " Period: #{@period.inspect}"
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
# @return [void]
|
|
30
|
+
def print_info
|
|
31
|
+
print_info_line "Type", type.name
|
|
32
|
+
print_info_line "Sub-Type", sub_type
|
|
33
|
+
print_info_line "Period", period
|
|
34
|
+
print_info_line "New Name", new_name
|
|
35
|
+
print_info_line "Store Path", store_path
|
|
36
|
+
print_info_line "Extra files", companion_files(join: true)
|
|
37
|
+
print_info_line "Print CMD", print_cmd
|
|
38
|
+
print_info_line "Processed?", "No (in Dry mode)" if Pdfh.dry?
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
# @return [void]
|
|
42
|
+
def print_info_line(property, info)
|
|
43
|
+
Pdfh.ident_print property, info.to_s, color: :light_blue, width: IDENT
|
|
27
44
|
end
|
|
28
45
|
|
|
29
46
|
# @return [String]
|
|
@@ -100,13 +117,13 @@ module Pdfh
|
|
|
100
117
|
# unnamed matches need to be in the order month, year
|
|
101
118
|
# @return [Array] - format [month, year, day]
|
|
102
119
|
def match_data
|
|
103
|
-
Pdfh.
|
|
104
|
-
Pdfh.
|
|
105
|
-
Pdfh.
|
|
120
|
+
Pdfh.debug "~~~~~~~~~~~~~~~~~~ Match Data RegEx"
|
|
121
|
+
Pdfh.debug " Using regex: #{@type.re_date}"
|
|
122
|
+
Pdfh.debug " named: #{@type.re_date.named_captures}"
|
|
106
123
|
matched = @type.re_date.match(@text)
|
|
107
124
|
raise ReDateError unless matched
|
|
108
125
|
|
|
109
|
-
Pdfh.
|
|
126
|
+
Pdfh.debug " captured: #{matched.captures}"
|
|
110
127
|
|
|
111
128
|
return matched.captures.map(&:downcase) if @type.re_date.named_captures.empty?
|
|
112
129
|
|
|
@@ -116,12 +133,12 @@ module Pdfh
|
|
|
116
133
|
|
|
117
134
|
# @return [Array]
|
|
118
135
|
def search_companion_files
|
|
119
|
-
Pdfh.
|
|
120
|
-
Pdfh.
|
|
136
|
+
Pdfh.debug "~~~~~~~~~~~~~~~~~~ Searching Companion files"
|
|
137
|
+
Pdfh.debug " Searching on: #{home_dir.inspect}"
|
|
121
138
|
Dir.chdir(home_dir) do
|
|
122
139
|
files_matching = Dir["#{file_name_only}.*"]
|
|
123
140
|
companion = files_matching.reject { |file| file.include? ".pdf" }
|
|
124
|
-
Pdfh.
|
|
141
|
+
Pdfh.debug " Found: #{companion.inspect}"
|
|
125
142
|
|
|
126
143
|
companion
|
|
127
144
|
end
|
|
@@ -1,7 +1,5 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require "base64"
|
|
4
|
-
|
|
5
3
|
module Pdfh
|
|
6
4
|
DocumentSubType = Struct.new(:name, :month_offset, keyword_init: true)
|
|
7
5
|
|
|
@@ -13,10 +11,13 @@ module Pdfh
|
|
|
13
11
|
self.name_template ||= "{original}"
|
|
14
12
|
self.re_file = Regexp.new(re_file)
|
|
15
13
|
self.re_date = Regexp.new(re_date)
|
|
16
|
-
self.pwd = Base64.decode64(pwd) if pwd
|
|
17
14
|
self.sub_types = extract_subtype(sub_types) if sub_types
|
|
18
15
|
end
|
|
19
16
|
|
|
17
|
+
# downcases the name, removes special characters and replaces spaces with dashes
|
|
18
|
+
# @example usage
|
|
19
|
+
# "Test This?%&".gid
|
|
20
|
+
# # => "test-this"
|
|
20
21
|
# @return [String]
|
|
21
22
|
def gid
|
|
22
23
|
name.downcase.gsub(/[^0-9A-Za-z\s]/, "").tr(" ", "-")
|
|
@@ -28,14 +29,26 @@ module Pdfh
|
|
|
28
29
|
sub_types&.find { |st| /#{st.name}/i.match?(text) }
|
|
29
30
|
end
|
|
30
31
|
|
|
32
|
+
# @return [String, nil] the Base64-decoded pwd when pwd is strict Base64, otherwise pwd unchanged
|
|
33
|
+
def password
|
|
34
|
+
return Base64.decode64(pwd) if base64?
|
|
35
|
+
|
|
36
|
+
pwd
|
|
37
|
+
end
|
|
38
|
+
|
|
31
39
|
private
|
|
32
40
|
|
|
41
|
+
# @return [Boolean]
|
|
42
|
+
def base64?
|
|
43
|
+
pwd.is_a?(String) && Base64.strict_encode64(Base64.decode64(pwd)) == pwd
|
|
44
|
+
end
|
|
45
|
+
|
|
33
46
|
# @param sub_types [Array]
|
|
34
47
|
# @return [Array<DocumentSubType>]
|
|
35
48
|
def extract_subtype(sub_types)
|
|
36
49
|
sub_types.map do |st|
|
|
37
|
-
name = st[
|
|
38
|
-
offset = st[
|
|
50
|
+
name = st[:name]
|
|
51
|
+
offset = st[:month_offset].to_i
|
|
39
52
|
DocumentSubType.new(name: name, month_offset: offset)
|
|
40
53
|
end
|
|
41
54
|
end
|
|
@@ -1,26 +1,31 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require "yaml"
|
|
4
|
-
|
|
5
3
|
module Pdfh
|
|
6
4
|
# Handles the config yaml data mapping, and associates a file name with a doc type
|
|
7
5
|
class Settings
|
|
8
|
-
attr_reader :lookup_dirs, :base_path
|
|
6
|
+
attr_reader :lookup_dirs, :base_path
|
|
9
7
|
|
|
10
|
-
# @param
|
|
8
|
+
# @param config_data [Hash]
|
|
11
9
|
# @return [self]
|
|
12
|
-
def initialize(
|
|
13
|
-
|
|
14
|
-
|
|
10
|
+
def initialize(config_data)
|
|
11
|
+
process_lookup_dirs(config_data[:lookup_dirs])
|
|
12
|
+
process_destination_base(config_data[:destination_base_path])
|
|
13
|
+
|
|
14
|
+
Pdfh.debug "Configured Look up directories:"
|
|
15
|
+
lookup_dirs.each.with_index(1) { |dir, idx| Pdfh.debug " #{idx}. #{dir}" }
|
|
16
|
+
Pdfh.debug
|
|
15
17
|
|
|
16
|
-
|
|
17
|
-
|
|
18
|
+
load_doc_types(config_data[:document_types])
|
|
19
|
+
end
|
|
18
20
|
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
21
|
+
# @return [Array<DocumentType>]
|
|
22
|
+
def document_types
|
|
23
|
+
@document_types.values
|
|
24
|
+
end
|
|
22
25
|
|
|
23
|
-
|
|
26
|
+
# @return [DocumentType, nil] the type stored under the given id, or nil when the id is unknown
|
|
27
|
+
def document_type(id)
|
|
28
|
+
@document_types[id]
|
|
24
29
|
end
|
|
25
30
|
|
|
26
31
|
private
|
|
@@ -30,7 +35,7 @@ module Pdfh
|
|
|
30
35
|
@lookup_dirs = lookup_dirs_list.filter_map do |dir|
|
|
31
36
|
expanded = File.expand_path(dir)
|
|
32
37
|
unless File.directory?(expanded)
|
|
33
|
-
Pdfh.
|
|
38
|
+
Pdfh.debug " ** Error, Directory #{dir} does not exists."
|
|
34
39
|
next
|
|
35
40
|
end
|
|
36
41
|
expanded
|
|
@@ -47,7 +52,10 @@ module Pdfh
|
|
|
47
52
|
|
|
48
53
|
# @return [Hash{String => DocumentType}] document types indexed by their gid
|
|
49
54
|
def load_doc_types(doc_types)
|
|
50
|
-
doc_types.
|
|
55
|
+
@document_types = doc_types.each_with_object({}) do |data, result|
|
|
56
|
+
doc_type = DocumentType.new(data)
|
|
57
|
+
result.store(doc_type.gid, doc_type)
|
|
58
|
+
end
|
|
51
59
|
end
|
|
52
60
|
end
|
|
53
61
|
end
|