pdfh 3.0.3 → 3.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +22 -19
- data/README.md +14 -14
- data/lib/pdfh/models/document.rb +11 -6
- data/lib/pdfh/models/document_type.rb +20 -5
- data/lib/pdfh/utils/rename_validator.rb +14 -8
- data/lib/pdfh/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ecddee5472149aa038427798c482557111e62070ebed5558fdffcde55417b325
|
4
|
+
data.tar.gz: 75ff53bc3429bc5303411a81d3922e3f0b6f4249bf66bae396e867cd0b4615a9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4c51ac8db9d6f9b56c1608289c7d7694b21af6856eb69e06b4fd203ff2824ed3884a1ef1c64cefea37ff1358614e41e3c55264134d9616d9d14819d36ed7241a
|
7
|
+
data.tar.gz: b3d382767616e4008997ce8050691c1e578b8a181ee2a36a4fb4f6a8ed912e93f65511967db2990efbbcd33d73ffdc21535a5163ad9e248225e5e0054548b570
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
pdfh (3.0.3)
|
4
|
+
pdfh (3.1.0)
|
5
5
|
colorize (~> 1.1.0)
|
6
6
|
|
7
7
|
GEM
|
@@ -20,7 +20,7 @@ GEM
|
|
20
20
|
ansi (1.5.0)
|
21
21
|
ast (2.4.2)
|
22
22
|
base64 (0.2.0)
|
23
|
-
bigdecimal (3.1.
|
23
|
+
bigdecimal (3.1.7)
|
24
24
|
blockenspiel (0.5.0)
|
25
25
|
code-scanning-rubocop (0.6.1)
|
26
26
|
rubocop (~> 1.0)
|
@@ -28,7 +28,7 @@ GEM
|
|
28
28
|
colorize (1.1.0)
|
29
29
|
concurrent-ruby (1.2.3)
|
30
30
|
connection_pool (2.4.1)
|
31
|
-
debug (1.9.
|
31
|
+
debug (1.9.2)
|
32
32
|
irb (~> 1.10)
|
33
33
|
reline (>= 0.3.8)
|
34
34
|
diff-lcs (1.5.1)
|
@@ -39,16 +39,16 @@ GEM
|
|
39
39
|
i18n (1.14.4)
|
40
40
|
concurrent-ruby (~> 1.0)
|
41
41
|
io-console (0.7.2)
|
42
|
-
irb (1.
|
43
|
-
rdoc
|
42
|
+
irb (1.13.0)
|
43
|
+
rdoc (>= 4.0.0)
|
44
44
|
reline (>= 0.4.2)
|
45
|
-
json (2.7.
|
45
|
+
json (2.7.2)
|
46
46
|
language_server-protocol (3.17.0.3)
|
47
|
-
method_source (1.
|
48
|
-
minitest (5.22.
|
47
|
+
method_source (1.1.0)
|
48
|
+
minitest (5.22.3)
|
49
49
|
mutex_m (0.2.0)
|
50
50
|
parallel (1.24.0)
|
51
|
-
parser (3.3.0
|
51
|
+
parser (3.3.1.0)
|
52
52
|
ast (~> 2.4.1)
|
53
53
|
racc
|
54
54
|
pry (0.14.2)
|
@@ -58,11 +58,11 @@ GEM
|
|
58
58
|
stringio
|
59
59
|
racc (1.7.3)
|
60
60
|
rainbow (3.1.1)
|
61
|
-
rake (13.1
|
62
|
-
rdoc (6.6.
|
61
|
+
rake (13.2.1)
|
62
|
+
rdoc (6.6.3.1)
|
63
63
|
psych (>= 4.0.0)
|
64
64
|
regexp_parser (2.9.0)
|
65
|
-
reline (0.
|
65
|
+
reline (0.5.5)
|
66
66
|
io-console (~> 0.5)
|
67
67
|
rexml (3.2.6)
|
68
68
|
rspec (3.13.0)
|
@@ -80,7 +80,7 @@ GEM
|
|
80
80
|
rspec-support (3.13.1)
|
81
81
|
rspec_junit_formatter (0.6.0)
|
82
82
|
rspec-core (>= 2, < 4, != 2.12.0)
|
83
|
-
rubocop (1.
|
83
|
+
rubocop (1.63.4)
|
84
84
|
json (~> 2.3)
|
85
85
|
language_server-protocol (>= 3.17.0)
|
86
86
|
parallel (~> 1.10)
|
@@ -91,21 +91,24 @@ GEM
|
|
91
91
|
rubocop-ast (>= 1.31.1, < 2.0)
|
92
92
|
ruby-progressbar (~> 1.7)
|
93
93
|
unicode-display_width (>= 2.4.0, < 3.0)
|
94
|
-
rubocop-ast (1.31.
|
95
|
-
parser (>= 3.3.0
|
94
|
+
rubocop-ast (1.31.3)
|
95
|
+
parser (>= 3.3.1.0)
|
96
96
|
rubocop-capybara (2.20.0)
|
97
97
|
rubocop (~> 1.41)
|
98
98
|
rubocop-factory_bot (2.25.1)
|
99
99
|
rubocop (~> 1.41)
|
100
|
-
rubocop-performance (1.
|
100
|
+
rubocop-performance (1.21.0)
|
101
101
|
rubocop (>= 1.48.1, < 2.0)
|
102
|
-
rubocop-ast (>= 1.
|
102
|
+
rubocop-ast (>= 1.31.1, < 2.0)
|
103
103
|
rubocop-rake (0.6.0)
|
104
104
|
rubocop (~> 1.0)
|
105
|
-
rubocop-rspec (2.
|
105
|
+
rubocop-rspec (2.29.2)
|
106
106
|
rubocop (~> 1.40)
|
107
107
|
rubocop-capybara (~> 2.17)
|
108
108
|
rubocop-factory_bot (~> 2.22)
|
109
|
+
rubocop-rspec_rails (~> 2.28)
|
110
|
+
rubocop-rspec_rails (2.28.3)
|
111
|
+
rubocop (~> 1.40)
|
109
112
|
ruby-progressbar (1.13.0)
|
110
113
|
simplecov (0.22.0)
|
111
114
|
docile (~> 1.1)
|
@@ -148,4 +151,4 @@ DEPENDENCIES
|
|
148
151
|
versionomy (~> 0.5)
|
149
152
|
|
150
153
|
BUNDLED WITH
|
151
|
-
2.5.
|
154
|
+
2.5.10
|
data/README.md
CHANGED
@@ -18,20 +18,22 @@ gem install pdfh
|
|
18
18
|
You need to install pdf handling dependencies in order to use this gem.
|
19
19
|
|
20
20
|
#### macOS
|
21
|
+
|
21
22
|
```bash
|
22
23
|
brew install qpdf # for qpdf
|
23
24
|
brew install xpdf # for pdftotext
|
24
25
|
```
|
25
26
|
|
26
27
|
#### Fedora
|
28
|
+
|
27
29
|
```bash
|
28
30
|
sudo dnf install -y qpdf poppler-utils
|
29
31
|
```
|
30
32
|
|
31
33
|
## Usage
|
32
34
|
|
33
|
-
After installing this gem you need to create your configuration file on your home folder.
|
34
|
-
|
35
|
+
After installing this gem, you need to create your configuration file, `pdfh.yml`, in your home folder.
|
36
|
+
|
35
37
|
```yaml
|
36
38
|
---
|
37
39
|
lookup_dirs: # Directories where all pdf's are going to be analyzed
|
@@ -42,25 +44,22 @@ document_types:
|
|
42
44
|
re_file: '.*MyBankReg\.pdf' # Regular expression to match its filename
|
43
45
|
re_date: 'al \d{1,2} de (\w+) del? (\d+)' # Date regular expression
|
44
46
|
pwd: base64string # [OPTIONAL] Password if the document is protected
|
45
|
-
store_path: "{
|
47
|
+
store_path: "{year}/bank_docs" # Relative path to copy this document
|
46
48
|
name_template: '{period} {subtype}' # Template for new filename when copied
|
47
49
|
sub_types: # [OPTIONAL] In case you need an extra category
|
48
50
|
- name: Account1 # Regular expression to match this subtype
|
49
51
|
month_offset: -1 # [OPTIONAL] Integer (signed) value to adjust month
|
50
52
|
```
|
51
53
|
|
52
|
-
Store Path supported placeholders:
|
53
|
-
* `{YEAR}` 2022
|
54
|
+
**Store Path** and **Name Template** supported placeholders:
|
54
55
|
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
* `{subtype}` subtype.name if matched
|
63
|
-
* `{extra}` day if provided/matched
|
56
|
+
- `{original}` Original filename
|
57
|
+
- `{period}` 2022-01
|
58
|
+
- `{year}` 2022
|
59
|
+
- `{month}` 01
|
60
|
+
- `{type}` document_type.name
|
61
|
+
- `{subtype}` subtype.name if matched
|
62
|
+
- `{extra}` day if provided/matched
|
64
63
|
|
65
64
|
## Development
|
66
65
|
|
@@ -77,6 +76,7 @@ gem install pdfh-*
|
|
77
76
|
```
|
78
77
|
|
79
78
|
### Conventional Commits
|
79
|
+
|
80
80
|
```bash
|
81
81
|
npm install -g @commitlint/cli @commitlint/config-conventional
|
82
82
|
commitlint --from origin --to @
|
data/lib/pdfh/models/document.rb
CHANGED
@@ -3,8 +3,6 @@
|
|
3
3
|
module Pdfh
|
4
4
|
# Handles the PDF detected by the rules
|
5
5
|
class Document
|
6
|
-
IDENT = 12
|
7
|
-
|
8
6
|
attr_reader :text, :type, :file, :extra, :period
|
9
7
|
|
10
8
|
# @param file [String]
|
@@ -40,12 +38,17 @@ module Pdfh
|
|
40
38
|
|
41
39
|
# @return [void]
|
42
40
|
def print_info_line(property, info)
|
43
|
-
Pdfh.ident_print property, info.to_s, color: :light_blue, width:
|
41
|
+
Pdfh.ident_print property, info.to_s, color: :light_blue, width: 12
|
44
42
|
end
|
45
43
|
|
46
44
|
# @return [String]
|
47
45
|
def file_name_only
|
48
|
-
File.basename(@file,
|
46
|
+
File.basename(@file, file_extension)
|
47
|
+
end
|
48
|
+
|
49
|
+
# @return [String]
|
50
|
+
def file_extension
|
51
|
+
File.extname(@file)
|
49
52
|
end
|
50
53
|
|
51
54
|
# @return [String]
|
@@ -83,12 +86,13 @@ module Pdfh
|
|
83
86
|
|
84
87
|
# @return [String]
|
85
88
|
def new_name
|
86
|
-
type.generate_new_name(rename_data)
|
89
|
+
new_name = type.generate_new_name(rename_data)
|
90
|
+
"#{new_name}#{file_extension}"
|
87
91
|
end
|
88
92
|
|
89
93
|
# @return [String]
|
90
94
|
def store_path
|
91
|
-
|
95
|
+
type.generate_path(rename_data)
|
92
96
|
end
|
93
97
|
|
94
98
|
# @return [String]
|
@@ -99,6 +103,7 @@ module Pdfh
|
|
99
103
|
"#{type.print_cmd} #{relative_path}"
|
100
104
|
end
|
101
105
|
|
106
|
+
# @return [String (frozen)]
|
102
107
|
def companion_files(join: false)
|
103
108
|
return @companion unless join
|
104
109
|
|
@@ -11,11 +11,11 @@ module Pdfh
|
|
11
11
|
self.re_file = Regexp.new(re_file)
|
12
12
|
self.re_date = Regexp.new(re_date)
|
13
13
|
self.sub_types = extract_subtype(sub_types) if sub_types
|
14
|
-
@
|
15
|
-
|
14
|
+
@path_validator = RenameValidator.new(store_path)
|
15
|
+
@name_validator = RenameValidator.new(name_template)
|
16
|
+
return if @path_validator.valid? && @name_validator.valid?
|
16
17
|
|
17
|
-
|
18
|
-
raise ArgumentError, "Document type #{name.inspect} has invalid :name_template. Tokens not recognized: #{errors}"
|
18
|
+
raise_validators_error
|
19
19
|
end
|
20
20
|
|
21
21
|
# removes special characters from string and replaces spaces with dashes
|
@@ -43,7 +43,13 @@ module Pdfh
|
|
43
43
|
# @param values [Hash{Symbol->String}]
|
44
44
|
# @return [String]
|
45
45
|
def generate_new_name(values)
|
46
|
-
@
|
46
|
+
@name_validator.gsub(values)
|
47
|
+
end
|
48
|
+
|
49
|
+
# @param values [Hash{Symbol->String}]
|
50
|
+
# @return [String]
|
51
|
+
def generate_path(values)
|
52
|
+
@path_validator.gsub(values)
|
47
53
|
end
|
48
54
|
|
49
55
|
private
|
@@ -62,5 +68,14 @@ module Pdfh
|
|
62
68
|
DocumentSubType.new(name: name, month_offset: offset)
|
63
69
|
end
|
64
70
|
end
|
71
|
+
|
72
|
+
# @raise [ArgumentError] when called
|
73
|
+
# @return [void]
|
74
|
+
def raise_validators_error
|
75
|
+
template = "has invalid %<1>s. Unknown tokens: %<2>s"
|
76
|
+
path_errors = format(template, :store_path, @path_validator.unknown_list) unless @path_validator.valid?
|
77
|
+
name_errors = format(template, :name_template, @name_validator.unknown_list) unless @name_validator.valid?
|
78
|
+
raise ArgumentError, "Document type #{name.inspect} #{path_errors} #{name_errors}"
|
79
|
+
end
|
65
80
|
end
|
66
81
|
end
|
@@ -13,13 +13,13 @@ module Pdfh
|
|
13
13
|
"extra" => "Extra data extracted from date_re"
|
14
14
|
}.freeze
|
15
15
|
|
16
|
-
attr_reader :
|
16
|
+
attr_reader :template, :all, :unknown, :valid
|
17
17
|
|
18
|
-
# @param
|
18
|
+
# @param template [String]
|
19
19
|
# @return [self]
|
20
|
-
def initialize(
|
21
|
-
@
|
22
|
-
@all =
|
20
|
+
def initialize(template)
|
21
|
+
@template = template
|
22
|
+
@all = template.scan(/{([^}]+)}/).flatten.map(&:downcase)
|
23
23
|
@unknown = all - types
|
24
24
|
@valid = all - unknown
|
25
25
|
end
|
@@ -34,11 +34,17 @@ module Pdfh
|
|
34
34
|
unknown.empty?
|
35
35
|
end
|
36
36
|
|
37
|
+
# @return [String]
|
38
|
+
def unknown_list
|
39
|
+
unknown.join(", ")
|
40
|
+
end
|
41
|
+
|
37
42
|
# @param values [Hash{Symbol->String}]
|
38
43
|
# @return [String (frozen)]
|
39
|
-
def
|
40
|
-
|
41
|
-
|
44
|
+
def gsub(values)
|
45
|
+
template
|
46
|
+
.gsub(/\{([^}]+)}/, &:downcase) # convert all text between {} to lowercase
|
47
|
+
.gsub("{", "%{") % values
|
42
48
|
end
|
43
49
|
end
|
44
50
|
end
|
data/lib/pdfh/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pdfh
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.3
|
4
|
+
version: 3.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Isaias Piña
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-03
|
11
|
+
date: 2024-05-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: colorize
|
@@ -92,7 +92,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
92
92
|
- !ruby/object:Gem::Version
|
93
93
|
version: '0'
|
94
94
|
requirements: []
|
95
|
-
rubygems_version: 3.5.
|
95
|
+
rubygems_version: 3.5.7
|
96
96
|
signing_key:
|
97
97
|
specification_version: 4
|
98
98
|
summary: Organize PDF files
|