pdfh 0.1.3 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.rubocop.yml +6 -3
- data/.rubocop_todo.yml +22 -0
- data/.ruby-version +1 -1
- data/.travis.yml +1 -1
- data/CHANGELOG.md +8 -0
- data/Gemfile.lock +59 -29
- data/README.md +7 -4
- data/Rakefile +17 -0
- data/docs/legacy.md +453 -0
- data/exe/pdfh +2 -1
- data/lib/ext/string.rb +13 -0
- data/lib/pdfh.rb +58 -40
- data/lib/pdfh/document.rb +74 -97
- data/lib/pdfh/month.rb +41 -0
- data/lib/pdfh/pdf_handler.rb +54 -0
- data/lib/pdfh/settings.rb +17 -7
- data/lib/pdfh/utils.rb +7 -2
- data/lib/pdfh/version.rb +1 -1
- data/pdfh.gemspec +11 -7
- metadata +68 -22
- data/.ruby-gemset +0 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: bd8f993d70a6ed67ff55c9554ba63c74eb17abbb9861f562c899a39d2979f97c
|
|
4
|
+
data.tar.gz: 8ff19318f2a97df3875d6299ab9566cca9a6effa01169f2523ab4ed27266e3c0
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 685b6ea921370a125341e55a7bad2d4931e0932a6ff685ac6fc5aff996bcd2800f4c1e0081fb35f1b6cfd7063760b7ca0a4bc28ba60ad84095314cea4af850ad
|
|
7
|
+
data.tar.gz: 9ab206d0accfbebadd06283a5dd6b6af3905b890a61ea05f65081b795433d0151ae5653ec7889588d78fc42053fd09c9baa7f9c9d0232ae3f628341babbfd329
|
data/.gitignore
CHANGED
data/.rubocop.yml
CHANGED
data/.rubocop_todo.yml
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# This configuration was generated by
|
|
2
|
+
# `rubocop --auto-gen-config`
|
|
3
|
+
# on 2020-10-01 00:19:12 UTC using RuboCop version 0.92.0.
|
|
4
|
+
# The point is for the user to remove these configuration records
|
|
5
|
+
# one by one as the offenses are removed from the code base.
|
|
6
|
+
# Note that changes in the inspected code, or installation of new
|
|
7
|
+
# versions of RuboCop, may require this file to be generated again.
|
|
8
|
+
|
|
9
|
+
# Offense count: 1
|
|
10
|
+
# Configuration parameters: IgnoredMethods.
|
|
11
|
+
Metrics/AbcSize:
|
|
12
|
+
Max: 23
|
|
13
|
+
|
|
14
|
+
# Offense count: 1
|
|
15
|
+
# Configuration parameters: CountComments, CountAsOne.
|
|
16
|
+
Metrics/ClassLength:
|
|
17
|
+
Max: 149
|
|
18
|
+
|
|
19
|
+
# Offense count: 5
|
|
20
|
+
# Configuration parameters: CountComments, CountAsOne, ExcludedMethods.
|
|
21
|
+
Metrics/MethodLength:
|
|
22
|
+
Max: 13
|
data/.ruby-version
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
|
|
1
|
+
2.7.1
|
data/.travis.yml
CHANGED
data/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,11 @@
|
|
|
1
|
+
## v0.1.5
|
|
2
|
+
* Add print_cmd field in config file for information purposes
|
|
3
|
+
* Settings now validates a non-existing directory
|
|
4
|
+
* Refactor for easier maintenance
|
|
5
|
+
|
|
6
|
+
## v0.1.4
|
|
7
|
+
* Add titleize format when writing new file
|
|
8
|
+
|
|
1
9
|
## v0.1.3
|
|
2
10
|
* Fixed copy companion files, which was not copying the files.
|
|
3
11
|
|
data/Gemfile.lock
CHANGED
|
@@ -1,52 +1,82 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
pdfh (0.1.
|
|
5
|
-
colorize (~> 0.8.
|
|
4
|
+
pdfh (0.1.8)
|
|
5
|
+
colorize (~> 0.8.0)
|
|
6
6
|
|
|
7
7
|
GEM
|
|
8
8
|
remote: https://rubygems.org/
|
|
9
9
|
specs:
|
|
10
10
|
ansi (1.5.0)
|
|
11
|
+
ast (2.4.1)
|
|
12
|
+
blockenspiel (0.5.0)
|
|
13
|
+
coderay (1.1.3)
|
|
11
14
|
colorize (0.8.1)
|
|
12
|
-
diff-lcs (1.
|
|
13
|
-
docile (1.3.
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
15
|
+
diff-lcs (1.4.4)
|
|
16
|
+
docile (1.3.2)
|
|
17
|
+
method_source (1.0.0)
|
|
18
|
+
parallel (1.19.2)
|
|
19
|
+
parser (2.7.1.5)
|
|
20
|
+
ast (~> 2.4.1)
|
|
21
|
+
pry (0.13.1)
|
|
22
|
+
coderay (~> 1.1)
|
|
23
|
+
method_source (~> 1.0)
|
|
24
|
+
rainbow (3.0.0)
|
|
25
|
+
rake (13.0.1)
|
|
26
|
+
regexp_parser (1.8.1)
|
|
27
|
+
rexml (3.2.4)
|
|
28
|
+
rspec (3.9.0)
|
|
29
|
+
rspec-core (~> 3.9.0)
|
|
30
|
+
rspec-expectations (~> 3.9.0)
|
|
31
|
+
rspec-mocks (~> 3.9.0)
|
|
32
|
+
rspec-core (3.9.3)
|
|
33
|
+
rspec-support (~> 3.9.3)
|
|
34
|
+
rspec-expectations (3.9.2)
|
|
24
35
|
diff-lcs (>= 1.2.0, < 2.0)
|
|
25
|
-
rspec-support (~> 3.
|
|
26
|
-
rspec-mocks (3.
|
|
36
|
+
rspec-support (~> 3.9.0)
|
|
37
|
+
rspec-mocks (3.9.1)
|
|
27
38
|
diff-lcs (>= 1.2.0, < 2.0)
|
|
28
|
-
rspec-support (~> 3.
|
|
29
|
-
rspec-support (3.
|
|
30
|
-
|
|
39
|
+
rspec-support (~> 3.9.0)
|
|
40
|
+
rspec-support (3.9.3)
|
|
41
|
+
rubocop (0.92.0)
|
|
42
|
+
parallel (~> 1.10)
|
|
43
|
+
parser (>= 2.7.1.5)
|
|
44
|
+
rainbow (>= 2.2.2, < 4.0)
|
|
45
|
+
regexp_parser (>= 1.7)
|
|
46
|
+
rexml
|
|
47
|
+
rubocop-ast (>= 0.5.0)
|
|
48
|
+
ruby-progressbar (~> 1.7)
|
|
49
|
+
unicode-display_width (>= 1.4.0, < 2.0)
|
|
50
|
+
rubocop-ast (0.7.1)
|
|
51
|
+
parser (>= 2.7.1.5)
|
|
52
|
+
ruby-progressbar (1.10.1)
|
|
53
|
+
simplecov (0.19.0)
|
|
31
54
|
docile (~> 1.1)
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
simplecov-console (0.4.2)
|
|
55
|
+
simplecov-html (~> 0.11)
|
|
56
|
+
simplecov-console (0.7.2)
|
|
35
57
|
ansi
|
|
36
|
-
hirb
|
|
37
58
|
simplecov
|
|
38
|
-
|
|
59
|
+
terminal-table
|
|
60
|
+
simplecov-html (0.12.3)
|
|
61
|
+
terminal-table (1.8.0)
|
|
62
|
+
unicode-display_width (~> 1.1, >= 1.1.1)
|
|
63
|
+
unicode-display_width (1.7.0)
|
|
64
|
+
versionomy (0.5.0)
|
|
65
|
+
blockenspiel (~> 0.5)
|
|
39
66
|
|
|
40
67
|
PLATFORMS
|
|
41
68
|
ruby
|
|
42
69
|
|
|
43
70
|
DEPENDENCIES
|
|
44
|
-
bundler (~>
|
|
71
|
+
bundler (~> 2.0)
|
|
45
72
|
pdfh!
|
|
46
|
-
|
|
73
|
+
pry
|
|
74
|
+
rake (~> 13.0)
|
|
47
75
|
rspec (~> 3.0)
|
|
48
|
-
|
|
49
|
-
simplecov
|
|
76
|
+
rubocop
|
|
77
|
+
simplecov
|
|
78
|
+
simplecov-console
|
|
79
|
+
versionomy
|
|
50
80
|
|
|
51
81
|
BUNDLED WITH
|
|
52
|
-
1.
|
|
82
|
+
2.1.4
|
data/README.md
CHANGED
|
@@ -24,7 +24,7 @@ document_types:
|
|
|
24
24
|
pwd: base64string # [OPTIONAL] Password if the document is protected
|
|
25
25
|
store_path: "{YEAR}/bank_docs" # Relative path to copy this document
|
|
26
26
|
name_template: '{period} {subtype}' # Template for new filename when copied
|
|
27
|
-
sub_types: # [OPTIONAL] In case your need an extra category
|
|
27
|
+
sub_types: # [OPTIONAL] In case you need an extra category
|
|
28
28
|
- name: Account1 # Regular expression to match this subtype
|
|
29
29
|
month_offset: -1 # [OPTIONAL] Integer value to adjust month
|
|
30
30
|
```
|
|
@@ -33,16 +33,19 @@ document_types:
|
|
|
33
33
|
|
|
34
34
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
|
35
35
|
|
|
36
|
-
To install this gem onto your local machine, run `
|
|
36
|
+
To install this gem onto your local machine, run `rake install`. To release a new version, run `rake bump`, and then run `rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
|
37
37
|
|
|
38
38
|
```bash
|
|
39
|
+
rake install
|
|
40
|
+
|
|
41
|
+
# step by step
|
|
39
42
|
gem build pdfh.gemspec
|
|
40
43
|
gem install pdfh-*
|
|
41
44
|
```
|
|
42
45
|
|
|
43
46
|
## Contributing
|
|
44
47
|
|
|
45
|
-
Bug reports and pull requests are welcome on GitHub at https://github.com/
|
|
48
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/iax7/pdfh. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
|
|
46
49
|
|
|
47
50
|
## License
|
|
48
51
|
|
|
@@ -50,4 +53,4 @@ The gem is available as open source under the terms of the [MIT License](https:/
|
|
|
50
53
|
|
|
51
54
|
## Code of Conduct
|
|
52
55
|
|
|
53
|
-
Everyone interacting in the Pdfh project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/
|
|
56
|
+
Everyone interacting in the Pdfh project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/iax7/pdfh/blob/master/CODE_OF_CONDUCT.md).
|
data/Rakefile
CHANGED
|
@@ -1,8 +1,25 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require 'colorize'
|
|
3
4
|
require 'bundler/gem_tasks'
|
|
4
5
|
require 'rspec/core/rake_task'
|
|
6
|
+
require 'versionomy'
|
|
5
7
|
|
|
6
8
|
RSpec::Core::RakeTask.new(:spec)
|
|
7
9
|
|
|
8
10
|
task default: :spec
|
|
11
|
+
|
|
12
|
+
desc 'Bump gem version number (tiny|minor|major)'
|
|
13
|
+
task :bump, :type do |_t, args|
|
|
14
|
+
args.with_defaults(type: :tiny)
|
|
15
|
+
version_file = File.join(__dir__, 'lib', 'pdfh', 'version.rb')
|
|
16
|
+
content = File.read(version_file)
|
|
17
|
+
|
|
18
|
+
version_pattern = /(?<major>\d+)\.(?<minor>\d+)\.(?<tiny>\d+)/
|
|
19
|
+
current_version = content.match(version_pattern)
|
|
20
|
+
next_version = Versionomy.parse(current_version.to_s).bump(args.type).to_s
|
|
21
|
+
|
|
22
|
+
File.write(version_file, content.gsub(version_pattern, "\\1#{next_version}\\3"))
|
|
23
|
+
|
|
24
|
+
puts "Successfully bumped from #{current_version.to_s.red} to #{next_version.green}"
|
|
25
|
+
end
|
data/docs/legacy.md
ADDED
|
@@ -0,0 +1,453 @@
|
|
|
1
|
+
# Legacy
|
|
2
|
+
|
|
3
|
+
## Python
|
|
4
|
+
|
|
5
|
+
This project was born as a bash script. It was initially ported to a Python script,
|
|
6
|
+
and ended as a Ruby gem. Below is the old Python code, provided just for fun.
|
|
7
|
+
|
|
8
|
+
```python
|
|
9
|
+
#!/usr/bin/env python3
|
|
10
|
+
"""Organize PDF protected password files, using rules defined in yaml format."""
|
|
11
|
+
from __future__ import print_function
|
|
12
|
+
import os
|
|
13
|
+
import re
|
|
14
|
+
import base64
|
|
15
|
+
import pprint
|
|
16
|
+
import argparse
|
|
17
|
+
import tempfile
|
|
18
|
+
import subprocess
|
|
19
|
+
import yaml
|
|
20
|
+
from shutil import copyfile
|
|
21
|
+
from colorama import Fore
|
|
22
|
+
|
|
23
|
+
IS_VERBOSE = False
|
|
24
|
+
IS_DRY = False
|
|
25
|
+
# TODO: calendar.month_name[11] current locale
|
|
26
|
+
MONTHS = dict(
|
|
27
|
+
enero = 1,
|
|
28
|
+
febrero = 2,
|
|
29
|
+
marzo = 3,
|
|
30
|
+
abril = 4,
|
|
31
|
+
mayo = 5,
|
|
32
|
+
junio = 6,
|
|
33
|
+
julio = 7,
|
|
34
|
+
agosto = 8,
|
|
35
|
+
septiembre = 9,
|
|
36
|
+
octubre = 10,
|
|
37
|
+
noviembre = 11,
|
|
38
|
+
diciembre = 12
|
|
39
|
+
)
|
|
40
|
+
|
|
41
|
+
class InlineClass(object):
|
|
42
|
+
"""Wrapper to have an object like dictionary"""
|
|
43
|
+
def __init__(self, dict):
|
|
44
|
+
self.__dict__ = dict
|
|
45
|
+
def has_key(self, key):
|
|
46
|
+
return key in self.__dict__.keys()
|
|
47
|
+
|
|
48
|
+
def get_month_num(num):
|
|
49
|
+
# Not implemented yet
|
|
50
|
+
import locale
|
|
51
|
+
locale.setlocale(locale.LC_ALL, 'es_MX')
|
|
52
|
+
import calendar
|
|
53
|
+
calendar.month_name[num]
|
|
54
|
+
|
|
55
|
+
class Document(object):
|
|
56
|
+
"""Handles the PDF detected by the rules, and makes tranformations"""
|
|
57
|
+
def __init__(self, file, account, **kwargs):
|
|
58
|
+
self._file = file
|
|
59
|
+
self._act = account
|
|
60
|
+
self._extra = ''
|
|
61
|
+
self._has_xml = False
|
|
62
|
+
self._verbose = kwargs['verbose']
|
|
63
|
+
verbose = self._verbose
|
|
64
|
+
if verbose:
|
|
65
|
+
print(Fore.CYAN + account.name, '==================' + Fore.RESET)
|
|
66
|
+
|
|
67
|
+
self._pwd = base64.b64decode(self._act.pwd) if self._act.pwd else ''
|
|
68
|
+
if type(self._pwd) is bytes:
|
|
69
|
+
self._pwd = self._pwd.decode()
|
|
70
|
+
|
|
71
|
+
if not os.path.exists(self._file):
|
|
72
|
+
raise IOError("I can't find the PDF")
|
|
73
|
+
|
|
74
|
+
# Check if aditional XML file exists
|
|
75
|
+
self._xml_file = os.path.splitext(self._file)[0]+'.xml'
|
|
76
|
+
if os.path.exists(self._xml_file):
|
|
77
|
+
self._has_xml = True
|
|
78
|
+
|
|
79
|
+
self._tmp = tempfile.mktemp(suffix=".pdf")
|
|
80
|
+
if verbose:
|
|
81
|
+
print(Fore.CYAN + ' --> ' + self._tmp + ' temporal file assigned.' + Fore.RESET)
|
|
82
|
+
|
|
83
|
+
cmd1 = "qpdf --password='{}' --decrypt --stream-data=uncompress '{}' '{}'" \
|
|
84
|
+
.format(self._pwd, self._file, self._tmp)
|
|
85
|
+
subprocess.call(cmd1, shell=True)
|
|
86
|
+
|
|
87
|
+
cmd2 = "pdftotext -enc UTF-8 '{}' -".format(self._tmp)
|
|
88
|
+
|
|
89
|
+
p = subprocess.Popen(cmd2, stdout=subprocess.PIPE, shell=True)
|
|
90
|
+
self._text, _err = p.communicate()
|
|
91
|
+
if type(self._text) is bytes:
|
|
92
|
+
self._text = self._text.decode(encoding="utf-8", errors="replace")
|
|
93
|
+
if verbose:
|
|
94
|
+
print(Fore.CYAN + self._text + Fore.RESET)
|
|
95
|
+
|
|
96
|
+
match = re.search(self._act.re_date, self._text, re.MULTILINE)
|
|
97
|
+
if not match:
|
|
98
|
+
print(Fore.RED, 'Err, date was not extracted with regex provided: ' + Fore.LIGHTRED_EX +
|
|
99
|
+
self._act.re_date + Fore.RESET)
|
|
100
|
+
exit(1)
|
|
101
|
+
if verbose:
|
|
102
|
+
print(Fore.CYAN, '==== Regex Groups:', match.groups(), Fore.RESET)
|
|
103
|
+
try:
|
|
104
|
+
self._month = match.group('m')
|
|
105
|
+
self._year = match.group('y')
|
|
106
|
+
except IndexError:
|
|
107
|
+
self._month, self._year = match.groups()
|
|
108
|
+
|
|
109
|
+
if len(match.groups()) > 2:
|
|
110
|
+
self._extra = match.group(3)
|
|
111
|
+
|
|
112
|
+
self._month = self._month.lower()
|
|
113
|
+
if verbose:
|
|
114
|
+
print(Fore.CYAN, '==== Assigned:', (self._month, self._year, self._extra),
|
|
115
|
+
'==( Month, Year, Extra )================' + Fore.RESET)
|
|
116
|
+
|
|
117
|
+
if self._act.has_key('types'):
|
|
118
|
+
for t in self._act.types:
|
|
119
|
+
name = t['name']
|
|
120
|
+
if re.search(name, self._text, re.IGNORECASE):
|
|
121
|
+
self.type = name
|
|
122
|
+
self.offset = t.get('month_offset', 0)
|
|
123
|
+
else:
|
|
124
|
+
self.type = None
|
|
125
|
+
self.offset = 0
|
|
126
|
+
|
|
127
|
+
if verbose:
|
|
128
|
+
print(Fore.CYAN, 'Offset settings, Type:', self.type, '/ Month:', self.offset, Fore.RESET)
|
|
129
|
+
#Used if the month offset results in change in year.
|
|
130
|
+
self._year_offset = 0
|
|
131
|
+
if verbose:
|
|
132
|
+
print(Fore.CYAN, 'END INIT ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~' + Fore.RESET)
|
|
133
|
+
|
|
134
|
+
def __repr__(self):
|
|
135
|
+
type_str = self.type if self.type else 'N/A'
|
|
136
|
+
format_string = 'Name : {}\nType : {}\nPeriod : {}\nFile Path: {}\n'+\
|
|
137
|
+
'File Name: {}\nNew Name : {}\nStorePath: {}\nFullPath : {}'
|
|
138
|
+
return format_string.format(
|
|
139
|
+
self.name, type_str, self.period, self._file,
|
|
140
|
+
self.filename_only, self.new_name, self.store_path, self.full_path)
|
|
141
|
+
|
|
142
|
+
def write_pdf(self):
|
|
143
|
+
dir_path = os.path.dirname(self.full_path)
|
|
144
|
+
if not os.path.exists(dir_path):
|
|
145
|
+
raise IOError("I can't find the store_path")
|
|
146
|
+
|
|
147
|
+
cmd = "qpdf --password='{}' --decrypt '{}' '{}'" \
|
|
148
|
+
.format(self._pwd, self._file, self.full_path)
|
|
149
|
+
subprocess.call(cmd, shell=True)
|
|
150
|
+
|
|
151
|
+
if os.path.exists(self.full_path):
|
|
152
|
+
bkp = self._file + '_'
|
|
153
|
+
os.rename(self._file, bkp)
|
|
154
|
+
# Copy XML File if exists
|
|
155
|
+
if self._has_xml:
|
|
156
|
+
xml_new_path = os.path.splitext(self.full_path)[0]+'.xml'
|
|
157
|
+
copyfile(self._xml_file, xml_new_path)
|
|
158
|
+
xml_bkp = self._xml_file + '_'
|
|
159
|
+
os.rename(self._xml_file, xml_bkp)
|
|
160
|
+
if self._verbose:
|
|
161
|
+
print(Fore.CYAN, 'XML Written: ', xml_new_path, Fore.RESET)
|
|
162
|
+
else:
|
|
163
|
+
raise IOError("The file was not created.")
|
|
164
|
+
|
|
165
|
+
@property
|
|
166
|
+
def name(self): return self._act.name
|
|
167
|
+
@property
|
|
168
|
+
def filename_only(self):
|
|
169
|
+
dir, file = os.path.split(self._file)
|
|
170
|
+
filename, ext = os.path.splitext(file)
|
|
171
|
+
return filename
|
|
172
|
+
@property
|
|
173
|
+
def text(self): return self._text
|
|
174
|
+
@property
|
|
175
|
+
def month(self):
|
|
176
|
+
try:
|
|
177
|
+
month_num = int(self._month)
|
|
178
|
+
except:
|
|
179
|
+
if len(self._month) == 3:
|
|
180
|
+
for month in MONTHS:
|
|
181
|
+
if month[0:3] == self._month:
|
|
182
|
+
month_num = MONTHS[month]
|
|
183
|
+
else:
|
|
184
|
+
month_num = MONTHS[self._month]
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
if self.offset:
|
|
188
|
+
tmp = month_num + self.offset
|
|
189
|
+
if tmp == 0:
|
|
190
|
+
tmp = 12
|
|
191
|
+
self._year_offset = -1
|
|
192
|
+
elif tmp == 13:
|
|
193
|
+
tmp = 1
|
|
194
|
+
self._year_offset = 1
|
|
195
|
+
else:
|
|
196
|
+
tmp = month_num
|
|
197
|
+
return str(tmp).zfill(2)
|
|
198
|
+
@property
|
|
199
|
+
def year(self):
|
|
200
|
+
if len(self._year) == 2:
|
|
201
|
+
tmp = '20' + self._year
|
|
202
|
+
else:
|
|
203
|
+
tmp = self._year
|
|
204
|
+
year = int(tmp) + self._year_offset
|
|
205
|
+
|
|
206
|
+
return str(year)
|
|
207
|
+
@property
|
|
208
|
+
def period(self): return "{}-{}".format(self.year, self.month)
|
|
209
|
+
@property
|
|
210
|
+
def new_name(self):
|
|
211
|
+
if self._act.has_key('name_template'):
|
|
212
|
+
template = self._act.name_template
|
|
213
|
+
else:
|
|
214
|
+
template = '{original}'
|
|
215
|
+
|
|
216
|
+
type = self.type if self.type else 'NA'
|
|
217
|
+
new = template \
|
|
218
|
+
.replace('{original}', self.filename_only) \
|
|
219
|
+
.replace('{period}', self.period) \
|
|
220
|
+
.replace('{type}', type) \
|
|
221
|
+
.replace('{extra}', self._extra)
|
|
222
|
+
return new + '.pdf'
|
|
223
|
+
@property
|
|
224
|
+
def store_path(self):
|
|
225
|
+
tmp = self._act.store_path.replace('{YEAR}', self.year)
|
|
226
|
+
return tmp
|
|
227
|
+
@property
|
|
228
|
+
def full_path(self):
|
|
229
|
+
tmp = self.store_path
|
|
230
|
+
tmp = tmp if tmp[0] != '/' else tmp[1:]
|
|
231
|
+
base = os.path.expanduser(self._act.base_path)
|
|
232
|
+
base = os.path.abspath(base)
|
|
233
|
+
return os.path.join(base, tmp, self.new_name)
|
|
234
|
+
|
|
235
|
+
class Settings(object):
|
|
236
|
+
"""Open the rules YAML file"""
|
|
237
|
+
def __init__(self):
|
|
238
|
+
name = os.path.basename(__file__).replace('py', 'yml')
|
|
239
|
+
dir_oder = []
|
|
240
|
+
dir_oder.append(os.path.dirname(__file__))
|
|
241
|
+
dir_oder.append(os.path.expanduser('~'))
|
|
242
|
+
|
|
243
|
+
paths = map(lambda x: os.path.join(x, name), dir_oder)
|
|
244
|
+
|
|
245
|
+
for path in paths:
|
|
246
|
+
if os.path.isfile(path):
|
|
247
|
+
conf_path = path
|
|
248
|
+
break
|
|
249
|
+
|
|
250
|
+
if 'conf_path' not in locals():
|
|
251
|
+
print('{}Error, no configuraton file was found: {}{}{}'
|
|
252
|
+
.format(Fore.RED, Fore.MAGENTA, ', '.join(paths), Fore.RESET))
|
|
253
|
+
exit(1)
|
|
254
|
+
|
|
255
|
+
fsettings = open(conf_path, 'r')
|
|
256
|
+
if IS_VERBOSE:
|
|
257
|
+
print("Loaded configuration file: {}{}{}"
|
|
258
|
+
.format(Fore.GREEN, conf_path, Fore.RESET))
|
|
259
|
+
self.__dict__ = yaml.load(fsettings)
|
|
260
|
+
|
|
261
|
+
def print(self):
|
|
262
|
+
pp = pprint.PrettyPrinter(indent=2)
|
|
263
|
+
pp.pprint(self.__dict__)
|
|
264
|
+
|
|
265
|
+
def getAccount(self, file_name):
|
|
266
|
+
for act in self.accounts:
|
|
267
|
+
srch = re.search(act['re_file'], file_name)
|
|
268
|
+
if srch != None:
|
|
269
|
+
act['base_path'] = self.base_path
|
|
270
|
+
return InlineClass(act)
|
|
271
|
+
|
|
272
|
+
def getScrapeDirectories(self):
|
|
273
|
+
max_length = len(max(self.scrape_dirs, key=len))
|
|
274
|
+
|
|
275
|
+
if IS_VERBOSE:
|
|
276
|
+
print('Processing directories:')
|
|
277
|
+
for directory in self.scrape_dirs:
|
|
278
|
+
path = os.path.expanduser(directory)
|
|
279
|
+
path = os.path.abspath(path)
|
|
280
|
+
print_ident(directory, path, color=Fore.BLUE, field_width=max_length)
|
|
281
|
+
print()
|
|
282
|
+
|
|
283
|
+
for directory in self.scrape_dirs:
|
|
284
|
+
path = os.path.expanduser(directory)
|
|
285
|
+
path = os.path.abspath(path)
|
|
286
|
+
yield path
|
|
287
|
+
|
|
288
|
+
def get_files(directory=None):
|
|
289
|
+
"""Analyze current directory for PDF files"""
|
|
290
|
+
path = os.path.dirname(os.path.abspath(__file__)) if directory == None else directory
|
|
291
|
+
for pdffile in os.listdir(path):
|
|
292
|
+
if pdffile.endswith(".pdf"):
|
|
293
|
+
yield os.path.join(path, pdffile)
|
|
294
|
+
|
|
295
|
+
def print_ident(field, value, **kwargs):
|
|
296
|
+
"""Print value with the color specified and correct identation.
|
|
297
|
+
|
|
298
|
+
Args:
|
|
299
|
+
field (int): The value name
|
|
300
|
+
value (str): The value to print
|
|
301
|
+
color (AnsiFore): The color to use
|
|
302
|
+
field_width (int): The identation lenght of fields
|
|
303
|
+
|
|
304
|
+
Returns:
|
|
305
|
+
None: No value is returned.
|
|
306
|
+
"""
|
|
307
|
+
color = kwargs['color'] if 'color' in kwargs else Fore.GREEN
|
|
308
|
+
field_width = kwargs['field_width'] if 'field_width' in kwargs else 7
|
|
309
|
+
string_format = ' {:>'+str(field_width)+'}: {}{}{}'
|
|
310
|
+
print(string_format.format(field, color, value, Fore.RESET))
|
|
311
|
+
|
|
312
|
+
def print_separator(title, color=Fore.LIGHTYELLOW_EX):
|
|
313
|
+
_rows, cols = os.popen('stty size', 'r').read().split()
|
|
314
|
+
sep = '\n' + color
|
|
315
|
+
sep += '-' * 40 + ' ' + title + ' '
|
|
316
|
+
remaining_cols = int(cols) - len(sep)
|
|
317
|
+
if remaining_cols > 0:
|
|
318
|
+
sep += '-' * remaining_cols
|
|
319
|
+
sep += Fore.RESET
|
|
320
|
+
print(sep)
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
def main():
|
|
324
|
+
parser = argparse.ArgumentParser()
|
|
325
|
+
parser.add_argument("-d", "--dry",
|
|
326
|
+
action="store_true",
|
|
327
|
+
help="Dry run, does not write new pdf")
|
|
328
|
+
parser.add_argument("-v", "--verbose",
|
|
329
|
+
action="store_true",
|
|
330
|
+
help="Show more output, useful for debug")
|
|
331
|
+
args = parser.parse_args()
|
|
332
|
+
|
|
333
|
+
if args.dry:
|
|
334
|
+
global IS_DRY
|
|
335
|
+
IS_DRY = True
|
|
336
|
+
print(Fore.CYAN + "Running in dry mode..." + Fore.RESET)
|
|
337
|
+
if args.verbose:
|
|
338
|
+
global IS_VERBOSE
|
|
339
|
+
IS_VERBOSE = True
|
|
340
|
+
print(Fore.CYAN + "Running in verbose mode..." + Fore.RESET)
|
|
341
|
+
|
|
342
|
+
settings = Settings()
|
|
343
|
+
#settings.getScrapeDirectories()
|
|
344
|
+
#sys.exit(1)
|
|
345
|
+
|
|
346
|
+
for work_directory in settings.getScrapeDirectories():
|
|
347
|
+
print_separator(work_directory)
|
|
348
|
+
ignored_files = []
|
|
349
|
+
for pdffile in get_files(work_directory):
|
|
350
|
+
try:
|
|
351
|
+
base = os.path.basename(pdffile)
|
|
352
|
+
act = settings.getAccount(pdffile)
|
|
353
|
+
if not act:
|
|
354
|
+
raise ValueError('no account was matched.')
|
|
355
|
+
print('Working on' + Fore.LIGHTGREEN_EX, base, Fore.RESET)
|
|
356
|
+
print_ident(' Cuenta', act.name, color=Fore.LIGHTBLUE_EX)
|
|
357
|
+
doc = Document(pdffile, act, verbose=IS_VERBOSE)
|
|
358
|
+
#print(edocta) # Debug ----
|
|
359
|
+
print_ident('Periodo', doc.period)
|
|
360
|
+
if IS_VERBOSE:
|
|
361
|
+
print(Fore.CYAN, doc, Fore.RESET)
|
|
362
|
+
if not IS_DRY:
|
|
363
|
+
doc.write_pdf()
|
|
364
|
+
print_ident('NewFile', doc.full_path)
|
|
365
|
+
except ValueError as e:
|
|
366
|
+
#print(e)
|
|
367
|
+
ignored_files.append(base)
|
|
368
|
+
#print(Fore.LIGHTRED_EX + ' Error!', e, Fore.RESET)
|
|
369
|
+
except IOError as e:
|
|
370
|
+
print('Error, the filepath {} does not exists.'.format(doc.full_path))
|
|
371
|
+
|
|
372
|
+
print('\nNo account was matched for these PDF files:')
|
|
373
|
+
for num, path in enumerate(ignored_files, start=1):
|
|
374
|
+
print_ident(num, path, color=Fore.RED, field_width=3)
|
|
375
|
+
|
|
376
|
+
|
|
377
|
+
if __name__ == '__main__': main()
|
|
378
|
+
|
|
379
|
+
```
|
|
380
|
+
|
|
381
|
+
## Bash
|
|
382
|
+
|
|
383
|
+
```bash
|
|
384
|
+
#!/bin/env bash
|
|
385
|
+
. .common
|
|
386
|
+
|
|
387
|
+
YEAR=$(date +%Y)
|
|
388
|
+
PASS=abcdef
|
|
389
|
+
GREP_PERIOD='al [0-9]{1,2} de ([A-Zz-z]*) de.? [0-9]+'
|
|
390
|
+
#Path to move, Dropbox. Use "{YEAR}" to replace with actual year
|
|
391
|
+
MVTO=../"Impuestos/FISCAL-{YEAR}/Edo Cuenta"
|
|
392
|
+
|
|
393
|
+
app_installed qpdf
|
|
394
|
+
|
|
395
|
+
count=$(find . -type f -name '[!2]*.pdf' | wc -l)
|
|
396
|
+
if [ "$count" == '0' ]; then
|
|
397
|
+
echo -e "${RED}Error, no pdf files found.${RST}"
|
|
398
|
+
exit 1
|
|
399
|
+
fi
|
|
400
|
+
|
|
401
|
+
for pdf in [!2]*.pdf; do
|
|
402
|
+
[ ! -r "$pdf" ] && echo -e "${RED}Error, can't access $pdf${RST}" && exit 1
|
|
403
|
+
echo -e "Working on ${GRE}$pdf${RST}..."
|
|
404
|
+
|
|
405
|
+
# Decrypt PDF and uncompress to work with it
|
|
406
|
+
temp=$(mktemp)
|
|
407
|
+
#trap 'rm $temp' 0 SIGINT SIGQUIT SIGTERM
|
|
408
|
+
qpdf --password="$PASS" --decrypt --stream-data=uncompress "$pdf" "$temp"
|
|
409
|
+
|
|
410
|
+
# Extract Data from PDF
|
|
411
|
+
account=$(strings "$temp" | grep -ioE 'platinum|perfiles' | head -1)
|
|
412
|
+
account=${account,,}
|
|
413
|
+
account=${account^}
|
|
414
|
+
echo -e " account: ${BLU}$account${RST}"
|
|
415
|
+
#period=$(strings "$temp" | grep -iEo 'al [0-9]{1,2} de ([A-Zz-z]*) de [0-9]+' | tail -1)
|
|
416
|
+
#month=$(echo "$period" | tr ' ' '\n'| tail -3 | head -1)
|
|
417
|
+
#year=$(echo "$period" | tr ' ' '\n' | tail -1)
|
|
418
|
+
period=$(pdftotext "$temp" - | grep -iEo "$GREP_PERIOD" | tail -1 )
|
|
419
|
+
month=$(echo "$period" | awk '{print $4}')
|
|
420
|
+
year=$(echo "$period" | awk '{print $6}')
|
|
421
|
+
period=${month,,}
|
|
422
|
+
|
|
423
|
+
if [ -z "$period" ]; then
|
|
424
|
+
echo -e "${RED}Error, period not found.${RST}"
|
|
425
|
+
exit 1
|
|
426
|
+
fi
|
|
427
|
+
|
|
428
|
+
number=$(convert_month $period)
|
|
429
|
+
if [ "$account" == "Perfiles" ]; then
|
|
430
|
+
#number=$(( number - 1 ))
|
|
431
|
+
number=$(echo "$number - 1" | bc)
|
|
432
|
+
if [ "${#number}" -eq 1 ]; then
|
|
433
|
+
number="0$number"
|
|
434
|
+
fi
|
|
435
|
+
fi
|
|
436
|
+
echo -e " period: ${BLU}$year-$period${RST}"
|
|
437
|
+
|
|
438
|
+
#Prepare new PDF
|
|
439
|
+
newfile="$year-${number} ${account}.pdf"
|
|
440
|
+
#pdftk "$pdf" input_pw "$PASS" output "$newfile"
|
|
441
|
+
qpdf --password="$PASS" --decrypt "$pdf" "$newfile"
|
|
442
|
+
if [ -f "$newfile" ]; then
|
|
443
|
+
mv "$pdf" "${newfile/.pdf/}_$pdf"
|
|
444
|
+
echo -e " new file: ${BLU}$newfile${RST}"
|
|
445
|
+
fi
|
|
446
|
+
|
|
447
|
+
#Copy it
|
|
448
|
+
MVTO="${MVTO//'{YEAR}'/$year}"
|
|
449
|
+
if [ -d "$MVTO" ]; then
|
|
450
|
+
cp -v "$newfile" "$MVTO"
|
|
451
|
+
fi
|
|
452
|
+
done
|
|
453
|
+
```
|