pdfh 0.1.5 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.ruby-version +1 -1
- data/.travis.yml +1 -1
- data/Gemfile.lock +37 -25
- data/README.md +5 -2
- data/Rakefile +16 -0
- data/docs/legacy.md +453 -0
- data/lib/pdfh/document.rb +2 -0
- data/lib/pdfh/version.rb +1 -1
- data/pdfh.gemspec +8 -6
- metadata +43 -15
- data/.ruby-gemset +0 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 77f42bce0e05785d392ed0745ae2fc245f634d83b58e0257fe5f9b4273203072
|
4
|
+
data.tar.gz: 92f71bbc691405e04a1a34288e1753120c1716c120103edb49bf419c06db00da
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: beba17f5bdc7795fa3b19fa37a2417ca0e493e4808b0ee62aa75447fa64f9272ed10c4ea3717be040e4ab27304d1b0cc04b195b4c6b9fdf28afbb2da37798b7b
|
7
|
+
data.tar.gz: d4f19d454fbf2b9bce448a3f197762032d8d10c41bf7a28959eff043e060ca6f467184cd13bc83a3df8d74affc338a1df4a2e1f770701224e8d76a2dcc9e329b
|
data/.gitignore
CHANGED
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
2.7.0
|
data/.travis.yml
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,52 +1,64 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
pdfh (0.1.
|
5
|
-
colorize (~> 0.8.
|
4
|
+
pdfh (0.1.6)
|
5
|
+
colorize (~> 0.8.0)
|
6
6
|
|
7
7
|
GEM
|
8
8
|
remote: https://rubygems.org/
|
9
9
|
specs:
|
10
10
|
ansi (1.5.0)
|
11
|
+
blockenspiel (0.5.0)
|
12
|
+
coderay (1.1.2)
|
11
13
|
colorize (0.8.1)
|
12
14
|
diff-lcs (1.3)
|
13
|
-
docile (1.3.
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
rspec-
|
23
|
-
|
15
|
+
docile (1.3.2)
|
16
|
+
json (2.3.0)
|
17
|
+
method_source (0.9.2)
|
18
|
+
pry (0.12.2)
|
19
|
+
coderay (~> 1.1.0)
|
20
|
+
method_source (~> 0.9.0)
|
21
|
+
rake (13.0.1)
|
22
|
+
rspec (3.9.0)
|
23
|
+
rspec-core (~> 3.9.0)
|
24
|
+
rspec-expectations (~> 3.9.0)
|
25
|
+
rspec-mocks (~> 3.9.0)
|
26
|
+
rspec-core (3.9.1)
|
27
|
+
rspec-support (~> 3.9.1)
|
28
|
+
rspec-expectations (3.9.0)
|
24
29
|
diff-lcs (>= 1.2.0, < 2.0)
|
25
|
-
rspec-support (~> 3.
|
26
|
-
rspec-mocks (3.
|
30
|
+
rspec-support (~> 3.9.0)
|
31
|
+
rspec-mocks (3.9.1)
|
27
32
|
diff-lcs (>= 1.2.0, < 2.0)
|
28
|
-
rspec-support (~> 3.
|
29
|
-
rspec-support (3.
|
30
|
-
simplecov (0.
|
33
|
+
rspec-support (~> 3.9.0)
|
34
|
+
rspec-support (3.9.2)
|
35
|
+
simplecov (0.17.1)
|
31
36
|
docile (~> 1.1)
|
32
37
|
json (>= 1.8, < 3)
|
33
38
|
simplecov-html (~> 0.10.0)
|
34
|
-
simplecov-console (0.
|
39
|
+
simplecov-console (0.6.0)
|
35
40
|
ansi
|
36
|
-
hirb
|
37
41
|
simplecov
|
42
|
+
terminal-table
|
38
43
|
simplecov-html (0.10.2)
|
44
|
+
terminal-table (1.8.0)
|
45
|
+
unicode-display_width (~> 1.1, >= 1.1.1)
|
46
|
+
unicode-display_width (1.6.1)
|
47
|
+
versionomy (0.5.0)
|
48
|
+
blockenspiel (~> 0.5)
|
39
49
|
|
40
50
|
PLATFORMS
|
41
51
|
ruby
|
42
52
|
|
43
53
|
DEPENDENCIES
|
44
|
-
bundler (~>
|
54
|
+
bundler (~> 2.0)
|
45
55
|
pdfh!
|
46
|
-
|
56
|
+
pry (~> 0.12.0)
|
57
|
+
rake (~> 13.0)
|
47
58
|
rspec (~> 3.0)
|
48
|
-
simplecov (~> 0.
|
49
|
-
simplecov-console (~> 0.
|
59
|
+
simplecov (~> 0.17.0)
|
60
|
+
simplecov-console (~> 0.6.0)
|
61
|
+
versionomy (~> 0.5)
|
50
62
|
|
51
63
|
BUNDLED WITH
|
52
|
-
1.
|
64
|
+
2.1.2
|
data/README.md
CHANGED
@@ -24,7 +24,7 @@ document_types:
|
|
24
24
|
pwd: base64string # [OPTIONAL] Password if the document is protected
|
25
25
|
store_path: "{YEAR}/bank_docs" # Relative path to copy this document
|
26
26
|
name_template: '{period} {subtype}' # Template for new filename when copied
|
27
|
-
sub_types: # [OPTIONAL] In case your need an extra category
|
27
|
+
sub_types: # [OPTIONAL] In case you need an extra category
|
28
28
|
- name: Account1 # Regular expression to match this subtype
|
29
29
|
month_offset: -1 # [OPTIONAL] Integer value to adjust month
|
30
30
|
```
|
@@ -33,9 +33,12 @@ document_types:
|
|
33
33
|
|
34
34
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
35
35
|
|
36
|
-
To install this gem onto your local machine, run `
|
36
|
+
To install this gem onto your local machine, run `rake install`. To release a new version, run `rake bump`, and then run `rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
37
37
|
|
38
38
|
```bash
|
39
|
+
rake install
|
40
|
+
|
41
|
+
# step by step
|
39
42
|
gem build pdfh.gemspec
|
40
43
|
gem install pdfh-*
|
41
44
|
```
|
data/Rakefile
CHANGED
@@ -2,7 +2,23 @@
|
|
2
2
|
|
3
3
|
require 'bundler/gem_tasks'
|
4
4
|
require 'rspec/core/rake_task'
|
5
|
+
require 'versionomy'
|
5
6
|
|
6
7
|
RSpec::Core::RakeTask.new(:spec)
|
7
8
|
|
8
9
|
task default: :spec
|
10
|
+
|
11
|
+
# Bumps the gem version stored in lib/pdfh/version.rb.
#
# Usage:
#   rake bump          # bumps the tiny (patch) field
#   rake bump[minor]   # bumps the minor field
#   rake bump[major]   # bumps the major field
#
# The task reads the version file, takes the first "x.y.z" string in it,
# asks Versionomy for the bumped version and writes the file back.
desc 'Bump gem version number (tiny|minor|major)'
task :bump, :type do |_t, args|
  args.with_defaults(type: :tiny)
  version_file = File.join(__dir__, 'lib', 'pdfh', 'version.rb')
  content = File.read(version_file)

  version_pattern = /(?<major>\d+)\.(?<minor>\d+)\.(?<tiny>\d+)/
  current_version = content[version_pattern]
  # Stop with a clear message instead of handing an empty string to the parser.
  abort "No version number found in #{version_file}" unless current_version

  # Rake supplies bracket arguments as Strings while the default is a Symbol;
  # normalise so both take the same path.
  next_version = Versionomy.parse(current_version).bump(args.type.to_sym).to_s

  # Swap the whole matched version for the new one. The block form inserts the
  # string literally (no backreference processing) and `sub` touches only the
  # match that is reported below.
  File.write(version_file, content.sub(version_pattern) { next_version })

  puts "Successfully bumped from #{current_version} to #{next_version}!"
end
|
data/docs/legacy.md
ADDED
@@ -0,0 +1,453 @@
|
|
1
|
+
# Legacy
|
2
|
+
|
3
|
+
## Python
|
4
|
+
|
5
|
+
This project was born as a bash script. It was initially ported to a Python script,
|
6
|
+
and ended as a Ruby gem. Below is the old Python code, provided just for fun.
|
7
|
+
|
8
|
+
```python
|
9
|
+
#!/usr/bin/env python3
|
10
|
+
"""Organize PDF protected password files, using rules defined in yaml format."""
|
11
|
+
from __future__ import print_function
|
12
|
+
import os
|
13
|
+
import re
|
14
|
+
import base64
|
15
|
+
import pprint
|
16
|
+
import argparse
|
17
|
+
import tempfile
|
18
|
+
import subprocess
|
19
|
+
import yaml
|
20
|
+
from shutil import copyfile
|
21
|
+
from colorama import Fore
|
22
|
+
|
23
|
+
IS_VERBOSE = False
|
24
|
+
IS_DRY = False
|
25
|
+
# TODO: calendar.month_name[11] current locale
|
26
|
+
MONTHS = dict(
|
27
|
+
enero = 1,
|
28
|
+
febrero = 2,
|
29
|
+
marzo = 3,
|
30
|
+
abril = 4,
|
31
|
+
mayo = 5,
|
32
|
+
junio = 6,
|
33
|
+
julio = 7,
|
34
|
+
agosto = 8,
|
35
|
+
septiembre = 9,
|
36
|
+
octubre = 10,
|
37
|
+
noviembre = 11,
|
38
|
+
diciembre = 12
|
39
|
+
)
|
40
|
+
|
41
|
+
class InlineClass(object):
|
42
|
+
"""Wrapper to have an object like dictionary"""
|
43
|
+
def __init__(self, dict):
|
44
|
+
self.__dict__ = dict
|
45
|
+
def has_key(self, key):
|
46
|
+
return key in self.__dict__.keys()
|
47
|
+
|
48
|
+
def get_month_num(num):
|
49
|
+
# Not implemented yet
|
50
|
+
import locale
|
51
|
+
locale.setlocale(locale.LC_ALL, 'es_MX')
|
52
|
+
import calendar
|
53
|
+
calendar.month_name[num]
|
54
|
+
|
55
|
+
class Document(object):
|
56
|
+
"""Handles the PDF detected by the rules, and makes tranformations"""
|
57
|
+
def __init__(self, file, account, **kwargs):
|
58
|
+
self._file = file
|
59
|
+
self._act = account
|
60
|
+
self._extra = ''
|
61
|
+
self._has_xml = False
|
62
|
+
self._verbose = kwargs['verbose']
|
63
|
+
verbose = self._verbose
|
64
|
+
if verbose:
|
65
|
+
print(Fore.CYAN + account.name, '==================' + Fore.RESET)
|
66
|
+
|
67
|
+
self._pwd = base64.b64decode(self._act.pwd) if self._act.pwd else ''
|
68
|
+
if type(self._pwd) is bytes:
|
69
|
+
self._pwd = self._pwd.decode()
|
70
|
+
|
71
|
+
if not os.path.exists(self._file):
|
72
|
+
raise IOError("I can't find the PDF")
|
73
|
+
|
74
|
+
# Check if aditional XML file exists
|
75
|
+
self._xml_file = os.path.splitext(self._file)[0]+'.xml'
|
76
|
+
if os.path.exists(self._xml_file):
|
77
|
+
self._has_xml = True
|
78
|
+
|
79
|
+
self._tmp = tempfile.mktemp(suffix=".pdf")
|
80
|
+
if verbose:
|
81
|
+
print(Fore.CYAN + ' --> ' + self._tmp + ' temporal file assigned.' + Fore.RESET)
|
82
|
+
|
83
|
+
cmd1 = "qpdf --password='{}' --decrypt --stream-data=uncompress '{}' '{}'" \
|
84
|
+
.format(self._pwd, self._file, self._tmp)
|
85
|
+
subprocess.call(cmd1, shell=True)
|
86
|
+
|
87
|
+
cmd2 = "pdftotext -enc UTF-8 '{}' -".format(self._tmp)
|
88
|
+
|
89
|
+
p = subprocess.Popen(cmd2, stdout=subprocess.PIPE, shell=True)
|
90
|
+
self._text, _err = p.communicate()
|
91
|
+
if type(self._text) is bytes:
|
92
|
+
self._text = self._text.decode(encoding="utf-8", errors="replace")
|
93
|
+
if verbose:
|
94
|
+
print(Fore.CYAN + self._text + Fore.RESET)
|
95
|
+
|
96
|
+
match = re.search(self._act.re_date, self._text, re.MULTILINE)
|
97
|
+
if not match:
|
98
|
+
print(Fore.RED, 'Err, date was not extracted with regex provided: ' + Fore.LIGHTRED_EX +
|
99
|
+
self._act.re_date + Fore.RESET)
|
100
|
+
exit(1)
|
101
|
+
if verbose:
|
102
|
+
print(Fore.CYAN, '==== Regex Groups:', match.groups(), Fore.RESET)
|
103
|
+
try:
|
104
|
+
self._month = match.group('m')
|
105
|
+
self._year = match.group('y')
|
106
|
+
except IndexError:
|
107
|
+
self._month, self._year = match.groups()
|
108
|
+
|
109
|
+
if len(match.groups()) > 2:
|
110
|
+
self._extra = match.group(3)
|
111
|
+
|
112
|
+
self._month = self._month.lower()
|
113
|
+
if verbose:
|
114
|
+
print(Fore.CYAN, '==== Assigned:', (self._month, self._year, self._extra),
|
115
|
+
'==( Month, Year, Extra )================' + Fore.RESET)
|
116
|
+
|
117
|
+
if self._act.has_key('types'):
|
118
|
+
for t in self._act.types:
|
119
|
+
name = t['name']
|
120
|
+
if re.search(name, self._text, re.IGNORECASE):
|
121
|
+
self.type = name
|
122
|
+
self.offset = t.get('month_offset', 0)
|
123
|
+
else:
|
124
|
+
self.type = None
|
125
|
+
self.offset = 0
|
126
|
+
|
127
|
+
if verbose:
|
128
|
+
print(Fore.CYAN, 'Offset settings, Type:', self.type, '/ Month:', self.offset, Fore.RESET)
|
129
|
+
#Used if the month offset results in change in year.
|
130
|
+
self._year_offset = 0
|
131
|
+
if verbose:
|
132
|
+
print(Fore.CYAN, 'END INIT ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~' + Fore.RESET)
|
133
|
+
|
134
|
+
def __repr__(self):
|
135
|
+
type_str = self.type if self.type else 'N/A'
|
136
|
+
format_string = 'Name : {}\nType : {}\nPeriod : {}\nFile Path: {}\n'+\
|
137
|
+
'File Name: {}\nNew Name : {}\nStorePath: {}\nFullPath : {}'
|
138
|
+
return format_string.format(
|
139
|
+
self.name, type_str, self.period, self._file,
|
140
|
+
self.filename_only, self.new_name, self.store_path, self.full_path)
|
141
|
+
|
142
|
+
def write_pdf(self):
|
143
|
+
dir_path = os.path.dirname(self.full_path)
|
144
|
+
if not os.path.exists(dir_path):
|
145
|
+
raise IOError("I can't find the store_path")
|
146
|
+
|
147
|
+
cmd = "qpdf --password='{}' --decrypt '{}' '{}'" \
|
148
|
+
.format(self._pwd, self._file, self.full_path)
|
149
|
+
subprocess.call(cmd, shell=True)
|
150
|
+
|
151
|
+
if os.path.exists(self.full_path):
|
152
|
+
bkp = self._file + '_'
|
153
|
+
os.rename(self._file, bkp)
|
154
|
+
# Copy XML File if exists
|
155
|
+
if self._has_xml:
|
156
|
+
xml_new_path = os.path.splitext(self.full_path)[0]+'.xml'
|
157
|
+
copyfile(self._xml_file, xml_new_path)
|
158
|
+
xml_bkp = self._xml_file + '_'
|
159
|
+
os.rename(self._xml_file, xml_bkp)
|
160
|
+
if self._verbose:
|
161
|
+
print(Fore.CYAN, 'XML Written: ', xml_new_path, Fore.RESET)
|
162
|
+
else:
|
163
|
+
raise IOError("The file was not created.")
|
164
|
+
|
165
|
+
@property
|
166
|
+
def name(self): return self._act.name
|
167
|
+
@property
|
168
|
+
def filename_only(self):
|
169
|
+
dir, file = os.path.split(self._file)
|
170
|
+
filename, ext = os.path.splitext(file)
|
171
|
+
return filename
|
172
|
+
@property
|
173
|
+
def text(self): return self._text
|
174
|
+
@property
|
175
|
+
def month(self):
|
176
|
+
try:
|
177
|
+
month_num = int(self._month)
|
178
|
+
except:
|
179
|
+
if len(self._month) == 3:
|
180
|
+
for month in MONTHS:
|
181
|
+
if month[0:3] == self._month:
|
182
|
+
month_num = MONTHS[month]
|
183
|
+
else:
|
184
|
+
month_num = MONTHS[self._month]
|
185
|
+
|
186
|
+
|
187
|
+
if self.offset:
|
188
|
+
tmp = month_num + self.offset
|
189
|
+
if tmp == 0:
|
190
|
+
tmp = 12
|
191
|
+
self._year_offset = -1
|
192
|
+
elif tmp == 13:
|
193
|
+
tmp = 1
|
194
|
+
self._year_offset = 1
|
195
|
+
else:
|
196
|
+
tmp = month_num
|
197
|
+
return str(tmp).zfill(2)
|
198
|
+
@property
|
199
|
+
def year(self):
|
200
|
+
if len(self._year) == 2:
|
201
|
+
tmp = '20' + self._year
|
202
|
+
else:
|
203
|
+
tmp = self._year
|
204
|
+
year = int(tmp) + self._year_offset
|
205
|
+
|
206
|
+
return str(year)
|
207
|
+
@property
|
208
|
+
def period(self): return "{}-{}".format(self.year, self.month)
|
209
|
+
@property
|
210
|
+
def new_name(self):
|
211
|
+
if self._act.has_key('name_template'):
|
212
|
+
template = self._act.name_template
|
213
|
+
else:
|
214
|
+
template = '{original}'
|
215
|
+
|
216
|
+
type = self.type if self.type else 'NA'
|
217
|
+
new = template \
|
218
|
+
.replace('{original}', self.filename_only) \
|
219
|
+
.replace('{period}', self.period) \
|
220
|
+
.replace('{type}', type) \
|
221
|
+
.replace('{extra}', self._extra)
|
222
|
+
return new + '.pdf'
|
223
|
+
@property
|
224
|
+
def store_path(self):
|
225
|
+
tmp = self._act.store_path.replace('{YEAR}', self.year)
|
226
|
+
return tmp
|
227
|
+
@property
|
228
|
+
def full_path(self):
|
229
|
+
tmp = self.store_path
|
230
|
+
tmp = tmp if tmp[0] != '/' else tmp[1:]
|
231
|
+
base = os.path.expanduser(self._act.base_path)
|
232
|
+
base = os.path.abspath(base)
|
233
|
+
return os.path.join(base, tmp, self.new_name)
|
234
|
+
|
235
|
+
class Settings(object):
|
236
|
+
"""Open the rules YAML file"""
|
237
|
+
def __init__(self):
|
238
|
+
name = os.path.basename(__file__).replace('py', 'yml')
|
239
|
+
dir_oder = []
|
240
|
+
dir_oder.append(os.path.dirname(__file__))
|
241
|
+
dir_oder.append(os.path.expanduser('~'))
|
242
|
+
|
243
|
+
paths = map(lambda x: os.path.join(x, name), dir_oder)
|
244
|
+
|
245
|
+
for path in paths:
|
246
|
+
if os.path.isfile(path):
|
247
|
+
conf_path = path
|
248
|
+
break
|
249
|
+
|
250
|
+
if 'conf_path' not in locals():
|
251
|
+
print('{}Error, no configuraton file was found: {}{}{}'
|
252
|
+
.format(Fore.RED, Fore.MAGENTA, ', '.join(paths), Fore.RESET))
|
253
|
+
exit(1)
|
254
|
+
|
255
|
+
fsettings = open(conf_path, 'r')
|
256
|
+
if IS_VERBOSE:
|
257
|
+
print("Loaded configuration file: {}{}{}"
|
258
|
+
.format(Fore.GREEN, conf_path, Fore.RESET))
|
259
|
+
self.__dict__ = yaml.load(fsettings)
|
260
|
+
|
261
|
+
def print(self):
|
262
|
+
pp = pprint.PrettyPrinter(indent=2)
|
263
|
+
pp.pprint(self.__dict__)
|
264
|
+
|
265
|
+
def getAccount(self, file_name):
|
266
|
+
for act in self.accounts:
|
267
|
+
srch = re.search(act['re_file'], file_name)
|
268
|
+
if srch != None:
|
269
|
+
act['base_path'] = self.base_path
|
270
|
+
return InlineClass(act)
|
271
|
+
|
272
|
+
def getScrapeDirectories(self):
|
273
|
+
max_length = len(max(self.scrape_dirs, key=len))
|
274
|
+
|
275
|
+
if IS_VERBOSE:
|
276
|
+
print('Processing directories:')
|
277
|
+
for directory in self.scrape_dirs:
|
278
|
+
path = os.path.expanduser(directory)
|
279
|
+
path = os.path.abspath(path)
|
280
|
+
print_ident(directory, path, color=Fore.BLUE, field_width=max_length)
|
281
|
+
print()
|
282
|
+
|
283
|
+
for directory in self.scrape_dirs:
|
284
|
+
path = os.path.expanduser(directory)
|
285
|
+
path = os.path.abspath(path)
|
286
|
+
yield path
|
287
|
+
|
288
|
+
def get_files(directory=None):
|
289
|
+
"""Analyze current directory for PDF files"""
|
290
|
+
path = os.path.dirname(os.path.abspath(__file__)) if directory == None else directory
|
291
|
+
for pdffile in os.listdir(path):
|
292
|
+
if pdffile.endswith(".pdf"):
|
293
|
+
yield os.path.join(path, pdffile)
|
294
|
+
|
295
|
+
def print_ident(field, value, **kwargs):
|
296
|
+
"""Print value with the color specified and correct identation.
|
297
|
+
|
298
|
+
Args:
|
299
|
+
field (int): The value name
|
300
|
+
value (str): The value to print
|
301
|
+
color (AnsiFore): The color to use
|
302
|
+
field_width (int): The identation lenght of fields
|
303
|
+
|
304
|
+
Returns:
|
305
|
+
None: No value is returned.
|
306
|
+
"""
|
307
|
+
color = kwargs['color'] if 'color' in kwargs else Fore.GREEN
|
308
|
+
field_width = kwargs['field_width'] if 'field_width' in kwargs else 7
|
309
|
+
string_format = ' {:>'+str(field_width)+'}: {}{}{}'
|
310
|
+
print(string_format.format(field, color, value, Fore.RESET))
|
311
|
+
|
312
|
+
def print_separator(title, color=Fore.LIGHTYELLOW_EX):
|
313
|
+
_rows, cols = os.popen('stty size', 'r').read().split()
|
314
|
+
sep = '\n' + color
|
315
|
+
sep += '-' * 40 + ' ' + title + ' '
|
316
|
+
remaining_cols = int(cols) - len(sep)
|
317
|
+
if remaining_cols > 0:
|
318
|
+
sep += '-' * remaining_cols
|
319
|
+
sep += Fore.RESET
|
320
|
+
print(sep)
|
321
|
+
|
322
|
+
|
323
|
+
def main():
|
324
|
+
parser = argparse.ArgumentParser()
|
325
|
+
parser.add_argument("-d", "--dry",
|
326
|
+
action="store_true",
|
327
|
+
help="Dry run, does not write new pdf")
|
328
|
+
parser.add_argument("-v", "--verbose",
|
329
|
+
action="store_true",
|
330
|
+
help="Show more output, useful for debug")
|
331
|
+
args = parser.parse_args()
|
332
|
+
|
333
|
+
if args.dry:
|
334
|
+
global IS_DRY
|
335
|
+
IS_DRY = True
|
336
|
+
print(Fore.CYAN + "Running in dry mode..." + Fore.RESET)
|
337
|
+
if args.verbose:
|
338
|
+
global IS_VERBOSE
|
339
|
+
IS_VERBOSE = True
|
340
|
+
print(Fore.CYAN + "Running in verbose mode..." + Fore.RESET)
|
341
|
+
|
342
|
+
settings = Settings()
|
343
|
+
#settings.getScrapeDirectories()
|
344
|
+
#sys.exit(1)
|
345
|
+
|
346
|
+
for work_directory in settings.getScrapeDirectories():
|
347
|
+
print_separator(work_directory)
|
348
|
+
ignored_files = []
|
349
|
+
for pdffile in get_files(work_directory):
|
350
|
+
try:
|
351
|
+
base = os.path.basename(pdffile)
|
352
|
+
act = settings.getAccount(pdffile)
|
353
|
+
if not act:
|
354
|
+
raise ValueError('no account was matched.')
|
355
|
+
print('Working on' + Fore.LIGHTGREEN_EX, base, Fore.RESET)
|
356
|
+
print_ident(' Cuenta', act.name, color=Fore.LIGHTBLUE_EX)
|
357
|
+
doc = Document(pdffile, act, verbose=IS_VERBOSE)
|
358
|
+
#print(edocta) # Debug ----
|
359
|
+
print_ident('Periodo', doc.period)
|
360
|
+
if IS_VERBOSE:
|
361
|
+
print(Fore.CYAN, doc, Fore.RESET)
|
362
|
+
if not IS_DRY:
|
363
|
+
doc.write_pdf()
|
364
|
+
print_ident('NewFile', doc.full_path)
|
365
|
+
except ValueError as e:
|
366
|
+
#print(e)
|
367
|
+
ignored_files.append(base)
|
368
|
+
#print(Fore.LIGHTRED_EX + ' Error!', e, Fore.RESET)
|
369
|
+
except IOError as e:
|
370
|
+
print('Error, the filepath {} does not exists.'.format(doc.full_path))
|
371
|
+
|
372
|
+
print('\nNo account was matched for these PDF files:')
|
373
|
+
for num, path in enumerate(ignored_files, start=1):
|
374
|
+
print_ident(num, path, color=Fore.RED, field_width=3)
|
375
|
+
|
376
|
+
|
377
|
+
if __name__ == '__main__': main()
|
378
|
+
|
379
|
+
```
|
380
|
+
|
381
|
+
## Bash
|
382
|
+
|
383
|
+
```bash
|
384
|
+
#!/bin/env bash
|
385
|
+
. .common
|
386
|
+
|
387
|
+
YEAR=$(date +%Y)
|
388
|
+
PASS=abcdef
|
389
|
+
GREP_PERIOD='al [0-9]{1,2} de ([A-Zz-z]*) de.? [0-9]+'
|
390
|
+
#Path to move, Dropbox. Use "{YEAR}" to replace with actual year
|
391
|
+
MVTO=../"Impuestos/FISCAL-{YEAR}/Edo Cuenta"
|
392
|
+
|
393
|
+
app_installed qpdf
|
394
|
+
|
395
|
+
count=$(find . -type f -name '[!2]*.pdf' | wc -l)
|
396
|
+
if [ "$count" == '0' ]; then
|
397
|
+
echo -e "${RED}Error, no pdf files found.${RST}"
|
398
|
+
exit 1
|
399
|
+
fi
|
400
|
+
|
401
|
+
for pdf in [!2]*.pdf; do
|
402
|
+
[ ! -r "$pdf" ] && echo -e "${RED}Error, can't access $pdf${RST}" && exit 1
|
403
|
+
echo -e "Working on ${GRE}$pdf${RST}..."
|
404
|
+
|
405
|
+
# Decrypt PDF and uncompress to work with it
|
406
|
+
temp=$(mktemp)
|
407
|
+
#trap 'rm $temp' 0 SIGINT SIGQUIT SIGTERM
|
408
|
+
qpdf --password="$PASS" --decrypt --stream-data=uncompress "$pdf" "$temp"
|
409
|
+
|
410
|
+
# Extract Data from PDF
|
411
|
+
account=$(strings "$temp" | grep -ioE 'platinum|perfiles' | head -1)
|
412
|
+
account=${account,,}
|
413
|
+
account=${account^}
|
414
|
+
echo -e " account: ${BLU}$account${RST}"
|
415
|
+
#period=$(strings "$temp" | grep -iEo 'al [0-9]{1,2} de ([A-Zz-z]*) de [0-9]+' | tail -1)
|
416
|
+
#month=$(echo "$period" | tr ' ' '\n'| tail -3 | head -1)
|
417
|
+
#year=$(echo "$period" | tr ' ' '\n' | tail -1)
|
418
|
+
period=$(pdftotext "$temp" - | grep -iEo "$GREP_PERIOD" | tail -1 )
|
419
|
+
month=$(echo "$period" | awk '{print $4}')
|
420
|
+
year=$(echo "$period" | awk '{print $6}')
|
421
|
+
period=${month,,}
|
422
|
+
|
423
|
+
if [ -z "$period" ]; then
|
424
|
+
echo -e "${RED}Error, period not found.${RST}"
|
425
|
+
exit 1
|
426
|
+
fi
|
427
|
+
|
428
|
+
number=$(convert_month $period)
|
429
|
+
if [ "$account" == "Perfiles" ]; then
|
430
|
+
#number=$(( number - 1 ))
|
431
|
+
number=$(echo "$number - 1" | bc)
|
432
|
+
if [ "${#number}" -eq 1 ]; then
|
433
|
+
number="0$number"
|
434
|
+
fi
|
435
|
+
fi
|
436
|
+
echo -e " period: ${BLU}$year-$period${RST}"
|
437
|
+
|
438
|
+
#Prepare new PDF
|
439
|
+
newfile="$year-${number} ${account}.pdf"
|
440
|
+
#pdftk "$pdf" input_pw "$PASS" output "$newfile"
|
441
|
+
qpdf --password="$PASS" --decrypt "$pdf" "$newfile"
|
442
|
+
if [ -f "$newfile" ]; then
|
443
|
+
mv "$pdf" "${newfile/.pdf/}_$pdf"
|
444
|
+
echo -e " new file: ${BLU}$newfile${RST}"
|
445
|
+
fi
|
446
|
+
|
447
|
+
#Copy it
|
448
|
+
MVTO="${MVTO//'{YEAR}'/$year}"
|
449
|
+
if [ -d "$MVTO" ]; then
|
450
|
+
cp -v "$newfile" "$MVTO"
|
451
|
+
fi
|
452
|
+
done
|
453
|
+
```
|
data/lib/pdfh/document.rb
CHANGED
data/lib/pdfh/version.rb
CHANGED
data/pdfh.gemspec
CHANGED
@@ -14,7 +14,7 @@ Gem::Specification.new do |spec|
|
|
14
14
|
spec.description = 'Examine all PDF files in scrape directories, remove password (if has one), rename and copy to a new directory using regular expresions.'
|
15
15
|
spec.homepage = 'https://github.com/iax7/pdfh'
|
16
16
|
spec.license = 'MIT'
|
17
|
-
spec.required_ruby_version = '>= 2.
|
17
|
+
spec.required_ruby_version = '>= 2.7.0'
|
18
18
|
|
19
19
|
# Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host'
|
20
20
|
# to allow pushing to a single host or delete this section to allow pushing to any host.
|
@@ -38,11 +38,13 @@ Gem::Specification.new do |spec|
|
|
38
38
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
39
39
|
spec.require_paths = ['lib']
|
40
40
|
|
41
|
-
spec.add_dependency 'colorize', '~> 0.8.
|
41
|
+
spec.add_dependency 'colorize', '~> 0.8.0'
|
42
42
|
|
43
|
-
spec.add_development_dependency 'bundler', '~>
|
44
|
-
spec.add_development_dependency '
|
43
|
+
spec.add_development_dependency 'bundler', '~> 2.0'
|
44
|
+
spec.add_development_dependency 'pry', '~> 0.12.0'
|
45
|
+
spec.add_development_dependency 'rake', '~> 13.0'
|
45
46
|
spec.add_development_dependency 'rspec', '~> 3.0'
|
46
|
-
spec.add_development_dependency 'simplecov', '~> 0.
|
47
|
-
spec.add_development_dependency 'simplecov-console', '~> 0.
|
47
|
+
spec.add_development_dependency 'simplecov', '~> 0.17.0'
|
48
|
+
spec.add_development_dependency 'simplecov-console', '~> 0.6.0'
|
49
|
+
spec.add_development_dependency 'versionomy', '~> 0.5'
|
48
50
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pdfh
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Isaias Piña
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-02-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: colorize
|
@@ -16,42 +16,56 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 0.8.
|
19
|
+
version: 0.8.0
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 0.8.
|
26
|
+
version: 0.8.0
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: bundler
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version:
|
33
|
+
version: '2.0'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version:
|
40
|
+
version: '2.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: pry
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 0.12.0
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 0.12.0
|
41
55
|
- !ruby/object:Gem::Dependency
|
42
56
|
name: rake
|
43
57
|
requirement: !ruby/object:Gem::Requirement
|
44
58
|
requirements:
|
45
59
|
- - "~>"
|
46
60
|
- !ruby/object:Gem::Version
|
47
|
-
version: '
|
61
|
+
version: '13.0'
|
48
62
|
type: :development
|
49
63
|
prerelease: false
|
50
64
|
version_requirements: !ruby/object:Gem::Requirement
|
51
65
|
requirements:
|
52
66
|
- - "~>"
|
53
67
|
- !ruby/object:Gem::Version
|
54
|
-
version: '
|
68
|
+
version: '13.0'
|
55
69
|
- !ruby/object:Gem::Dependency
|
56
70
|
name: rspec
|
57
71
|
requirement: !ruby/object:Gem::Requirement
|
@@ -72,28 +86,42 @@ dependencies:
|
|
72
86
|
requirements:
|
73
87
|
- - "~>"
|
74
88
|
- !ruby/object:Gem::Version
|
75
|
-
version: 0.
|
89
|
+
version: 0.17.0
|
76
90
|
type: :development
|
77
91
|
prerelease: false
|
78
92
|
version_requirements: !ruby/object:Gem::Requirement
|
79
93
|
requirements:
|
80
94
|
- - "~>"
|
81
95
|
- !ruby/object:Gem::Version
|
82
|
-
version: 0.
|
96
|
+
version: 0.17.0
|
83
97
|
- !ruby/object:Gem::Dependency
|
84
98
|
name: simplecov-console
|
85
99
|
requirement: !ruby/object:Gem::Requirement
|
86
100
|
requirements:
|
87
101
|
- - "~>"
|
88
102
|
- !ruby/object:Gem::Version
|
89
|
-
version: 0.
|
103
|
+
version: 0.6.0
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - "~>"
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: 0.6.0
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: versionomy
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - "~>"
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0.5'
|
90
118
|
type: :development
|
91
119
|
prerelease: false
|
92
120
|
version_requirements: !ruby/object:Gem::Requirement
|
93
121
|
requirements:
|
94
122
|
- - "~>"
|
95
123
|
- !ruby/object:Gem::Version
|
96
|
-
version: 0.
|
124
|
+
version: '0.5'
|
97
125
|
description: Examine all PDF files in scrape directories, remove password (if has
|
98
126
|
one), rename and copy to a new directory using regular expresions.
|
99
127
|
email:
|
@@ -106,7 +134,6 @@ files:
|
|
106
134
|
- ".gitignore"
|
107
135
|
- ".rspec"
|
108
136
|
- ".rubocop.yml"
|
109
|
-
- ".ruby-gemset"
|
110
137
|
- ".ruby-version"
|
111
138
|
- ".travis.yml"
|
112
139
|
- CHANGELOG.md
|
@@ -118,6 +145,7 @@ files:
|
|
118
145
|
- Rakefile
|
119
146
|
- bin/console
|
120
147
|
- bin/setup
|
148
|
+
- docs/legacy.md
|
121
149
|
- exe/pdfh
|
122
150
|
- lib/ext/string.rb
|
123
151
|
- lib/pdfh.rb
|
@@ -144,14 +172,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
144
172
|
requirements:
|
145
173
|
- - ">="
|
146
174
|
- !ruby/object:Gem::Version
|
147
|
-
version: 2.
|
175
|
+
version: 2.7.0
|
148
176
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
149
177
|
requirements:
|
150
178
|
- - ">="
|
151
179
|
- !ruby/object:Gem::Version
|
152
180
|
version: '0'
|
153
181
|
requirements: []
|
154
|
-
rubygems_version: 3.
|
182
|
+
rubygems_version: 3.1.2
|
155
183
|
signing_key:
|
156
184
|
specification_version: 4
|
157
185
|
summary: Organize PDF files
|
data/.ruby-gemset
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
pdfh
|