pdfh 0.1.5 → 0.1.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.ruby-version +1 -1
- data/.travis.yml +1 -1
- data/Gemfile.lock +37 -25
- data/README.md +5 -2
- data/Rakefile +16 -0
- data/docs/legacy.md +453 -0
- data/lib/pdfh/document.rb +2 -0
- data/lib/pdfh/version.rb +1 -1
- data/pdfh.gemspec +8 -6
- metadata +43 -15
- data/.ruby-gemset +0 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 77f42bce0e05785d392ed0745ae2fc245f634d83b58e0257fe5f9b4273203072
|
4
|
+
data.tar.gz: 92f71bbc691405e04a1a34288e1753120c1716c120103edb49bf419c06db00da
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: beba17f5bdc7795fa3b19fa37a2417ca0e493e4808b0ee62aa75447fa64f9272ed10c4ea3717be040e4ab27304d1b0cc04b195b4c6b9fdf28afbb2da37798b7b
|
7
|
+
data.tar.gz: d4f19d454fbf2b9bce448a3f197762032d8d10c41bf7a28959eff043e060ca6f467184cd13bc83a3df8d74affc338a1df4a2e1f770701224e8d76a2dcc9e329b
|
data/.gitignore
CHANGED
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
2.7.0
|
data/.travis.yml
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,52 +1,64 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
pdfh (0.1.
|
5
|
-
colorize (~> 0.8.
|
4
|
+
pdfh (0.1.6)
|
5
|
+
colorize (~> 0.8.0)
|
6
6
|
|
7
7
|
GEM
|
8
8
|
remote: https://rubygems.org/
|
9
9
|
specs:
|
10
10
|
ansi (1.5.0)
|
11
|
+
blockenspiel (0.5.0)
|
12
|
+
coderay (1.1.2)
|
11
13
|
colorize (0.8.1)
|
12
14
|
diff-lcs (1.3)
|
13
|
-
docile (1.3.
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
rspec-
|
23
|
-
|
15
|
+
docile (1.3.2)
|
16
|
+
json (2.3.0)
|
17
|
+
method_source (0.9.2)
|
18
|
+
pry (0.12.2)
|
19
|
+
coderay (~> 1.1.0)
|
20
|
+
method_source (~> 0.9.0)
|
21
|
+
rake (13.0.1)
|
22
|
+
rspec (3.9.0)
|
23
|
+
rspec-core (~> 3.9.0)
|
24
|
+
rspec-expectations (~> 3.9.0)
|
25
|
+
rspec-mocks (~> 3.9.0)
|
26
|
+
rspec-core (3.9.1)
|
27
|
+
rspec-support (~> 3.9.1)
|
28
|
+
rspec-expectations (3.9.0)
|
24
29
|
diff-lcs (>= 1.2.0, < 2.0)
|
25
|
-
rspec-support (~> 3.
|
26
|
-
rspec-mocks (3.
|
30
|
+
rspec-support (~> 3.9.0)
|
31
|
+
rspec-mocks (3.9.1)
|
27
32
|
diff-lcs (>= 1.2.0, < 2.0)
|
28
|
-
rspec-support (~> 3.
|
29
|
-
rspec-support (3.
|
30
|
-
simplecov (0.
|
33
|
+
rspec-support (~> 3.9.0)
|
34
|
+
rspec-support (3.9.2)
|
35
|
+
simplecov (0.17.1)
|
31
36
|
docile (~> 1.1)
|
32
37
|
json (>= 1.8, < 3)
|
33
38
|
simplecov-html (~> 0.10.0)
|
34
|
-
simplecov-console (0.
|
39
|
+
simplecov-console (0.6.0)
|
35
40
|
ansi
|
36
|
-
hirb
|
37
41
|
simplecov
|
42
|
+
terminal-table
|
38
43
|
simplecov-html (0.10.2)
|
44
|
+
terminal-table (1.8.0)
|
45
|
+
unicode-display_width (~> 1.1, >= 1.1.1)
|
46
|
+
unicode-display_width (1.6.1)
|
47
|
+
versionomy (0.5.0)
|
48
|
+
blockenspiel (~> 0.5)
|
39
49
|
|
40
50
|
PLATFORMS
|
41
51
|
ruby
|
42
52
|
|
43
53
|
DEPENDENCIES
|
44
|
-
bundler (~>
|
54
|
+
bundler (~> 2.0)
|
45
55
|
pdfh!
|
46
|
-
|
56
|
+
pry (~> 0.12.0)
|
57
|
+
rake (~> 13.0)
|
47
58
|
rspec (~> 3.0)
|
48
|
-
simplecov (~> 0.
|
49
|
-
simplecov-console (~> 0.
|
59
|
+
simplecov (~> 0.17.0)
|
60
|
+
simplecov-console (~> 0.6.0)
|
61
|
+
versionomy (~> 0.5)
|
50
62
|
|
51
63
|
BUNDLED WITH
|
52
|
-
1.
|
64
|
+
2.1.2
|
data/README.md
CHANGED
@@ -24,7 +24,7 @@ document_types:
|
|
24
24
|
pwd: base64string # [OPTIONAL] Password if the document is protected
|
25
25
|
store_path: "{YEAR}/bank_docs" # Relative path to copy this document
|
26
26
|
name_template: '{period} {subtype}' # Template for new filename when copied
|
27
|
-
sub_types: # [OPTIONAL] In case your need an extra category
|
27
|
+
sub_types: # [OPTIONAL] In case you need an extra category
|
28
28
|
- name: Account1 # Regular expression to match this subtype
|
29
29
|
month_offset: -1 # [OPTIONAL] Integer value to adjust month
|
30
30
|
```
|
@@ -33,9 +33,12 @@ document_types:
|
|
33
33
|
|
34
34
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
35
35
|
|
36
|
-
To install this gem onto your local machine, run `
|
36
|
+
To install this gem onto your local machine, run `rake install`. To release a new version, run `rake bump`, and then run `rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
37
37
|
|
38
38
|
```bash
|
39
|
+
rake install
|
40
|
+
|
41
|
+
# step by step
|
39
42
|
gem build pdfh.gemspec
|
40
43
|
gem install pdfh-*
|
41
44
|
```
|
data/Rakefile
CHANGED
@@ -2,7 +2,23 @@
|
|
2
2
|
|
3
3
|
require 'bundler/gem_tasks'
|
4
4
|
require 'rspec/core/rake_task'
|
5
|
+
require 'versionomy'
|
5
6
|
|
6
7
|
RSpec::Core::RakeTask.new(:spec)
|
7
8
|
|
8
9
|
task default: :spec
|
10
|
+
|
11
|
+
# Bumps the gem version stored in lib/pdfh/version.rb.
#
# Usage:
#   rake bump          # bumps the tiny (patch) field
#   rake bump[minor]   # bumps the minor field
#   rake bump[major]   # bumps the major field
#
# The first MAJOR.MINOR.TINY string found in the version file is parsed with
# Versionomy, bumped on the requested field and written back in place.
desc 'Bump gem version number (tiny|minor|major)'
task :bump, :type do |_task, args|
  args.with_defaults(type: :tiny)
  version_file = File.join(__dir__, 'lib', 'pdfh', 'version.rb')
  content = File.read(version_file)

  version_pattern = /(?<major>\d+)\.(?<minor>\d+)\.(?<tiny>\d+)/
  current_version = content[version_pattern]
  # Fail loudly instead of handing an empty string to the version parser.
  abort "No version number found in #{version_file}" unless current_version

  # Task arguments given on the command line arrive as Strings while the
  # default is a Symbol; normalise so the field is always named the same way.
  next_version = Versionomy.parse(current_version).bump(args.type.to_sym).to_s

  # Replace the whole matched version with the new one. The previous
  # replacement string wrapped the new version in "\1" and "\3", which refer to
  # the major and tiny captures (named groups are numbered too), so "0.1.5"
  # was rewritten as "00.1.65" instead of "0.1.6".
  # `sub` touches only the first occurrence, i.e. the one that was parsed above.
  File.write(version_file, content.sub(version_pattern, next_version))

  puts "Successfully bumped from #{current_version} to #{next_version}!"
end
|
data/docs/legacy.md
ADDED
@@ -0,0 +1,453 @@
|
|
1
|
+
# Legacy
|
2
|
+
|
3
|
+
## Python
|
4
|
+
|
5
|
+
This project was born as a bash script. It was initially ported to a Python script,
|
6
|
+
and ended as a Ruby gem. Below is the old Python code, provided just for fun.
|
7
|
+
|
8
|
+
```python
|
9
|
+
#!/usr/bin/env python3
|
10
|
+
"""Organize PDF protected password files, using rules defined in yaml format."""
|
11
|
+
from __future__ import print_function
|
12
|
+
import os
|
13
|
+
import re
|
14
|
+
import base64
|
15
|
+
import pprint
|
16
|
+
import argparse
|
17
|
+
import tempfile
|
18
|
+
import subprocess
|
19
|
+
import yaml
|
20
|
+
from shutil import copyfile
|
21
|
+
from colorama import Fore
|
22
|
+
|
23
|
+
IS_VERBOSE = False
|
24
|
+
IS_DRY = False
|
25
|
+
# TODO: calendar.month_name[11] current locale
|
26
|
+
MONTHS = dict(
|
27
|
+
enero = 1,
|
28
|
+
febrero = 2,
|
29
|
+
marzo = 3,
|
30
|
+
abril = 4,
|
31
|
+
mayo = 5,
|
32
|
+
junio = 6,
|
33
|
+
julio = 7,
|
34
|
+
agosto = 8,
|
35
|
+
septiembre = 9,
|
36
|
+
octubre = 10,
|
37
|
+
noviembre = 11,
|
38
|
+
diciembre = 12
|
39
|
+
)
|
40
|
+
|
41
|
+
class InlineClass(object):
|
42
|
+
"""Wrapper to have an object like dictionary"""
|
43
|
+
def __init__(self, dict):
|
44
|
+
self.__dict__ = dict
|
45
|
+
def has_key(self, key):
|
46
|
+
return key in self.__dict__.keys()
|
47
|
+
|
48
|
+
def get_month_num(num):
|
49
|
+
# Not implemented yet
|
50
|
+
import locale
|
51
|
+
locale.setlocale(locale.LC_ALL, 'es_MX')
|
52
|
+
import calendar
|
53
|
+
calendar.month_name[num]
|
54
|
+
|
55
|
+
class Document(object):
|
56
|
+
"""Handles the PDF detected by the rules, and makes tranformations"""
|
57
|
+
def __init__(self, file, account, **kwargs):
|
58
|
+
self._file = file
|
59
|
+
self._act = account
|
60
|
+
self._extra = ''
|
61
|
+
self._has_xml = False
|
62
|
+
self._verbose = kwargs['verbose']
|
63
|
+
verbose = self._verbose
|
64
|
+
if verbose:
|
65
|
+
print(Fore.CYAN + account.name, '==================' + Fore.RESET)
|
66
|
+
|
67
|
+
self._pwd = base64.b64decode(self._act.pwd) if self._act.pwd else ''
|
68
|
+
if type(self._pwd) is bytes:
|
69
|
+
self._pwd = self._pwd.decode()
|
70
|
+
|
71
|
+
if not os.path.exists(self._file):
|
72
|
+
raise IOError("I can't find the PDF")
|
73
|
+
|
74
|
+
# Check if aditional XML file exists
|
75
|
+
self._xml_file = os.path.splitext(self._file)[0]+'.xml'
|
76
|
+
if os.path.exists(self._xml_file):
|
77
|
+
self._has_xml = True
|
78
|
+
|
79
|
+
self._tmp = tempfile.mktemp(suffix=".pdf")
|
80
|
+
if verbose:
|
81
|
+
print(Fore.CYAN + ' --> ' + self._tmp + ' temporal file assigned.' + Fore.RESET)
|
82
|
+
|
83
|
+
cmd1 = "qpdf --password='{}' --decrypt --stream-data=uncompress '{}' '{}'" \
|
84
|
+
.format(self._pwd, self._file, self._tmp)
|
85
|
+
subprocess.call(cmd1, shell=True)
|
86
|
+
|
87
|
+
cmd2 = "pdftotext -enc UTF-8 '{}' -".format(self._tmp)
|
88
|
+
|
89
|
+
p = subprocess.Popen(cmd2, stdout=subprocess.PIPE, shell=True)
|
90
|
+
self._text, _err = p.communicate()
|
91
|
+
if type(self._text) is bytes:
|
92
|
+
self._text = self._text.decode(encoding="utf-8", errors="replace")
|
93
|
+
if verbose:
|
94
|
+
print(Fore.CYAN + self._text + Fore.RESET)
|
95
|
+
|
96
|
+
match = re.search(self._act.re_date, self._text, re.MULTILINE)
|
97
|
+
if not match:
|
98
|
+
print(Fore.RED, 'Err, date was not extracted with regex provided: ' + Fore.LIGHTRED_EX +
|
99
|
+
self._act.re_date + Fore.RESET)
|
100
|
+
exit(1)
|
101
|
+
if verbose:
|
102
|
+
print(Fore.CYAN, '==== Regex Groups:', match.groups(), Fore.RESET)
|
103
|
+
try:
|
104
|
+
self._month = match.group('m')
|
105
|
+
self._year = match.group('y')
|
106
|
+
except IndexError:
|
107
|
+
self._month, self._year = match.groups()
|
108
|
+
|
109
|
+
if len(match.groups()) > 2:
|
110
|
+
self._extra = match.group(3)
|
111
|
+
|
112
|
+
self._month = self._month.lower()
|
113
|
+
if verbose:
|
114
|
+
print(Fore.CYAN, '==== Assigned:', (self._month, self._year, self._extra),
|
115
|
+
'==( Month, Year, Extra )================' + Fore.RESET)
|
116
|
+
|
117
|
+
if self._act.has_key('types'):
|
118
|
+
for t in self._act.types:
|
119
|
+
name = t['name']
|
120
|
+
if re.search(name, self._text, re.IGNORECASE):
|
121
|
+
self.type = name
|
122
|
+
self.offset = t.get('month_offset', 0)
|
123
|
+
else:
|
124
|
+
self.type = None
|
125
|
+
self.offset = 0
|
126
|
+
|
127
|
+
if verbose:
|
128
|
+
print(Fore.CYAN, 'Offset settings, Type:', self.type, '/ Month:', self.offset, Fore.RESET)
|
129
|
+
#Used if the month offset results in change in year.
|
130
|
+
self._year_offset = 0
|
131
|
+
if verbose:
|
132
|
+
print(Fore.CYAN, 'END INIT ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~' + Fore.RESET)
|
133
|
+
|
134
|
+
def __repr__(self):
|
135
|
+
type_str = self.type if self.type else 'N/A'
|
136
|
+
format_string = 'Name : {}\nType : {}\nPeriod : {}\nFile Path: {}\n'+\
|
137
|
+
'File Name: {}\nNew Name : {}\nStorePath: {}\nFullPath : {}'
|
138
|
+
return format_string.format(
|
139
|
+
self.name, type_str, self.period, self._file,
|
140
|
+
self.filename_only, self.new_name, self.store_path, self.full_path)
|
141
|
+
|
142
|
+
def write_pdf(self):
|
143
|
+
dir_path = os.path.dirname(self.full_path)
|
144
|
+
if not os.path.exists(dir_path):
|
145
|
+
raise IOError("I can't find the store_path")
|
146
|
+
|
147
|
+
cmd = "qpdf --password='{}' --decrypt '{}' '{}'" \
|
148
|
+
.format(self._pwd, self._file, self.full_path)
|
149
|
+
subprocess.call(cmd, shell=True)
|
150
|
+
|
151
|
+
if os.path.exists(self.full_path):
|
152
|
+
bkp = self._file + '_'
|
153
|
+
os.rename(self._file, bkp)
|
154
|
+
# Copy XML File if exists
|
155
|
+
if self._has_xml:
|
156
|
+
xml_new_path = os.path.splitext(self.full_path)[0]+'.xml'
|
157
|
+
copyfile(self._xml_file, xml_new_path)
|
158
|
+
xml_bkp = self._xml_file + '_'
|
159
|
+
os.rename(self._xml_file, xml_bkp)
|
160
|
+
if self._verbose:
|
161
|
+
print(Fore.CYAN, 'XML Written: ', xml_new_path, Fore.RESET)
|
162
|
+
else:
|
163
|
+
raise IOError("The file was not created.")
|
164
|
+
|
165
|
+
@property
|
166
|
+
def name(self): return self._act.name
|
167
|
+
@property
|
168
|
+
def filename_only(self):
|
169
|
+
dir, file = os.path.split(self._file)
|
170
|
+
filename, ext = os.path.splitext(file)
|
171
|
+
return filename
|
172
|
+
@property
|
173
|
+
def text(self): return self._text
|
174
|
+
@property
|
175
|
+
def month(self):
|
176
|
+
try:
|
177
|
+
month_num = int(self._month)
|
178
|
+
except:
|
179
|
+
if len(self._month) == 3:
|
180
|
+
for month in MONTHS:
|
181
|
+
if month[0:3] == self._month:
|
182
|
+
month_num = MONTHS[month]
|
183
|
+
else:
|
184
|
+
month_num = MONTHS[self._month]
|
185
|
+
|
186
|
+
|
187
|
+
if self.offset:
|
188
|
+
tmp = month_num + self.offset
|
189
|
+
if tmp == 0:
|
190
|
+
tmp = 12
|
191
|
+
self._year_offset = -1
|
192
|
+
elif tmp == 13:
|
193
|
+
tmp = 1
|
194
|
+
self._year_offset = 1
|
195
|
+
else:
|
196
|
+
tmp = month_num
|
197
|
+
return str(tmp).zfill(2)
|
198
|
+
@property
|
199
|
+
def year(self):
|
200
|
+
if len(self._year) == 2:
|
201
|
+
tmp = '20' + self._year
|
202
|
+
else:
|
203
|
+
tmp = self._year
|
204
|
+
year = int(tmp) + self._year_offset
|
205
|
+
|
206
|
+
return str(year)
|
207
|
+
@property
|
208
|
+
def period(self): return "{}-{}".format(self.year, self.month)
|
209
|
+
@property
|
210
|
+
def new_name(self):
|
211
|
+
if self._act.has_key('name_template'):
|
212
|
+
template = self._act.name_template
|
213
|
+
else:
|
214
|
+
template = '{original}'
|
215
|
+
|
216
|
+
type = self.type if self.type else 'NA'
|
217
|
+
new = template \
|
218
|
+
.replace('{original}', self.filename_only) \
|
219
|
+
.replace('{period}', self.period) \
|
220
|
+
.replace('{type}', type) \
|
221
|
+
.replace('{extra}', self._extra)
|
222
|
+
return new + '.pdf'
|
223
|
+
@property
|
224
|
+
def store_path(self):
|
225
|
+
tmp = self._act.store_path.replace('{YEAR}', self.year)
|
226
|
+
return tmp
|
227
|
+
@property
|
228
|
+
def full_path(self):
|
229
|
+
tmp = self.store_path
|
230
|
+
tmp = tmp if tmp[0] != '/' else tmp[1:]
|
231
|
+
base = os.path.expanduser(self._act.base_path)
|
232
|
+
base = os.path.abspath(base)
|
233
|
+
return os.path.join(base, tmp, self.new_name)
|
234
|
+
|
235
|
+
class Settings(object):
|
236
|
+
"""Open the rules YAML file"""
|
237
|
+
def __init__(self):
|
238
|
+
name = os.path.basename(__file__).replace('py', 'yml')
|
239
|
+
dir_oder = []
|
240
|
+
dir_oder.append(os.path.dirname(__file__))
|
241
|
+
dir_oder.append(os.path.expanduser('~'))
|
242
|
+
|
243
|
+
paths = map(lambda x: os.path.join(x, name), dir_oder)
|
244
|
+
|
245
|
+
for path in paths:
|
246
|
+
if os.path.isfile(path):
|
247
|
+
conf_path = path
|
248
|
+
break
|
249
|
+
|
250
|
+
if 'conf_path' not in locals():
|
251
|
+
print('{}Error, no configuraton file was found: {}{}{}'
|
252
|
+
.format(Fore.RED, Fore.MAGENTA, ', '.join(paths), Fore.RESET))
|
253
|
+
exit(1)
|
254
|
+
|
255
|
+
fsettings = open(conf_path, 'r')
|
256
|
+
if IS_VERBOSE:
|
257
|
+
print("Loaded configuration file: {}{}{}"
|
258
|
+
.format(Fore.GREEN, conf_path, Fore.RESET))
|
259
|
+
self.__dict__ = yaml.load(fsettings)
|
260
|
+
|
261
|
+
def print(self):
|
262
|
+
pp = pprint.PrettyPrinter(indent=2)
|
263
|
+
pp.pprint(self.__dict__)
|
264
|
+
|
265
|
+
def getAccount(self, file_name):
|
266
|
+
for act in self.accounts:
|
267
|
+
srch = re.search(act['re_file'], file_name)
|
268
|
+
if srch != None:
|
269
|
+
act['base_path'] = self.base_path
|
270
|
+
return InlineClass(act)
|
271
|
+
|
272
|
+
def getScrapeDirectories(self):
|
273
|
+
max_length = len(max(self.scrape_dirs, key=len))
|
274
|
+
|
275
|
+
if IS_VERBOSE:
|
276
|
+
print('Processing directories:')
|
277
|
+
for directory in self.scrape_dirs:
|
278
|
+
path = os.path.expanduser(directory)
|
279
|
+
path = os.path.abspath(path)
|
280
|
+
print_ident(directory, path, color=Fore.BLUE, field_width=max_length)
|
281
|
+
print()
|
282
|
+
|
283
|
+
for directory in self.scrape_dirs:
|
284
|
+
path = os.path.expanduser(directory)
|
285
|
+
path = os.path.abspath(path)
|
286
|
+
yield path
|
287
|
+
|
288
|
+
def get_files(directory=None):
|
289
|
+
"""Analyze current directory for PDF files"""
|
290
|
+
path = os.path.dirname(os.path.abspath(__file__)) if directory == None else directory
|
291
|
+
for pdffile in os.listdir(path):
|
292
|
+
if pdffile.endswith(".pdf"):
|
293
|
+
yield os.path.join(path, pdffile)
|
294
|
+
|
295
|
+
def print_ident(field, value, **kwargs):
|
296
|
+
"""Print value with the color specified and correct identation.
|
297
|
+
|
298
|
+
Args:
|
299
|
+
field (int): The value name
|
300
|
+
value (str): The value to print
|
301
|
+
color (AnsiFore): The color to use
|
302
|
+
field_width (int): The identation lenght of fields
|
303
|
+
|
304
|
+
Returns:
|
305
|
+
None: No value is returned.
|
306
|
+
"""
|
307
|
+
color = kwargs['color'] if 'color' in kwargs else Fore.GREEN
|
308
|
+
field_width = kwargs['field_width'] if 'field_width' in kwargs else 7
|
309
|
+
string_format = ' {:>'+str(field_width)+'}: {}{}{}'
|
310
|
+
print(string_format.format(field, color, value, Fore.RESET))
|
311
|
+
|
312
|
+
def print_separator(title, color=Fore.LIGHTYELLOW_EX):
|
313
|
+
_rows, cols = os.popen('stty size', 'r').read().split()
|
314
|
+
sep = '\n' + color
|
315
|
+
sep += '-' * 40 + ' ' + title + ' '
|
316
|
+
remaining_cols = int(cols) - len(sep)
|
317
|
+
if remaining_cols > 0:
|
318
|
+
sep += '-' * remaining_cols
|
319
|
+
sep += Fore.RESET
|
320
|
+
print(sep)
|
321
|
+
|
322
|
+
|
323
|
+
def main():
|
324
|
+
parser = argparse.ArgumentParser()
|
325
|
+
parser.add_argument("-d", "--dry",
|
326
|
+
action="store_true",
|
327
|
+
help="Dry run, does not write new pdf")
|
328
|
+
parser.add_argument("-v", "--verbose",
|
329
|
+
action="store_true",
|
330
|
+
help="Show more output, useful for debug")
|
331
|
+
args = parser.parse_args()
|
332
|
+
|
333
|
+
if args.dry:
|
334
|
+
global IS_DRY
|
335
|
+
IS_DRY = True
|
336
|
+
print(Fore.CYAN + "Running in dry mode..." + Fore.RESET)
|
337
|
+
if args.verbose:
|
338
|
+
global IS_VERBOSE
|
339
|
+
IS_VERBOSE = True
|
340
|
+
print(Fore.CYAN + "Running in verbose mode..." + Fore.RESET)
|
341
|
+
|
342
|
+
settings = Settings()
|
343
|
+
#settings.getScrapeDirectories()
|
344
|
+
#sys.exit(1)
|
345
|
+
|
346
|
+
for work_directory in settings.getScrapeDirectories():
|
347
|
+
print_separator(work_directory)
|
348
|
+
ignored_files = []
|
349
|
+
for pdffile in get_files(work_directory):
|
350
|
+
try:
|
351
|
+
base = os.path.basename(pdffile)
|
352
|
+
act = settings.getAccount(pdffile)
|
353
|
+
if not act:
|
354
|
+
raise ValueError('no account was matched.')
|
355
|
+
print('Working on' + Fore.LIGHTGREEN_EX, base, Fore.RESET)
|
356
|
+
print_ident(' Cuenta', act.name, color=Fore.LIGHTBLUE_EX)
|
357
|
+
doc = Document(pdffile, act, verbose=IS_VERBOSE)
|
358
|
+
#print(edocta) # Debug ----
|
359
|
+
print_ident('Periodo', doc.period)
|
360
|
+
if IS_VERBOSE:
|
361
|
+
print(Fore.CYAN, doc, Fore.RESET)
|
362
|
+
if not IS_DRY:
|
363
|
+
doc.write_pdf()
|
364
|
+
print_ident('NewFile', doc.full_path)
|
365
|
+
except ValueError as e:
|
366
|
+
#print(e)
|
367
|
+
ignored_files.append(base)
|
368
|
+
#print(Fore.LIGHTRED_EX + ' Error!', e, Fore.RESET)
|
369
|
+
except IOError as e:
|
370
|
+
print('Error, the filepath {} does not exists.'.format(doc.full_path))
|
371
|
+
|
372
|
+
print('\nNo account was matched for these PDF files:')
|
373
|
+
for num, path in enumerate(ignored_files, start=1):
|
374
|
+
print_ident(num, path, color=Fore.RED, field_width=3)
|
375
|
+
|
376
|
+
|
377
|
+
if __name__ == '__main__': main()
|
378
|
+
|
379
|
+
```
|
380
|
+
|
381
|
+
## Bash
|
382
|
+
|
383
|
+
```bash
|
384
|
+
#!/bin/env bash
|
385
|
+
. .common
|
386
|
+
|
387
|
+
YEAR=$(date +%Y)
|
388
|
+
PASS=abcdef
|
389
|
+
GREP_PERIOD='al [0-9]{1,2} de ([A-Zz-z]*) de.? [0-9]+'
|
390
|
+
#Path to move, Dropbox. Use "{YEAR}" to replace with actual year
|
391
|
+
MVTO=../"Impuestos/FISCAL-{YEAR}/Edo Cuenta"
|
392
|
+
|
393
|
+
app_installed qpdf
|
394
|
+
|
395
|
+
count=$(find . -type f -name '[!2]*.pdf' | wc -l)
|
396
|
+
if [ "$count" == '0' ]; then
|
397
|
+
echo -e "${RED}Error, no pdf files found.${RST}"
|
398
|
+
exit 1
|
399
|
+
fi
|
400
|
+
|
401
|
+
for pdf in [!2]*.pdf; do
|
402
|
+
[ ! -r "$pdf" ] && echo -e "${RED}Error, can't access $pdf${RST}" && exit 1
|
403
|
+
echo -e "Working on ${GRE}$pdf${RST}..."
|
404
|
+
|
405
|
+
# Decrypt PDF and uncompress to work with it
|
406
|
+
temp=$(mktemp)
|
407
|
+
#trap 'rm $temp' 0 SIGINT SIGQUIT SIGTERM
|
408
|
+
qpdf --password="$PASS" --decrypt --stream-data=uncompress "$pdf" "$temp"
|
409
|
+
|
410
|
+
# Extract Data from PDF
|
411
|
+
account=$(strings "$temp" | grep -ioE 'platinum|perfiles' | head -1)
|
412
|
+
account=${account,,}
|
413
|
+
account=${account^}
|
414
|
+
echo -e " account: ${BLU}$account${RST}"
|
415
|
+
#period=$(strings "$temp" | grep -iEo 'al [0-9]{1,2} de ([A-Zz-z]*) de [0-9]+' | tail -1)
|
416
|
+
#month=$(echo "$period" | tr ' ' '\n'| tail -3 | head -1)
|
417
|
+
#year=$(echo "$period" | tr ' ' '\n' | tail -1)
|
418
|
+
period=$(pdftotext "$temp" - | grep -iEo "$GREP_PERIOD" | tail -1 )
|
419
|
+
month=$(echo "$period" | awk '{print $4}')
|
420
|
+
year=$(echo "$period" | awk '{print $6}')
|
421
|
+
period=${month,,}
|
422
|
+
|
423
|
+
if [ -z "$period" ]; then
|
424
|
+
echo -e "${RED}Error, period not found.${RST}"
|
425
|
+
exit 1
|
426
|
+
fi
|
427
|
+
|
428
|
+
number=$(convert_month $period)
|
429
|
+
if [ "$account" == "Perfiles" ]; then
|
430
|
+
#number=$(( number - 1 ))
|
431
|
+
number=$(echo "$number - 1" | bc)
|
432
|
+
if [ "${#number}" -eq 1 ]; then
|
433
|
+
number="0$number"
|
434
|
+
fi
|
435
|
+
fi
|
436
|
+
echo -e " period: ${BLU}$year-$period${RST}"
|
437
|
+
|
438
|
+
#Prepare new PDF
|
439
|
+
newfile="$year-${number} ${account}.pdf"
|
440
|
+
#pdftk "$pdf" input_pw "$PASS" output "$newfile"
|
441
|
+
qpdf --password="$PASS" --decrypt "$pdf" "$newfile"
|
442
|
+
if [ -f "$newfile" ]; then
|
443
|
+
mv "$pdf" "${newfile/.pdf/}_$pdf"
|
444
|
+
echo -e " new file: ${BLU}$newfile${RST}"
|
445
|
+
fi
|
446
|
+
|
447
|
+
#Copy it
|
448
|
+
MVTO="${MVTO//'{YEAR}'/$year}"
|
449
|
+
if [ -d "$MVTO" ]; then
|
450
|
+
cp -v "$newfile" "$MVTO"
|
451
|
+
fi
|
452
|
+
done
|
453
|
+
```
|
data/lib/pdfh/document.rb
CHANGED
data/lib/pdfh/version.rb
CHANGED
data/pdfh.gemspec
CHANGED
@@ -14,7 +14,7 @@ Gem::Specification.new do |spec|
|
|
14
14
|
spec.description = 'Examine all PDF files in scrape directories, remove password (if has one), rename and copy to a new directory using regular expresions.'
|
15
15
|
spec.homepage = 'https://github.com/iax7/pdfh'
|
16
16
|
spec.license = 'MIT'
|
17
|
-
spec.required_ruby_version = '>= 2.
|
17
|
+
spec.required_ruby_version = '>= 2.7.0'
|
18
18
|
|
19
19
|
# Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host'
|
20
20
|
# to allow pushing to a single host or delete this section to allow pushing to any host.
|
@@ -38,11 +38,13 @@ Gem::Specification.new do |spec|
|
|
38
38
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
39
39
|
spec.require_paths = ['lib']
|
40
40
|
|
41
|
-
spec.add_dependency 'colorize', '~> 0.8.
|
41
|
+
spec.add_dependency 'colorize', '~> 0.8.0'
|
42
42
|
|
43
|
-
spec.add_development_dependency 'bundler', '~>
|
44
|
-
spec.add_development_dependency '
|
43
|
+
spec.add_development_dependency 'bundler', '~> 2.0'
|
44
|
+
spec.add_development_dependency 'pry', '~> 0.12.0'
|
45
|
+
spec.add_development_dependency 'rake', '~> 13.0'
|
45
46
|
spec.add_development_dependency 'rspec', '~> 3.0'
|
46
|
-
spec.add_development_dependency 'simplecov', '~> 0.
|
47
|
-
spec.add_development_dependency 'simplecov-console', '~> 0.
|
47
|
+
spec.add_development_dependency 'simplecov', '~> 0.17.0'
|
48
|
+
spec.add_development_dependency 'simplecov-console', '~> 0.6.0'
|
49
|
+
spec.add_development_dependency 'versionomy', '~> 0.5'
|
48
50
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pdfh
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Isaias Piña
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2020-02-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: colorize
|
@@ -16,42 +16,56 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 0.8.
|
19
|
+
version: 0.8.0
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 0.8.
|
26
|
+
version: 0.8.0
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: bundler
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version:
|
33
|
+
version: '2.0'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version:
|
40
|
+
version: '2.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: pry
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 0.12.0
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 0.12.0
|
41
55
|
- !ruby/object:Gem::Dependency
|
42
56
|
name: rake
|
43
57
|
requirement: !ruby/object:Gem::Requirement
|
44
58
|
requirements:
|
45
59
|
- - "~>"
|
46
60
|
- !ruby/object:Gem::Version
|
47
|
-
version: '
|
61
|
+
version: '13.0'
|
48
62
|
type: :development
|
49
63
|
prerelease: false
|
50
64
|
version_requirements: !ruby/object:Gem::Requirement
|
51
65
|
requirements:
|
52
66
|
- - "~>"
|
53
67
|
- !ruby/object:Gem::Version
|
54
|
-
version: '
|
68
|
+
version: '13.0'
|
55
69
|
- !ruby/object:Gem::Dependency
|
56
70
|
name: rspec
|
57
71
|
requirement: !ruby/object:Gem::Requirement
|
@@ -72,28 +86,42 @@ dependencies:
|
|
72
86
|
requirements:
|
73
87
|
- - "~>"
|
74
88
|
- !ruby/object:Gem::Version
|
75
|
-
version: 0.
|
89
|
+
version: 0.17.0
|
76
90
|
type: :development
|
77
91
|
prerelease: false
|
78
92
|
version_requirements: !ruby/object:Gem::Requirement
|
79
93
|
requirements:
|
80
94
|
- - "~>"
|
81
95
|
- !ruby/object:Gem::Version
|
82
|
-
version: 0.
|
96
|
+
version: 0.17.0
|
83
97
|
- !ruby/object:Gem::Dependency
|
84
98
|
name: simplecov-console
|
85
99
|
requirement: !ruby/object:Gem::Requirement
|
86
100
|
requirements:
|
87
101
|
- - "~>"
|
88
102
|
- !ruby/object:Gem::Version
|
89
|
-
version: 0.
|
103
|
+
version: 0.6.0
|
104
|
+
type: :development
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - "~>"
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: 0.6.0
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: versionomy
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - "~>"
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0.5'
|
90
118
|
type: :development
|
91
119
|
prerelease: false
|
92
120
|
version_requirements: !ruby/object:Gem::Requirement
|
93
121
|
requirements:
|
94
122
|
- - "~>"
|
95
123
|
- !ruby/object:Gem::Version
|
96
|
-
version: 0.
|
124
|
+
version: '0.5'
|
97
125
|
description: Examine all PDF files in scrape directories, remove password (if has
|
98
126
|
one), rename and copy to a new directory using regular expresions.
|
99
127
|
email:
|
@@ -106,7 +134,6 @@ files:
|
|
106
134
|
- ".gitignore"
|
107
135
|
- ".rspec"
|
108
136
|
- ".rubocop.yml"
|
109
|
-
- ".ruby-gemset"
|
110
137
|
- ".ruby-version"
|
111
138
|
- ".travis.yml"
|
112
139
|
- CHANGELOG.md
|
@@ -118,6 +145,7 @@ files:
|
|
118
145
|
- Rakefile
|
119
146
|
- bin/console
|
120
147
|
- bin/setup
|
148
|
+
- docs/legacy.md
|
121
149
|
- exe/pdfh
|
122
150
|
- lib/ext/string.rb
|
123
151
|
- lib/pdfh.rb
|
@@ -144,14 +172,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
144
172
|
requirements:
|
145
173
|
- - ">="
|
146
174
|
- !ruby/object:Gem::Version
|
147
|
-
version: 2.
|
175
|
+
version: 2.7.0
|
148
176
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
149
177
|
requirements:
|
150
178
|
- - ">="
|
151
179
|
- !ruby/object:Gem::Version
|
152
180
|
version: '0'
|
153
181
|
requirements: []
|
154
|
-
rubygems_version: 3.
|
182
|
+
rubygems_version: 3.1.2
|
155
183
|
signing_key:
|
156
184
|
specification_version: 4
|
157
185
|
summary: Organize PDF files
|
data/.ruby-gemset
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
pdfh
|