pdfh 0.1.3 → 0.1.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8946e2ef210a5573d2654e9514f775c950e42a65b429365703450b117d3b073e
4
- data.tar.gz: d9a5927dc608bb8e0ab74cbe28e7d2ff18597158b0daed41784e486dfe83eb92
3
+ metadata.gz: bd8f993d70a6ed67ff55c9554ba63c74eb17abbb9861f562c899a39d2979f97c
4
+ data.tar.gz: 8ff19318f2a97df3875d6299ab9566cca9a6effa01169f2523ab4ed27266e3c0
5
5
  SHA512:
6
- metadata.gz: 4927717bb5ed460cd444db6a6088d852969e2804c3ebb6efde21b9f7f73a6a310b814b83f062f3ef453d06aaad90a1bff6b9d7cf44bdb6bca22f8dd60cd80d43
7
- data.tar.gz: 1a7d43a0ebd0bd9d990337cda4470193b18f645fd3011828dc82c88c6e1d7e75670d3580ccdde721214e80848eb98086b9162ce7ed2b75a536ac8a04f3100f1f
6
+ metadata.gz: 685b6ea921370a125341e55a7bad2d4931e0932a6ff685ac6fc5aff996bcd2800f4c1e0081fb35f1b6cfd7063760b7ca0a4bc28ba60ad84095314cea4af850ad
7
+ data.tar.gz: 9ab206d0accfbebadd06283a5dd6b6af3905b890a61ea05f65081b795433d0151ae5653ec7889588d78fc42053fd09c9baa7f9c9d0232ae3f628341babbfd329
data/.gitignore CHANGED
@@ -7,6 +7,8 @@
7
7
  /spec/reports/
8
8
  /tmp/
9
9
 
10
+ .DS_Store
11
+
10
12
  # rspec failure tracking
11
13
  .rspec_status
12
14
  coverage
@@ -1,8 +1,11 @@
1
- ---
1
+ inherit_from: .rubocop_todo.yml
2
+
2
3
  AllCops:
4
+ NewCops: enable
5
+ TargetRubyVersion: 2.5
3
6
  Exclude:
4
7
  - '.git/**/*'
5
8
  - 'spec/**/*'
6
9
 
7
- Metrics/LineLength:
8
- Enabled: false
10
+
11
+
@@ -0,0 +1,22 @@
1
+ # This configuration was generated by
2
+ # `rubocop --auto-gen-config`
3
+ # on 2020-10-01 00:19:12 UTC using RuboCop version 0.92.0.
4
+ # The point is for the user to remove these configuration records
5
+ # one by one as the offenses are removed from the code base.
6
+ # Note that changes in the inspected code, or installation of new
7
+ # versions of RuboCop, may require this file to be generated again.
8
+
9
+ # Offense count: 1
10
+ # Configuration parameters: IgnoredMethods.
11
+ Metrics/AbcSize:
12
+ Max: 23
13
+
14
+ # Offense count: 1
15
+ # Configuration parameters: CountComments, CountAsOne.
16
+ Metrics/ClassLength:
17
+ Max: 149
18
+
19
+ # Offense count: 5
20
+ # Configuration parameters: CountComments, CountAsOne, ExcludedMethods.
21
+ Metrics/MethodLength:
22
+ Max: 13
@@ -1 +1 @@
1
- ruby-2.6.0
1
+ 2.7.1
@@ -3,5 +3,5 @@ sudo: false
3
3
  language: ruby
4
4
  cache: bundler
5
5
  rvm:
6
- - 2.6.0
6
+ - 2.7.0
7
7
  before_install: gem install bundler -v 1.17.2
@@ -1,3 +1,11 @@
1
+ ## v0.1.5
2
+ * Add print_cmd field in config file for information purposes
3
+ * Settings now validates a non-existent directory
4
+ * Refactor for easier maintenance
5
+
6
+ ## v0.1.4
7
+ * Add titleize format when writing new file
8
+
1
9
  ## v0.1.3
2
10
  * Fixed copy companion files, which was not copying the files.
3
11
 
@@ -1,52 +1,82 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- pdfh (0.1.2)
5
- colorize (~> 0.8.1)
4
+ pdfh (0.1.8)
5
+ colorize (~> 0.8.0)
6
6
 
7
7
  GEM
8
8
  remote: https://rubygems.org/
9
9
  specs:
10
10
  ansi (1.5.0)
11
+ ast (2.4.1)
12
+ blockenspiel (0.5.0)
13
+ coderay (1.1.3)
11
14
  colorize (0.8.1)
12
- diff-lcs (1.3)
13
- docile (1.3.1)
14
- hirb (0.7.3)
15
- json (2.1.0)
16
- rake (10.5.0)
17
- rspec (3.8.0)
18
- rspec-core (~> 3.8.0)
19
- rspec-expectations (~> 3.8.0)
20
- rspec-mocks (~> 3.8.0)
21
- rspec-core (3.8.0)
22
- rspec-support (~> 3.8.0)
23
- rspec-expectations (3.8.2)
15
+ diff-lcs (1.4.4)
16
+ docile (1.3.2)
17
+ method_source (1.0.0)
18
+ parallel (1.19.2)
19
+ parser (2.7.1.5)
20
+ ast (~> 2.4.1)
21
+ pry (0.13.1)
22
+ coderay (~> 1.1)
23
+ method_source (~> 1.0)
24
+ rainbow (3.0.0)
25
+ rake (13.0.1)
26
+ regexp_parser (1.8.1)
27
+ rexml (3.2.4)
28
+ rspec (3.9.0)
29
+ rspec-core (~> 3.9.0)
30
+ rspec-expectations (~> 3.9.0)
31
+ rspec-mocks (~> 3.9.0)
32
+ rspec-core (3.9.3)
33
+ rspec-support (~> 3.9.3)
34
+ rspec-expectations (3.9.2)
24
35
  diff-lcs (>= 1.2.0, < 2.0)
25
- rspec-support (~> 3.8.0)
26
- rspec-mocks (3.8.0)
36
+ rspec-support (~> 3.9.0)
37
+ rspec-mocks (3.9.1)
27
38
  diff-lcs (>= 1.2.0, < 2.0)
28
- rspec-support (~> 3.8.0)
29
- rspec-support (3.8.0)
30
- simplecov (0.16.1)
39
+ rspec-support (~> 3.9.0)
40
+ rspec-support (3.9.3)
41
+ rubocop (0.92.0)
42
+ parallel (~> 1.10)
43
+ parser (>= 2.7.1.5)
44
+ rainbow (>= 2.2.2, < 4.0)
45
+ regexp_parser (>= 1.7)
46
+ rexml
47
+ rubocop-ast (>= 0.5.0)
48
+ ruby-progressbar (~> 1.7)
49
+ unicode-display_width (>= 1.4.0, < 2.0)
50
+ rubocop-ast (0.7.1)
51
+ parser (>= 2.7.1.5)
52
+ ruby-progressbar (1.10.1)
53
+ simplecov (0.19.0)
31
54
  docile (~> 1.1)
32
- json (>= 1.8, < 3)
33
- simplecov-html (~> 0.10.0)
34
- simplecov-console (0.4.2)
55
+ simplecov-html (~> 0.11)
56
+ simplecov-console (0.7.2)
35
57
  ansi
36
- hirb
37
58
  simplecov
38
- simplecov-html (0.10.2)
59
+ terminal-table
60
+ simplecov-html (0.12.3)
61
+ terminal-table (1.8.0)
62
+ unicode-display_width (~> 1.1, >= 1.1.1)
63
+ unicode-display_width (1.7.0)
64
+ versionomy (0.5.0)
65
+ blockenspiel (~> 0.5)
39
66
 
40
67
  PLATFORMS
41
68
  ruby
42
69
 
43
70
  DEPENDENCIES
44
- bundler (~> 1.17.2)
71
+ bundler (~> 2.0)
45
72
  pdfh!
46
- rake (~> 10.0)
73
+ pry
74
+ rake (~> 13.0)
47
75
  rspec (~> 3.0)
48
- simplecov (~> 0.16.1)
49
- simplecov-console (~> 0.4.2)
76
+ rubocop
77
+ simplecov
78
+ simplecov-console
79
+ versionomy
50
80
 
51
81
  BUNDLED WITH
52
- 1.17.2
82
+ 2.1.4
data/README.md CHANGED
@@ -24,7 +24,7 @@ document_types:
24
24
  pwd: base64string # [OPTIONAL] Password if the document is protected
25
25
  store_path: "{YEAR}/bank_docs" # Relative path to copy this document
26
26
  name_template: '{period} {subtype}' # Template for new filename when copied
27
- sub_types: # [OPTIONAL] In case your need an extra category
27
+ sub_types: # [OPTIONAL] In case you need an extra category
28
28
  - name: Account1 # Regular expression to match this subtype
29
29
  month_offset: -1 # [OPTIONAL] Integer value to adjust month
30
30
  ```
@@ -33,16 +33,19 @@ document_types:
33
33
 
34
34
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
35
35
 
36
- To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
36
+ To install this gem onto your local machine, run `rake install`. To release a new version, run `rake bump`, and then run `rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
37
37
 
38
38
  ```bash
39
+ rake install
40
+
41
+ # step by step
39
42
  gem build pdfh.gemspec
40
43
  gem install pdfh-*
41
44
  ```
42
45
 
43
46
  ## Contributing
44
47
 
45
- Bug reports and pull requests are welcome on GitHub at https://github.com/[USERNAME]/pdfh. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
48
+ Bug reports and pull requests are welcome on GitHub at https://github.com/iax7/pdfh. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
46
49
 
47
50
  ## License
48
51
 
@@ -50,4 +53,4 @@ The gem is available as open source under the terms of the [MIT License](https:/
50
53
 
51
54
  ## Code of Conduct
52
55
 
53
- Everyone interacting in the Pdfh project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/[USERNAME]/pdfh/blob/master/CODE_OF_CONDUCT.md).
56
+ Everyone interacting in the Pdfh project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/iax7/pdfh/blob/master/CODE_OF_CONDUCT.md).
data/Rakefile CHANGED
@@ -1,8 +1,25 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'colorize'
3
4
  require 'bundler/gem_tasks'
4
5
  require 'rspec/core/rake_task'
6
+ require 'versionomy'
5
7
 
6
8
  RSpec::Core::RakeTask.new(:spec)
7
9
 
8
10
  task default: :spec
11
+
12
+ desc 'Bump gem version number (tiny|minor|major)'
13
+ task :bump, :type do |_t, args|
14
+ args.with_defaults(type: :tiny)
15
+ version_file = File.join(__dir__, 'lib', 'pdfh', 'version.rb')
16
+ content = File.read(version_file)
17
+
18
+ version_pattern = /(?<major>\d+)\.(?<minor>\d+)\.(?<tiny>\d+)/
19
+ current_version = content.match(version_pattern)
20
+ next_version = Versionomy.parse(current_version.to_s).bump(args.type).to_s
21
+
22
+ File.write(version_file, content.gsub(version_pattern, "\\1#{next_version}\\3"))
23
+
24
+ puts "Successfully bumped from #{current_version.to_s.red} to #{next_version.green}"
25
+ end
@@ -0,0 +1,453 @@
1
+ # Legacy
2
+
3
+ ## Python
4
+
5
+ This project was born as a bash script. It was initially ported to a Python script,
6
+ and ended as a Ruby gem. Below is the old Python code, provided just for fun.
7
+
8
+ ```python
9
+ #!/usr/bin/env python3
10
+ """Organize PDF protected password files, using rules defined in yaml format."""
11
+ from __future__ import print_function
12
+ import os
13
+ import re
14
+ import base64
15
+ import pprint
16
+ import argparse
17
+ import tempfile
18
+ import subprocess
19
+ import yaml
20
+ from shutil import copyfile
21
+ from colorama import Fore
22
+
23
+ IS_VERBOSE = False
24
+ IS_DRY = False
25
+ # TODO: calendar.month_name[11] current locale
26
+ MONTHS = dict(
27
+ enero = 1,
28
+ febrero = 2,
29
+ marzo = 3,
30
+ abril = 4,
31
+ mayo = 5,
32
+ junio = 6,
33
+ julio = 7,
34
+ agosto = 8,
35
+ septiembre = 9,
36
+ octubre = 10,
37
+ noviembre = 11,
38
+ diciembre = 12
39
+ )
40
+
41
+ class InlineClass(object):
42
+ """Wrapper to have an object like dictionary"""
43
+ def __init__(self, dict):
44
+ self.__dict__ = dict
45
+ def has_key(self, key):
46
+ return key in self.__dict__.keys()
47
+
48
+ def get_month_num(num):
49
+ # Not implemented yet
50
+ import locale
51
+ locale.setlocale(locale.LC_ALL, 'es_MX')
52
+ import calendar
53
+ calendar.month_name[num]
54
+
55
+ class Document(object):
56
+ """Handles the PDF detected by the rules, and makes tranformations"""
57
+ def __init__(self, file, account, **kwargs):
58
+ self._file = file
59
+ self._act = account
60
+ self._extra = ''
61
+ self._has_xml = False
62
+ self._verbose = kwargs['verbose']
63
+ verbose = self._verbose
64
+ if verbose:
65
+ print(Fore.CYAN + account.name, '==================' + Fore.RESET)
66
+
67
+ self._pwd = base64.b64decode(self._act.pwd) if self._act.pwd else ''
68
+ if type(self._pwd) is bytes:
69
+ self._pwd = self._pwd.decode()
70
+
71
+ if not os.path.exists(self._file):
72
+ raise IOError("I can't find the PDF")
73
+
74
+ # Check if aditional XML file exists
75
+ self._xml_file = os.path.splitext(self._file)[0]+'.xml'
76
+ if os.path.exists(self._xml_file):
77
+ self._has_xml = True
78
+
79
+ self._tmp = tempfile.mktemp(suffix=".pdf")
80
+ if verbose:
81
+ print(Fore.CYAN + ' --> ' + self._tmp + ' temporal file assigned.' + Fore.RESET)
82
+
83
+ cmd1 = "qpdf --password='{}' --decrypt --stream-data=uncompress '{}' '{}'" \
84
+ .format(self._pwd, self._file, self._tmp)
85
+ subprocess.call(cmd1, shell=True)
86
+
87
+ cmd2 = "pdftotext -enc UTF-8 '{}' -".format(self._tmp)
88
+
89
+ p = subprocess.Popen(cmd2, stdout=subprocess.PIPE, shell=True)
90
+ self._text, _err = p.communicate()
91
+ if type(self._text) is bytes:
92
+ self._text = self._text.decode(encoding="utf-8", errors="replace")
93
+ if verbose:
94
+ print(Fore.CYAN + self._text + Fore.RESET)
95
+
96
+ match = re.search(self._act.re_date, self._text, re.MULTILINE)
97
+ if not match:
98
+ print(Fore.RED, 'Err, date was not extracted with regex provided: ' + Fore.LIGHTRED_EX +
99
+ self._act.re_date + Fore.RESET)
100
+ exit(1)
101
+ if verbose:
102
+ print(Fore.CYAN, '==== Regex Groups:', match.groups(), Fore.RESET)
103
+ try:
104
+ self._month = match.group('m')
105
+ self._year = match.group('y')
106
+ except IndexError:
107
+ self._month, self._year = match.groups()
108
+
109
+ if len(match.groups()) > 2:
110
+ self._extra = match.group(3)
111
+
112
+ self._month = self._month.lower()
113
+ if verbose:
114
+ print(Fore.CYAN, '==== Assigned:', (self._month, self._year, self._extra),
115
+ '==( Month, Year, Extra )================' + Fore.RESET)
116
+
117
+ if self._act.has_key('types'):
118
+ for t in self._act.types:
119
+ name = t['name']
120
+ if re.search(name, self._text, re.IGNORECASE):
121
+ self.type = name
122
+ self.offset = t.get('month_offset', 0)
123
+ else:
124
+ self.type = None
125
+ self.offset = 0
126
+
127
+ if verbose:
128
+ print(Fore.CYAN, 'Offset settings, Type:', self.type, '/ Month:', self.offset, Fore.RESET)
129
+ #Used if the month offset results in change in year.
130
+ self._year_offset = 0
131
+ if verbose:
132
+ print(Fore.CYAN, 'END INIT ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~' + Fore.RESET)
133
+
134
+ def __repr__(self):
135
+ type_str = self.type if self.type else 'N/A'
136
+ format_string = 'Name : {}\nType : {}\nPeriod : {}\nFile Path: {}\n'+\
137
+ 'File Name: {}\nNew Name : {}\nStorePath: {}\nFullPath : {}'
138
+ return format_string.format(
139
+ self.name, type_str, self.period, self._file,
140
+ self.filename_only, self.new_name, self.store_path, self.full_path)
141
+
142
+ def write_pdf(self):
143
+ dir_path = os.path.dirname(self.full_path)
144
+ if not os.path.exists(dir_path):
145
+ raise IOError("I can't find the store_path")
146
+
147
+ cmd = "qpdf --password='{}' --decrypt '{}' '{}'" \
148
+ .format(self._pwd, self._file, self.full_path)
149
+ subprocess.call(cmd, shell=True)
150
+
151
+ if os.path.exists(self.full_path):
152
+ bkp = self._file + '_'
153
+ os.rename(self._file, bkp)
154
+ # Copy XML File if exists
155
+ if self._has_xml:
156
+ xml_new_path = os.path.splitext(self.full_path)[0]+'.xml'
157
+ copyfile(self._xml_file, xml_new_path)
158
+ xml_bkp = self._xml_file + '_'
159
+ os.rename(self._xml_file, xml_bkp)
160
+ if self._verbose:
161
+ print(Fore.CYAN, 'XML Written: ', xml_new_path, Fore.RESET)
162
+ else:
163
+ raise IOError("The file was not created.")
164
+
165
+ @property
166
+ def name(self): return self._act.name
167
+ @property
168
+ def filename_only(self):
169
+ dir, file = os.path.split(self._file)
170
+ filename, ext = os.path.splitext(file)
171
+ return filename
172
+ @property
173
+ def text(self): return self._text
174
+ @property
175
+ def month(self):
176
+ try:
177
+ month_num = int(self._month)
178
+ except:
179
+ if len(self._month) == 3:
180
+ for month in MONTHS:
181
+ if month[0:3] == self._month:
182
+ month_num = MONTHS[month]
183
+ else:
184
+ month_num = MONTHS[self._month]
185
+
186
+
187
+ if self.offset:
188
+ tmp = month_num + self.offset
189
+ if tmp == 0:
190
+ tmp = 12
191
+ self._year_offset = -1
192
+ elif tmp == 13:
193
+ tmp = 1
194
+ self._year_offset = 1
195
+ else:
196
+ tmp = month_num
197
+ return str(tmp).zfill(2)
198
+ @property
199
+ def year(self):
200
+ if len(self._year) == 2:
201
+ tmp = '20' + self._year
202
+ else:
203
+ tmp = self._year
204
+ year = int(tmp) + self._year_offset
205
+
206
+ return str(year)
207
+ @property
208
+ def period(self): return "{}-{}".format(self.year, self.month)
209
+ @property
210
+ def new_name(self):
211
+ if self._act.has_key('name_template'):
212
+ template = self._act.name_template
213
+ else:
214
+ template = '{original}'
215
+
216
+ type = self.type if self.type else 'NA'
217
+ new = template \
218
+ .replace('{original}', self.filename_only) \
219
+ .replace('{period}', self.period) \
220
+ .replace('{type}', type) \
221
+ .replace('{extra}', self._extra)
222
+ return new + '.pdf'
223
+ @property
224
+ def store_path(self):
225
+ tmp = self._act.store_path.replace('{YEAR}', self.year)
226
+ return tmp
227
+ @property
228
+ def full_path(self):
229
+ tmp = self.store_path
230
+ tmp = tmp if tmp[0] != '/' else tmp[1:]
231
+ base = os.path.expanduser(self._act.base_path)
232
+ base = os.path.abspath(base)
233
+ return os.path.join(base, tmp, self.new_name)
234
+
235
+ class Settings(object):
236
+ """Open the rules YAML file"""
237
+ def __init__(self):
238
+ name = os.path.basename(__file__).replace('py', 'yml')
239
+ dir_oder = []
240
+ dir_oder.append(os.path.dirname(__file__))
241
+ dir_oder.append(os.path.expanduser('~'))
242
+
243
+ paths = map(lambda x: os.path.join(x, name), dir_oder)
244
+
245
+ for path in paths:
246
+ if os.path.isfile(path):
247
+ conf_path = path
248
+ break
249
+
250
+ if 'conf_path' not in locals():
251
+ print('{}Error, no configuraton file was found: {}{}{}'
252
+ .format(Fore.RED, Fore.MAGENTA, ', '.join(paths), Fore.RESET))
253
+ exit(1)
254
+
255
+ fsettings = open(conf_path, 'r')
256
+ if IS_VERBOSE:
257
+ print("Loaded configuration file: {}{}{}"
258
+ .format(Fore.GREEN, conf_path, Fore.RESET))
259
+ self.__dict__ = yaml.load(fsettings)
260
+
261
+ def print(self):
262
+ pp = pprint.PrettyPrinter(indent=2)
263
+ pp.pprint(self.__dict__)
264
+
265
+ def getAccount(self, file_name):
266
+ for act in self.accounts:
267
+ srch = re.search(act['re_file'], file_name)
268
+ if srch != None:
269
+ act['base_path'] = self.base_path
270
+ return InlineClass(act)
271
+
272
+ def getScrapeDirectories(self):
273
+ max_length = len(max(self.scrape_dirs, key=len))
274
+
275
+ if IS_VERBOSE:
276
+ print('Processing directories:')
277
+ for directory in self.scrape_dirs:
278
+ path = os.path.expanduser(directory)
279
+ path = os.path.abspath(path)
280
+ print_ident(directory, path, color=Fore.BLUE, field_width=max_length)
281
+ print()
282
+
283
+ for directory in self.scrape_dirs:
284
+ path = os.path.expanduser(directory)
285
+ path = os.path.abspath(path)
286
+ yield path
287
+
288
+ def get_files(directory=None):
289
+ """Analyze current directory for PDF files"""
290
+ path = os.path.dirname(os.path.abspath(__file__)) if directory == None else directory
291
+ for pdffile in os.listdir(path):
292
+ if pdffile.endswith(".pdf"):
293
+ yield os.path.join(path, pdffile)
294
+
295
+ def print_ident(field, value, **kwargs):
296
+ """Print value with the color specified and correct identation.
297
+
298
+ Args:
299
+ field (int): The value name
300
+ value (str): The value to print
301
+ color (AnsiFore): The color to use
302
+ field_width (int): The identation lenght of fields
303
+
304
+ Returns:
305
+ None: No value is returned.
306
+ """
307
+ color = kwargs['color'] if 'color' in kwargs else Fore.GREEN
308
+ field_width = kwargs['field_width'] if 'field_width' in kwargs else 7
309
+ string_format = ' {:>'+str(field_width)+'}: {}{}{}'
310
+ print(string_format.format(field, color, value, Fore.RESET))
311
+
312
+ def print_separator(title, color=Fore.LIGHTYELLOW_EX):
313
+ _rows, cols = os.popen('stty size', 'r').read().split()
314
+ sep = '\n' + color
315
+ sep += '-' * 40 + ' ' + title + ' '
316
+ remaining_cols = int(cols) - len(sep)
317
+ if remaining_cols > 0:
318
+ sep += '-' * remaining_cols
319
+ sep += Fore.RESET
320
+ print(sep)
321
+
322
+
323
+ def main():
324
+ parser = argparse.ArgumentParser()
325
+ parser.add_argument("-d", "--dry",
326
+ action="store_true",
327
+ help="Dry run, does not write new pdf")
328
+ parser.add_argument("-v", "--verbose",
329
+ action="store_true",
330
+ help="Show more output, useful for debug")
331
+ args = parser.parse_args()
332
+
333
+ if args.dry:
334
+ global IS_DRY
335
+ IS_DRY = True
336
+ print(Fore.CYAN + "Running in dry mode..." + Fore.RESET)
337
+ if args.verbose:
338
+ global IS_VERBOSE
339
+ IS_VERBOSE = True
340
+ print(Fore.CYAN + "Running in verbose mode..." + Fore.RESET)
341
+
342
+ settings = Settings()
343
+ #settings.getScrapeDirectories()
344
+ #sys.exit(1)
345
+
346
+ for work_directory in settings.getScrapeDirectories():
347
+ print_separator(work_directory)
348
+ ignored_files = []
349
+ for pdffile in get_files(work_directory):
350
+ try:
351
+ base = os.path.basename(pdffile)
352
+ act = settings.getAccount(pdffile)
353
+ if not act:
354
+ raise ValueError('no account was matched.')
355
+ print('Working on' + Fore.LIGHTGREEN_EX, base, Fore.RESET)
356
+ print_ident(' Cuenta', act.name, color=Fore.LIGHTBLUE_EX)
357
+ doc = Document(pdffile, act, verbose=IS_VERBOSE)
358
+ #print(edocta) # Debug ----
359
+ print_ident('Periodo', doc.period)
360
+ if IS_VERBOSE:
361
+ print(Fore.CYAN, doc, Fore.RESET)
362
+ if not IS_DRY:
363
+ doc.write_pdf()
364
+ print_ident('NewFile', doc.full_path)
365
+ except ValueError as e:
366
+ #print(e)
367
+ ignored_files.append(base)
368
+ #print(Fore.LIGHTRED_EX + ' Error!', e, Fore.RESET)
369
+ except IOError as e:
370
+ print('Error, the filepath {} does not exists.'.format(doc.full_path))
371
+
372
+ print('\nNo account was matched for these PDF files:')
373
+ for num, path in enumerate(ignored_files, start=1):
374
+ print_ident(num, path, color=Fore.RED, field_width=3)
375
+
376
+
377
+ if __name__ == '__main__': main()
378
+
379
+ ```
380
+
381
+ ## Bash
382
+
383
+ ```bash
384
+ #!/bin/env bash
385
+ . .common
386
+
387
+ YEAR=$(date +%Y)
388
+ PASS=abcdef
389
+ GREP_PERIOD='al [0-9]{1,2} de ([A-Zz-z]*) de.? [0-9]+'
390
+ #Path to move, Dropbox. Use "{YEAR}" to replace with actual year
391
+ MVTO=../"Impuestos/FISCAL-{YEAR}/Edo Cuenta"
392
+
393
+ app_installed qpdf
394
+
395
+ count=$(find . -type f -name '[!2]*.pdf' | wc -l)
396
+ if [ "$count" == '0' ]; then
397
+ echo -e "${RED}Error, no pdf files found.${RST}"
398
+ exit 1
399
+ fi
400
+
401
+ for pdf in [!2]*.pdf; do
402
+ [ ! -r "$pdf" ] && echo -e "${RED}Error, can't access $pdf${RST}" && exit 1
403
+ echo -e "Working on ${GRE}$pdf${RST}..."
404
+
405
+ # Decrypt PDF and uncompress to work with it
406
+ temp=$(mktemp)
407
+ #trap 'rm $temp' 0 SIGINT SIGQUIT SIGTERM
408
+ qpdf --password="$PASS" --decrypt --stream-data=uncompress "$pdf" "$temp"
409
+
410
+ # Extract Data from PDF
411
+ account=$(strings "$temp" | grep -ioE 'platinum|perfiles' | head -1)
412
+ account=${account,,}
413
+ account=${account^}
414
+ echo -e " account: ${BLU}$account${RST}"
415
+ #period=$(strings "$temp" | grep -iEo 'al [0-9]{1,2} de ([A-Zz-z]*) de [0-9]+' | tail -1)
416
+ #month=$(echo "$period" | tr ' ' '\n'| tail -3 | head -1)
417
+ #year=$(echo "$period" | tr ' ' '\n' | tail -1)
418
+ period=$(pdftotext "$temp" - | grep -iEo "$GREP_PERIOD" | tail -1 )
419
+ month=$(echo "$period" | awk '{print $4}')
420
+ year=$(echo "$period" | awk '{print $6}')
421
+ period=${month,,}
422
+
423
+ if [ -z "$period" ]; then
424
+ echo -e "${RED}Error, period not found.${RST}"
425
+ exit 1
426
+ fi
427
+
428
+ number=$(convert_month $period)
429
+ if [ "$account" == "Perfiles" ]; then
430
+ #number=$(( number - 1 ))
431
+ number=$(echo "$number - 1" | bc)
432
+ if [ "${#number}" -eq 1 ]; then
433
+ number="0$number"
434
+ fi
435
+ fi
436
+ echo -e " period: ${BLU}$year-$period${RST}"
437
+
438
+ #Prepare new PDF
439
+ newfile="$year-${number} ${account}.pdf"
440
+ #pdftk "$pdf" input_pw "$PASS" output "$newfile"
441
+ qpdf --password="$PASS" --decrypt "$pdf" "$newfile"
442
+ if [ -f "$newfile" ]; then
443
+ mv "$pdf" "${newfile/.pdf/}_$pdf"
444
+ echo -e " new file: ${BLU}$newfile${RST}"
445
+ fi
446
+
447
+ #Copy it
448
+ MVTO="${MVTO//'{YEAR}'/$year}"
449
+ if [ -d "$MVTO" ]; then
450
+ cp -v "$newfile" "$MVTO"
451
+ fi
452
+ done
453
+ ```