pdfh 0.1.3 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8946e2ef210a5573d2654e9514f775c950e42a65b429365703450b117d3b073e
4
- data.tar.gz: d9a5927dc608bb8e0ab74cbe28e7d2ff18597158b0daed41784e486dfe83eb92
3
+ metadata.gz: bd8f993d70a6ed67ff55c9554ba63c74eb17abbb9861f562c899a39d2979f97c
4
+ data.tar.gz: 8ff19318f2a97df3875d6299ab9566cca9a6effa01169f2523ab4ed27266e3c0
5
5
  SHA512:
6
- metadata.gz: 4927717bb5ed460cd444db6a6088d852969e2804c3ebb6efde21b9f7f73a6a310b814b83f062f3ef453d06aaad90a1bff6b9d7cf44bdb6bca22f8dd60cd80d43
7
- data.tar.gz: 1a7d43a0ebd0bd9d990337cda4470193b18f645fd3011828dc82c88c6e1d7e75670d3580ccdde721214e80848eb98086b9162ce7ed2b75a536ac8a04f3100f1f
6
+ metadata.gz: 685b6ea921370a125341e55a7bad2d4931e0932a6ff685ac6fc5aff996bcd2800f4c1e0081fb35f1b6cfd7063760b7ca0a4bc28ba60ad84095314cea4af850ad
7
+ data.tar.gz: 9ab206d0accfbebadd06283a5dd6b6af3905b890a61ea05f65081b795433d0151ae5653ec7889588d78fc42053fd09c9baa7f9c9d0232ae3f628341babbfd329
data/.gitignore CHANGED
@@ -7,6 +7,8 @@
7
7
  /spec/reports/
8
8
  /tmp/
9
9
 
10
+ .DS_Store
11
+
10
12
  # rspec failure tracking
11
13
  .rspec_status
12
14
  coverage
@@ -1,8 +1,11 @@
1
- ---
1
+ inherit_from: .rubocop_todo.yml
2
+
2
3
  AllCops:
4
+ NewCops: enable
5
+ TargetRubyVersion: 2.5
3
6
  Exclude:
4
7
  - '.git/**/*'
5
8
  - 'spec/**/*'
6
9
 
7
- Metrics/LineLength:
8
- Enabled: false
10
+
11
+
@@ -0,0 +1,22 @@
1
+ # This configuration was generated by
2
+ # `rubocop --auto-gen-config`
3
+ # on 2020-10-01 00:19:12 UTC using RuboCop version 0.92.0.
4
+ # The point is for the user to remove these configuration records
5
+ # one by one as the offenses are removed from the code base.
6
+ # Note that changes in the inspected code, or installation of new
7
+ # versions of RuboCop, may require this file to be generated again.
8
+
9
+ # Offense count: 1
10
+ # Configuration parameters: IgnoredMethods.
11
+ Metrics/AbcSize:
12
+ Max: 23
13
+
14
+ # Offense count: 1
15
+ # Configuration parameters: CountComments, CountAsOne.
16
+ Metrics/ClassLength:
17
+ Max: 149
18
+
19
+ # Offense count: 5
20
+ # Configuration parameters: CountComments, CountAsOne, ExcludedMethods.
21
+ Metrics/MethodLength:
22
+ Max: 13
@@ -1 +1 @@
1
- ruby-2.6.0
1
+ 2.7.1
@@ -3,5 +3,5 @@ sudo: false
3
3
  language: ruby
4
4
  cache: bundler
5
5
  rvm:
6
- - 2.6.0
6
+ - 2.7.0
7
7
  before_install: gem install bundler -v 1.17.2
@@ -1,3 +1,11 @@
1
+ ## v0.1.5
2
+ * Add print_cmd field in config file for information purposes
3
+ * Settings now validates non-existent directories
4
+ * Refactor for easier maintenance
5
+
6
+ ## v0.1.4
7
+ * Add titleize format when writing new file
8
+
1
9
  ## v0.1.3
2
10
  * Fixed copying of companion files, which were not being copied.
3
11
 
@@ -1,52 +1,82 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- pdfh (0.1.2)
5
- colorize (~> 0.8.1)
4
+ pdfh (0.1.8)
5
+ colorize (~> 0.8.0)
6
6
 
7
7
  GEM
8
8
  remote: https://rubygems.org/
9
9
  specs:
10
10
  ansi (1.5.0)
11
+ ast (2.4.1)
12
+ blockenspiel (0.5.0)
13
+ coderay (1.1.3)
11
14
  colorize (0.8.1)
12
- diff-lcs (1.3)
13
- docile (1.3.1)
14
- hirb (0.7.3)
15
- json (2.1.0)
16
- rake (10.5.0)
17
- rspec (3.8.0)
18
- rspec-core (~> 3.8.0)
19
- rspec-expectations (~> 3.8.0)
20
- rspec-mocks (~> 3.8.0)
21
- rspec-core (3.8.0)
22
- rspec-support (~> 3.8.0)
23
- rspec-expectations (3.8.2)
15
+ diff-lcs (1.4.4)
16
+ docile (1.3.2)
17
+ method_source (1.0.0)
18
+ parallel (1.19.2)
19
+ parser (2.7.1.5)
20
+ ast (~> 2.4.1)
21
+ pry (0.13.1)
22
+ coderay (~> 1.1)
23
+ method_source (~> 1.0)
24
+ rainbow (3.0.0)
25
+ rake (13.0.1)
26
+ regexp_parser (1.8.1)
27
+ rexml (3.2.4)
28
+ rspec (3.9.0)
29
+ rspec-core (~> 3.9.0)
30
+ rspec-expectations (~> 3.9.0)
31
+ rspec-mocks (~> 3.9.0)
32
+ rspec-core (3.9.3)
33
+ rspec-support (~> 3.9.3)
34
+ rspec-expectations (3.9.2)
24
35
  diff-lcs (>= 1.2.0, < 2.0)
25
- rspec-support (~> 3.8.0)
26
- rspec-mocks (3.8.0)
36
+ rspec-support (~> 3.9.0)
37
+ rspec-mocks (3.9.1)
27
38
  diff-lcs (>= 1.2.0, < 2.0)
28
- rspec-support (~> 3.8.0)
29
- rspec-support (3.8.0)
30
- simplecov (0.16.1)
39
+ rspec-support (~> 3.9.0)
40
+ rspec-support (3.9.3)
41
+ rubocop (0.92.0)
42
+ parallel (~> 1.10)
43
+ parser (>= 2.7.1.5)
44
+ rainbow (>= 2.2.2, < 4.0)
45
+ regexp_parser (>= 1.7)
46
+ rexml
47
+ rubocop-ast (>= 0.5.0)
48
+ ruby-progressbar (~> 1.7)
49
+ unicode-display_width (>= 1.4.0, < 2.0)
50
+ rubocop-ast (0.7.1)
51
+ parser (>= 2.7.1.5)
52
+ ruby-progressbar (1.10.1)
53
+ simplecov (0.19.0)
31
54
  docile (~> 1.1)
32
- json (>= 1.8, < 3)
33
- simplecov-html (~> 0.10.0)
34
- simplecov-console (0.4.2)
55
+ simplecov-html (~> 0.11)
56
+ simplecov-console (0.7.2)
35
57
  ansi
36
- hirb
37
58
  simplecov
38
- simplecov-html (0.10.2)
59
+ terminal-table
60
+ simplecov-html (0.12.3)
61
+ terminal-table (1.8.0)
62
+ unicode-display_width (~> 1.1, >= 1.1.1)
63
+ unicode-display_width (1.7.0)
64
+ versionomy (0.5.0)
65
+ blockenspiel (~> 0.5)
39
66
 
40
67
  PLATFORMS
41
68
  ruby
42
69
 
43
70
  DEPENDENCIES
44
- bundler (~> 1.17.2)
71
+ bundler (~> 2.0)
45
72
  pdfh!
46
- rake (~> 10.0)
73
+ pry
74
+ rake (~> 13.0)
47
75
  rspec (~> 3.0)
48
- simplecov (~> 0.16.1)
49
- simplecov-console (~> 0.4.2)
76
+ rubocop
77
+ simplecov
78
+ simplecov-console
79
+ versionomy
50
80
 
51
81
  BUNDLED WITH
52
- 1.17.2
82
+ 2.1.4
data/README.md CHANGED
@@ -24,7 +24,7 @@ document_types:
24
24
  pwd: base64string # [OPTIONAL] Password if the document is protected
25
25
  store_path: "{YEAR}/bank_docs" # Relative path to copy this document
26
26
  name_template: '{period} {subtype}' # Template for new filename when copied
27
- sub_types: # [OPTIONAL] In case your need an extra category
27
+ sub_types: # [OPTIONAL] In case you need an extra category
28
28
  - name: Account1 # Regular expression to match this subtype
29
29
  month_offset: -1 # [OPTIONAL] Integer value to adjust month
30
30
  ```
@@ -33,16 +33,19 @@ document_types:
33
33
 
34
34
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
35
35
 
36
- To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
36
+ To install this gem onto your local machine, run `rake install`. To release a new version, run `rake bump`, and then run `rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
37
37
 
38
38
  ```bash
39
+ rake install
40
+
41
+ # step by step
39
42
  build pdfh.gemspec
40
43
  gem install pdfh-*
41
44
  ```
42
45
 
43
46
  ## Contributing
44
47
 
45
- Bug reports and pull requests are welcome on GitHub at https://github.com/[USERNAME]/pdfh. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
48
+ Bug reports and pull requests are welcome on GitHub at https://github.com/iax7/pdfh. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
46
49
 
47
50
  ## License
48
51
 
@@ -50,4 +53,4 @@ The gem is available as open source under the terms of the [MIT License](https:/
50
53
 
51
54
  ## Code of Conduct
52
55
 
53
- Everyone interacting in the Pdfh project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/[USERNAME]/pdfh/blob/master/CODE_OF_CONDUCT.md).
56
+ Everyone interacting in the Pdfh project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/iax7/pdfh/blob/master/CODE_OF_CONDUCT.md).
data/Rakefile CHANGED
@@ -1,8 +1,25 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'colorize'
3
4
  require 'bundler/gem_tasks'
4
5
  require 'rspec/core/rake_task'
6
+ require 'versionomy'
5
7
 
6
8
  RSpec::Core::RakeTask.new(:spec)
7
9
 
8
10
  task default: :spec
11
+
12
+ desc 'Bump gem version number (tiny|minor|major)'
13
+ task :bump, :type do |_t, args|
14
+ args.with_defaults(type: :tiny)
15
+ version_file = File.join(__dir__, 'lib', 'pdfh', 'version.rb')
16
+ content = File.read(version_file)
17
+
18
+ version_pattern = /(?<major>\d+)\.(?<minor>\d+)\.(?<tiny>\d+)/
19
+ current_version = content.match(version_pattern)
20
+ next_version = Versionomy.parse(current_version.to_s).bump(args.type).to_s
21
+
22
+ File.write(version_file, content.gsub(version_pattern, "\\1#{next_version}\\3"))
23
+
24
+ puts "Successfully bumped from #{current_version.to_s.red} to #{next_version.green}"
25
+ end
@@ -0,0 +1,453 @@
1
+ # Legacy
2
+
3
+ ## Python
4
+
5
+ This project was born as a bash script. It was initially ported to a Python script,
6
+ and ended as a Ruby gem. Below is the old Python code, provided just for fun.
7
+
8
+ ```python
9
+ #!/usr/bin/env python3
10
+ """Organize PDF protected password files, using rules defined in yaml format."""
11
+ from __future__ import print_function
12
+ import os
13
+ import re
14
+ import base64
15
+ import pprint
16
+ import argparse
17
+ import tempfile
18
+ import subprocess
19
+ import yaml
20
+ from shutil import copyfile
21
+ from colorama import Fore
22
+
23
+ IS_VERBOSE = False
24
+ IS_DRY = False
25
+ # TODO: calendar.month_name[11] current locale
26
+ MONTHS = dict(
27
+ enero = 1,
28
+ febrero = 2,
29
+ marzo = 3,
30
+ abril = 4,
31
+ mayo = 5,
32
+ junio = 6,
33
+ julio = 7,
34
+ agosto = 8,
35
+ septiembre = 9,
36
+ octubre = 10,
37
+ noviembre = 11,
38
+ diciembre = 12
39
+ )
40
+
41
+ class InlineClass(object):
42
+ """Wrapper to have an object like dictionary"""
43
+ def __init__(self, dict):
44
+ self.__dict__ = dict
45
+ def has_key(self, key):
46
+ return key in self.__dict__.keys()
47
+
48
+ def get_month_num(num):
49
+ # Not implemented yet
50
+ import locale
51
+ locale.setlocale(locale.LC_ALL, 'es_MX')
52
+ import calendar
53
+ calendar.month_name[num]
54
+
55
+ class Document(object):
56
+ """Handles the PDF detected by the rules, and makes tranformations"""
57
+ def __init__(self, file, account, **kwargs):
58
+ self._file = file
59
+ self._act = account
60
+ self._extra = ''
61
+ self._has_xml = False
62
+ self._verbose = kwargs['verbose']
63
+ verbose = self._verbose
64
+ if verbose:
65
+ print(Fore.CYAN + account.name, '==================' + Fore.RESET)
66
+
67
+ self._pwd = base64.b64decode(self._act.pwd) if self._act.pwd else ''
68
+ if type(self._pwd) is bytes:
69
+ self._pwd = self._pwd.decode()
70
+
71
+ if not os.path.exists(self._file):
72
+ raise IOError("I can't find the PDF")
73
+
74
+ # Check if aditional XML file exists
75
+ self._xml_file = os.path.splitext(self._file)[0]+'.xml'
76
+ if os.path.exists(self._xml_file):
77
+ self._has_xml = True
78
+
79
+ self._tmp = tempfile.mktemp(suffix=".pdf")
80
+ if verbose:
81
+ print(Fore.CYAN + ' --> ' + self._tmp + ' temporal file assigned.' + Fore.RESET)
82
+
83
+ cmd1 = "qpdf --password='{}' --decrypt --stream-data=uncompress '{}' '{}'" \
84
+ .format(self._pwd, self._file, self._tmp)
85
+ subprocess.call(cmd1, shell=True)
86
+
87
+ cmd2 = "pdftotext -enc UTF-8 '{}' -".format(self._tmp)
88
+
89
+ p = subprocess.Popen(cmd2, stdout=subprocess.PIPE, shell=True)
90
+ self._text, _err = p.communicate()
91
+ if type(self._text) is bytes:
92
+ self._text = self._text.decode(encoding="utf-8", errors="replace")
93
+ if verbose:
94
+ print(Fore.CYAN + self._text + Fore.RESET)
95
+
96
+ match = re.search(self._act.re_date, self._text, re.MULTILINE)
97
+ if not match:
98
+ print(Fore.RED, 'Err, date was not extracted with regex provided: ' + Fore.LIGHTRED_EX +
99
+ self._act.re_date + Fore.RESET)
100
+ exit(1)
101
+ if verbose:
102
+ print(Fore.CYAN, '==== Regex Groups:', match.groups(), Fore.RESET)
103
+ try:
104
+ self._month = match.group('m')
105
+ self._year = match.group('y')
106
+ except IndexError:
107
+ self._month, self._year = match.groups()
108
+
109
+ if len(match.groups()) > 2:
110
+ self._extra = match.group(3)
111
+
112
+ self._month = self._month.lower()
113
+ if verbose:
114
+ print(Fore.CYAN, '==== Assigned:', (self._month, self._year, self._extra),
115
+ '==( Month, Year, Extra )================' + Fore.RESET)
116
+
117
+ if self._act.has_key('types'):
118
+ for t in self._act.types:
119
+ name = t['name']
120
+ if re.search(name, self._text, re.IGNORECASE):
121
+ self.type = name
122
+ self.offset = t.get('month_offset', 0)
123
+ else:
124
+ self.type = None
125
+ self.offset = 0
126
+
127
+ if verbose:
128
+ print(Fore.CYAN, 'Offset settings, Type:', self.type, '/ Month:', self.offset, Fore.RESET)
129
+ #Used if the month offset results in change in year.
130
+ self._year_offset = 0
131
+ if verbose:
132
+ print(Fore.CYAN, 'END INIT ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~' + Fore.RESET)
133
+
134
+ def __repr__(self):
135
+ type_str = self.type if self.type else 'N/A'
136
+ format_string = 'Name : {}\nType : {}\nPeriod : {}\nFile Path: {}\n'+\
137
+ 'File Name: {}\nNew Name : {}\nStorePath: {}\nFullPath : {}'
138
+ return format_string.format(
139
+ self.name, type_str, self.period, self._file,
140
+ self.filename_only, self.new_name, self.store_path, self.full_path)
141
+
142
+ def write_pdf(self):
143
+ dir_path = os.path.dirname(self.full_path)
144
+ if not os.path.exists(dir_path):
145
+ raise IOError("I can't find the store_path")
146
+
147
+ cmd = "qpdf --password='{}' --decrypt '{}' '{}'" \
148
+ .format(self._pwd, self._file, self.full_path)
149
+ subprocess.call(cmd, shell=True)
150
+
151
+ if os.path.exists(self.full_path):
152
+ bkp = self._file + '_'
153
+ os.rename(self._file, bkp)
154
+ # Copy XML File if exists
155
+ if self._has_xml:
156
+ xml_new_path = os.path.splitext(self.full_path)[0]+'.xml'
157
+ copyfile(self._xml_file, xml_new_path)
158
+ xml_bkp = self._xml_file + '_'
159
+ os.rename(self._xml_file, xml_bkp)
160
+ if self._verbose:
161
+ print(Fore.CYAN, 'XML Written: ', xml_new_path, Fore.RESET)
162
+ else:
163
+ raise IOError("The file was not created.")
164
+
165
+ @property
166
+ def name(self): return self._act.name
167
+ @property
168
+ def filename_only(self):
169
+ dir, file = os.path.split(self._file)
170
+ filename, ext = os.path.splitext(file)
171
+ return filename
172
+ @property
173
+ def text(self): return self._text
174
+ @property
175
+ def month(self):
176
+ try:
177
+ month_num = int(self._month)
178
+ except:
179
+ if len(self._month) == 3:
180
+ for month in MONTHS:
181
+ if month[0:3] == self._month:
182
+ month_num = MONTHS[month]
183
+ else:
184
+ month_num = MONTHS[self._month]
185
+
186
+
187
+ if self.offset:
188
+ tmp = month_num + self.offset
189
+ if tmp == 0:
190
+ tmp = 12
191
+ self._year_offset = -1
192
+ elif tmp == 13:
193
+ tmp = 1
194
+ self._year_offset = 1
195
+ else:
196
+ tmp = month_num
197
+ return str(tmp).zfill(2)
198
+ @property
199
+ def year(self):
200
+ if len(self._year) == 2:
201
+ tmp = '20' + self._year
202
+ else:
203
+ tmp = self._year
204
+ year = int(tmp) + self._year_offset
205
+
206
+ return str(year)
207
+ @property
208
+ def period(self): return "{}-{}".format(self.year, self.month)
209
+ @property
210
+ def new_name(self):
211
+ if self._act.has_key('name_template'):
212
+ template = self._act.name_template
213
+ else:
214
+ template = '{original}'
215
+
216
+ type = self.type if self.type else 'NA'
217
+ new = template \
218
+ .replace('{original}', self.filename_only) \
219
+ .replace('{period}', self.period) \
220
+ .replace('{type}', type) \
221
+ .replace('{extra}', self._extra)
222
+ return new + '.pdf'
223
+ @property
224
+ def store_path(self):
225
+ tmp = self._act.store_path.replace('{YEAR}', self.year)
226
+ return tmp
227
+ @property
228
+ def full_path(self):
229
+ tmp = self.store_path
230
+ tmp = tmp if tmp[0] != '/' else tmp[1:]
231
+ base = os.path.expanduser(self._act.base_path)
232
+ base = os.path.abspath(base)
233
+ return os.path.join(base, tmp, self.new_name)
234
+
235
+ class Settings(object):
236
+ """Open the rules YAML file"""
237
+ def __init__(self):
238
+ name = os.path.basename(__file__).replace('py', 'yml')
239
+ dir_oder = []
240
+ dir_oder.append(os.path.dirname(__file__))
241
+ dir_oder.append(os.path.expanduser('~'))
242
+
243
+ paths = map(lambda x: os.path.join(x, name), dir_oder)
244
+
245
+ for path in paths:
246
+ if os.path.isfile(path):
247
+ conf_path = path
248
+ break
249
+
250
+ if 'conf_path' not in locals():
251
+ print('{}Error, no configuraton file was found: {}{}{}'
252
+ .format(Fore.RED, Fore.MAGENTA, ', '.join(paths), Fore.RESET))
253
+ exit(1)
254
+
255
+ fsettings = open(conf_path, 'r')
256
+ if IS_VERBOSE:
257
+ print("Loaded configuration file: {}{}{}"
258
+ .format(Fore.GREEN, conf_path, Fore.RESET))
259
+ self.__dict__ = yaml.load(fsettings)
260
+
261
+ def print(self):
262
+ pp = pprint.PrettyPrinter(indent=2)
263
+ pp.pprint(self.__dict__)
264
+
265
+ def getAccount(self, file_name):
266
+ for act in self.accounts:
267
+ srch = re.search(act['re_file'], file_name)
268
+ if srch != None:
269
+ act['base_path'] = self.base_path
270
+ return InlineClass(act)
271
+
272
+ def getScrapeDirectories(self):
273
+ max_length = len(max(self.scrape_dirs, key=len))
274
+
275
+ if IS_VERBOSE:
276
+ print('Processing directories:')
277
+ for directory in self.scrape_dirs:
278
+ path = os.path.expanduser(directory)
279
+ path = os.path.abspath(path)
280
+ print_ident(directory, path, color=Fore.BLUE, field_width=max_length)
281
+ print()
282
+
283
+ for directory in self.scrape_dirs:
284
+ path = os.path.expanduser(directory)
285
+ path = os.path.abspath(path)
286
+ yield path
287
+
288
+ def get_files(directory=None):
289
+ """Analyze current directory for PDF files"""
290
+ path = os.path.dirname(os.path.abspath(__file__)) if directory == None else directory
291
+ for pdffile in os.listdir(path):
292
+ if pdffile.endswith(".pdf"):
293
+ yield os.path.join(path, pdffile)
294
+
295
+ def print_ident(field, value, **kwargs):
296
+ """Print value with the color specified and correct identation.
297
+
298
+ Args:
299
+ field (int): The value name
300
+ value (str): The value to print
301
+ color (AnsiFore): The color to use
302
+ field_width (int): The identation lenght of fields
303
+
304
+ Returns:
305
+ None: No value is returned.
306
+ """
307
+ color = kwargs['color'] if 'color' in kwargs else Fore.GREEN
308
+ field_width = kwargs['field_width'] if 'field_width' in kwargs else 7
309
+ string_format = ' {:>'+str(field_width)+'}: {}{}{}'
310
+ print(string_format.format(field, color, value, Fore.RESET))
311
+
312
+ def print_separator(title, color=Fore.LIGHTYELLOW_EX):
313
+ _rows, cols = os.popen('stty size', 'r').read().split()
314
+ sep = '\n' + color
315
+ sep += '-' * 40 + ' ' + title + ' '
316
+ remaining_cols = int(cols) - len(sep)
317
+ if remaining_cols > 0:
318
+ sep += '-' * remaining_cols
319
+ sep += Fore.RESET
320
+ print(sep)
321
+
322
+
323
+ def main():
324
+ parser = argparse.ArgumentParser()
325
+ parser.add_argument("-d", "--dry",
326
+ action="store_true",
327
+ help="Dry run, does not write new pdf")
328
+ parser.add_argument("-v", "--verbose",
329
+ action="store_true",
330
+ help="Show more output, useful for debug")
331
+ args = parser.parse_args()
332
+
333
+ if args.dry:
334
+ global IS_DRY
335
+ IS_DRY = True
336
+ print(Fore.CYAN + "Running in dry mode..." + Fore.RESET)
337
+ if args.verbose:
338
+ global IS_VERBOSE
339
+ IS_VERBOSE = True
340
+ print(Fore.CYAN + "Running in verbose mode..." + Fore.RESET)
341
+
342
+ settings = Settings()
343
+ #settings.getScrapeDirectories()
344
+ #sys.exit(1)
345
+
346
+ for work_directory in settings.getScrapeDirectories():
347
+ print_separator(work_directory)
348
+ ignored_files = []
349
+ for pdffile in get_files(work_directory):
350
+ try:
351
+ base = os.path.basename(pdffile)
352
+ act = settings.getAccount(pdffile)
353
+ if not act:
354
+ raise ValueError('no account was matched.')
355
+ print('Working on' + Fore.LIGHTGREEN_EX, base, Fore.RESET)
356
+ print_ident(' Cuenta', act.name, color=Fore.LIGHTBLUE_EX)
357
+ doc = Document(pdffile, act, verbose=IS_VERBOSE)
358
+ #print(edocta) # Debug ----
359
+ print_ident('Periodo', doc.period)
360
+ if IS_VERBOSE:
361
+ print(Fore.CYAN, doc, Fore.RESET)
362
+ if not IS_DRY:
363
+ doc.write_pdf()
364
+ print_ident('NewFile', doc.full_path)
365
+ except ValueError as e:
366
+ #print(e)
367
+ ignored_files.append(base)
368
+ #print(Fore.LIGHTRED_EX + ' Error!', e, Fore.RESET)
369
+ except IOError as e:
370
+ print('Error, the filepath {} does not exists.'.format(doc.full_path))
371
+
372
+ print('\nNo account was matched for these PDF files:')
373
+ for num, path in enumerate(ignored_files, start=1):
374
+ print_ident(num, path, color=Fore.RED, field_width=3)
375
+
376
+
377
+ if __name__ == '__main__': main()
378
+
379
+ ```
380
+
381
+ ## Bash
382
+
383
+ ```bash
384
+ #!/bin/env bash
385
+ . .common
386
+
387
+ YEAR=$(date +%Y)
388
+ PASS=abcdef
389
+ GREP_PERIOD='al [0-9]{1,2} de ([A-Zz-z]*) de.? [0-9]+'
390
+ #Path to move, Dropbox. Use "{YEAR}" to replace with actual year
391
+ MVTO=../"Impuestos/FISCAL-{YEAR}/Edo Cuenta"
392
+
393
+ app_installed qpdf
394
+
395
+ count=$(find . -type f -name '[!2]*.pdf' | wc -l)
396
+ if [ "$count" == '0' ]; then
397
+ echo -e "${RED}Error, no pdf files found.${RST}"
398
+ exit 1
399
+ fi
400
+
401
+ for pdf in [!2]*.pdf; do
402
+ [ ! -r "$pdf" ] && echo -e "${RED}Error, can't access $pdf${RST}" && exit 1
403
+ echo -e "Working on ${GRE}$pdf${RST}..."
404
+
405
+ # Decrypt PDF and uncompress to work with it
406
+ temp=$(mktemp)
407
+ #trap 'rm $temp' 0 SIGINT SIGQUIT SIGTERM
408
+ qpdf --password="$PASS" --decrypt --stream-data=uncompress "$pdf" "$temp"
409
+
410
+ # Extract Data from PDF
411
+ account=$(strings "$temp" | grep -ioE 'platinum|perfiles' | head -1)
412
+ account=${account,,}
413
+ account=${account^}
414
+ echo -e " account: ${BLU}$account${RST}"
415
+ #period=$(strings "$temp" | grep -iEo 'al [0-9]{1,2} de ([A-Zz-z]*) de [0-9]+' | tail -1)
416
+ #month=$(echo "$period" | tr ' ' '\n'| tail -3 | head -1)
417
+ #year=$(echo "$period" | tr ' ' '\n' | tail -1)
418
+ period=$(pdftotext "$temp" - | grep -iEo "$GREP_PERIOD" | tail -1 )
419
+ month=$(echo "$period" | awk '{print $4}')
420
+ year=$(echo "$period" | awk '{print $6}')
421
+ period=${month,,}
422
+
423
+ if [ -z "$period" ]; then
424
+ echo -e "${RED}Error, period not found.${RST}"
425
+ exit 1
426
+ fi
427
+
428
+ number=$(convert_month $period)
429
+ if [ "$account" == "Perfiles" ]; then
430
+ #number=$(( number - 1 ))
431
+ number=$(echo "$number - 1" | bc)
432
+ if [ "${#number}" -eq 1 ]; then
433
+ number="0$number"
434
+ fi
435
+ fi
436
+ echo -e " period: ${BLU}$year-$period${RST}"
437
+
438
+ #Prepare new PDF
439
+ newfile="$year-${number} ${account}.pdf"
440
+ #pdftk "$pdf" input_pw "$PASS" output "$newfile"
441
+ qpdf --password="$PASS" --decrypt "$pdf" "$newfile"
442
+ if [ -f "$newfile" ]; then
443
+ mv "$pdf" "${newfile/.pdf/}_$pdf"
444
+ echo -e " new file: ${BLU}$newfile${RST}"
445
+ fi
446
+
447
+ #Copy it
448
+ MVTO="${MVTO//'{YEAR}'/$year}"
449
+ if [ -d "$MVTO" ]; then
450
+ cp -v "$newfile" "$MVTO"
451
+ fi
452
+ done
453
+ ```