arquivo 0.1.5 → 0.1.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 455448d9835d5e9ca98b7aa885e6faa0d8fe55896f58af9f2a225eb031055b6e
4
- data.tar.gz: ce466c0ee18b09c0e3f782150a7f04c7a40df189f79fce3b051aba6a999c1bbf
3
+ metadata.gz: 5c32add30cf30ae788f2ace6ba483e70104a801d5078952ea4d4bb623b3b77da
4
+ data.tar.gz: c2137272520d205c30930579d36338d98635b3084f4bb12cdd139a5d27aa928b
5
5
  SHA512:
6
- metadata.gz: 78a428ec047c3de7e902feb3e9624cef67243c463bf306ef197d760db78b758c631629d143a470b0991220db283eb78b38df2c31dc65bbedb2b89b663c752bf3
7
- data.tar.gz: f7dc9bbc50fc95766949bfdb2d3af06cceab0d7842a0ffe4a858db6b383ca3f187a6d402abb6668de5419a06ae5d10c0a0419fe3aeabc996037facf1457ae59e
6
+ metadata.gz: 58dd3ebad2641ac46c45dec416a6b45d911905b9b270ebeebf35e3f4b396bbedf800bd65858593d5a52624eb4bc42998d965e007c8576849621ca3e4c1fe1f6f
7
+ data.tar.gz: '09ea0d8b876b8d061a67af94dfc2c30e2b272c0e8a0c349070823f4ec0961b1d87db956cf1fb1da0b98a0405f7d48d9ca5307526f414c51ab00f5e44ebb03df0'
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- arquivo (0.1.5)
4
+ arquivo (0.1.6)
5
5
  fastimage (~> 2.1)
6
6
  google-api-client (~> 0.34)
7
7
  pdf-reader (~> 2.3)
@@ -3,23 +3,16 @@
3
3
  require 'thor'
4
4
  require 'arquivo/version'
5
5
  require 'arquivo/extrato'
6
- require 'arquivo/pdf'
7
6
  require 'arquivo/dir'
7
+ require 'arquivo/pdf'
8
8
  require 'arquivo/jpg'
9
9
 
10
10
  module Arquivo
11
11
  class Error < StandardError; end
12
12
 
13
- # size limit for trim attempt
14
- LT = 9000
15
-
16
- # A4 page (8.27x11.69) inches
17
- X4 = 8.27
18
- Y4 = 11.69
19
-
20
13
  # CLI para analisar/processar documentos c118
21
14
  class CLI < Thor
22
- desc 'pdf FILE', 'processa extratos ou faturas'
15
+ desc 'pdf PDF', 'processa PDF criando pasta de documentos para arquivo'
23
16
  def pdf(file)
24
17
  return unless File.ftype(file) == 'file'
25
18
 
@@ -37,9 +30,9 @@ module Arquivo
37
30
 
38
31
  desc 'dir PASTA', 'processa faturas/recibos/extratos/minutas'
39
32
  option :fuzz, type: :numeric, default: 29,
40
- desc: 'fuzz trim N-1 jpg -> escolhe menor scanned pdf'
33
+ desc: 'fuzz trim jpg N-1, escolhe menor -> scanned pdf'
41
34
  option :quality, type: :numeric, default: 15,
42
- desc: 'compress N% jpg -> scanned pdf (less=low quality)'
35
+ desc: 'compress jpg N% -> scanned pdf (less=low quality)'
43
36
  def dir(fdir)
44
37
  return unless File.ftype(fdir) == 'directory'
45
38
 
@@ -4,20 +4,9 @@ require 'google/apis/sheets_v4'
4
4
  require 'googleauth'
5
5
  require 'googleauth/stores/file_token_store'
6
6
 
7
- # ordenacao c118 dos documentos
8
- class Array
9
- def c118_ordena
10
- sort do |a, b|
11
- # processa pela ordem <st2>-<st1>
12
- c = File.basename(a, '.???')
13
- d = File.basename(b, '.???')
14
- c[/(\w+)-(\w+)/, 2] + c[/(\w+)-(\w+)/, 1] <=>
15
- d[/(\w+)-(\w+)/, 2] + d[/(\w+)-(\w+)/, 1]
16
- end
17
- end
18
- end
19
-
20
7
  module Arquivo
8
+ CO = '1>/dev/null 2>&1'
9
+
21
10
  # analisar/processar pasta
22
11
  class C118dir < Enumerator
23
12
  # @return [Enumerator] lista ordenada items dentro duma pasta
@@ -27,25 +16,25 @@ module Arquivo
27
16
  # @return [Hash] dados (faturas/recibos) de c118-contas
28
17
  attr_reader :dados
29
18
 
30
- # @return [String] base do documento
19
+ # @return [String] base nome ficheiros finais (pdf, tar.gz)
31
20
  attr_reader :base
32
21
 
33
22
  # @return [C118dir] documentos c118
34
23
  def initialize(pasta)
35
24
  @items = Dir.glob(File.join(pasta, '*')).each
36
- @base = File.basename(pasta, File.extname(pasta)).downcase +
25
+ @base = File.basename(pasta, File.extname(pasta)) +
37
26
  Date.today.strftime('%Y%m%d')
38
27
  obter_dados(pasta)
39
28
  end
40
29
 
41
30
  def obter_dados(dir)
42
- return unless /factura/i.match?(dir) ||
31
+ return unless /fac?tura/i.match?(dir) ||
43
32
  /recibo/i.match?(dir) ||
44
33
  dados.empty?
45
34
 
46
35
  # obtem dados (faturas/recibos) da sheet c118-contas
47
36
  id = '1PbiMrtTtqGztZMhe3AiJbDS6NQE9o3hXebnQEFdt954'
48
- sh = (/factura/i.match?(dir) ? 'rft' : 'rrc') + '!A2:E'
37
+ sh = (/fac?tura/i.match?(dir) ? 'rft' : 'rrc') + '!A2:E'
49
38
  @dados = c118_sheets.get_spreadsheet_values(id, sh).values
50
39
  .group_by { |k| k[0][/\w+/] }
51
40
  rescue StandardError
@@ -60,6 +49,7 @@ module Arquivo
60
49
  # file obtido console.cloud.google.com/apis OAuth 2.0 client IDs
61
50
  i = Google::Auth::ClientId.from_file("#{p}credentials.json")
62
51
  s = Google::Apis::SheetsV4::AUTH_SPREADSHEETS_READONLY
52
+ # file criado aquando new_credentials is executed
63
53
  f = Google::Auth::Stores::FileTokenStore.new(file: "#{p}token.yaml")
64
54
  z = Google::Auth::UserAuthorizer.new(i, s, f)
65
55
 
@@ -74,10 +64,10 @@ module Arquivo
74
64
  #
75
65
  # @return [Google::Auth::UserAuthorizer] OAuth2 credentials
76
66
  def new_credentials(aut, oob)
77
- puts 'Open URL and copy code after authorization',
67
+ puts 'Open URL and copy code after authorization, in <codigo-aqui>',
78
68
  aut.get_authorization_url(base_url: oob)
79
69
  aut.get_and_store_credentials_from_code(user_id: 'default',
80
- code: 'codigo aqui',
70
+ code: '<codigo-aqui>',
81
71
  base_url: oob)
82
72
  end
83
73
 
@@ -102,10 +92,9 @@ module Arquivo
102
92
 
103
93
  def processa_fim
104
94
  system "rm -f #{base}.*;" \
105
- "pdftk tmp/stamped*.pdf cat output #{base}.pdf;cd tmp/zip;" \
106
- "tar cf ../../#{base}.tar *.pdf;" \
107
- "gzip --best ../../#{base}.tar;"
108
- # 'rm -f *.pdf ../*.pdf'
95
+ "pdftk tmp/stamped*.pdf cat output #{base}.pdf;" \
96
+ "cd tmp/zip;tar cf ../../#{base}.tar *.pdf;" \
97
+ "gzip --best ../../#{base}.tar"
109
98
  end
110
99
 
111
100
  def processa_file(options)
@@ -11,39 +11,30 @@ module Arquivo
11
11
  attr_reader :ext
12
12
  # @return [String] base do documento
13
13
  attr_reader :base
14
-
15
- # @return [String] texto duma pagina pdf
16
- attr_reader :page
17
- # @return [String] base extrato processado
18
- attr_reader :nome
19
- # @return [String] list paginas extrato processado
20
- attr_reader :list
21
-
22
14
  # @return [String] key do documento ft????/rc????/ex??0??/sc??????
23
15
  attr_reader :key
24
- # @return [Numeric] tamanho do pdf
16
+ # @return [Integer] tamanho do pdf
25
17
  attr_reader :size
26
18
 
27
- # @return [C118jpg] scanned jpg em processamento
28
- attr_reader :pjpg
29
- # @return [C118pdf] pdf em processamento
30
- attr_reader :ppdf
19
+ # @return [Array<Integer>] numeros pagina do extrato final
20
+ attr_reader :paginas
21
+ # @return [String] texto pagina pdf
22
+ attr_reader :pagina
23
+ # @return [String] nome extrato
24
+ attr_reader :nome
31
25
 
32
26
  # @return [C118pdf] pdf c118
33
27
  def initialize(fpdf)
34
28
  @file = fpdf
35
29
  @ext = File.extname(fpdf).downcase
36
- @base = File.basename(fpdf, File.extname(fpdf)).downcase
37
-
30
+ @base = File.basename(fpdf, File.extname(fpdf))
38
31
  @key = @base[/\w+/]
39
32
  @size = File.size(fpdf)
40
-
41
- @ppdf = self
42
33
  end
43
34
 
44
35
  def c118_gs
45
36
  # filtrar images para scq e extratos
46
- fi = /^[se]/i.match?(key.to_s) ? ' -dFILTERIMAGE' : ''
37
+ fi = /^[se]/i.match?(key) ? ' -dFILTERIMAGE' : ''
47
38
 
48
39
  'gs -sDEVICE=pdfwrite ' \
49
40
  '-dNOPAUSE -dBATCH -dQUIET ' \
@@ -57,7 +48,7 @@ module Arquivo
57
48
  def processa_extrato?
58
49
  return true if !File.exist?(base) &&
59
50
  File.exist?(file) && ext == '.pdf' &&
60
- first_page
51
+ first_extrato
61
52
 
62
53
  if File.exist?(base)
63
54
  puts "erro: #{base} pasta ja existe"
@@ -69,8 +60,8 @@ module Arquivo
69
60
 
70
61
  def processa_extrato(cnt)
71
62
  cnt += 1
72
- @list += ',' + cnt.to_s if c118_conta?
73
- if next_page
63
+ @paginas << cnt if conta_c118?
64
+ if proxima_pagina
74
65
  faz_extrato if extrato?
75
66
  processa_extrato(cnt)
76
67
  else
@@ -79,47 +70,47 @@ module Arquivo
79
70
  end
80
71
 
81
72
  def extrato?
82
- c118_conta? && page.match?(/extrato +combinado/i)
73
+ conta_c118? && pagina.match?(/extrato +combinado/i)
83
74
  end
84
75
 
85
76
  def faz_extrato
86
77
  system "#{c118_gs} " \
87
78
  "-sOutputFile=#{base}/#{nome}-extrato.pdf " \
88
- "-sPageList=#{list[1..-1]} \"#{file}\" 1>/dev/null 2>&1"
79
+ "-sPageList=#{paginas.join(',')} \"#{file}\" #{CO}"
89
80
  puts "#{nome}-extrato"
90
- base_extrato
81
+ proximo_extrato
91
82
  end
92
83
 
93
- def c118_conta?
94
- page.include?('45463760224')
84
+ def conta_c118?
85
+ pagina.include?('45463760224')
95
86
  end
96
87
 
97
- # @return [PDF::Reader] leitor pdf
98
- def rpdf
99
- @rpdf ||= PDF::Reader.new(file).pages.lazy
88
+ # @return [Enumerator::Lazy] leitor pdf
89
+ def leitor
90
+ @leitor ||= PDF::Reader.new(file).pages.lazy
100
91
  rescue StandardError
101
- @rpdf = nil
92
+ @leitor = nil
102
93
  end
103
94
 
104
95
  # @return [String] texto duma pagina pdf
105
- def next_page
106
- @page = rpdf.next.text
96
+ def proxima_pagina
97
+ @pagina = leitor.next.text
107
98
  rescue StopIteration
108
- @page = nil
99
+ @pagina = nil
109
100
  end
110
101
 
111
- def base_extrato
112
- return false unless page
102
+ def proximo_extrato
103
+ return false unless pagina
113
104
 
114
- @list = ''
115
- n = page.scan(%r{N\. *(\d+)/(\d+)}).flatten
105
+ @paginas = []
106
+ n = pagina.scan(%r{N\. *(\d+)/(\d+)}).flatten
116
107
  @nome = "ex#{n[0].to_s[/\d{2}$/]}#{n[1]}"
117
108
  rescue StandardError
118
109
  @nome = nil
119
110
  end
120
111
 
121
- def first_page
122
- rpdf && next_page && base_extrato
112
+ def first_extrato
113
+ leitor && proxima_pagina && proximo_extrato
123
114
  end
124
115
 
125
116
  def split
@@ -3,7 +3,19 @@
3
3
  require 'fastimage'
4
4
 
5
5
  module Arquivo
6
- # analisar/processar pdf
6
+ # size limit after trim attempt
7
+ LT = 9000
8
+
9
+ # A4 page (8.27x11.69) inches
10
+ X4 = 8.27
11
+ Y4 = 11.69
12
+
13
+ # to calculate image density (in dpi) needed to fit
14
+ # the image with a 2% border all around an A4 page.
15
+ # Factor 1.04 creates 2*2% borders,
16
+ FB = 1.04
17
+
18
+ # analisar/processar jpg
7
19
  class C118jpg < String
8
20
  # @return [String] nome do ficheiro
9
21
  attr_reader :file
@@ -11,24 +23,31 @@ module Arquivo
11
23
  attr_reader :ext
12
24
  # @return [String] base do ficheiro
13
25
  attr_reader :base
14
-
15
26
  # @return [String] key do documento ft????/rc????/ex??0??/sc??????
16
27
  attr_reader :key
17
- # @return [Numeric] tamanho do jpg
28
+ # @return [Integer] tamanho do jpg
18
29
  attr_reader :size
19
30
 
20
31
  # @return [C118jpg] jpg c118
21
32
  def initialize(fjpg)
22
33
  @file = fjpg
23
34
  @ext = File.extname(fjpg).downcase
24
- @base = File.basename(fjpg, File.extname(fjpg)).downcase
25
-
35
+ @base = File.basename(fjpg, File.extname(fjpg))
26
36
  @key = @base[/\w+/]
27
37
  @size = File.size(fjpg)
28
38
  end
29
39
 
30
40
  def processa_jpg(options, dados)
31
- trim(options).jpg2pdf(options).final(dados[key])
41
+ trim(options).converte(options).final(dados[key]).marca
42
+ end
43
+
44
+ def parm_trim(options, fuzz)
45
+ "-fuzz #{fuzz}% -trim +repage #{parm_qualidade(options)} " \
46
+ "tmp/#{key}-#{fuzz}.jpg #{CO}"
47
+ end
48
+
49
+ def parm_qualidade(options)
50
+ "-quality #{options[:quality]}% -compress jpeg"
32
51
  end
33
52
 
34
53
  def trim(options)
@@ -36,42 +55,33 @@ module Arquivo
36
55
  h = {}
37
56
  # obter jpg menor triming borders ao maximo
38
57
  while f >= 1
39
- system "convert \"#{file}\" -fuzz #{f}% -trim +repage " \
40
- "tmp/#{base}#{f}.jpg "
41
- h[f] = File.size("tmp/#{base}#{f}.jpg")
58
+ system "convert \"#{file}\" #{parm_trim(options, f)}"
59
+ h[f] = File.size("tmp/#{key}-#{f}.jpg")
42
60
  f -= 4
43
61
  end
44
62
  m = h.min_by { |_, v| v }
45
- m[1].between?(LT, size) ? C118jpg.new("tmp/#{base}#{m[0]}.jpg") : self
63
+ m[1].between?(LT, size) ? C118jpg.new("tmp/#{key}-#{m[0]}.jpg") : self
46
64
  end
47
65
 
48
- def jpg2pdf(options)
49
- o = "tmp/#{base}.pdf"
50
-
51
- # Center image on a larger canvas (with a size given by "-extent").
52
- x, y = scale_xy
53
- system "convert \"#{file}\" -units PixelsPerInch " \
54
- "-gravity center -extent #{x}x#{y} " \
55
- "-quality #{options[:quality]}% -compress jpeg -format pdf " \
56
- "#{o} 1>/dev/null 2>&1"
66
+ def converte(options)
67
+ # expande jpg on a larger canvas
68
+ system "convert \"#{file}\" #{expande} #{parm_qualidade(options)} " \
69
+ "-format pdf tmp/#{key}-trimed.pdf #{CO}"
57
70
 
58
71
  # devolve pdf processado a partir de jpg
59
- C118pdf.new(o)
72
+ C118pdf.new("tmp/#{key}-trimed.pdf")
60
73
  end
61
74
 
62
- def scale_xy
63
- # Determine image dimensions in pixels.
75
+ def expande
76
+ # image dimensions in pixels.
64
77
  x, y = FastImage.size(file)
65
78
 
66
- # Calculate image density (in dpi) needed to fit the image
67
- # with a 5% border all around an A4 page.
68
- # Factor 1.1 creates 2*5% borders,
69
- # Use the higher density to prevent exceeding the required fit.
70
- density = [x / X4 * 1.04, y / Y4 * 1.04].max
79
+ # use the higher density to prevent exceeding fit
80
+ density = [x / X4 * FB, y / Y4 * FB].max
71
81
 
72
- # Calculate canvas dimensions in pixels.
73
- # (Canvas is an A4 page with the calculated density.)
74
- [X4 * density, Y4 * density]
82
+ # canvas is an A4 page with the calculated density
83
+ '-units PixelsPerInch -gravity center ' \
84
+ "-extent #{X4 * density}x#{Y4 * density}"
75
85
  end
76
86
  end
77
87
  end
@@ -1,40 +1,40 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'i18n'
4
+
4
5
  I18n.config.available_locales = :pt
5
6
 
6
7
  module Arquivo
7
8
  # analisar/processar pdf
8
9
  class C118pdf < String
9
10
  def processa_pdf(options, dados)
10
- @ppdf = pjpg.trim(options).jpg2pdf(options) if jpg?
11
- @ppdf = self if size < ppdf.size
12
- ppdf.final(dados[key])
11
+ # em caso de scanned pdf extract.trim.jpg -> trimed pdf
12
+ tpdf = jpg? ? extract.trim(options).converte(options) : self
13
+
14
+ # usar trimed pdf somente se for menor que original
15
+ (tpdf.size < size ? tpdf : self).final(dados[key]).marca
13
16
  end
14
17
 
15
18
  def marca
16
- # produzir pdf com stamp
17
19
  o = "tmp/stamped-#{base[/-(\w+)/, 1]}-#{key}.pdf"
18
- t = '2 2 moveto /Ubuntu findfont 7 scalefont ' \
20
+ s = '2 2 moveto /Ubuntu findfont 7 scalefont ' \
19
21
  "setfont (#{base}) show"
20
- system "#{c118_gs} -sOutputFile=tmp/stamp-#{key}.pdf -c \"#{t}\";\
21
- pdftk tmp/zip/#{base}.pdf stamp tmp/stamp-#{key}.pdf output #{o}"
22
-
23
- C118pdf.new(o)
22
+ system "#{c118_gs} -sOutputFile=tmp/stamp-#{key}.pdf -c \"#{s}\";" \
23
+ "pdftk tmp/zip/#{base}.pdf " \
24
+ "stamp tmp/stamp-#{key}.pdf output #{o} #{CO}"
24
25
  end
25
26
 
26
27
  def final(kda)
27
28
  c118_stamp(kda)
28
29
  o = "tmp/zip/#{base}.pdf"
29
30
 
30
- if key[0] == 'r'
31
- # google producess better && smaller pdf then c118_gs
32
- system "cp \"#{file}\" #{o}"
33
- else
34
- system "#{c118_gs} -sOutputFile=#{o} \"#{file}\" 1>/dev/null 2>&1"
35
- end
36
- @ppdf = C118pdf.new(o) if File.size(o) <= size
37
- ppdf.marca
31
+ recibo = key[0] == 'r'
32
+ # google print has better && smaller pdf then c118_gs
33
+ system "#{c118_gs} -sOutputFile=#{o} \"#{file}\" #{CO}" unless recibo
34
+ # usar copia do original se processado for maior
35
+ system "cp \"#{file}\" #{o}" if recibo || File.size(o) > size
36
+
37
+ C118pdf.new(o)
38
38
  end
39
39
 
40
40
  def base_stamp(kda)
@@ -94,22 +94,20 @@ module Arquivo
94
94
  def jpg?
95
95
  return false if key[0] == 'r'
96
96
 
97
- o = "tmp/#{base}.txt"
97
+ o = "tmp/#{key}.txt"
98
98
  # teste scanned pdf (se contem texto -> not scanned)
99
99
  system "pdftotext -q -eol unix -nopgbrk \"#{file}\" #{o}"
100
- return false if File.size?(o)
101
-
102
- @pjpg = extract_jpg
100
+ File.size?(o) ? false : true
103
101
  end
104
102
 
105
- def extract_jpg
106
- o = "tmp/#{base}3.jpg"
103
+ def extract
104
+ o = "tmp/#{key}-extract.jpg"
107
105
 
108
- system "pdfimages -q -j #{file} tmp/#{base}2"
106
+ system "pdfimages -q -j \"#{file}\" tmp/#{key}"
109
107
  # nem sempre as imagens sao jpg
110
108
  # somente utilizar a primeira
111
- g = Dir.glob("tmp/#{base}2*.???")
112
- system "convert #{g[0]} #{o} 1>/dev/null 2>&1"
109
+ g = Dir.glob("tmp/#{key}-???.???")
110
+ system "convert #{g[0]} #{o} #{CO}"
113
111
  return unless File.size(o) > LT
114
112
 
115
113
  C118jpg.new(o)
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Arquivo
4
- VERSION = '0.1.5'
4
+ VERSION = '0.1.6'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: arquivo
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
4
+ version: 0.1.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Hernâni Rodrigues Vaz
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-12-14 00:00:00.000000000 Z
11
+ date: 2019-12-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler