arquivo 0.1.5 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 455448d9835d5e9ca98b7aa885e6faa0d8fe55896f58af9f2a225eb031055b6e
4
- data.tar.gz: ce466c0ee18b09c0e3f782150a7f04c7a40df189f79fce3b051aba6a999c1bbf
3
+ metadata.gz: 5c32add30cf30ae788f2ace6ba483e70104a801d5078952ea4d4bb623b3b77da
4
+ data.tar.gz: c2137272520d205c30930579d36338d98635b3084f4bb12cdd139a5d27aa928b
5
5
  SHA512:
6
- metadata.gz: 78a428ec047c3de7e902feb3e9624cef67243c463bf306ef197d760db78b758c631629d143a470b0991220db283eb78b38df2c31dc65bbedb2b89b663c752bf3
7
- data.tar.gz: f7dc9bbc50fc95766949bfdb2d3af06cceab0d7842a0ffe4a858db6b383ca3f187a6d402abb6668de5419a06ae5d10c0a0419fe3aeabc996037facf1457ae59e
6
+ metadata.gz: 58dd3ebad2641ac46c45dec416a6b45d911905b9b270ebeebf35e3f4b396bbedf800bd65858593d5a52624eb4bc42998d965e007c8576849621ca3e4c1fe1f6f
7
+ data.tar.gz: '09ea0d8b876b8d061a67af94dfc2c30e2b272c0e8a0c349070823f4ec0961b1d87db956cf1fb1da0b98a0405f7d48d9ca5307526f414c51ab00f5e44ebb03df0'
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- arquivo (0.1.5)
4
+ arquivo (0.1.6)
5
5
  fastimage (~> 2.1)
6
6
  google-api-client (~> 0.34)
7
7
  pdf-reader (~> 2.3)
@@ -3,23 +3,16 @@
3
3
  require 'thor'
4
4
  require 'arquivo/version'
5
5
  require 'arquivo/extrato'
6
- require 'arquivo/pdf'
7
6
  require 'arquivo/dir'
7
+ require 'arquivo/pdf'
8
8
  require 'arquivo/jpg'
9
9
 
10
10
  module Arquivo
11
11
  class Error < StandardError; end
12
12
 
13
- # size limit for trim attempt
14
- LT = 9000
15
-
16
- # A4 page (8.27x11.69) inches
17
- X4 = 8.27
18
- Y4 = 11.69
19
-
20
13
  # CLI para analisar/processar documentos c118
21
14
  class CLI < Thor
22
- desc 'pdf FILE', 'processa extratos ou faturas'
15
+ desc 'pdf PDF', 'processa PDF criando pasta de documentos para arquivo'
23
16
  def pdf(file)
24
17
  return unless File.ftype(file) == 'file'
25
18
 
@@ -37,9 +30,9 @@ module Arquivo
37
30
 
38
31
  desc 'dir PASTA', 'processa faturas/recibos/extratos/minutas'
39
32
  option :fuzz, type: :numeric, default: 29,
40
- desc: 'fuzz trim N-1 jpg -> escolhe menor scanned pdf'
33
+ desc: 'fuzz trim jpg N-1, escolhe menor -> scanned pdf'
41
34
  option :quality, type: :numeric, default: 15,
42
- desc: 'compress N% jpg -> scanned pdf (less=low quality)'
35
+ desc: 'compress jpg N% -> scanned pdf (less=low quality)'
43
36
  def dir(fdir)
44
37
  return unless File.ftype(fdir) == 'directory'
45
38
 
@@ -4,20 +4,9 @@ require 'google/apis/sheets_v4'
4
4
  require 'googleauth'
5
5
  require 'googleauth/stores/file_token_store'
6
6
 
7
- # ordenacao c118 dos documentos
8
- class Array
9
- def c118_ordena
10
- sort do |a, b|
11
- # processa pela ordem <st2>-<st1>
12
- c = File.basename(a, '.???')
13
- d = File.basename(b, '.???')
14
- c[/(\w+)-(\w+)/, 2] + c[/(\w+)-(\w+)/, 1] <=>
15
- d[/(\w+)-(\w+)/, 2] + d[/(\w+)-(\w+)/, 1]
16
- end
17
- end
18
- end
19
-
20
7
  module Arquivo
8
+ CO = '1>/dev/null 2>&1'
9
+
21
10
  # analisar/processar pasta
22
11
  class C118dir < Enumerator
23
12
  # @return [Enumerator] lista ordenada items dentro duma pasta
@@ -27,25 +16,25 @@ module Arquivo
27
16
  # @return [Hash] dados (faturas/recibos) de c118-contas
28
17
  attr_reader :dados
29
18
 
30
- # @return [String] base do documento
19
+ # @return [String] base nome ficheiros finais (pdf, tar.gz)
31
20
  attr_reader :base
32
21
 
33
22
  # @return [C118dir] documentos c118
34
23
  def initialize(pasta)
35
24
  @items = Dir.glob(File.join(pasta, '*')).each
36
- @base = File.basename(pasta, File.extname(pasta)).downcase +
25
+ @base = File.basename(pasta, File.extname(pasta)) +
37
26
  Date.today.strftime('%Y%m%d')
38
27
  obter_dados(pasta)
39
28
  end
40
29
 
41
30
  def obter_dados(dir)
42
- return unless /factura/i.match?(dir) ||
31
+ return unless /fac?tura/i.match?(dir) ||
43
32
  /recibo/i.match?(dir) ||
44
33
  dados.empty?
45
34
 
46
35
  # obtem dados (faturas/recibos) da sheet c118-contas
47
36
  id = '1PbiMrtTtqGztZMhe3AiJbDS6NQE9o3hXebnQEFdt954'
48
- sh = (/factura/i.match?(dir) ? 'rft' : 'rrc') + '!A2:E'
37
+ sh = (/fac?tura/i.match?(dir) ? 'rft' : 'rrc') + '!A2:E'
49
38
  @dados = c118_sheets.get_spreadsheet_values(id, sh).values
50
39
  .group_by { |k| k[0][/\w+/] }
51
40
  rescue StandardError
@@ -60,6 +49,7 @@ module Arquivo
60
49
  # file obtido console.cloud.google.com/apis OAuth 2.0 client IDs
61
50
  i = Google::Auth::ClientId.from_file("#{p}credentials.json")
62
51
  s = Google::Apis::SheetsV4::AUTH_SPREADSHEETS_READONLY
52
+ # file criado aquando new_credentials is executed
63
53
  f = Google::Auth::Stores::FileTokenStore.new(file: "#{p}token.yaml")
64
54
  z = Google::Auth::UserAuthorizer.new(i, s, f)
65
55
 
@@ -74,10 +64,10 @@ module Arquivo
74
64
  #
75
65
  # @return [Google::Auth::UserAuthorizer] OAuth2 credentials
76
66
  def new_credentials(aut, oob)
77
- puts 'Open URL and copy code after authorization',
67
+ puts 'Open URL and copy code after authorization, in <codigo-aqui>',
78
68
  aut.get_authorization_url(base_url: oob)
79
69
  aut.get_and_store_credentials_from_code(user_id: 'default',
80
- code: 'codigo aqui',
70
+ code: '<codigo-aqui>',
81
71
  base_url: oob)
82
72
  end
83
73
 
@@ -102,10 +92,9 @@ module Arquivo
102
92
 
103
93
  def processa_fim
104
94
  system "rm -f #{base}.*;" \
105
- "pdftk tmp/stamped*.pdf cat output #{base}.pdf;cd tmp/zip;" \
106
- "tar cf ../../#{base}.tar *.pdf;" \
107
- "gzip --best ../../#{base}.tar;"
108
- # 'rm -f *.pdf ../*.pdf'
95
+ "pdftk tmp/stamped*.pdf cat output #{base}.pdf;" \
96
+ "cd tmp/zip;tar cf ../../#{base}.tar *.pdf;" \
97
+ "gzip --best ../../#{base}.tar"
109
98
  end
110
99
 
111
100
  def processa_file(options)
@@ -11,39 +11,30 @@ module Arquivo
11
11
  attr_reader :ext
12
12
  # @return [String] base do documento
13
13
  attr_reader :base
14
-
15
- # @return [String] texto duma pagina pdf
16
- attr_reader :page
17
- # @return [String] base extrato processado
18
- attr_reader :nome
19
- # @return [String] list paginas extrato processado
20
- attr_reader :list
21
-
22
14
  # @return [String] key do documento ft????/rc????/ex??0??/sc??????
23
15
  attr_reader :key
24
- # @return [Numeric] tamanho do pdf
16
+ # @return [Integer] tamanho do pdf
25
17
  attr_reader :size
26
18
 
27
- # @return [C118jpg] scanned jpg em processamento
28
- attr_reader :pjpg
29
- # @return [C118pdf] pdf em processamento
30
- attr_reader :ppdf
19
+ # @return [Array<Integer>] numeros pagina do extrato final
20
+ attr_reader :paginas
21
+ # @return [String] texto pagina pdf
22
+ attr_reader :pagina
23
+ # @return [String] nome extrato
24
+ attr_reader :nome
31
25
 
32
26
  # @return [C118pdf] pdf c118
33
27
  def initialize(fpdf)
34
28
  @file = fpdf
35
29
  @ext = File.extname(fpdf).downcase
36
- @base = File.basename(fpdf, File.extname(fpdf)).downcase
37
-
30
+ @base = File.basename(fpdf, File.extname(fpdf))
38
31
  @key = @base[/\w+/]
39
32
  @size = File.size(fpdf)
40
-
41
- @ppdf = self
42
33
  end
43
34
 
44
35
  def c118_gs
45
36
  # filtrar images para scq e extratos
46
- fi = /^[se]/i.match?(key.to_s) ? ' -dFILTERIMAGE' : ''
37
+ fi = /^[se]/i.match?(key) ? ' -dFILTERIMAGE' : ''
47
38
 
48
39
  'gs -sDEVICE=pdfwrite ' \
49
40
  '-dNOPAUSE -dBATCH -dQUIET ' \
@@ -57,7 +48,7 @@ module Arquivo
57
48
  def processa_extrato?
58
49
  return true if !File.exist?(base) &&
59
50
  File.exist?(file) && ext == '.pdf' &&
60
- first_page
51
+ first_extrato
61
52
 
62
53
  if File.exist?(base)
63
54
  puts "erro: #{base} pasta ja existe"
@@ -69,8 +60,8 @@ module Arquivo
69
60
 
70
61
  def processa_extrato(cnt)
71
62
  cnt += 1
72
- @list += ',' + cnt.to_s if c118_conta?
73
- if next_page
63
+ @paginas << cnt if conta_c118?
64
+ if proxima_pagina
74
65
  faz_extrato if extrato?
75
66
  processa_extrato(cnt)
76
67
  else
@@ -79,47 +70,47 @@ module Arquivo
79
70
  end
80
71
 
81
72
  def extrato?
82
- c118_conta? && page.match?(/extrato +combinado/i)
73
+ conta_c118? && pagina.match?(/extrato +combinado/i)
83
74
  end
84
75
 
85
76
  def faz_extrato
86
77
  system "#{c118_gs} " \
87
78
  "-sOutputFile=#{base}/#{nome}-extrato.pdf " \
88
- "-sPageList=#{list[1..-1]} \"#{file}\" 1>/dev/null 2>&1"
79
+ "-sPageList=#{paginas.join(',')} \"#{file}\" #{CO}"
89
80
  puts "#{nome}-extrato"
90
- base_extrato
81
+ proximo_extrato
91
82
  end
92
83
 
93
- def c118_conta?
94
- page.include?('45463760224')
84
+ def conta_c118?
85
+ pagina.include?('45463760224')
95
86
  end
96
87
 
97
- # @return [PDF::Reader] leitor pdf
98
- def rpdf
99
- @rpdf ||= PDF::Reader.new(file).pages.lazy
88
+ # @return [Enumerator::Lazy] leitor pdf
89
+ def leitor
90
+ @leitor ||= PDF::Reader.new(file).pages.lazy
100
91
  rescue StandardError
101
- @rpdf = nil
92
+ @leitor = nil
102
93
  end
103
94
 
104
95
  # @return [String] texto duma pagina pdf
105
- def next_page
106
- @page = rpdf.next.text
96
+ def proxima_pagina
97
+ @pagina = leitor.next.text
107
98
  rescue StopIteration
108
- @page = nil
99
+ @pagina = nil
109
100
  end
110
101
 
111
- def base_extrato
112
- return false unless page
102
+ def proximo_extrato
103
+ return false unless pagina
113
104
 
114
- @list = ''
115
- n = page.scan(%r{N\. *(\d+)/(\d+)}).flatten
105
+ @paginas = []
106
+ n = pagina.scan(%r{N\. *(\d+)/(\d+)}).flatten
116
107
  @nome = "ex#{n[0].to_s[/\d{2}$/]}#{n[1]}"
117
108
  rescue StandardError
118
109
  @nome = nil
119
110
  end
120
111
 
121
- def first_page
122
- rpdf && next_page && base_extrato
112
+ def first_extrato
113
+ leitor && proxima_pagina && proximo_extrato
123
114
  end
124
115
 
125
116
  def split
@@ -3,7 +3,19 @@
3
3
  require 'fastimage'
4
4
 
5
5
  module Arquivo
6
- # analisar/processar pdf
6
+ # size limit after trim attempt
7
+ LT = 9000
8
+
9
+ # A4 page (8.27x11.69) inches
10
+ X4 = 8.27
11
+ Y4 = 11.69
12
+
13
+ # to calculate image density (in dpi) needed to fit
14
+ # the image with a 2% border all around an A4 page.
15
+ # Factor 1.04 creates 2*2% borders,
16
+ FB = 1.04
17
+
18
+ # analisar/processar jpg
7
19
  class C118jpg < String
8
20
  # @return [String] nome do ficheiro
9
21
  attr_reader :file
@@ -11,24 +23,31 @@ module Arquivo
11
23
  attr_reader :ext
12
24
  # @return [String] base do ficheiro
13
25
  attr_reader :base
14
-
15
26
  # @return [String] key do documento ft????/rc????/ex??0??/sc??????
16
27
  attr_reader :key
17
- # @return [Numeric] tamanho do jpg
28
+ # @return [Integer] tamanho do jpg
18
29
  attr_reader :size
19
30
 
20
31
  # @return [C118jpg] jpg c118
21
32
  def initialize(fjpg)
22
33
  @file = fjpg
23
34
  @ext = File.extname(fjpg).downcase
24
- @base = File.basename(fjpg, File.extname(fjpg)).downcase
25
-
35
+ @base = File.basename(fjpg, File.extname(fjpg))
26
36
  @key = @base[/\w+/]
27
37
  @size = File.size(fjpg)
28
38
  end
29
39
 
30
40
  def processa_jpg(options, dados)
31
- trim(options).jpg2pdf(options).final(dados[key])
41
+ trim(options).converte(options).final(dados[key]).marca
42
+ end
43
+
44
+ def parm_trim(options, fuzz)
45
+ "-fuzz #{fuzz}% -trim +repage #{parm_qualidade(options)} " \
46
+ "tmp/#{key}-#{fuzz}.jpg #{CO}"
47
+ end
48
+
49
+ def parm_qualidade(options)
50
+ "-quality #{options[:quality]}% -compress jpeg"
32
51
  end
33
52
 
34
53
  def trim(options)
@@ -36,42 +55,33 @@ module Arquivo
36
55
  h = {}
37
56
  # obter jpg menor triming borders ao maximo
38
57
  while f >= 1
39
- system "convert \"#{file}\" -fuzz #{f}% -trim +repage " \
40
- "tmp/#{base}#{f}.jpg "
41
- h[f] = File.size("tmp/#{base}#{f}.jpg")
58
+ system "convert \"#{file}\" #{parm_trim(options, f)}"
59
+ h[f] = File.size("tmp/#{key}-#{f}.jpg")
42
60
  f -= 4
43
61
  end
44
62
  m = h.min_by { |_, v| v }
45
- m[1].between?(LT, size) ? C118jpg.new("tmp/#{base}#{m[0]}.jpg") : self
63
+ m[1].between?(LT, size) ? C118jpg.new("tmp/#{key}-#{m[0]}.jpg") : self
46
64
  end
47
65
 
48
- def jpg2pdf(options)
49
- o = "tmp/#{base}.pdf"
50
-
51
- # Center image on a larger canvas (with a size given by "-extent").
52
- x, y = scale_xy
53
- system "convert \"#{file}\" -units PixelsPerInch " \
54
- "-gravity center -extent #{x}x#{y} " \
55
- "-quality #{options[:quality]}% -compress jpeg -format pdf " \
56
- "#{o} 1>/dev/null 2>&1"
66
+ def converte(options)
67
+ # expande jpg on a larger canvas
68
+ system "convert \"#{file}\" #{expande} #{parm_qualidade(options)} " \
69
+ "-format pdf tmp/#{key}-trimed.pdf #{CO}"
57
70
 
58
71
  # devolve pdf processado a partir de jpg
59
- C118pdf.new(o)
72
+ C118pdf.new("tmp/#{key}-trimed.pdf")
60
73
  end
61
74
 
62
- def scale_xy
63
- # Determine image dimensions in pixels.
75
+ def expande
76
+ # image dimensions in pixels.
64
77
  x, y = FastImage.size(file)
65
78
 
66
- # Calculate image density (in dpi) needed to fit the image
67
- # with a 5% border all around an A4 page.
68
- # Factor 1.1 creates 2*5% borders,
69
- # Use the higher density to prevent exceeding the required fit.
70
- density = [x / X4 * 1.04, y / Y4 * 1.04].max
79
+ # use the higher density to prevent exceeding fit
80
+ density = [x / X4 * FB, y / Y4 * FB].max
71
81
 
72
- # Calculate canvas dimensions in pixels.
73
- # (Canvas is an A4 page with the calculated density.)
74
- [X4 * density, Y4 * density]
82
+ # canvas is an A4 page with the calculated density
83
+ '-units PixelsPerInch -gravity center ' \
84
+ "-extent #{X4 * density}x#{Y4 * density}"
75
85
  end
76
86
  end
77
87
  end
@@ -1,40 +1,40 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'i18n'
4
+
4
5
  I18n.config.available_locales = :pt
5
6
 
6
7
  module Arquivo
7
8
  # analisar/processar pdf
8
9
  class C118pdf < String
9
10
  def processa_pdf(options, dados)
10
- @ppdf = pjpg.trim(options).jpg2pdf(options) if jpg?
11
- @ppdf = self if size < ppdf.size
12
- ppdf.final(dados[key])
11
+ # em caso de scanned pdf extract.trim.jpg -> trimed pdf
12
+ tpdf = jpg? ? extract.trim(options).converte(options) : self
13
+
14
+ # usar trimed pdf somente se for menor que original
15
+ (tpdf.size < size ? tpdf : self).final(dados[key]).marca
13
16
  end
14
17
 
15
18
  def marca
16
- # produzir pdf com stamp
17
19
  o = "tmp/stamped-#{base[/-(\w+)/, 1]}-#{key}.pdf"
18
- t = '2 2 moveto /Ubuntu findfont 7 scalefont ' \
20
+ s = '2 2 moveto /Ubuntu findfont 7 scalefont ' \
19
21
  "setfont (#{base}) show"
20
- system "#{c118_gs} -sOutputFile=tmp/stamp-#{key}.pdf -c \"#{t}\";\
21
- pdftk tmp/zip/#{base}.pdf stamp tmp/stamp-#{key}.pdf output #{o}"
22
-
23
- C118pdf.new(o)
22
+ system "#{c118_gs} -sOutputFile=tmp/stamp-#{key}.pdf -c \"#{s}\";" \
23
+ "pdftk tmp/zip/#{base}.pdf " \
24
+ "stamp tmp/stamp-#{key}.pdf output #{o} #{CO}"
24
25
  end
25
26
 
26
27
  def final(kda)
27
28
  c118_stamp(kda)
28
29
  o = "tmp/zip/#{base}.pdf"
29
30
 
30
- if key[0] == 'r'
31
- # google producess better && smaller pdf then c118_gs
32
- system "cp \"#{file}\" #{o}"
33
- else
34
- system "#{c118_gs} -sOutputFile=#{o} \"#{file}\" 1>/dev/null 2>&1"
35
- end
36
- @ppdf = C118pdf.new(o) if File.size(o) <= size
37
- ppdf.marca
31
+ recibo = key[0] == 'r'
32
+ # google print has better && smaller pdf then c118_gs
33
+ system "#{c118_gs} -sOutputFile=#{o} \"#{file}\" #{CO}" unless recibo
34
+ # usar copia do original se processado for maior
35
+ system "cp \"#{file}\" #{o}" if recibo || File.size(o) > size
36
+
37
+ C118pdf.new(o)
38
38
  end
39
39
 
40
40
  def base_stamp(kda)
@@ -94,22 +94,20 @@ module Arquivo
94
94
  def jpg?
95
95
  return false if key[0] == 'r'
96
96
 
97
- o = "tmp/#{base}.txt"
97
+ o = "tmp/#{key}.txt"
98
98
  # teste scanned pdf (se contem texto -> not scanned)
99
99
  system "pdftotext -q -eol unix -nopgbrk \"#{file}\" #{o}"
100
- return false if File.size?(o)
101
-
102
- @pjpg = extract_jpg
100
+ File.size?(o) ? false : true
103
101
  end
104
102
 
105
- def extract_jpg
106
- o = "tmp/#{base}3.jpg"
103
+ def extract
104
+ o = "tmp/#{key}-extract.jpg"
107
105
 
108
- system "pdfimages -q -j #{file} tmp/#{base}2"
106
+ system "pdfimages -q -j \"#{file}\" tmp/#{key}"
109
107
  # nem sempre as imagens sao jpg
110
108
  # somente utilizar a primeira
111
- g = Dir.glob("tmp/#{base}2*.???")
112
- system "convert #{g[0]} #{o} 1>/dev/null 2>&1"
109
+ g = Dir.glob("tmp/#{key}-???.???")
110
+ system "convert #{g[0]} #{o} #{CO}"
113
111
  return unless File.size(o) > LT
114
112
 
115
113
  C118jpg.new(o)
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Arquivo
4
- VERSION = '0.1.5'
4
+ VERSION = '0.1.6'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: arquivo
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
4
+ version: 0.1.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Hernâni Rodrigues Vaz
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-12-14 00:00:00.000000000 Z
11
+ date: 2019-12-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler