RubyGems - arquivo - Versions diffs - 0.1.5 → 0.1.6 - Mend

arquivo 0.1.5 → 0.1.6

Files changed (9) hide show

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 455448d9835d5e9ca98b7aa885e6faa0d8fe55896f58af9f2a225eb031055b6e
-  data.tar.gz: ce466c0ee18b09c0e3f782150a7f04c7a40df189f79fce3b051aba6a999c1bbf
+  metadata.gz: 5c32add30cf30ae788f2ace6ba483e70104a801d5078952ea4d4bb623b3b77da
+  data.tar.gz: c2137272520d205c30930579d36338d98635b3084f4bb12cdd139a5d27aa928b
 SHA512:
-  metadata.gz: 78a428ec047c3de7e902feb3e9624cef67243c463bf306ef197d760db78b758c631629d143a470b0991220db283eb78b38df2c31dc65bbedb2b89b663c752bf3
-  data.tar.gz: f7dc9bbc50fc95766949bfdb2d3af06cceab0d7842a0ffe4a858db6b383ca3f187a6d402abb6668de5419a06ae5d10c0a0419fe3aeabc996037facf1457ae59e
+  metadata.gz: 58dd3ebad2641ac46c45dec416a6b45d911905b9b270ebeebf35e3f4b396bbedf800bd65858593d5a52624eb4bc42998d965e007c8576849621ca3e4c1fe1f6f
+  data.tar.gz: '09ea0d8b876b8d061a67af94dfc2c30e2b272c0e8a0c349070823f4ec0961b1d87db956cf1fb1da0b98a0405f7d48d9ca5307526f414c51ab00f5e44ebb03df0'

data/Gemfile.lock CHANGED

@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    arquivo (0.1.5)
+    arquivo (0.1.6)
       fastimage (~> 2.1)
       google-api-client (~> 0.34)
       pdf-reader (~> 2.3)

data/lib/arquivo.rb CHANGED

@@ -3,23 +3,16 @@
 require 'thor'
 require 'arquivo/version'
 require 'arquivo/extrato'
-require 'arquivo/pdf'
 require 'arquivo/dir'
+require 'arquivo/pdf'
 require 'arquivo/jpg'
 module Arquivo
   class Error < StandardError; end
-  # size limit for trim attempt
-  LT = 9000
-  # A4 page (8.27x11.69) inches
-  X4 = 8.27
-  Y4 = 11.69
   # CLI para analisar/processar documentos c118
   class CLI < Thor
-    desc 'pdf FILE', 'processa extratos ou faturas'
+    desc 'pdf PDF', 'processa PDF criando pasta de documentos para arquivo'
     def pdf(file)
       return unless File.ftype(file) == 'file'
@@ -37,9 +30,9 @@ module Arquivo
     desc 'dir PASTA', 'processa faturas/recibos/extratos/minutas'
     option :fuzz, type: :numeric, default: 29,
-                  desc: 'fuzz trim N-1 jpg -> escolhe menor scanned pdf'
+                  desc: 'fuzz trim jpg N-1, escolhe menor -> scanned pdf'
     option :quality, type: :numeric, default: 15,
-                     desc: 'compress N% jpg -> scanned pdf (less=low quality)'
+                     desc: 'compress jpg N% -> scanned pdf (less=low quality)'
     def dir(fdir)
       return unless File.ftype(fdir) == 'directory'

data/lib/arquivo/dir.rb CHANGED

@@ -4,20 +4,9 @@ require 'google/apis/sheets_v4'
 require 'googleauth'
 require 'googleauth/stores/file_token_store'
-# ordenacao c118 dos documentos
-class Array
-  def c118_ordena
-    sort do |a, b|
-      # processa pela ordem <st2>-<st1>
-      c = File.basename(a, '.???')
-      d = File.basename(b, '.???')
-      c[/(\w+)-(\w+)/, 2] + c[/(\w+)-(\w+)/, 1] <=>
-        d[/(\w+)-(\w+)/, 2] + d[/(\w+)-(\w+)/, 1]
-    end
-  end
-end
 module Arquivo
+  CO = '1>/dev/null 2>&1'
   # analisar/processar pasta
   class C118dir < Enumerator
     # @return [Enumerator] lista ordenada items dentro duma pasta
@@ -27,25 +16,25 @@ module Arquivo
     # @return [Hash] dados (faturas/recibos) de c118-contas
     attr_reader :dados
-    # @return [String] base do documento
+    # @return [String] base nome ficheiros finais (pdf, tar.gz)
     attr_reader :base
     # @return [C118dir] documentos c118
     def initialize(pasta)
       @items = Dir.glob(File.join(pasta, '*')).each
-      @base = File.basename(pasta, File.extname(pasta)).downcase +
+      @base = File.basename(pasta, File.extname(pasta)) +
               Date.today.strftime('%Y%m%d')
       obter_dados(pasta)
     end
     def obter_dados(dir)
-      return unless /factura/i.match?(dir) ||
+      return unless /fac?tura/i.match?(dir) ||
                     /recibo/i.match?(dir) ||
                     dados.empty?
       # obtem dados (faturas/recibos) da sheet c118-contas
       id = '1PbiMrtTtqGztZMhe3AiJbDS6NQE9o3hXebnQEFdt954'
-      sh = (/factura/i.match?(dir) ? 'rft' : 'rrc') + '!A2:E'
+      sh = (/fac?tura/i.match?(dir) ? 'rft' : 'rrc') + '!A2:E'
       @dados = c118_sheets.get_spreadsheet_values(id, sh).values
                           .group_by { |k| k[0][/\w+/] }
     rescue StandardError
@@ -60,6 +49,7 @@ module Arquivo
       # file obtido console.cloud.google.com/apis OAuth 2.0 client IDs
       i = Google::Auth::ClientId.from_file("#{p}credentials.json")
       s = Google::Apis::SheetsV4::AUTH_SPREADSHEETS_READONLY
+      # file criado aquando new_credentials is executed
       f = Google::Auth::Stores::FileTokenStore.new(file: "#{p}token.yaml")
       z = Google::Auth::UserAuthorizer.new(i, s, f)
@@ -74,10 +64,10 @@ module Arquivo
     #
     # @return [Google::Auth::UserAuthorizer] OAuth2 credentials
     def new_credentials(aut, oob)
-      puts 'Open URL and copy code after authorization',
+      puts 'Open URL and copy code after authorization, in <codigo-aqui>',
            aut.get_authorization_url(base_url: oob)
       aut.get_and_store_credentials_from_code(user_id: 'default',
-                                              code: 'codigo aqui',
+                                              code: '<codigo-aqui>',
                                               base_url: oob)
     end
@@ -102,10 +92,9 @@ module Arquivo
     def processa_fim
       system "rm -f #{base}.*;" \
-             "pdftk tmp/stamped*.pdf cat output #{base}.pdf;cd tmp/zip;" \
-             "tar cf ../../#{base}.tar *.pdf;" \
-             "gzip --best ../../#{base}.tar;"
-      # 'rm -f *.pdf ../*.pdf'
+             "pdftk tmp/stamped*.pdf cat output #{base}.pdf;" \
+             "cd tmp/zip;tar cf ../../#{base}.tar *.pdf;" \
+             "gzip --best ../../#{base}.tar"
     end
     def processa_file(options)

data/lib/arquivo/extrato.rb CHANGED

@@ -11,39 +11,30 @@ module Arquivo
     attr_reader :ext
     # @return [String] base do documento
     attr_reader :base
-    # @return [String] texto duma pagina pdf
-    attr_reader :page
-    # @return [String] base extrato processado
-    attr_reader :nome
-    # @return [String] list paginas extrato processado
-    attr_reader :list
     # @return [String] key do documento ft????/rc????/ex??0??/sc??????
     attr_reader :key
-    # @return [Numeric] tamanho do pdf
+    # @return [Integer] tamanho do pdf
     attr_reader :size
-    # @return [C118jpg] scanned jpg em processamento
-    attr_reader :pjpg
-    # @return [C118pdf] pdf em processamento
-    attr_reader :ppdf
+    # @return [Array<Integer>] numeros pagina do extrato final
+    attr_reader :paginas
+    # @return [String] texto pagina pdf
+    attr_reader :pagina
+    # @return [String] nome extrato
+    attr_reader :nome
     # @return [C118pdf] pdf c118
     def initialize(fpdf)
       @file = fpdf
       @ext = File.extname(fpdf).downcase
-      @base = File.basename(fpdf, File.extname(fpdf)).downcase
+      @base = File.basename(fpdf, File.extname(fpdf))
       @key = @base[/\w+/]
       @size = File.size(fpdf)
-      @ppdf = self
     end
     def c118_gs
       # filtrar images para scq e extratos
-      fi = /^[se]/i.match?(key.to_s) ? ' -dFILTERIMAGE' : ''
+      fi = /^[se]/i.match?(key) ? ' -dFILTERIMAGE' : ''
       'gs -sDEVICE=pdfwrite ' \
         '-dNOPAUSE -dBATCH -dQUIET ' \
@@ -57,7 +48,7 @@ module Arquivo
     def processa_extrato?
       return true if !File.exist?(base) &&
                      File.exist?(file) && ext == '.pdf' &&
-                     first_page
+                     first_extrato
       if File.exist?(base)
         puts "erro: #{base} pasta ja existe"
@@ -69,8 +60,8 @@ module Arquivo
     def processa_extrato(cnt)
       cnt += 1
-      @list += ',' + cnt.to_s if c118_conta?
-      if next_page
+      @paginas << cnt if conta_c118?
+      if proxima_pagina
         faz_extrato if extrato?
         processa_extrato(cnt)
       else
@@ -79,47 +70,47 @@ module Arquivo
     end
     def extrato?
-      c118_conta? && page.match?(/extrato +combinado/i)
+      conta_c118? && pagina.match?(/extrato +combinado/i)
     end
     def faz_extrato
       system "#{c118_gs} " \
         "-sOutputFile=#{base}/#{nome}-extrato.pdf " \
-        "-sPageList=#{list[1..-1]} \"#{file}\" 1>/dev/null 2>&1"
+        "-sPageList=#{paginas.join(',')} \"#{file}\" #{CO}"
       puts "#{nome}-extrato"
-      base_extrato
+      proximo_extrato
     end
-    def c118_conta?
-      page.include?('45463760224')
+    def conta_c118?
+      pagina.include?('45463760224')
     end
-    # @return [PDF::Reader] leitor pdf
-    def rpdf
-      @rpdf ||= PDF::Reader.new(file).pages.lazy
+    # @return [Enumerator::Lazy] leitor pdf
+    def leitor
+      @leitor ||= PDF::Reader.new(file).pages.lazy
     rescue StandardError
-      @rpdf = nil
+      @leitor = nil
     end
     # @return [String] texto duma pagina pdf
-    def next_page
-      @page = rpdf.next.text
+    def proxima_pagina
+      @pagina = leitor.next.text
     rescue StopIteration
-      @page = nil
+      @pagina = nil
     end
-    def base_extrato
-      return false unless page
+    def proximo_extrato
+      return false unless pagina
-      @list = ''
-      n = page.scan(%r{N\. *(\d+)/(\d+)}).flatten
+      @paginas = []
+      n = pagina.scan(%r{N\. *(\d+)/(\d+)}).flatten
       @nome = "ex#{n[0].to_s[/\d{2}$/]}#{n[1]}"
     rescue StandardError
       @nome = nil
     end
-    def first_page
-      rpdf && next_page && base_extrato
+    def first_extrato
+      leitor && proxima_pagina && proximo_extrato
     end
     def split

data/lib/arquivo/jpg.rb CHANGED

@@ -3,7 +3,19 @@
 require 'fastimage'
 module Arquivo
-  # analisar/processar pdf
+  # size limit after trim attempt
+  LT = 9000
+  # A4 page (8.27x11.69) inches
+  X4 = 8.27
+  Y4 = 11.69
+  # to calculate image density (in dpi) needed to fit
+  # the image with a 2% border all around an A4 page.
+  # Factor 1.04 creates 2*2% borders,
+  FB = 1.04
+  # analisar/processar jpg
   class C118jpg < String
     # @return [String] nome do ficheiro
     attr_reader :file
@@ -11,24 +23,31 @@ module Arquivo
     attr_reader :ext
     # @return [String] base do ficheiro
     attr_reader :base
     # @return [String] key do documento ft????/rc????/ex??0??/sc??????
     attr_reader :key
-    # @return [Numeric] tamanho do jpg
+    # @return [Integer] tamanho do jpg
     attr_reader :size
     # @return [C118jpg] jpg c118
     def initialize(fjpg)
       @file = fjpg
       @ext = File.extname(fjpg).downcase
-      @base = File.basename(fjpg, File.extname(fjpg)).downcase
+      @base = File.basename(fjpg, File.extname(fjpg))
       @key = @base[/\w+/]
       @size = File.size(fjpg)
     end
     def processa_jpg(options, dados)
-      trim(options).jpg2pdf(options).final(dados[key])
+      trim(options).converte(options).final(dados[key]).marca
+    end
+    def parm_trim(options, fuzz)
+      "-fuzz #{fuzz}% -trim +repage #{parm_qualidade(options)} " \
+        "tmp/#{key}-#{fuzz}.jpg #{CO}"
+    end
+    def parm_qualidade(options)
+      "-quality #{options[:quality]}% -compress jpeg"
     end
     def trim(options)
@@ -36,42 +55,33 @@ module Arquivo
       h = {}
       # obter jpg menor triming borders ao maximo
       while f >= 1
-        system "convert \"#{file}\" -fuzz #{f}% -trim +repage " \
-               "tmp/#{base}#{f}.jpg "
-        h[f] = File.size("tmp/#{base}#{f}.jpg")
+        system "convert \"#{file}\" #{parm_trim(options, f)}"
+        h[f] = File.size("tmp/#{key}-#{f}.jpg")
         f -= 4
       end
       m = h.min_by { |_, v| v }
-      m[1].between?(LT, size) ? C118jpg.new("tmp/#{base}#{m[0]}.jpg") : self
+      m[1].between?(LT, size) ? C118jpg.new("tmp/#{key}-#{m[0]}.jpg") : self
     end
-    def jpg2pdf(options)
-      o = "tmp/#{base}.pdf"
-      # Center image on a larger canvas (with a size given by "-extent").
-      x, y = scale_xy
-      system "convert \"#{file}\" -units PixelsPerInch " \
-             "-gravity center -extent #{x}x#{y} " \
-             "-quality #{options[:quality]}% -compress jpeg -format pdf " \
-             "#{o} 1>/dev/null 2>&1"
+    def converte(options)
+      # expande jpg on a larger canvas
+      system "convert \"#{file}\" #{expande} #{parm_qualidade(options)} " \
+             "-format pdf tmp/#{key}-trimed.pdf #{CO}"
       # devolve pdf processado a partir de jpg
-      C118pdf.new(o)
+      C118pdf.new("tmp/#{key}-trimed.pdf")
     end
-    def scale_xy
-      # Determine image dimensions in pixels.
+    def expande
+      # image dimensions in pixels.
       x, y = FastImage.size(file)
-      # Calculate image density (in dpi) needed to fit the image
-      # with a 5% border all around an A4 page.
-      # Factor 1.1 creates 2*5% borders,
-      # Use the higher density to prevent exceeding the required fit.
-      density = [x / X4 * 1.04, y / Y4 * 1.04].max
+      # use the higher density to prevent exceeding fit
+      density = [x / X4 * FB, y / Y4 * FB].max
-      # Calculate canvas dimensions in pixels.
-      # (Canvas is an A4 page with the calculated density.)
-      [X4 * density, Y4 * density]
+      # canvas is an A4 page with the calculated density
+      '-units PixelsPerInch -gravity center ' \
+        "-extent #{X4 * density}x#{Y4 * density}"
     end
   end
 end

data/lib/arquivo/pdf.rb CHANGED

@@ -1,40 +1,40 @@
 # frozen_string_literal: true
 require 'i18n'
 I18n.config.available_locales = :pt
 module Arquivo
   # analisar/processar pdf
   class C118pdf < String
     def processa_pdf(options, dados)
-      @ppdf = pjpg.trim(options).jpg2pdf(options) if jpg?
-      @ppdf = self if size < ppdf.size
-      ppdf.final(dados[key])
+      # em caso de scanned pdf extract.trim.jpg -> trimed pdf
+      tpdf = jpg? ? extract.trim(options).converte(options) : self
+      # usar trimed pdf somente se for menor que original
+      (tpdf.size < size ? tpdf : self).final(dados[key]).marca
     end
     def marca
-      # produzir pdf com stamp
       o = "tmp/stamped-#{base[/-(\w+)/, 1]}-#{key}.pdf"
-      t = '2 2 moveto /Ubuntu findfont 7 scalefont ' \
+      s = '2 2 moveto /Ubuntu findfont 7 scalefont ' \
            "setfont (#{base}) show"
-      system "#{c118_gs} -sOutputFile=tmp/stamp-#{key}.pdf -c \"#{t}\";\
-            pdftk tmp/zip/#{base}.pdf stamp tmp/stamp-#{key}.pdf output #{o}"
-      C118pdf.new(o)
+      system "#{c118_gs} -sOutputFile=tmp/stamp-#{key}.pdf -c \"#{s}\";" \
+             "pdftk tmp/zip/#{base}.pdf " \
+             "stamp tmp/stamp-#{key}.pdf output #{o} #{CO}"
     end
     def final(kda)
       c118_stamp(kda)
       o = "tmp/zip/#{base}.pdf"
-      if key[0] == 'r'
-        # google producess better && smaller pdf then c118_gs
-        system "cp \"#{file}\" #{o}"
-      else
-        system "#{c118_gs} -sOutputFile=#{o} \"#{file}\" 1>/dev/null 2>&1"
-      end
-      @ppdf = C118pdf.new(o) if File.size(o) <= size
-      ppdf.marca
+      recibo = key[0] == 'r'
+      # google print has better && smaller pdf then c118_gs
+      system "#{c118_gs} -sOutputFile=#{o} \"#{file}\" #{CO}" unless recibo
+      # usar copia do original se processado for maior
+      system "cp \"#{file}\" #{o}" if recibo || File.size(o) > size
+      C118pdf.new(o)
     end
     def base_stamp(kda)
@@ -94,22 +94,20 @@ module Arquivo
     def jpg?
       return false if key[0] == 'r'
-      o = "tmp/#{base}.txt"
+      o = "tmp/#{key}.txt"
       # teste scanned pdf (se contem texto -> not scanned)
       system "pdftotext -q -eol unix -nopgbrk \"#{file}\" #{o}"
-      return false if File.size?(o)
-      @pjpg = extract_jpg
+      File.size?(o) ? false : true
     end
-    def extract_jpg
-      o = "tmp/#{base}3.jpg"
+    def extract
+      o = "tmp/#{key}-extract.jpg"
-      system "pdfimages -q -j #{file} tmp/#{base}2"
+      system "pdfimages -q -j \"#{file}\" tmp/#{key}"
       # nem sempre as imagens sao jpg
       # somente utilizar a primeira
-      g = Dir.glob("tmp/#{base}2*.???")
-      system "convert #{g[0]} #{o} 1>/dev/null 2>&1"
+      g = Dir.glob("tmp/#{key}-???.???")
+      system "convert #{g[0]} #{o} #{CO}"
       return unless File.size(o) > LT
       C118jpg.new(o)

data/lib/arquivo/version.rb CHANGED

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module Arquivo
-  VERSION = '0.1.5'
+  VERSION = '0.1.6'
 end

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: arquivo
 version: !ruby/object:Gem::Version
-  version: 0.1.5
+  version: 0.1.6
 platform: ruby
 authors:
 - Hernâni Rodrigues Vaz
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2019-12-14 00:00:00.000000000 Z
+date: 2019-12-15 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler