arquivo 0.1.9 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/lib/arquivo/dir.rb +29 -31
- data/lib/arquivo/extrato.rb +0 -27
- data/lib/arquivo/mp3.rb +22 -27
- data/lib/arquivo/noise.rb +18 -9
- data/lib/arquivo/pdf.rb +31 -5
- data/lib/arquivo/version.rb +1 -1
- data/lib/arquivo.rb +16 -12
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5327acf76aa8ca135c8e6faac8aeb2b5aee4a66a950cf07f3a7498f98da503f5
|
4
|
+
data.tar.gz: d37de979686bfef7182714d1b654da47dae151b3b68b52ebc0b7cf79e25cfa4b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2d82bbfe746b9dfb2337abcb2c8ef54cb0adfe9466c2b51cde53dfb54eaf1f1cb25828216531350f5c1c5f7e2cf7664f0b80b223b00b17852681826ef714c3fb
|
7
|
+
data.tar.gz: 57dd9546e5d54eb0ad94da2b2fca720e0ac2511e671c1d39f169ff8b03773452f705e25e460ac884d7ac6fb5c4ebd1c558dac341da9d56cfff1a0233ba8560e8
|
data/Gemfile.lock
CHANGED
data/lib/arquivo/dir.rb
CHANGED
@@ -4,11 +4,11 @@ require 'google/apis/sheets_v4'
|
|
4
4
|
require 'googleauth'
|
5
5
|
require 'googleauth/stores/file_token_store'
|
6
6
|
|
7
|
+
require 'arquivo/noise'
|
8
|
+
|
7
9
|
module Arquivo
|
8
|
-
|
9
|
-
|
10
|
-
O1 = ''
|
11
|
-
O2 = ''
|
10
|
+
O1 = '2>/dev/null'
|
11
|
+
O2 = '1>/dev/null 2>&1'
|
12
12
|
|
13
13
|
# analisar/processar pasta
|
14
14
|
class C118dir < Enumerator
|
@@ -22,7 +22,7 @@ module Arquivo
|
|
22
22
|
# @return [Float] maximo segundos de silencio encontrados
|
23
23
|
attr_reader :silence
|
24
24
|
# @return [String] noiseprof do silencio encontrado
|
25
|
-
attr_reader :
|
25
|
+
attr_reader :noiseprof
|
26
26
|
|
27
27
|
# @return [String] base nome ficheiros finais (pdf, tar.gz)
|
28
28
|
attr_reader :base
|
@@ -30,29 +30,35 @@ module Arquivo
|
|
30
30
|
# @return [C118dir] documentos c118
|
31
31
|
def initialize(pasta)
|
32
32
|
@items = Dir.glob(File.join(pasta, '*')).each
|
33
|
-
@base = File.basename(pasta, File.extname(pasta)) +
|
33
|
+
@base = File.basename(pasta, File.extname(pasta)) + '-' +
|
34
34
|
Date.today.strftime('%Y%m%d')
|
35
35
|
end
|
36
36
|
|
37
|
-
def
|
37
|
+
def processa_items(options)
|
38
38
|
n = 0
|
39
39
|
while next_item
|
40
40
|
if File.ftype(item) == 'directory'
|
41
|
-
C118dir.new(item).processa_pasta(options)
|
41
|
+
C118dir.new(item).processa_pasta(item, options)
|
42
42
|
else
|
43
43
|
processa_file(options, File.extname(item).downcase)
|
44
44
|
n += 1
|
45
45
|
end
|
46
46
|
end
|
47
|
-
processa_fim(n)
|
47
|
+
processa_fim(n)
|
48
48
|
end
|
49
49
|
|
50
50
|
def processa_fim(num)
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
51
|
+
return unless num.positive?
|
52
|
+
|
53
|
+
cmd = if /minuta/i.match?(base)
|
54
|
+
"rm -f #{base}.*"
|
55
|
+
else
|
56
|
+
"rm -f #{base}.*;pdftk tmp/stamped*.pdf cat output #{base}.pdf"
|
57
|
+
end
|
58
|
+
# ;rm -rf tmp
|
59
|
+
system cmd + ";cd tmp/zip;tar cf ../../#{base}.tar *" \
|
60
|
+
";cd ../..;gzip --best #{base}.tar;rm -rf tmp"
|
61
|
+
|
56
62
|
puts "#{base} (#{num})"
|
57
63
|
end
|
58
64
|
|
@@ -61,7 +67,7 @@ module Arquivo
|
|
61
67
|
when '.jpg' then C118jpg.new(item).processa_jpg(options, dados)
|
62
68
|
when '.pdf' then C118pdf.new(item).processa_pdf(options, dados)
|
63
69
|
when '.mp3', '.m4a', '.wav'
|
64
|
-
C118mp3.new(item).processa_mp3(options,
|
70
|
+
C118mp3.new(item).processa_mp3(options, noiseprof)
|
65
71
|
else
|
66
72
|
puts "erro: #{item} so posso processar mp3, jpg, pdf"
|
67
73
|
end
|
@@ -74,28 +80,20 @@ module Arquivo
|
|
74
80
|
@item = nil
|
75
81
|
end
|
76
82
|
|
77
|
-
def
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
end
|
82
|
-
|
83
|
-
def obtem_noiseprof(dir, options)
|
84
|
-
return unless /minuta/i.match?(dir) || silence&.zero?
|
83
|
+
def processa_pasta(pasta, options)
|
84
|
+
unless File.ftype(items.peek) == 'directory'
|
85
|
+
@dados = {}
|
86
|
+
obtem_dados(pasta)
|
85
87
|
|
86
|
-
if options[:nred]
|
87
88
|
@silence = 0.0
|
88
|
-
|
89
|
-
|
89
|
+
system 'mkdir -p tmp/zip'
|
90
|
+
obtem_noiseprof(pasta, options)
|
90
91
|
end
|
91
|
-
|
92
|
-
@silence = 0.0
|
92
|
+
processa_items(options)
|
93
93
|
end
|
94
94
|
|
95
95
|
def obtem_dados(dir)
|
96
|
-
return unless /fac?tura/i.match?(dir) ||
|
97
|
-
/recibo/i.match?(dir) ||
|
98
|
-
dados&.empty?
|
96
|
+
return unless /fac?tura/i.match?(dir) || /recibo/i.match?(dir)
|
99
97
|
|
100
98
|
# obtem dados (faturas/recibos) da sheet c118-contas
|
101
99
|
id = '1PbiMrtTtqGztZMhe3AiJbDS6NQE9o3hXebnQEFdt954'
|
data/lib/arquivo/extrato.rb
CHANGED
@@ -5,33 +5,6 @@ require 'pdf-reader'
|
|
5
5
|
module Arquivo
|
6
6
|
# analisar/processar pdf
|
7
7
|
class C118pdf < String
|
8
|
-
# @return [String] nome do documento
|
9
|
-
attr_reader :file
|
10
|
-
# @return [String] extensao do documento
|
11
|
-
attr_reader :ext
|
12
|
-
# @return [String] base do documento
|
13
|
-
attr_reader :base
|
14
|
-
# @return [String] key do documento ft????/rc????/ex??0??/sc??????
|
15
|
-
attr_reader :key
|
16
|
-
# @return [Integer] tamanho do pdf
|
17
|
-
attr_reader :size
|
18
|
-
|
19
|
-
# @return [Array<Integer>] numeros pagina do extrato final
|
20
|
-
attr_reader :paginas
|
21
|
-
# @return [String] texto pagina pdf
|
22
|
-
attr_reader :pagina
|
23
|
-
# @return [String] nome extrato
|
24
|
-
attr_reader :nome
|
25
|
-
|
26
|
-
# @return [C118pdf] pdf c118
|
27
|
-
def initialize(fpdf)
|
28
|
-
@file = fpdf
|
29
|
-
@ext = File.extname(fpdf).downcase
|
30
|
-
@base = File.basename(fpdf, File.extname(fpdf))
|
31
|
-
@key = @base[/\w+/]
|
32
|
-
@size = File.size(fpdf)
|
33
|
-
end
|
34
|
-
|
35
8
|
def c118_gs
|
36
9
|
# filtrar images para scq e extratos
|
37
10
|
fi = /^[se]/i.match?(key) ? ' -dFILTERIMAGE' : ''
|
data/lib/arquivo/mp3.rb
CHANGED
@@ -9,61 +9,56 @@ module Arquivo
|
|
9
9
|
attr_reader :ext
|
10
10
|
# @return [String] base do ficheiro
|
11
11
|
attr_reader :base
|
12
|
-
# @return [String] final do nome do segmento
|
13
|
-
attr_reader :final
|
14
12
|
# @return [Float] segundos do mp3
|
15
13
|
attr_reader :size
|
16
14
|
|
15
|
+
# @return [String] nome segmento
|
16
|
+
attr_reader :nome
|
17
|
+
|
17
18
|
# @return [C118mp3] mp3 c118
|
18
19
|
def initialize(fmp3)
|
19
20
|
@file = fmp3
|
20
21
|
@ext = File.extname(fmp3).downcase
|
21
22
|
@base = File.basename(fmp3, File.extname(fmp3))
|
22
|
-
@final = "-#{@base[/\d{8}/]}#{@base[/-\w+/]}#{@ext}"
|
23
23
|
@size = `soxi -V0 -D #{fmp3} #{O1}`.to_f
|
24
24
|
end
|
25
25
|
|
26
|
-
def
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
system
|
34
|
-
|
35
|
-
end
|
36
|
-
|
37
|
-
def processa_segmentos(ase, pse, cmd, amt)
|
38
|
-
return cmd[1..-1] unless pse < ase.size
|
39
|
-
|
40
|
-
fls = "#{base}/s#{pse}#{final} " \
|
41
|
-
"tmp/s#{pse}-#{final}#{base[/-\w+/]}.mp3"
|
42
|
-
cmd += nprof ? ";sox #{fls} noisered #{nprof} #{ff(amt)}" : ";cp #{fls}"
|
43
|
-
|
44
|
-
processa_segmentos(ase, pse + 1, cmd, amt)
|
45
|
-
end
|
46
|
-
|
47
|
-
def ff(val)
|
48
|
-
format('%<valor>.5f', valor: val)
|
26
|
+
def processa_mp3(options, npr)
|
27
|
+
cmd = if npr
|
28
|
+
"noisered #{npr} #{format('%<v>.5f', v: options[:amount])} "
|
29
|
+
else
|
30
|
+
''
|
31
|
+
end
|
32
|
+
cmd += "rate -v #{options[:rate]}k"
|
33
|
+
system "sox -G #{file} tmp/zip/#{base}.mp3 #{cmd} #{O2}"
|
34
|
+
# puts base
|
49
35
|
end
|
50
36
|
|
51
37
|
def segmenta(tps, pse, cmd)
|
52
38
|
return cmd[1..-1] unless pse < tps.size
|
53
39
|
|
54
|
-
|
40
|
+
puts proximo_segmento(pse)
|
41
|
+
|
42
|
+
cmd += ";sox #{file} #{nome} trim #{tps[pse]}"
|
55
43
|
pse += 1
|
56
44
|
cmd += " =#{tps[pse]}" if pse < tps.size
|
57
45
|
|
58
46
|
segmenta(tps, pse, cmd + " #{O2}")
|
59
47
|
end
|
60
48
|
|
49
|
+
def proximo_segmento(pse)
|
50
|
+
out = "s#{format('%<v>02d', v: pse)}-#{base[/\d{8}/]}#{base[/-\w+/]}"
|
51
|
+
@nome = "#{base}/#{out}#{ext}"
|
52
|
+
out
|
53
|
+
end
|
54
|
+
|
61
55
|
def processa_minuta(options)
|
62
56
|
system segmenta(['0'] + options[:tempos], 0, '')
|
63
57
|
end
|
64
58
|
|
65
59
|
def processa_minuta?
|
66
60
|
return true if ['.mp3', '.m4a', '.wav'].include?(ext) &&
|
61
|
+
size.positive? &&
|
67
62
|
!File.exist?(base)
|
68
63
|
|
69
64
|
if File.exist?(base)
|
data/lib/arquivo/noise.rb
CHANGED
@@ -3,15 +3,23 @@
|
|
3
3
|
module Arquivo
|
4
4
|
# analisar/processar pasta
|
5
5
|
class C118dir < Enumerator
|
6
|
+
def obtem_noiseprof(dir, options)
|
7
|
+
return unless /minuta/i.match?(dir) && !options[:noise]
|
8
|
+
|
9
|
+
silencio(1, duracao(item), options[:sound]) while next_item
|
10
|
+
items.rewind
|
11
|
+
@noiseprof = processa_noiseprof
|
12
|
+
end
|
13
|
+
|
6
14
|
def silencio(thr, tse, som)
|
7
|
-
|
15
|
+
o = "tmp/silencio-#{File.basename(item)}"
|
8
16
|
|
9
|
-
system "sox #{item} #{
|
17
|
+
system "sox #{item} #{o} " \
|
10
18
|
"silence 1 #{format('%<valor>.5f', valor: som)}t #{thr}% #{O2}"
|
11
19
|
|
12
|
-
return if silencio?(
|
20
|
+
return if silencio?(o, tse) || thr == 3
|
13
21
|
|
14
|
-
silencio(thr + 1,
|
22
|
+
silencio(thr + 1, tse, som)
|
15
23
|
end
|
16
24
|
|
17
25
|
def silencio?(fss, tse)
|
@@ -19,20 +27,21 @@ module Arquivo
|
|
19
27
|
return false unless s.positive? && (tse - s > silence)
|
20
28
|
|
21
29
|
@silence = tse - s
|
22
|
-
@
|
30
|
+
@noiseprof = fss
|
23
31
|
end
|
24
32
|
|
25
33
|
def duracao(seg)
|
26
34
|
`soxi -V0 -D #{seg} #{O1}`.to_f
|
27
35
|
end
|
28
36
|
|
29
|
-
def
|
37
|
+
def processa_noiseprof
|
30
38
|
return unless silence&.positive?
|
31
39
|
|
32
|
-
|
40
|
+
e = File.extname(noiseprof)
|
41
|
+
o = "tmp/noiseprof-#{File.basename(noiseprof, e)}"
|
33
42
|
# obter noiseprof do silencio encontrado
|
34
|
-
system "sox #{
|
35
|
-
"sox #{o}#{
|
43
|
+
system "sox #{noiseprof} #{o}#{e} trim 0 #{silence} #{O2};" \
|
44
|
+
"sox #{o}#{e} -n noiseprof #{o} #{O2}"
|
36
45
|
|
37
46
|
# so noiseprof validos sao devolvidos
|
38
47
|
@silence = 0.0 unless File.size?(o)
|
data/lib/arquivo/pdf.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require 'arquivo/extrato'
|
3
4
|
require 'i18n'
|
4
5
|
|
5
6
|
I18n.config.available_locales = :pt
|
@@ -7,6 +8,33 @@ I18n.config.available_locales = :pt
|
|
7
8
|
module Arquivo
|
8
9
|
# analisar/processar pdf
|
9
10
|
class C118pdf < String
|
11
|
+
# @return [String] nome do documento
|
12
|
+
attr_reader :file
|
13
|
+
# @return [String] extensao do documento
|
14
|
+
attr_reader :ext
|
15
|
+
# @return [String] base do documento
|
16
|
+
attr_reader :base
|
17
|
+
# @return [String] key do documento ft????/rc????/ex??0??/sc??????
|
18
|
+
attr_reader :key
|
19
|
+
# @return [Integer] tamanho do pdf
|
20
|
+
attr_reader :size
|
21
|
+
|
22
|
+
# @return [Array<Integer>] numeros pagina do extrato final
|
23
|
+
attr_reader :paginas
|
24
|
+
# @return [String] texto pagina pdf
|
25
|
+
attr_reader :pagina
|
26
|
+
# @return [String] nome extrato
|
27
|
+
attr_reader :nome
|
28
|
+
|
29
|
+
# @return [C118pdf] pdf c118
|
30
|
+
def initialize(fpdf)
|
31
|
+
@file = fpdf
|
32
|
+
@ext = File.extname(fpdf).downcase
|
33
|
+
@base = File.basename(fpdf, File.extname(fpdf))
|
34
|
+
@key = @base[/\w+/]
|
35
|
+
@size = File.size(fpdf)
|
36
|
+
end
|
37
|
+
|
10
38
|
def processa_pdf(options, dados)
|
11
39
|
# em caso de scanned pdf extract.trim.jpg -> trimed pdf
|
12
40
|
tpdf = jpg? ? extract.trim(options).converte(options) : self
|
@@ -22,18 +50,16 @@ module Arquivo
|
|
22
50
|
system "#{c118_gs} -sOutputFile=tmp/stamp-#{key}.pdf -c \"#{s}\";" \
|
23
51
|
"pdftk tmp/zip/#{base}.pdf " \
|
24
52
|
"stamp tmp/stamp-#{key}.pdf output #{o} #{O2}"
|
25
|
-
puts key
|
53
|
+
# puts key
|
26
54
|
end
|
27
55
|
|
28
56
|
def final(kda)
|
29
57
|
c118_stamp(kda)
|
30
58
|
o = "tmp/zip/#{base}.pdf"
|
31
59
|
|
32
|
-
|
33
|
-
# google print has better && smaller pdf then c118_gs
|
34
|
-
system "#{c118_gs} -sOutputFile=#{o} \"#{file}\" #{O2}" unless recibo
|
60
|
+
system "#{c118_gs} -sOutputFile=#{o} \"#{file}\" #{O2}"
|
35
61
|
# usar copia do original se processado for maior
|
36
|
-
system "cp \"#{file}\" #{o}" if
|
62
|
+
system "cp \"#{file}\" #{o}" if File.size(o) > size
|
37
63
|
|
38
64
|
C118pdf.new(o)
|
39
65
|
end
|
data/lib/arquivo/version.rb
CHANGED
data/lib/arquivo.rb
CHANGED
@@ -2,9 +2,7 @@
|
|
2
2
|
|
3
3
|
require 'thor'
|
4
4
|
require 'arquivo/version'
|
5
|
-
require 'arquivo/extrato'
|
6
5
|
require 'arquivo/dir'
|
7
|
-
require 'arquivo/noise'
|
8
6
|
require 'arquivo/pdf'
|
9
7
|
require 'arquivo/jpg'
|
10
8
|
require 'arquivo/mp3'
|
@@ -24,13 +22,14 @@ module Arquivo
|
|
24
22
|
f = C118mp3.new(file)
|
25
23
|
return unless f.processa_minuta?
|
26
24
|
|
27
|
-
system "mkdir -p
|
25
|
+
system "mkdir -p #{f.base}"
|
28
26
|
f.processa_minuta(options)
|
29
27
|
end
|
30
28
|
|
31
|
-
desc 'pdf
|
29
|
+
desc 'pdf EXTRATO', 'processa EXTRATO criando pasta ' \
|
30
|
+
'com documentos para arquivo'
|
32
31
|
def pdf(file)
|
33
|
-
return unless File.ftype(file) == 'file'
|
32
|
+
return unless File.exist?(file) && File.ftype(file) == 'file'
|
34
33
|
|
35
34
|
f = C118pdf.new(file)
|
36
35
|
return unless f.processa_extrato?
|
@@ -44,21 +43,26 @@ module Arquivo
|
|
44
43
|
end
|
45
44
|
end
|
46
45
|
|
47
|
-
desc 'dir PASTA', 'processa faturas/recibos/extratos/minutas'
|
46
|
+
desc 'dir PASTA', 'processa faturas/recibos/extratos/minutas ' \
|
47
|
+
' e cria arquivos c118'
|
48
48
|
option :fuzz, type: :numeric, default: 29,
|
49
49
|
desc: 'fuzz trim jpg N-1, escolhe menor -> scanned pdf'
|
50
50
|
option :quality, type: :numeric, default: 15,
|
51
51
|
desc: 'compress jpg N% -> scanned pdf (less=low quality)'
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
52
|
+
|
53
|
+
option :noise, type: :boolean, default: false,
|
54
|
+
desc: 'ruido de fundo - sim ou nao'
|
55
|
+
option :sound, type: :numeric, default: 1.0,
|
56
|
+
desc: 'minimo som que determina fim do silencio (segundos)'
|
57
|
+
option :amount, type: :numeric, default: 0.0001,
|
57
58
|
desc: 'qtd ruido a ser removido'
|
59
|
+
option :rate, type: :numeric, default: 16,
|
60
|
+
desc: 'sample rate - radio-16k, CD-44.1k, PC-48k, pro-96k'
|
61
|
+
|
58
62
|
def dir(fdir)
|
59
63
|
return unless File.ftype(fdir) == 'directory'
|
60
64
|
|
61
|
-
C118dir.new(fdir).
|
65
|
+
C118dir.new(fdir).processa_pasta(fdir, options)
|
62
66
|
end
|
63
67
|
end
|
64
68
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: arquivo
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Hernâni Rodrigues Vaz
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-12-
|
11
|
+
date: 2019-12-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|