arquivo 0.1.8 → 0.1.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/lib/arquivo.rb +11 -4
- data/lib/arquivo/dir.rb +30 -5
- data/lib/arquivo/extrato.rb +1 -1
- data/lib/arquivo/jpg.rb +2 -2
- data/lib/arquivo/mp3.rb +40 -14
- data/lib/arquivo/noise.rb +42 -0
- data/lib/arquivo/pdf.rb +3 -3
- data/lib/arquivo/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 76258c87d7c0c3a63f261359001ea4623dd00a2f53f5d4a6469fa121bd361bae
|
4
|
+
data.tar.gz: '02804d07fa8c7db3ba86995568ed76aa5948775e854a5d0a5159a90cf1e72f7d'
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 75ab73db2816fe15e4f80a8f8bdbeafa349c87dd50bb6ac53f6981e21e5349734d79461ed7a2e7b3b4dafe3b9f8fdf3bf35e5930b34132ee299ff2fdd7d6b021
|
7
|
+
data.tar.gz: a81b2f6d65e75608db0a22b54718e9c8433fedf6702923680568cd08384db924982f6d27b1c2aa1c3009d07a8a071dc8e6034f074d72a8785b77e34423cac3a7
|
data/Gemfile.lock
CHANGED
data/lib/arquivo.rb
CHANGED
@@ -4,6 +4,7 @@ require 'thor'
|
|
4
4
|
require 'arquivo/version'
|
5
5
|
require 'arquivo/extrato'
|
6
6
|
require 'arquivo/dir'
|
7
|
+
require 'arquivo/noise'
|
7
8
|
require 'arquivo/pdf'
|
8
9
|
require 'arquivo/jpg'
|
9
10
|
require 'arquivo/mp3'
|
@@ -15,10 +16,10 @@ module Arquivo
|
|
15
16
|
class CLI < Thor
|
16
17
|
desc 'mp3 MINUTA', 'processa MINUTA criando pasta ' \
|
17
18
|
'com segmentos para arquivo'
|
18
|
-
option :
|
19
|
-
|
19
|
+
option :tempos, type: :array, default: [],
|
20
|
+
desc: 'lista tempos para segmentar MINUTA, ex: [[h:]m:]s'
|
20
21
|
def mp3(file)
|
21
|
-
return unless File.ftype(file) == 'file'
|
22
|
+
return unless File.exist?(file) && File.ftype(file) == 'file'
|
22
23
|
|
23
24
|
f = C118mp3.new(file)
|
24
25
|
return unless f.processa_minuta?
|
@@ -48,10 +49,16 @@ module Arquivo
|
|
48
49
|
desc: 'fuzz trim jpg N-1, escolhe menor -> scanned pdf'
|
49
50
|
option :quality, type: :numeric, default: 15,
|
50
51
|
desc: 'compress jpg N% -> scanned pdf (less=low quality)'
|
52
|
+
option :nred, type: :boolean, default: true,
|
53
|
+
desc: 'fazer reducao do ruido de fundo'
|
54
|
+
option :som, type: :numeric, default: 1.0,
|
55
|
+
desc: 'minimo som que determina fim do silencio (segundos)'
|
56
|
+
option :amount, type: :numeric, default: 0.00001,
|
57
|
+
desc: 'qtd ruido a ser removido'
|
51
58
|
# Processes every supported document found in a folder.
#
# @param fdir [String] path of the folder to process
# @return [Object, nil] whatever C118dir#processa_pasta returns, or nil when
#   fdir does not exist or is not a directory
def dir(fdir)
  # File.ftype raises Errno::ENOENT on a missing path, so test existence
  # first (the mp3 command uses the same guard).
  return unless File.exist?(fdir) && File.ftype(fdir) == 'directory'

  # prepara ends with a `system` call, so its return value cannot be relied on
  # for chaining; keep the folder object in a local and call each step on it.
  pasta = C118dir.new(fdir)
  pasta.prepara(fdir, options)
  pasta.processa_pasta(options)
end
|
56
63
|
end
|
57
64
|
end
|
data/lib/arquivo/dir.rb
CHANGED
@@ -5,8 +5,10 @@ require 'googleauth'
|
|
5
5
|
require 'googleauth/stores/file_token_store'
|
6
6
|
|
7
7
|
module Arquivo
|
8
|
-
#
|
9
|
-
|
8
|
+
# O1 = '2>/dev/null'
|
9
|
+
# O2 = '1>/dev/null 2>&1'
|
10
|
+
O1 = ''
|
11
|
+
O2 = ''
|
10
12
|
|
11
13
|
# analisar/processar pasta
|
12
14
|
class C118dir < Enumerator
|
@@ -14,8 +16,13 @@ module Arquivo
|
|
14
16
|
attr_reader :items
|
15
17
|
# @return [String] documento c118
|
16
18
|
attr_reader :item
|
19
|
+
|
17
20
|
# @return [Hash] dados (faturas/recibos) de c118-contas
|
18
21
|
attr_reader :dados
|
22
|
+
# @return [Float] maximo segundos de silencio encontrados
|
23
|
+
attr_reader :silence
|
24
|
+
# @return [String] noiseprof do silencio encontrado
|
25
|
+
attr_reader :nprof
|
19
26
|
|
20
27
|
# @return [String] base nome ficheiros finais (pdf, tar.gz)
|
21
28
|
attr_reader :base
|
@@ -25,8 +32,6 @@ module Arquivo
|
|
25
32
|
@items = Dir.glob(File.join(pasta, '*')).each
|
26
33
|
@base = File.basename(pasta, File.extname(pasta)) +
|
27
34
|
Date.today.strftime('%Y%m%d')
|
28
|
-
obtem_dados(pasta)
|
29
|
-
system 'mkdir -p tmp/zip'
|
30
35
|
end
|
31
36
|
|
32
37
|
def processa_pasta(options)
|
@@ -55,6 +60,8 @@ module Arquivo
|
|
55
60
|
case ext
|
56
61
|
when '.jpg' then C118jpg.new(item).processa_jpg(options, dados)
|
57
62
|
when '.pdf' then C118pdf.new(item).processa_pdf(options, dados)
|
63
|
+
when '.mp3', '.m4a', '.wav'
|
64
|
+
C118mp3.new(item).processa_mp3(options, num)
|
58
65
|
else
|
59
66
|
puts "erro: #{item} so posso processar mp3, jpg, pdf"
|
60
67
|
end
|
@@ -67,10 +74,28 @@ module Arquivo
|
|
67
74
|
@item = nil
|
68
75
|
end
|
69
76
|
|
77
|
+
# Prepares the folder for processing: loads the invoice/receipt data, builds
# the noise profile and creates the temporary working directory.
#
# @param pasta [String] folder being processed
# @param options [Hash] command-line options (forwarded to obtem_noiseprof)
# @return [C118dir] self, so the caller can chain `.processa_pasta(options)`
def prepara(pasta, options)
  obtem_dados(pasta)
  obtem_noiseprof(pasta, options)
  system 'mkdir -p tmp/zip'
  # `system` yields true/false/nil; return the folder object instead so the
  # CLI chain `prepara(...).processa_pasta(...)` has a valid receiver.
  self
end
|
82
|
+
|
83
|
+
# Scans the folder items for their longest leading silence and derives a sox
# noise profile from it. Runs only when the folder name contains "minuta"
# (or when a previous scan left `silence` at zero) and noise reduction
# (`options[:nred]`) is enabled.
#
# @param dir [String] folder name, used to decide whether to scan
# @param options [Hash] reads :nred (enable) and :som (seconds of sound that
#   end a silence)
# @return [String, Float, nil] noise-profile path on success; 0.0 after a
#   failure; nil when nothing was scanned
def obtem_noiseprof(dir, options)
  return unless /minuta/i.match?(dir) || silence&.zero?
  return unless options[:nred]

  @silence = 0.0
  silencio(1, duracao(item), options[:som]) while next_item
  @nprof = noiseprof
rescue StandardError => e
  # Noise reduction is best-effort, so do not abort the run, but report the
  # failure instead of hiding it, and drop any half-built profile so that
  # nprof never points at a temporary silence file.
  puts "erro: #{dir} nao consigo obter noiseprof (#{e.message})"
  @nprof = nil
  @silence = 0.0
end
|
94
|
+
|
70
95
|
def obtem_dados(dir)
|
71
96
|
return unless /fac?tura/i.match?(dir) ||
|
72
97
|
/recibo/i.match?(dir) ||
|
73
|
-
dados
|
98
|
+
dados&.empty?
|
74
99
|
|
75
100
|
# obtem dados (faturas/recibos) da sheet c118-contas
|
76
101
|
id = '1PbiMrtTtqGztZMhe3AiJbDS6NQE9o3hXebnQEFdt954'
|
data/lib/arquivo/extrato.rb
CHANGED
@@ -76,7 +76,7 @@ module Arquivo
|
|
76
76
|
# Writes the current statement to "<base>/<nome>-extrato.pdf" by running the
# c118_gs command over the selected pages of the source file, prints the
# statement name and moves on to the next statement.
def faz_extrato
  gs = c118_gs
  saida = "-sOutputFile=#{base}/#{nome}-extrato.pdf"
  lista = "-sPageList=#{paginas.join(',')}"

  system "#{gs} #{saida} #{lista} \"#{file}\" #{O2}"
  puts "#{nome}-extrato"
  proximo_extrato
end
|
data/lib/arquivo/jpg.rb
CHANGED
@@ -43,7 +43,7 @@ module Arquivo
|
|
43
43
|
|
44
44
|
# Builds the argument string for a fuzz-trim pass.
#
# @param options [Hash] command-line options (forwarded to parm_qualidade)
# @param fuzz [Numeric] fuzz percentage; also part of the output file name
# @return [String] arguments ending in the output path and the O2 redirection
def parm_trim(options, fuzz)
  qualidade = parm_qualidade(options)
  destino = "tmp/#{key}-#{fuzz}.jpg"

  "-fuzz #{fuzz}% -trim +repage #{qualidade} #{destino} #{O2}"
end
|
48
48
|
|
49
49
|
def parm_qualidade(options)
|
@@ -66,7 +66,7 @@ module Arquivo
|
|
66
66
|
def converte(options)
|
67
67
|
# expande jpg on a larger canvas
|
68
68
|
system "convert \"#{file}\" #{expande} #{parm_qualidade(options)} " \
|
69
|
-
"-format pdf tmp/#{key}-trimed.pdf #{
|
69
|
+
"-format pdf tmp/#{key}-trimed.pdf #{O2}"
|
70
70
|
|
71
71
|
# devolve pdf processado a partir de jpg
|
72
72
|
C118pdf.new("tmp/#{key}-trimed.pdf")
|
data/lib/arquivo/mp3.rb
CHANGED
@@ -9,43 +9,69 @@ module Arquivo
|
|
9
9
|
attr_reader :ext
|
10
10
|
# @return [String] base do ficheiro
|
11
11
|
attr_reader :base
|
12
|
-
# @return [String]
|
13
|
-
attr_reader :
|
14
|
-
# @return [
|
12
|
+
# @return [String] final do nome do segmento
|
13
|
+
attr_reader :final
|
14
|
+
# @return [Float] segundos do mp3
|
15
15
|
attr_reader :size
|
16
16
|
|
17
|
-
# @return [String] nome segmento
|
18
|
-
attr_reader :nome
|
19
|
-
|
20
17
|
# @return [C118mp3] mp3 c118
|
21
18
|
# Stores the audio file path and the values derived from it.
#
# @param fmp3 [String] path of the audio file
# @return [C118mp3] mp3 c118
def initialize(fmp3)
  @file = fmp3
  @ext = File.extname(fmp3).downcase
  @base = File.basename(fmp3, File.extname(fmp3))
  # segment-name suffix: "-<first 8-digit run><first -word run><ext>"
  @final = "-#{@base[/\d{8}/]}#{@base[/-\w+/]}#{@ext}"
  # Quote the path (as forca_mp3 does) so a name containing spaces reaches
  # soxi as a single argument; `soxi -D` prints the duration in seconds.
  @size = `soxi -V0 -D "#{fmp3}" #{O1}`.to_f
end
|
28
25
|
|
29
26
|
# Converts the file to mp3 under tmp/ when it is not already an mp3.
#
# @return [C118mp3] a new object for "tmp/<base>.mp3" when that file exists
#   and is non-empty, otherwise self
def forca_mp3
  o = "tmp/#{base}.mp3"
  # Quote the output path as well as the input: base comes from the same
  # file name, so it may contain spaces too.
  system "sox \"#{file}\" \"#{o}\" #{O2}" unless ext == '.mp3'
  File.size?(o) ? C118mp3.new(o) : self
end
|
34
31
|
|
32
|
+
# Runs the shell script that post-processes every "tmp/<base>*<ext>" segment.
#
# C118dir invokes this as `processa_mp3(options, num)`, so both arguments are
# accepted; they default so that a bare call keeps working.
#
# @param options [Hash] command-line options; reads :amount (noise amount)
# @param _num [Object] passed by the caller but not used here
#   (NOTE(review): meaning of `num` is not visible from this method)
# @return [Boolean, nil] result of `system`, or nil when there is nothing to run
def processa_mp3(options = {}, _num = nil)
  segmentos = Dir.glob("tmp/#{base}*#{ext}")
  cmd = processa_segmentos(segmentos, 0, '', options[:amount])
  # processa_segmentos yields nil when no segment matched; `system nil`
  # would raise TypeError.
  system cmd unless cmd.nil? || cmd.empty?
end
|
36
|
+
|
37
|
+
# Builds a `;`-separated shell script with one command per segment index.
# Each command is a sox `noisered` call when `nprof` is set, otherwise a `cp`.
#
# @param ase [Array] segments found; only its size is used
# @param pse [Integer] index of the first segment to handle
# @param cmd [String] script accumulated so far
# @param amt [Numeric] noise amount handed to `noisered`
# @return [String, nil] script without its leading ';' (nil when it is empty)
def processa_segmentos(ase, pse, cmd, amt)
  script = cmd
  pse.upto(ase.size - 1) do |idx|
    par = "#{base}/s#{idx}#{final} " \
          "tmp/s#{idx}-#{final}#{base[/-\w+/]}.mp3"
    script += if nprof
                ";sox #{par} noisered #{nprof} #{ff(amt)}"
              else
                ";cp #{par}"
              end
  end
  script[1..-1]
end
|
46
|
+
|
47
|
+
# Formats a number with exactly five decimal places.
#
# @param val [Numeric] value to format
# @return [String] fixed-point text, e.g. "0.00001"
def ff(val)
  format('%.5f', val)
end
|
50
|
+
|
51
|
+
# Builds a `;`-separated shell script with one `sox ... trim` command per
# time mark: segment N starts at tps[N] and, unless it is the last one, ends
# at tps[N + 1] (`=` marks an absolute position for sox trim).
#
# @param tps [Array<String>] time marks, e.g. ['0', '1:00', '1:02:30']
# @param pse [Integer] index of the first mark to handle
# @param cmd [String] script accumulated so far
# @return [String, nil] script without its leading ';' (nil when it is empty)
def segmenta(tps, pse, cmd)
  script = cmd
  pse.upto(tps.size - 1) do |idx|
    # Quote both paths (as forca_mp3 does for its input) so names with
    # spaces survive the shell.
    script += ";sox \"#{file}\" \"#{base}/s#{idx}#{final}\" trim #{tps[idx]}"
    script += " =#{tps[idx + 1]}" if idx + 1 < tps.size
    script += " #{O2}"
  end
  script[1..-1]
end
|
60
|
+
|
35
61
|
# Splits the recording into segments at the requested time marks; the first
# segment always starts at 0.
#
# @param options [Hash] command-line options; reads :tempos (time marks)
# @return [Boolean, nil] result of `system`
def processa_minuta(options)
  marcas = ['0'] + options[:tempos]
  comando = segmenta(marcas, 0, '')
  system comando
end
|
39
64
|
|
40
65
|
# Tells whether the recording can be processed: it must be an mp3/m4a/wav
# file and the output folder (named after `base`) must not exist yet.
# Prints the reason when it cannot.
#
# @return [Boolean]
def processa_minuta?
  audio = ['.mp3', '.m4a', '.wav'].include?(ext)
  existe = File.exist?(base)
  return true if audio && !existe

  puts(existe ? "erro: #{base} pasta ja existe" : "erro: #{file} nao consigo processar minuta")
  false
end
|
51
77
|
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Arquivo
  # analyse/process a folder: silence detection and noise-profile helpers
  class C118dir < Enumerator
    # Strips the leading silence of the current item with the sox `silence`
    # effect, raising the threshold one point at a time (up to 3%) until
    # silencio? accepts the result.
    #
    # @param thr [Integer] silence threshold in percent for this attempt
    # @param tse [Float] total duration of the current item in seconds
    # @param som [Numeric] seconds of sound that mark the end of the silence
    # @return [nil]
    def silencio(thr, tse, som)
      out = "tmp/silencio-#{File.basename(item)}"

      system "sox #{item} #{out} " \
             "silence 1 #{format('%<valor>.5f', valor: som)}t #{thr}% #{O2}"

      return if silencio?(out, tse) || thr == 3

      # Retry with the next threshold. The method takes three parameters; the
      # previous call passed an undefined fourth value and raised NameError.
      silencio(thr + 1, tse, som)
    end

    # Records the trimmed file when it exposes a longer leading silence than
    # any seen so far.
    #
    # @param fss [String] file produced by the sox `silence` effect
    # @param tse [Float] total duration of the untrimmed item in seconds
    # @return [String, false] fss when it became the new best, else false
    def silencio?(fss, tse)
      s = duracao(fss)
      # tse - s is the amount of audio removed from the start, in seconds
      return false unless s.positive? && (tse - s > silence)

      @silence = tse - s
      @nprof = fss
    end

    # @param seg [String] audio file
    # @return [Float] duration in seconds reported by `soxi -D`
    #   (0.0 when soxi prints nothing usable)
    def duracao(seg)
      `soxi -V0 -D #{seg} #{O1}`.to_f
    end

    # Builds a sox noise profile from the first `silence` seconds of the file
    # currently held in nprof.
    #
    # NOTE(review): `ext` is not defined in this file; presumably provided
    # elsewhere in C118dir - confirm.
    #
    # @return [String, nil] path of the profile, or nil when no silence was
    #   found or the profile file came out empty
    def noiseprof
      return unless silence&.positive?

      o = "tmp/prof-#{base}"
      # extract the silence that was found, then profile it
      system "sox #{nprof} #{o}#{ext} trim 0 #{silence} #{O2};" \
             "sox #{o}#{ext} -n noiseprof #{o} #{O2}"

      # only valid (non-empty) noise profiles are returned
      @silence = 0.0 unless File.size?(o)
      silence.positive? ? o : nil
    end
  end
end
|
data/lib/arquivo/pdf.rb
CHANGED
@@ -21,7 +21,7 @@ module Arquivo
|
|
21
21
|
"setfont (#{base}) show"
|
22
22
|
system "#{c118_gs} -sOutputFile=tmp/stamp-#{key}.pdf -c \"#{s}\";" \
|
23
23
|
"pdftk tmp/zip/#{base}.pdf " \
|
24
|
-
"stamp tmp/stamp-#{key}.pdf output #{o} #{
|
24
|
+
"stamp tmp/stamp-#{key}.pdf output #{o} #{O2}"
|
25
25
|
puts key
|
26
26
|
end
|
27
27
|
|
@@ -31,7 +31,7 @@ module Arquivo
|
|
31
31
|
|
32
32
|
recibo = key[0] == 'r'
|
33
33
|
# google print has better && smaller pdf then c118_gs
|
34
|
-
system "#{c118_gs} -sOutputFile=#{o} \"#{file}\" #{
|
34
|
+
system "#{c118_gs} -sOutputFile=#{o} \"#{file}\" #{O2}" unless recibo
|
35
35
|
# usar copia do original se processado for maior
|
36
36
|
system "cp \"#{file}\" #{o}" if recibo || File.size(o) > size
|
37
37
|
|
@@ -108,7 +108,7 @@ module Arquivo
|
|
108
108
|
# nem sempre as imagens sao jpg
|
109
109
|
# somente utilizar a primeira
|
110
110
|
g = Dir.glob("tmp/#{key}-???.???")
|
111
|
-
system "convert #{g[0]} #{o} #{
|
111
|
+
system "convert #{g[0]} #{o} #{O2}"
|
112
112
|
return unless File.size(o) > LT
|
113
113
|
|
114
114
|
C118jpg.new(o)
|
data/lib/arquivo/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: arquivo
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Hernâni Rodrigues Vaz
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-12-
|
11
|
+
date: 2019-12-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -117,6 +117,7 @@ files:
|
|
117
117
|
- lib/arquivo/extrato.rb
|
118
118
|
- lib/arquivo/jpg.rb
|
119
119
|
- lib/arquivo/mp3.rb
|
120
|
+
- lib/arquivo/noise.rb
|
120
121
|
- lib/arquivo/pdf.rb
|
121
122
|
- lib/arquivo/version.rb
|
122
123
|
homepage: https://github.com/ph1341c118/arquivo
|