arquivo 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/lib/arquivo.rb +1 -1
- data/lib/arquivo/dir.rb +4 -3
- data/lib/arquivo/extrato.rb +9 -8
- data/lib/arquivo/mp3.rb +5 -6
- data/lib/arquivo/noise.rb +32 -29
- data/lib/arquivo/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: db2e214ab2f4424ed534529d6601643e05256b201c75ac4d6ce13ce63167f62e
|
|
4
|
+
data.tar.gz: 657d0d3b58f42150e1930d669444ab71a019df9e2b006abc2ac75caee8e4a1ee
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 7aa8742696b7e53e4af0ff69c8c85b71fa48969751d96f162c8546d276373796c2c0dba1a075d5d00fec89ed24bda847083e1061d7698d4919e1b0815211c91c
|
|
7
|
+
data.tar.gz: 074e49e12a77c243ff62b32bde04b6d72e84576da5f152db4e3c85b643b245cca8f59058a010ea6318c92aed10f2776d339fb5a047e52ab8614f560aa19d00d4
|
data/Gemfile.lock
CHANGED
data/lib/arquivo.rb
CHANGED
|
@@ -54,7 +54,7 @@ module Arquivo
|
|
|
54
54
|
desc: 'ruido de fundo - sim ou nao'
|
|
55
55
|
option :sound, type: :numeric, default: 1.0,
|
|
56
56
|
desc: 'minimo som que determina fim do silencio (segundos)'
|
|
57
|
-
option :amount, type: :numeric, default: 0.
|
|
57
|
+
option :amount, type: :numeric, default: 0.00001,
|
|
58
58
|
desc: 'qtd ruido a ser removido'
|
|
59
59
|
option :rate, type: :numeric, default: 16,
|
|
60
60
|
desc: 'sample rate - radio-16k, CD-44.1k, PC-48k, pro-96k'
|
data/lib/arquivo/dir.rb
CHANGED
|
@@ -9,10 +9,11 @@ require 'arquivo/noise'
|
|
|
9
9
|
module Arquivo
|
|
10
10
|
O1 = '2>/dev/null'
|
|
11
11
|
O2 = '1>/dev/null 2>&1'
|
|
12
|
+
FT = ['.mp3', '.m4a', '.wav', '.sox'].freeze
|
|
12
13
|
|
|
13
14
|
# analisar/processar pasta
|
|
14
15
|
class C118dir < Enumerator
|
|
15
|
-
# @return [Enumerator]
|
|
16
|
+
# @return [Enumerator] items dentro duma pasta
|
|
16
17
|
attr_reader :items
|
|
17
18
|
# @return [String] documento c118
|
|
18
19
|
attr_reader :item
|
|
@@ -66,10 +67,10 @@ module Arquivo
|
|
|
66
67
|
case ext
|
|
67
68
|
when '.jpg' then C118jpg.new(item).processa_jpg(options, dados)
|
|
68
69
|
when '.pdf' then C118pdf.new(item).processa_pdf(options, dados)
|
|
69
|
-
when
|
|
70
|
+
when *FT
|
|
70
71
|
C118mp3.new(item).processa_mp3(options, noiseprof)
|
|
71
72
|
else
|
|
72
|
-
puts "erro: #{item} so posso processar
|
|
73
|
+
puts "erro: #{item} so posso processar"
|
|
73
74
|
end
|
|
74
75
|
end
|
|
75
76
|
|
data/lib/arquivo/extrato.rb
CHANGED
|
@@ -19,14 +19,15 @@ module Arquivo
|
|
|
19
19
|
end
|
|
20
20
|
|
|
21
21
|
def processa_extrato?
|
|
22
|
-
return true if
|
|
23
|
-
|
|
24
|
-
|
|
22
|
+
return true if ext == '.pdf' &&
|
|
23
|
+
size.positive? &&
|
|
24
|
+
!File.exist?(base) &&
|
|
25
|
+
first_extrato?
|
|
25
26
|
|
|
26
27
|
if File.exist?(base)
|
|
27
28
|
puts "erro: #{base} pasta ja existe"
|
|
28
29
|
else
|
|
29
|
-
puts "erro: #{file} nao consigo obter primeira pagina do
|
|
30
|
+
puts "erro: #{file} nao consigo obter primeira pagina do EXTRATO"
|
|
30
31
|
end
|
|
31
32
|
false
|
|
32
33
|
end
|
|
@@ -58,6 +59,10 @@ module Arquivo
|
|
|
58
59
|
pagina.include?('45463760224')
|
|
59
60
|
end
|
|
60
61
|
|
|
62
|
+
def first_extrato?
|
|
63
|
+
leitor && proxima_pagina && proximo_extrato
|
|
64
|
+
end
|
|
65
|
+
|
|
61
66
|
# @return [Enumerator::Lazy] leitor pdf
|
|
62
67
|
def leitor
|
|
63
68
|
@leitor ||= PDF::Reader.new(file).pages.lazy
|
|
@@ -82,10 +87,6 @@ module Arquivo
|
|
|
82
87
|
@nome = nil
|
|
83
88
|
end
|
|
84
89
|
|
|
85
|
-
def first_extrato
|
|
86
|
-
leitor && proxima_pagina && proximo_extrato
|
|
87
|
-
end
|
|
88
|
-
|
|
89
90
|
def split
|
|
90
91
|
system "pdftk #{file} burst output #{base}/pg%04d-#{base}.pdf;" \
|
|
91
92
|
"rm -f #{base}/*.txt"
|
data/lib/arquivo/mp3.rb
CHANGED
|
@@ -25,13 +25,12 @@ module Arquivo
|
|
|
25
25
|
|
|
26
26
|
def processa_mp3(options, npr)
|
|
27
27
|
cmd = if npr
|
|
28
|
-
"noisered #{npr} #{format('%<v>.
|
|
28
|
+
"noisered #{npr} #{format('%<v>.9f', v: options[:amount])} "
|
|
29
29
|
else
|
|
30
30
|
''
|
|
31
31
|
end
|
|
32
|
-
cmd += "rate -v #{options[:rate]}k"
|
|
32
|
+
cmd += "rate -v #{options[:rate]}k channels 1"
|
|
33
33
|
system "sox -G #{file} tmp/zip/#{base}.mp3 #{cmd} #{O2}"
|
|
34
|
-
# puts base
|
|
35
34
|
end
|
|
36
35
|
|
|
37
36
|
def segmenta(tps, pse, cmd)
|
|
@@ -57,14 +56,14 @@ module Arquivo
|
|
|
57
56
|
end
|
|
58
57
|
|
|
59
58
|
def processa_minuta?
|
|
60
|
-
return true if
|
|
61
|
-
size.positive? &&
|
|
59
|
+
return true if FT.include?(ext) && size.positive? &&
|
|
62
60
|
!File.exist?(base)
|
|
63
61
|
|
|
64
62
|
if File.exist?(base)
|
|
65
63
|
puts "erro: #{base} pasta ja existe"
|
|
66
64
|
else
|
|
67
|
-
puts
|
|
65
|
+
puts 'erro: so consigo processar minutas com som ' \
|
|
66
|
+
"e tipo #{FT}"
|
|
68
67
|
end
|
|
69
68
|
|
|
70
69
|
false
|
data/lib/arquivo/noise.rb
CHANGED
|
@@ -3,49 +3,52 @@
|
|
|
3
3
|
module Arquivo
|
|
4
4
|
# analisar/processar pasta
|
|
5
5
|
class C118dir < Enumerator
|
|
6
|
-
def obtem_noiseprof(
|
|
7
|
-
return unless /minuta/i.match?(
|
|
6
|
+
def obtem_noiseprof(pasta, options)
|
|
7
|
+
return unless /minuta/i.match?(pasta) && !options[:noise]
|
|
8
8
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
9
|
+
d = Dir.glob(File.join(pasta, '*')).map { |e| [e, duracao(e)] }
|
|
10
|
+
t = 0
|
|
11
|
+
s = ['', 0, 0]
|
|
12
|
+
s = maximo(d, t += 1, options[:sound]) while t < 9 && s[2] <= silence
|
|
13
13
|
|
|
14
|
-
|
|
15
|
-
|
|
14
|
+
processa_silencio(s)
|
|
15
|
+
end
|
|
16
16
|
|
|
17
|
-
|
|
18
|
-
|
|
17
|
+
def processa_silencio(seg)
|
|
18
|
+
return unless seg[2] > silence
|
|
19
19
|
|
|
20
|
-
|
|
20
|
+
o = "tmp/silencio-#{File.basename(seg[0])}"
|
|
21
|
+
system "sox #{seg[0]} #{o} trim 0 #{seg[2]} #{O2}"
|
|
22
|
+
seg[2] = duracao(o)
|
|
23
|
+
return unless seg[2].positive?
|
|
21
24
|
|
|
22
|
-
|
|
25
|
+
processa_noiseprof(seg, o)
|
|
23
26
|
end
|
|
24
27
|
|
|
25
|
-
def
|
|
26
|
-
|
|
27
|
-
|
|
28
|
+
def processa_noiseprof(seg, trm)
|
|
29
|
+
o = "tmp/noiseprof-#{File.basename(seg[0], File.extname(seg[0]))}"
|
|
30
|
+
# obter noiseprof do silencio encontrado
|
|
31
|
+
system "sox #{trm} -n noiseprof #{o} #{O2}"
|
|
28
32
|
|
|
29
|
-
|
|
30
|
-
@
|
|
33
|
+
# so noiseprof validos sao devolvidos
|
|
34
|
+
@silence = File.size?(o) ? seg[2] : 0.0
|
|
35
|
+
@noiseprof = silence.positive? ? o : nil
|
|
31
36
|
end
|
|
32
37
|
|
|
33
|
-
def
|
|
34
|
-
|
|
38
|
+
def maximo(seg, thr, som)
|
|
39
|
+
seg.sort.map { |e| add_silencio(e, thr, som) }.max_by { |_, _, s| s }
|
|
35
40
|
end
|
|
36
41
|
|
|
37
|
-
def
|
|
38
|
-
|
|
42
|
+
def add_silencio(seg, thr, som)
|
|
43
|
+
o = "tmp/thr-#{File.basename(seg[0])}"
|
|
44
|
+
system "sox #{seg[0]} #{o} silence 1 #{som}t #{thr}% #{O2}"
|
|
45
|
+
s = (seg[1] - duracao(o)).round(2, half: :down)
|
|
39
46
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
# obter noiseprof do silencio encontrado
|
|
43
|
-
system "sox #{noiseprof} #{o}#{e} trim 0 #{silence} #{O2};" \
|
|
44
|
-
"sox #{o}#{e} -n noiseprof #{o} #{O2}"
|
|
47
|
+
seg + [s > som ? s : 0.0]
|
|
48
|
+
end
|
|
45
49
|
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
silence.positive? ? o : nil
|
|
50
|
+
def duracao(seg)
|
|
51
|
+
`soxi -V0 -D #{seg} #{O1}`.to_f
|
|
49
52
|
end
|
|
50
53
|
end
|
|
51
54
|
end
|
data/lib/arquivo/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: arquivo
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.
|
|
4
|
+
version: 0.2.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Hernâni Rodrigues Vaz
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2019-12-
|
|
11
|
+
date: 2019-12-23 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: bundler
|