speech2text 0.3.1 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/speech2text +5 -0
- data/lib/speech/audio_inspector.rb +1 -1
- data/lib/speech/audio_splitter.rb +20 -4
- data/lib/speech/audio_to_text.rb +13 -21
- data/lib/speech/version.rb +1 -1
- data/test/audio_to_text_test.rb +18 -0
- metadata +6 -6
data/bin/speech2text
CHANGED
data/lib/speech/audio_inspector.rb
CHANGED
|
@@ -14,7 +14,7 @@ module Speech
|
|
|
14
14
|
|
|
15
15
|
def to_s
|
|
16
16
|
s,f = seconds.split('.')
|
|
17
|
-
sprintf "%.2d:%.2d:%.2d:%.2d", self.hours.
|
|
17
|
+
sprintf "%.2d:%.2d:%.2d:%.2d", self.hours.to_i, self.minutes.to_i, s.to_i, (f||0).to_i
|
|
18
18
|
#"#{hours}:#{minutes}:#{seconds}:#{f}"
|
|
19
19
|
end
|
|
20
20
|
|
data/lib/speech/audio_splitter.rb
CHANGED
|
@@ -5,22 +5,33 @@ module Speech
|
|
|
5
5
|
attr_accessor :original_file, :size, :duration, :chunks
|
|
6
6
|
|
|
7
7
|
class AudioChunk
|
|
8
|
-
attr_accessor :splitter, :chunk, :flac_chunk, :offset, :duration, :flac_rate
|
|
8
|
+
attr_accessor :splitter, :chunk, :flac_chunk, :offset, :duration, :flac_rate, :copied
|
|
9
9
|
|
|
10
10
|
def initialize(splitter, offset, duration)
|
|
11
11
|
self.offset = offset
|
|
12
12
|
self.chunk = File.join(File.dirname(splitter.original_file), "chunk-" + File.basename(splitter.original_file).gsub(/\.(.*)$/, "-#{offset}" + '.\1'))
|
|
13
13
|
self.duration = duration
|
|
14
14
|
self.splitter = splitter
|
|
15
|
+
self.copied = false
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
def self.copy(splitter)
|
|
19
|
+
chunk = AudioChunk.new(splitter, 0, splitter.duration.to_f)
|
|
20
|
+
chunk.copied = true
|
|
21
|
+
system("cp #{splitter.original_file} #{chunk.chunk}")
|
|
22
|
+
chunk
|
|
15
23
|
end
|
|
16
24
|
|
|
17
25
|
# given the original file from the splitter and the chunked file name with duration and offset run the ffmpeg command
|
|
18
26
|
def build
|
|
27
|
+
return self if self.copied
|
|
19
28
|
# ffmpeg -y -i sample.audio.wav -acodec copy -vcodec copy -ss 00:00:00:00 -t 00:00:30:00 sample.audio.out.wav
|
|
20
29
|
offset_ts = AudioInspector::Duration.from_seconds(self.offset)
|
|
21
30
|
duration_ts = AudioInspector::Duration.from_seconds(self.duration)
|
|
31
|
+
# NOTE: kind of a hack, but if the original source is less than or equal to 1 second, we should skip ffmpeg
|
|
32
|
+
puts "building chunk: #{duration_ts.inspect} and offset: #{offset_ts}"
|
|
22
33
|
#puts "offset: #{ offset_ts.to_s }, duration: #{duration_ts.to_s}"
|
|
23
|
-
cmd = "ffmpeg -y -i #{splitter.original_file} -acodec copy -vcodec copy -ss #{offset_ts} -t #{duration_ts} #{self.chunk} >/dev/null 2>&1"
|
|
34
|
+
cmd = "ffmpeg -y -i #{splitter.original_file} -acodec copy -vcodec copy -ss #{offset_ts} -t #{duration_ts} #{self.chunk}"# >/dev/null 2>&1"
|
|
24
35
|
if system(cmd)
|
|
25
36
|
self
|
|
26
37
|
else
|
|
@@ -30,7 +41,9 @@ module Speech
|
|
|
30
41
|
|
|
31
42
|
# convert the audio file to flac format
|
|
32
43
|
def to_flac
|
|
33
|
-
|
|
44
|
+
puts "convert: #{chunk} to flac"
|
|
45
|
+
if system("flac #{chunk}")
|
|
46
|
+
puts "success?"
|
|
34
47
|
self.flac_chunk = chunk.gsub(File.extname(chunk), ".flac")
|
|
35
48
|
# convert the audio file to 16K
|
|
36
49
|
self.flac_rate = `ffmpeg -i #{self.flac_chunk} 2>&1`.strip.scan(/Audio: flac, (.*) Hz/).first.first.strip
|
|
@@ -42,6 +55,8 @@ module Speech
|
|
|
42
55
|
raise "failed to convert to lower audio rate"
|
|
43
56
|
end
|
|
44
57
|
|
|
58
|
+
else
|
|
59
|
+
raise "failed to convert chunk: #{chunk} with flac #{chunk}"
|
|
45
60
|
end
|
|
46
61
|
end
|
|
47
62
|
|
|
@@ -75,10 +90,11 @@ module Speech
|
|
|
75
90
|
end
|
|
76
91
|
|
|
77
92
|
if chunks.empty?
|
|
78
|
-
chunks << AudioChunk.
|
|
93
|
+
chunks << AudioChunk.copy(self)#, 0, self.duration.to_f)
|
|
79
94
|
else
|
|
80
95
|
chunks << AudioChunk.new(self, chunks.last.offset.to_i + chunks.last.duration.to_i, self.size + last_chunk)
|
|
81
96
|
end
|
|
97
|
+
puts "Chunk count: #{chunks.size}"
|
|
82
98
|
|
|
83
99
|
chunks
|
|
84
100
|
end
|
data/lib/speech/audio_to_text.rb
CHANGED
|
@@ -2,17 +2,16 @@
|
|
|
2
2
|
module Speech
|
|
3
3
|
|
|
4
4
|
class AudioToText
|
|
5
|
-
attr_accessor :file, :rate, :captured_json, :
|
|
5
|
+
attr_accessor :file, :rate, :captured_json, :captured_file
|
|
6
6
|
|
|
7
7
|
def initialize(file)
|
|
8
8
|
self.file = file
|
|
9
9
|
self.captured_file = self.file.gsub(/\.wav$/,'.json')
|
|
10
|
-
self.captured_json =
|
|
11
|
-
self.confidence = 0.0
|
|
10
|
+
self.captured_json = {}
|
|
12
11
|
end
|
|
13
12
|
|
|
14
13
|
def to_text
|
|
15
|
-
url = "https://www.google.com/speech-api/v1/recognize?xjerr=1&client=speech2text&lang=en-US&maxresults=
|
|
14
|
+
url = "https://www.google.com/speech-api/v1/recognize?xjerr=1&client=speech2text&lang=en-US&maxresults=10"
|
|
16
15
|
splitter = Speech::AudioSplitter.new(file) # based off the wave file because flac doesn't tell us the duration
|
|
17
16
|
easy = Curl::Easy.new(url)
|
|
18
17
|
splitter.split.each do|chunk|
|
|
@@ -31,10 +30,12 @@ module Speech
|
|
|
31
30
|
def convert_chunk(easy, chunk, options={})
|
|
32
31
|
puts "sending chunk of size #{chunk.duration}..."
|
|
33
32
|
retrying = true
|
|
34
|
-
|
|
33
|
+
retry_count = 0
|
|
34
|
+
while retrying && retry_count < 5
|
|
35
35
|
#easy.verbose = true
|
|
36
36
|
easy.headers['Content-Type'] = "audio/x-flac; rate=#{chunk.flac_rate}"
|
|
37
37
|
easy.headers['User-Agent'] = "https://github.com/taf2/speech2text"
|
|
38
|
+
#puts chunk.inspect
|
|
38
39
|
easy.post_body = "Content=#{chunk.to_flac_bytes}"
|
|
39
40
|
easy.on_progress {|dl_total, dl_now, ul_total, ul_now| printf("%.2f/%.2f\r", ul_now, ul_total); true }
|
|
40
41
|
easy.on_complete {|easy| puts }
|
|
@@ -44,25 +45,16 @@ module Speech
|
|
|
44
45
|
if easy.response_code == 500
|
|
45
46
|
puts "500 from google retry after 0.5 seconds"
|
|
46
47
|
retrying = true
|
|
47
|
-
|
|
48
|
+
retry_count += 1
|
|
49
|
+
sleep 0.5 # wait longer on error?, google??
|
|
48
50
|
else
|
|
49
51
|
# {"status":0,"id":"ce178ea89f8b17d8e8298c9c7814700a-1","hypotheses":[{"utterance":"I like pickles","confidence":0.92731786}]}
|
|
50
52
|
data = JSON.parse(easy.body_str)
|
|
51
|
-
|
|
52
|
-
data['
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
}
|
|
57
|
-
File.open("#{self.captured_file}", "wb") {|f|
|
|
58
|
-
size = self.captured_json.size
|
|
59
|
-
if size > 0
|
|
60
|
-
confidence_calc = self.confidence / size
|
|
61
|
-
else
|
|
62
|
-
confidence_calc = 0
|
|
63
|
-
end
|
|
64
|
-
f << {:captured_json => captured_json, :confidence => confidence_calc}.to_json
|
|
65
|
-
}
|
|
53
|
+
self.captured_json['status'] = data['status']
|
|
54
|
+
self.captured_json['id'] = data['id']
|
|
55
|
+
self.captured_json['hypotheses'] = data['hypotheses'].map {|ut| [ut['utterance'], ut['confidence']] }
|
|
56
|
+
puts self.captured_json.inspect
|
|
57
|
+
File.open("#{self.captured_file}", "wb") {|f| f << captured_json.to_json }
|
|
66
58
|
retrying = false
|
|
67
59
|
end
|
|
68
60
|
sleep 0.1 # not too fast there tiger
|
data/lib/speech/version.rb
CHANGED
data/test/audio_to_text_test.rb
CHANGED
|
@@ -18,4 +18,22 @@ class SpeechAudioToTextTest < Test::Unit::TestCase
|
|
|
18
18
|
ensure
|
|
19
19
|
audio.clean
|
|
20
20
|
end
|
|
21
|
+
|
|
22
|
+
def test_short_audio_clip
|
|
23
|
+
audio = Speech::AudioToText.new("samples/i-like-pickles.chunk5.wav")
|
|
24
|
+
captured_json = audio.to_text
|
|
25
|
+
assert captured_json
|
|
26
|
+
assert captured_json.key?("hypotheses")
|
|
27
|
+
assert !captured_json['hypotheses'].empty?
|
|
28
|
+
#{"status"=>0, "id"=>"552de5ba35bb769ce3493ff113e158a8-1", "hypotheses"=>[["eagles", 0.7214844], ["pickles", nil], ["michaels", nil], ["giggles", nil], ["tickles", nil]]}
|
|
29
|
+
assert captured_json.keys.include?('status')
|
|
30
|
+
assert captured_json.keys.include?('id')
|
|
31
|
+
assert captured_json.keys.include?('hypotheses')
|
|
32
|
+
puts captured_json.inspect
|
|
33
|
+
assert_equal "eagles", captured_json['hypotheses'][0].first
|
|
34
|
+
assert_equal "pickles", captured_json['hypotheses'][1].first
|
|
35
|
+
#assert captured_json['confidence'] > 0.9
|
|
36
|
+
ensure
|
|
37
|
+
audio.clean
|
|
38
|
+
end
|
|
21
39
|
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: speech2text
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.3.
|
|
4
|
+
version: 0.3.2
|
|
5
5
|
prerelease:
|
|
6
6
|
platform: ruby
|
|
7
7
|
authors:
|
|
@@ -9,12 +9,12 @@ authors:
|
|
|
9
9
|
autorequire:
|
|
10
10
|
bindir: bin
|
|
11
11
|
cert_chain: []
|
|
12
|
-
date: 2011-04-
|
|
12
|
+
date: 2011-04-04 00:00:00.000000000 -04:00
|
|
13
13
|
default_executable:
|
|
14
14
|
dependencies:
|
|
15
15
|
- !ruby/object:Gem::Dependency
|
|
16
16
|
name: curb
|
|
17
|
-
requirement: &
|
|
17
|
+
requirement: &2163572140 !ruby/object:Gem::Requirement
|
|
18
18
|
none: false
|
|
19
19
|
requirements:
|
|
20
20
|
- - ! '>='
|
|
@@ -22,10 +22,10 @@ dependencies:
|
|
|
22
22
|
version: '0'
|
|
23
23
|
type: :runtime
|
|
24
24
|
prerelease: false
|
|
25
|
-
version_requirements: *
|
|
25
|
+
version_requirements: *2163572140
|
|
26
26
|
- !ruby/object:Gem::Dependency
|
|
27
27
|
name: json
|
|
28
|
-
requirement: &
|
|
28
|
+
requirement: &2163571700 !ruby/object:Gem::Requirement
|
|
29
29
|
none: false
|
|
30
30
|
requirements:
|
|
31
31
|
- - ! '>='
|
|
@@ -33,7 +33,7 @@ dependencies:
|
|
|
33
33
|
version: '0'
|
|
34
34
|
type: :runtime
|
|
35
35
|
prerelease: false
|
|
36
|
-
version_requirements: *
|
|
36
|
+
version_requirements: *2163571700
|
|
37
37
|
description: Super powers of Google wrapped in a nice Ruby interface
|
|
38
38
|
email: todd.fisher@gmail.com
|
|
39
39
|
executables:
|