speech2text 0.01 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +18 -5
- data/bin/speech2text +5 -0
- data/lib/speech.rb +1 -0
- data/lib/speech/audio_inspector.rb +1 -0
- data/lib/speech/audio_splitter.rb +2 -1
- data/lib/speech/audio_to_text.rb +6 -0
- data/lib/speech/version.rb +2 -1
- data/speech2text.gemspec +9 -8
- data/test/audio_inspector_test.rb +1 -0
- data/test/audio_splitter_test.rb +2 -1
- data/test/audio_to_text_test.rb +21 -0
- data/test/samples/i-like-pickles.json +1 -0
- metadata +10 -9
- data/lib/speech/text.rb +0 -11
- data/test/i-like-pickles.wav +0 -0
data/README.rdoc
CHANGED
@@ -1,10 +1,23 @@
|
|
1
|
-
|
1
|
+
= Speech2Text
|
2
2
|
|
3
3
|
Using the power of ffmpeg/flac/Google and Ruby, here is a simple interface for converting speech to text.
|
4
4
|
|
5
|
-
|
6
|
-
The Google API will frequently return 500 errors without providing much reason as to why.
|
5
|
+
Using a new undocumented speech API from Google with the help of this article: http://mikepultz.com/2011/03/accessing-google-speech-api-chrome-11/
|
7
6
|
|
8
|
-
|
7
|
+
We're able to provide a very simple API in Ruby to decode simple audio to text.
|
9
8
|
|
10
|
-
|
9
|
+
The API from Google is not yet public and so may change. It also seems to be very fragile: more often than not it will return a 500, so the library has retry code built in - for larger audio files 10+ failures may occur before a successful result is retrieved...
|
10
|
+
|
11
|
+
It also appears that the API only likes smaller audio files, so there is a built-in chunker that allows us to split the audio up into smaller chunks.
|
12
|
+
|
13
|
+
== Example
|
14
|
+
|
15
|
+
audio = Speech::AudioToText.new("i-like-pickles.wav")
|
16
|
+
puts audio.to_text.inspect
|
17
|
+
=> {"captured_json"=>[["I like pickles", 0.92731786]], "confidence"=>0.92731786}
|
18
|
+
|
19
|
+
== Command Line
|
20
|
+
|
21
|
+
speech2text i-like-pickles.wav
|
22
|
+
cat i-like-pickles.json
|
23
|
+
{"captured_json":[["I like pickles",0.92731786]],"confidence":0.92731786}
|
data/bin/speech2text
CHANGED
data/lib/speech.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# -*- encoding: binary -*-
|
1
2
|
module Speech
|
2
3
|
|
3
4
|
class AudioSplitter
|
@@ -8,7 +9,7 @@ module Speech
|
|
8
9
|
|
9
10
|
def initialize(splitter, offset, duration)
|
10
11
|
self.offset = offset
|
11
|
-
self.chunk = "chunk-" + splitter.original_file.gsub(/\.(.*)$/, "-#{offset}" + '.\1')
|
12
|
+
self.chunk = File.join(File.dirname(splitter.original_file), "chunk-" + File.basename(splitter.original_file).gsub(/\.(.*)$/, "-#{offset}" + '.\1'))
|
12
13
|
self.duration = duration
|
13
14
|
self.splitter = splitter
|
14
15
|
end
|
data/lib/speech/audio_to_text.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# -*- encoding: binary -*-
|
1
2
|
module Speech
|
2
3
|
|
3
4
|
class AudioToText
|
@@ -21,6 +22,10 @@ module Speech
|
|
21
22
|
JSON.parse(File.read(self.captured_file))
|
22
23
|
end
|
23
24
|
|
25
|
+
def clean
|
26
|
+
File.unlink self.captured_file if self.captured_file && File.exist?(self.captured_file)
|
27
|
+
end
|
28
|
+
|
24
29
|
protected
|
25
30
|
|
26
31
|
def convert_chunk(easy, chunk, options={})
|
@@ -29,6 +34,7 @@ module Speech
|
|
29
34
|
while retrying
|
30
35
|
#easy.verbose = true
|
31
36
|
easy.headers['Content-Type'] = "audio/x-flac; rate=#{chunk.flac_rate}"
|
37
|
+
easy.headers['User-Agent'] = "https://github.com/taf2/speech2text"
|
32
38
|
easy.post_body = "Content=#{chunk.to_flac_bytes}"
|
33
39
|
easy.on_progress {|dl_total, dl_now, ul_total, ul_now| printf("%.2f/%.2f\r", ul_now, ul_total); true }
|
34
40
|
easy.on_complete {|easy| puts }
|
data/lib/speech/version.rb
CHANGED
data/speech2text.gemspec
CHANGED
@@ -2,14 +2,15 @@ $:.unshift File.expand_path(File.dirname(__FILE__) + "/lib")
|
|
2
2
|
require "speech/version"
|
3
3
|
|
4
4
|
Gem::Specification.new do |s|
|
5
|
-
s.name
|
6
|
-
s.authors
|
7
|
-
s.email
|
8
|
-
s.version
|
9
|
-
s.homepage
|
10
|
-
s.summary
|
11
|
-
s.description
|
12
|
-
s.files
|
5
|
+
s.name = "speech2text"
|
6
|
+
s.authors = ["Todd A. Fisher"]
|
7
|
+
s.email = "todd.fisher@gmail.com"
|
8
|
+
s.version = Speech::Info::VERSION
|
9
|
+
s.homepage = "https://github.com/taf2/speech2text"
|
10
|
+
s.summary = "Speech to Text Library"
|
11
|
+
s.description = "Super powers of Google wrapped in a nice Ruby interface"
|
12
|
+
s.files = Dir["{lib,bin,test}/**/*", "Rakefile", "README.rdoc", "*.gemspec"]
|
13
|
+
s.executables = %w(speech2text)
|
13
14
|
|
14
15
|
s.add_dependency "curb"
|
15
16
|
s.add_dependency "json"
|
data/test/audio_splitter_test.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# -*- encoding: binary -*-
|
1
2
|
require 'test/unit'
|
2
3
|
$:.unshift File.expand_path(File.dirname(__FILE__) + '/../lib')
|
3
4
|
require 'speech'
|
@@ -5,7 +6,7 @@ require 'speech'
|
|
5
6
|
class SpeechAudioSplitterTest < Test::Unit::TestCase
|
6
7
|
|
7
8
|
def test_audio_splitter
|
8
|
-
splitter = Speech::AudioSplitter.new("i-like-pickles.wav", 1)
|
9
|
+
splitter = Speech::AudioSplitter.new("samples/i-like-pickles.wav", 1)
|
9
10
|
|
10
11
|
assert_equal '00:00:03:52', splitter.duration.to_s
|
11
12
|
assert_equal 3.52, splitter.duration.to_f
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# -*- encoding: binary -*-
|
2
|
+
require 'test/unit'
|
3
|
+
$:.unshift File.expand_path(File.dirname(__FILE__) + '/../lib')
|
4
|
+
require 'speech'
|
5
|
+
|
6
|
+
class SpeechAudioToTextTest < Test::Unit::TestCase
|
7
|
+
def test_audio_to_text
|
8
|
+
audio = Speech::AudioToText.new("samples/i-like-pickles.wav")
|
9
|
+
captured_json = audio.to_text
|
10
|
+
assert captured_json
|
11
|
+
assert captured_json.key?("captured_json")
|
12
|
+
assert !captured_json['captured_json'].empty?
|
13
|
+
assert_equal ['captured_json', 'confidence'], captured_json.keys.sort
|
14
|
+
assert_equal "I like pickles", captured_json['captured_json'].flatten.first
|
15
|
+
assert captured_json['confidence'] > 0.9
|
16
|
+
# {"captured_json"=>[["I like pickles", 0.92731786]], "confidence"=>0.92731786}
|
17
|
+
# puts captured_json.inspect
|
18
|
+
ensure
|
19
|
+
audio.clean
|
20
|
+
end
|
21
|
+
end
|
@@ -0,0 +1 @@
|
|
1
|
+
{"captured_json":[["I like pickles",0.92731786]],"confidence":0.92731786}
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: speech2text
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 0.3.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,12 +9,12 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2011-03-
|
12
|
+
date: 2011-03-25 00:00:00.000000000 -04:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: curb
|
17
|
-
requirement: &
|
17
|
+
requirement: &2157005180 !ruby/object:Gem::Requirement
|
18
18
|
none: false
|
19
19
|
requirements:
|
20
20
|
- - ! '>='
|
@@ -22,10 +22,10 @@ dependencies:
|
|
22
22
|
version: '0'
|
23
23
|
type: :runtime
|
24
24
|
prerelease: false
|
25
|
-
version_requirements: *
|
25
|
+
version_requirements: *2157005180
|
26
26
|
- !ruby/object:Gem::Dependency
|
27
27
|
name: json
|
28
|
-
requirement: &
|
28
|
+
requirement: &2157004740 !ruby/object:Gem::Requirement
|
29
29
|
none: false
|
30
30
|
requirements:
|
31
31
|
- - ! '>='
|
@@ -33,24 +33,25 @@ dependencies:
|
|
33
33
|
version: '0'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
|
-
version_requirements: *
|
36
|
+
version_requirements: *2157004740
|
37
37
|
description: Super powers of Google wrapped in a nice Ruby interface
|
38
38
|
email: todd.fisher@gmail.com
|
39
|
-
executables:
|
39
|
+
executables:
|
40
|
+
- speech2text
|
40
41
|
extensions: []
|
41
42
|
extra_rdoc_files: []
|
42
43
|
files:
|
43
44
|
- lib/speech/audio_inspector.rb
|
44
45
|
- lib/speech/audio_splitter.rb
|
45
46
|
- lib/speech/audio_to_text.rb
|
46
|
-
- lib/speech/text.rb
|
47
47
|
- lib/speech/version.rb
|
48
48
|
- lib/speech.rb
|
49
49
|
- bin/speech2text
|
50
50
|
- test/audio_inspector_test.rb
|
51
51
|
- test/audio_splitter_test.rb
|
52
|
-
- test/
|
52
|
+
- test/audio_to_text_test.rb
|
53
53
|
- test/SampleAudio.wav
|
54
|
+
- test/samples/i-like-pickles.json
|
54
55
|
- test/samples/i-like-pickles.wav
|
55
56
|
- Rakefile
|
56
57
|
- README.rdoc
|
data/lib/speech/text.rb
DELETED
data/test/i-like-pickles.wav
DELETED
Binary file
|