speech2text 0.01 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +18 -5
- data/bin/speech2text +5 -0
- data/lib/speech.rb +1 -0
- data/lib/speech/audio_inspector.rb +1 -0
- data/lib/speech/audio_splitter.rb +2 -1
- data/lib/speech/audio_to_text.rb +6 -0
- data/lib/speech/version.rb +2 -1
- data/speech2text.gemspec +9 -8
- data/test/audio_inspector_test.rb +1 -0
- data/test/audio_splitter_test.rb +2 -1
- data/test/audio_to_text_test.rb +21 -0
- data/test/samples/i-like-pickles.json +1 -0
- metadata +10 -9
- data/lib/speech/text.rb +0 -11
- data/test/i-like-pickles.wav +0 -0
data/README.rdoc
CHANGED
@@ -1,10 +1,23 @@
|
|
1
|
-
|
1
|
+
= Speech2Text
|
2
2
|
|
3
3
|
Using the power of ffmpeg/flac/Google and Ruby, here is a simple interface to play with for converting speech to text.
|
4
4
|
|
5
|
-
|
6
|
-
The Google API will frequently return 500 errors without providing much reason as to why.
|
5
|
+
Using a new undocumented speech API from Google, with the help of this article: http://mikepultz.com/2011/03/accessing-google-speech-api-chrome-11/
|
7
6
|
|
8
|
-
|
7
|
+
We're able to provide a very simple API in Ruby to decode simple audio to text.
|
9
8
|
|
10
|
-
|
9
|
+
The API from Google is not yet public and so may change. It also seems to be very fragile: more often than not it will return a 500, so the library has retry code built in. For larger audio files, 10+ failures may occur before a successful result is retrieved.
|
10
|
+
|
11
|
+
It also appears that the API only likes smaller audio files, so there is a built-in chunker that allows us to split the audio up into smaller chunks.
|
12
|
+
|
13
|
+
== Example
|
14
|
+
|
15
|
+
audio = Speech::AudioToText.new("i-like-pickles.wav")
|
16
|
+
puts audio.to_text.inspect
|
17
|
+
=> {"captured_json"=>[["I like pickles", 0.92731786]], "confidence"=>0.92731786}
|
18
|
+
|
19
|
+
== Command Line
|
20
|
+
|
21
|
+
speech2text i-like-pickles.wav
|
22
|
+
cat i-like-pickles.json
|
23
|
+
{"captured_json":[["I like pickles",0.92731786]],"confidence":0.92731786}
|
data/bin/speech2text
CHANGED
data/lib/speech.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# -*- encoding: binary -*-
|
1
2
|
module Speech
|
2
3
|
|
3
4
|
class AudioSplitter
|
@@ -8,7 +9,7 @@ module Speech
|
|
8
9
|
|
9
10
|
def initialize(splitter, offset, duration)
|
10
11
|
self.offset = offset
|
11
|
-
self.chunk = "chunk-" + splitter.original_file.gsub(/\.(.*)$/, "-#{offset}" + '.\1')
|
12
|
+
self.chunk = File.join(File.dirname(splitter.original_file), "chunk-" + File.basename(splitter.original_file).gsub(/\.(.*)$/, "-#{offset}" + '.\1'))
|
12
13
|
self.duration = duration
|
13
14
|
self.splitter = splitter
|
14
15
|
end
|
data/lib/speech/audio_to_text.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# -*- encoding: binary -*-
|
1
2
|
module Speech
|
2
3
|
|
3
4
|
class AudioToText
|
@@ -21,6 +22,10 @@ module Speech
|
|
21
22
|
JSON.parse(File.read(self.captured_file))
|
22
23
|
end
|
23
24
|
|
25
|
+
def clean
|
26
|
+
File.unlink self.captured_file if self.captured_file && File.exist?(self.captured_file)
|
27
|
+
end
|
28
|
+
|
24
29
|
protected
|
25
30
|
|
26
31
|
def convert_chunk(easy, chunk, options={})
|
@@ -29,6 +34,7 @@ module Speech
|
|
29
34
|
while retrying
|
30
35
|
#easy.verbose = true
|
31
36
|
easy.headers['Content-Type'] = "audio/x-flac; rate=#{chunk.flac_rate}"
|
37
|
+
easy.headers['User-Agent'] = "https://github.com/taf2/speech2text"
|
32
38
|
easy.post_body = "Content=#{chunk.to_flac_bytes}"
|
33
39
|
easy.on_progress {|dl_total, dl_now, ul_total, ul_now| printf("%.2f/%.2f\r", ul_now, ul_total); true }
|
34
40
|
easy.on_complete {|easy| puts }
|
data/lib/speech/version.rb
CHANGED
data/speech2text.gemspec
CHANGED
@@ -2,14 +2,15 @@ $:.unshift File.expand_path(File.dirname(__FILE__) + "/lib")
|
|
2
2
|
require "speech/version"
|
3
3
|
|
4
4
|
Gem::Specification.new do |s|
|
5
|
-
s.name
|
6
|
-
s.authors
|
7
|
-
s.email
|
8
|
-
s.version
|
9
|
-
s.homepage
|
10
|
-
s.summary
|
11
|
-
s.description
|
12
|
-
s.files
|
5
|
+
s.name = "speech2text"
|
6
|
+
s.authors = ["Todd A. Fisher"]
|
7
|
+
s.email = "todd.fisher@gmail.com"
|
8
|
+
s.version = Speech::Info::VERSION
|
9
|
+
s.homepage = "https://github.com/taf2/speech2text"
|
10
|
+
s.summary = "Speech to Text Library"
|
11
|
+
s.description = "Super powers of Google wrapped in a nice Ruby interface"
|
12
|
+
s.files = Dir["{lib,bin,test}/**/*", "Rakefile", "README.rdoc", "*.gemspec"]
|
13
|
+
s.executables = %w(speech2text)
|
13
14
|
|
14
15
|
s.add_dependency "curb"
|
15
16
|
s.add_dependency "json"
|
data/test/audio_splitter_test.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# -*- encoding: binary -*-
|
1
2
|
require 'test/unit'
|
2
3
|
$:.unshift File.expand_path(File.dirname(__FILE__) + '/../lib')
|
3
4
|
require 'speech'
|
@@ -5,7 +6,7 @@ require 'speech'
|
|
5
6
|
class SpeechAudioSplitterTest < Test::Unit::TestCase
|
6
7
|
|
7
8
|
def test_audio_splitter
|
8
|
-
splitter = Speech::AudioSplitter.new("i-like-pickles.wav", 1)
|
9
|
+
splitter = Speech::AudioSplitter.new("samples/i-like-pickles.wav", 1)
|
9
10
|
|
10
11
|
assert_equal '00:00:03:52', splitter.duration.to_s
|
11
12
|
assert_equal 3.52, splitter.duration.to_f
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# -*- encoding: binary -*-
|
2
|
+
require 'test/unit'
|
3
|
+
$:.unshift File.expand_path(File.dirname(__FILE__) + '/../lib')
|
4
|
+
require 'speech'
|
5
|
+
|
6
|
+
class SpeechAudioToTextTest < Test::Unit::TestCase
|
7
|
+
def test_audio_to_text
|
8
|
+
audio = Speech::AudioToText.new("samples/i-like-pickles.wav")
|
9
|
+
captured_json = audio.to_text
|
10
|
+
assert captured_json
|
11
|
+
assert captured_json.key?("captured_json")
|
12
|
+
assert !captured_json['captured_json'].empty?
|
13
|
+
assert_equal ['captured_json', 'confidence'], captured_json.keys.sort
|
14
|
+
assert_equal "I like pickles", captured_json['captured_json'].flatten.first
|
15
|
+
assert captured_json['confidence'] > 0.9
|
16
|
+
# {"captured_json"=>[["I like pickles", 0.92731786]], "confidence"=>0.92731786}
|
17
|
+
# puts captured_json.inspect
|
18
|
+
ensure
|
19
|
+
audio.clean
|
20
|
+
end
|
21
|
+
end
|
@@ -0,0 +1 @@
|
|
1
|
+
{"captured_json":[["I like pickles",0.92731786]],"confidence":0.92731786}
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: speech2text
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 0.3.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,12 +9,12 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2011-03-
|
12
|
+
date: 2011-03-25 00:00:00.000000000 -04:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: curb
|
17
|
-
requirement: &
|
17
|
+
requirement: &2157005180 !ruby/object:Gem::Requirement
|
18
18
|
none: false
|
19
19
|
requirements:
|
20
20
|
- - ! '>='
|
@@ -22,10 +22,10 @@ dependencies:
|
|
22
22
|
version: '0'
|
23
23
|
type: :runtime
|
24
24
|
prerelease: false
|
25
|
-
version_requirements: *
|
25
|
+
version_requirements: *2157005180
|
26
26
|
- !ruby/object:Gem::Dependency
|
27
27
|
name: json
|
28
|
-
requirement: &
|
28
|
+
requirement: &2157004740 !ruby/object:Gem::Requirement
|
29
29
|
none: false
|
30
30
|
requirements:
|
31
31
|
- - ! '>='
|
@@ -33,24 +33,25 @@ dependencies:
|
|
33
33
|
version: '0'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
|
-
version_requirements: *
|
36
|
+
version_requirements: *2157004740
|
37
37
|
description: Super powers of Google wrapped in a nice Ruby interface
|
38
38
|
email: todd.fisher@gmail.com
|
39
|
-
executables:
|
39
|
+
executables:
|
40
|
+
- speech2text
|
40
41
|
extensions: []
|
41
42
|
extra_rdoc_files: []
|
42
43
|
files:
|
43
44
|
- lib/speech/audio_inspector.rb
|
44
45
|
- lib/speech/audio_splitter.rb
|
45
46
|
- lib/speech/audio_to_text.rb
|
46
|
-
- lib/speech/text.rb
|
47
47
|
- lib/speech/version.rb
|
48
48
|
- lib/speech.rb
|
49
49
|
- bin/speech2text
|
50
50
|
- test/audio_inspector_test.rb
|
51
51
|
- test/audio_splitter_test.rb
|
52
|
-
- test/
|
52
|
+
- test/audio_to_text_test.rb
|
53
53
|
- test/SampleAudio.wav
|
54
|
+
- test/samples/i-like-pickles.json
|
54
55
|
- test/samples/i-like-pickles.wav
|
55
56
|
- Rakefile
|
56
57
|
- README.rdoc
|
data/lib/speech/text.rb
DELETED
data/test/i-like-pickles.wav
DELETED
Binary file
|