speech2text 0.01 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
data/README.rdoc CHANGED
@@ -1,10 +1,23 @@
1
- == Speech2Text
1
+ = Speech2Text
2
2
 
3
3
  Using the power of ffmpeg/flac/Google and Ruby, here is a simple interface to play with for converting speech to text.
4
4
 
5
- At this point the API from Google is not documented and seemingly free.
6
- The Google API will frequently return 500 errors without providing much reason as to why.
5
+ Using a new undocumented speech API from Google with the help of this article: http://mikepultz.com/2011/03/accessing-google-speech-api-chrome-11/
7
6
 
8
- It's possible that Google will decide not to open this API up, and this effort may be completely for naught...
7
+ We're able to provide a very simple API in Ruby to decode simple audio to text.
9
8
 
10
- This was all made possible in short order all thanks to Chrome 11 and http://mikepultz.com/2011/03/accessing-google-speech-api-chrome-11/
9
+ The API from Google is not yet public and so may change. It also seems to be very fragile: more often than not it will return a 500, so the library has retry code built in. For larger audio files, 10+ failures may be returned before a successful result is retrieved...
10
+
11
+ It also appears that the API only likes smaller audio files, so there is a built-in chunker that allows us to split the audio up into smaller chunks.
12
+
13
+ == Example
14
+
15
+ audio = Speech::AudioToText.new("i-like-pickles.wav")
16
+ puts audio.to_text.inspect
17
+ => {"captured_json"=>[["I like pickles", 0.92731786]], "confidence"=>0.92731786}
18
+
19
+ == Command Line
20
+
21
+ speech2text i-like-pickles.wav
22
+ cat i-like-pickles.json
23
+ {"captured_json":[["I like pickles",0.92731786]],"confidence":0.92731786}
data/bin/speech2text CHANGED
@@ -1,2 +1,7 @@
1
1
  #!/this/will/be/replaced/by/rubygems
2
2
  # -*- encoding: binary -*-
3
+
4
+ require 'speech'
5
+
6
+ captured_json = Speech::AudioToText.new(ARGV[0]).to_text
7
+ puts captured_json.inspect
data/lib/speech.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # -*- encoding: binary -*-
1
2
  require 'curb'
2
3
  require 'json'
3
4
 
@@ -1,3 +1,4 @@
1
+ # -*- encoding: binary -*-
1
2
  module Speech
2
3
 
3
4
  class AudioInspector
@@ -1,3 +1,4 @@
1
+ # -*- encoding: binary -*-
1
2
  module Speech
2
3
 
3
4
  class AudioSplitter
@@ -8,7 +9,7 @@ module Speech
8
9
 
9
10
  def initialize(splitter, offset, duration)
10
11
  self.offset = offset
11
- self.chunk = "chunk-" + splitter.original_file.gsub(/\.(.*)$/, "-#{offset}" + '.\1')
12
+ self.chunk = File.join(File.dirname(splitter.original_file), "chunk-" + File.basename(splitter.original_file).gsub(/\.(.*)$/, "-#{offset}" + '.\1'))
12
13
  self.duration = duration
13
14
  self.splitter = splitter
14
15
  end
@@ -1,3 +1,4 @@
1
+ # -*- encoding: binary -*-
1
2
  module Speech
2
3
 
3
4
  class AudioToText
@@ -21,6 +22,10 @@ module Speech
21
22
  JSON.parse(File.read(self.captured_file))
22
23
  end
23
24
 
25
+ def clean
26
+ File.unlink self.captured_file if self.captured_file && File.exist?(self.captured_file)
27
+ end
28
+
24
29
  protected
25
30
 
26
31
  def convert_chunk(easy, chunk, options={})
@@ -29,6 +34,7 @@ module Speech
29
34
  while retrying
30
35
  #easy.verbose = true
31
36
  easy.headers['Content-Type'] = "audio/x-flac; rate=#{chunk.flac_rate}"
37
+ easy.headers['User-Agent'] = "https://github.com/taf2/speech2text"
32
38
  easy.post_body = "Content=#{chunk.to_flac_bytes}"
33
39
  easy.on_progress {|dl_total, dl_now, ul_total, ul_now| printf("%.2f/%.2f\r", ul_now, ul_total); true }
34
40
  easy.on_complete {|easy| puts }
@@ -1,5 +1,6 @@
1
+ # -*- encoding: binary -*-
1
2
  module Speech
2
3
  class Info
3
- VERSION='0.01'
4
+ VERSION='0.3.0'
4
5
  end
5
6
  end
data/speech2text.gemspec CHANGED
@@ -2,14 +2,15 @@ $:.unshift File.expand_path(File.dirname(__FILE__) + "/lib")
2
2
  require "speech/version"
3
3
 
4
4
  Gem::Specification.new do |s|
5
- s.name = "speech2text"
6
- s.authors = ["Todd A. Fisher"]
7
- s.email = "todd.fisher@gmail.com"
8
- s.version = Speech::Info::VERSION
9
- s.homepage = "https://github.com/taf2/speech2text"
10
- s.summary = "Speech to Text Library"
11
- s.description = "Super powers of Google wrapped in a nice Ruby interface"
12
- s.files = Dir["{lib,bin,test}/**/*", "Rakefile", "README.rdoc", "*.gemspec"]
5
+ s.name = "speech2text"
6
+ s.authors = ["Todd A. Fisher"]
7
+ s.email = "todd.fisher@gmail.com"
8
+ s.version = Speech::Info::VERSION
9
+ s.homepage = "https://github.com/taf2/speech2text"
10
+ s.summary = "Speech to Text Library"
11
+ s.description = "Super powers of Google wrapped in a nice Ruby interface"
12
+ s.files = Dir["{lib,bin,test}/**/*", "Rakefile", "README.rdoc", "*.gemspec"]
13
+ s.executables = %w(speech2text)
13
14
 
14
15
  s.add_dependency "curb"
15
16
  s.add_dependency "json"
@@ -1,3 +1,4 @@
1
+ # -*- encoding: binary -*-
1
2
  require 'test/unit'
2
3
  $:.unshift File.expand_path(File.dirname(__FILE__) + '/../lib')
3
4
  require 'speech'
@@ -1,3 +1,4 @@
1
+ # -*- encoding: binary -*-
1
2
  require 'test/unit'
2
3
  $:.unshift File.expand_path(File.dirname(__FILE__) + '/../lib')
3
4
  require 'speech'
@@ -5,7 +6,7 @@ require 'speech'
5
6
  class SpeechAudioSplitterTest < Test::Unit::TestCase
6
7
 
7
8
  def test_audio_splitter
8
- splitter = Speech::AudioSplitter.new("i-like-pickles.wav", 1)
9
+ splitter = Speech::AudioSplitter.new("samples/i-like-pickles.wav", 1)
9
10
 
10
11
  assert_equal '00:00:03:52', splitter.duration.to_s
11
12
  assert_equal 3.52, splitter.duration.to_f
@@ -0,0 +1,21 @@
1
+ # -*- encoding: binary -*-
2
+ require 'test/unit'
3
+ $:.unshift File.expand_path(File.dirname(__FILE__) + '/../lib')
4
+ require 'speech'
5
+
6
+ class SpeechAudioToTextTest < Test::Unit::TestCase
7
+ def test_audio_to_text
8
+ audio = Speech::AudioToText.new("samples/i-like-pickles.wav")
9
+ captured_json = audio.to_text
10
+ assert captured_json
11
+ assert captured_json.key?("captured_json")
12
+ assert !captured_json['captured_json'].empty?
13
+ assert_equal ['captured_json', 'confidence'], captured_json.keys.sort
14
+ assert_equal "I like pickles", captured_json['captured_json'].flatten.first
15
+ assert captured_json['confidence'] > 0.9
16
+ # {"captured_json"=>[["I like pickles", 0.92731786]], "confidence"=>0.92731786}
17
+ # puts captured_json.inspect
18
+ ensure
19
+ audio.clean
20
+ end
21
+ end
@@ -0,0 +1 @@
1
+ {"captured_json":[["I like pickles",0.92731786]],"confidence":0.92731786}
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: speech2text
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.01'
4
+ version: 0.3.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,12 +9,12 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-03-24 00:00:00.000000000 -04:00
12
+ date: 2011-03-25 00:00:00.000000000 -04:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: curb
17
- requirement: &2157005460 !ruby/object:Gem::Requirement
17
+ requirement: &2157005180 !ruby/object:Gem::Requirement
18
18
  none: false
19
19
  requirements:
20
20
  - - ! '>='
@@ -22,10 +22,10 @@ dependencies:
22
22
  version: '0'
23
23
  type: :runtime
24
24
  prerelease: false
25
- version_requirements: *2157005460
25
+ version_requirements: *2157005180
26
26
  - !ruby/object:Gem::Dependency
27
27
  name: json
28
- requirement: &2157005040 !ruby/object:Gem::Requirement
28
+ requirement: &2157004740 !ruby/object:Gem::Requirement
29
29
  none: false
30
30
  requirements:
31
31
  - - ! '>='
@@ -33,24 +33,25 @@ dependencies:
33
33
  version: '0'
34
34
  type: :runtime
35
35
  prerelease: false
36
- version_requirements: *2157005040
36
+ version_requirements: *2157004740
37
37
  description: Super powers of Google wrapped in a nice Ruby interface
38
38
  email: todd.fisher@gmail.com
39
- executables: []
39
+ executables:
40
+ - speech2text
40
41
  extensions: []
41
42
  extra_rdoc_files: []
42
43
  files:
43
44
  - lib/speech/audio_inspector.rb
44
45
  - lib/speech/audio_splitter.rb
45
46
  - lib/speech/audio_to_text.rb
46
- - lib/speech/text.rb
47
47
  - lib/speech/version.rb
48
48
  - lib/speech.rb
49
49
  - bin/speech2text
50
50
  - test/audio_inspector_test.rb
51
51
  - test/audio_splitter_test.rb
52
- - test/i-like-pickles.wav
52
+ - test/audio_to_text_test.rb
53
53
  - test/SampleAudio.wav
54
+ - test/samples/i-like-pickles.json
54
55
  - test/samples/i-like-pickles.wav
55
56
  - Rakefile
56
57
  - README.rdoc
data/lib/speech/text.rb DELETED
@@ -1,11 +0,0 @@
1
- module Speech
2
- class Text
3
-
4
- def initialize(audio_file, options={})
5
- end
6
-
7
- def decode_audio(flac16k_audio)
8
- end
9
-
10
- end
11
- end
Binary file