speech2text 0.01 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.rdoc CHANGED
@@ -1,10 +1,23 @@
1
- == Speech2Text
1
+ = Speech2Text
2
2
 
3
3
  Using the power of ffmpeg/flac/Google and ruby here is a simple interface to play with to convert speech to text.
4
4
 
5
- At this point the API from Google is not documented and seemly free.
6
- The Google API will frequently return 500 errors without providing much reason as to why.
5
+ Using a new undocumented speech API from Google with the help of this article: http://mikepultz.com/2011/03/accessing-google-speech-api-chrome-11/
7
6
 
8
- It's possible that Google will decide to not open this API up and this effort my completely be for not...
7
+ We're able to provide a very simple API in Ruby to decode simple audio to text.
9
8
 
10
- This was all made possible in short order all thanks to Chrome 11 and http://mikepultz.com/2011/03/accessing-google-speech-api-chrome-11/
9
+ The API from Google is not yet public and so may change. It also seems to be very fragile: more often than not it will return a 500, so the library has retry code built in. For larger audio files, 10+ failures may occur before a successful result is retrieved.
10
+
11
+ It also appears that the API only likes smaller audio files, so there is a built-in chunker that allows us to split the audio up into smaller chunks.
12
+
13
+ == Example
14
+
15
+ audio = Speech::AudioToText.new("i-like-pickles.wav")
16
+ puts audio.to_text.inspect
17
+ => {"captured_json"=>[["I like pickles", 0.92731786]], "confidence"=>0.92731786}
18
+
19
+ == Command Line
20
+
21
+ speech2text i-like-pickles.wav
22
+ cat i-like-pickles.json
23
+ {"captured_json"=>[["I like pickles", 0.92731786]], "confidence"=>0.92731786}
data/bin/speech2text CHANGED
@@ -1,2 +1,7 @@
1
1
  #!/this/will/be/replaced/by/rubygems
2
2
  # -*- encoding: binary -*-
3
+
4
+ require 'speech'
5
+
6
+ captured_json = Speech::AudioToText.new(ARGV[0]).to_text
7
+ puts captured_json.inspect
data/lib/speech.rb CHANGED
@@ -1,3 +1,4 @@
1
+ # -*- encoding: binary -*-
1
2
  require 'curb'
2
3
  require 'json'
3
4
 
@@ -1,3 +1,4 @@
1
+ # -*- encoding: binary -*-
1
2
  module Speech
2
3
 
3
4
  class AudioInspector
@@ -1,3 +1,4 @@
1
+ # -*- encoding: binary -*-
1
2
  module Speech
2
3
 
3
4
  class AudioSplitter
@@ -8,7 +9,7 @@ module Speech
8
9
 
9
10
  def initialize(splitter, offset, duration)
10
11
  self.offset = offset
11
- self.chunk = "chunk-" + splitter.original_file.gsub(/\.(.*)$/, "-#{offset}" + '.\1')
12
+ self.chunk = File.join(File.dirname(splitter.original_file), "chunk-" + File.basename(splitter.original_file).gsub(/\.(.*)$/, "-#{offset}" + '.\1'))
12
13
  self.duration = duration
13
14
  self.splitter = splitter
14
15
  end
@@ -1,3 +1,4 @@
1
+ # -*- encoding: binary -*-
1
2
  module Speech
2
3
 
3
4
  class AudioToText
@@ -21,6 +22,10 @@ module Speech
21
22
  JSON.parse(File.read(self.captured_file))
22
23
  end
23
24
 
25
+ def clean
26
+ File.unlink self.captured_file if self.captured_file && File.exist?(self.captured_file)
27
+ end
28
+
24
29
  protected
25
30
 
26
31
  def convert_chunk(easy, chunk, options={})
@@ -29,6 +34,7 @@ module Speech
29
34
  while retrying
30
35
  #easy.verbose = true
31
36
  easy.headers['Content-Type'] = "audio/x-flac; rate=#{chunk.flac_rate}"
37
+ easy.headers['User-Agent'] = "https://github.com/taf2/speech2text"
32
38
  easy.post_body = "Content=#{chunk.to_flac_bytes}"
33
39
  easy.on_progress {|dl_total, dl_now, ul_total, ul_now| printf("%.2f/%.2f\r", ul_now, ul_total); true }
34
40
  easy.on_complete {|easy| puts }
@@ -1,5 +1,6 @@
1
+ # -*- encoding: binary -*-
1
2
  module Speech
2
3
  class Info
3
- VERSION='0.01'
4
+ VERSION='0.3.0'
4
5
  end
5
6
  end
data/speech2text.gemspec CHANGED
@@ -2,14 +2,15 @@ $:.unshift File.expand_path(File.dirname(__FILE__) + "/lib")
2
2
  require "speech/version"
3
3
 
4
4
  Gem::Specification.new do |s|
5
- s.name = "speech2text"
6
- s.authors = ["Todd A. Fisher"]
7
- s.email = "todd.fisher@gmail.com"
8
- s.version = Speech::Info::VERSION
9
- s.homepage = "https://github.com/taf2/speech2text"
10
- s.summary = "Speech to Text Library"
11
- s.description = "Super powers of Google wrapped in a nice Ruby interface"
12
- s.files = Dir["{lib,bin,test}/**/*", "Rakefile", "README.rdoc", "*.gemspec"]
5
+ s.name = "speech2text"
6
+ s.authors = ["Todd A. Fisher"]
7
+ s.email = "todd.fisher@gmail.com"
8
+ s.version = Speech::Info::VERSION
9
+ s.homepage = "https://github.com/taf2/speech2text"
10
+ s.summary = "Speech to Text Library"
11
+ s.description = "Super powers of Google wrapped in a nice Ruby interface"
12
+ s.files = Dir["{lib,bin,test}/**/*", "Rakefile", "README.rdoc", "*.gemspec"]
13
+ s.executables = %w(speech2text)
13
14
 
14
15
  s.add_dependency "curb"
15
16
  s.add_dependency "json"
@@ -1,3 +1,4 @@
1
+ # -*- encoding: binary -*-
1
2
  require 'test/unit'
2
3
  $:.unshift File.expand_path(File.dirname(__FILE__) + '/../lib')
3
4
  require 'speech'
@@ -1,3 +1,4 @@
1
+ # -*- encoding: binary -*-
1
2
  require 'test/unit'
2
3
  $:.unshift File.expand_path(File.dirname(__FILE__) + '/../lib')
3
4
  require 'speech'
@@ -5,7 +6,7 @@ require 'speech'
5
6
  class SpeechAudioSplitterTest < Test::Unit::TestCase
6
7
 
7
8
  def test_audio_splitter
8
- splitter = Speech::AudioSplitter.new("i-like-pickles.wav", 1)
9
+ splitter = Speech::AudioSplitter.new("samples/i-like-pickles.wav", 1)
9
10
 
10
11
  assert_equal '00:00:03:52', splitter.duration.to_s
11
12
  assert_equal 3.52, splitter.duration.to_f
@@ -0,0 +1,21 @@
1
+ # -*- encoding: binary -*-
2
+ require 'test/unit'
3
+ $:.unshift File.expand_path(File.dirname(__FILE__) + '/../lib')
4
+ require 'speech'
5
+
6
+ class SpeechAudioToTextTest < Test::Unit::TestCase
7
+ def test_audio_to_text
8
+ audio = Speech::AudioToText.new("samples/i-like-pickles.wav")
9
+ captured_json = audio.to_text
10
+ assert captured_json
11
+ assert captured_json.key?("captured_json")
12
+ assert !captured_json['captured_json'].empty?
13
+ assert_equal ['captured_json', 'confidence'], captured_json.keys.sort
14
+ assert_equal "I like pickles", captured_json['captured_json'].flatten.first
15
+ assert captured_json['confidence'] > 0.9
16
+ # {"captured_json"=>[["I like pickles", 0.92731786]], "confidence"=>0.92731786}
17
+ # puts captured_json.inspect
18
+ ensure
19
+ audio.clean
20
+ end
21
+ end
@@ -0,0 +1 @@
1
+ {"captured_json":[["I like pickles",0.92731786]],"confidence":0.92731786}
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: speech2text
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.01'
4
+ version: 0.3.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,12 +9,12 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-03-24 00:00:00.000000000 -04:00
12
+ date: 2011-03-25 00:00:00.000000000 -04:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: curb
17
- requirement: &2157005460 !ruby/object:Gem::Requirement
17
+ requirement: &2157005180 !ruby/object:Gem::Requirement
18
18
  none: false
19
19
  requirements:
20
20
  - - ! '>='
@@ -22,10 +22,10 @@ dependencies:
22
22
  version: '0'
23
23
  type: :runtime
24
24
  prerelease: false
25
- version_requirements: *2157005460
25
+ version_requirements: *2157005180
26
26
  - !ruby/object:Gem::Dependency
27
27
  name: json
28
- requirement: &2157005040 !ruby/object:Gem::Requirement
28
+ requirement: &2157004740 !ruby/object:Gem::Requirement
29
29
  none: false
30
30
  requirements:
31
31
  - - ! '>='
@@ -33,24 +33,25 @@ dependencies:
33
33
  version: '0'
34
34
  type: :runtime
35
35
  prerelease: false
36
- version_requirements: *2157005040
36
+ version_requirements: *2157004740
37
37
  description: Super powers of Google wrapped in a nice Ruby interface
38
38
  email: todd.fisher@gmail.com
39
- executables: []
39
+ executables:
40
+ - speech2text
40
41
  extensions: []
41
42
  extra_rdoc_files: []
42
43
  files:
43
44
  - lib/speech/audio_inspector.rb
44
45
  - lib/speech/audio_splitter.rb
45
46
  - lib/speech/audio_to_text.rb
46
- - lib/speech/text.rb
47
47
  - lib/speech/version.rb
48
48
  - lib/speech.rb
49
49
  - bin/speech2text
50
50
  - test/audio_inspector_test.rb
51
51
  - test/audio_splitter_test.rb
52
- - test/i-like-pickles.wav
52
+ - test/audio_to_text_test.rb
53
53
  - test/SampleAudio.wav
54
+ - test/samples/i-like-pickles.json
54
55
  - test/samples/i-like-pickles.wav
55
56
  - Rakefile
56
57
  - README.rdoc
data/lib/speech/text.rb DELETED
@@ -1,11 +0,0 @@
1
- module Speech
2
- class Text
3
-
4
- def initialize(audio_file, options={})
5
- end
6
-
7
- def decode_audio(flac16k_audio)
8
- end
9
-
10
- end
11
- end
Binary file