noyes 1.0.1 → 1.1.1

Sign up to get free protection for your applications and to get access to all the features.
data/COPYING CHANGED
@@ -1,7 +1,22 @@
1
1
  Copyright 2010 Talkhouse. All rights reserved.
2
2
 
3
- Noyes is licensed under GPLv3. You can obtain a copy here:
4
- http://www.gnu.org/licenses/gpl.html
3
+ Redistribution and use in source and binary forms, with or without modification, are
4
+ permitted provided that the following conditions are met:
5
+
6
+ 1. Redistributions of source code must retain the above copyright notice, this list of
7
+ conditions and the following disclaimer.
8
+
9
+ 2. Redistributions in binary form must reproduce the above copyright notice, this list
10
+ of conditions and the following disclaimer in the documentation and/or other materials
11
+ provided with the distribution.
12
+
13
+ THIS SOFTWARE IS PROVIDED BY TALKHOUSE ``AS IS'' AND ANY EXPRESS OR IMPLIED
14
+ WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
15
+ FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL TALKHOUSE OR
16
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
17
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
18
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
19
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
20
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
21
+ ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
5
22
 
6
- If for any reason this license is not adequate for your purposes please contact
7
- talkhouse. We're very open minded.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.0.1
1
+ 1.1.1
data/bin/noyes ADDED
@@ -0,0 +1,43 @@
1
+ #!/usr/bin/env ruby
2
+ # vim: set filetype=ruby :
3
+ ROOT = File.dirname(File.dirname(__FILE__))
4
+ VERSION_FILE = "#{ROOT}/VERSION"
5
+ $: << "#{ROOT}/lib" << "#{ROOT}/ship"
6
+
7
+ require 'trollop'
8
+ require 'noyes_c'
9
+ include NoyesC
10
+
11
+ options = Trollop::options do
12
+ version "Noyes #{IO.read(VERSION_FILE).strip} (c) 2010 Talkhouse"
13
+ banner <<DOC'Usage: noyes [options] file1 file2 ...'
14
+ Converts files from audio to raw features. Supports format that your
15
+ version of SOX supports.
16
+ DOC
17
+ opt :force, 'Force creation'
18
+ opt :verbose, 'Verbose mode'
19
+ opt :pattern, 'Files matching pattern.', :type => :strings, :multi => true
20
+ end
21
+
22
+ patterns = options[:pattern].flatten
23
+ pattern_files = patterns.inject [] {|memo, pattern| memo | Dir[pattern]}
24
+ audio_files = ARGV | pattern_files
25
+ # Make sure we're not overwriting anything unless the force flag is used.
26
+ mfcc_files = audio_files.map {|audio_file| audio_file.sub /\.\w+$/, '.mfcc'}
27
+ unless options[:force]
28
+ mfcc_file = mfcc_files.detect {|mfcc_file| File.exists? mfcc_file}
29
+ if mfcc_file
30
+ puts "#{mfcc_file} already exists. Quitting. Use -f to force."
31
+ exit 9
32
+ end
33
+ end
34
+
35
+ audio_files.zip(mfcc_files).each do |audio_file, mfcc_file|
36
+ observations = file2features audio_file
37
+ puts mfcc_file if options[:verbose]
38
+ open(mfcc_file, 'wb') do |f|
39
+ f.print [observations.size, 100000].pack 'N*'
40
+ f.print [0x009c, 0x2306].pack 'n*'
41
+ f.puts observations.flatten.pack 'g*'
42
+ end
43
+ end
data/bin/nrec CHANGED
@@ -11,6 +11,7 @@ options = Trollop::options do
11
11
  banner 'Usage: nrec [options] file1 file2 ...'
12
12
  opt :implementation, "ruby, c, or java", :default => 'ruby'
13
13
  opt :address, "address", :default => '174.129.244.159'
14
+ opt :file, "Output file (overrides default TCP/IP)", :type => :string
14
15
  opt :port, "port", :default => 2348
15
16
  opt :bits, "bit depth", :default => 16
16
17
  opt :frequency, "sampling frequency", :default => 16000
@@ -22,8 +23,11 @@ if options[:bits] != 16 && options[:payload] == 'mfcc'
22
23
  Trollop::die :bits, "must be 16 for mfcc"
23
24
  end
24
25
 
25
- if options[:payload] == 'mfcc' && ((options[:frequency] % 8000) != 0)
26
- Trollop::die :frequency, "(#{options[:frequency]}) for mfcc must be divisible by 8000"
26
+ if options[:payload] == 'mfcc'
27
+ f = options[:frequency]
28
+ if f != 8000 && f != 16000
29
+ Trollop::die :frequency, "(#{options[:frequency]}) for mfcc must be either 16000 or 8000"
30
+ end
27
31
  end
28
32
 
29
33
  # Must set implementation specific library path before requiring libraries.
@@ -99,12 +103,23 @@ end
99
103
  require 'socket'
100
104
 
101
105
  def recognize file, options
102
- TCPSocket.open(options[:address], options[:port]) do |client|
103
- bits, rate = options[:bits], options[:frequency]
104
- if options[:pcm]
105
- send_incremental_pcm file, client, client, bits, rate
106
- else
107
- send_incremental_features file, client, client, bits, rate
106
+ bits, rate = options[:bits], options[:frequency]
107
+ if rate == 8000
108
+ rateinfo = FEAT8M16R
109
+ elsif rate == 16000
110
+ rateinfo = FEAT16M16R
111
+ end
112
+ if options[:file]
113
+ open(options[:file], 'wb') do |client|
114
+ send_incremental_features file, client, nil, bits, rateinfo
115
+ end
116
+ else
117
+ TCPSocket.open(options[:address], options[:port]) do |client|
118
+ if options[:pcm]
119
+ send_incremental_pcm file, client, client, bits, rateinfo
120
+ else
121
+ send_incremental_features file, client, client, bits, rateinfo
122
+ end
108
123
  end
109
124
  end
110
125
  rescue Errno::ECONNREFUSED
data/lib/common/file2feat.rb ADDED
@@ -0,0 +1,42 @@
1
+ require 'stringio'
2
+
3
+ # Convert audio file into an IO object with features.
4
+ def file2fstream file, format = FEAT8M16R
5
+ to_server = StringIO.new 'wb'
6
+ from_server = StringIO.new 'dummy result'
7
+ result = send_incremental_features file, to_server, from_server, 16, format
8
+ StringIO.new to_server.string
9
+ end
10
+
11
+ # Take a talkhouse feature stream and convert it into an array.
12
+ def stream2features stream
13
+ observations = []
14
+ raise "Unexpected magic number." if stream.read(TMAGIC.size) != TMAGIC
15
+ raise "Expected TSTART." if stream.read(4) != TSTART
16
+ loop do
17
+ case stream.read(4)
18
+ when TPCM
19
+ count = stream.read(4).unpack('N')[0]
20
+ pcm = stream.read count
21
+ pcm = pcm.unpack('g*')
22
+ when TCEPSTRA
23
+ count = stream.read(4).unpack('N')[0]
24
+ cmn = Array.new(count) {stream.read(13*4).unpack('g*')}
25
+ observations += cmn
26
+ when TBYE
27
+ break
28
+ when TEND
29
+ else
30
+ end
31
+ end
32
+ delta_filter = Noyes::DoubleDeltaFilter.new
33
+ observations >>= delta_filter
34
+ observations.map {|a| a.flatten}
35
+ end
36
+
37
+ # Convenience function for converting almost any type of audio file to an mfcc
38
+ # feature array.
39
+ def file2features file, format = FEAT8M16R
40
+ stream = file2fstream file, format
41
+ stream2features stream
42
+ end
@@ -2,7 +2,7 @@ TMAGIC = '1.0 talkhouse'
2
2
 
3
3
  # The following constants are packed as 4 byte big-endian integers.
4
4
  TSTART = [0].pack('N')
5
- #TAUDIO = [1].pack('N')
5
+ TPCM = [1].pack('N')
6
6
  TEND = [2].pack('N')
7
7
  TBYE = [3].pack('N')
8
8
  TCEPSTRA = [4].pack('N')
@@ -1,6 +1,12 @@
1
1
  require 'noyes'
2
2
  require 'common/file2pcm'
3
3
 
4
+
5
+ # Parameters for 8 kHz models and 16 kHz data
6
+ FEAT8M16R = [32, 200, 3700, 256*2, 8000*2, 80*2, 205*2]
7
+ # Parameters for 16 kHz models and 16 kHz data
8
+ FEAT16M16R = [40, 133.33, 6855.5, 512, 8000*2, 80*2, 205*2]
9
+
4
10
  # The following flags are in network byte order (big endian) and are 4 bytes
5
11
  # long.
6
12
  #
@@ -17,14 +23,9 @@ require 'common/file2pcm'
17
23
  # Use sox to convert a file of almost any common type into pcm.
18
24
  # Not sure this works for anything besides 16 bits.
19
25
  # Takes a file and two IO-like objects.
20
- def send_incremental_features file, to_server, from_server, bits, freq
26
+ def send_incremental_features file, to_server, from_server, bits, freqinfo
21
27
  stats = {}
22
- nfilt = 40
23
- min_freq = 133.33334
24
- max_freq = 6855.4976
25
- nfft = 512
26
- shift = 160
27
- frame_size = 410
28
+ nfilt, min_freq, max_freq, nfft, freq, shift, frame_size = *freqinfo
28
29
  preemphasizer = Preemphasizer.new 0.97
29
30
  segmenter = Segmenter.new frame_size, shift
30
31
  hamming_windower = HammingWindow.new frame_size
@@ -52,7 +53,7 @@ def send_incremental_features file, to_server, from_server, bits, freq
52
53
  stats[:process_time] += Time.new - process_time_start
53
54
  to_server.write TCEPSTRA
54
55
  to_server.write [data.size].pack('N')
55
- print '.'
56
+ # print '.'
56
57
  data.each {|cmn| to_server.write cmn.pack('g*')}
57
58
  to_server.flush
58
59
  end
@@ -60,9 +61,9 @@ def send_incremental_features file, to_server, from_server, bits, freq
60
61
  to_server.write TBYE
61
62
  to_server.flush
62
63
  latency_start = Time.new
63
- stats[:transcript] = from_server.read
64
+ stats[:transcript] = from_server ? from_server.read : ""
64
65
  stats[:latency] = Time.new - latency_start
65
- return stats
66
+ stats
66
67
  end
67
68
 
68
69
  def send_incremental_pcm file, to_server, from_server, depth, rate
@@ -74,7 +75,7 @@ def send_incremental_pcm file, to_server, from_server, depth, rate
74
75
  to_server.write TA16_16
75
76
  to_server.write [chunk.size/2].pack('N')
76
77
  to_server.write chunk
77
- print '.'
78
+ # print '.'
78
79
  to_server.flush
79
80
  chunk = raw.slice! 0, 1024
80
81
  end
data/lib/common.rb CHANGED
@@ -4,4 +4,5 @@ require 'common/noyes_dsl'
4
4
  require 'common/noyes_math'
5
5
  require 'common/noyes_protocol'
6
6
  require 'common/send_incrementally'
7
+ require 'common/file2feat'
7
8
  require 'common/ruby_ext'
data/ship/noyes.jar ADDED
Binary file
metadata CHANGED
@@ -1,13 +1,8 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: noyes
3
3
  version: !ruby/object:Gem::Version
4
- hash: 21
5
- prerelease: false
6
- segments:
7
- - 1
8
- - 0
9
- - 1
10
- version: 1.0.1
4
+ prerelease:
5
+ version: 1.1.1
11
6
  platform: ruby
12
7
  authors:
13
8
  - Joe Woelfel
@@ -15,7 +10,7 @@ autorequire:
15
10
  bindir: bin
16
11
  cert_chain: []
17
12
 
18
- date: 2010-09-01 00:00:00 -04:00
13
+ date: 2011-12-13 00:00:00 -05:00
19
14
  default_executable:
20
15
  dependencies:
21
16
  - !ruby/object:Gem::Dependency
@@ -26,11 +21,6 @@ dependencies:
26
21
  requirements:
27
22
  - - ">="
28
23
  - !ruby/object:Gem::Version
29
- hash: 23
30
- segments:
31
- - 1
32
- - 0
33
- - 0
34
24
  version: 1.0.0
35
25
  type: :runtime
36
26
  version_requirements: *id001
@@ -40,6 +30,7 @@ description: |-
40
30
  email: joe@talkhouse.com
41
31
  executables:
42
32
  - mock_noyes_server
33
+ - noyes
43
34
  - noyes_dump44k
44
35
  - noyes_dump8k
45
36
  - nrec
@@ -82,6 +73,7 @@ files:
82
73
  - lib/cext/segmenter.c
83
74
  - lib/cext/speech_trimmer.c
84
75
  - lib/common.rb
76
+ - lib/common/file2feat.rb
85
77
  - lib/common/file2pcm.rb
86
78
  - lib/common/mock_noyes_server.rb
87
79
  - lib/common/noyes_dsl.rb
@@ -123,9 +115,11 @@ files:
123
115
  - lib/ruby_impl/preemphasis.rb
124
116
  - lib/ruby_impl/segment.rb
125
117
  - lib/ruby_impl/speech_trimmer.rb
118
+ - ship/noyes.jar
126
119
  - COPYING
127
120
  - FAQ
128
121
  - bin/mock_noyes_server
122
+ - bin/noyes
129
123
  - bin/noyes_dump44k
130
124
  - bin/noyes_dump8k
131
125
  - bin/nrec
@@ -134,8 +128,8 @@ homepage: http://github.com/talkhouse/noyes
134
128
  licenses: []
135
129
 
136
130
  post_install_message:
137
- rdoc_options:
138
- - --charset=UTF-8
131
+ rdoc_options: []
132
+
139
133
  require_paths:
140
134
  - lib
141
135
  - ship
@@ -144,23 +138,17 @@ required_ruby_version: !ruby/object:Gem::Requirement
144
138
  requirements:
145
139
  - - ">="
146
140
  - !ruby/object:Gem::Version
147
- hash: 3
148
- segments:
149
- - 0
150
141
  version: "0"
151
142
  required_rubygems_version: !ruby/object:Gem::Requirement
152
143
  none: false
153
144
  requirements:
154
145
  - - ">="
155
146
  - !ruby/object:Gem::Version
156
- hash: 3
157
- segments:
158
- - 0
159
147
  version: "0"
160
148
  requirements: []
161
149
 
162
150
  rubyforge_project:
163
- rubygems_version: 1.3.7
151
+ rubygems_version: 1.5.0
164
152
  signing_key:
165
153
  specification_version: 3
166
154
  summary: A signal processing library