noyes 1.0.1 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/COPYING CHANGED
@@ -1,7 +1,22 @@
1
1
  Copyright 2010 Talkhouse. All rights reserved.
2
2
 
3
- Noyes is licensed under GPLv3. You can obtain a copy here:
4
- http://www.gnu.org/licenses/gpl.html
3
+ Redistribution and use in source and binary forms, with or without modification, are
4
+ permitted provided that the following conditions are met:
5
+
6
+ 1. Redistributions of source code must retain the above copyright notice, this list of
7
+ conditions and the following disclaimer.
8
+
9
+ 2. Redistributions in binary form must reproduce the above copyright notice, this list
10
+ of conditions and the following disclaimer in the documentation and/or other materials
11
+ provided with the distribution.
12
+
13
+ THIS SOFTWARE IS PROVIDED BY TALKHOUSE ``AS IS'' AND ANY EXPRESS OR IMPLIED
14
+ WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
15
+ FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL TALKHOUSE OR
16
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
17
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
18
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
19
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
20
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
21
+ ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
5
22
 
6
- If for any reason this license is not adequate for your purposes please contact
7
- talkhouse. We're very open minded.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.0.1
1
+ 1.1.1
data/bin/noyes ADDED
@@ -0,0 +1,43 @@
1
+ #!/usr/bin/env ruby
2
+ # vim: set filetype=ruby :
3
+ ROOT = File.dirname(File.dirname(__FILE__))
4
+ VERSION_FILE = "#{ROOT}/VERSION"
5
+ $: << "#{ROOT}/lib" << "#{ROOT}/ship"
6
+
7
+ require 'trollop'
8
+ require 'noyes_c'
9
+ include NoyesC
10
+
11
+ options = Trollop::options do
12
+ version "Noyes #{IO.read(VERSION_FILE).strip} (c) 2010 Talkhouse"
13
+ banner <<DOC'Usage: noyes [options] file1 file2 ...'
14
+ Converts files from audio to raw features. Supports format that your
15
+ version of SOX supports.
16
+ DOC
17
+ opt :force, 'Force creation'
18
+ opt :verbose, 'Verbose mode'
19
+ opt :pattern, 'Files matching pattern.', :type => :strings, :multi => true
20
+ end
21
+
22
+ patterns = options[:pattern].flatten
23
+ pattern_files = patterns.inject [] {|memo, pattern| memo | Dir[pattern]}
24
+ audio_files = ARGV | pattern_files
25
+ # Make sure we're not overwriting anything unless the force flag is used.
26
+ mfcc_files = audio_files.map {|audio_file| audio_file.sub /\.\w+$/, '.mfcc'}
27
+ unless options[:force]
28
+ mfcc_file = mfcc_files.detect {|mfcc_file| File.exists? mfcc_file}
29
+ if mfcc_file
30
+ puts "#{mfcc_file} already exists. Quitting. Use -f to force."
31
+ exit 9
32
+ end
33
+ end
34
+
35
+ audio_files.zip(mfcc_files).each do |audio_file, mfcc_file|
36
+ observations = file2features audio_file
37
+ puts mfcc_file if options[:verbose]
38
+ open(mfcc_file, 'wb') do |f|
39
+ f.print [observations.size, 100000].pack 'N*'
40
+ f.print [0x009c, 0x2306].pack 'n*'
41
+ f.puts observations.flatten.pack 'g*'
42
+ end
43
+ end
data/bin/nrec CHANGED
@@ -11,6 +11,7 @@ options = Trollop::options do
11
11
  banner 'Usage: nrec [options] file1 file2 ...'
12
12
  opt :implementation, "ruby, c, or java", :default => 'ruby'
13
13
  opt :address, "address", :default => '174.129.244.159'
14
+ opt :file, "Output file (overrides default TCP/IP)", :type => :string
14
15
  opt :port, "port", :default => 2348
15
16
  opt :bits, "bit depth", :default => 16
16
17
  opt :frequency, "sampling frequency", :default => 16000
@@ -22,8 +23,11 @@ if options[:bits] != 16 && options[:payload] == 'mfcc'
22
23
  Trollop::die :bits, "must be 16 for mfcc"
23
24
  end
24
25
 
25
- if options[:payload] == 'mfcc' && ((options[:frequency] % 8000) != 0)
26
- Trollop::die :frequency, "(#{options[:frequency]}) for mfcc must be divisible by 8000"
26
+ if options[:payload] == 'mfcc'
27
+ f = options[:frequency]
28
+ if f != 8000 && f != 16000
29
+ Trollop::die :frequency, "(#{options[:frequency]}) for mfcc must be either 16000 or 8000"
30
+ end
27
31
  end
28
32
 
29
33
  # Must set implementation specific library path before requiring libraries.
@@ -99,12 +103,23 @@ end
99
103
  require 'socket'
100
104
 
101
105
  def recognize file, options
102
- TCPSocket.open(options[:address], options[:port]) do |client|
103
- bits, rate = options[:bits], options[:frequency]
104
- if options[:pcm]
105
- send_incremental_pcm file, client, client, bits, rate
106
- else
107
- send_incremental_features file, client, client, bits, rate
106
+ bits, rate = options[:bits], options[:frequency]
107
+ if rate == 8000
108
+ rateinfo = FEAT8M16R
109
+ elsif rate == 16000
110
+ rateinfo = FEAT16M16R
111
+ end
112
+ if options[:file]
113
+ open(options[:file], 'wb') do |client|
114
+ send_incremental_features file, client, nil, bits, rateinfo
115
+ end
116
+ else
117
+ TCPSocket.open(options[:address], options[:port]) do |client|
118
+ if options[:pcm]
119
+ send_incremental_pcm file, client, client, bits, rateinfo
120
+ else
121
+ send_incremental_features file, client, client, bits, rateinfo
122
+ end
108
123
  end
109
124
  end
110
125
  rescue Errno::ECONNREFUSED
@@ -0,0 +1,42 @@
1
+ require 'stringio'
2
+
3
+ # Convert audio file into an IO object with features.
4
+ def file2fstream file, format = FEAT8M16R
5
+ to_server = StringIO.new 'wb'
6
+ from_server = StringIO.new 'dummy result'
7
+ result = send_incremental_features file, to_server, from_server, 16, format
8
+ StringIO.new to_server.string
9
+ end
10
+
11
+ # Take a talkhouse feature stream and convert it into an array.
12
+ def stream2features stream
13
+ observations = []
14
+ raise "Unexpected magic number." if stream.read(TMAGIC.size) != TMAGIC
15
+ raise "Expected TSTART." if stream.read(4) != TSTART
16
+ loop do
17
+ case stream.read(4)
18
+ when TPCM
19
+ count = stream.read(4).unpack('N')[0]
20
+ pcm = stream.read count
21
+ pcm = pcm.unpack('g*')
22
+ when TCEPSTRA
23
+ count = stream.read(4).unpack('N')[0]
24
+ cmn = Array.new(count) {stream.read(13*4).unpack('g*')}
25
+ observations += cmn
26
+ when TBYE
27
+ break
28
+ when TEND
29
+ else
30
+ end
31
+ end
32
+ delta_filter = Noyes::DoubleDeltaFilter.new
33
+ observations >>= delta_filter
34
+ observations.map {|a| a.flatten}
35
+ end
36
+
37
+ # Convenience function for converting almost any type of audio file to an mfcc
38
+ # feature array.
39
+ def file2features file, format = FEAT8M16R
40
+ stream = file2fstream file, format
41
+ stream2features stream
42
+ end
@@ -2,7 +2,7 @@ TMAGIC = '1.0 talkhouse'
2
2
 
3
3
  # The following constants are packed as 4 byte big-endian integers.
4
4
  TSTART = [0].pack('N')
5
- #TAUDIO = [1].pack('N')
5
+ TPCM = [1].pack('N')
6
6
  TEND = [2].pack('N')
7
7
  TBYE = [3].pack('N')
8
8
  TCEPSTRA = [4].pack('N')
@@ -1,6 +1,12 @@
1
1
  require 'noyes'
2
2
  require 'common/file2pcm'
3
3
 
4
+
5
+ # Parameters for 8 kHz models and 16 kHz data
6
+ FEAT8M16R = [32, 200, 3700, 256*2, 8000*2, 80*2, 205*2]
7
+ # Parameters for 16 kHz models and 16 kHz data
8
+ FEAT16M16R = [40, 133.33, 6855.5, 512, 8000*2, 80*2, 205*2]
9
+
4
10
  # The following flags are in network byte order (big endian) and are 4 bytes
5
11
  # long.
6
12
  #
@@ -17,14 +23,9 @@ require 'common/file2pcm'
17
23
  # Use sox to convert a file of almost any common type into pcm.
18
24
  # Not sure this works for anything besides 16 bits.
19
25
  # Takes a file and two IO-like objects.
20
- def send_incremental_features file, to_server, from_server, bits, freq
26
+ def send_incremental_features file, to_server, from_server, bits, freqinfo
21
27
  stats = {}
22
- nfilt = 40
23
- min_freq = 133.33334
24
- max_freq = 6855.4976
25
- nfft = 512
26
- shift = 160
27
- frame_size = 410
28
+ nfilt, min_freq, max_freq, nfft, freq, shift, frame_size = *freqinfo
28
29
  preemphasizer = Preemphasizer.new 0.97
29
30
  segmenter = Segmenter.new frame_size, shift
30
31
  hamming_windower = HammingWindow.new frame_size
@@ -52,7 +53,7 @@ def send_incremental_features file, to_server, from_server, bits, freq
52
53
  stats[:process_time] += Time.new - process_time_start
53
54
  to_server.write TCEPSTRA
54
55
  to_server.write [data.size].pack('N')
55
- print '.'
56
+ # print '.'
56
57
  data.each {|cmn| to_server.write cmn.pack('g*')}
57
58
  to_server.flush
58
59
  end
@@ -60,9 +61,9 @@ def send_incremental_features file, to_server, from_server, bits, freq
60
61
  to_server.write TBYE
61
62
  to_server.flush
62
63
  latency_start = Time.new
63
- stats[:transcript] = from_server.read
64
+ stats[:transcript] = from_server ? from_server.read : ""
64
65
  stats[:latency] = Time.new - latency_start
65
- return stats
66
+ stats
66
67
  end
67
68
 
68
69
  def send_incremental_pcm file, to_server, from_server, depth, rate
@@ -74,7 +75,7 @@ def send_incremental_pcm file, to_server, from_server, depth, rate
74
75
  to_server.write TA16_16
75
76
  to_server.write [chunk.size/2].pack('N')
76
77
  to_server.write chunk
77
- print '.'
78
+ # print '.'
78
79
  to_server.flush
79
80
  chunk = raw.slice! 0, 1024
80
81
  end
data/lib/common.rb CHANGED
@@ -4,4 +4,5 @@ require 'common/noyes_dsl'
4
4
  require 'common/noyes_math'
5
5
  require 'common/noyes_protocol'
6
6
  require 'common/send_incrementally'
7
+ require 'common/file2feat'
7
8
  require 'common/ruby_ext'
data/ship/noyes.jar ADDED
Binary file
metadata CHANGED
@@ -1,13 +1,8 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: noyes
3
3
  version: !ruby/object:Gem::Version
4
- hash: 21
5
- prerelease: false
6
- segments:
7
- - 1
8
- - 0
9
- - 1
10
- version: 1.0.1
4
+ prerelease:
5
+ version: 1.1.1
11
6
  platform: ruby
12
7
  authors:
13
8
  - Joe Woelfel
@@ -15,7 +10,7 @@ autorequire:
15
10
  bindir: bin
16
11
  cert_chain: []
17
12
 
18
- date: 2010-09-01 00:00:00 -04:00
13
+ date: 2011-12-13 00:00:00 -05:00
19
14
  default_executable:
20
15
  dependencies:
21
16
  - !ruby/object:Gem::Dependency
@@ -26,11 +21,6 @@ dependencies:
26
21
  requirements:
27
22
  - - ">="
28
23
  - !ruby/object:Gem::Version
29
- hash: 23
30
- segments:
31
- - 1
32
- - 0
33
- - 0
34
24
  version: 1.0.0
35
25
  type: :runtime
36
26
  version_requirements: *id001
@@ -40,6 +30,7 @@ description: |-
40
30
  email: joe@talkhouse.com
41
31
  executables:
42
32
  - mock_noyes_server
33
+ - noyes
43
34
  - noyes_dump44k
44
35
  - noyes_dump8k
45
36
  - nrec
@@ -82,6 +73,7 @@ files:
82
73
  - lib/cext/segmenter.c
83
74
  - lib/cext/speech_trimmer.c
84
75
  - lib/common.rb
76
+ - lib/common/file2feat.rb
85
77
  - lib/common/file2pcm.rb
86
78
  - lib/common/mock_noyes_server.rb
87
79
  - lib/common/noyes_dsl.rb
@@ -123,9 +115,11 @@ files:
123
115
  - lib/ruby_impl/preemphasis.rb
124
116
  - lib/ruby_impl/segment.rb
125
117
  - lib/ruby_impl/speech_trimmer.rb
118
+ - ship/noyes.jar
126
119
  - COPYING
127
120
  - FAQ
128
121
  - bin/mock_noyes_server
122
+ - bin/noyes
129
123
  - bin/noyes_dump44k
130
124
  - bin/noyes_dump8k
131
125
  - bin/nrec
@@ -134,8 +128,8 @@ homepage: http://github.com/talkhouse/noyes
134
128
  licenses: []
135
129
 
136
130
  post_install_message:
137
- rdoc_options:
138
- - --charset=UTF-8
131
+ rdoc_options: []
132
+
139
133
  require_paths:
140
134
  - lib
141
135
  - ship
@@ -144,23 +138,17 @@ required_ruby_version: !ruby/object:Gem::Requirement
144
138
  requirements:
145
139
  - - ">="
146
140
  - !ruby/object:Gem::Version
147
- hash: 3
148
- segments:
149
- - 0
150
141
  version: "0"
151
142
  required_rubygems_version: !ruby/object:Gem::Requirement
152
143
  none: false
153
144
  requirements:
154
145
  - - ">="
155
146
  - !ruby/object:Gem::Version
156
- hash: 3
157
- segments:
158
- - 0
159
147
  version: "0"
160
148
  requirements: []
161
149
 
162
150
  rubyforge_project:
163
- rubygems_version: 1.3.7
151
+ rubygems_version: 1.5.0
164
152
  signing_key:
165
153
  specification_version: 3
166
154
  summary: A signal processing library