noyes 1.0.1 → 1.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/COPYING +19 -4
- data/VERSION +1 -1
- data/bin/noyes +43 -0
- data/bin/nrec +23 -8
- data/lib/common/file2feat.rb +42 -0
- data/lib/common/noyes_protocol.rb +1 -1
- data/lib/common/send_incrementally.rb +12 -11
- data/lib/common.rb +1 -0
- data/ship/noyes.jar +0 -0
- metadata +10 -22
data/COPYING
CHANGED
@@ -1,7 +1,22 @@
|
|
1
1
|
Copyright 2010 Talkhouse. All rights reserved.
|
2
2
|
|
3
|
-
|
4
|
-
|
3
|
+
Redistribution and use in source and binary forms, with or without modification, are
|
4
|
+
permitted provided that the following conditions are met:
|
5
|
+
|
6
|
+
1. Redistributions of source code must retain the above copyright notice, this list of
|
7
|
+
conditions and the following disclaimer.
|
8
|
+
|
9
|
+
2. Redistributions in binary form must reproduce the above copyright notice, this list
|
10
|
+
of conditions and the following disclaimer in the documentation and/or other materials
|
11
|
+
provided with the distribution.
|
12
|
+
|
13
|
+
THIS SOFTWARE IS PROVIDED BY TALKHOUSE ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
14
|
+
WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
15
|
+
FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL TALKHOUSE OR
|
16
|
+
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
17
|
+
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
18
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
19
|
+
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
20
|
+
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
21
|
+
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
5
22
|
|
6
|
-
If for any reason this license is not adequate for your purposes please contact
|
7
|
-
talkhouse. We're very open minded.
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
1.0.1
|
1
|
+
1.1.1
|
data/bin/noyes
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# vim: set filetype=ruby :
|
3
|
+
ROOT = File.dirname(File.dirname(__FILE__))
|
4
|
+
VERSION_FILE = "#{ROOT}/VERSION"
|
5
|
+
$: << "#{ROOT}/lib" << "#{ROOT}/ship"
|
6
|
+
|
7
|
+
require 'trollop'
|
8
|
+
require 'noyes_c'
|
9
|
+
include NoyesC
|
10
|
+
|
11
|
+
options = Trollop::options do
|
12
|
+
version "Noyes #{IO.read(VERSION_FILE).strip} (c) 2010 Talkhouse"
|
13
|
+
banner <<DOC'Usage: noyes [options] file1 file2 ...'
|
14
|
+
Converts files from audio to raw features. Supports format that your
|
15
|
+
version of SOX supports.
|
16
|
+
DOC
|
17
|
+
opt :force, 'Force creation'
|
18
|
+
opt :verbose, 'Verbose mode'
|
19
|
+
opt :pattern, 'Files matching pattern.', :type => :strings, :multi => true
|
20
|
+
end
|
21
|
+
|
22
|
+
patterns = options[:pattern].flatten
|
23
|
+
pattern_files = patterns.inject [] {|memo, pattern| memo | Dir[pattern]}
|
24
|
+
audio_files = ARGV | pattern_files
|
25
|
+
# Make sure we're not overwriting anything unless force flag is used.
|
26
|
+
mfcc_files = audio_files.map {|audio_file| audio_file.sub /\.\w+$/, '.mfcc'}
|
27
|
+
unless options[:force]
|
28
|
+
mfcc_file = mfcc_files.detect {|mfcc_file| File.exists? mfcc_file}
|
29
|
+
if mfcc_file
|
30
|
+
puts "#{mfcc_file} already exists. Quitting. Use -f to force."
|
31
|
+
exit 9
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
audio_files.zip(mfcc_files).each do |audio_file, mfcc_file|
|
36
|
+
observations = file2features audio_file
|
37
|
+
puts mfcc_file if options[:verbose]
|
38
|
+
open(mfcc_file, 'wb') do |f|
|
39
|
+
f.print [observations.size, 100000].pack 'N*'
|
40
|
+
f.print [0x009c, 0x2306].pack 'n*'
|
41
|
+
f.puts observations.flatten.pack 'g*'
|
42
|
+
end
|
43
|
+
end
|
data/bin/nrec
CHANGED
@@ -11,6 +11,7 @@ options = Trollop::options do
|
|
11
11
|
banner 'Usage: nrec [options] file1 file2 ...'
|
12
12
|
opt :implementation, "ruby, c, or java", :default => 'ruby'
|
13
13
|
opt :address, "address", :default => '174.129.244.159'
|
14
|
+
opt :file, "Output file (overrides default TCP/IP)", :type => :string
|
14
15
|
opt :port, "port", :default => 2348
|
15
16
|
opt :bits, "bit depth", :default => 16
|
16
17
|
opt :frequency, "sampling frequency", :default => 16000
|
@@ -22,8 +23,11 @@ if options[:bits] != 16 && options[:payload] == 'mfcc'
|
|
22
23
|
Trollop::die :bits, "must be 16 for mfcc"
|
23
24
|
end
|
24
25
|
|
25
|
-
if options[:payload] == 'mfcc'
|
26
|
-
|
26
|
+
if options[:payload] == 'mfcc'
|
27
|
+
f = options[:frequency]
|
28
|
+
if f != 8000 && f != 16000
|
29
|
+
Trollop::die :frequency, "(#{options[:frequency]}) for mfcc must be either 16000 or 8000"
|
30
|
+
end
|
27
31
|
end
|
28
32
|
|
29
33
|
# Must set implementation specific library path before requiring libraries.
|
@@ -99,12 +103,23 @@ end
|
|
99
103
|
require 'socket'
|
100
104
|
|
101
105
|
def recognize file, options
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
106
|
+
bits, rate = options[:bits], options[:frequency]
|
107
|
+
if rate == 8000
|
108
|
+
rateinfo = FEAT8M16R
|
109
|
+
elsif rate == 16000
|
110
|
+
rateinfo = FEAT16M16R
|
111
|
+
end
|
112
|
+
if options[:file]
|
113
|
+
open(options[:file], 'wb') do |client|
|
114
|
+
send_incremental_features file, client, nil, bits, rateinfo
|
115
|
+
end
|
116
|
+
else
|
117
|
+
TCPSocket.open(options[:address], options[:port]) do |client|
|
118
|
+
if options[:pcm]
|
119
|
+
send_incremental_pcm file, client, client, bits, rateinfo
|
120
|
+
else
|
121
|
+
send_incremental_features file, client, client, bits, rateinfo
|
122
|
+
end
|
108
123
|
end
|
109
124
|
end
|
110
125
|
rescue Errno::ECONNREFUSED
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require 'stringio'
|
2
|
+
|
3
|
+
# Convert audio file into an IO object with features.
|
4
|
+
def file2fstream file, format = FEAT8M16R
|
5
|
+
to_server = StringIO.new 'wb'
|
6
|
+
from_server = StringIO.new 'dummy result'
|
7
|
+
result = send_incremental_features file, to_server, from_server, 16, format
|
8
|
+
StringIO.new to_server.string
|
9
|
+
end
|
10
|
+
|
11
|
+
# Take a talkhouse feature stream and convert it into an array.
|
12
|
+
def stream2features stream
|
13
|
+
observations = []
|
14
|
+
raise "Unexpected magic number." if stream.read(TMAGIC.size) != TMAGIC
|
15
|
+
raise "Expected TSTART." if stream.read(4) != TSTART
|
16
|
+
loop do
|
17
|
+
case stream.read(4)
|
18
|
+
when TPCM
|
19
|
+
count = stream.read(4).unpack('N')[0]
|
20
|
+
pcm = stream.read count
|
21
|
+
pcm = pcm.unpack('g*')
|
22
|
+
when TCEPSTRA
|
23
|
+
count = stream.read(4).unpack('N')[0]
|
24
|
+
cmn = Array.new(count) {stream.read(13*4).unpack('g*')}
|
25
|
+
observations += cmn
|
26
|
+
when TBYE
|
27
|
+
break
|
28
|
+
when TEND
|
29
|
+
else
|
30
|
+
end
|
31
|
+
end
|
32
|
+
delta_filter = Noyes::DoubleDeltaFilter.new
|
33
|
+
observations >>= delta_filter
|
34
|
+
observations.map {|a| a.flatten}
|
35
|
+
end
|
36
|
+
|
37
|
+
# Convenience function for converting almost any type of audio file to an mfcc
|
38
|
+
# feature array.
|
39
|
+
def file2features file, format = FEAT8M16R
|
40
|
+
stream = file2fstream file, format
|
41
|
+
stream2features stream
|
42
|
+
end
|
@@ -1,6 +1,12 @@
|
|
1
1
|
require 'noyes'
|
2
2
|
require 'common/file2pcm'
|
3
3
|
|
4
|
+
|
5
|
+
# Parameters for 8 kHz models and 16 kHz data
|
6
|
+
FEAT8M16R = [32, 200, 3700, 256*2, 8000*2, 80*2, 205*2]
|
7
|
+
# Parameters for 16 kHz models and 16 kHz data
|
8
|
+
FEAT16M16R = [40, 133.33, 6855.5, 512, 8000*2, 80*2, 205*2]
|
9
|
+
|
4
10
|
# The following flags are in network byte order (big endian) and are 4 bytes
|
5
11
|
# long.
|
6
12
|
#
|
@@ -17,14 +23,9 @@ require 'common/file2pcm'
|
|
17
23
|
# Use sox to convert a file of almost any common type into pcm.
|
18
24
|
# Not sure this works for anything beside 16 bits.
|
19
25
|
# Takes a file and two IO-like objects.
|
20
|
-
def send_incremental_features file, to_server, from_server, bits,
|
26
|
+
def send_incremental_features file, to_server, from_server, bits, freqinfo
|
21
27
|
stats = {}
|
22
|
-
nfilt =
|
23
|
-
min_freq = 133.33334
|
24
|
-
max_freq = 6855.4976
|
25
|
-
nfft = 512
|
26
|
-
shift = 160
|
27
|
-
frame_size = 410
|
28
|
+
nfilt, min_freq, max_freq, nfft, freq, shift, frame_size = *freqinfo
|
28
29
|
preemphasizer = Preemphasizer.new 0.97
|
29
30
|
segmenter = Segmenter.new frame_size, shift
|
30
31
|
hamming_windower = HammingWindow.new frame_size
|
@@ -52,7 +53,7 @@ def send_incremental_features file, to_server, from_server, bits, freq
|
|
52
53
|
stats[:process_time] += Time.new - process_time_start
|
53
54
|
to_server.write TCEPSTRA
|
54
55
|
to_server.write [data.size].pack('N')
|
55
|
-
print '.'
|
56
|
+
# print '.'
|
56
57
|
data.each {|cmn| to_server.write cmn.pack('g*')}
|
57
58
|
to_server.flush
|
58
59
|
end
|
@@ -60,9 +61,9 @@ def send_incremental_features file, to_server, from_server, bits, freq
|
|
60
61
|
to_server.write TBYE
|
61
62
|
to_server.flush
|
62
63
|
latency_start = Time.new
|
63
|
-
stats[:transcript] = from_server.read
|
64
|
+
stats[:transcript] = from_server ? from_server.read : ""
|
64
65
|
stats[:latency] = Time.new - latency_start
|
65
|
-
|
66
|
+
stats
|
66
67
|
end
|
67
68
|
|
68
69
|
def send_incremental_pcm file, to_server, from_server, depth, rate
|
@@ -74,7 +75,7 @@ def send_incremental_pcm file, to_server, from_server, depth, rate
|
|
74
75
|
to_server.write TA16_16
|
75
76
|
to_server.write [chunk.size/2].pack('N')
|
76
77
|
to_server.write chunk
|
77
|
-
print '.'
|
78
|
+
# print '.'
|
78
79
|
to_server.flush
|
79
80
|
chunk = raw.slice! 0, 1024
|
80
81
|
end
|
data/lib/common.rb
CHANGED
data/ship/noyes.jar
ADDED
Binary file
|
metadata
CHANGED
@@ -1,13 +1,8 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: noyes
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
|
6
|
-
segments:
|
7
|
-
- 1
|
8
|
-
- 0
|
9
|
-
- 1
|
10
|
-
version: 1.0.1
|
4
|
+
prerelease:
|
5
|
+
version: 1.1.1
|
11
6
|
platform: ruby
|
12
7
|
authors:
|
13
8
|
- Joe Woelfel
|
@@ -15,7 +10,7 @@ autorequire:
|
|
15
10
|
bindir: bin
|
16
11
|
cert_chain: []
|
17
12
|
|
18
|
-
date:
|
13
|
+
date: 2011-12-13 00:00:00 -05:00
|
19
14
|
default_executable:
|
20
15
|
dependencies:
|
21
16
|
- !ruby/object:Gem::Dependency
|
@@ -26,11 +21,6 @@ dependencies:
|
|
26
21
|
requirements:
|
27
22
|
- - ">="
|
28
23
|
- !ruby/object:Gem::Version
|
29
|
-
hash: 23
|
30
|
-
segments:
|
31
|
-
- 1
|
32
|
-
- 0
|
33
|
-
- 0
|
34
24
|
version: 1.0.0
|
35
25
|
type: :runtime
|
36
26
|
version_requirements: *id001
|
@@ -40,6 +30,7 @@ description: |-
|
|
40
30
|
email: joe@talkhouse.com
|
41
31
|
executables:
|
42
32
|
- mock_noyes_server
|
33
|
+
- noyes
|
43
34
|
- noyes_dump44k
|
44
35
|
- noyes_dump8k
|
45
36
|
- nrec
|
@@ -82,6 +73,7 @@ files:
|
|
82
73
|
- lib/cext/segmenter.c
|
83
74
|
- lib/cext/speech_trimmer.c
|
84
75
|
- lib/common.rb
|
76
|
+
- lib/common/file2feat.rb
|
85
77
|
- lib/common/file2pcm.rb
|
86
78
|
- lib/common/mock_noyes_server.rb
|
87
79
|
- lib/common/noyes_dsl.rb
|
@@ -123,9 +115,11 @@ files:
|
|
123
115
|
- lib/ruby_impl/preemphasis.rb
|
124
116
|
- lib/ruby_impl/segment.rb
|
125
117
|
- lib/ruby_impl/speech_trimmer.rb
|
118
|
+
- ship/noyes.jar
|
126
119
|
- COPYING
|
127
120
|
- FAQ
|
128
121
|
- bin/mock_noyes_server
|
122
|
+
- bin/noyes
|
129
123
|
- bin/noyes_dump44k
|
130
124
|
- bin/noyes_dump8k
|
131
125
|
- bin/nrec
|
@@ -134,8 +128,8 @@ homepage: http://github.com/talkhouse/noyes
|
|
134
128
|
licenses: []
|
135
129
|
|
136
130
|
post_install_message:
|
137
|
-
rdoc_options:
|
138
|
-
|
131
|
+
rdoc_options: []
|
132
|
+
|
139
133
|
require_paths:
|
140
134
|
- lib
|
141
135
|
- ship
|
@@ -144,23 +138,17 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
144
138
|
requirements:
|
145
139
|
- - ">="
|
146
140
|
- !ruby/object:Gem::Version
|
147
|
-
hash: 3
|
148
|
-
segments:
|
149
|
-
- 0
|
150
141
|
version: "0"
|
151
142
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
152
143
|
none: false
|
153
144
|
requirements:
|
154
145
|
- - ">="
|
155
146
|
- !ruby/object:Gem::Version
|
156
|
-
hash: 3
|
157
|
-
segments:
|
158
|
-
- 0
|
159
147
|
version: "0"
|
160
148
|
requirements: []
|
161
149
|
|
162
150
|
rubyforge_project:
|
163
|
-
rubygems_version: 1.
|
151
|
+
rubygems_version: 1.5.0
|
164
152
|
signing_key:
|
165
153
|
specification_version: 3
|
166
154
|
summary: A signal processing library
|