noyes 1.0.1 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/COPYING +19 -4
- data/VERSION +1 -1
- data/bin/noyes +43 -0
- data/bin/nrec +23 -8
- data/lib/common/file2feat.rb +42 -0
- data/lib/common/noyes_protocol.rb +1 -1
- data/lib/common/send_incrementally.rb +12 -11
- data/lib/common.rb +1 -0
- data/ship/noyes.jar +0 -0
- metadata +10 -22
data/COPYING
CHANGED
@@ -1,7 +1,22 @@
|
|
1
1
|
Copyright 2010 Talkhouse. All rights reserved.
|
2
2
|
|
3
|
-
|
4
|
-
|
3
|
+
Redistribution and use in source and binary forms, with or without modification, are
|
4
|
+
permitted provided that the following conditions are met:
|
5
|
+
|
6
|
+
1. Redistributions of source code must retain the above copyright notice, this list of
|
7
|
+
conditions and the following disclaimer.
|
8
|
+
|
9
|
+
2. Redistributions in binary form must reproduce the above copyright notice, this list
|
10
|
+
of conditions and the following disclaimer in the documentation and/or other materials
|
11
|
+
provided with the distribution.
|
12
|
+
|
13
|
+
THIS SOFTWARE IS PROVIDED BY TALKHOUSE ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
14
|
+
WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
15
|
+
FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL TALKHOUSE OR
|
16
|
+
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
17
|
+
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
18
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
19
|
+
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
20
|
+
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
21
|
+
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
5
22
|
|
6
|
-
If for any reason this license is not adequate for your purposes please contact
|
7
|
-
talkhouse. We're very open minded.
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
1.0.1
|
1
|
+
1.1.1
|
data/bin/noyes
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# vim: set filetype=ruby :
|
3
|
+
ROOT = File.dirname(File.dirname(__FILE__))
|
4
|
+
VERSION_FILE = "#{ROOT}/VERSION"
|
5
|
+
$: << "#{ROOT}/lib" << "#{ROOT}/ship"
|
6
|
+
|
7
|
+
require 'trollop'
|
8
|
+
require 'noyes_c'
|
9
|
+
include NoyesC
|
10
|
+
|
11
|
+
options = Trollop::options do
|
12
|
+
version "Noyes #{IO.read(VERSION_FILE).strip} (c) 2010 Talkhouse"
|
13
|
+
banner <<DOC'Usage: noyes [options] file1 file2 ...'
|
14
|
+
Converts files from audio to raw features. Supports format that your
|
15
|
+
version of SOX supports.
|
16
|
+
DOC
|
17
|
+
opt :force, 'Force creation'
|
18
|
+
opt :verbose, 'Verbose mode'
|
19
|
+
opt :pattern, 'Files matching pattern.', :type => :strings, :multi => true
|
20
|
+
end
|
21
|
+
|
22
|
+
patterns = options[:pattern].flatten
|
23
|
+
pattern_files = patterns.inject [] {|memo, pattern| memo | Dir[pattern]}
|
24
|
+
audio_files = ARGV | pattern_files
|
25
|
+
# Make sure were not overwritting anything unless force flag is used.
|
26
|
+
mfcc_files = audio_files.map {|audio_file| audio_file.sub /\.\w+$/, '.mfcc'}
|
27
|
+
unless options[:force]
|
28
|
+
mfcc_file = mfcc_files.detect {|mfcc_file| File.exists? mfcc_file}
|
29
|
+
if mfcc_file
|
30
|
+
puts "#{mfcc_file} already exists. Quitting. Use -f to force."
|
31
|
+
exit 9
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
audio_files.zip(mfcc_files).each do |audio_file, mfcc_file|
|
36
|
+
observations = file2features audio_file
|
37
|
+
puts mfcc_file if options[:verbose]
|
38
|
+
open(mfcc_file, 'wb') do |f|
|
39
|
+
f.print [observations.size, 100000].pack 'N*'
|
40
|
+
f.print [0x009c, 0x2306].pack 'n*'
|
41
|
+
f.puts observations.flatten.pack 'g*'
|
42
|
+
end
|
43
|
+
end
|
data/bin/nrec
CHANGED
@@ -11,6 +11,7 @@ options = Trollop::options do
|
|
11
11
|
banner 'Usage: nrec [options] file1 file2 ...'
|
12
12
|
opt :implementation, "ruby, c, or java", :default => 'ruby'
|
13
13
|
opt :address, "address", :default => '174.129.244.159'
|
14
|
+
opt :file, "Output file (overrides default TCP/IP)", :type => :string
|
14
15
|
opt :port, "port", :default => 2348
|
15
16
|
opt :bits, "bit depth", :default => 16
|
16
17
|
opt :frequency, "sampling frequency", :default => 16000
|
@@ -22,8 +23,11 @@ if options[:bits] != 16 && options[:payload] == 'mfcc'
|
|
22
23
|
Trollop::die :bits, "must be 16 for mfcc"
|
23
24
|
end
|
24
25
|
|
25
|
-
if options[:payload] == 'mfcc'
|
26
|
-
|
26
|
+
if options[:payload] == 'mfcc'
|
27
|
+
f = options[:frequency]
|
28
|
+
if f != 8000 && f != 16000
|
29
|
+
Trollop::die :frequency, "(#{options[:frequency]}) for mfcc must be either 16000 or 8000"
|
30
|
+
end
|
27
31
|
end
|
28
32
|
|
29
33
|
# Must set implementation specific library path before requiring libraries.
|
@@ -99,12 +103,23 @@ end
|
|
99
103
|
require 'socket'
|
100
104
|
|
101
105
|
def recognize file, options
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
106
|
+
bits, rate = options[:bits], options[:frequency]
|
107
|
+
if rate == 8000
|
108
|
+
rateinfo = FEAT8M16R
|
109
|
+
elsif rate == 16000
|
110
|
+
rateinfo = FEAT16M16R
|
111
|
+
end
|
112
|
+
if options[:file]
|
113
|
+
open(options[:file], 'wb') do |client|
|
114
|
+
send_incremental_features file, client, nil, bits, rateinfo
|
115
|
+
end
|
116
|
+
else
|
117
|
+
TCPSocket.open(options[:address], options[:port]) do |client|
|
118
|
+
if options[:pcm]
|
119
|
+
send_incremental_pcm file, client, client, bits, rateinfo
|
120
|
+
else
|
121
|
+
send_incremental_features file, client, client, bits, rateinfo
|
122
|
+
end
|
108
123
|
end
|
109
124
|
end
|
110
125
|
rescue Errno::ECONNREFUSED
|
data/lib/common/file2feat.rb
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
require 'stringio'
|
2
|
+
|
3
|
+
# Convert audio file into an IO object with features.
|
4
|
+
def file2fstream file, format = FEAT8M16R
|
5
|
+
to_server = StringIO.new 'wb'
|
6
|
+
from_server = StringIO.new 'dummy result'
|
7
|
+
result = send_incremental_features file, to_server, from_server, 16, format
|
8
|
+
StringIO.new to_server.string
|
9
|
+
end
|
10
|
+
|
11
|
+
# Take a talkhouse feature stream and convert it into an array.
|
12
|
+
def stream2features stream
|
13
|
+
observations = []
|
14
|
+
raise "Unexpected magic number." if stream.read(TMAGIC.size) != TMAGIC
|
15
|
+
raise "Expected TSTART." if stream.read(4) != TSTART
|
16
|
+
loop do
|
17
|
+
case stream.read(4)
|
18
|
+
when TPCM
|
19
|
+
count = stream.read(4).unpack('N')[0]
|
20
|
+
pcm = stream.read count
|
21
|
+
pcm = pcm.unpack('g*')
|
22
|
+
when TCEPSTRA
|
23
|
+
count = stream.read(4).unpack('N')[0]
|
24
|
+
cmn = Array.new(count) {stream.read(13*4).unpack('g*')}
|
25
|
+
observations += cmn
|
26
|
+
when TBYE
|
27
|
+
break
|
28
|
+
when TEND
|
29
|
+
else
|
30
|
+
end
|
31
|
+
end
|
32
|
+
delta_filter = Noyes::DoubleDeltaFilter.new
|
33
|
+
observations >>= delta_filter
|
34
|
+
observations.map {|a| a.flatten}
|
35
|
+
end
|
36
|
+
|
37
|
+
# Convenience function for converting almost any type of audio file to an mfcc
|
38
|
+
# feature array.
|
39
|
+
def file2features file, format = FEAT8M16R
|
40
|
+
stream = file2fstream file, format
|
41
|
+
stream2features stream
|
42
|
+
end
|
data/lib/common/send_incrementally.rb
CHANGED
@@ -1,6 +1,12 @@
|
|
1
1
|
require 'noyes'
|
2
2
|
require 'common/file2pcm'
|
3
3
|
|
4
|
+
|
5
|
+
# Parameters for 8 kHz models and 16 kHz data
|
6
|
+
FEAT8M16R = [32, 200, 3700, 256*2, 8000*2, 80*2, 205*2]
|
7
|
+
# Paramenters for 16 kHz models and 16 kHz data
|
8
|
+
FEAT16M16R = [40, 133.33, 6855.5, 512, 8000*2, 80*2, 205*2]
|
9
|
+
|
4
10
|
# The following flags are in network byte order (big endian) and are 4 bytes
|
5
11
|
# long.
|
6
12
|
#
|
@@ -17,14 +23,9 @@ require 'common/file2pcm'
|
|
17
23
|
# Use sox to convert a file of almost any common type int pcm.
|
18
24
|
# Not sure this works for anything beside 16 bits.
|
19
25
|
# Takes a file and two IO-like objects.
|
20
|
-
def send_incremental_features file, to_server, from_server, bits,
|
26
|
+
def send_incremental_features file, to_server, from_server, bits, freqinfo
|
21
27
|
stats = {}
|
22
|
-
nfilt =
|
23
|
-
min_freq = 133.33334
|
24
|
-
max_freq = 6855.4976
|
25
|
-
nfft = 512
|
26
|
-
shift = 160
|
27
|
-
frame_size = 410
|
28
|
+
nfilt, min_freq, max_freq, nfft, freq, shift, frame_size = *freqinfo
|
28
29
|
preemphasizer = Preemphasizer.new 0.97
|
29
30
|
segmenter = Segmenter.new frame_size, shift
|
30
31
|
hamming_windower = HammingWindow.new frame_size
|
@@ -52,7 +53,7 @@ def send_incremental_features file, to_server, from_server, bits, freq
|
|
52
53
|
stats[:process_time] += Time.new - process_time_start
|
53
54
|
to_server.write TCEPSTRA
|
54
55
|
to_server.write [data.size].pack('N')
|
55
|
-
print '.'
|
56
|
+
# print '.'
|
56
57
|
data.each {|cmn| to_server.write cmn.pack('g*')}
|
57
58
|
to_server.flush
|
58
59
|
end
|
@@ -60,9 +61,9 @@ def send_incremental_features file, to_server, from_server, bits, freq
|
|
60
61
|
to_server.write TBYE
|
61
62
|
to_server.flush
|
62
63
|
latency_start = Time.new
|
63
|
-
stats[:transcript] = from_server.read
|
64
|
+
stats[:transcript] = from_server ? from_server.read : ""
|
64
65
|
stats[:latency] = Time.new - latency_start
|
65
|
-
|
66
|
+
stats
|
66
67
|
end
|
67
68
|
|
68
69
|
def send_incremental_pcm file, to_server, from_server, depth, rate
|
@@ -74,7 +75,7 @@ def send_incremental_pcm file, to_server, from_server, depth, rate
|
|
74
75
|
to_server.write TA16_16
|
75
76
|
to_server.write [chunk.size/2].pack('N')
|
76
77
|
to_server.write chunk
|
77
|
-
print '.'
|
78
|
+
# print '.'
|
78
79
|
to_server.flush
|
79
80
|
chunk = raw.slice! 0, 1024
|
80
81
|
end
|
data/lib/common.rb
CHANGED
data/ship/noyes.jar
ADDED
Binary file
|
metadata
CHANGED
@@ -1,13 +1,8 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: noyes
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
|
6
|
-
segments:
|
7
|
-
- 1
|
8
|
-
- 0
|
9
|
-
- 1
|
10
|
-
version: 1.0.1
|
4
|
+
prerelease:
|
5
|
+
version: 1.1.1
|
11
6
|
platform: ruby
|
12
7
|
authors:
|
13
8
|
- Joe Woelfel
|
@@ -15,7 +10,7 @@ autorequire:
|
|
15
10
|
bindir: bin
|
16
11
|
cert_chain: []
|
17
12
|
|
18
|
-
date:
|
13
|
+
date: 2011-12-13 00:00:00 -05:00
|
19
14
|
default_executable:
|
20
15
|
dependencies:
|
21
16
|
- !ruby/object:Gem::Dependency
|
@@ -26,11 +21,6 @@ dependencies:
|
|
26
21
|
requirements:
|
27
22
|
- - ">="
|
28
23
|
- !ruby/object:Gem::Version
|
29
|
-
hash: 23
|
30
|
-
segments:
|
31
|
-
- 1
|
32
|
-
- 0
|
33
|
-
- 0
|
34
24
|
version: 1.0.0
|
35
25
|
type: :runtime
|
36
26
|
version_requirements: *id001
|
@@ -40,6 +30,7 @@ description: |-
|
|
40
30
|
email: joe@talkhouse.com
|
41
31
|
executables:
|
42
32
|
- mock_noyes_server
|
33
|
+
- noyes
|
43
34
|
- noyes_dump44k
|
44
35
|
- noyes_dump8k
|
45
36
|
- nrec
|
@@ -82,6 +73,7 @@ files:
|
|
82
73
|
- lib/cext/segmenter.c
|
83
74
|
- lib/cext/speech_trimmer.c
|
84
75
|
- lib/common.rb
|
76
|
+
- lib/common/file2feat.rb
|
85
77
|
- lib/common/file2pcm.rb
|
86
78
|
- lib/common/mock_noyes_server.rb
|
87
79
|
- lib/common/noyes_dsl.rb
|
@@ -123,9 +115,11 @@ files:
|
|
123
115
|
- lib/ruby_impl/preemphasis.rb
|
124
116
|
- lib/ruby_impl/segment.rb
|
125
117
|
- lib/ruby_impl/speech_trimmer.rb
|
118
|
+
- ship/noyes.jar
|
126
119
|
- COPYING
|
127
120
|
- FAQ
|
128
121
|
- bin/mock_noyes_server
|
122
|
+
- bin/noyes
|
129
123
|
- bin/noyes_dump44k
|
130
124
|
- bin/noyes_dump8k
|
131
125
|
- bin/nrec
|
@@ -134,8 +128,8 @@ homepage: http://github.com/talkhouse/noyes
|
|
134
128
|
licenses: []
|
135
129
|
|
136
130
|
post_install_message:
|
137
|
-
rdoc_options:
|
138
|
-
|
131
|
+
rdoc_options: []
|
132
|
+
|
139
133
|
require_paths:
|
140
134
|
- lib
|
141
135
|
- ship
|
@@ -144,23 +138,17 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
144
138
|
requirements:
|
145
139
|
- - ">="
|
146
140
|
- !ruby/object:Gem::Version
|
147
|
-
hash: 3
|
148
|
-
segments:
|
149
|
-
- 0
|
150
141
|
version: "0"
|
151
142
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
152
143
|
none: false
|
153
144
|
requirements:
|
154
145
|
- - ">="
|
155
146
|
- !ruby/object:Gem::Version
|
156
|
-
hash: 3
|
157
|
-
segments:
|
158
|
-
- 0
|
159
147
|
version: "0"
|
160
148
|
requirements: []
|
161
149
|
|
162
150
|
rubyforge_project:
|
163
|
-
rubygems_version: 1.
|
151
|
+
rubygems_version: 1.5.0
|
164
152
|
signing_key:
|
165
153
|
specification_version: 3
|
166
154
|
summary: A signal processing library
|