noyes 0.3.2
Sign up to get free protection for your applications and to get access to all the features.
- data/COPYING +21 -0
- data/README +32 -0
- data/bin/noyes_dump44k.sh +59 -0
- data/bin/noyes_dump8k.sh +58 -0
- data/bin/recognize.sh +15 -0
- data/doc/overview.rdoc +51 -0
- data/lib/common/noyes_dsl.rb +6 -0
- data/lib/common/noyes_math.rb +18 -0
- data/lib/common/send_incrementally.rb +62 -0
- data/lib/noyes.rb +13 -0
- data/lib/ruby_impl/dct.rb +34 -0
- data/lib/ruby_impl/delta.rb +34 -0
- data/lib/ruby_impl/discrete_fourier_transform.rb +37 -0
- data/lib/ruby_impl/filter.rb +11 -0
- data/lib/ruby_impl/hamming_window.rb +20 -0
- data/lib/ruby_impl/live_cmn.rb +42 -0
- data/lib/ruby_impl/log_compress.rb +13 -0
- data/lib/ruby_impl/mel_filter.rb +112 -0
- data/lib/ruby_impl/power_spec.rb +19 -0
- data/lib/ruby_impl/preemphasis.rb +21 -0
- data/lib/ruby_impl/segment.rb +28 -0
- metadata +78 -0
data/COPYING
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
Copyright 2010 Talkhouse. All rights reserved.
|
2
|
+
|
3
|
+
Redistribution and use in source and binary forms, with or without modification, are
|
4
|
+
permitted provided that the following conditions are met:
|
5
|
+
|
6
|
+
1. Redistributions of source code must retain the above copyright notice, this list of
|
7
|
+
conditions and the following disclaimer.
|
8
|
+
|
9
|
+
2. Redistributions in binary form must reproduce the above copyright notice, this list
|
10
|
+
of conditions and the following disclaimer in the documentation and/or other materials
|
11
|
+
provided with the distribution.
|
12
|
+
|
13
|
+
THIS SOFTWARE IS PROVIDED BY TALKHOUSE ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
14
|
+
WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
15
|
+
FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL TALKHOUSE OR
|
16
|
+
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
17
|
+
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
18
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
19
|
+
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
20
|
+
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
21
|
+
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
data/README
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
Noyes is a signal processing library. It currently has just enough signal
|
2
|
+
processing to produce features suitable for speech recognition.
|
3
|
+
|
4
|
+
Pronunciation: Typically pronounced the same as 'noise'. But "NO!... YES!" is
|
5
|
+
considered acceptable if you yell it loudly enough or at least with sufficient
|
6
|
+
conviction to make people think you have truly changed your mind.
|
7
|
+
|
8
|
+
Noyes is implemented entirely in Ruby. It's also implemented entirely in Java.
|
9
|
+
The Java version has Ruby bindings too. So you can have Java's speed from
|
10
|
+
Ruby. All versions share the same unit tests, which are written in Ruby. The
|
11
|
+
design goal is to have signal processing routines that are so simple and so
|
12
|
+
disentangled from the overall system that anyone could extract any of the
|
13
|
+
routines and use them elsewhere with little trouble. Benchmarks are included.
|
14
|
+
|
15
|
+
This library places an emphasis on expressiveness without sacrificing ultimate
|
16
|
+
performance. It does so by supporting multiple implementations each with Ruby
|
17
|
+
bindings. The pure Ruby version, while not fast, is often adequate for
|
18
|
+
development and is the best place to add new routines.
|
19
|
+
|
20
|
+
For examples of how to link with different implementations see the test section
|
21
|
+
of the Rakefile. At present only the pure Ruby implementation is exposed via
|
22
|
+
the gem.
|
23
|
+
|
24
|
+
Requirements:
|
25
|
+
Almost any version of ruby & rake.
|
26
|
+
Java, if you want to use the Java version.
|
27
|
+
|
28
|
+
Some of the utility scripts may use sox, but
|
29
|
+
none of the core routines use it.
|
30
|
+
|
31
|
+
For usage information:
|
32
|
+
rake -T
|
@@ -0,0 +1,59 @@
|
|
1
|
+
#!/usr/bin/env ruby
# vim: set filetype=ruby :
#
# Runs one audio file through the 44.1 kHz front end and dumps the output of
# every stage as big-endian single-precision floats next to the input file.
ROOT = File.dirname(File.dirname(__FILE__))
$: << "#{ROOT}/lib/ruby"
$: << "#{ROOT}/lib/common"

require 'signal'

if ARGV.size != 1 || ARGV[0] == '-h'
  puts "Usage: noyes_dump44k <file>"
  exit 1
end

FILE = ARGV[0]
DIR = File.dirname FILE

include Signal
nfilt = 40
min_freq = 130
max_freq = 6800
nfft = 2048
freq = 44100
shift = 441
frame_size = 1130

preemphasizer = Preemphasizer.new 0.97
segmenter = Segmenter.new frame_size, shift
hamming_windower = HammingWindow.new frame_size
power_spectrum_filter = PowerSpectrumFilter.new nfft
mel_filter = MelFilter.new freq, nfft, nfilt, min_freq, max_freq
discrete_cosine_transform = DCT.new 13, nfilt
live_cmn = LiveCMN.new
ddf = DoubleDeltaFilter.new

# Writes one stage's (possibly nested) float data to DIR/name.  The payload is
# packed binary, so the file is opened in binary mode.
dump = lambda do |name, data|
  open("#{DIR}/#{name}", 'wb') {|f| f.write data.flatten.pack('g*')}
end

# Decode to signed 16-bit big-endian PCM at the rate the filters above are
# configured for.  The command is passed as an argument array so the file name
# never goes through a shell (spaces and metacharacters are safe).
sox = ['sox', FILE, '-s', '-B', '-r', freq.to_s, '-b', '16', '-t', 'raw', '-']
raw = IO.popen(sox, 'rb') {|io| io.read}
# NOTE(review): raw.dat lands in the current directory, unlike the other dumps.
open('raw.dat', 'wb') {|f| f.write raw}
pcm = raw.unpack 'n*'
pcm = pcm.map{|d| to_signed_short(d).to_f}
pre = preemphasizer << pcm
dump.call 'pre.dat', pre
# Zero-pad so the sample count is a multiple of the frame size.
seg = segmenter << (pre + Array.new(frame_size - pre.size % frame_size, 0.0))
dump.call 'seg.dat', seg
ham = hamming_windower << seg
dump.call 'ham.dat', ham
pow = power_spectrum_filter << ham
dump.call 'pow.dat', pow
mel = mel_filter << pow
dump.call 'mel.dat', mel
log = log_compress mel
dump.call 'log_mel.dat', log
dct = discrete_cosine_transform << log
dump.call 'dct.dat', dct
cmn = live_cmn << dct
dump.call 'cmn.dat', cmn
dd = ddf << cmn
# Flush the frames the delta filter is still holding back.
dd += ddf.final_estimate
dump.call 'dd.dat', dd
|
58
|
+
|
59
|
+
|
data/bin/noyes_dump8k.sh
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
#!/usr/bin/env ruby
# vim: set filetype=ruby :
#
# Runs one audio file through the 8 kHz front end and dumps the output of
# every stage as big-endian single-precision floats next to the input file.
ROOT = File.dirname(File.dirname(__FILE__))
$: << "#{ROOT}/lib/ruby"
$: << "#{ROOT}/lib/common"

require 'signal'

if ARGV.size != 1 || ARGV[0] == '-h'
  puts "Usage: noyes_dump8k <file>"
  exit 1
end

FILE = ARGV[0]
DIR = File.dirname FILE

include Signal
nfilt = 32
min_freq = 200
max_freq = 3700
nfft = 256
freq = 8000
shift = 80
frame_size = 205

preemphasizer = Preemphasizer.new 0.97
segmenter = Segmenter.new frame_size, shift
hamming_windower = HammingWindow.new frame_size
power_spectrum_filter = PowerSpectrumFilter.new nfft
mel_filter = MelFilter.new freq, nfft, nfilt, min_freq, max_freq
discrete_cosine_transform = DCT.new 13, nfilt
live_cmn = LiveCMN.new
ddf = DoubleDeltaFilter.new

# Writes one stage's (possibly nested) float data to DIR/name.  The payload is
# packed binary, so the file is opened in binary mode.
dump = lambda do |name, data|
  open("#{DIR}/#{name}", 'wb') {|f| f.write data.flatten.pack('g*')}
end

# Decode to signed 16-bit big-endian PCM at 8 kHz.  The command is passed as
# an argument array so the file name never goes through a shell (spaces and
# metacharacters are safe).
sox = ['sox', FILE, '-s', '-B', '-r', '8k', '-b', '16', '-t', 'raw', '-']
raw = IO.popen(sox, 'rb') {|io| io.read}
# NOTE(review): raw.dat lands in the current directory, unlike the other dumps.
open('raw.dat', 'wb') {|f| f.write raw}
pcm = raw.unpack 'n*'
pcm = pcm.map{|d| to_signed_short(d).to_f}
pre = preemphasizer << pcm
dump.call 'pre.dat', pre
# Zero-pad so the sample count is a multiple of the frame size.
seg = segmenter << (pre + Array.new(frame_size - pre.size % frame_size, 0.0))
dump.call 'seg.dat', seg
ham = hamming_windower << seg
dump.call 'ham.dat', ham
pow = power_spectrum_filter << ham
dump.call 'pow.dat', pow
mel = mel_filter << pow
dump.call 'mel.dat', mel
log = log_compress mel
dump.call 'log_mel.dat', log
dct = discrete_cosine_transform << log
dump.call 'dct.dat', dct
cmn = live_cmn << dct
dump.call 'cmn.dat', cmn
dd = ddf << cmn
# Flush the frames the delta filter is still holding back.
dd += ddf.final_estimate
dump.call 'dd.dat', dd
|
58
|
+
|
data/bin/recognize.sh
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
#!/usr/bin/env jruby
# vim: set filetype=ruby :
ROOT = File.dirname(File.dirname(__FILE__))
$: << "#{ROOT}/lib/ruby"
$: << "#{ROOT}/lib/common"
require 'socket'
require 'send_incrementally'

# Streams the features of an audio file to a recognition server and returns
# whatever the server sends back.
#
# file:: path of the audio file to recognize.
# node:: host name of the server (default 'localhost').
# port:: TCP port of the server (default 2318).
#
# The socket is used for both directions and is closed when the block exits.
def recognize file, node='localhost', port=2318
  TCPSocket.open(node, port) do |client|
    send_incremental_features file, client, client
  end
end

# Same usage convention as the dump scripts: bail out early instead of
# handing a missing file name to the audio converter.
if ARGV.empty? || ARGV[0] == '-h'
  puts "Usage: recognize <file>"
  exit 1
end

puts recognize ARGV[0]
|
data/doc/overview.rdoc
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
# = Overview
|
2
|
+
#
|
3
|
+
# All signal processing routines use a simple DSL style interface. Below are
|
4
|
+
# some examples.
|
5
|
+
#
|
6
|
+
# == Filter operator example.
|
7
|
+
# Each example below is the data on the left being operated on by the filter on
|
8
|
+
# the right. This is similar to the way the += operator works for numbers. The
|
9
|
+
# data is not modified in place currently and it should probably stay that way.
|
10
|
+
# It could be if efficiency demanded it, but that would require a bit more care
|
11
|
+
# to avoid side effects when using the API. The >>= actually looks like a
|
12
|
+
# filter.
|
13
|
+
#
|
14
|
+
# data = (1..12).to_a
|
15
|
+
# segmenter = Segmenter.new 4, 2 # window size, window shift
|
16
|
+
# hamming_filter = HammingWindow.new 4 # window size
|
17
|
+
# power_spec_filter = PowerSpectrumFilter.new 8 # number of ffts
|
18
|
+
#
|
19
|
+
# data >>= segmenter
|
20
|
+
# data >>= hamming_filter
|
21
|
+
# data >>= power_spec_filter
|
22
|
+
# data >>= dct_filter
|
23
|
+
#
|
24
|
+
# You can expand the >>= operator out, but I think the flow is worse and there
|
25
|
+
# is more repetition, particularly when you have a lot of filters in sequence.
|
26
|
+
# This is perfectly valid syntax though. Also, this is very useful if you don't
|
27
|
+
# want to keep a reference to your original data.
|
28
|
+
#
|
29
|
+
# pcm_data = (1..12).to_a
|
30
|
+
# segmenter = Segmenter.new
|
31
|
+
# hamming_filter = HammingWindow.new 4
|
32
|
+
# segmented_data = segmenter << pcm_data, 4, 2
|
33
|
+
# hamming_data = hamming_filter << segmented_data
|
34
|
+
# power_spectrum_data = power_spec_filter hamming_data, 8
|
35
|
+
# dct_data = dct_filter << power_spectrum_data
|
36
|
+
#
|
37
|
+
# Here is an older version with function calls instead of operator overloading.
|
38
|
+
# The trouble with it is that the flow is hard to follow, and there is
|
39
|
+
# repetition. Filter and process are really synonyms. And this requires
|
40
|
+
# repeating the data component twice. Also, power spec is a function here
|
41
|
+
# with additional arguments. I think I'd rather have the configuration
|
42
|
+
# details, such as number of ffts all grouped at the top. It's easier to
|
43
|
+
# follow this way.
|
44
|
+
#
|
45
|
+
# data = (1..12).to_a
|
46
|
+
# seg = Segmenter.new
|
47
|
+
# ham = HammingWindow.new 4
|
48
|
+
# segments = segmenter.process data, 4, 2
|
49
|
+
# hamming_ = hamming_filter.process segments
|
50
|
+
# power = power_spec.filter hamming, 8
|
51
|
+
# dct = dct.process power
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module Math
  # Returns the dot product of two numeric sequences.
  #
  # l1:: first sequence; its length decides how many terms are summed.
  # l2:: second sequence, read at the same indices as l1.
  #
  # The running total starts at the Integer 0, so an empty l1 yields 0.  The
  # terms are accumulated left to right.
  def dot_product l1, l2
    sum = 0
    l1.each_with_index {|value, i| sum += value * l2[i]}
    sum
  end
end
|
10
|
+
|
11
|
+
# Reinterprets an unsigned 16-bit value (as produced by unpacking with the
# network byte order 'n' directive) as a two's-complement signed short.
# Values below 2**15 are returned unchanged; values from 2**15 upward map to
# the corresponding negative number.
def to_signed_short n
  bits = 16
  all_ones = 2**bits-1
  sign_bit = 2**(bits-1)
  return n if n < sign_bit
  # Two's complement: flip all 16 bits, add one, negate.
  -((n ^ all_ones) + 1)
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
require 'noyes'
|
2
|
+
include Noyes
|
3
|
+
|
4
|
+
TMAGIC = '1.0 talkhouse'
|
5
|
+
TSTART = [0].pack('N')
|
6
|
+
TAUDIO = [1].pack('N')
|
7
|
+
TEND = [2].pack('N')
|
8
|
+
TDONE = [3].pack('N')
|
9
|
+
TCEPSTRA = [4].pack('N')
|
10
|
+
|
11
|
+
# Use sox to convert a file of almost any common type into pcm.
|
12
|
+
# Decodes an audio file with sox and returns its samples as an array of
# Floats holding signed 16-bit values.
#
# file:: path of the audio file to decode.
#
# sox is asked for 8 kHz, 16-bit, big-endian raw output on stdout.  The
# command is given as an argument array so the path is never interpreted by a
# shell: names containing spaces or shell metacharacters are passed through
# verbatim (and, consequently, no shell expansion such as '~' takes place).
def file2pcm file
  command = ['sox', file, '-s', '-B', '-r', '8k', '-b', '16', '-t', 'raw', '-']
  raw = IO.popen(command, 'rb') {|sox| sox.read}
  length = 16 # bits
  max = 2**length-1
  mid = 2**(length-1)
  # 'n' unpacks unsigned shorts; fold the upper half back to negatives.
  to_signed = proc {|n| (n>=mid) ? -((n ^ max) + 1) : n}
  raw.unpack('n*').map {|d| to_signed[d].to_f}
end
|
21
|
+
|
22
|
+
# Computes cepstral features for an audio file chunk by chunk and streams
# them to a server, then returns what the server answers.
#
# file::        path of the audio file; decoded with file2pcm.
# to_server::   IO-like object the protocol messages are written to.
# from_server:: IO-like object the reply is read from once everything is sent.
#
# Wire format, in order: TMAGIC, TSTART, then for every chunk that produced
# frames TCEPSTRA + a 32-bit big-endian frame count + the frames as big-endian
# floats, and finally TEND and TDONE.  A '.' is printed per chunk sent.
def send_incremental_features file, to_server, from_server
  nfilt, min_freq, max_freq = 32, 200, 3700
  nfft, freq = 256, 8000
  shift, frame_size = 80, 205

  # Stages up to and including the segmenter; the segmenter may answer nil
  # while it is still buffering samples.
  buffering_stages = [
    Preemphasizer.new(0.97),
    Segmenter.new(frame_size, shift)
  ]
  # Stages applied to every batch of complete frames.
  frame_stages = [
    HammingWindow.new(frame_size),
    PowerSpectrumFilter.new(nfft),
    MelFilter.new(freq, nfft, nfilt, min_freq, max_freq),
    LogCompressor.new,
    DCT.new(13, nfilt),
    LiveCMN.new
  ]

  pcm = file2pcm file
  to_server.write TMAGIC
  to_server.write TSTART
  pcm.each_slice 1230 do |chunk|
    frames = buffering_stages.inject(chunk) {|stream, stage| stream >> stage}
    next unless frames
    cepstra = frame_stages.inject(frames) {|stream, stage| stream >> stage}
    to_server.write TCEPSTRA
    to_server.write([cepstra.size].pack('N'))
    print '.'
    cepstra.each {|cmn| to_server.write cmn.pack('g*')}
    to_server.flush
  end
  [TEND, TDONE].each {|marker| to_server.write marker}
  to_server.flush
  from_server.read
end
|
data/lib/noyes.rb
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
require "noyes_dsl"
|
2
|
+
require "noyes_math"
|
3
|
+
require "live_cmn"
|
4
|
+
require "dct"
|
5
|
+
require "delta"
|
6
|
+
require "filter"
|
7
|
+
require "mel_filter"
|
8
|
+
require "hamming_window"
|
9
|
+
require "log_compress"
|
10
|
+
require "discrete_fourier_transform"
|
11
|
+
require "power_spec"
|
12
|
+
require "preemphasis"
|
13
|
+
require "segment"
|
@@ -0,0 +1,34 @@
|
|
1
|
+
require 'noyes_math'
|
2
|
+
|
3
|
+
module Noyes
  # Takes the discrete cosine transform.  Converts an n x m matrix to an
  # n x order matrix.  ncol should be set to m.
  class DCT
    include Math
    # The order x ncol table of cosine weights applied to every input row.
    attr_accessor :melcos

    # order:: number of coefficients produced per input row.
    # ncol::  length of each input row.
    def initialize order, ncol
      @melcos = Array.new(order) do |i|
        freq = PI * i.to_f / ncol
        # The division by order is explained in note [1] at the end of this
        # file; it is folded into the table so it costs nothing per frame.
        Array.new(ncol) {|j| cos(freq * (j + 0.5)) / order}
      end
    end

    # Transforms every row of data; each output row holds the dot product of
    # the input row with each row of the cosine table.
    def << data
      data.map do |dvec|
        @melcos.map {|m| dot_product m, dvec}
      end
    end
  end
end
|
29
|
+
|
30
|
+
# Notes:
|
31
|
+
# [1] I'm not sure why I do this division by order. Sphinx does it. I wanted
|
32
|
+
# to have compatible output though I'm not sure I should care since I don't use
|
33
|
+
# sphinx anymore. However, Sphinx does it continually during processing. I
|
34
|
+
# build it into the filters so there is no cost.
|
@@ -0,0 +1,34 @@
|
|
1
|
+
module Noyes
  # Takes an m x n array and makes an m x 3 x n array.  The original inner
  # array is duplicated followed by its delta and its double delta.
  #
  # The filter is streaming: output lags the input by three frames because
  # each frame needs three frames of look-ahead.  Call final_estimate to
  # obtain the frames still held back.
  class DoubleDeltaFilter
    def initialize
      @previous = nil
    end

    # Feeds a batch of frames and returns one [frame, delta, double_delta]
    # triple for every frame whose look-ahead is now available.
    def << cepstra
      # Nothing to do, and nothing to seed the history with.
      return [] if cepstra.empty?
      # Before the first frame, pretend the signal started with three copies
      # of it so that the look-behind is defined.
      @previous = [cepstra.first] * 3 unless @previous
      buf = @previous + cepstra
      result = (3...(buf.size-3)).map do |i|
        delta = Array.new(buf[i].size) {|k| buf[i+2][k] - buf[i-2][k]}
        double_delta = Array.new(buf[i].size) do |k|
          buf[i+3][k] - buf[i-1][k] - buf[i+1][k] + buf[i-3][k]
        end
        [buf[i], delta, double_delta]
      end
      # Keep three frames of look-behind plus the (up to three) frames still
      # awaiting look-ahead.  Using last(6) keeps the whole buffer when it is
      # shorter than six frames, so small first batches are not lost.
      @previous = buf.last 6
      result
    end

    # If there is no more data we can estimate the held-back frames by
    # copying the final frame 3 times.  Returns [] if no data was ever fed.
    def final_estimate
      return [] unless @previous
      self << [@previous.last] * 3
    end
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
require 'complex'
|
2
|
+
|
3
|
+
module Noyes
  include Math
  # Takes the discrete Fourier transform of data using an in-place radix-2
  # algorithm and returns an array of Complex values.
  #
  # data:: real-valued samples.
  # size:: transform length; data is zero-padded up to this length.
  #
  # NOTE(review): the halving in the reordering loop looks like it assumes
  # size is a power of two -- confirm with callers.
  def dft data,size
    # Complex working buffer: the samples followed by zero padding.
    vals = Array.new size
    data.each_with_index {|sample, idx| vals[idx] = Complex(sample,0)}
    data.size.upto(size-1) {|idx| vals[idx] = Complex(0,0)}

    # Reorder the buffer into bit-reversed index order.
    rev = 0
    size.times do |pos|
      vals[rev],vals[pos] = vals[pos],vals[rev] if pos<rev
      half = size/2
      while rev>=half && half>1
        rev -= half
        half /= 2
      end
      rev += half
    end

    # Butterfly passes; the span doubles after every pass.
    span = 1
    while span<size
      stride = 2*span
      rotation = Complex.polar 1, Math::PI/span
      twiddle = Complex(1, 0)
      span.times do |offset|
        offset.step(size-1,stride) do |lo|
          scaled = twiddle * vals[lo+span]
          vals[lo+span],vals[lo] = vals[lo]-scaled,vals[lo]+scaled
        end
        twiddle *= rotation
      end
      span = stride
    end
    vals
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module Noyes
  # Takes an m x n matrix and multiplies each inner array by a Hamming window
  # function.  Be careful to make sure your inner array length is the same as
  # the window size.
  class HammingWindow
    include Math

    # window_size:: number of samples in the window.  A single-sample window
    #               gets the coefficient 1.0, since the usual formula would
    #               divide by window_size - 1 = 0 and produce NaN.
    def initialize window_size
      twopi = 2 * PI
      @hamming_window = Array.new(window_size) do |i|
        if window_size == 1
          1.0
        else
          0.54 - 0.46*cos(twopi*i/(window_size-1))
        end
      end
    end

    # Returns a new matrix in which every sample of every segment has been
    # scaled by the window coefficient at the same position.
    def << segments
      segments.map do |s|
        s.zip(@hamming_window).map {|d, h| d*h}
      end
    end
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
# Normalizes cepstrum means and applies them.  Dimensionality remains
# unchanged.  NOTE: This class resets itself automatically if bounds drift
# too much.  Possibly these bounds should be parameterized.
class LiveCMN
  # dimensions::  length of every cepstral vector.
  # init_mean::   starting mean of the first coefficient; the others start
  #               at 0.
  # window_size:: frame count the running sums are rescaled to on update.
  # shift::       the means are re-estimated once more than this many frames
  #               have been accumulated.
  def initialize dimensions=13, init_mean=45.0, window_size=100, shift=160
    @init_mean = init_mean; @shift = shift; @ws = window_size
    @sums = Array.new dimensions, 0
    @means = Array.new dimensions, 0
    @means[0] = @init_mean
    @frame_count = 0
  end

  # Subtracts the current means from every vector in dct and returns the
  # normalized vectors.  An empty batch yields [].  Raises a RuntimeError if
  # the first vector does not have the configured number of dimensions.
  def << dct
    return [] if dct.empty?
    raise "Wrong number of dimensions" if dct[0].size != @means.size
    dct.map do |mfc|
      cmn = Array.new(@means.size) do |i|
        @sums[i] += mfc[i]
        mfc[i] - @means[i]
      end
      @frame_count += 1
      # The means change only after this frame has been normalized.
      update if @frame_count > @shift
      cmn
    end
  end

  # Recomputes the means from the running sums.  If the first mean has left
  # the range 5..70 everything is reset; otherwise the sums are rescaled so
  # that they weigh as much as window_size frames.
  def update
    per_frame = 1.0 / @frame_count
    @means = @sums.map {|x| x * per_frame}

    if @means.first > 70 || @means.first < 5
      reset
    elsif @frame_count >= @shift
      @sums = @sums.map {|x| x * per_frame * @ws}
      @frame_count = @ws
    end
  end

  # Returns sums, means and frame count to their initial values.
  def reset
    @sums.map! {0}
    @means.map! {0}
    @means[0] = @init_mean
    @frame_count = 0
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
module Noyes
  # Takes the natural logarithm of an incoming m x n array.  The dimensions
  # of the array remain unchanged.  If a value is not greater than zero then
  # the value log_zero is used instead of plunging into singularity land and
  # throwing an exception.
  class LogCompressor
    # log_zero:: substitute result for inputs that are zero or negative.
    def initialize log_zero = -0.00001
      @log_zero = log_zero
    end

    # Returns a new array of the same shape holding the compressed values.
    def << mspec
      mspec.map do |msp|
        msp.map {|m| m > 0 ? Math.log(m) : @log_zero}
      end
    end
  end
end
|
@@ -0,0 +1,112 @@
|
|
1
|
+
require 'noyes_math'
|
2
|
+
module Noyes
  # Mel filter takes an m x n matrix.  The inner array becomes equal to the
  # number of mel filter banks (nfilt).  The dimensionality of the outer array
  # remains unchanged.
  class MelFilter
    include Math

    # srate::  sample rate in Hz.
    # nfft::   number of FFT points the power spectra were computed with.
    # nfilt::  number of triangular filters in the bank.
    # lowerf:: lower edge of the first filter in Hz.
    # upperf:: upper edge of the last filter in Hz.
    def initialize srate, nfft, nfilt, lowerf, upperf
      bank_params = MelFilter.make_bank_parameters srate, nfft, nfilt, lowerf, upperf
      @indices = []
      @weights = []
      bank_params.each do |params|
        ind, weights = MelFilter.make_filter(*params)
        @indices << ind
        @weights << weights
      end
    end

    # Applies every filter to every spectrum.  Weights that would fall past
    # the end of a spectrum are ignored.
    def << power_spectra
      power_spectra.map do |spectrum|
        Array.new(@indices.size) do |i|
          initial_index, weights = @indices[i], @weights[i]
          output = 0.0
          weights.size.times do |j|
            index = initial_index + j
            output += spectrum[index] * weights[j] if index < spectrum.length
          end
          output
        end
      end
    end

    # Converts a linear frequency in Hz (or a collection of them) to mel.
    def self.to_mel f
      return f.map {|linfreq| self.to_mel linfreq} if f.respond_to? :map
      2595.0 * Math.log10(1.0 + f/700.0)
    end

    # Converts a mel frequency (or a collection of them) to linear Hz.
    def self.to_linear m
      return m.map {|melfreq| self.to_linear melfreq} if m.respond_to? :map
      700.0 * (10.0**(m/2595.0) - 1.0)
    end

    # Snaps in_freq to the nearest multiple of step_freq.
    def self.determine_bin in_freq, step_freq
      step_freq * (in_freq/step_freq).round
    end

    # Returns one [left, center, right, initial_freq, delta_freq] parameter
    # list per filter.  Centers are equally spaced on the mel scale between
    # lowerf and upperf and snapped to FFT bin frequencies; neighbouring
    # filters share edges with each other's centers.
    #
    # Raises a RuntimeError if nfft or nfilt is not positive.
    def self.make_bank_parameters srate, nfft, nfilt, lowerf, upperf
      raise 'Number of FFT points is <= 0.' if nfft <= 0
      raise 'Number of filters is <= 0.' if nfilt <= 0
      srate = srate.to_f; lowerf = lowerf.to_f; upperf = upperf.to_f
      left_edge = Array.new nfilt
      right_edge = Array.new nfilt
      center_freq = Array.new nfilt
      melmax = self.to_mel upperf
      melmin = self.to_mel lowerf
      delta_freq_mel = (melmax - melmin) / (nfilt + 1.0)
      delta_freq = srate/nfft
      left_edge[0] = self.determine_bin lowerf, delta_freq
      next_edge_mel = melmin
      nfilt.times do |i|
        next_edge_mel += delta_freq_mel
        next_edge = self.to_linear next_edge_mel
        center_freq[i] = self.determine_bin next_edge, delta_freq
        right_edge[i-1] = center_freq[i] if i > 0
        left_edge[i+1] = center_freq[i] if i < nfilt - 1
      end

      # The last filter's right edge lies one more mel step up.
      next_edge_mel += delta_freq_mel
      next_edge = self.to_linear next_edge_mel
      right_edge[nfilt-1] = self.determine_bin next_edge, delta_freq
      Array.new(nfilt) do |i|
        initial_freq_bin = self.determine_bin left_edge[i], delta_freq
        initial_freq_bin += delta_freq if initial_freq_bin < left_edge[i]
        [left_edge[i], center_freq[i], right_edge[i],
         initial_freq_bin, delta_freq]
      end
    end

    # Builds one triangular filter that rises from 0 at left to 1 at center
    # and falls back to 0 at right, sampled every delta starting at
    # init_freq.  Returns [index of the first spectrum bin, weights].
    #
    # Raises a RuntimeError if delta is zero, if any two of the three
    # boundaries round to the same value, or if no elements would result.
    def self.make_filter left, center, right, init_freq, delta
      raise 'delta freq has zero value' if delta == 0
      if (right - left).round == 0 || (center - left).round == 0 ||
         (right - center).round == 0
        raise 'filter boundaries too close'
      end

      n_elements = ((right - left)/ delta + 1).round
      raise 'number of mel elements is zero' if n_elements == 0

      weights = Array.new n_elements
      # A Float height keeps the slopes fractional even for Integer edges.
      height = 1.0
      left_slope = height / (center - left)
      right_slope = height / (center - right)

      index_fw = 0
      init_freq.step right, delta do |current|
        if current < center
          weights[index_fw] = left_slope * (current - left)
        else
          weights[index_fw] = height + right_slope * (current - center)
        end
        index_fw += 1
      end
      [(init_freq/delta).round, weights]
    end

    # Weighted sum of spectrum starting at init_index.  Unlike <<, this does
    # not guard against running past the end of the spectrum.
    def apply_weights init_index, weights, spectrum
      output = 0.0
      weights.size.times do |i|
        output += spectrum[i + init_index] * weights[i]
      end
      output
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'discrete_fourier_transform'
|
2
|
+
module Noyes
  # Squared magnitude of the DFT.  The transform length (nfft) is fixed at
  # construction.  For an array of frames this returns an array of arrays
  # whose inner arrays have length nfft/2 + 1; the outer length is unchanged.
  class PowerSpectrumFilter
    include Noyes

    # nfft:: number of points passed to dft for every frame.
    def initialize nfft
      @nfft = nfft
    end

    # Transforms each frame and keeps the squared magnitudes of the first
    # nfft/2 + 1 bins.
    def << data
      unique_bins = @nfft/2 + 1
      data.map do |frame|
        spectrum = dft frame, @nfft
        (0...unique_bins).map {|bin| spectrum[bin].abs**2}
      end
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module Noyes
  # A simple high pass filter.  It takes an array of size n and returns an
  # array of size n.  Each output sample is the input sample minus factor
  # times the previous input sample; the last sample of a chunk is remembered
  # so that consecutive chunks are filtered as one continuous signal.
  class Preemphasizer
    include Math

    # factor:: weight of the previous sample (default 0.97).
    def initialize factor=0.97
      @factor = factor
      @prior = 0
    end

    # Filters one chunk of samples and returns the filtered chunk.
    def << data
      prior = @prior
      # An empty chunk must not erase the remembered sample: data.last would
      # be nil and break the next call.
      @prior = data.last unless data.empty?
      data.map do |x|
        y = x - @factor * prior
        prior = x
        y
      end
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module Noyes
  # Segments an array of data into an array of arrays.  Inner arrays are the
  # size of the window.  Samples that do not yet fill a window are carried
  # over to the next call.
  class Segmenter
    # window_size:: number of samples per segment.
    # shift::       distance in samples between the starts of consecutive
    #               segments.
    def initialize window_size, shift
      @winsz = window_size; @winshift = shift
      @overflow = nil
    end

    # Appends data to the carried-over samples.  Returns nil (and keeps
    # buffering) while fewer than window_size + 5 * shift samples are
    # available; otherwise returns every complete window.
    def << data
      data = @overflow + data if @overflow
      if data.size < @winsz + @winshift * 5
        @overflow = data
        return nil
      end
      # Number of windows that fit entirely inside data.
      count = (data.length - @winsz) / @winshift + 1
      segments = Array.new(count) {|n| data[n * @winshift, @winsz]}
      # Carry over everything from the start of the first incomplete window.
      @overflow = data[(count * @winshift)..-1]
      segments
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,78 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: noyes
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.3.2
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Joe Woelfel
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2010-02-01 00:00:00 -05:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description: Currently sufficient to create basic features for speech recognition
|
17
|
+
email: joe@talkhouse.com
|
18
|
+
executables:
|
19
|
+
- noyes_dump44k.sh
|
20
|
+
- noyes_dump8k.sh
|
21
|
+
- recognize.sh
|
22
|
+
extensions: []
|
23
|
+
|
24
|
+
extra_rdoc_files:
|
25
|
+
- COPYING
|
26
|
+
- README
|
27
|
+
- doc/overview.rdoc
|
28
|
+
files:
|
29
|
+
- lib/common/noyes_dsl.rb
|
30
|
+
- lib/common/noyes_math.rb
|
31
|
+
- lib/common/send_incrementally.rb
|
32
|
+
- lib/noyes.rb
|
33
|
+
- lib/ruby_impl/dct.rb
|
34
|
+
- lib/ruby_impl/delta.rb
|
35
|
+
- lib/ruby_impl/discrete_fourier_transform.rb
|
36
|
+
- lib/ruby_impl/filter.rb
|
37
|
+
- lib/ruby_impl/hamming_window.rb
|
38
|
+
- lib/ruby_impl/live_cmn.rb
|
39
|
+
- lib/ruby_impl/log_compress.rb
|
40
|
+
- lib/ruby_impl/mel_filter.rb
|
41
|
+
- lib/ruby_impl/power_spec.rb
|
42
|
+
- lib/ruby_impl/preemphasis.rb
|
43
|
+
- lib/ruby_impl/segment.rb
|
44
|
+
- COPYING
|
45
|
+
- README
|
46
|
+
- doc/overview.rdoc
|
47
|
+
has_rdoc: true
|
48
|
+
homepage: http://github.com/talkhouse/noise
|
49
|
+
licenses: []
|
50
|
+
|
51
|
+
post_install_message:
|
52
|
+
rdoc_options:
|
53
|
+
- --charset=UTF-8
|
54
|
+
require_paths:
|
55
|
+
- lib/ruby_impl
|
56
|
+
- lib/common
|
57
|
+
- lib
|
58
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
59
|
+
requirements:
|
60
|
+
- - ">="
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: "0"
|
63
|
+
version:
|
64
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: "0"
|
69
|
+
version:
|
70
|
+
requirements: []
|
71
|
+
|
72
|
+
rubyforge_project:
|
73
|
+
rubygems_version: 1.3.5
|
74
|
+
signing_key:
|
75
|
+
specification_version: 3
|
76
|
+
summary: A signal processing library
|
77
|
+
test_files: []
|
78
|
+
|