noyes 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/COPYING +21 -0
- data/README +32 -0
- data/bin/noyes_dump44k.sh +59 -0
- data/bin/noyes_dump8k.sh +58 -0
- data/bin/recognize.sh +15 -0
- data/doc/overview.rdoc +51 -0
- data/lib/common/noyes_dsl.rb +6 -0
- data/lib/common/noyes_math.rb +18 -0
- data/lib/common/send_incrementally.rb +62 -0
- data/lib/noyes.rb +13 -0
- data/lib/ruby_impl/dct.rb +34 -0
- data/lib/ruby_impl/delta.rb +34 -0
- data/lib/ruby_impl/discrete_fourier_transform.rb +37 -0
- data/lib/ruby_impl/filter.rb +11 -0
- data/lib/ruby_impl/hamming_window.rb +20 -0
- data/lib/ruby_impl/live_cmn.rb +42 -0
- data/lib/ruby_impl/log_compress.rb +13 -0
- data/lib/ruby_impl/mel_filter.rb +112 -0
- data/lib/ruby_impl/power_spec.rb +19 -0
- data/lib/ruby_impl/preemphasis.rb +21 -0
- data/lib/ruby_impl/segment.rb +28 -0
- metadata +78 -0
data/COPYING
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
Copyright 2010 Talkhouse. All rights reserved.
|
2
|
+
|
3
|
+
Redistribution and use in source and binary forms, with or without modification, are
|
4
|
+
permitted provided that the following conditions are met:
|
5
|
+
|
6
|
+
1. Redistributions of source code must retain the above copyright notice, this list of
|
7
|
+
conditions and the following disclaimer.
|
8
|
+
|
9
|
+
2. Redistributions in binary form must reproduce the above copyright notice, this list
|
10
|
+
of conditions and the following disclaimer in the documentation and/or other materials
|
11
|
+
provided with the distribution.
|
12
|
+
|
13
|
+
THIS SOFTWARE IS PROVIDED BY TALKHOUSE ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
14
|
+
WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
15
|
+
FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL TALKHOUSE OR
|
16
|
+
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
17
|
+
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
18
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
19
|
+
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
20
|
+
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
21
|
+
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
data/README
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
Noyes is a signal processing library. It currently has just enough signal
|
2
|
+
processing to produce features suitable for speech recognition.
|
3
|
+
|
4
|
+
Pronunciation: Typically pronounced the same as 'noise'. But "NO!... YES!" is
|
5
|
+
considered acceptable if you yell it loudly enough or at least with sufficient
|
6
|
+
conviction to make people think you have truly changed your mind.
|
7
|
+
|
8
|
+
Noyes is implemented entirely in Ruby. It's also implemented entirely in Java.
|
9
|
+
The Java version has Ruby bindings too. So you can have Java's speed from
|
10
|
+
Ruby. All versions share the same unit tests, which are written in Ruby. The
|
11
|
+
design goal is to have signal processing routines that are so simple and so
|
12
|
+
disentangled from the overall system that anyone could extract any of the
|
13
|
+
routines and use them elsewhere with little trouble. Benchmarks are included.
|
14
|
+
|
15
|
+
This library places an emphasis on expressiveness without sacrificing ultimate
|
16
|
+
performance. It does so by supporting multiple implementations each with Ruby
|
17
|
+
bindings. The pure Ruby version, while not fast, is often adequate for
|
18
|
+
development and is the best place to add new routines.
|
19
|
+
|
20
|
+
For examples of how to link with different implementations see the test section
|
21
|
+
of the Rakefile. At present only the pure Ruby implementation is exposed via
|
22
|
+
the gem.
|
23
|
+
|
24
|
+
Requirements:
|
25
|
+
Almost any version of ruby & rake.
|
26
|
+
Java, if you want to use the Java version.
|
27
|
+
|
28
|
+
Some of the utility scripts may use sox, but
|
29
|
+
none of the core routines use it.
|
30
|
+
|
31
|
+
For usage information:
|
32
|
+
rake -T
|
@@ -0,0 +1,59 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# vim: set filetype=ruby :
|
3
|
+
ROOT = File.dirname(File.dirname(__FILE__))
|
4
|
+
$: << "#{ROOT}/lib/ruby"
|
5
|
+
$: << "#{ROOT}/lib/common"
|
6
|
+
|
7
|
+
require 'signal'
|
8
|
+
|
9
|
+
if ARGV.size != 1 || ARGV[0] == '-h'
|
10
|
+
puts "Usage: noyes_dump44k <file>"
|
11
|
+
exit 1
|
12
|
+
end
|
13
|
+
|
14
|
+
FILE = ARGV[0]
|
15
|
+
DIR = File.dirname FILE
|
16
|
+
|
17
|
+
include Signal
|
18
|
+
nfilt = 40
|
19
|
+
min_freq = 130
|
20
|
+
max_freq = 6800
|
21
|
+
nfft = 2048
|
22
|
+
freq = 44100
|
23
|
+
shift = 441
|
24
|
+
frame_size = 1130
|
25
|
+
|
26
|
+
preemphasizer = Preemphasizer.new 0.97
|
27
|
+
segmenter = Segmenter.new frame_size, shift
|
28
|
+
hamming_windower = HammingWindow.new frame_size
|
29
|
+
power_spectrum_filter = PowerSpectrumFilter.new nfft
|
30
|
+
mel_filter = MelFilter.new freq, nfft, nfilt, min_freq, max_freq
|
31
|
+
discrete_cosine_transform = DCT.new 13, nfilt
|
32
|
+
live_cmn = LiveCMN.new
|
33
|
+
ddf = DoubleDeltaFilter.new
|
34
|
+
|
35
|
+
raw = `sox #{FILE} -s -B -r 8k -b 16 -t raw -`
|
36
|
+
open('raw.dat', 'wb') {|f| f.write raw}
|
37
|
+
pcm = raw.unpack 'n*'
|
38
|
+
pcm = pcm.map{|d| to_signed_short(d).to_f}
|
39
|
+
pre = preemphasizer << pcm
|
40
|
+
open("#{DIR}/pre.dat", 'w') {|f| f.write pre.flatten.pack 'g*' }
|
41
|
+
seg = segmenter << (pre + Array.new(frame_size - pre.size % frame_size, 0.0))
|
42
|
+
open("#{DIR}/seg.dat", 'w') {|f| f.write seg.flatten.pack 'g*'}
|
43
|
+
ham = hamming_windower << seg
|
44
|
+
open("#{DIR}/ham.dat", 'w') {|f| f.write ham.flatten.pack 'g*'}
|
45
|
+
pow = power_spectrum_filter << ham
|
46
|
+
open("#{DIR}/pow.dat", 'w') {|f| f.write pow.flatten.pack 'g*'}
|
47
|
+
mel = mel_filter << pow
|
48
|
+
open("#{DIR}/mel.dat", 'w') {|f| f.write mel.flatten.pack 'g*'}
|
49
|
+
log = log_compress mel
|
50
|
+
open("#{DIR}/log_mel.dat", 'w') {|f| f.write log.flatten.pack 'g*'}
|
51
|
+
dct = discrete_cosine_transform << log
|
52
|
+
open("#{DIR}/dct.dat", 'w') {|f| f.write dct.flatten.pack 'g*'}
|
53
|
+
cmn = live_cmn << dct
|
54
|
+
open("#{DIR}/cmn.dat", 'w') {|f| f.write cmn.flatten.pack 'g*'}
|
55
|
+
dd = ddf << cmn
|
56
|
+
dd += ddf.final_estimate
|
57
|
+
open("#{DIR}/dd.dat", 'w') {|f| f.write dd.flatten.pack 'g*'}
|
58
|
+
|
59
|
+
|
data/bin/noyes_dump8k.sh
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# vim: set filetype=ruby :
|
3
|
+
ROOT = File.dirname(File.dirname(__FILE__))
|
4
|
+
$: << "#{ROOT}/lib/ruby"
|
5
|
+
$: << "#{ROOT}/lib/common"
|
6
|
+
|
7
|
+
require 'signal'
|
8
|
+
|
9
|
+
if ARGV.size != 1 || ARGV[0] == '-h'
|
10
|
+
puts "Usage: noyes_dump8k <file>"
|
11
|
+
exit 1
|
12
|
+
end
|
13
|
+
|
14
|
+
FILE = ARGV[0]
|
15
|
+
DIR = File.dirname FILE
|
16
|
+
|
17
|
+
include Signal
|
18
|
+
nfilt = 32
|
19
|
+
min_freq = 200
|
20
|
+
max_freq = 3700
|
21
|
+
nfft = 256
|
22
|
+
freq = 8000
|
23
|
+
shift = 80
|
24
|
+
frame_size = 205
|
25
|
+
|
26
|
+
preemphasizer = Preemphasizer.new 0.97
|
27
|
+
segmenter = Segmenter.new frame_size, shift
|
28
|
+
hamming_windower = HammingWindow.new frame_size
|
29
|
+
power_spectrum_filter = PowerSpectrumFilter.new nfft
|
30
|
+
mel_filter = MelFilter.new freq, nfft, nfilt, min_freq, max_freq
|
31
|
+
discrete_cosine_transform = DCT.new 13, nfilt
|
32
|
+
live_cmn = LiveCMN.new
|
33
|
+
ddf = DoubleDeltaFilter.new
|
34
|
+
|
35
|
+
raw = `sox #{FILE} -s -B -r 8k -b 16 -t raw -`
|
36
|
+
open('raw.dat', 'wb') {|f| f.write raw}
|
37
|
+
pcm = raw.unpack 'n*'
|
38
|
+
pcm = pcm.map{|d| to_signed_short(d).to_f}
|
39
|
+
pre = preemphasizer << pcm
|
40
|
+
open("#{DIR}/pre.dat", 'w') {|f| f.write pre.flatten.pack 'g*' }
|
41
|
+
seg = segmenter << (pre + Array.new(frame_size - pre.size % frame_size, 0.0))
|
42
|
+
open("#{DIR}/seg.dat", 'w') {|f| f.write seg.flatten.pack 'g*'}
|
43
|
+
ham = hamming_windower << seg
|
44
|
+
open("#{DIR}/ham.dat", 'w') {|f| f.write ham.flatten.pack 'g*'}
|
45
|
+
pow = power_spectrum_filter << ham
|
46
|
+
open("#{DIR}/pow.dat", 'w') {|f| f.write pow.flatten.pack 'g*'}
|
47
|
+
mel = mel_filter << pow
|
48
|
+
open("#{DIR}/mel.dat", 'w') {|f| f.write mel.flatten.pack 'g*'}
|
49
|
+
log = log_compress mel
|
50
|
+
open("#{DIR}/log_mel.dat", 'w') {|f| f.write log.flatten.pack 'g*'}
|
51
|
+
dct = discrete_cosine_transform << log
|
52
|
+
open("#{DIR}/dct.dat", 'w') {|f| f.write dct.flatten.pack 'g*'}
|
53
|
+
cmn = live_cmn << dct
|
54
|
+
open("#{DIR}/cmn.dat", 'w') {|f| f.write cmn.flatten.pack 'g*'}
|
55
|
+
dd = ddf << cmn
|
56
|
+
dd += ddf.final_estimate
|
57
|
+
open("#{DIR}/dd.dat", 'w') {|f| f.write dd.flatten.pack 'g*'}
|
58
|
+
|
data/bin/recognize.sh
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
#!/usr/bin/env jruby
|
2
|
+
# vim: set filetype=ruby :
|
3
|
+
ROOT = File.dirname(File.dirname(__FILE__))
|
4
|
+
$: << "#{ROOT}/lib/ruby"
|
5
|
+
$: << "#{ROOT}/lib/common"
|
6
|
+
require 'socket'
|
7
|
+
require 'send_incrementally'
|
8
|
+
|
9
|
+
def recognize file, node='localhost', port=2318
|
10
|
+
TCPSocket.open(node, port) do |client|
|
11
|
+
send_incremental_features file, client, client
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
puts recognize ARGV[0]
|
data/doc/overview.rdoc
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
# = Overview
|
2
|
+
#
|
3
|
+
# All signal processing routines use a simple DSL style interface. Below are
|
4
|
+
# some examples.
|
5
|
+
#
|
6
|
+
# == Filter operator example.
|
7
|
+
# Each example below is the data on the left being operated on by the filter on
|
8
|
+
# the right. This is similar to the way the += operator works for numbers. The
|
9
|
+
# data is not modified in place currently and it should probably stay that way.
|
10
|
+
# It could be if efficiency demanded it, but that would require a bit more care
|
11
|
+
# to avoid side effects when using the API. The >>= actually looks like a
|
12
|
+
# filter.
|
13
|
+
#
|
14
|
+
# data = (1..12).to_a
|
15
|
+
# segmenter = Segmenter.new 4, 2 # window size, window shift
|
16
|
+
# hamming_filter = HammingWindow.new 4 # window size
|
17
|
+
# power_spec_filter = PowerSpectrumFilter.new 8 # number of ffts
|
18
|
+
#
|
19
|
+
# data >>= segmenter
|
20
|
+
# data >>= hamming_filter
|
21
|
+
# data >>= power_spec_filter
|
22
|
+
# data >>= dct_filter
|
23
|
+
#
|
24
|
+
# You can expand the >>= operator out, but I think the flow is worse and there
|
25
|
+
# is more repetition, particularly when you have a lot of filters in sequence.
|
26
|
+
# This is perfectly valid syntax though. Also, this is very useful if you don't
|
27
|
+
# want to keep a reference to your original data.
|
28
|
+
#
|
29
|
+
# pcm_data = (1..12).to_a
|
30
|
+
# segmenter = Segmenter.new
|
31
|
+
# hamming_filter = HammingWindow.new 4
|
32
|
+
# segmented_data = segmenter << pcm_data, 4, 2
|
33
|
+
# hamming_data = hamming_filter << segmented_data
|
34
|
+
# power_spectrum_data = power_spec_filter hamming_data, 8
|
35
|
+
# dct_data = dct_filter << power_spectrum_data
|
36
|
+
#
|
37
|
+
# Here is an older version with function calls instead of operator overloading.
|
38
|
+
# The trouble with it is that the flow is hard to follow, and there is
|
39
|
+
# repetition. Filter and process are really synonyms. And this requires
|
40
|
+
# repeating the data component twice. Also, power spec is a function here
|
41
|
+
# with additional arguments. I think I'd rather have the configuration
|
42
|
+
# details, such as number of ffts all grouped at the top. It's easier to
|
43
|
+
# follow this way.
|
44
|
+
#
|
45
|
+
# data = (1..12).to_a
|
46
|
+
# seg = Segmenter.new
|
47
|
+
# ham = HammingWindow.new 4
|
48
|
+
# segments = segmenter.process data, 4, 2
|
49
|
+
# hamming_ = hamming_filter.process segments
|
50
|
+
# power = power_spec.filter hamming, 8
|
51
|
+
# dct = dct.process power
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module Math
|
2
|
+
def dot_product l1, l2
|
3
|
+
sum = 0
|
4
|
+
for i in 0...l1.size
|
5
|
+
sum += l1[i] * l2[i]
|
6
|
+
end
|
7
|
+
sum
|
8
|
+
end
|
9
|
+
end
|
10
|
+
|
11
|
+
# Converts from unsigned to signed short. Ruby, strangely enough, doesn't have
|
12
|
+
# network byte order short conversion for signed shorts.
|
13
|
+
def to_signed_short n
|
14
|
+
length = 16 # bits
|
15
|
+
max = 2**length-1
|
16
|
+
mid = 2**(length-1)
|
17
|
+
n>=mid ? -((n ^ max) + 1) : n
|
18
|
+
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
require 'noyes'
|
2
|
+
include Noyes
|
3
|
+
|
4
|
+
TMAGIC = '1.0 talkhouse'
|
5
|
+
TSTART = [0].pack('N')
|
6
|
+
TAUDIO = [1].pack('N')
|
7
|
+
TEND = [2].pack('N')
|
8
|
+
TDONE = [3].pack('N')
|
9
|
+
TCEPSTRA = [4].pack('N')
|
10
|
+
|
11
|
+
# Use sox to convert a file of almost any common type into pcm.
|
12
|
+
def file2pcm file
|
13
|
+
raw = `sox #{file} -s -B -r 8k -b 16 -t raw -`
|
14
|
+
length = 16 # bits
|
15
|
+
max = 2**length-1
|
16
|
+
mid = 2**(length-1)
|
17
|
+
to_signed = proc {|n| (n>=mid) ? -((n ^ max) + 1) : n}
|
18
|
+
unpacked = raw.unpack 'n*'
|
19
|
+
unpacked.map{|d| to_signed[d].to_f}
|
20
|
+
end
|
21
|
+
|
22
|
+
# Takes a file and two IO-like objects.
|
23
|
+
def send_incremental_features file, to_server, from_server
|
24
|
+
nfilt = 32
|
25
|
+
min_freq = 200
|
26
|
+
max_freq = 3700
|
27
|
+
nfft = 256
|
28
|
+
freq = 8000
|
29
|
+
shift = 80
|
30
|
+
frame_size = 205
|
31
|
+
preemphasizer = Preemphasizer.new 0.97
|
32
|
+
segmenter = Segmenter.new frame_size, shift
|
33
|
+
hamming_windower = HammingWindow.new frame_size
|
34
|
+
power_spectrum_filter = PowerSpectrumFilter.new nfft
|
35
|
+
mel_filter = MelFilter.new freq, nfft, nfilt, min_freq, max_freq
|
36
|
+
compressor = LogCompressor.new
|
37
|
+
discrete_cosine_transform = DCT.new 13, nfilt
|
38
|
+
live_cmn = LiveCMN.new
|
39
|
+
pcm = file2pcm file
|
40
|
+
to_server.write TMAGIC
|
41
|
+
to_server.write TSTART
|
42
|
+
pcm.each_slice 1230 do |data|
|
43
|
+
data >>= preemphasizer
|
44
|
+
data >>= segmenter
|
45
|
+
next unless data
|
46
|
+
data >>= hamming_windower
|
47
|
+
data >>= power_spectrum_filter
|
48
|
+
data >>= mel_filter
|
49
|
+
data >>= compressor
|
50
|
+
data >>= discrete_cosine_transform
|
51
|
+
data >>= live_cmn
|
52
|
+
to_server.write TCEPSTRA
|
53
|
+
to_server.write [data.size].pack('N')
|
54
|
+
print '.'
|
55
|
+
data.each {|cmn| to_server.write cmn.pack('g*')}
|
56
|
+
to_server.flush
|
57
|
+
end
|
58
|
+
to_server.write TEND
|
59
|
+
to_server.write TDONE
|
60
|
+
to_server.flush
|
61
|
+
from_server.read
|
62
|
+
end
|
data/lib/noyes.rb
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
require "noyes_dsl"
|
2
|
+
require "noyes_math"
|
3
|
+
require "live_cmn"
|
4
|
+
require "dct"
|
5
|
+
require "delta"
|
6
|
+
require "filter"
|
7
|
+
require "mel_filter"
|
8
|
+
require "hamming_window"
|
9
|
+
require "log_compress"
|
10
|
+
require "discrete_fourier_transform"
|
11
|
+
require "power_spec"
|
12
|
+
require "preemphasis"
|
13
|
+
require "segment"
|
@@ -0,0 +1,34 @@
|
|
1
|
+
require 'noyes_math'
|
2
|
+
|
3
|
+
module Noyes
|
4
|
+
# Takes the discrete cosine transform. Converts an n x m matrix to an n x order
|
5
|
+
# matrix. ncol should be set to m.
|
6
|
+
class DCT
|
7
|
+
include Math
|
8
|
+
attr_accessor :melcos
|
9
|
+
def initialize order, ncol
|
10
|
+
@melcos = []
|
11
|
+
order.times do |i|
|
12
|
+
freq = PI * i.to_f / ncol
|
13
|
+
ldct = Array.new ncol
|
14
|
+
ncol.times do |j|
|
15
|
+
ldct[j] = cos(freq * (j + 0.5)) / order # [1]
|
16
|
+
end
|
17
|
+
@melcos << ldct
|
18
|
+
end
|
19
|
+
@melcos
|
20
|
+
end
|
21
|
+
|
22
|
+
def << data
|
23
|
+
data.map do |dvec|
|
24
|
+
@melcos.map {|m| dot_product m, dvec}
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
# Notes:
|
31
|
+
# [1] I'm not sure why I do this division by order. Sphinx does it. I wanted
|
32
|
+
# to have compatible output though I'm not sure I should care since I don't use
|
33
|
+
# sphinx anymore. However, Sphinx does it continually during processing. I
|
34
|
+
# build it into the filters so there is no cost.
|
@@ -0,0 +1,34 @@
|
|
1
|
+
module Noyes
|
2
|
+
# Takes an m x n array and makes an m x 3 x n array. The original inner
|
3
|
+
# array is duplicated followed by its delta and its double delta.
|
4
|
+
class DoubleDeltaFilter
|
5
|
+
def initialize
|
6
|
+
@previous = nil
|
7
|
+
end
|
8
|
+
def << cepstra
|
9
|
+
@previous = [cepstra.first] * 3 unless @previous
|
10
|
+
buf = @previous + cepstra
|
11
|
+
result = []
|
12
|
+
for i in 3...(buf.size-3)
|
13
|
+
delta = Array.new buf[i].size
|
14
|
+
delta.size.times do |k|
|
15
|
+
delta[k] = buf[i+2][k] - buf[i-2][k]
|
16
|
+
end
|
17
|
+
double_delta = Array.new buf[i].size
|
18
|
+
double_delta.size.times do |k|
|
19
|
+
double_delta[k] = buf[i+3][k] - buf[i-1][k] - buf[i+1][k] + buf[i-3][k]
|
20
|
+
end
|
21
|
+
result << [buf[i], delta, double_delta]
|
22
|
+
end
|
23
|
+
@previous = buf[-6..-1]
|
24
|
+
result
|
25
|
+
end
|
26
|
+
# If there is no more data we can estimate a couple more frames by copying
|
27
|
+
# the final frame 3 times. Probably this is rarely necessary.
|
28
|
+
def final_estimate
|
29
|
+
return [] unless @previous
|
30
|
+
cepstra = [@previous.last] * 3
|
31
|
+
self.<< cepstra
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
require 'complex'
|
2
|
+
|
3
|
+
module Noyes
|
4
|
+
include Math
|
5
|
+
# Takes the discrete Fourier transform.
|
6
|
+
def dft data,size
|
7
|
+
vals = Array.new size
|
8
|
+
data.size.times {|i| vals[i] = Complex(data[i],0)}
|
9
|
+
(data.size).upto(size-1) {|i| vals[i] = Complex(0,0)}
|
10
|
+
|
11
|
+
j=0
|
12
|
+
size.times do |i|
|
13
|
+
vals[j],vals[i] = vals[i],vals[j] if i<j
|
14
|
+
m = size/2
|
15
|
+
while j>=m && m>1
|
16
|
+
j-=m
|
17
|
+
m/=2
|
18
|
+
end
|
19
|
+
j+=m
|
20
|
+
end
|
21
|
+
k=1
|
22
|
+
while k<size
|
23
|
+
incr = 2*k
|
24
|
+
mul = Complex.polar 1, Math::PI/k
|
25
|
+
w = Complex(1, 0)
|
26
|
+
k.times do |i|
|
27
|
+
i.step(size-1,incr) do |j|
|
28
|
+
tmp = w * vals[j+k]
|
29
|
+
vals[j+k],vals[j]=vals[j]-tmp,vals[j]+tmp
|
30
|
+
end
|
31
|
+
w *= mul;
|
32
|
+
end
|
33
|
+
k=incr
|
34
|
+
end
|
35
|
+
vals
|
36
|
+
end
|
37
|
+
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module Noyes
|
2
|
+
# Takes an m x n matrix and multiplies each inner array by a hamming window
|
3
|
+
# function. Be careful to make sure your inner array length is the same as
|
4
|
+
# the window size.
|
5
|
+
class HammingWindow
|
6
|
+
include Math
|
7
|
+
def initialize window_size
|
8
|
+
twopi = 2 * PI
|
9
|
+
@hamming_window = []
|
10
|
+
window_size.times do |i|
|
11
|
+
@hamming_window << 0.54 - 0.46*cos(twopi*i/(window_size-1))
|
12
|
+
end
|
13
|
+
end
|
14
|
+
def << segments
|
15
|
+
segments.map do |s|
|
16
|
+
s.zip(@hamming_window).map {|d, h| d*h}
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
class LiveCMN
|
2
|
+
# Normalizes cepstrum means and applies them. Dimensionality remains
|
3
|
+
# unchanged. NOTE: This class resets itself automatically if bounds drift
|
4
|
+
# too much. Possibly these bounds should be parameterized.
|
5
|
+
def initialize dimensions=13, init_mean=45.0, window_size=100, shift=160
|
6
|
+
@init_mean = init_mean; @shift = shift; @ws = window_size
|
7
|
+
@sums = Array.new dimensions, 0
|
8
|
+
@means = Array.new dimensions, 0
|
9
|
+
@means[0] = @init_mean
|
10
|
+
@frame_count = 0
|
11
|
+
end
|
12
|
+
def << dct
|
13
|
+
raise "Wrong number of dimensions" if dct[0].size != @means.size
|
14
|
+
dct.map do |mfc|
|
15
|
+
cmn = Array.new @means.size
|
16
|
+
@means.size.times do |i|
|
17
|
+
@sums[i] += mfc[i]
|
18
|
+
cmn[i] = mfc[i] - @means[i]
|
19
|
+
end
|
20
|
+
@frame_count += 1
|
21
|
+
update if @frame_count > @shift
|
22
|
+
cmn
|
23
|
+
end
|
24
|
+
end
|
25
|
+
def update
|
26
|
+
per_frame = 1.0 / @frame_count
|
27
|
+
@means = @sums.map {|x| x * per_frame}
|
28
|
+
|
29
|
+
if @means.first > 70 || @means.first < 5
|
30
|
+
reset
|
31
|
+
elsif @frame_count >= @shift
|
32
|
+
@sums = @sums.map {|x| x * per_frame * @ws}
|
33
|
+
@frame_count = @ws
|
34
|
+
end
|
35
|
+
end
|
36
|
+
def reset
|
37
|
+
@sums.map! {0}
|
38
|
+
@means.map! {0}
|
39
|
+
@means[0] = @init_mean
|
40
|
+
@frame_count = 0
|
41
|
+
end
|
42
|
+
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
module Noyes
|
2
|
+
# Takes the natural log of an incoming m x n array. The dimensions of the
|
3
|
+
# array remain unchanged. If a value is not positive then the value log_zero is used
|
4
|
+
# instead of plunging into singularity land and throwing an exception.
|
5
|
+
class LogCompressor
|
6
|
+
def initialize log_zero = -0.00001
|
7
|
+
@log_zero = log_zero
|
8
|
+
end
|
9
|
+
def << mspec
|
10
|
+
mspec.map {|msp| msp.map { |m| m > 0 ? Math::log(m) : @log_zero}}
|
11
|
+
end
|
12
|
+
end
|
13
|
+
end
|
@@ -0,0 +1,112 @@
|
|
1
|
+
require 'noyes_math'
|
2
|
+
module Noyes
|
3
|
+
# Mel filter takes an m x n matrix. The length of each inner array becomes equal to the
|
4
|
+
# number of mel filter banks (nfilt). The dimensionality of the outer array
|
5
|
+
# remains unchanged.
|
6
|
+
class MelFilter
|
7
|
+
include Math
|
8
|
+
def initialize srate, nfft, nfilt, lowerf, upperf
|
9
|
+
bank_params = MelFilter.make_bank_parameters srate, nfft, nfilt, lowerf, upperf
|
10
|
+
@indices = []
|
11
|
+
@weights = []
|
12
|
+
bank_params.map do |params|
|
13
|
+
ind, weights = MelFilter.make_filter *params
|
14
|
+
@indices << ind
|
15
|
+
@weights << weights
|
16
|
+
end
|
17
|
+
end
|
18
|
+
def << power_spectra
|
19
|
+
power_spectra.map do |spectrum|
|
20
|
+
mel_bank = Array.new @indices.size
|
21
|
+
@indices.size.times do |i|
|
22
|
+
initial_index, weights = @indices[i], @weights[i]
|
23
|
+
output = 0.0
|
24
|
+
weights.size.times do |j|
|
25
|
+
index = initial_index + j
|
26
|
+
output += spectrum[index] * weights[j] if index < spectrum.length
|
27
|
+
end
|
28
|
+
mel_bank[i] = output
|
29
|
+
end
|
30
|
+
mel_bank
|
31
|
+
end
|
32
|
+
end
|
33
|
+
def self.to_mel f
|
34
|
+
return f.map {|linfreq| self.to_mel linfreq} if f.respond_to? :map
|
35
|
+
2595.0 * Math.log10(1.0 + f/700.0)
|
36
|
+
end
|
37
|
+
def self.to_linear m
|
38
|
+
return m.map {|melfreq| self.to_linear melfreq} if m.respond_to? :map
|
39
|
+
700.0 * (10.0**(m/2595.0) - 1.0)
|
40
|
+
end
|
41
|
+
def self.determine_bin in_freq, step_freq
|
42
|
+
step_freq * (in_freq/step_freq).round
|
43
|
+
end
|
44
|
+
def self.make_bank_parameters srate, nfft, nfilt, lowerf, upperf
|
45
|
+
raise 'Number of FFT points is <= 0.' if nfft == 0
|
46
|
+
raise 'Number of filters is <= 0.' if nfilt == 0
|
47
|
+
srate = srate.to_f; lowerf = lowerf.to_f; upperf = upperf.to_f
|
48
|
+
left_edge = Array.new nfilt
|
49
|
+
right_edge = Array.new nfilt
|
50
|
+
center_freq = Array.new nfilt
|
51
|
+
melmax = self.to_mel upperf
|
52
|
+
melmin = self.to_mel lowerf
|
53
|
+
delta_freq_mel = (melmax - melmin) / (nfilt + 1.0)
|
54
|
+
delta_freq = srate/nfft
|
55
|
+
left_edge[0] = self.determine_bin lowerf, delta_freq
|
56
|
+
next_edge_mel = melmin
|
57
|
+
nfilt.times do |i|
|
58
|
+
next_edge_mel += delta_freq_mel
|
59
|
+
next_edge = self.to_linear next_edge_mel
|
60
|
+
center_freq[i] = self.determine_bin next_edge, delta_freq
|
61
|
+
right_edge[i-1] = center_freq[i] if i > 0
|
62
|
+
left_edge[i+1] = center_freq[i] if i < nfilt - 1
|
63
|
+
end
|
64
|
+
|
65
|
+
next_edge_mel += delta_freq_mel
|
66
|
+
next_edge = self.to_linear next_edge_mel
|
67
|
+
right_edge[nfilt-1] = self.determine_bin next_edge, delta_freq
|
68
|
+
fparams = Array.new nfilt
|
69
|
+
nfilt.times do |i|
|
70
|
+
initial_freq_bin = self.determine_bin left_edge[i], delta_freq
|
71
|
+
initial_freq_bin += delta_freq if initial_freq_bin < left_edge[i]
|
72
|
+
fparams[i] = [left_edge[i], center_freq[i], right_edge[i],
|
73
|
+
initial_freq_bin, delta_freq]
|
74
|
+
end
|
75
|
+
fparams
|
76
|
+
end
|
77
|
+
def self.make_filter left, center, right, init_freq, delta
|
78
|
+
raise 'delta freq has zero value' if delta == 0
|
79
|
+
if (right - left).round == 0 || (center - left).round == 0 ||
|
80
|
+
(right - center).round == 0
|
81
|
+
raise 'filter boundaries too close'
|
82
|
+
end
|
83
|
+
|
84
|
+
n_elements = ((right - left)/ delta + 1).round
|
85
|
+
raise 'number of mel elements is zero' if n_elements == 0
|
86
|
+
|
87
|
+
weights = Array.new n_elements
|
88
|
+
height = 1
|
89
|
+
left_slope = height / (center - left)
|
90
|
+
right_slope = height / (center - right)
|
91
|
+
|
92
|
+
index_fw = 0
|
93
|
+
init_freq.step right, delta do |current|
|
94
|
+
if current < center
|
95
|
+
weights[index_fw] = left_slope * (current - left)
|
96
|
+
else
|
97
|
+
weights[index_fw] = height + right_slope * (current - center)
|
98
|
+
end
|
99
|
+
index_fw += 1
|
100
|
+
end
|
101
|
+
#weights.insert 0, (init_freq/delta).round
|
102
|
+
[(init_freq/delta).round, weights]
|
103
|
+
end
|
104
|
+
def apply_weights init_index, weights, spectrum
|
105
|
+
output = 0.0
|
106
|
+
weights.size.times do |i|
|
107
|
+
output += spectrum[i + init_index] * weights[i]
|
108
|
+
end
|
109
|
+
output
|
110
|
+
end
|
111
|
+
end
|
112
|
+
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'discrete_fourier_transform'
|
2
|
+
module Noyes
|
3
|
+
# The squared magnitude of the DFT. You must specify the number of FFT points. The power
|
4
|
+
# spectrum returns an array of arrays where each inner array is of length
|
5
|
+
# nfft/2 + 1. The length of the outer array does not change.
|
6
|
+
class PowerSpectrumFilter
|
7
|
+
include Noyes
|
8
|
+
def initialize nfft
|
9
|
+
@nfft = nfft
|
10
|
+
end
|
11
|
+
def << data
|
12
|
+
nuniqdftpts = @nfft/2 + 1
|
13
|
+
data.map do |datavec|
|
14
|
+
datavecfft = dft datavec, @nfft
|
15
|
+
Array.new(nuniqdftpts){|i| datavecfft[i].abs**2}
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module Noyes
|
2
|
+
# A simple high pass filter. It takes an array of size n and returns an
|
3
|
+
# array of size n.
|
4
|
+
class Preemphasizer
|
5
|
+
include Math
|
6
|
+
def initialize factor=0.97
|
7
|
+
@factor = factor
|
8
|
+
@prior = 0
|
9
|
+
end
|
10
|
+
|
11
|
+
def << data
|
12
|
+
prior = @prior
|
13
|
+
@prior = data.last
|
14
|
+
data.map do |x|
|
15
|
+
y = x - @factor * prior
|
16
|
+
prior = x
|
17
|
+
y
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module Noyes
|
2
|
+
# Segments an array of data into an array of arrays. Inner arrays are the
|
3
|
+
# size of the window.
|
4
|
+
class Segmenter
|
5
|
+
def initialize window_size, shift
|
6
|
+
@winsz = window_size; @winshift = shift
|
7
|
+
@overflow = nil
|
8
|
+
end
|
9
|
+
|
10
|
+
def << data
|
11
|
+
data = @overflow + data if @overflow
|
12
|
+
if data.size < @winsz + @winshift * 5
|
13
|
+
@overflow = data
|
14
|
+
return nil
|
15
|
+
else
|
16
|
+
@overflow = nil
|
17
|
+
end
|
18
|
+
x = []
|
19
|
+
i = 0
|
20
|
+
while i+@winsz <= data.length
|
21
|
+
x << data[i,@winsz]
|
22
|
+
i += @winshift
|
23
|
+
end
|
24
|
+
@overflow = data[i..-1]
|
25
|
+
x
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
metadata
ADDED
@@ -0,0 +1,78 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: noyes
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.3.2
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Joe Woelfel
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2010-02-01 00:00:00 -05:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description: Currently sufficient to create basic features for speech recognition
|
17
|
+
email: joe@talkhouse.com
|
18
|
+
executables:
|
19
|
+
- noyes_dump44k.sh
|
20
|
+
- noyes_dump8k.sh
|
21
|
+
- recognize.sh
|
22
|
+
extensions: []
|
23
|
+
|
24
|
+
extra_rdoc_files:
|
25
|
+
- COPYING
|
26
|
+
- README
|
27
|
+
- doc/overview.rdoc
|
28
|
+
files:
|
29
|
+
- lib/common/noyes_dsl.rb
|
30
|
+
- lib/common/noyes_math.rb
|
31
|
+
- lib/common/send_incrementally.rb
|
32
|
+
- lib/noyes.rb
|
33
|
+
- lib/ruby_impl/dct.rb
|
34
|
+
- lib/ruby_impl/delta.rb
|
35
|
+
- lib/ruby_impl/discrete_fourier_transform.rb
|
36
|
+
- lib/ruby_impl/filter.rb
|
37
|
+
- lib/ruby_impl/hamming_window.rb
|
38
|
+
- lib/ruby_impl/live_cmn.rb
|
39
|
+
- lib/ruby_impl/log_compress.rb
|
40
|
+
- lib/ruby_impl/mel_filter.rb
|
41
|
+
- lib/ruby_impl/power_spec.rb
|
42
|
+
- lib/ruby_impl/preemphasis.rb
|
43
|
+
- lib/ruby_impl/segment.rb
|
44
|
+
- COPYING
|
45
|
+
- README
|
46
|
+
- doc/overview.rdoc
|
47
|
+
has_rdoc: true
|
48
|
+
homepage: http://github.com/talkhouse/noise
|
49
|
+
licenses: []
|
50
|
+
|
51
|
+
post_install_message:
|
52
|
+
rdoc_options:
|
53
|
+
- --charset=UTF-8
|
54
|
+
require_paths:
|
55
|
+
- lib/ruby_impl
|
56
|
+
- lib/common
|
57
|
+
- lib
|
58
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
59
|
+
requirements:
|
60
|
+
- - ">="
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: "0"
|
63
|
+
version:
|
64
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: "0"
|
69
|
+
version:
|
70
|
+
requirements: []
|
71
|
+
|
72
|
+
rubyforge_project:
|
73
|
+
rubygems_version: 1.3.5
|
74
|
+
signing_key:
|
75
|
+
specification_version: 3
|
76
|
+
summary: A signal processing library
|
77
|
+
test_files: []
|
78
|
+
|