noyes 0.3.2

Sign up to get free protection for your applications and to get access to all the features.
data/COPYING ADDED
@@ -0,0 +1,21 @@
1
+ Copyright 2010 Talkhouse. All rights reserved.
2
+
3
+ Redistribution and use in source and binary forms, with or without modification, are
4
+ permitted provided that the following conditions are met:
5
+
6
+ 1. Redistributions of source code must retain the above copyright notice, this list of
7
+ conditions and the following disclaimer.
8
+
9
+ 2. Redistributions in binary form must reproduce the above copyright notice, this list
10
+ of conditions and the following disclaimer in the documentation and/or other materials
11
+ provided with the distribution.
12
+
13
+ THIS SOFTWARE IS PROVIDED BY TALKHOUSE ``AS IS'' AND ANY EXPRESS OR IMPLIED
14
+ WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
15
+ FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL TALKHOUSE OR
16
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
17
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
18
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
19
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
20
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
21
+ ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
data/README ADDED
@@ -0,0 +1,32 @@
1
+ Noyes is a signal processing library. It currently has just enough signal
2
+ processing to produce features suitable for speech recognition.
3
+
4
+ Pronunciation: Typically pronounced the same as 'noise'. But "NO!... YES!" is
5
+ considered acceptable if you yell it loudly enough or at least with sufficient
6
+ conviction to make people think you have truly changed your mind.
7
+
8
+ Noyes is implemented entirely in Ruby. It's also implemented entirely in Java.
9
+ The Java version has Ruby bindings too. So you can have Java's speed from
10
+ Ruby. All versions share the same unit tests, which are written in Ruby. The
11
+ design goal is to have signal processing routines that are so simple and so
12
+ disentangled from the overall system that anyone could extract any of the
13
+ routines and use them elsewhere with little trouble. Benchmarks are included.
14
+
15
+ This library places an emphasis on expressiveness without sacrificing ultimate
16
+ performance. It does so by supporting multiple implementations each with Ruby
17
+ bindings. The pure Ruby version, while not fast, is often adequate for
18
+ development and is the best place to add new routines.
19
+
20
+ For examples of how to link with different implementations see the test section
21
+ of the Rakefile. At present only the pure Ruby implementation is exposed via
22
+ the gem.
23
+
24
+ Requirements:
25
+ Almost any version of ruby & rake.
26
+ Java, if you want to use the Java version.
27
+
28
+ Some of the utility scripts may use sox, but
29
+ none of the core routines use it.
30
+
31
+ For usage information:
32
+ rake -T
@@ -0,0 +1,59 @@
1
#!/usr/bin/env ruby
# vim: set filetype=ruby :
#
# Runs one audio file through the 44.1 kHz feature pipeline and writes the
# output of every stage into the directory of the input file.  raw.dat holds
# the bytes produced by sox; every other *.dat file holds the flattened stage
# output packed as big-endian single precision floats ('g*').
ROOT = File.dirname(File.dirname(__FILE__))
$: << "#{ROOT}/lib/ruby"
$: << "#{ROOT}/lib/common"
# NOTE(review): the filter classes used below are defined in module Noyes and
# pulled in by lib/noyes.rb, so lib and lib/ruby_impl are added to the load
# path.  Confirm this matches the intended checkout layout.
$: << "#{ROOT}/lib/ruby_impl"
$: << "#{ROOT}/lib"

require 'shellwords'
require 'noyes'

if ARGV.size != 1 || ARGV[0] == '-h'
  puts "Usage: noyes_dump44k <file>"
  exit 1
end

FILE = ARGV[0]
DIR = File.dirname FILE

include Noyes
nfilt = 40
min_freq = 130
max_freq = 6800
nfft = 2048
freq = 44100
shift = 441
frame_size = 1130

preemphasizer = Preemphasizer.new 0.97
segmenter = Segmenter.new frame_size, shift
hamming_windower = HammingWindow.new frame_size
power_spectrum_filter = PowerSpectrumFilter.new nfft
mel_filter = MelFilter.new freq, nfft, nfilt, min_freq, max_freq
log_compressor = LogCompressor.new
discrete_cosine_transform = DCT.new 13, nfilt
live_cmn = LiveCMN.new
ddf = DoubleDeltaFilter.new

# Writes one stage's output to DIR/<name>.dat in binary mode.
dump = lambda do |name, values|
  File.open("#{DIR}/#{name}.dat", 'wb') { |f| f.write values.flatten.pack('g*') }
end

# The file name is shell-escaped because it comes straight from ARGV, and the
# audio is resampled to the rate the filters above are configured for.
raw = `sox #{Shellwords.escape FILE} -s -B -r #{freq} -b 16 -t raw -`
File.open("#{DIR}/raw.dat", 'wb') { |f| f.write raw }
pcm = raw.unpack('n*').map { |d| to_signed_short(d).to_f }
pre = preemphasizer << pcm
dump.call 'pre', pre
seg = segmenter << (pre + Array.new(frame_size - pre.size % frame_size, 0.0))
# The segmenter answers nil while it is still buffering too little audio.
abort "noyes_dump44k: not enough audio in #{FILE} to build frames" unless seg
dump.call 'seg', seg
ham = hamming_windower << seg
dump.call 'ham', ham
pow = power_spectrum_filter << ham
dump.call 'pow', pow
mel = mel_filter << pow
dump.call 'mel', mel
log = log_compressor << mel
dump.call 'log_mel', log
dct = discrete_cosine_transform << log
dump.call 'dct', dct
cmn = live_cmn << dct
dump.call 'cmn', cmn
dd = ddf << cmn
dd += ddf.final_estimate
dump.call 'dd', dd
58
+
59
+
@@ -0,0 +1,58 @@
1
#!/usr/bin/env ruby
# vim: set filetype=ruby :
#
# Runs one audio file through the 8 kHz feature pipeline and writes the
# output of every stage into the directory of the input file.  raw.dat holds
# the bytes produced by sox; every other *.dat file holds the flattened stage
# output packed as big-endian single precision floats ('g*').
ROOT = File.dirname(File.dirname(__FILE__))
$: << "#{ROOT}/lib/ruby"
$: << "#{ROOT}/lib/common"
# NOTE(review): the filter classes used below are defined in module Noyes and
# pulled in by lib/noyes.rb, so lib and lib/ruby_impl are added to the load
# path.  Confirm this matches the intended checkout layout.
$: << "#{ROOT}/lib/ruby_impl"
$: << "#{ROOT}/lib"

require 'shellwords'
require 'noyes'

if ARGV.size != 1 || ARGV[0] == '-h'
  puts "Usage: noyes_dump8k <file>"
  exit 1
end

FILE = ARGV[0]
DIR = File.dirname FILE

include Noyes
nfilt = 32
min_freq = 200
max_freq = 3700
nfft = 256
freq = 8000
shift = 80
frame_size = 205

preemphasizer = Preemphasizer.new 0.97
segmenter = Segmenter.new frame_size, shift
hamming_windower = HammingWindow.new frame_size
power_spectrum_filter = PowerSpectrumFilter.new nfft
mel_filter = MelFilter.new freq, nfft, nfilt, min_freq, max_freq
log_compressor = LogCompressor.new
discrete_cosine_transform = DCT.new 13, nfilt
live_cmn = LiveCMN.new
ddf = DoubleDeltaFilter.new

# Writes one stage's output to DIR/<name>.dat in binary mode.
dump = lambda do |name, values|
  File.open("#{DIR}/#{name}.dat", 'wb') { |f| f.write values.flatten.pack('g*') }
end

# The file name is shell-escaped because it comes straight from ARGV.
raw = `sox #{Shellwords.escape FILE} -s -B -r #{freq} -b 16 -t raw -`
File.open("#{DIR}/raw.dat", 'wb') { |f| f.write raw }
pcm = raw.unpack('n*').map { |d| to_signed_short(d).to_f }
pre = preemphasizer << pcm
dump.call 'pre', pre
seg = segmenter << (pre + Array.new(frame_size - pre.size % frame_size, 0.0))
# The segmenter answers nil while it is still buffering too little audio.
abort "noyes_dump8k: not enough audio in #{FILE} to build frames" unless seg
dump.call 'seg', seg
ham = hamming_windower << seg
dump.call 'ham', ham
pow = power_spectrum_filter << ham
dump.call 'pow', pow
mel = mel_filter << pow
dump.call 'mel', mel
log = log_compressor << mel
dump.call 'log_mel', log
dct = discrete_cosine_transform << log
dump.call 'dct', dct
cmn = live_cmn << dct
dump.call 'cmn', cmn
dd = ddf << cmn
dd += ddf.final_estimate
dump.call 'dd', dd
58
+
data/bin/recognize.sh ADDED
@@ -0,0 +1,15 @@
1
#!/usr/bin/env jruby
# vim: set filetype=ruby :
#
# Streams the features of one audio file to a recognition server and prints
# whatever the server sends back.
ROOT = File.dirname(File.dirname(__FILE__))
$: << "#{ROOT}/lib/ruby"
$: << "#{ROOT}/lib/common"
require 'socket'
require 'send_incrementally'

# Without a file argument there is nothing to convert, so show the usage
# message (as the sibling dump scripts do) instead of running sox on nothing.
if ARGV.empty? || ARGV[0] == '-h'
  puts "Usage: recognize <file>"
  exit 1
end

# Opens a TCP connection to node:port, sends the features of +file+ over it
# and returns the data read back from the same socket.
def recognize file, node='localhost', port=2318
  TCPSocket.open(node, port) do |client|
    send_incremental_features file, client, client
  end
end

puts recognize ARGV[0]
data/doc/overview.rdoc ADDED
@@ -0,0 +1,51 @@
1
+ # = Overview
2
+ #
3
+ # All signal processing routines use a simple DSL style interface. Below are
4
+ # some examples.
5
+ #
6
+ # == Filter operator example.
7
+ # Each example below is the data on the left being operated on by the filter on
8
+ # the right. This is similar to the way the += operator works for numbers. The
9
+ # data is not modified in place currently and it should probably stay that way.
10
+ # It could be if efficiency demanded it, but that would require a bit more care
11
+ # to avoid side effects when using the API. The >>= actually looks like a
12
+ # filter.
13
+ #
14
+ # data = (1..12).to_a
15
+ # segmenter = Segmenter.new 4, 2 # window size, window shift
16
+ # hamming_filter = HammingWindow.new 4 # window size
17
+ # power_spec_filter = PowerSpectrumFilter.new 8 # number of ffts
18
+ #
19
+ # data >>= segmenter
20
+ # data >>= hamming_filter
21
+ # data >>= power_spec_filter
22
+ # data >>= dct_filter
23
+ #
24
+ # You can expand the >>= operator out, but I think the flow is worse and there
25
+ # is more repetition, particularly when you have a lot of filters in sequence.
26
+ # This is perfectly valid syntax though. Also, this is very useful if you don't
27
+ # want to keep a reference to your original data.
28
+ #
29
+ # pcm_data = (1..12).to_a
30
+ # segmenter = Segmenter.new
31
+ # hamming_filter = HammingWindow.new 4
32
+ # segmented_data = segmenter << pcm_data, 4, 2
33
+ # hamming_data = hamming_filter << segmented_data
34
+ # power_spectrum_data = power_spec_filter hamming_data, 8
35
+ # dct_data = dct_filter << power_spectrum_data
36
+ #
37
+ # Here is an older version with function calls instead of operator overloading.
38
+ # The trouble with it is that the flow is hard to follow, and there is
39
+ # repetition. Filter and process are really synonyms. And this requires
40
+ # repeating the data component twice. Also, power spec is a function here
41
+ # with additional arguments. I think I'd rather have the configuration
42
+ # details, such as number of ffts all grouped at the top. It's easier to
43
+ # follow this way.
44
+ #
45
+ # data = (1..12).to_a
46
+ # seg = Segmenter.new
47
+ # ham = HammingWindow.new 4
48
+ # segments = segmenter.process data, 4, 2
49
+ # hamming_ = hamming_filter.process segments
50
+ # power = power_spec.filter hamming, 8
51
+ # dct = dct.process power
@@ -0,0 +1,6 @@
1
class Array
  # The magic that enables the filter operator: `data >> filter` hands this
  # array to the filter's << method and returns whatever that call returns,
  # which is also what makes `data >>= filter` work.
  def >>(other)
    other.<<(self)
  end
end
@@ -0,0 +1,18 @@
1
module Math
  # Returns the sum of l1[i] * l2[i] over every index of l1.  An empty l1
  # yields 0; elements of l2 beyond the length of l1 are ignored.
  def dot_product(l1, l2)
    l1.zip(l2).inject(0) { |total, (a, b)| total + a * b }
  end
end
10
+
11
# Converts a 16 bit value read as unsigned into the signed number with the
# same two's complement bit pattern.  Values below 0x8000 are returned as they
# are.  Needed because shorts unpacked in network byte order ('n') come back
# unsigned.
def to_signed_short(n)
  return n if n < 0x8000
  -((n ^ 0xFFFF) + 1)
end
@@ -0,0 +1,62 @@
1
+ require 'noyes'
2
+ include Noyes
3
+
4
# Identification string written at the start of every feature stream.
TMAGIC = '1.0 talkhouse'
# Message codes 0 through 4, each packed as a 32 bit big-endian integer.
TSTART, TAUDIO, TEND, TDONE, TCEPSTRA = (0..4).map { |code| [code].pack('N') }
10
+
11
# Use sox to convert a file of almost any common type into pcm.
#
# sox is asked for raw, signed, big-endian 16 bit samples at 8 kHz.  The
# result is an array of floats, one per sample, with the sign restored.
# The file name is shell-escaped so names with spaces or shell
# metacharacters reach sox as a single argument.
def file2pcm file
  require 'shellwords'
  raw = `sox #{Shellwords.escape file} -s -B -r 8k -b 16 -t raw -`
  # 'n*' yields unsigned shorts; values with the top bit set are negative
  # numbers in two's complement.
  raw.unpack('n*').map do |word|
    (word >= 0x8000 ? word - 0x10000 : word).to_f
  end
end
21
+
22
# Takes a file and two IO-like objects.
#
# The file is converted to pcm with file2pcm and processed in slices of 1230
# samples.  Every slice that produces frames is written to +to_server+ as a
# TCEPSTRA message: the frame count as a 32 bit big-endian integer followed
# by each frame packed as big-endian floats.  The stream is opened with
# TMAGIC and TSTART and closed with TEND and TDONE.  Returns whatever
# +from_server+.read yields afterwards.
def send_incremental_features(file, to_server, from_server)
  nfilt, min_freq, max_freq = 32, 200, 3700
  nfft, freq = 256, 8000
  shift, frame_size = 80, 205

  preemphasizer = Preemphasizer.new 0.97
  segmenter = Segmenter.new frame_size, shift
  # Stages applied, in this order, to every batch of frames.
  frame_stages = [
    HammingWindow.new(frame_size),
    PowerSpectrumFilter.new(nfft),
    MelFilter.new(freq, nfft, nfilt, min_freq, max_freq),
    LogCompressor.new,
    DCT.new(13, nfilt),
    LiveCMN.new
  ]

  samples = file2pcm file
  to_server.write TMAGIC
  to_server.write TSTART
  samples.each_slice(1230) do |chunk|
    frames = segmenter << (preemphasizer << chunk)
    # The segmenter returns nil while it is still buffering samples.
    next unless frames
    cepstra = frame_stages.inject(frames) { |data, stage| stage << data }
    to_server.write TCEPSTRA
    to_server.write [cepstra.size].pack('N')
    print '.'
    cepstra.each { |cmn| to_server.write cmn.pack('g*') }
    to_server.flush
  end
  to_server.write TEND
  to_server.write TDONE
  to_server.flush
  from_server.read
end
data/lib/noyes.rb ADDED
@@ -0,0 +1,13 @@
1
+ require "noyes_dsl"
2
+ require "noyes_math"
3
+ require "live_cmn"
4
+ require "dct"
5
+ require "delta"
6
+ require "filter"
7
+ require "mel_filter"
8
+ require "hamming_window"
9
+ require "log_compress"
10
+ require "discrete_fourier_transform"
11
+ require "power_spec"
12
+ require "preemphasis"
13
+ require "segment"
@@ -0,0 +1,34 @@
1
+ require 'noyes_math'
2
+
3
module Noyes
  # Takes the discrete cosine transform.  Converts an n x m matrix to an
  # n x order matrix.  ncol should be set to m.
  class DCT
    include Math
    # The order x ncol table of cosine weights built by the constructor.
    attr_accessor :melcos

    # Precomputes one row of ncol cosine weights per output coefficient.
    # Each weight is divided by order, see note [1] at the end of the file.
    def initialize(order, ncol)
      @melcos = Array.new(order) do |i|
        freq = PI * i.to_f / ncol
        Array.new(ncol) { |j| cos(freq * (j + 0.5)) / order } # [1]
      end
    end

    # Maps every input vector to its dot products with the rows of melcos.
    def <<(data)
      data.map do |dvec|
        @melcos.map { |basis| dot_product(basis, dvec) }
      end
    end
  end
end
29
+
30
+ # Notes:
31
+ # [1] I'm not sure why I do this division by order. Sphinx does it. I wanted
32
+ # to have compatible output though I'm not sure I should care since I don't use
33
+ # sphinx anymore. However, Sphinx does it continually during processing. I
34
+ # build it into the filters so there is no cost.
@@ -0,0 +1,34 @@
1
module Noyes
  # Takes an m x n array and makes an array of [frame, delta, double delta]
  # triples, each part of length n.
  #
  # The filter is streaming: a frame can only be emitted once the three
  # frames after it have arrived, so output lags the input by three frames
  # until final_estimate is called.  The very first frame is repeated three
  # times to provide context before the start of the data.
  class DoubleDeltaFilter
    def initialize
      @previous = nil
    end

    # Appends +cepstra+ to the buffered frames and returns a triple for every
    # frame that now has three frames of context on both sides.  Returns []
    # when no frame is ready yet.
    def <<(cepstra)
      # An empty chunk carries no frame to seed the padding from and cannot
      # complete any pending frame.
      return [] if cepstra.empty?
      @previous ||= [cepstra.first] * 3
      buf = @previous + cepstra
      result = (3...(buf.size - 3)).map do |i|
        delta = Array.new(buf[i].size) { |k| buf[i + 2][k] - buf[i - 2][k] }
        double_delta = Array.new(buf[i].size) do |k|
          buf[i + 3][k] - buf[i - 1][k] - buf[i + 1][k] + buf[i - 3][k]
        end
        [buf[i], delta, double_delta]
      end
      # Keep three frames of past context plus the three frames still waiting
      # for future context.  last(6) keeps the whole buffer when it holds
      # fewer than six frames, so short chunks are not lost.
      @previous = buf.last(6)
      result
    end

    # If there is no more data we can estimate the frames that are still
    # pending by copying the final frame 3 times.  Returns [] when no data
    # was ever supplied.
    def final_estimate
      return [] unless @previous
      self << ([@previous.last] * 3)
    end
  end
end
@@ -0,0 +1,37 @@
1
+ require 'complex'
2
+
3
module Noyes
  include Math

  # Takes the discrete Fourier transform of +data+ with an iterative
  # radix-2 algorithm.  The samples are zero padded up to +size+ points and
  # the result is an array of Complex values.  The halving and doubling
  # steps below presumably assume +size+ is a power of two.
  def dft(data, size)
    # Complex copy of the input, padded with zeros up to size points.
    points = Array.new size
    data.each_with_index { |sample, idx| points[idx] = Complex(sample, 0) }
    points << Complex(0, 0) while points.size < size
    points.map! { |pt| pt.nil? ? Complex(0, 0) : pt }

    # Reorder the points into bit-reversed index order.
    rev = 0
    size.times do |pos|
      points[rev], points[pos] = points[pos], points[rev] if pos < rev
      half = size / 2
      while rev >= half && half > 1
        rev -= half
        half /= 2
      end
      rev += half
    end

    # Butterfly passes, doubling the span each time.
    span = 1
    while span < size
      stride = 2 * span
      rotation = Complex.polar 1, Math::PI / span
      twiddle = Complex(1, 0)
      span.times do |offset|
        offset.step(size - 1, stride) do |lo|
          hi = lo + span
          scaled = twiddle * points[hi]
          points[hi], points[lo] = points[lo] - scaled, points[lo] + scaled
        end
        twiddle *= rotation
      end
      span = stride
    end
    points
  end
end
@@ -0,0 +1,11 @@
1
module Noyes
  # A generic filter.  Just supply an array of weights; every incoming value
  # is multiplied by the weight at the same position.
  class Filter
    def initialize(weights)
      @weights = weights
    end

    # Returns a new array holding data[i] * weights[i] for each element of
    # +data+.
    def <<(data)
      products = []
      data.each_with_index { |sample, i| products << sample * @weights[i] }
      products
    end
  end
end
@@ -0,0 +1,20 @@
1
module Noyes
  # Takes an m x n matrix and multiplies each inner array by a hamming window
  # function.  Be careful to make sure your inner array length is the same as
  # the window size.
  class HammingWindow
    include Math

    # Precomputes the window 0.54 - 0.46 * cos(2 * PI * i / (window_size - 1))
    # for i in 0...window_size.
    def initialize(window_size)
      twopi = 2 * PI
      @hamming_window = Array.new(window_size) do |i|
        # A one point window has no span to taper over; the formula would
        # divide by zero and yield NaN, so use a weight of 1.0 instead.
        window_size == 1 ? 1.0 : 0.54 - 0.46 * cos(twopi * i / (window_size - 1))
      end
    end

    # Returns a new matrix in which every segment has been multiplied
    # element by element with the window.
    def <<(segments)
      segments.map do |s|
        s.zip(@hamming_window).map { |d, h| d * h }
      end
    end
  end
end
@@ -0,0 +1,42 @@
1
# Normalizes cepstrum means and applies them.  Dimensionality remains
# unchanged.  NOTE: This class resets itself automatically if bounds drift
# too much.  Possibly these bounds should be parameterized.
class LiveCMN
  # dimensions:: length of every incoming frame (default 13)
  # init_mean::  starting mean of the first coefficient (default 45.0); all
  #              other means start at 0
  # window_size:: frame count the running sums are rescaled to after an
  #               update (default 100)
  # shift::      the means are recomputed once more than this many frames
  #              have been counted (default 160)
  def initialize dimensions=13, init_mean=45.0, window_size=100, shift=160
    @init_mean = init_mean; @shift = shift; @ws = window_size
    @sums = Array.new dimensions, 0
    @means = Array.new dimensions, 0
    @means[0] = @init_mean
    @frame_count = 0
  end

  # Subtracts the current means from every frame of +dct+ and returns the
  # normalized frames.  Raises RuntimeError when the first frame does not
  # have the configured number of dimensions.
  def << dct
    # An empty batch has no first frame to validate and nothing to normalize.
    return [] if dct.empty?
    raise "Wrong number of dimensions" if dct[0].size != @means.size
    dct.map do |mfc|
      cmn = Array.new(@means.size) do |i|
        @sums[i] += mfc[i]
        mfc[i] - @means[i]
      end
      @frame_count += 1
      update if @frame_count > @shift
      cmn
    end
  end

  # Recomputes the means from the running sums.  If the mean of the first
  # coefficient leaves the range 5..70 the filter is reset; otherwise the
  # sums are rescaled so that they represent window_size frames.
  def update
    per_frame = 1.0 / @frame_count
    @means = @sums.map {|x| x * per_frame}

    if @means.first > 70 || @means.first < 5
      reset
    elsif @frame_count >= @shift
      @sums = @sums.map {|x| x * per_frame * @ws}
      @frame_count = @ws
    end
  end

  # Returns sums, means and the frame count to their initial values.
  def reset
    @sums.map! {0}
    @means.map! {0}
    @means[0] = @init_mean
    @frame_count = 0
  end
end
@@ -0,0 +1,13 @@
1
+ module Noyes
2
+ # Takes the natural log (Math::log) of an incoming m x n array. The dimensions
3
+ # of the array remain unchanged. A value that is zero or negative is replaced
4
+ # by log_zero instead of being passed to Math::log.
5
+ class LogCompressor
6
+ def initialize log_zero = -0.00001
7
+ @log_zero = log_zero
8
+ end
9
+ def << mspec
10
+ mspec.map {|msp| msp.map { |m| m > 0 ? Math::log(m) : @log_zero}}
11
+ end
12
+ end
13
+ end
@@ -0,0 +1,112 @@
1
+ require 'noyes_math'
2
module Noyes
  # Mel filter takes an m x n matrix.  The inner array becomes equal to the
  # number of mel filter banks (nfilt).  The dimensionality of the outer array
  # remains unchanged.
  class MelFilter
    include Math

    # srate::  sample rate of the audio
    # nfft::   number of FFT points behind each power spectrum
    # nfilt::  number of triangular filters to build
    # lowerf:: lower edge of the first filter, in the same unit as srate
    # upperf:: upper edge of the last filter, in the same unit as srate
    def initialize(srate, nfft, nfilt, lowerf, upperf)
      bank_params = MelFilter.make_bank_parameters srate, nfft, nfilt, lowerf, upperf
      @indices = []
      @weights = []
      bank_params.each do |params|
        ind, weights = MelFilter.make_filter(*params)
        @indices << ind
        @weights << weights
      end
    end

    # Applies every filter to every spectrum.  Filter weights that fall
    # beyond the end of a spectrum are skipped.
    def <<(power_spectra)
      power_spectra.map do |spectrum|
        Array.new(@indices.size) do |i|
          initial_index, weights = @indices[i], @weights[i]
          output = 0.0
          weights.each_with_index do |weight, j|
            index = initial_index + j
            output += spectrum[index] * weight if index < spectrum.length
          end
          output
        end
      end
    end

    # Converts a linear frequency, or anything responding to map that holds
    # linear frequencies, to the mel scale.
    def self.to_mel f
      return f.map {|linfreq| self.to_mel linfreq} if f.respond_to? :map
      2595.0 * Math.log10(1.0 + f/700.0)
    end

    # Inverse of to_mel.
    def self.to_linear m
      return m.map {|melfreq| self.to_linear melfreq} if m.respond_to? :map
      700.0 * (10.0**(m/2595.0) - 1.0)
    end

    # Rounds in_freq to the nearest multiple of step_freq.
    def self.determine_bin in_freq, step_freq
      step_freq * (in_freq/step_freq).round
    end

    # Returns one [left_edge, center, right_edge, initial_freq_bin,
    # delta_freq] array per filter.  Centers are evenly spaced on the mel
    # scale and snapped to FFT bin frequencies; neighbouring filters share
    # edges.  Raises RuntimeError when nfft or nfilt is not positive.
    def self.make_bank_parameters srate, nfft, nfilt, lowerf, upperf
      raise 'Number of FFT points is <= 0.' if nfft <= 0
      raise 'Number of filters is <= 0.' if nfilt <= 0
      srate = srate.to_f; lowerf = lowerf.to_f; upperf = upperf.to_f
      left_edge = Array.new nfilt
      right_edge = Array.new nfilt
      center_freq = Array.new nfilt
      melmax = self.to_mel upperf
      melmin = self.to_mel lowerf
      delta_freq_mel = (melmax - melmin) / (nfilt + 1.0)
      delta_freq = srate/nfft
      left_edge[0] = self.determine_bin lowerf, delta_freq
      next_edge_mel = melmin
      nfilt.times do |i|
        next_edge_mel += delta_freq_mel
        next_edge = self.to_linear next_edge_mel
        center_freq[i] = self.determine_bin next_edge, delta_freq
        # Each center is the right edge of the previous filter and the left
        # edge of the next one.
        right_edge[i-1] = center_freq[i] if i > 0
        left_edge[i+1] = center_freq[i] if i < nfilt - 1
      end

      next_edge_mel += delta_freq_mel
      next_edge = self.to_linear next_edge_mel
      right_edge[nfilt-1] = self.determine_bin next_edge, delta_freq
      Array.new(nfilt) do |i|
        initial_freq_bin = self.determine_bin left_edge[i], delta_freq
        initial_freq_bin += delta_freq if initial_freq_bin < left_edge[i]
        [left_edge[i], center_freq[i], right_edge[i],
         initial_freq_bin, delta_freq]
      end
    end

    # Builds one triangular filter.  Returns [index, weights] where index is
    # the bin number of init_freq and weights rise linearly from left to
    # center and fall linearly from center to right, sampled every delta.
    # Raises RuntimeError when delta is zero, when the boundaries round to
    # the same value, or when the filter would have no elements.
    def self.make_filter left, center, right, init_freq, delta
      raise 'delta freq has zero value' if delta == 0
      if (right - left).round == 0 || (center - left).round == 0 ||
         (right - center).round == 0
        raise 'filter boundaries too close'
      end

      n_elements = ((right - left)/ delta + 1).round
      raise 'number of mel elements is zero' if n_elements == 0

      weights = Array.new n_elements
      # A float height keeps the slopes fractional even for integer edges.
      height = 1.0
      left_slope = height / (center - left)
      right_slope = height / (center - right)

      index_fw = 0
      init_freq.step right, delta do |current|
        if current < center
          weights[index_fw] = left_slope * (current - left)
        else
          weights[index_fw] = height + right_slope * (current - center)
        end
        index_fw += 1
      end
      [(init_freq/delta).round, weights]
    end

    # Returns the weighted sum of spectrum values starting at init_index.
    def apply_weights init_index, weights, spectrum
      output = 0.0
      weights.size.times do |i|
        output += spectrum[i + init_index] * weights[i]
      end
      output
    end
  end
end
@@ -0,0 +1,19 @@
1
+ require 'discrete_fourier_transform'
2
module Noyes
  # The squared magnitude of the DFT.  You must specify the number of FFT
  # points.  The power spectrum returns an array of arrays where each inner
  # array is of length nfft/2 + 1.  The length of the outer array does not
  # change.
  class PowerSpectrumFilter
    include Noyes

    def initialize(nfft)
      @nfft = nfft
    end

    # Transforms every vector of +data+ with dft and keeps the squared
    # magnitude of the first nfft/2 + 1 points.
    def <<(data)
      unique_points = @nfft / 2 + 1
      data.map do |frame|
        spectrum = dft frame, @nfft
        (0...unique_points).map { |bin| spectrum[bin].abs**2 }
      end
    end
  end
end
@@ -0,0 +1,21 @@
1
module Noyes
  # A simple high pass filter.  It takes an array of size n and returns an
  # array of size n.  The last sample of each call is remembered, so
  # consecutive chunks are filtered as one continuous signal.
  class Preemphasizer
    include Math

    # factor:: weight of the previous sample that is subtracted from the
    #          current one (default 0.97)
    def initialize factor=0.97
      @factor = factor
      @prior = 0
    end

    # Returns y[i] = x[i] - factor * x[i-1], where the sample before the
    # first element is the last sample of the previous call (0 initially).
    def <<(data)
      # An empty chunk must not overwrite the remembered sample with nil.
      return [] if data.empty?
      prior = @prior
      @prior = data.last
      data.map do |x|
        y = x - @factor * prior
        prior = x
        y
      end
    end
  end
end
@@ -0,0 +1,28 @@
1
module Noyes
  # Segments an array of data into an array of arrays.  Inner arrays are the
  # size of the window, and consecutive windows start shift samples apart.
  # Samples that do not yet fill a window are kept for the next call.
  class Segmenter
    def initialize(window_size, shift)
      @winsz = window_size
      @winshift = shift
      @overflow = nil
    end

    # Returns the windows available so far, or nil while fewer than
    # window_size + 5 * shift samples have been buffered.
    def <<(data)
      buffer = @overflow ? @overflow + data : data
      if buffer.size < @winsz + @winshift * 5
        @overflow = buffer
        return nil
      end

      frames = []
      start = 0
      until start + @winsz > buffer.length
        frames << buffer[start, @winsz]
        start += @winshift
      end
      # Everything from the start of the next window onwards is carried over.
      @overflow = buffer[start..-1]
      frames
    end
  end
end
metadata ADDED
@@ -0,0 +1,78 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: noyes
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.3.2
5
+ platform: ruby
6
+ authors:
7
+ - Joe Woelfel
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2010-02-01 00:00:00 -05:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description: Currently sufficient to create basic features for speech recognition
17
+ email: joe@talkhouse.com
18
+ executables:
19
+ - noyes_dump44k.sh
20
+ - noyes_dump8k.sh
21
+ - recognize.sh
22
+ extensions: []
23
+
24
+ extra_rdoc_files:
25
+ - COPYING
26
+ - README
27
+ - doc/overview.rdoc
28
+ files:
29
+ - lib/common/noyes_dsl.rb
30
+ - lib/common/noyes_math.rb
31
+ - lib/common/send_incrementally.rb
32
+ - lib/noyes.rb
33
+ - lib/ruby_impl/dct.rb
34
+ - lib/ruby_impl/delta.rb
35
+ - lib/ruby_impl/discrete_fourier_transform.rb
36
+ - lib/ruby_impl/filter.rb
37
+ - lib/ruby_impl/hamming_window.rb
38
+ - lib/ruby_impl/live_cmn.rb
39
+ - lib/ruby_impl/log_compress.rb
40
+ - lib/ruby_impl/mel_filter.rb
41
+ - lib/ruby_impl/power_spec.rb
42
+ - lib/ruby_impl/preemphasis.rb
43
+ - lib/ruby_impl/segment.rb
44
+ - COPYING
45
+ - README
46
+ - doc/overview.rdoc
47
+ has_rdoc: true
48
+ homepage: http://github.com/talkhouse/noise
49
+ licenses: []
50
+
51
+ post_install_message:
52
+ rdoc_options:
53
+ - --charset=UTF-8
54
+ require_paths:
55
+ - lib/ruby_impl
56
+ - lib/common
57
+ - lib
58
+ required_ruby_version: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ version: "0"
63
+ version:
64
+ required_rubygems_version: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: "0"
69
+ version:
70
+ requirements: []
71
+
72
+ rubyforge_project:
73
+ rubygems_version: 1.3.5
74
+ signing_key:
75
+ specification_version: 3
76
+ summary: A signal processing library
77
+ test_files: []
78
+