noyes 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/COPYING ADDED
@@ -0,0 +1,21 @@
1
+ Copyright 2010 Talkhouse. All rights reserved.
2
+
3
+ Redistribution and use in source and binary forms, with or without modification, are
4
+ permitted provided that the following conditions are met:
5
+
6
+ 1. Redistributions of source code must retain the above copyright notice, this list of
7
+ conditions and the following disclaimer.
8
+
9
+ 2. Redistributions in binary form must reproduce the above copyright notice, this list
10
+ of conditions and the following disclaimer in the documentation and/or other materials
11
+ provided with the distribution.
12
+
13
+ THIS SOFTWARE IS PROVIDED BY TALKHOUSE ``AS IS'' AND ANY EXPRESS OR IMPLIED
14
+ WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
15
+ FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL TALKHOUSE OR
16
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
17
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
18
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
19
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
20
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
21
+ ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
data/README ADDED
@@ -0,0 +1,32 @@
1
+ Noyes is a signal processing library. It currently has just enough signal
2
+ processing to produce features suitable for speech recognition.
3
+
4
+ Pronunciation: Typically pronounced the same as 'noise'. But "NO!... YES!" is
5
+ considered acceptable if you yell it loudly enough or at least with sufficient
6
+ conviction to make people think you have truly changed your mind.
7
+
8
+ Noyes is implemented entirely in Ruby. It's also implemented entirely in Java.
9
+ The Java version has Ruby bindings too. So you can have Java's speed from
10
+ Ruby. All versions share the same unit tests, which are written in Ruby. The
11
+ design goal is to have signal processing routines that are so simple and so
12
+ disentangled from the overall system that anyone could extract any of the
13
+ routines and use them elsewhere with little trouble. Benchmarks are included.
14
+
15
+ This library places an emphasis on expressiveness without sacrificing ultimate
16
+ performance. It does so by supporting multiple implementations each with Ruby
17
+ bindings. The pure Ruby version, while not fast, is often adequate for
18
+ development and is the best place to add new routines.
19
+
20
+ For examples of how to link with different implementations see the test section
21
+ of the Rakefile. At present only the pure Ruby implementation is exposed via
22
+ the gem.
23
+
24
+ Requirements:
25
+ Almost any version of ruby & rake.
26
+ Java, if you want to use the Java version.
27
+
28
+ Some of the utility scripts may use sox, but
29
+ none of the core routines use it.
30
+
31
+ For usage information:
32
+ rake -T
@@ -0,0 +1,59 @@
1
+ #!/usr/bin/env ruby
2
+ # vim: set filetype=ruby :
3
+ ROOT = File.dirname(File.dirname(__FILE__))
4
+ $: << "#{ROOT}/lib/ruby"
5
+ $: << "#{ROOT}/lib/common"
6
+
7
+ require 'signal'
8
+
9
# Dumps every intermediate stage of the 44.1 kHz front end for a single audio
# file.  Each stage is written as big-endian single-precision floats ('g*')
# to "<stage>.dat" in the directory that contains the input file.
require 'shellwords'

if ARGV.size != 1 || ARGV[0] == '-h'
  puts "Usage: noyes_dump44k <file>"
  exit 1
end

FILE = ARGV[0]
DIR = File.dirname FILE

# NOTE(review): the filter classes and log_compress are expected to come from
# the 'signal' library required above this block; confirm it is on the path.
include Signal
nfilt = 40
min_freq = 130
max_freq = 6800
nfft = 2048
freq = 44100
shift = 441
frame_size = 1130

preemphasizer = Preemphasizer.new 0.97
segmenter = Segmenter.new frame_size, shift
hamming_windower = HammingWindow.new frame_size
power_spectrum_filter = PowerSpectrumFilter.new nfft
mel_filter = MelFilter.new freq, nfft, nfilt, min_freq, max_freq
discrete_cosine_transform = DCT.new 13, nfilt
live_cmn = LiveCMN.new
ddf = DoubleDeltaFilter.new

# Writes one stage as packed floats.  Binary mode keeps the bytes intact on
# platforms that translate line endings in text mode.
dump = lambda do |name, data|
  File.open("#{DIR}/#{name}.dat", 'wb') { |f| f.write data.flatten.pack('g*') }
end

# The file name is shell-escaped so names with spaces or metacharacters are
# passed to sox as a single argument.  The audio is resampled to the rate the
# filters above are configured for (this used to be hard-coded to 8k, which
# did not match freq/shift/frame_size).
raw = `sox #{Shellwords.escape(FILE)} -s -B -r #{freq} -b 16 -t raw -`
# NOTE: raw.dat is written to the current working directory, not DIR.
File.open('raw.dat', 'wb') { |f| f.write raw }
pcm = raw.unpack('n*').map { |d| to_signed_short(d).to_f }

pre = preemphasizer << pcm
dump.call 'pre', pre
# Pad with zeros up to a multiple of the frame size before segmenting.
seg = segmenter << (pre + Array.new(frame_size - pre.size % frame_size, 0.0))
dump.call 'seg', seg
ham = hamming_windower << seg
dump.call 'ham', ham
pow = power_spectrum_filter << ham
dump.call 'pow', pow
mel = mel_filter << pow
dump.call 'mel', mel
log = log_compress mel
dump.call 'log_mel', log
dct = discrete_cosine_transform << log
dump.call 'dct', dct
cmn = live_cmn << dct
dump.call 'cmn', cmn
dd = ddf << cmn
dd += ddf.final_estimate
dump.call 'dd', dd
58
+
59
+
@@ -0,0 +1,58 @@
1
+ #!/usr/bin/env ruby
2
+ # vim: set filetype=ruby :
3
+ ROOT = File.dirname(File.dirname(__FILE__))
4
+ $: << "#{ROOT}/lib/ruby"
5
+ $: << "#{ROOT}/lib/common"
6
+
7
+ require 'signal'
8
+
9
# Dumps every intermediate stage of the 8 kHz front end for a single audio
# file.  Each stage is written as big-endian single-precision floats ('g*')
# to "<stage>.dat" in the directory that contains the input file.
require 'shellwords'

if ARGV.size != 1 || ARGV[0] == '-h'
  puts "Usage: noyes_dump8k <file>"
  exit 1
end

FILE = ARGV[0]
DIR = File.dirname FILE

# NOTE(review): the filter classes and log_compress are expected to come from
# the 'signal' library required above this block; confirm it is on the path.
include Signal
nfilt = 32
min_freq = 200
max_freq = 3700
nfft = 256
freq = 8000
shift = 80
frame_size = 205

preemphasizer = Preemphasizer.new 0.97
segmenter = Segmenter.new frame_size, shift
hamming_windower = HammingWindow.new frame_size
power_spectrum_filter = PowerSpectrumFilter.new nfft
mel_filter = MelFilter.new freq, nfft, nfilt, min_freq, max_freq
discrete_cosine_transform = DCT.new 13, nfilt
live_cmn = LiveCMN.new
ddf = DoubleDeltaFilter.new

# Writes one stage as packed floats.  Binary mode keeps the bytes intact on
# platforms that translate line endings in text mode.
dump = lambda do |name, data|
  File.open("#{DIR}/#{name}.dat", 'wb') { |f| f.write data.flatten.pack('g*') }
end

# The file name is shell-escaped so names with spaces or metacharacters are
# passed to sox as a single argument.
raw = `sox #{Shellwords.escape(FILE)} -s -B -r 8k -b 16 -t raw -`
# NOTE: raw.dat is written to the current working directory, not DIR.
File.open('raw.dat', 'wb') { |f| f.write raw }
pcm = raw.unpack('n*').map { |d| to_signed_short(d).to_f }

pre = preemphasizer << pcm
dump.call 'pre', pre
# Pad with zeros up to a multiple of the frame size before segmenting.
seg = segmenter << (pre + Array.new(frame_size - pre.size % frame_size, 0.0))
dump.call 'seg', seg
ham = hamming_windower << seg
dump.call 'ham', ham
pow = power_spectrum_filter << ham
dump.call 'pow', pow
mel = mel_filter << pow
dump.call 'mel', mel
log = log_compress mel
dump.call 'log_mel', log
dct = discrete_cosine_transform << log
dump.call 'dct', dct
cmn = live_cmn << dct
dump.call 'cmn', cmn
dd = ddf << cmn
dd += ddf.final_estimate
dump.call 'dd', dd
58
+
data/bin/recognize.sh ADDED
@@ -0,0 +1,15 @@
1
+ #!/usr/bin/env jruby
2
+ # vim: set filetype=ruby :
3
+ ROOT = File.dirname(File.dirname(__FILE__))
4
+ $: << "#{ROOT}/lib/ruby"
5
+ $: << "#{ROOT}/lib/common"
6
+ require 'socket'
7
+ require 'send_incrementally'
8
+
9
# Streams the features of +file+ to a recognition server and returns whatever
# send_incremental_features returns.  The same socket is used for both
# directions.  Defaults to localhost:2318.
def recognize(file, node='localhost', port=2318)
  TCPSocket.open(node, port) { |sock| send_incremental_features(file, sock, sock) }
end
14
+
15
+ puts recognize ARGV[0]
data/doc/overview.rdoc ADDED
@@ -0,0 +1,51 @@
1
+ # = Overview
2
+ #
3
+ # All signal processing routines use a simple DSL style interface. Below are
4
+ # some examples.
5
+ #
6
+ # == Filter operator example.
7
+ # Each example below is the data on the left being operated on by the filter on
8
+ # the right. This is similar to the way the += operator works for numbers. The
9
+ # data is not modified in place currently and it should probably stay that way.
10
+ # It could be if efficiency demanded it, but that would require a bit more care
11
+ # to avoid side effects when using the API. The >>= actually looks like a
12
+ # filter.
13
+ #
14
+ # data = (1..12).to_a
15
+ # segmenter = Segmenter.new 4, 2 # window size, window shift
16
+ # hamming_filter = HammingWindow.new 4 # window size
17
+ # power_spec_filter = PowerSpectrumFilter.new 8 # number of ffts
18
+ #
19
+ # data >>= segmenter
20
+ # data >>= hamming_filter
21
+ # data >>= power_spec_filter
22
+ # data >>= dct_filter
23
+ #
24
+ # You can expand the >>= operator out, but I think the flow is worse and there
25
+ # is more repetition, particularly when you have a lot of filters in sequence.
26
+ # This is perfectly valid syntax though. Also, this is very useful if you don't
27
+ # want to keep a reference to your original data.
28
+ #
29
+ # pcm_data = (1..12).to_a
30
+ # segmenter = Segmenter.new
31
+ # hamming_filter = HammingWindow.new 4
32
+ # segmented_data = segmenter << pcm_data, 4, 2
33
+ # hamming_data = hamming_filter << segmented_data
34
+ # power_spectrum_data = power_spec_filter << hamming_data
35
+ # dct_data = dct_filter << power_spectrum_data
36
+ #
37
+ # Here is an older version with function calls instead of operator overloading.
38
+ # The trouble with it is that the flow is hard to follow, and there is
39
+ # repetition. Filter and process are really synonyms. And this requires
40
+ # repeating the data component twice. Also, power spec is a function here
41
+ # with additional arguments. I think I'd rather have the configuration
42
+ # details, such as number of ffts all grouped at the top. It's easier to
43
+ # follow this way.
44
+ #
45
+ # data = (1..12).to_a
46
+ # seg = Segmenter.new
47
+ # ham = HammingWindow.new 4
48
+ # segments = segmenter.process data, 4, 2
49
+ # hamming_ = hamming_filter.process segments
50
+ # power = power_spec.filter hamming, 8
51
+ # dct = dct.process power
@@ -0,0 +1,6 @@
1
class Array
  # Filter operator: `data >> filter` hands this array to the filter by
  # calling `filter << data` and returns the filter's result, which is what
  # makes `data >>= filter` work.
  def >>(filter)
    filter << self
  end
end
@@ -0,0 +1,18 @@
1
module Math
  # Returns the sum of the element-wise products of l1 and l2.  Iterates over
  # the indices of l1, so l2 must be at least as long.  Available to anything
  # that includes Math.
  def dot_product(l1, l2)
    (0...l1.size).inject(0) { |total, idx| total + l1[idx] * l2[idx] }
  end
end
10
+
11
# Reinterprets an unsigned 16-bit value (as produced by unpack 'n') as a
# signed two's-complement short.  Values below 2**15 are returned unchanged.
def to_signed_short(n)
  bits = 16
  all_ones = 2**bits - 1
  sign_bit = 2**(bits - 1)
  return n if n < sign_bit
  -((n ^ all_ones) + 1)
end
@@ -0,0 +1,62 @@
1
+ require 'noyes'
2
+ include Noyes
3
+
4
# Protocol header string followed by the message-type codes, each encoded as
# a 32-bit big-endian integer: start=0, audio=1, end=2, done=3, cepstra=4.
TMAGIC = '1.0 talkhouse'
TSTART, TAUDIO, TEND, TDONE, TCEPSTRA = (0..4).map { |code| [code].pack('N') }
10
+
11
# Use sox to convert a file of almost any common type into pcm.
#
# Runs `sox` to produce signed, big-endian, 16-bit raw audio at 8 kHz and
# returns the samples as an array of floats.  The file name is shell-escaped
# so paths containing spaces or shell metacharacters reach sox as a single
# argument instead of being interpreted by the shell.
def file2pcm file
  require 'shellwords' # loaded here so the method is self-contained
  raw = `sox #{Shellwords.escape(file)} -s -B -r 8k -b 16 -t raw -`
  length = 16 # bits
  max = 2**length - 1
  mid = 2**(length - 1)
  # unpack 'n' yields unsigned shorts; fold the upper half back to negatives.
  raw.unpack('n*').map do |word|
    (word >= mid ? -((word ^ max) + 1) : word).to_f
  end
end
21
+
22
# Takes a file and two IO-like objects.
#
# Converts the file to pcm, runs it through the 8 kHz front end in slices of
# 1230 samples and writes each batch of cepstra to +to_server+ as a TCEPSTRA
# message (type, frame count, then each frame packed as 'g*').  A '.' is
# printed per batch.  After TEND and TDONE are sent, returns whatever
# +from_server.read+ returns.
def send_incremental_features file, to_server, from_server
  nfilt = 32
  min_freq = 200
  max_freq = 3700
  nfft = 256
  freq = 8000
  shift = 80
  frame_size = 205

  preemphasizer = Preemphasizer.new(0.97)
  segmenter = Segmenter.new(frame_size, shift)
  # Stages applied, in order, to every batch of frames the segmenter emits.
  frame_stages = [
    HammingWindow.new(frame_size),
    PowerSpectrumFilter.new(nfft),
    MelFilter.new(freq, nfft, nfilt, min_freq, max_freq),
    LogCompressor.new,
    DCT.new(13, nfilt),
    LiveCMN.new
  ]

  pcm = file2pcm(file)
  to_server.write(TMAGIC)
  to_server.write(TSTART)
  pcm.each_slice(1230) do |chunk|
    frames = segmenter << (preemphasizer << chunk)
    # The segmenter returns nil until it has buffered enough samples.
    next unless frames
    cepstra = frame_stages.inject(frames) { |input, stage| stage << input }
    to_server.write(TCEPSTRA)
    to_server.write([cepstra.size].pack('N'))
    print '.'
    cepstra.each { |vector| to_server.write(vector.pack('g*')) }
    to_server.flush
  end
  to_server.write(TEND)
  to_server.write(TDONE)
  to_server.flush
  from_server.read
end
data/lib/noyes.rb ADDED
@@ -0,0 +1,13 @@
1
+ require "noyes_dsl"
2
+ require "noyes_math"
3
+ require "live_cmn"
4
+ require "dct"
5
+ require "delta"
6
+ require "filter"
7
+ require "mel_filter"
8
+ require "hamming_window"
9
+ require "log_compress"
10
+ require "discrete_fourier_transform"
11
+ require "power_spec"
12
+ require "preemphasis"
13
+ require "segment"
@@ -0,0 +1,34 @@
1
+ require 'noyes_math'
2
+
3
module Noyes
  # Takes the discrete cosine transform.  Converts an n x m matrix to an
  # n x order matrix.  ncol should be set to m.
  class DCT
    include Math
    attr_accessor :melcos

    # Precomputes the order x ncol table of cosine weights.  Every weight is
    # divided by order (see note [1] below this module).
    def initialize(order, ncol)
      @melcos = Array.new(order) do |row|
        freq = PI * row.to_f / ncol
        Array.new(ncol) { |col| cos(freq * (col + 0.5)) / order }
      end
    end

    # Projects each input vector onto every row of the cosine table.
    def <<(data)
      data.map do |dvec|
        @melcos.map do |basis|
          basis.each_index.inject(0) { |acc, k| acc + basis[k] * dvec[k] }
        end
      end
    end
  end
end
29
+
30
+ # Notes:
31
+ # [1] I'm not sure why I do this division by order. Sphinx does it. I wanted
32
+ # to have compatible output though I'm not sure I should care since I don't use
33
+ # sphinx anymore. However, Sphinx does it continually during processing. I
34
+ # build it into the filters so there is no cost.
@@ -0,0 +1,34 @@
1
module Noyes
  # Takes an m x n array and makes an m x 3 x n array.  Each original inner
  # array is followed by its delta and its double delta.
  #
  # The filter is stateful: it keeps a few trailing frames between calls so
  # data can be fed in chunks.  Output lags the input by three frames; call
  # final_estimate at the end of the stream to flush them.
  class DoubleDeltaFilter
    def initialize
      @previous = nil
    end

    # Returns one [frame, delta, double_delta] triple for every frame that
    # has three frames of context on both sides.
    def <<(cepstra)
      return [] if cepstra.empty?
      # Prime the left context by repeating the very first frame.
      @previous ||= [cepstra.first] * 3
      buf = @previous + cepstra
      result = (3...(buf.size - 3)).map do |i|
        delta = buf[i].each_index.map { |k| buf[i + 2][k] - buf[i - 2][k] }
        double_delta = buf[i].each_index.map do |k|
          buf[i + 3][k] - buf[i - 1][k] - buf[i + 1][k] + buf[i - 3][k]
        end
        [buf[i], delta, double_delta]
      end
      # Keep at most the last six frames.  Array#last keeps whatever exists
      # when fewer than six are buffered; a [-6..-1] slice would return nil
      # there and discard the state, silently dropping frames fed in small
      # chunks.
      @previous = buf.last(6)
      result
    end

    # If there is no more data we can estimate the remaining frames by
    # copying the final frame 3 times.  Returns [] if nothing was ever fed.
    def final_estimate
      return [] unless @previous
      self << ([@previous.last] * 3)
    end
  end
end
@@ -0,0 +1,37 @@
1
+ require 'complex'
2
+
3
module Noyes
  include Math
  # Takes the discrete Fourier transform.
  #
  # data is zero-padded up to size entries, reordered by a bit-reversal
  # permutation and then combined in passes of doubling span.  Returns an
  # array of Complex values.
  # NOTE(review): the halving/doubling index arithmetic appears to assume
  # size is a power of two; confirm against callers.
  def dft data, size
    spectrum = data.map { |sample| Complex(sample, 0) }
    spectrum << Complex(0, 0) while spectrum.size < size

    # Bit-reversal reordering.
    rev = 0
    size.times do |pos|
      spectrum[rev], spectrum[pos] = spectrum[pos], spectrum[rev] if pos < rev
      bit = size / 2
      while rev >= bit && bit > 1
        rev -= bit
        bit /= 2
      end
      rev += bit
    end

    # Butterfly passes; the distance between paired entries doubles each pass.
    half = 1
    while half < size
      span = 2 * half
      rotation = Complex.polar(1, Math::PI / half)
      twiddle = Complex(1, 0)
      half.times do |offset|
        offset.step(size - 1, span) do |lo|
          hi = lo + half
          term = twiddle * spectrum[hi]
          spectrum[hi], spectrum[lo] = spectrum[lo] - term, spectrum[lo] + term
        end
        twiddle *= rotation
      end
      half = span
    end
    spectrum
  end
end
@@ -0,0 +1,11 @@
1
module Noyes
  # A generic filter.  Just supply an array of weights; each incoming value
  # is multiplied by the weight at the same position.
  class Filter
    def initialize(weights)
      @weights = weights
    end

    # Returns a new array of data[i] * weights[i].
    def <<(data)
      data.zip(@weights).map { |pair| pair.first * pair.last }
    end
  end
end
@@ -0,0 +1,20 @@
1
module Noyes
  # Takes an m x n matrix and multiplies each inner array by a Hamming window
  # function.  Be careful to make sure your inner array length is the same as
  # the window size.
  class HammingWindow
    include Math

    # Precomputes window_size coefficients 0.54 - 0.46*cos(2*pi*i/(N-1)).
    def initialize(window_size)
      twopi = 2 * PI
      @hamming_window = Array.new(window_size) do |i|
        0.54 - 0.46 * cos(twopi * i / (window_size - 1))
      end
    end

    # Returns a new matrix with every segment weighted by the window.
    def <<(segments)
      segments.map do |segment|
        segment.zip(@hamming_window).map { |sample, coeff| sample * coeff }
      end
    end
  end
end
@@ -0,0 +1,42 @@
1
# Normalizes cepstrum means and applies them.  Dimensionality remains
# unchanged.  NOTE: This class resets itself automatically if the mean of the
# first dimension drifts above 70 or below 5.  Possibly these bounds should
# be parameterized.
class LiveCMN
  # dimensions::  length of every incoming vector
  # init_mean::   starting mean for the first dimension (others start at 0)
  # window_size:: frame count the running sums are rescaled to on update
  # shift::       frames to accumulate before the means are re-estimated
  def initialize dimensions=13, init_mean=45.0, window_size=100, shift=160
    @init_mean = init_mean
    @shift = shift
    @ws = window_size
    @sums = Array.new dimensions, 0
    @means = Array.new dimensions, 0
    @means[0] = @init_mean
    @frame_count = 0
  end

  # Subtracts the current means from every vector and returns the result.
  # An empty batch yields [] instead of failing on the dimension check.
  # Raises RuntimeError when the first vector has the wrong length.
  def <<(dct)
    return [] if dct.empty?
    raise "Wrong number of dimensions" if dct[0].size != @means.size
    dct.map do |mfc|
      cmn = Array.new(@means.size) do |i|
        @sums[i] += mfc[i]
        mfc[i] - @means[i]
      end
      @frame_count += 1
      update if @frame_count > @shift
      cmn
    end
  end

  # Re-estimates the means from the running sums, then either resets (first
  # mean out of bounds) or rescales the sums to window_size frames.
  def update
    per_frame = 1.0 / @frame_count
    @means = @sums.map { |x| x * per_frame }

    if @means.first > 70 || @means.first < 5
      reset
    elsif @frame_count >= @shift
      @sums = @sums.map { |x| x * per_frame * @ws }
      @frame_count = @ws
    end
  end

  # Restores the initial state: zero sums and means, first mean = init_mean.
  def reset
    @sums.map! { 0 }
    @means.map! { 0 }
    @means[0] = @init_mean
    @frame_count = 0
  end
end
@@ -0,0 +1,13 @@
1
module Noyes
  # Takes the natural log (Math.log) of an incoming m x n array.  The
  # dimensions of the array remain unchanged.  Any value that is not strictly
  # positive is replaced by log_zero instead of plunging into singularity
  # land and throwing an exception.
  class LogCompressor
    def initialize(log_zero = -0.00001)
      @log_zero = log_zero
    end

    def <<(mspec)
      mspec.map do |row|
        row.map do |value|
          if value > 0
            Math.log(value)
          else
            @log_zero
          end
        end
      end
    end
  end
end
@@ -0,0 +1,112 @@
1
+ require 'noyes_math'
2
module Noyes
  # Mel filter takes an m x n matrix.  The inner array becomes equal to the
  # number of mel filter banks (nfilt).  The dimensionality of the outer array
  # remains unchanged.
  class MelFilter
    include Math

    # srate::  sample rate of the audio
    # nfft::   number of FFT points used for the power spectrum
    # nfilt::  number of mel filters in the bank
    # lowerf:: lowest frequency covered by the bank
    # upperf:: highest frequency covered by the bank
    def initialize(srate, nfft, nfilt, lowerf, upperf)
      bank_params = MelFilter.make_bank_parameters(srate, nfft, nfilt, lowerf, upperf)
      @indices = []
      @weights = []
      bank_params.each do |params|
        ind, weights = MelFilter.make_filter(*params)
        @indices << ind
        @weights << weights
      end
    end

    # Applies every filter to every spectrum.  Weights that would fall beyond
    # the end of a spectrum are skipped.
    def <<(power_spectra)
      power_spectra.map do |spectrum|
        @indices.zip(@weights).map do |initial_index, weights|
          output = 0.0
          weights.each_with_index do |weight, j|
            index = initial_index + j
            output += spectrum[index] * weight if index < spectrum.length
          end
          output
        end
      end
    end

    # Converts a linear frequency (or anything responding to map) to mel.
    def self.to_mel(f)
      return f.map { |linfreq| to_mel(linfreq) } if f.respond_to?(:map)
      2595.0 * Math.log10(1.0 + f / 700.0)
    end

    # Converts a mel frequency (or anything responding to map) to linear.
    def self.to_linear(m)
      return m.map { |melfreq| to_linear(melfreq) } if m.respond_to?(:map)
      700.0 * (10.0**(m / 2595.0) - 1.0)
    end

    # Rounds in_freq to the nearest multiple of step_freq.
    def self.determine_bin(in_freq, step_freq)
      step_freq * (in_freq / step_freq).round
    end

    # Returns one [left, center, right, initial_freq_bin, delta_freq] tuple
    # per filter.  Raises RuntimeError when nfft or nfilt is not positive
    # (the checks now match their messages; they used to test only for 0).
    def self.make_bank_parameters(srate, nfft, nfilt, lowerf, upperf)
      raise 'Number of FFT points is <= 0.' if nfft <= 0
      raise 'Number of filters is <= 0.' if nfilt <= 0
      srate = srate.to_f
      lowerf = lowerf.to_f
      upperf = upperf.to_f
      left_edge = Array.new(nfilt)
      right_edge = Array.new(nfilt)
      center_freq = Array.new(nfilt)
      melmax = to_mel(upperf)
      melmin = to_mel(lowerf)
      delta_freq_mel = (melmax - melmin) / (nfilt + 1.0)
      delta_freq = srate / nfft
      left_edge[0] = determine_bin(lowerf, delta_freq)
      next_edge_mel = melmin
      # Each center is the right edge of the previous filter and the left
      # edge of the next one.
      nfilt.times do |i|
        next_edge_mel += delta_freq_mel
        next_edge = to_linear(next_edge_mel)
        center_freq[i] = determine_bin(next_edge, delta_freq)
        right_edge[i - 1] = center_freq[i] if i > 0
        left_edge[i + 1] = center_freq[i] if i < nfilt - 1
      end

      next_edge_mel += delta_freq_mel
      next_edge = to_linear(next_edge_mel)
      right_edge[nfilt - 1] = determine_bin(next_edge, delta_freq)
      Array.new(nfilt) do |i|
        initial_freq_bin = determine_bin(left_edge[i], delta_freq)
        initial_freq_bin += delta_freq if initial_freq_bin < left_edge[i]
        [left_edge[i], center_freq[i], right_edge[i], initial_freq_bin, delta_freq]
      end
    end

    # Builds one triangular filter.  Returns [first spectrum index, weights].
    # Raises RuntimeError when delta is zero or the edges coincide.
    def self.make_filter(left, center, right, init_freq, delta)
      raise 'delta freq has zero value' if delta == 0
      if (right - left).round == 0 || (center - left).round == 0 ||
         (right - center).round == 0
        raise 'filter boundaries too close'
      end

      n_elements = ((right - left) / delta + 1).round
      raise 'number of mel elements is zero' if n_elements == 0

      weights = Array.new(n_elements)
      # Float height so integer edges do not trigger integer division.
      height = 1.0
      left_slope = height / (center - left)
      right_slope = height / (center - right)

      index_fw = 0
      init_freq.step(right, delta) do |current|
        weights[index_fw] =
          if current < center
            left_slope * (current - left)
          else
            height + right_slope * (current - center)
          end
        index_fw += 1
      end
      [(init_freq / delta).round, weights]
    end

    # Weighted sum of the spectrum entries starting at init_index.
    def apply_weights(init_index, weights, spectrum)
      output = 0.0
      weights.size.times do |i|
        output += spectrum[i + init_index] * weights[i]
      end
      output
    end
  end
end
@@ -0,0 +1,19 @@
1
+ require 'discrete_fourier_transform'
2
module Noyes
  # The square of the DFT.  You must specify the number of ffts.  The power
  # spectrum returns an array of arrays where each inner array is of length
  # nfft/2 + 1.  The length of the outer array does not change.
  class PowerSpectrumFilter
    include Noyes

    def initialize(nfft)
      @nfft = nfft
    end

    # Transforms each vector with dft and keeps the squared magnitude of the
    # first nfft/2 + 1 points.
    def <<(data)
      unique_points = @nfft / 2 + 1
      data.map do |frame|
        transformed = dft(frame, @nfft)
        (0...unique_points).map { |bin| transformed[bin].abs**2 }
      end
    end
  end
end
@@ -0,0 +1,21 @@
1
module Noyes
  # A simple high pass filter.  It takes an array of size n and returns an
  # array of size n, where each output is x[i] - factor * x[i-1].  The last
  # sample of every chunk is remembered so chunks can be fed one after
  # another.
  class Preemphasizer
    include Math

    def initialize factor=0.97
      @factor = factor
      @prior = 0
    end

    # Filters one chunk.  An empty chunk returns [] and leaves the remembered
    # sample untouched; it used to overwrite it with nil, which made the next
    # call raise.
    def <<(data)
      return [] if data.empty?
      previous = @prior
      @prior = data.last
      data.map do |sample|
        emphasized = sample - @factor * previous
        previous = sample
        emphasized
      end
    end
  end
end
@@ -0,0 +1,28 @@
1
module Noyes
  # Segments an array of data into an array of arrays.  Inner arrays are the
  # size of the window.  Samples that do not yet fill a window are kept and
  # prepended to the next chunk.
  class Segmenter
    def initialize(window_size, shift)
      @winsz = window_size
      @winshift = shift
      @overflow = nil
    end

    # Returns nil (and buffers the data) until at least one window plus five
    # shifts' worth of samples is available; otherwise returns the windows.
    def <<(data)
      data = @overflow + data if @overflow
      @overflow = nil
      if data.size < @winsz + @winshift * 5
        @overflow = data
        return nil
      end

      windows = []
      start = 0
      until start + @winsz > data.length
        windows << data[start, @winsz]
        start += @winshift
      end
      @overflow = data[start..-1]
      windows
    end
  end
end
metadata ADDED
@@ -0,0 +1,78 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: noyes
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.3.2
5
+ platform: ruby
6
+ authors:
7
+ - Joe Woelfel
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2010-02-01 00:00:00 -05:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description: Currently sufficient to create basic features for speech recognition
17
+ email: joe@talkhouse.com
18
+ executables:
19
+ - noyes_dump44k.sh
20
+ - noyes_dump8k.sh
21
+ - recognize.sh
22
+ extensions: []
23
+
24
+ extra_rdoc_files:
25
+ - COPYING
26
+ - README
27
+ - doc/overview.rdoc
28
+ files:
29
+ - lib/common/noyes_dsl.rb
30
+ - lib/common/noyes_math.rb
31
+ - lib/common/send_incrementally.rb
32
+ - lib/noyes.rb
33
+ - lib/ruby_impl/dct.rb
34
+ - lib/ruby_impl/delta.rb
35
+ - lib/ruby_impl/discrete_fourier_transform.rb
36
+ - lib/ruby_impl/filter.rb
37
+ - lib/ruby_impl/hamming_window.rb
38
+ - lib/ruby_impl/live_cmn.rb
39
+ - lib/ruby_impl/log_compress.rb
40
+ - lib/ruby_impl/mel_filter.rb
41
+ - lib/ruby_impl/power_spec.rb
42
+ - lib/ruby_impl/preemphasis.rb
43
+ - lib/ruby_impl/segment.rb
44
+ - COPYING
45
+ - README
46
+ - doc/overview.rdoc
47
+ has_rdoc: true
48
+ homepage: http://github.com/talkhouse/noise
49
+ licenses: []
50
+
51
+ post_install_message:
52
+ rdoc_options:
53
+ - --charset=UTF-8
54
+ require_paths:
55
+ - lib/ruby_impl
56
+ - lib/common
57
+ - lib
58
+ required_ruby_version: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ version: "0"
63
+ version:
64
+ required_rubygems_version: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: "0"
69
+ version:
70
+ requirements: []
71
+
72
+ rubyforge_project:
73
+ rubygems_version: 1.3.5
74
+ signing_key:
75
+ specification_version: 3
76
+ summary: A signal processing library
77
+ test_files: []
78
+