hmm 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
@@ -0,0 +1,21 @@
1
+ ## MAC OS
2
+ .DS_Store
3
+
4
+ ## TEXTMATE
5
+ *.tmproj
6
+ tmtags
7
+
8
+ ## EMACS
9
+ *~
10
+ \#*
11
+ .\#*
12
+
13
+ ## VIM
14
+ *.swp
15
+
16
+ ## PROJECT::GENERAL
17
+ coverage
18
+ rdoc
19
+ pkg
20
+
21
+ ## PROJECT::SPECIFIC
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 David Tresner-Kirsch
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,17 @@
1
+ = hmm
2
+
3
+ A Ruby gem for machine learning that natively implements a (somewhat) generalized Hidden Markov Model classifier.
4
+
5
+ == Note on Patches/Pull Requests
6
+
7
+ * Fork the project.
8
+ * Make your feature addition or bug fix.
9
+ * Add tests for it. This is important so I don't break it in a
10
+ future version unintentionally.
11
+ * Commit; do not mess with the Rakefile, version, or history.
12
+ (If you want to have your own version, that is fine, but bump the version in a commit by itself so I can ignore it when I pull.)
13
+ * Send me a pull request. Bonus points for topic branches.
14
+
15
+ == Copyright
16
+
17
+ Copyright (c) 2009 David Tresner-Kirsch. See LICENSE for details.
@@ -0,0 +1,54 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+
4
+ begin
5
+ require 'jeweler'
6
+ Jeweler::Tasks.new do |gem|
7
+ gem.name = "hmm"
8
+ gem.summary = %Q{HMM Classifier}
9
+ gem.description = %Q{This project is a Ruby gem ('hmm') for machine learning that natively implements a (somewhat) generalized Hidden Markov Model classifier.}
10
+ gem.email = "dwkirsch@gmail.com"
11
+ gem.homepage = "http://github.com/dtkirsch/hmm"
12
+ gem.authors = ["David Tresner-Kirsch"]
13
+ gem.add_development_dependency "thoughtbot-shoulda", ">= 0"
14
+ gem.add_development_dependency "narray", ">= 0"
15
+ # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
16
+ end
17
+ Jeweler::GemcutterTasks.new
18
+ rescue LoadError
19
+ puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
20
+ end
21
+
22
+ require 'rake/testtask'
23
+ Rake::TestTask.new(:test) do |test|
24
+ test.libs << 'lib' << 'test'
25
+ test.pattern = 'test/**/test_*.rb'
26
+ test.verbose = true
27
+ end
28
+
29
+ begin
30
+ require 'rcov/rcovtask'
31
+ Rcov::RcovTask.new do |test|
32
+ test.libs << 'test'
33
+ test.pattern = 'test/**/test_*.rb'
34
+ test.verbose = true
35
+ end
36
+ rescue LoadError
37
+ task :rcov do
38
+ abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
39
+ end
40
+ end
41
+
42
+ task :test => :check_dependencies
43
+
44
+ task :default => :test
45
+
46
+ require 'rake/rdoctask'
47
+ Rake::RDocTask.new do |rdoc|
48
+ version = File.exist?('VERSION') ? File.read('VERSION') : ""
49
+
50
+ rdoc.rdoc_dir = 'rdoc'
51
+ rdoc.title = "hmm #{version}"
52
+ rdoc.rdoc_files.include('README*')
53
+ rdoc.rdoc_files.include('lib/**/*.rb')
54
+ end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.1
@@ -0,0 +1,57 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{hmm}
8
+ s.version = "0.0.1"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["David Tresner-Kirsch"]
12
+ s.date = %q{2009-11-25}
13
+ s.description = %q{This project is a Ruby gem ('hmm') for machine learning that natively implements a (somewhat) generalized Hidden Markov Model classifier.}
14
+ s.email = %q{dwkirsch@gmail.com}
15
+ s.extra_rdoc_files = [
16
+ "LICENSE",
17
+ "README.rdoc"
18
+ ]
19
+ s.files = [
20
+ ".document",
21
+ ".gitignore",
22
+ "LICENSE",
23
+ "README.rdoc",
24
+ "Rakefile",
25
+ "VERSION",
26
+ "hmm.gemspec",
27
+ "lib/hmm.rb",
28
+ "test/helper.rb",
29
+ "test/test_hmm.rb"
30
+ ]
31
+ s.homepage = %q{http://github.com/dtkirsch/hmm}
32
+ s.rdoc_options = ["--charset=UTF-8"]
33
+ s.require_paths = ["lib"]
34
+ s.rubygems_version = %q{1.3.5}
35
+ s.summary = %q{HMM Classifier}
36
+ s.test_files = [
37
+ "test/helper.rb",
38
+ "test/test_hmm.rb"
39
+ ]
40
+
41
+ if s.respond_to? :specification_version then
42
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
43
+ s.specification_version = 3
44
+
45
+ if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
46
+ s.add_development_dependency(%q<thoughtbot-shoulda>, [">= 0"])
47
+ s.add_development_dependency(%q<narray>, [">= 0"])
48
+ else
49
+ s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
50
+ s.add_dependency(%q<narray>, [">= 0"])
51
+ end
52
+ else
53
+ s.add_dependency(%q<thoughtbot-shoulda>, [">= 0"])
54
+ s.add_dependency(%q<narray>, [">= 0"])
55
+ end
56
+ end
57
+
@@ -0,0 +1,135 @@
1
+ # Hidden Markov Model classifier.
2
+ # Currently capable of:
3
+ # -supervised training on data with arbitrary state and observation domains.
4
+ # -decoding of observation strings via Viterbi
5
+ # -computing token level accuracy across a list of observation sequences
6
+ # against a provided gold standard
7
+
8
+
9
+ require 'rubygems'
10
+ require 'narray'
11
+
12
+ class HMM
13
+
14
+ class Classifier
15
+ attr_accessor :a, :b, :pi, :o_lex, :q_lex, :debug, :train
16
+ # Member variables:
17
+ # pi -- initial state distribution
18
+ # a -- state transition probabilities
19
+ # b -- state-conditional observation probabilities
20
+ # o_lex -- index of observation labels
21
+ # q_lex -- index of state labels
22
+ # debug -- flag for verbose output to stdout
23
+ # train -- a list of labelled sequences for supervised training
24
+
25
+ def initialize
26
+ @o_lex, @q_lex, @train = [], [], []
27
+ end
28
+
29
+ def add_to_train(o, q)
30
+ @o_lex |= o # add new tokens to indexed lexicon
31
+ @q_lex |= q
32
+ @train << Sequence.new(index(o, @o_lex), index(q, @q_lex))
33
+ end
34
+
35
+ def train
36
+ # initialize Pi, A, and B
37
+ @pi = NArray.float(@q_lex.length)
38
+ @a = NArray.float(@q_lex.length, @q_lex.length)
39
+ @b = NArray.float(@q_lex.length, @o_lex.length)
40
+
41
+ # count frequencies to build Pi, A, and B
42
+ @train.each do |sequence|
43
+ @pi[sequence.q.first] +=1
44
+ sequence.q.length.times do |i|
45
+ @b[sequence.q[i], sequence.o[i]] += 1
46
+ @a[sequence.q[i-1], sequence.q[i]] +=1 if i>0
47
+ end
48
+ end
49
+
50
+ # normalize frequencies into probabilities
51
+ @pi /= @pi.sum
52
+ @a /= @a.sum(1)
53
+ @b /= @b.sum(1)
54
+ end
55
+
56
+
57
+ def decode(o_sequence)
58
+ # Viterbi! with log probability math to avoid underflow
59
+
60
+ # encode observations
61
+ o_sequence = index(o_sequence, @o_lex)
62
+
63
+ # initialize. skipping the 0 initialization for psi, as it's never used.
64
+ # psi will have T-1 elements instead of T, allowing it
65
+ # to control the backtrack iterator later.
66
+ delta, psi = [log(pi)+log(b[true, o_sequence.shift])], []
67
+
68
+ # recursive step
69
+ o_sequence.each do |o|
70
+ psi << argmax(delta.last+log(a))
71
+ delta << (delta.last+log(a)).max(0)+log(b[true, o])
72
+ end
73
+
74
+ # initialize Q* with final state
75
+ q_star = [delta.last.sort_index[-1]]
76
+
77
+ # backtrack the optimal state sequence into Q*
78
+ psi.reverse.each do |psi_t|
79
+ q_star.unshift psi_t[q_star.first]
80
+ end
81
+
82
+ puts "delta:", exp(delta).inspect, "psi:", exp(psi).inspect if @debug
83
+
84
+ return deindex(q_star, @q_lex)
85
+ end
86
+
87
+ def accuracy(o, q)
88
+ # token level accuracy across a set of sequences
89
+ correct, total = 0.0, 0.0
90
+ o.length.times do |i|
91
+ correct += (NArray.to_na(decode(o[i])).eq NArray.to_na(q[i])).sum
92
+ total += o[i].length
93
+ end
94
+ correct/total
95
+ end
96
+
97
+ private
98
+
99
+ # index and deindex map between labels and the ordinals of those labels.
100
+ # the ordinals map the labels to rows and columns of Pi, A, and B
101
+ def index(sequence, lexicon)
102
+ lexicon |= sequence # extend a method-local copy of the lex with unknown tokens; the caller's lexicon is not modified
103
+ sequence.collect{|x| lexicon.rindex(x)}
104
+ end
105
+
106
+ def deindex(sequence, lexicon)
107
+ sequence.collect{|i| lexicon[i]}
108
+ end
109
+
110
+ # abstracting out some array element operations for readability
111
+ def log(array)
112
+ # natural log of each element
113
+ array.collect{|n| NMath::log n}
114
+ end
115
+
116
+ def exp(array)
117
+ # e to the power of each element
118
+ array.collect{|n| Math::E ** n}
119
+ end
120
+
121
+ def argmax(narray)
122
+ # horizontal index of the max in each row.
123
+ # the mod is b/c sort_index returns global indices
124
+ # (rather than starting at 0 for each row)
125
+ (narray).sort_index(0)[-1, true] % narray.shape[1]
126
+ end
127
+ end
128
+
129
+ class Sequence
130
+ attr_accessor :o, :q # array of observations, array of states
131
+ def initialize (o, q)
132
+ @o, @q = o, q
133
+ end
134
+ end
135
+ end
@@ -0,0 +1,10 @@
1
+ require 'rubygems'
2
+ require 'test/unit'
3
+ require 'shoulda'
4
+
5
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
6
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
7
+ require 'hmm'
8
+
9
+ class Test::Unit::TestCase
10
+ end
@@ -0,0 +1,7 @@
1
+ require 'helper'
2
+
3
+ class TestHmm < Test::Unit::TestCase
4
+ should "probably rename this file and start testing for real" do
5
+ #flunk "hey buddy, you should probably rename this file and start testing for real"
6
+ end
7
+ end
metadata ADDED
@@ -0,0 +1,85 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: hmm
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - David Tresner-Kirsch
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-11-25 00:00:00 -05:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: thoughtbot-shoulda
17
+ type: :development
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0"
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: narray
27
+ type: :development
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: "0"
34
+ version:
35
+ description: This project is a Ruby gem ('hmm') for machine learning that natively implements a (somewhat) generalized Hidden Markov Model classifier.
36
+ email: dwkirsch@gmail.com
37
+ executables: []
38
+
39
+ extensions: []
40
+
41
+ extra_rdoc_files:
42
+ - LICENSE
43
+ - README.rdoc
44
+ files:
45
+ - .document
46
+ - .gitignore
47
+ - LICENSE
48
+ - README.rdoc
49
+ - Rakefile
50
+ - VERSION
51
+ - hmm.gemspec
52
+ - lib/hmm.rb
53
+ - test/helper.rb
54
+ - test/test_hmm.rb
55
+ has_rdoc: true
56
+ homepage: http://github.com/dtkirsch/hmm
57
+ licenses: []
58
+
59
+ post_install_message:
60
+ rdoc_options:
61
+ - --charset=UTF-8
62
+ require_paths:
63
+ - lib
64
+ required_ruby_version: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: "0"
69
+ version:
70
+ required_rubygems_version: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - ">="
73
+ - !ruby/object:Gem::Version
74
+ version: "0"
75
+ version:
76
+ requirements: []
77
+
78
+ rubyforge_project:
79
+ rubygems_version: 1.3.5
80
+ signing_key:
81
+ specification_version: 3
82
+ summary: HMM Classifier
83
+ test_files:
84
+ - test/helper.rb
85
+ - test/test_hmm.rb