pest 0.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,61 @@
1
+ require 'narray'
2
+
3
# Estimator that models distributions by counting how often each distinct
# data vector occurs in the estimator's data.
class Pest::Estimator::Frequency
  include Pest::Estimator
  include Pest::Function::Probability
  include Pest::Function::Entropy

  # Returns the class used for this estimator's distributions.
  def distribution_class
    Distribution
  end

  # Occurrence-count table over the data vectors of one set of variables.
  #
  # NOTE(review): @estimator, @variables and variable_array are not defined in
  # this class; presumably they come from Pest::Estimator::Distribution -
  # confirm against that module.
  class Distribution
    include Pest::Estimator::Distribution

    attr_reader :frequencies, :checksum

    # Builds the vector => occurrence-count table from the estimator's data.
    # The table is built only once; later calls return without doing anything.
    def cache_model
      return unless @frequencies.nil?

      # Default of 0 lets unseen vectors read as "never observed".
      @frequencies = Hash.new(0)
      @estimator.data.data_vectors(variable_array).each do |vector|
        # NOTE(review): vectors are used as hash keys as-is; nothing here
        # enforces a consistent ordering, so that has to be guaranteed by
        # data_vectors(variable_array) - confirm.
        @frequencies[vector] += 1
      end
    end

    # Looks up the cached count of every vector in +data+ and divides the
    # counts (as floats) by the length of the estimator's data.
    #
    # data - object responding to data_vectors(variable_array).
    #
    # Returns an NArray of the resulting values.
    def probability(data)
      cache_model

      counts = data.data_vectors(variable_array).map do |vector|
        @frequencies[vector].to_f
      end

      NArray[counts] / @estimator.data.length
    end

    # Sums -p * log2(p) over the probabilities of the vectors stored as keys
    # of the frequency table.
    def entropy
      cache_model

      event_probabilities = probability(unique_event_dataset)

      (-event_probabilities * NMath.log2(event_probabilities)).sum
    end

    private

    # Builds a Pest::DataSet::NArray from the keys of the frequency table,
    # with one entry per variable in variable_array.
    def unique_event_dataset
      vectors = Pest::DataSet::NArray[@frequencies.keys]
      columns = {}
      variable_array.each_with_index do |variable, i|
        # Extract a single variable from the array of vectors
        columns[variable] = vectors[i, true, true].reshape!(vectors.shape[1]).to_a
      end
      Pest::DataSet::NArray.from_hash(columns)
    end

    # Opens the first entry of the temp directory whose path contains
    # "<checksum>.<variables hash>".
    #
    # Returns an open File (the caller is responsible for closing it), or nil
    # when no entry matches.
    def find_tempfile
      # Escape both parts so they are matched literally, and build the
      # pattern once instead of once per candidate path.
      pattern = /#{Regexp.escape(@checksum.to_s)}\.#{Regexp.escape(@variables.hash.to_s)}/
      path = Dir.glob("#{Dir::Tmpname.tmpdir}/*").find { |candidate| candidate =~ pattern }
      File.open(path) if path
    end
  end
end
@@ -0,0 +1,28 @@
1
module Pest::Function

  # Mixin for function builders. The including class is expected to provide
  # +evaluate+ and +estimator+ (both are called below).
  module Builder
    # Methods answered by forwarding to the result of 'evaluate', so that a
    # builder can be used where its evaluated value is expected.
    delegated = [
      :sprintf, :+, :-, :*, :/, :**, :<=>, :==, :coerce,
      :floor, :ceil, :truncate, :round,
      :to_i, :to_f, :to_r, :rationalize, :hash, :to_s,
      :i, :+@, :-@, :eql?,
      :div, :divmod, :%, :modulo, :remainder, :abs, :magnitude,
      :to_int, :real?, :integer?, :zero?, :nonzero?,
      :>, :>=, :<, :<=, :between?,
      :pretty_print_instance_variables, :pretty_print_inspect,
      :nil?, :===, :=~, :!~, :!, :!=
    ]

    delegated.each do |name|
      define_method(name) { |*args| evaluate.send(name, *args) }
    end

    private

    # Converts each argument through estimator.to_variable and returns the
    # results as a Set.
    def parse(variables)
      converted = variables.map { |arg| estimator.to_variable(arg) }
      converted.to_set
    end

  end
end
@@ -0,0 +1,35 @@
1
module Pest::Function
  # Adds an +entropy+ (alias +h+) query to the including estimator.
  module Entropy
    # Starts an entropy query over +variables+.
    #
    # Returns a Builder; chain +given+ to condition it. The value is computed
    # when +evaluate+ is called, directly or through one of the methods that
    # Pest::Function::Builder delegates to it.
    def entropy(*variables)
      Builder.new(self, variables)
    end
    alias :h :entropy

    # Collects the event and conditioning variables of one entropy query.
    class Builder
      include Pest::Function::Builder

      attr_reader :estimator, :event, :givens

      # estimator - object providing +to_variable+ and +distributions+.
      # variables - the event variables, in any form +parse+ accepts.
      def initialize(estimator, variables)
        @estimator = estimator
        @event = parse(variables)
        @givens = Set.new
      end

      # Adds conditioning variables. Returns self so calls can be chained.
      def given(*variables)
        @givens.merge parse(variables)
        self
      end

      # Computes H(event) when there are no givens, otherwise
      # H(event | givens) = H(event, givens) - H(givens).
      def evaluate
        return estimator.distributions[event].entropy if givens.empty?

        # The joint term must cover the event AND the conditioning variables;
        # using the event alone would yield H(event) - H(givens).
        # NOTE(review): assumes estimator.distributions can be indexed by any
        # set of variables - confirm.
        joint = estimator.distributions[event | givens].entropy
        conditional = estimator.distributions[givens].entropy
        joint - conditional
      end
    end
  end
end
@@ -0,0 +1,41 @@
1
module Pest::Function
  # Adds a +probability+ (alias +p+) query to the including estimator.
  module Probability
    # Starts a probability query over +variables+.
    #
    # Returns a Builder; chain +given+ to condition it and +in+ to choose the
    # data set it is evaluated against.
    def probability(*variables)
      Builder.new(self, variables)
    end
    alias :p :probability

    # Collects the event, conditioning variables and data source of one
    # probability query.
    class Builder
      include Pest::Function::Builder

      attr_reader :estimator, :data_source, :event, :givens

      # estimator - object providing +to_variable+ and +distributions+.
      # variables - the event variables, in any form +parse+ accepts.
      def initialize(estimator, variables)
        @estimator = estimator
        # No data source until +in+ is called. (This used to be written as
        # "@data_source = data_source", which just re-read the unset reader.)
        @data_source = nil
        @event = parse(variables)
        @givens = Set.new
      end

      # Adds conditioning variables. Returns self so calls can be chained.
      def given(*variables)
        @givens.merge parse(variables)
        self
      end

      # Sets the data set the probabilities are evaluated against.
      # Returns self so calls can be chained.
      def in(data_set)
        @data_source = data_set
        self
      end

      # Computes P(event) when there are no givens, otherwise
      # P(event | givens) = P(event, givens) / P(givens), evaluated against
      # data_source (which is nil unless +in+ was called).
      def evaluate
        return estimator.distributions[event].probability(data_source) if givens.empty?

        # The joint term must cover the event AND the conditioning variables;
        # using the event alone would yield P(event) / P(givens).
        # NOTE(review): assumes estimator.distributions can be indexed by any
        # set of variables - confirm.
        joint = estimator.distributions[event | givens].probability(data_source)
        conditional = estimator.distributions[givens].probability(data_source)
        joint / conditional
      end
    end
  end
end
@@ -0,0 +1,34 @@
1
# A named variable identified by the pair "name:uuid".
class Pest::Variable
  # Rebuilds a variable from the "name:uuid" form produced by +serialize+.
  # A single trailing line terminator is ignored.
  #
  # Raises RuntimeError when the string does not have that form.
  def self.deserialize(string)
    # \A / \z anchor the whole string. With ^ / $ the pattern matched any
    # single line, so extra lines were silently accepted and a name
    # containing a newline lost everything before its last line.
    if string.chomp =~ /\A([^\:]+)\:(\w{8}-\w{4}-\w{4}-\w{4}-\w{12})\z/
      new :name => $1, :uuid => $2
    else
      raise "Unable to parse string"
    end
  end

  attr_reader :name, :uuid

  # args - Hash with :name and, optionally, :uuid. A random UUID is generated
  #        when :uuid is missing.
  def initialize(args={})
    @name = args[:name]
    @uuid = args[:uuid] || UUIDTools::UUID.random_create
  end

  # Returns the "name:uuid" string that equality, hashing and ordering are
  # all based on.
  def identifier
    "#{name}:#{uuid}"
  end
  alias :serialize :identifier

  # Hash of the identifier, consistent with == / eql? so variables can be
  # used as hash keys.
  def hash
    identifier.hash
  end

  # True when +other+ is a variable of this class with the same identifier.
  def ==(other)
    other.kind_of?(self.class) && identifier == other.identifier
  end
  alias :eql? :==

  # Orders variables by identifier. Returns nil, as <=> conventionally does,
  # when +other+ is not a variable of this class.
  def <=>(other)
    return nil unless other.kind_of?(self.class)

    identifier <=> other.identifier
  end
end
@@ -0,0 +1,7 @@
1
module Pest
  # Version string of the library.
  VERSION = "0.0.1"

  class << self
    # Returns VERSION.
    def version
      VERSION
    end
  end
end
data/pest.gemspec ADDED
@@ -0,0 +1,112 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
+ # -*- encoding: utf-8 -*-
5
+
6
# NOTE(review): this file is generated by jeweler. The dependency fixes below
# (no dependency of the gem on itself, each dependency declared once) must
# also be made wherever jeweler reads the dependency list from, otherwise
# 'rake gemspec' will bring the old entries back.
Gem::Specification.new do |s|
  s.name = "pest"
  s.version = "0.0.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Ryan Michael"]
  s.date = "2012-06-24"
  s.description = "Wrappers to facilitate different classes of probability estimators"
  s.email = "kerinin@gmail.com"
  s.extra_rdoc_files = [
    "README.md"
  ]
  s.files = [
    ".travis.yml",
    "Gemfile",
    "README.md",
    "Rakefile",
    "VERSION",
    "lib/pest.rb",
    "lib/pest/data_set.rb",
    "lib/pest/data_set/hash.rb",
    "lib/pest/data_set/narray.rb",
    "lib/pest/estimator.rb",
    "lib/pest/estimator/frequency.rb",
    "lib/pest/function.rb",
    "lib/pest/function/entropy.rb",
    "lib/pest/function/probability.rb",
    "lib/pest/variable.rb",
    "lib/pest/version.rb",
    "pest.gemspec",
    "spec/pest/data_set/hash_spec.rb",
    "spec/pest/data_set/narray_spec.rb",
    "spec/pest/data_set_spec.rb",
    "spec/pest/estimator/bernoulli_spec.rb",
    "spec/pest/estimator/frequency_spec.rb",
    "spec/pest/estimator/gaussian_spec.rb",
    "spec/pest/estimator/multinomial_spec.rb",
    "spec/pest/estimator/parzen_spec.rb",
    "spec/pest/estimator/svd_spec.rb",
    "spec/pest/estimator_spec.rb",
    "spec/pest/function/entropy_spec.rb",
    "spec/pest/function/probability_spec.rb",
    "spec/pest/variable_spec.rb",
    "spec/pest_spec.rb",
    "spec/spec_helper.rb"
  ]
  s.homepage = "http://github.com/kerinin/pest"
  s.licenses = ["MIT"]
  s.require_paths = ["lib"]
  s.rubygems_version = "1.8.24"
  s.summary = "Probability Estimation"

  # Each dependency is listed exactly once, and the gem no longer depends on
  # itself.
  runtime_dependencies = ["narray", "uuidtools"]
  development_dependencies = ["jeweler", "rake", "pry", "rspec"]
  any_version = [">= 0"]

  typed_dependencies = false
  if s.respond_to? :specification_version
    s.specification_version = 3
    # Runtime/development dependency types are only used on RubyGems >= 1.2.0.
    typed_dependencies = Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0')
  end

  if typed_dependencies
    runtime_dependencies.each { |name| s.add_runtime_dependency(name, any_version) }
    development_dependencies.each { |name| s.add_development_dependency(name, any_version) }
  else
    # Older RubyGems: declare everything through the untyped add_dependency.
    (runtime_dependencies + development_dependencies).each do |name|
      s.add_dependency(name, any_version)
    end
  end
end
112
+
@@ -0,0 +1,108 @@
1
+ require 'spec_helper'
2
+
3
+ # NOTE: make sure you're clear on the relationship between variables,
4
+ # their names, and the keys used in @hash. I think they're getting
5
+ # conflated
6
+
7
# Specs for Pest::DataSet::Hash: class-level translators and loading, then
# instance behaviour on a two-variable data set.
describe Pest::DataSet::Hash do
  context "class methods" do
    before(:each) do
      @class = Pest::DataSet::Hash
    end

    describe "::translators" do
      it "maps File => from_file" do
        @class.translators[File].should == :from_file
      end

      it "maps String => from_file" do
        @class.translators[String].should == :from_file
      end

      it "maps Symbol => from_file" do
        @class.translators[Symbol].should == :from_file
      end
    end

    describe "::from_file" do
      before(:each) do
        # This spec file only serves as a real, readable file handle; its
        # content is never parsed because Marshal.restore is stubbed.
        @file = File.open(__FILE__, 'r')
        File.stub(:open).with('foo', 'r').and_return(@file)
        Marshal.stub(:restore).with(@file).and_return({:foo => 1})
      end

      # Release the handle opened above so examples do not leak descriptors.
      after(:each) do
        @file.close if @file && !@file.closed?
      end

      it "looks for file if passed string" do
        File.should_receive(:open).with('foo', 'r')
        @class.from_file('foo')
      end

      it "unmarshals" do
        Marshal.should_receive(:restore)
        @class.from_file('foo')
      end

      it "sets variables" do
        @class.from_file('foo').variables.length.should == 1
      end

      it "generates variables" do
        @class.from_file('foo').variables[:foo].should be_a(Pest::Variable)
      end
    end
  end

  before(:each) do
    @data = {:foo => [1,2,3], :bar => [3,4,5]}
    @instance = Pest::DataSet::Hash.new(@data)
  end

  describe "#to_hash" do
    it "returns a hash" do
      @instance.to_hash.should == @data
    end
  end

  describe "#data_vectors" do
    it "returns an enumerable" do
      @instance.data_vectors.should be_a(Enumerable)
    end

    it "formats data as a list of rows" do
      @instance.data_vectors.first.should == [1,3]
    end
  end

  describe "#save" do
    before(:each) do
      # Stand-in handle; nothing is written because Marshal.dump is stubbed.
      @file = File.open(__FILE__, 'r')
      File.stub(:open).with('foo', 'w').and_return(@file)
      Marshal.stub(:dump)
    end

    # Release the handle opened above so examples do not leak descriptors.
    after(:each) do
      @file.close if @file && !@file.closed?
    end

    it "marshals to file from path" do
      Marshal.should_receive(:dump).with(@data, @file)
      @instance.save('foo')
    end

    it "marshals to file from file" do
      Marshal.should_receive(:dump).with(@data, @file)
      @instance.save(@file)
    end

    it "saves to tmp dir if no filename specified" do
      File.should_receive(:open).with(/pest_hash_dataset/, anything(), anything()).and_return(@file)
      @instance.save
    end
  end

  describe "#length" do
    # Uses @data / @instance from the outer before hook; this group used to
    # repeat that hook verbatim.
    it "delegates to hash" do
      @instance.length.should == 3
    end
  end
end
@@ -0,0 +1,141 @@
1
+ require 'spec_helper'
2
+ require 'narray'
3
+
4
# Specs for Pest::DataSet::NArray: translators, construction from files,
# hashes and CSV, and instance behaviour.
describe Pest::DataSet::NArray do
  before(:each) do
    @v1 = Pest::Variable.new(:name => :foo)
    @v2 = Pest::Variable.new(:name => :bar)
    @class = Pest::DataSet::NArray
  end

  describe "::translators" do
    it "maps Pest::DataSet::Hash => from_hash" do
      @class.translators[Pest::DataSet::Hash].should == :from_hash
    end

    it "maps File => from_file" do
      @class.translators[File].should == :from_file
    end

    it "maps String => from_file" do
      @class.translators[String].should == :from_file
    end
  end

  describe "::from_file" do
    before(:each) do
      @matrix = @class.from_hash @v1 => [1,2,3], @v2 => [4,5,6]
      @file = Tempfile.new('test')
      @matrix.save(@file.path)
    end

    # Close and unlink the tempfile so examples do not leave files behind.
    after(:each) do
      @file.close! if @file
    end

    it "loads from NArray IO" do
      @class.from_file(@file).should == @matrix
    end

    it "looks for NArray from string" do
      @class.from_file(@file.path).should == @matrix
    end

    it "sets variables" do
      @class.from_file(@file).variables.values.should == [@v1, @v2]
    end
  end

  describe "::from_hash" do
    before(:each) do
      @matrix = NArray.to_na [[4,5,6],[1,2,3]]
    end

    it "creates a NArray" do
      @class.from_hash({@v1 => [1,2,3], @v2 => [4,5,6]}).should == @matrix
    end

    it "sets variables" do
      @class.from_hash({@v1 => [1,2,3], @v2 => [4,5,6]}).variables.values.should == [@v1, @v2]
    end

    it "generates Pest::Variables if not passed" do
      @class.from_hash({:foo => [1,2,3]}).variables[:foo].should be_a(Pest::Variable)
    end
  end

  describe "::from_csv" do
    before(:each) do
      # Header row followed by three data rows.
      @file = Tempfile.new('test_csv')
      CSV.open(@file.path, 'w') do |csv|
        csv << ["foo", "bar"]
        csv << [1,1]
        csv << [1,2]
        csv << [1,3]
      end
    end

    # Close and unlink the tempfile so examples do not leave files behind.
    after(:each) do
      @file.close! if @file
    end

    it "creates variables from first line" do
      @instance = @class.from_csv @file.path
      @instance.variable_array.map(&:name).should == ["bar", "foo"]
    end

    it "creates data from the rest" do
      @instance = @class.from_csv @file.path
      @instance.to_hash.should == {@instance.variables["foo"] => [1,1,1], @instance.variables["bar"] => [1,2,3]}
    end

    it "accepts a filename" do
      @instance = @class.from_csv @file.path
      @instance.to_hash.should == {@instance.variables["foo"] => [1,1,1], @instance.variables["bar"] => [1,2,3]}
    end

    it "accepts an IO" do
      @instance = @class.from_csv @file
      @instance.to_hash.should == {@instance.variables["foo"] => [1,1,1], @instance.variables["bar"] => [1,2,3]}
    end
  end

  describe "#to_hash" do
    before(:each) do
      @instance = @class.from_hash :foo => [1,2,3], :bar => [4,5,6]
    end

    it "sets keys" do
      @instance.to_hash.keys.should == @instance.variables.values
    end

    it "sets values" do
      @instance.to_hash.values.should == [[4,5,6],[1,2,3]]
    end
  end

  describe "#data_vectors" do
    before(:each) do
      @instance = @class.from_hash :foo => [1,2,3], :bar => [4,5,6]
    end

    it "returns an enumerable" do
      @instance.data_vectors.should be_a(Enumerable)
    end

    it "slices" do
      # NOTE: This is returning an array - probably could be more efficient
      @instance.data_vectors.first.should == [4,1]
    end
  end

  describe "#save" do
    before(:each) do
      @file = Tempfile.new('test')
      @instance = @class.from_hash :foo => [1,2,3], :bar => [4,5,6]
    end

    # Close and unlink the tempfile so examples do not leave files behind.
    after(:each) do
      @file.close! if @file
    end

    it "marshals to file" do
      @instance.save(@file)
      @class.from_file(@file.path).should == @instance
    end

    it "saves to tmp dir if no filename specified" do
      Tempfile.should_receive(:new).and_return(@file)
      @instance.save
      @class.from_file(@file.path).should == @instance
    end
  end
end