pest 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,61 @@
1
+ require 'narray'
2
+
3
# Estimator whose distributions are plain frequency tables: every distinct
# data vector observed in the estimator's data is counted, and probabilities
# are those counts divided by the number of observations.
class Pest::Estimator::Frequency
  include Pest::Estimator
  include Pest::Function::Probability
  include Pest::Function::Entropy

  # Returns the distribution class used by this estimator.
  def distribution_class
    Distribution
  end

  # Frequency table over the data vectors of one set of variables.
  class Distribution
    include Pest::Estimator::Distribution

    attr_reader :frequencies, :checksum

    # Builds the vector => occurrence-count table the first time it is
    # needed. Later calls do nothing because the table is kept in
    # @frequencies.
    def cache_model
      return unless @frequencies.nil?

      # Unseen vectors default to a count of 0 without being inserted.
      @frequencies = Hash.new(0)
      @estimator.data.data_vectors(variable_array).each do |vector|
        # NOTE(review): vectors are used as hash keys as-is, so this relies on
        # data_vectors yielding components in a consistent order - confirm.
        @frequencies[vector] += 1
      end
    end

    # Returns an NArray holding, for each vector of +data+, its observed
    # count divided by the length of the estimator's own data.
    def probability(data)
      cache_model

      counts = data.data_vectors(variable_array).map do |vector|
        @frequencies[vector].to_f
      end
      NArray[counts] / @estimator.data.length
    end

    # Returns the sum of -p * log2(p) over the distinct observed vectors.
    def entropy
      cache_model

      probabilities = probability(unique_event_dataset)

      (-probabilities * NMath.log2(probabilities)).sum
    end

    private

    # Builds a data set that contains every distinct observed vector once.
    def unique_event_dataset
      vectors = Pest::DataSet::NArray[@frequencies.keys]
      hash = {}
      variable_array.each_index do |i|
        # Extract a single variable from the array of vectors
        hash[variable_array[i]] = vectors[i,true,true].reshape!(vectors.shape[1]).to_a
      end
      Pest::DataSet::NArray.from_hash(hash)
    end

    # Opens the first entry of the temp directory whose path contains
    # "<checksum>.<variables hash>"; returns nil when there is none.
    def find_tempfile
      # Escape the interpolated values so they are matched literally, and
      # build the pattern once instead of once per candidate path.
      pattern = /#{Regexp.escape(@checksum.to_s)}\.#{Regexp.escape(@variables.hash.to_s)}/
      match = Dir.glob("#{Dir::Tmpname.tmpdir}/*").find { |candidate| candidate =~ pattern }
      File.open(match) if match
    end
  end
end
@@ -0,0 +1,28 @@
1
module Pest::Function

  # Mixin for function builders. Each operator or query listed below is
  # defined so that it calls +evaluate+ on the builder and sends the same
  # message, with the same arguments, to the evaluated result.
  module Builder
    delegated = [
      :sprintf,
      :+, :-, :*, :/, :**,
      :<=>, :==,
      :coerce, :floor, :ceil, :truncate, :round,
      :to_i, :to_f, :to_r, :rationalize,
      :hash, :to_s, :i,
      :+@, :-@,
      :eql?,
      :div, :divmod, :%, :modulo, :remainder,
      :abs, :magnitude, :to_int,
      :real?, :integer?, :zero?, :nonzero?,
      :>, :>=, :<, :<=, :between?,
      :pretty_print_instance_variables, :pretty_print_inspect,
      :nil?, :===, :=~, :!~, :!, :!=
    ]

    delegated.each do |name|
      define_method(name) { |*args| evaluate.send(name, *args) }
    end

    private

    # Converts each argument with the estimator's +to_variable+ and returns
    # the results collected into a Set.
    def parse(variables)
      resolved = variables.map { |arg| estimator.to_variable(arg) }
      resolved.to_set
    end

  end
end
@@ -0,0 +1,35 @@
1
module Pest::Function
  # Adds an +entropy+ query (aliased as +h+) to an estimator.
  module Entropy
    # Returns a Builder for the entropy of +variables+. The value is only
    # computed when the builder is evaluated.
    def entropy(*variables)
      Builder.new(self, variables)
    end
    alias :h :entropy

    # Lazily evaluated entropy expression: H(event) or, after +given+,
    # H(event | givens).
    class Builder
      include Pest::Function::Builder

      attr_reader :estimator, :event, :givens

      def initialize(estimator, variables)
        @estimator = estimator
        @event = parse(variables)
        @givens = [].to_set
      end

      # Adds conditioning variables; returns self so calls can be chained.
      def given(*variables)
        @givens.merge parse(variables)
        self
      end

      # Computes the entropy. Without givens this is the entropy of the event
      # distribution. With givens it is the conditional entropy
      # H(X | Y) = H(X, Y) - H(Y), so the joint term has to cover the event
      # and the givens together, not the event alone.
      def evaluate
        return estimator.distributions[event].entropy if givens.empty?

        # NOTE(review): assumes distributions can be indexed by any Set of
        # variables, as it already is for +event+ and +givens+ - confirm.
        joint = estimator.distributions[event | givens].entropy
        marginal = estimator.distributions[givens].entropy
        joint - marginal
      end
    end
  end
end
@@ -0,0 +1,41 @@
1
module Pest::Function
  # Adds a +probability+ query (aliased as +p+) to an estimator.
  module Probability
    # Returns a Builder for the probability of +variables+. The value is
    # only computed when the builder is evaluated.
    def probability(*variables)
      Builder.new(self, variables)
    end
    alias :p :probability

    # Lazily evaluated probability expression: P(event) or, after +given+,
    # P(event | givens), evaluated on the data set chosen with +in+.
    class Builder
      include Pest::Function::Builder

      attr_reader :estimator, :data_source, :event, :givens

      def initialize(estimator, variables)
        @estimator = estimator
        # No data set is selected until +in+ is called. The original code
        # assigned the reader's own (nil) value here, which had the same effect.
        @data_source = nil
        @event = parse(variables)
        @givens = [].to_set
      end

      # Adds conditioning variables; returns self so calls can be chained.
      def given(*variables)
        @givens.merge parse(variables)
        self
      end

      # Selects the data set to evaluate on; returns self for chaining.
      def in(data_set)
        @data_source = data_set
        self
      end

      # Computes the probability. Without givens this is the probability of
      # the event distribution. With givens it is the conditional probability
      # P(X | Y) = P(X, Y) / P(Y), so the numerator has to cover the event
      # and the givens together, not the event alone.
      def evaluate
        return estimator.distributions[event].probability(data_source) if givens.empty?

        # NOTE(review): assumes distributions can be indexed by any Set of
        # variables, as it already is for +event+ and +givens+ - confirm.
        joint = estimator.distributions[event | givens].probability(data_source)
        marginal = estimator.distributions[givens].probability(data_source)
        joint / marginal
      end
    end
  end
end
@@ -0,0 +1,34 @@
1
# A named variable identified by a UUID. Two variables are equal when their
# "name:uuid" identifiers are equal.
class Pest::Variable
  # Rebuilds a variable from the "name:uuid" form produced by +serialize+.
  # A trailing newline is ignored. Raises RuntimeError when the string does
  # not have that form.
  def self.deserialize(string)
    # \A and \z anchor the whole string; ^ and $ would accept a valid
    # identifier on any single line of a multi-line input.
    match = /\A([^\:]+)\:(\w{8}-\w{4}-\w{4}-\w{4}-\w{12})\z/.match(string.chomp)
    raise "Unable to parse string" unless match

    new :name => match[1], :uuid => match[2]
  end

  attr_reader :name, :uuid

  # args may contain :name and :uuid; a random UUID is generated when no
  # :uuid is given.
  def initialize(args={})
    @name = args[:name]
    @uuid = args[:uuid] || UUIDTools::UUID.random_create
  end

  # Returns the "name:uuid" string used for serialization, hashing,
  # equality and ordering.
  def identifier
    "#{name}:#{uuid}"
  end
  alias :serialize :identifier

  # Hash code derived from the identifier, consistent with == / eql?.
  def hash
    identifier.hash
  end

  # True when +other+ is a variable of this class with the same identifier.
  def ==(other)
    other.kind_of?(self.class) && identifier == other.identifier
  end
  alias :eql? :==

  # Orders variables by identifier. Returns nil for objects that do not
  # expose an identifier, following the usual <=> contract, instead of
  # raising NoMethodError.
  def <=>(other)
    return nil unless other.respond_to?(:identifier)

    identifier <=> other.identifier
  end
end
@@ -0,0 +1,7 @@
1
module Pest
  # Version string of the library. Frozen so the constant cannot be mutated
  # in place by callers.
  VERSION = "0.0.1".freeze

  # Returns the library version string.
  def self.version
    VERSION
  end
end
data/pest.gemspec ADDED
@@ -0,0 +1,112 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
+ # -*- encoding: utf-8 -*-
5
+
6
Gem::Specification.new do |s|
  s.name = "pest"
  s.version = "0.0.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Ryan Michael"]
  s.date = "2012-06-24"
  s.description = "Wrappers to facilitate different classes of probability estimators"
  s.email = "kerinin@gmail.com"
  s.extra_rdoc_files = [
    "README.md"
  ]
  s.files = [
    ".travis.yml",
    "Gemfile",
    "README.md",
    "Rakefile",
    "VERSION",
    "lib/pest.rb",
    "lib/pest/data_set.rb",
    "lib/pest/data_set/hash.rb",
    "lib/pest/data_set/narray.rb",
    "lib/pest/estimator.rb",
    "lib/pest/estimator/frequency.rb",
    "lib/pest/function.rb",
    "lib/pest/function/entropy.rb",
    "lib/pest/function/probability.rb",
    "lib/pest/variable.rb",
    "lib/pest/version.rb",
    "pest.gemspec",
    "spec/pest/data_set/hash_spec.rb",
    "spec/pest/data_set/narray_spec.rb",
    "spec/pest/data_set_spec.rb",
    "spec/pest/estimator/bernoulli_spec.rb",
    "spec/pest/estimator/frequency_spec.rb",
    "spec/pest/estimator/gaussian_spec.rb",
    "spec/pest/estimator/multinomial_spec.rb",
    "spec/pest/estimator/parzen_spec.rb",
    "spec/pest/estimator/svd_spec.rb",
    "spec/pest/estimator_spec.rb",
    "spec/pest/function/entropy_spec.rb",
    "spec/pest/function/probability_spec.rb",
    "spec/pest/variable_spec.rb",
    "spec/pest_spec.rb",
    "spec/spec_helper.rb"
  ]
  s.homepage = "http://github.com/kerinin/pest"
  s.licenses = ["MIT"]
  s.require_paths = ["lib"]
  s.rubygems_version = "1.8.24"
  s.summary = "Probability Estimation"

  # Each dependency is declared exactly once. The generated file listed the
  # gem itself ("pest") as a runtime dependency and repeated every
  # development dependency three times.
  # NOTE(review): this file is generated by jeweler; the same cleanup
  # presumably has to be applied to its inputs or it will be regenerated.
  runtime_gems = %w[narray uuidtools]
  development_gems = %w[jeweler rake pry rspec]

  # Typed dependencies are only used when the specification supports
  # specification_version and RubyGems is at least 1.2.0; otherwise every
  # dependency is added with the untyped add_dependency.
  typed_dependencies = false
  if s.respond_to? :specification_version
    s.specification_version = 3
    typed_dependencies = Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0')
  end

  if typed_dependencies
    runtime_gems.each { |gem_name| s.add_runtime_dependency(gem_name, [">= 0"]) }
    development_gems.each { |gem_name| s.add_development_dependency(gem_name, [">= 0"]) }
  else
    (runtime_gems + development_gems).each { |gem_name| s.add_dependency(gem_name, [">= 0"]) }
  end
end
112
+
@@ -0,0 +1,108 @@
1
+ require 'spec_helper'
2
+
3
+ # NOTE: make sure you're clear on the relationship between variables,
4
+ # their names, and the keys used in @hash. I think they're getting
5
+ # conflated
6
+
7
# Specs for the hash-backed data set: translator registration, loading from
# a marshalled file, row-wise access, saving and length.
describe Pest::DataSet::Hash do
  context "class methods" do
    before(:each) do
      @class = Pest::DataSet::Hash
    end

    describe "::translators" do
      it "maps File => from_file" do
        @class.translators[File].should == :from_file
      end

      it "maps String => from_file" do
        @class.translators[String].should == :from_file
      end

      it "maps Symbol => from_file" do
        @class.translators[Symbol].should == :from_file
      end
    end

    describe "::from_file" do
      before(:each) do
        # A real, readable file handle stands in for 'foo'; its contents are
        # never parsed because Marshal.restore is stubbed.
        @file = File.open(__FILE__, 'r')
        File.stub(:open).with('foo', 'r').and_return(@file)
        Marshal.stub(:restore).with(@file).and_return({:foo => 1})
      end

      after(:each) do
        # Release the handle opened in the before hook.
        @file.close unless @file.closed?
      end

      it "looks for file if passed string" do
        File.should_receive(:open).with('foo', 'r')
        @class.from_file('foo')
      end

      it "unmarshals" do
        Marshal.should_receive(:restore)
        @class.from_file('foo')
      end

      it "sets variables" do
        @class.from_file('foo').variables.length.should == 1
      end

      it "generates variables" do
        @class.from_file('foo').variables[:foo].should be_a(Pest::Variable)
      end
    end
  end

  # Shared fixture for the instance-level examples below.
  before(:each) do
    @data = {:foo => [1,2,3], :bar => [3,4,5]}
    @instance = Pest::DataSet::Hash.new(@data)
  end

  describe "#to_hash" do
    it "returns a hash" do
      @instance.to_hash.should == @data
    end
  end

  describe "#data_vectors" do
    it "returns an enumerable" do
      @instance.data_vectors.should be_a(Enumerable)
    end

    it "formats data as a list of rows" do
      @instance.data_vectors.first.should == [1,3]
    end
  end

  describe "#save" do
    before(:each) do
      @file = File.open(__FILE__, 'r')
      File.stub(:open).with('foo', 'w').and_return(@file)
      Marshal.stub(:dump)
    end

    after(:each) do
      # Release the handle opened in the before hook.
      @file.close unless @file.closed?
    end

    it "marshals to file from path" do
      Marshal.should_receive(:dump).with(@data, @file)
      @instance.save('foo')
    end

    it "marshals to file from file" do
      Marshal.should_receive(:dump).with(@data, @file)
      @instance.save(@file)
    end

    it "saves to tmp dir if no filename specified" do
      File.should_receive(:open).with(/pest_hash_dataset/, anything(), anything()).and_return(@file)
      @instance.save
    end
  end

  describe "#length" do
    # Uses the shared fixture above: two variables with three values each.
    it "delegates to hash" do
      @instance.length.should == 3
    end
  end
end
@@ -0,0 +1,141 @@
1
+ require 'spec_helper'
2
+ require 'narray'
3
+
4
# Specs for the NArray-backed data set: translator registration, loading
# from files, hashes and CSV, conversion back to a hash, row-wise access and
# saving.
describe Pest::DataSet::NArray do
  before(:each) do
    @v1 = Pest::Variable.new(:name => :foo)
    @v2 = Pest::Variable.new(:name => :bar)
    @class = Pest::DataSet::NArray
  end

  describe "::translators" do
    it "maps Pest::DataSet::Hash => from_hash" do
      @class.translators[Pest::DataSet::Hash].should == :from_hash
    end

    it "maps File => from_file" do
      @class.translators[File].should == :from_file
    end

    it "maps String => from_file" do
      @class.translators[String].should == :from_file
    end
  end

  describe "::from_file" do
    before(:each) do
      @matrix = @class.from_hash @v1 => [1,2,3], @v2 => [4,5,6]
      @file = Tempfile.new('test')
      @matrix.save(@file.path)
    end

    after(:each) do
      # Close and remove the temp file instead of leaving it to the GC.
      @file.close unless @file.closed?
      @file.unlink
    end

    it "loads from NArray IO" do
      @class.from_file(@file).should == @matrix
    end

    it "looks for NArray from string" do
      @class.from_file(@file.path).should == @matrix
    end

    it "sets variables" do
      @class.from_file(@file).variables.values.should == [@v1, @v2]
    end
  end

  describe "::from_hash" do
    before(:each) do
      @matrix = NArray.to_na [[4,5,6],[1,2,3]]
    end

    it "creates a NArray" do
      @class.from_hash({@v1 => [1,2,3], @v2 => [4,5,6]}).should == @matrix
    end

    it "sets variables" do
      @class.from_hash({@v1 => [1,2,3], @v2 => [4,5,6]}).variables.values.should == [@v1, @v2]
    end

    it "generates Pest::Variables if not passed" do
      @class.from_hash({:foo => [1,2,3]}).variables[:foo].should be_a(Pest::Variable)
    end
  end

  describe "::from_csv" do
    before(:each) do
      # Header row followed by three data rows.
      @file = Tempfile.new('test_csv')
      CSV.open(@file.path, 'w') do |csv|
        csv << ["foo", "bar"]
        csv << [1,1]
        csv << [1,2]
        csv << [1,3]
      end
    end

    after(:each) do
      # Close and remove the temp file instead of leaving it to the GC.
      @file.close unless @file.closed?
      @file.unlink
    end

    it "creates variables from first line" do
      @instance = @class.from_csv @file.path
      @instance.variable_array.map(&:name).should == ["bar", "foo"]
    end

    it "creates data from the rest" do
      @instance = @class.from_csv @file.path
      @instance.to_hash.should == {@instance.variables["foo"] => [1,1,1], @instance.variables["bar"] => [1,2,3]}
    end

    it "accepts a filename" do
      @instance = @class.from_csv @file.path
      @instance.to_hash.should == {@instance.variables["foo"] => [1,1,1], @instance.variables["bar"] => [1,2,3]}
    end

    it "accepts an IO" do
      @instance = @class.from_csv @file
      @instance.to_hash.should == {@instance.variables["foo"] => [1,1,1], @instance.variables["bar"] => [1,2,3]}
    end
  end

  describe "#to_hash" do
    before(:each) do
      @instance = @class.from_hash :foo => [1,2,3], :bar => [4,5,6]
    end

    it "sets keys" do
      @instance.to_hash.keys.should == @instance.variables.values
    end

    it "sets values" do
      @instance.to_hash.values.should == [[4,5,6],[1,2,3]]
    end
  end

  describe "#data_vectors" do
    before(:each) do
      @instance = @class.from_hash :foo => [1,2,3], :bar => [4,5,6]
    end

    it "returns an enumerable" do
      @instance.data_vectors.should be_a(Enumerable)
    end

    it "slices" do
      # NOTE: This is returning an array - probably could be more efficient
      @instance.data_vectors.first.should == [4,1]
    end
  end

  describe "#save" do
    before(:each) do
      @file = Tempfile.new('test')
      @instance = @class.from_hash :foo => [1,2,3], :bar => [4,5,6]
    end

    after(:each) do
      # Close and remove the temp file instead of leaving it to the GC.
      @file.close unless @file.closed?
      @file.unlink
    end

    it "marshals to file" do
      @instance.save(@file)
      @class.from_file(@file.path).should == @instance
    end

    it "saves to tmp dir if no filename specified" do
      Tempfile.should_receive(:new).and_return(@file)
      @instance.save
      @class.from_file(@file.path).should == @instance
    end
  end
end