pest 0.0.0 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -13,17 +13,6 @@ module Pest::Estimator
13
13
  @distributions ||= DistributionList.new(self)
14
14
  end
15
15
 
16
- def to_variable(arg)
17
- variable = case arg.class.name
18
- when 'Pest::Variable'
19
- arg
20
- when 'String', 'Symbol'
21
- variables[arg] || Pest::Variable.new(:name => arg)
22
- end
23
- raise ArgumentError unless variables.values.include?(variable)
24
- variable
25
- end
26
-
27
16
  module Distribution
28
17
  attr_reader :variables
29
18
 
@@ -36,7 +25,11 @@ module Pest::Estimator
36
25
  variables.to_a.sort
37
26
  end
38
27
 
39
- def probability
28
+ def batch_probability(*args)
29
+ raise NotImplementedError
30
+ end
31
+
32
+ def probability(*args)
40
33
  raise NotImplementedError
41
34
  end
42
35
  end
@@ -46,23 +39,10 @@ module Pest::Estimator
46
39
  @estimator = estimator
47
40
  end
48
41
 
49
- def parse_args(args)
50
- set = if args.kind_of? Array
51
- if args.any? {|arg| arg.kind_of?(::Set)}
52
- args.inject(::Set.new) {|set, el| set + el.to_set}
53
- else
54
- args.flatten.to_set
55
- end
56
- elsif args.kind_of? ::Set
57
- args
58
- else
59
- Array(args).to_set
60
- end
61
- set.map! {|arg| @estimator.to_variable(arg) }
62
- end
63
-
64
42
  def [](*args)
65
- set = parse_args(args)
43
+ set = args.to_set
44
+ raise ArgumentError unless (set - @estimator.variables).empty?
45
+
66
46
  unless has_key? set
67
47
  self[set] = @estimator.distribution_class.new(@estimator, set)
68
48
  end
@@ -11,15 +11,16 @@ class Pest::Estimator::Frequency
11
11
 
12
12
  class Distribution
13
13
  include Pest::Estimator::Distribution
14
-
14
+
15
+ OFFSET = 0
15
16
  attr_reader :frequencies, :checksum
16
17
 
17
18
  def cache_model
18
19
  if @frequencies.nil?
19
20
  @frequencies = Hash.new(0)
20
- @estimator.data.data_vectors(variable_array).each do |vector|
21
+ @estimator.data.pick(*variable_array).each do |vector|
21
22
  # Make sure this vector is consistently ordered
22
- @frequencies[vector] += 1
23
+ @frequencies[Array(vector)] += 1
23
24
  end
24
25
  end
25
26
  end
@@ -27,9 +28,10 @@ class Pest::Estimator::Frequency
27
28
  def probability(data)
28
29
  cache_model
29
30
 
30
- NArray[ data.data_vectors(variable_array).map do |vector|
31
- @frequencies[vector].to_f
32
- end ] / @estimator.data.length
31
+ array = NArray[ data.pick(*variable_array).map do |vector|
32
+ @frequencies[Array(vector)].to_f
33
+ end ]
34
+ (OFFSET + array.reshape!(data.length)) / (OFFSET + @estimator.data.length)
33
35
  end
34
36
 
35
37
  def entropy
@@ -43,7 +45,7 @@ class Pest::Estimator::Frequency
43
45
  private
44
46
 
45
47
  def unique_event_dataset
46
- vectors = Pest::DataSet::NArray[@frequencies.keys]
48
+ vectors = NMatrix[@frequencies.keys]
47
49
  hash = {}
48
50
  variable_array.each_index do |i|
49
51
  # Extract a single variable from the array of vectors
@@ -17,12 +17,5 @@ module Pest::Function
17
17
  evaluate.send(f, *args)
18
18
  end
19
19
  end
20
-
21
- private
22
-
23
- def parse(variables)
24
- variables.map {|arg| estimator.to_variable(arg) }.to_set
25
- end
26
-
27
20
  end
28
21
  end
@@ -12,12 +12,14 @@ module Pest::Function
12
12
 
13
13
  def initialize(estimator, variables)
14
14
  @estimator = estimator
15
- @event = parse(variables)
16
- @givens = [].to_set
15
+ @event = variables.to_set
16
+ @givens = Set.new
17
+ raise ArgumentError unless (@event - @estimator.variables).empty?
17
18
  end
18
19
 
19
20
  def given(*variables)
20
- @givens.merge parse(variables)
21
+ @givens.merge variables.to_set
22
+ raise ArgumentError unless (@givens - @estimator.variables).empty?
21
23
  self
22
24
  end
23
25
 
@@ -1,24 +1,30 @@
1
1
  module Pest::Function
2
2
  module Probability
3
- def probability(*variables)
4
- Builder.new(self, variables)
3
+ def batch_probability(*variables)
4
+ BatchBuilder.new(self, variables)
5
+ end
6
+ alias :batch_p :batch_probability
7
+
8
+ def probability(event={})
9
+ Builder.new(self, event)
5
10
  end
6
11
  alias :p :probability
7
12
 
8
- class Builder
13
+ class BatchBuilder
9
14
  include Pest::Function::Builder
10
15
 
11
16
  attr_reader :estimator, :data_source, :event, :givens
12
17
 
13
18
  def initialize(estimator, variables)
14
19
  @estimator = estimator
15
- @data_source = data_source
16
- @event = parse(variables)
20
+ @event = variables.to_set
17
21
  @givens = [].to_set
22
+ raise ArgumentError unless (@event - @estimator.variables).empty?
18
23
  end
19
24
 
20
25
  def given(*variables)
21
- @givens.merge parse(variables)
26
+ @givens += variables
27
+ raise ArgumentError unless (@givens - @estimator.variables).empty?
22
28
  self
23
29
  end
24
30
 
@@ -28,14 +34,43 @@ module Pest::Function
28
34
  end
29
35
 
30
36
  def evaluate
31
- joint = estimator.distributions[event].probability(data_source)
32
37
  if givens.empty?
33
- joint
38
+ estimator.distributions[*event].probability(data_source).to_a
34
39
  else
35
- conditional = estimator.distributions[givens].probability(data_source)
36
- joint / conditional
40
+ joint = estimator.distributions[*(event + givens)].probability(data_source)
41
+ conditional = estimator.distributions[*givens].probability(data_source)
42
+
43
+ (joint / conditional).to_a
37
44
  end
38
45
  end
39
46
  end
47
+
48
+ class Builder
49
+ include Pest::Function::Builder
50
+
51
+ attr_accessor :estimator, :event, :givens
52
+
53
+ def initialize(estimator, event)
54
+ @estimator = estimator
55
+ @event = event
56
+ @givens = Hash.new
57
+ end
58
+
59
+ def given(given)
60
+ givens.merge! given
61
+ raise ArgumentError unless (given.keys.to_set - @estimator.variables).empty?
62
+ self
63
+ end
64
+
65
+ def evaluate
66
+ data_hash = event.merge(givens)
67
+ data_hash.each_key {|key| data_hash[key] = Array(data_hash[key])}
68
+
69
+ data = Pest::DataSet::Hash.from_hash(data_hash)
70
+ BatchBuilder.new(estimator, event.keys).
71
+ given(*givens.keys).in(data).
72
+ evaluate.first
73
+ end
74
+ end
40
75
  end
41
76
  end
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "pest"
8
- s.version = "0.0.0"
8
+ s.version = "0.1.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Ryan Michael"]
12
- s.date = "2012-06-24"
12
+ s.date = "2012-07-05"
13
13
  s.description = "Wrappers to facilitate different classes of probability estimators"
14
14
  s.email = "kerinin@gmail.com"
15
15
  s.extra_rdoc_files = [
@@ -30,7 +30,6 @@ Gem::Specification.new do |s|
30
30
  "lib/pest/function.rb",
31
31
  "lib/pest/function/entropy.rb",
32
32
  "lib/pest/function/probability.rb",
33
- "lib/pest/variable.rb",
34
33
  "lib/pest/version.rb",
35
34
  "pest.gemspec",
36
35
  "spec/pest/data_set/hash_spec.rb",
@@ -45,14 +44,13 @@ Gem::Specification.new do |s|
45
44
  "spec/pest/estimator_spec.rb",
46
45
  "spec/pest/function/entropy_spec.rb",
47
46
  "spec/pest/function/probability_spec.rb",
48
- "spec/pest/variable_spec.rb",
49
47
  "spec/pest_spec.rb",
50
48
  "spec/spec_helper.rb"
51
49
  ]
52
50
  s.homepage = "http://github.com/kerinin/pest"
53
51
  s.licenses = ["MIT"]
54
52
  s.require_paths = ["lib"]
55
- s.rubygems_version = "1.8.24"
53
+ s.rubygems_version = "1.8.23"
56
54
  s.summary = "Probability Estimation"
57
55
 
58
56
  if s.respond_to? :specification_version then
@@ -74,6 +72,10 @@ Gem::Specification.new do |s|
74
72
  s.add_development_dependency(%q<rake>, [">= 0"])
75
73
  s.add_development_dependency(%q<pry>, [">= 0"])
76
74
  s.add_development_dependency(%q<rspec>, [">= 0"])
75
+ s.add_development_dependency(%q<jeweler>, [">= 0"])
76
+ s.add_development_dependency(%q<rake>, [">= 0"])
77
+ s.add_development_dependency(%q<pry>, [">= 0"])
78
+ s.add_development_dependency(%q<rspec>, [">= 0"])
77
79
  else
78
80
  s.add_dependency(%q<pest>, [">= 0"])
79
81
  s.add_dependency(%q<narray>, [">= 0"])
@@ -90,6 +92,10 @@ Gem::Specification.new do |s|
90
92
  s.add_dependency(%q<rake>, [">= 0"])
91
93
  s.add_dependency(%q<pry>, [">= 0"])
92
94
  s.add_dependency(%q<rspec>, [">= 0"])
95
+ s.add_dependency(%q<jeweler>, [">= 0"])
96
+ s.add_dependency(%q<rake>, [">= 0"])
97
+ s.add_dependency(%q<pry>, [">= 0"])
98
+ s.add_dependency(%q<rspec>, [">= 0"])
93
99
  end
94
100
  else
95
101
  s.add_dependency(%q<pest>, [">= 0"])
@@ -107,6 +113,10 @@ Gem::Specification.new do |s|
107
113
  s.add_dependency(%q<rake>, [">= 0"])
108
114
  s.add_dependency(%q<pry>, [">= 0"])
109
115
  s.add_dependency(%q<rspec>, [">= 0"])
116
+ s.add_dependency(%q<jeweler>, [">= 0"])
117
+ s.add_dependency(%q<rake>, [">= 0"])
118
+ s.add_dependency(%q<pry>, [">= 0"])
119
+ s.add_dependency(%q<rspec>, [">= 0"])
110
120
  end
111
121
  end
112
122
 
@@ -5,55 +5,30 @@ require 'spec_helper'
5
5
  # conflated
6
6
 
7
7
  describe Pest::DataSet::Hash do
8
- context "class methods" do
9
- before(:each) do
10
- @class = Pest::DataSet::Hash
11
- end
12
-
13
- describe "::translators" do
14
- it "maps File => from_file" do
15
- @class.translators[File].should == :from_file
16
- end
17
-
18
- it "maps String => from_file" do
19
- @class.translators[String].should == :from_file
20
- end
8
+ before(:each) do
9
+ @class = Pest::DataSet::Hash
10
+ end
21
11
 
22
- it "maps Symbol => from_file" do
23
- @class.translators[Symbol].should == :from_file
24
- end
12
+ describe "::translators" do
13
+ it "maps String => from_file" do
14
+ @class.translators[String].should == :from_file
25
15
  end
26
16
 
27
- describe "::from_file" do
28
- before(:each) do
29
- file = File.open(__FILE__, 'r')
30
- File.stub(:open).with('foo', 'r').and_return(file)
31
- Marshal.stub(:restore).with(file).and_return({:foo => 1})
32
- end
33
-
34
- it "looks for file if passed string" do
35
- File.should_receive(:open).with('foo', 'r')
36
- @class.from_file('foo')
37
- end
38
-
39
- it "unmarshals" do
40
- Marshal.should_receive(:restore)
41
- @class.from_file('foo')
42
- end
43
-
44
- it "sets variables" do
45
- @class.from_file('foo').variables.length.should == 1
46
- end
17
+ it "maps Symbol => from_file" do
18
+ @class.translators[Symbol].should == :from_file
19
+ end
20
+ end
47
21
 
48
- it "generates variables" do
49
- @class.from_file('foo').variables[:foo].should be_a(Pest::Variable)
50
- end
22
+ describe "::from_hash" do
23
+ it "gets variables from keys" do
24
+ @instance = Pest::DataSet::Hash.from_hash(:foo => [1,2,3], :bar => [3,4,5])
25
+ @instance.variables.should == [:foo, :bar].to_set
51
26
  end
52
27
  end
53
28
 
54
29
  before(:each) do
55
30
  @data = {:foo => [1,2,3], :bar => [3,4,5]}
56
- @instance = Pest::DataSet::Hash.new(@data)
31
+ @instance = Pest::DataSet::Hash.from_hash(@data)
57
32
  end
58
33
 
59
34
  describe "#to_hash" do
@@ -62,47 +37,87 @@ describe Pest::DataSet::Hash do
62
37
  end
63
38
  end
64
39
 
65
- describe "#data_vectors" do
66
- it "returns an enumerable" do
67
- @instance.data_vectors.should be_a(Enumerable)
40
+ describe "#pick" do
41
+ before(:each) do
42
+ @instance = @class.from_hash :foo => [1,2,3], :bar => [4,5,6]
43
+ end
44
+
45
+ it "accepts a single symbol string" do
46
+ @instance.pick(:foo).data.to_a.first.should == [1,2,3]
68
47
  end
69
48
 
70
- it "formats data as a list of rows" do
71
- @instance.data_vectors.first.should == [1,3]
49
+ it "accepts a single variable" do
50
+ @instance.pick(:foo).data.to_a.first.should == [1,2,3]
72
51
  end
52
+
53
+ it "accepts multiple variables" do
54
+ @instance.pick(:bar, :foo).data.to_a.should == [[4,5,6],[1,2,3]]
55
+ end
73
56
  end
74
57
 
75
- describe "#save" do
58
+ describe "#length" do
76
59
  before(:each) do
77
- @file = File.open(__FILE__, 'r')
78
- File.stub(:open).with('foo', 'w').and_return(@file)
79
- Marshal.stub(:dump)
60
+ @data = {:foo => [1,2,3], :bar => [3,4,5]}
61
+ @instance = Pest::DataSet::Hash.from_hash(@data)
80
62
  end
81
63
 
82
- it "marshals to file from path" do
83
- Marshal.should_receive(:dump).with(@data, @file)
84
- @instance.save('foo')
64
+ it "delegates to hash" do
65
+ @instance.length.should == 3
85
66
  end
67
+ end
86
68
 
87
- it "marshals to file from file" do
88
- Marshal.should_receive(:dump).with(@data, @file)
89
- @instance.save(@file)
69
+ describe "#each" do
70
+ before(:each) do
71
+ @instance = @class.from_hash :foo => [1,2,3], :bar => [4,5,6]
90
72
  end
91
73
 
92
- it "saves to tmp dir if no filename specified" do
93
- File.should_receive(:open).with(/pest_hash_dataset/, anything(), anything()).and_return(@file)
94
- @instance.save
74
+ it "yields vectors" do
75
+ block = double("block")
76
+ block.should_receive(:yielding).with([1,4])
77
+ block.should_receive(:yielding).with([2,5])
78
+ block.should_receive(:yielding).with([3,6])
79
+ @instance.each {|i| block.yielding(i)}
95
80
  end
96
81
  end
97
82
 
98
- describe "#length" do
83
+ describe "#map" do
99
84
  before(:each) do
100
- @data = {:foo => [1,2,3], :bar => [3,4,5]}
101
- @instance = Pest::DataSet::Hash.new(@data)
85
+ @instance = @class.from_hash :foo => [1,2,3], :bar => [4,5,6]
102
86
  end
103
87
 
104
- it "delegates to hash" do
105
- @instance.length.should == 3
88
+ it "works" do
89
+ @instance.map {|i| i}.should == [[1,4],[2,5],[3,6]]
90
+ end
91
+ end
92
+
93
+ describe "#merge" do
94
+ before(:each) do
95
+ @other = @class.from_hash :foo => [10,11,12,13], :baz => [1,2,3,4]
96
+ @instance = @class.from_hash :foo => [1,2,3,4], :bar => [5,6,7,8]
97
+ end
98
+
99
+ it "accepts a dataset and returns dataset" do
100
+ @instance.merge(@other).should be_a(@class)
101
+ end
102
+
103
+ it "accepts a hash and returns dataset" do
104
+ @instance.merge(:foo => [10,11,12,13], :baz => [1,2,3,4]).should be_a(@class)
105
+ end
106
+
107
+ it "requires the dataset to have the same length" do
108
+ expect { @instance.merge(:foo => [1,2,3,4,5]) }.to raise_error(ArgumentError)
109
+ end
110
+
111
+ it "adds the passed variable to self" do
112
+ @instance.merge(@other).variables.should include(:baz)
113
+ end
114
+
115
+ it "adds the passed data to self" do
116
+ @instance.merge(@other).pick(:baz).to_a.flatten.should == [1,2,3,4]
117
+ end
118
+
119
+ it "over-writes variables in self with variables in other" do
120
+ @instance.merge(@other).pick(:foo).to_a.flatten.should == [10,11,12,13]
106
121
  end
107
122
  end
108
123
  end