pest 0.0.0 → 0.1.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
@@ -13,17 +13,6 @@ module Pest::Estimator
13
13
  @distributions ||= DistributionList.new(self)
14
14
  end
15
15
 
16
- def to_variable(arg)
17
- variable = case arg.class.name
18
- when 'Pest::Variable'
19
- arg
20
- when 'String', 'Symbol'
21
- variables[arg] || Pest::Variable.new(:name => arg)
22
- end
23
- raise ArgumentError unless variables.values.include?(variable)
24
- variable
25
- end
26
-
27
16
  module Distribution
28
17
  attr_reader :variables
29
18
 
@@ -36,7 +25,11 @@ module Pest::Estimator
36
25
  variables.to_a.sort
37
26
  end
38
27
 
39
- def probability
28
+ def batch_probability(*args)
29
+ raise NotImplementedError
30
+ end
31
+
32
+ def probability(*args)
40
33
  raise NotImplementedError
41
34
  end
42
35
  end
@@ -46,23 +39,10 @@ module Pest::Estimator
46
39
  @estimator = estimator
47
40
  end
48
41
 
49
- def parse_args(args)
50
- set = if args.kind_of? Array
51
- if args.any? {|arg| arg.kind_of?(::Set)}
52
- args.inject(::Set.new) {|set, el| set + el.to_set}
53
- else
54
- args.flatten.to_set
55
- end
56
- elsif args.kind_of? ::Set
57
- args
58
- else
59
- Array(args).to_set
60
- end
61
- set.map! {|arg| @estimator.to_variable(arg) }
62
- end
63
-
64
42
  def [](*args)
65
- set = parse_args(args)
43
+ set = args.to_set
44
+ raise ArgumentError unless (set - @estimator.variables).empty?
45
+
66
46
  unless has_key? set
67
47
  self[set] = @estimator.distribution_class.new(@estimator, set)
68
48
  end
@@ -11,15 +11,16 @@ class Pest::Estimator::Frequency
11
11
 
12
12
  class Distribution
13
13
  include Pest::Estimator::Distribution
14
-
14
+
15
+ OFFSET = 0
15
16
  attr_reader :frequencies, :checksum
16
17
 
17
18
  def cache_model
18
19
  if @frequencies.nil?
19
20
  @frequencies = Hash.new(0)
20
- @estimator.data.data_vectors(variable_array).each do |vector|
21
+ @estimator.data.pick(*variable_array).each do |vector|
21
22
  # Make sure this vector is consistently ordered
22
- @frequencies[vector] += 1
23
+ @frequencies[Array(vector)] += 1
23
24
  end
24
25
  end
25
26
  end
@@ -27,9 +28,10 @@ class Pest::Estimator::Frequency
27
28
  def probability(data)
28
29
  cache_model
29
30
 
30
- NArray[ data.data_vectors(variable_array).map do |vector|
31
- @frequencies[vector].to_f
32
- end ] / @estimator.data.length
31
+ array = NArray[ data.pick(*variable_array).map do |vector|
32
+ @frequencies[Array(vector)].to_f
33
+ end ]
34
+ (OFFSET + array.reshape!(data.length)) / (OFFSET + @estimator.data.length)
33
35
  end
34
36
 
35
37
  def entropy
@@ -43,7 +45,7 @@ class Pest::Estimator::Frequency
43
45
  private
44
46
 
45
47
  def unique_event_dataset
46
- vectors = Pest::DataSet::NArray[@frequencies.keys]
48
+ vectors = NMatrix[@frequencies.keys]
47
49
  hash = {}
48
50
  variable_array.each_index do |i|
49
51
  # Extract a single variable from the array of vectors
@@ -17,12 +17,5 @@ module Pest::Function
17
17
  evaluate.send(f, *args)
18
18
  end
19
19
  end
20
-
21
- private
22
-
23
- def parse(variables)
24
- variables.map {|arg| estimator.to_variable(arg) }.to_set
25
- end
26
-
27
20
  end
28
21
  end
@@ -12,12 +12,14 @@ module Pest::Function
12
12
 
13
13
  def initialize(estimator, variables)
14
14
  @estimator = estimator
15
- @event = parse(variables)
16
- @givens = [].to_set
15
+ @event = variables.to_set
16
+ @givens = Set.new
17
+ raise ArgumentError unless (@event - @estimator.variables).empty?
17
18
  end
18
19
 
19
20
  def given(*variables)
20
- @givens.merge parse(variables)
21
+ @givens.merge variables.to_set
22
+ raise ArgumentError unless (@givens - @estimator.variables).empty?
21
23
  self
22
24
  end
23
25
 
@@ -1,24 +1,30 @@
1
1
  module Pest::Function
2
2
  module Probability
3
- def probability(*variables)
4
- Builder.new(self, variables)
3
+ def batch_probability(*variables)
4
+ BatchBuilder.new(self, variables)
5
+ end
6
+ alias :batch_p :batch_probability
7
+
8
+ def probability(event={})
9
+ Builder.new(self, event)
5
10
  end
6
11
  alias :p :probability
7
12
 
8
- class Builder
13
+ class BatchBuilder
9
14
  include Pest::Function::Builder
10
15
 
11
16
  attr_reader :estimator, :data_source, :event, :givens
12
17
 
13
18
  def initialize(estimator, variables)
14
19
  @estimator = estimator
15
- @data_source = data_source
16
- @event = parse(variables)
20
+ @event = variables.to_set
17
21
  @givens = [].to_set
22
+ raise ArgumentError unless (@event - @estimator.variables).empty?
18
23
  end
19
24
 
20
25
  def given(*variables)
21
- @givens.merge parse(variables)
26
+ @givens += variables
27
+ raise ArgumentError unless (@givens - @estimator.variables).empty?
22
28
  self
23
29
  end
24
30
 
@@ -28,14 +34,43 @@ module Pest::Function
28
34
  end
29
35
 
30
36
  def evaluate
31
- joint = estimator.distributions[event].probability(data_source)
32
37
  if givens.empty?
33
- joint
38
+ estimator.distributions[*event].probability(data_source).to_a
34
39
  else
35
- conditional = estimator.distributions[givens].probability(data_source)
36
- joint / conditional
40
+ joint = estimator.distributions[*(event + givens)].probability(data_source)
41
+ conditional = estimator.distributions[*givens].probability(data_source)
42
+
43
+ (joint / conditional).to_a
37
44
  end
38
45
  end
39
46
  end
47
+
48
+ class Builder
49
+ include Pest::Function::Builder
50
+
51
+ attr_accessor :estimator, :event, :givens
52
+
53
+ def initialize(estimator, event)
54
+ @estimator = estimator
55
+ @event = event
56
+ @givens = Hash.new
57
+ end
58
+
59
+ def given(given)
60
+ givens.merge! given
61
+ raise ArgumentError unless (given.keys.to_set - @estimator.variables).empty?
62
+ self
63
+ end
64
+
65
+ def evaluate
66
+ data_hash = event.merge(givens)
67
+ data_hash.each_key {|key| data_hash[key] = Array(data_hash[key])}
68
+
69
+ data = Pest::DataSet::Hash.from_hash(data_hash)
70
+ BatchBuilder.new(estimator, event.keys).
71
+ given(*givens.keys).in(data).
72
+ evaluate.first
73
+ end
74
+ end
40
75
  end
41
76
  end
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "pest"
8
- s.version = "0.0.0"
8
+ s.version = "0.1.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Ryan Michael"]
12
- s.date = "2012-06-24"
12
+ s.date = "2012-07-05"
13
13
  s.description = "Wrappers to facilitate different classes of probability estimators"
14
14
  s.email = "kerinin@gmail.com"
15
15
  s.extra_rdoc_files = [
@@ -30,7 +30,6 @@ Gem::Specification.new do |s|
30
30
  "lib/pest/function.rb",
31
31
  "lib/pest/function/entropy.rb",
32
32
  "lib/pest/function/probability.rb",
33
- "lib/pest/variable.rb",
34
33
  "lib/pest/version.rb",
35
34
  "pest.gemspec",
36
35
  "spec/pest/data_set/hash_spec.rb",
@@ -45,14 +44,13 @@ Gem::Specification.new do |s|
45
44
  "spec/pest/estimator_spec.rb",
46
45
  "spec/pest/function/entropy_spec.rb",
47
46
  "spec/pest/function/probability_spec.rb",
48
- "spec/pest/variable_spec.rb",
49
47
  "spec/pest_spec.rb",
50
48
  "spec/spec_helper.rb"
51
49
  ]
52
50
  s.homepage = "http://github.com/kerinin/pest"
53
51
  s.licenses = ["MIT"]
54
52
  s.require_paths = ["lib"]
55
- s.rubygems_version = "1.8.24"
53
+ s.rubygems_version = "1.8.23"
56
54
  s.summary = "Probability Estimation"
57
55
 
58
56
  if s.respond_to? :specification_version then
@@ -74,6 +72,10 @@ Gem::Specification.new do |s|
74
72
  s.add_development_dependency(%q<rake>, [">= 0"])
75
73
  s.add_development_dependency(%q<pry>, [">= 0"])
76
74
  s.add_development_dependency(%q<rspec>, [">= 0"])
75
+ s.add_development_dependency(%q<jeweler>, [">= 0"])
76
+ s.add_development_dependency(%q<rake>, [">= 0"])
77
+ s.add_development_dependency(%q<pry>, [">= 0"])
78
+ s.add_development_dependency(%q<rspec>, [">= 0"])
77
79
  else
78
80
  s.add_dependency(%q<pest>, [">= 0"])
79
81
  s.add_dependency(%q<narray>, [">= 0"])
@@ -90,6 +92,10 @@ Gem::Specification.new do |s|
90
92
  s.add_dependency(%q<rake>, [">= 0"])
91
93
  s.add_dependency(%q<pry>, [">= 0"])
92
94
  s.add_dependency(%q<rspec>, [">= 0"])
95
+ s.add_dependency(%q<jeweler>, [">= 0"])
96
+ s.add_dependency(%q<rake>, [">= 0"])
97
+ s.add_dependency(%q<pry>, [">= 0"])
98
+ s.add_dependency(%q<rspec>, [">= 0"])
93
99
  end
94
100
  else
95
101
  s.add_dependency(%q<pest>, [">= 0"])
@@ -107,6 +113,10 @@ Gem::Specification.new do |s|
107
113
  s.add_dependency(%q<rake>, [">= 0"])
108
114
  s.add_dependency(%q<pry>, [">= 0"])
109
115
  s.add_dependency(%q<rspec>, [">= 0"])
116
+ s.add_dependency(%q<jeweler>, [">= 0"])
117
+ s.add_dependency(%q<rake>, [">= 0"])
118
+ s.add_dependency(%q<pry>, [">= 0"])
119
+ s.add_dependency(%q<rspec>, [">= 0"])
110
120
  end
111
121
  end
112
122
 
@@ -5,55 +5,30 @@ require 'spec_helper'
5
5
  # conflated
6
6
 
7
7
  describe Pest::DataSet::Hash do
8
- context "class methods" do
9
- before(:each) do
10
- @class = Pest::DataSet::Hash
11
- end
12
-
13
- describe "::translators" do
14
- it "maps File => from_file" do
15
- @class.translators[File].should == :from_file
16
- end
17
-
18
- it "maps String => from_file" do
19
- @class.translators[String].should == :from_file
20
- end
8
+ before(:each) do
9
+ @class = Pest::DataSet::Hash
10
+ end
21
11
 
22
- it "maps Symbol => from_file" do
23
- @class.translators[Symbol].should == :from_file
24
- end
12
+ describe "::translators" do
13
+ it "maps String => from_file" do
14
+ @class.translators[String].should == :from_file
25
15
  end
26
16
 
27
- describe "::from_file" do
28
- before(:each) do
29
- file = File.open(__FILE__, 'r')
30
- File.stub(:open).with('foo', 'r').and_return(file)
31
- Marshal.stub(:restore).with(file).and_return({:foo => 1})
32
- end
33
-
34
- it "looks for file if passed string" do
35
- File.should_receive(:open).with('foo', 'r')
36
- @class.from_file('foo')
37
- end
38
-
39
- it "unmarshals" do
40
- Marshal.should_receive(:restore)
41
- @class.from_file('foo')
42
- end
43
-
44
- it "sets variables" do
45
- @class.from_file('foo').variables.length.should == 1
46
- end
17
+ it "maps Symbol => from_file" do
18
+ @class.translators[Symbol].should == :from_file
19
+ end
20
+ end
47
21
 
48
- it "generates variables" do
49
- @class.from_file('foo').variables[:foo].should be_a(Pest::Variable)
50
- end
22
+ describe "::from_hash" do
23
+ it "gets variables from keys" do
24
+ @instance = Pest::DataSet::Hash.from_hash(:foo => [1,2,3], :bar => [3,4,5])
25
+ @instance.variables.should == [:foo, :bar].to_set
51
26
  end
52
27
  end
53
28
 
54
29
  before(:each) do
55
30
  @data = {:foo => [1,2,3], :bar => [3,4,5]}
56
- @instance = Pest::DataSet::Hash.new(@data)
31
+ @instance = Pest::DataSet::Hash.from_hash(@data)
57
32
  end
58
33
 
59
34
  describe "#to_hash" do
@@ -62,47 +37,87 @@ describe Pest::DataSet::Hash do
62
37
  end
63
38
  end
64
39
 
65
- describe "#data_vectors" do
66
- it "returns an enumerable" do
67
- @instance.data_vectors.should be_a(Enumerable)
40
+ describe "#pick" do
41
+ before(:each) do
42
+ @instance = @class.from_hash :foo => [1,2,3], :bar => [4,5,6]
43
+ end
44
+
45
+ it "accepts a single symbol string" do
46
+ @instance.pick(:foo).data.to_a.first.should == [1,2,3]
68
47
  end
69
48
 
70
- it "formats data as a list of rows" do
71
- @instance.data_vectors.first.should == [1,3]
49
+ it "accepts a single variable" do
50
+ @instance.pick(:foo).data.to_a.first.should == [1,2,3]
72
51
  end
52
+
53
+ it "accepts multiple variables" do
54
+ @instance.pick(:bar, :foo).data.to_a.should == [[4,5,6],[1,2,3]]
55
+ end
73
56
  end
74
57
 
75
- describe "#save" do
58
+ describe "#length" do
76
59
  before(:each) do
77
- @file = File.open(__FILE__, 'r')
78
- File.stub(:open).with('foo', 'w').and_return(@file)
79
- Marshal.stub(:dump)
60
+ @data = {:foo => [1,2,3], :bar => [3,4,5]}
61
+ @instance = Pest::DataSet::Hash.from_hash(@data)
80
62
  end
81
63
 
82
- it "marshals to file from path" do
83
- Marshal.should_receive(:dump).with(@data, @file)
84
- @instance.save('foo')
64
+ it "delegates to hash" do
65
+ @instance.length.should == 3
85
66
  end
67
+ end
86
68
 
87
- it "marshals to file from file" do
88
- Marshal.should_receive(:dump).with(@data, @file)
89
- @instance.save(@file)
69
+ describe "#each" do
70
+ before(:each) do
71
+ @instance = @class.from_hash :foo => [1,2,3], :bar => [4,5,6]
90
72
  end
91
73
 
92
- it "saves to tmp dir if no filename specified" do
93
- File.should_receive(:open).with(/pest_hash_dataset/, anything(), anything()).and_return(@file)
94
- @instance.save
74
+ it "yields vectors" do
75
+ block = double("block")
76
+ block.should_receive(:yielding).with([1,4])
77
+ block.should_receive(:yielding).with([2,5])
78
+ block.should_receive(:yielding).with([3,6])
79
+ @instance.each {|i| block.yielding(i)}
95
80
  end
96
81
  end
97
82
 
98
- describe "#length" do
83
+ describe "#map" do
99
84
  before(:each) do
100
- @data = {:foo => [1,2,3], :bar => [3,4,5]}
101
- @instance = Pest::DataSet::Hash.new(@data)
85
+ @instance = @class.from_hash :foo => [1,2,3], :bar => [4,5,6]
102
86
  end
103
87
 
104
- it "delegates to hash" do
105
- @instance.length.should == 3
88
+ it "works" do
89
+ @instance.map {|i| i}.should == [[1,4],[2,5],[3,6]]
90
+ end
91
+ end
92
+
93
+ describe "#merge" do
94
+ before(:each) do
95
+ @other = @class.from_hash :foo => [10,11,12,13], :baz => [1,2,3,4]
96
+ @instance = @class.from_hash :foo => [1,2,3,4], :bar => [5,6,7,8]
97
+ end
98
+
99
+ it "accepts a dataset and returns dataset" do
100
+ @instance.merge(@other).should be_a(@class)
101
+ end
102
+
103
+ it "accepts a hash and returns dataset" do
104
+ @instance.merge(:foo => [10,11,12,13], :baz => [1,2,3,4]).should be_a(@class)
105
+ end
106
+
107
+ it "requires the dataset to have the same length" do
108
+ expect { @instance.merge(:foo => [1,2,3,4,5]) }.to raise_error(ArgumentError)
109
+ end
110
+
111
+ it "adds the passed variable to self" do
112
+ @instance.merge(@other).variables.should include(:baz)
113
+ end
114
+
115
+ it "adds the passed data to self" do
116
+ @instance.merge(@other).pick(:baz).to_a.flatten.should == [1,2,3,4]
117
+ end
118
+
119
+ it "over-writes variables in self with variables in other" do
120
+ @instance.merge(@other).pick(:foo).to_a.flatten.should == [10,11,12,13]
106
121
  end
107
122
  end
108
123
  end