fathom 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (47) hide show
  1. data/.bundle/config +2 -0
  2. data/.document +5 -0
  3. data/.gitignore +5 -0
  4. data/.rspec +1 -0
  5. data/Gemfile +5 -0
  6. data/Gemfile.lock +30 -0
  7. data/LICENSE +20 -0
  8. data/README.md +176 -0
  9. data/Rakefile +50 -0
  10. data/VERSION +1 -0
  11. data/autotest/discover.rb +1 -0
  12. data/lib/fathom.rb +68 -0
  13. data/lib/fathom/archive/conditional_probability_matrix.rb +116 -0
  14. data/lib/fathom/archive/n2.rb +198 -0
  15. data/lib/fathom/archive/n3.rb +119 -0
  16. data/lib/fathom/archive/node.rb +74 -0
  17. data/lib/fathom/archive/noodle.rb +136 -0
  18. data/lib/fathom/archive/scratch.rb +45 -0
  19. data/lib/fathom/basic_node.rb +8 -0
  20. data/lib/fathom/causal_graph.rb +12 -0
  21. data/lib/fathom/combined_plausibilities.rb +12 -0
  22. data/lib/fathom/concept.rb +83 -0
  23. data/lib/fathom/data_node.rb +51 -0
  24. data/lib/fathom/import.rb +68 -0
  25. data/lib/fathom/import/csv_import.rb +60 -0
  26. data/lib/fathom/import/yaml_import.rb +53 -0
  27. data/lib/fathom/inverter.rb +21 -0
  28. data/lib/fathom/knowledge_base.rb +23 -0
  29. data/lib/fathom/monte_carlo_set.rb +76 -0
  30. data/lib/fathom/node_utilities.rb +8 -0
  31. data/lib/fathom/plausible_range.rb +82 -0
  32. data/lib/fathom/value_aggregator.rb +11 -0
  33. data/lib/fathom/value_description.rb +79 -0
  34. data/lib/fathom/value_multiplier.rb +18 -0
  35. data/lib/options_hash.rb +186 -0
  36. data/spec/fathom/data_node_spec.rb +61 -0
  37. data/spec/fathom/import/csv_import_spec.rb +36 -0
  38. data/spec/fathom/import/yaml_import_spec.rb +40 -0
  39. data/spec/fathom/import_spec.rb +22 -0
  40. data/spec/fathom/knowledge_base_spec.rb +16 -0
  41. data/spec/fathom/monte_carlo_set_spec.rb +58 -0
  42. data/spec/fathom/plausible_range_spec.rb +130 -0
  43. data/spec/fathom/value_description_spec.rb +70 -0
  44. data/spec/fathom_spec.rb +8 -0
  45. data/spec/spec_helper.rb +13 -0
  46. data/spec/support/demo.yml +17 -0
  47. metadata +135 -0
@@ -0,0 +1,61 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+
3
+ include Fathom
4
+
5
+ describe DataNode do
6
+
7
+ before do
8
+ @values = [1,2,3,4,5]
9
+ @opts = {:values => @values}
10
+ @dn = DataNode.new(@opts)
11
+ @vector = GSL::Vector.ary_to_gv(@values)
12
+ end
13
+
14
+ it "should initialize requiring values in the options" do
15
+ lambda{DataNode.new}.should raise_error(/values/)
16
+ lambda{DataNode.new(:values => @values)}.should_not raise_error
17
+ end
18
+
19
+ it "should make the values readable" do
20
+ @dn.values.should eql(@values)
21
+ end
22
+
23
+ it "should allow an optional name for the node" do
24
+ @dn = DataNode.new(:values => @values, :name => "Demo Name")
25
+ @dn.name.should eql("Demo Name")
26
+ end
27
+
28
+ # Note: distributions aren't defined here yet, so this will eventually be
29
+ # some sort of Fathom::Distribution::Constant.
30
+ it "should take an optional distribiution" do
31
+ @dn = DataNode.new(@opts.merge(:distribution => :some_distribution))
32
+ @dn.distribution.should eql(:some_distribution)
33
+ end
34
+
35
+ it "should create a vector from the values" do
36
+ @dn.vector.should ==(@vector)
37
+ end
38
+
39
+ it "should provide the standard deviation" do
40
+ @dn.standard_deviation.should ==(@vector.sd)
41
+ end
42
+
43
+ it "should alias sd and std for standard_deviation" do
44
+ @dn.sd.should eql(@dn.standard_deviation)
45
+ @dn.std.should eql(@dn.standard_deviation)
46
+ end
47
+
48
+ it "should be able to produce the mean" do
49
+ @dn.mean.should eql(@vector.mean)
50
+ end
51
+
52
+ it "should generate a random variable that fits the data's distribution" do
53
+ @dn.rand.should be_a(Float)
54
+ end
55
+
56
+ it "should have a name_sym method" do
57
+ dn = DataNode.new(:name => "Demo Node", :values => [1,2,3])
58
+ dn.name_sym.should eql(:demo_node)
59
+ end
60
+
61
+ end
@@ -0,0 +1,36 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../../spec_helper')
2
+
3
+ include Fathom
4
+
5
+ describe CSVImport do
6
+
7
+ before do
8
+ @content =<<-END
9
+ this,and,that
10
+ 1,2,3
11
+ 4,5,6
12
+ 7,8,9
13
+ END
14
+
15
+ @opts = {:content => @content}
16
+ @ci = CSVImport.new(@opts)
17
+ @result = @ci.import
18
+ end
19
+
20
+ it "should not work unless content is set" do
21
+ lambda{CSVImport.new.import}.should raise_error(NoMethodError)
22
+ lambda{CSVImport.new(@opts)}.should_not raise_error
23
+ end
24
+
25
+ it "should create as many data nodes as there are columns" do
26
+ @result.size.should eql(3)
27
+ @result.each {|dn| dn.should be_a(DataNode)}
28
+ end
29
+
30
+ it "should import the values from each column into each data node" do
31
+ @result[0].values.should eql([1,4,7])
32
+ @result[1].values.should eql([2,5,8])
33
+ @result[2].values.should eql([3,6,9])
34
+ end
35
+
36
+ end
@@ -0,0 +1,40 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../../spec_helper')
2
+
3
+ include Fathom
4
+
5
+ describe YAMLImport do
6
+
7
+ before do
8
+ @demo_yaml_location = File.expand_path(File.dirname(__FILE__) + "/../../support/demo.yml")
9
+ @demo_yaml = open(@demo_yaml_location).read
10
+ @opts = {:content => @demo_yaml}
11
+ @yi = YAMLImport.new(@opts)
12
+ @result = @yi.import
13
+ end
14
+
15
+ it "should not work unless content is set" do
16
+ lambda{YAMLImport.new.import}.should raise_error
17
+ lambda{YAMLImport.new(@opts)}.should_not raise_error
18
+ end
19
+
20
+ it "should create PlausibleRange nodes for any hashes with at least a min and max key in it" do
21
+ @result.find {|r| r.name == "CO2 Emissions"}.should_not be_nil
22
+ end
23
+
24
+ it "should not create a PlausibleRange for entries missing min and max" do
25
+ @result.find {|r| r.name == "Invalid Hash"}.should be_nil
26
+ end
27
+
28
+ it "should be able to create a PlausibleRange with more complete information" do
29
+ more_complete_range = @result.find {|r| r.name == "More Complete Range"}
30
+ more_complete_range.ci.should eql(0.6)
31
+ more_complete_range.description.should eql('Some good description')
32
+ end
33
+
34
+ it "should create DataNodes for entries that have an array of information" do
35
+ data_node = @result.find {|r| r.name == 'CO2 Readings'}
36
+ data_node.should be_a(DataNode)
37
+ data_node.values.should eql([10,20,30])
38
+ end
39
+
40
+ end
@@ -0,0 +1,22 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+
3
+ include Fathom
4
+
5
+ describe Import do
6
+
7
+ before do
8
+ @content = 'some content'
9
+ @options = OptionsHash.new({:content => @content})
10
+ @i = Import.new(@options)
11
+ @values = [1,2,3,4,5]
12
+ end
13
+
14
+ it "should initialize with a optional content" do
15
+ @i.content.should eql(@content)
16
+ end
17
+
18
+ it "should record the initialization options" do
19
+ @i.options.should eql(@options)
20
+ end
21
+
22
+ end
@@ -0,0 +1,16 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+
3
+ include Fathom
4
+
5
+ describe KnowledgeBase do
6
+
7
+ before do
8
+ @kb = KnowledgeBase.new
9
+ end
10
+
11
+ it "should be able to add a node" do
12
+ @dn = DataNode.new(:name => :new_node, :values => [1,2,3])
13
+ @kb[:new_node] = @dn
14
+ @kb[:new_node].should eql(@dn)
15
+ end
16
+ end
@@ -0,0 +1,58 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+
3
+ include Fathom
4
+
5
+ describe MonteCarloSet do
6
+
7
+ before(:all) do
8
+ @q1_sales = PlausibleRange.new(:min => 10, :max => 20, :hard_lower_bound => 0, :name => "First Quarter Sales")
9
+ @q1_prices = PlausibleRange.new(:min => 10_000, :max => 12_000, :name => "First Quarter Prices")
10
+ @q1_sales_commissions = PlausibleRange.new(:min => 0.2, :max => 0.2, :name => "Sales Commission Rate")
11
+
12
+ @q1_gross_margins = ValueDescription.new(@q1_sales, @q1_prices, @q1_sales_commissions) do |random_sample|
13
+ revenue = (random_sample.first_quarter_sales * random_sample.first_quarter_prices)
14
+ commissions_paid = random_sample.sales_commission_rate * revenue
15
+ gross_margins = revenue - commissions_paid
16
+ {:revenue => revenue, :commissions_paid => commissions_paid, :gross_margins => gross_margins}
17
+ end
18
+ end
19
+
20
+ before do
21
+ @mcs = MonteCarloSet.new(@q1_gross_margins)
22
+ end
23
+
24
+ it "should initialize with a ValueDescription" do
25
+ lambda{MonteCarloSet.new}.should raise_error
26
+ lambda{MonteCarloSet.new(@q1_gross_margins)}.should_not raise_error
27
+ end
28
+
29
+ it "should expose the value_description" do
30
+ @mcs.value_description.should eql(@q1_gross_margins)
31
+ end
32
+
33
+ it "should process with the default number of runs at 10,000", :slow => true do
34
+ lambda{@mcs.process}.should_not raise_error
35
+ @mcs.samples_taken.should eql(10_000)
36
+ end
37
+
38
+ it "should be able to process with a specified number of runs" do
39
+ @mcs.process(3)
40
+ @mcs.samples_taken.should eql(3)
41
+ end
42
+
43
+ it "should define lookup methods for all keys in the result set" do
44
+ @mcs.process(1)
45
+ @mcs.revenue.should be_a(GSL::Vector)
46
+ @mcs.revenue.length.should eql(1)
47
+ @mcs.commissions_paid.should be_a(GSL::Vector)
48
+ @mcs.commissions_paid.length.should eql(1)
49
+ @mcs.gross_margins.should be_a(GSL::Vector)
50
+ @mcs.gross_margins.length.should eql(1)
51
+ end
52
+
53
+ it "should be resetable" do
54
+ @mcs.process(1)
55
+ @mcs.reset!
56
+ lambda{@mcs.process(1)}.should_not raise_error
57
+ end
58
+ end
@@ -0,0 +1,130 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+
3
+ include Fathom
4
+
5
+ describe PlausibleRange do
6
+
7
+ before do
8
+ @d = {:upper_bound => 10, :lower_bound => 1}
9
+ @pr = PlausibleRange.new(@d)
10
+ end
11
+
12
+ it "should require an upper and lower bound" do
13
+ lambda{PlausibleRange.new}.should raise_error(/upper/)
14
+ lambda{PlausibleRange.new(:upper_bound => 10)}.should raise_error(/lower/)
15
+ lambda{PlausibleRange.new(:upper_bound => 10, :lower_bound => 1)}.should_not raise_error
16
+ end
17
+
18
+ it "should expose the upper bound and lower bound" do
19
+ @pr.upper_bound.should eql(10)
20
+ @pr.lower_bound.should eql(1)
21
+ end
22
+
23
+ it "should use min or max instead of lower_bound or upper_bound" do
24
+ pr = PlausibleRange.new(:min => 1, :max => 10)
25
+ pr.min.should eql(1)
26
+ pr.max.should eql(10)
27
+ pr.lower_bound.should eql(1)
28
+ pr.upper_bound.should eql(10)
29
+ end
30
+
31
+ it "should allow an optional hard_lower_bound" do
32
+ pr = PlausibleRange.new(@d.merge(:hard_lower_bound => 0))
33
+ pr.hard_lower_bound.should eql(0)
34
+ end
35
+
36
+ it "should set the lower_bound to the hard_lower_bound if the hard_lower_bound is greater than the lower_bound" do
37
+ pr = PlausibleRange.new(:lower_bound => 2, :hard_lower_bound => 3, :upper_bound => 4)
38
+ pr.lower_bound.should eql(3)
39
+ end
40
+
41
+ it "should allow an optional hard_upper_bound" do
42
+ pr = PlausibleRange.new(@d.merge(:hard_upper_bound => 20))
43
+ pr.hard_upper_bound.should eql(20)
44
+ end
45
+
46
+ it "should set the upper_bound to the hard_upper_bound if the hard_upper_bound is less than the upper_bound" do
47
+ pr = PlausibleRange.new(:lower_bound => 2, :hard_upper_bound => 3, :upper_bound => 4)
48
+ pr.upper_bound.should eql(3)
49
+ end
50
+
51
+ it "should have a default confidence interval of 90%" do
52
+ @pr.confidence_interval.should eql(0.9)
53
+ end
54
+
55
+ it "should be able to instantiate with confidence_interval" do
56
+ pr = PlausibleRange.new(@d.merge(:confidence_interval => 0.8))
57
+ pr.confidence_interval.should eql(0.8)
58
+ end
59
+
60
+ it "should be able to use ci instead of confidence_interval" do
61
+ pr = PlausibleRange.new(@d.merge(:ci => 0.8))
62
+ pr.ci.should eql(0.8)
63
+ pr.confidence_interval.should eql(0.8)
64
+ end
65
+
66
+ it "should be able to calculate the midpoint" do
67
+ @pr.midpoint.should eql(5.5)
68
+ end
69
+
70
+ # TODO: Make this more accurate when we start using the GSL stuff more
71
+ it "should be able to calculate the standard deviation" do
72
+ @pr.standard_deviation.should be_close(2.73556231003039, 0.00000001)
73
+ end
74
+
75
+ it "should be able to use std instead of standard_deviation" do
76
+ @pr.std.should eql(@pr.standard_deviation)
77
+ end
78
+
79
+ it "should be able to produce a random value from within the distribution" do
80
+ @pr.should be_respond_to(:rand)
81
+ @pr.rand.should be_a(Float)
82
+ end
83
+
84
+ it "should be able to produce an array of random values" do
85
+ @pr.array_of_random_values.should be_an(Array)
86
+ @pr.array_of_random_values.length.should eql(10)
87
+ @pr.array_of_random_values(13).length.should eql(13)
88
+ end
89
+
90
+ it "should alias array_of_random_values with to_a" do
91
+ @pr.to_a.should be_an(Array)
92
+ @pr.to_a.length.should eql(10)
93
+ @pr.to_a(13).length.should eql(13)
94
+ end
95
+
96
+ it "should be able to produce a vector of random values" do
97
+ @pr.vector_of_random_values.should be_a(GSL::Vector)
98
+ @pr.vector_of_random_values.length.should eql(10)
99
+ @pr.vector_of_random_values(12).length.should eql(12)
100
+ end
101
+
102
+ it "should alias vector_of_random_values with to_v" do
103
+ @pr.to_v.should be_a(GSL::Vector)
104
+ @pr.to_v.length.should eql(10)
105
+ @pr.to_v(12).length.should eql(12)
106
+ end
107
+
108
+ it "should record the name, if one is provided" do
109
+ pr = PlausibleRange.new(@d.merge(:name => 'pr1'))
110
+ pr.name.should eql('pr1')
111
+ end
112
+
113
+ it "should record the description, if one is provided" do
114
+ pr = PlausibleRange.new(@d.merge(:description => 'some description'))
115
+ pr.description.should eql('some description')
116
+ end
117
+
118
+ it "should have no problems with a range with matching lower and upper bounds" do
119
+ pr = PlausibleRange.new(:min => 1, :max => 1)
120
+ pr.rand.should eql(1.0)
121
+ pr.midpoint.should eql(1.0)
122
+ pr.std.should eql(0.0)
123
+ end
124
+
125
+ it "should have a name_sym method" do
126
+ pr = PlausibleRange.new(:min => 1, :max => 2, :name => "Demo Node")
127
+ pr.name_sym.should eql(:demo_node)
128
+ end
129
+
130
+ end
@@ -0,0 +1,70 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+
3
+ include Fathom
4
+
5
+ describe ValueDescription do
6
+
7
+ before(:all) do
8
+ @r1 = PlausibleRange.new(:min => 1, :max => 3, :name => 'r1')
9
+ @r2 = PlausibleRange.new(:min => 1, :max => 3, :name => 'r2')
10
+ @r3 = PlausibleRange.new(:min => 1, :max => 3, :name => 'r3')
11
+ @r4 = PlausibleRange.new(:min => 1, :max => 3, :name => 'r4')
12
+ end
13
+
14
+ before do
15
+ @vd = ValueDescription.new
16
+ end
17
+
18
+ it "should be able to initialize with no nodes" do
19
+ @vd.nodes.should be_empty
20
+ end
21
+
22
+ it "should be able to take a set of nodes to work with" do
23
+ vd = ValueDescription.new @r1, @r2, @r3, @r4
24
+ vd.nodes.should eql([@r1, @r2, @r3, @r4])
25
+ end
26
+
27
+ it "should respond to the names of the nodes being added" do
28
+ vd = ValueDescription.new @r1, @r2
29
+ vd.r1.should be_a(Float)
30
+ vd.r2.should be_a(Float)
31
+ end
32
+
33
+ it "should be able to add a node with add_node" do
34
+ @vd.add_node(@r3)
35
+ @vd.r3.should be_a(Float)
36
+ end
37
+
38
+ it "should be able to add a node with an alternative value method" do
39
+ @vd.add_node(@r3, :name)
40
+ @vd.r3.should eql('r3')
41
+ end
42
+
43
+ it "should be able to initialize with a hash, to define the value methods" do
44
+ vd = ValueDescription.new @r1 => :rand, @r2 => :name
45
+ vd.r1.should be_a(Float)
46
+ vd.r2.should eql('r2')
47
+ end
48
+
49
+ it "should convert node names to lower case, underscore values" do
50
+ pr = PlausibleRange.new(:min => 1, :max => 3, :name => 'Test Node')
51
+ vd = ValueDescription.new pr => :name
52
+ vd.test_node.should eql('Test Node')
53
+ end
54
+
55
+ it "should respond to process, which by default just adds up the values of the nodes" do
56
+ vd = ValueDescription.new @r1, @r2
57
+ output = vd.process
58
+ sum = vd.last_process.values.inject(0.0) {|s, e| s += e}
59
+ output.should eql(sum)
60
+ end
61
+
62
+ it "should be able to take an optional block at initialization" do
63
+ vd = ValueDescription.new(@r1 => :name) {|obj|
64
+ obj.r1
65
+ }
66
+ output = vd.process
67
+ output.should eql('r1')
68
+ end
69
+
70
+ end
@@ -0,0 +1,8 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
+ describe "Fathom" do
4
+ it "should have required gsl, but not included it (avoiding Rational conflicts)" do
5
+ Fathom.included_modules.should_not be_include(GSL)
6
+ lambda{GSL}.should_not raise_error
7
+ end
8
+ end