fathom 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. data/.bundle/config +2 -0
  2. data/.document +5 -0
  3. data/.gitignore +5 -0
  4. data/.rspec +1 -0
  5. data/Gemfile +5 -0
  6. data/Gemfile.lock +30 -0
  7. data/LICENSE +20 -0
  8. data/README.md +176 -0
  9. data/Rakefile +50 -0
  10. data/VERSION +1 -0
  11. data/autotest/discover.rb +1 -0
  12. data/lib/fathom.rb +68 -0
  13. data/lib/fathom/archive/conditional_probability_matrix.rb +116 -0
  14. data/lib/fathom/archive/n2.rb +198 -0
  15. data/lib/fathom/archive/n3.rb +119 -0
  16. data/lib/fathom/archive/node.rb +74 -0
  17. data/lib/fathom/archive/noodle.rb +136 -0
  18. data/lib/fathom/archive/scratch.rb +45 -0
  19. data/lib/fathom/basic_node.rb +8 -0
  20. data/lib/fathom/causal_graph.rb +12 -0
  21. data/lib/fathom/combined_plausibilities.rb +12 -0
  22. data/lib/fathom/concept.rb +83 -0
  23. data/lib/fathom/data_node.rb +51 -0
  24. data/lib/fathom/import.rb +68 -0
  25. data/lib/fathom/import/csv_import.rb +60 -0
  26. data/lib/fathom/import/yaml_import.rb +53 -0
  27. data/lib/fathom/inverter.rb +21 -0
  28. data/lib/fathom/knowledge_base.rb +23 -0
  29. data/lib/fathom/monte_carlo_set.rb +76 -0
  30. data/lib/fathom/node_utilities.rb +8 -0
  31. data/lib/fathom/plausible_range.rb +82 -0
  32. data/lib/fathom/value_aggregator.rb +11 -0
  33. data/lib/fathom/value_description.rb +79 -0
  34. data/lib/fathom/value_multiplier.rb +18 -0
  35. data/lib/options_hash.rb +186 -0
  36. data/spec/fathom/data_node_spec.rb +61 -0
  37. data/spec/fathom/import/csv_import_spec.rb +36 -0
  38. data/spec/fathom/import/yaml_import_spec.rb +40 -0
  39. data/spec/fathom/import_spec.rb +22 -0
  40. data/spec/fathom/knowledge_base_spec.rb +16 -0
  41. data/spec/fathom/monte_carlo_set_spec.rb +58 -0
  42. data/spec/fathom/plausible_range_spec.rb +130 -0
  43. data/spec/fathom/value_description_spec.rb +70 -0
  44. data/spec/fathom_spec.rb +8 -0
  45. data/spec/spec_helper.rb +13 -0
  46. data/spec/support/demo.yml +17 -0
  47. metadata +135 -0
@@ -0,0 +1,61 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+
3
+ include Fathom
4
+
5
+ describe DataNode do
6
+
7
+ before do
8
+ @values = [1,2,3,4,5]
9
+ @opts = {:values => @values}
10
+ @dn = DataNode.new(@opts)
11
+ @vector = GSL::Vector.ary_to_gv(@values)
12
+ end
13
+
14
+ it "should initialize requiring values in the options" do
15
+ lambda{DataNode.new}.should raise_error(/values/)
16
+ lambda{DataNode.new(:values => @values)}.should_not raise_error
17
+ end
18
+
19
+ it "should make the values readable" do
20
+ @dn.values.should eql(@values)
21
+ end
22
+
23
+ it "should allow an optional name for the node" do
24
+ @dn = DataNode.new(:values => @values, :name => "Demo Name")
25
+ @dn.name.should eql("Demo Name")
26
+ end
27
+
28
+ # Note: the distributions aren't defined here yet, so this will eventually be
29
+ # some sort of Fathom::Distribution::Constant.
30
+ it "should take an optional distribiution" do
31
+ @dn = DataNode.new(@opts.merge(:distribution => :some_distribution))
32
+ @dn.distribution.should eql(:some_distribution)
33
+ end
34
+
35
+ it "should create a vector from the values" do
36
+ @dn.vector.should ==(@vector)
37
+ end
38
+
39
+ it "should provide the standard deviation" do
40
+ @dn.standard_deviation.should ==(@vector.sd)
41
+ end
42
+
43
+ it "should alias sd and std for standard_deviation" do
44
+ @dn.sd.should eql(@dn.standard_deviation)
45
+ @dn.std.should eql(@dn.standard_deviation)
46
+ end
47
+
48
+ it "should be able to produce the mean" do
49
+ @dn.mean.should eql(@vector.mean)
50
+ end
51
+
52
+ it "should generate a random variable that fits the data's distribution" do
53
+ @dn.rand.should be_a(Float)
54
+ end
55
+
56
+ it "should have a name_sym method" do
57
+ dn = DataNode.new(:name => "Demo Node", :values => [1,2,3])
58
+ dn.name_sym.should eql(:demo_node)
59
+ end
60
+
61
+ end
@@ -0,0 +1,36 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../../spec_helper')
2
+
3
+ include Fathom
4
+
5
+ describe CSVImport do
6
+
7
+ before do
8
+ @content =<<-END
9
+ this,and,that
10
+ 1,2,3
11
+ 4,5,6
12
+ 7,8,9
13
+ END
14
+
15
+ @opts = {:content => @content}
16
+ @ci = CSVImport.new(@opts)
17
+ @result = @ci.import
18
+ end
19
+
20
+ it "should not work unless content is set" do
21
+ lambda{CSVImport.new.import}.should raise_error(NoMethodError)
22
+ lambda{CSVImport.new(@opts)}.should_not raise_error
23
+ end
24
+
25
+ it "should create as many data nodes as there are columns" do
26
+ @result.size.should eql(3)
27
+ @result.each {|dn| dn.should be_a(DataNode)}
28
+ end
29
+
30
+ it "should import the values from each column into each data node" do
31
+ @result[0].values.should eql([1,4,7])
32
+ @result[1].values.should eql([2,5,8])
33
+ @result[2].values.should eql([3,6,9])
34
+ end
35
+
36
+ end
@@ -0,0 +1,40 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../../spec_helper')
2
+
3
+ include Fathom
4
+
5
+ describe YAMLImport do
6
+
7
+ before do
8
+ @demo_yaml_location = File.expand_path(File.dirname(__FILE__) + "/../../support/demo.yml")
9
+ @demo_yaml = open(@demo_yaml_location).read
10
+ @opts = {:content => @demo_yaml}
11
+ @yi = YAMLImport.new(@opts)
12
+ @result = @yi.import
13
+ end
14
+
15
+ it "should not work unless content is set" do
16
+ lambda{YAMLImport.new.import}.should raise_error
17
+ lambda{YAMLImport.new(@opts)}.should_not raise_error
18
+ end
19
+
20
+ it "should create PlausibleRange nodes for any hashes with at least a min and max key in it" do
21
+ @result.find {|r| r.name == "CO2 Emissions"}.should_not be_nil
22
+ end
23
+
24
+ it "should not create a PlausibleRange for entries missing min and max" do
25
+ @result.find {|r| r.name == "Invalid Hash"}.should be_nil
26
+ end
27
+
28
+ it "should be able to create a PlausibleRange with more complete information" do
29
+ more_complete_range = @result.find {|r| r.name == "More Complete Range"}
30
+ more_complete_range.ci.should eql(0.6)
31
+ more_complete_range.description.should eql('Some good description')
32
+ end
33
+
34
+ it "should create DataNodes for entries that have an array of information" do
35
+ data_node = @result.find {|r| r.name == 'CO2 Readings'}
36
+ data_node.should be_a(DataNode)
37
+ data_node.values.should eql([10,20,30])
38
+ end
39
+
40
+ end
@@ -0,0 +1,22 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+
3
+ include Fathom
4
+
5
+ describe Import do
6
+
7
+ before do
8
+ @content = 'some content'
9
+ @options = OptionsHash.new({:content => @content})
10
+ @i = Import.new(@options)
11
+ @values = [1,2,3,4,5]
12
+ end
13
+
14
+ it "should initialize with a optional content" do
15
+ @i.content.should eql(@content)
16
+ end
17
+
18
+ it "should record the initialization options" do
19
+ @i.options.should eql(@options)
20
+ end
21
+
22
+ end
@@ -0,0 +1,16 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+
3
+ include Fathom
4
+
5
+ describe KnowledgeBase do
6
+
7
+ before do
8
+ @kb = KnowledgeBase.new
9
+ end
10
+
11
+ it "should be able to add a node" do
12
+ @dn = DataNode.new(:name => :new_node, :values => [1,2,3])
13
+ @kb[:new_node] = @dn
14
+ @kb[:new_node].should eql(@dn)
15
+ end
16
+ end
@@ -0,0 +1,58 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+
3
+ include Fathom
4
+
5
+ describe MonteCarloSet do
6
+
7
+ before(:all) do
8
+ @q1_sales = PlausibleRange.new(:min => 10, :max => 20, :hard_lower_bound => 0, :name => "First Quarter Sales")
9
+ @q1_prices = PlausibleRange.new(:min => 10_000, :max => 12_000, :name => "First Quarter Prices")
10
+ @q1_sales_commissions = PlausibleRange.new(:min => 0.2, :max => 0.2, :name => "Sales Commission Rate")
11
+
12
+ @q1_gross_margins = ValueDescription.new(@q1_sales, @q1_prices, @q1_sales_commissions) do |random_sample|
13
+ revenue = (random_sample.first_quarter_sales * random_sample.first_quarter_prices)
14
+ commissions_paid = random_sample.sales_commission_rate * revenue
15
+ gross_margins = revenue - commissions_paid
16
+ {:revenue => revenue, :commissions_paid => commissions_paid, :gross_margins => gross_margins}
17
+ end
18
+ end
19
+
20
+ before do
21
+ @mcs = MonteCarloSet.new(@q1_gross_margins)
22
+ end
23
+
24
+ it "should initialize with a ValueDescription" do
25
+ lambda{MonteCarloSet.new}.should raise_error
26
+ lambda{MonteCarloSet.new(@q1_gross_margins)}.should_not raise_error
27
+ end
28
+
29
+ it "should expose the value_description" do
30
+ @mcs.value_description.should eql(@q1_gross_margins)
31
+ end
32
+
33
+ it "should process with the default number of runs at 10,000", :slow => true do
34
+ lambda{@mcs.process}.should_not raise_error
35
+ @mcs.samples_taken.should eql(10_000)
36
+ end
37
+
38
+ it "should be able to process with a specified number of runs" do
39
+ @mcs.process(3)
40
+ @mcs.samples_taken.should eql(3)
41
+ end
42
+
43
+ it "should define lookup methods for all keys in the result set" do
44
+ @mcs.process(1)
45
+ @mcs.revenue.should be_a(GSL::Vector)
46
+ @mcs.revenue.length.should eql(1)
47
+ @mcs.commissions_paid.should be_a(GSL::Vector)
48
+ @mcs.commissions_paid.length.should eql(1)
49
+ @mcs.gross_margins.should be_a(GSL::Vector)
50
+ @mcs.gross_margins.length.should eql(1)
51
+ end
52
+
53
+ it "should be resetable" do
54
+ @mcs.process(1)
55
+ @mcs.reset!
56
+ lambda{@mcs.process(1)}.should_not raise_error
57
+ end
58
+ end
@@ -0,0 +1,130 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+
3
+ include Fathom
4
+
5
+ describe PlausibleRange do
6
+
7
+ before do
8
+ @d = {:upper_bound => 10, :lower_bound => 1}
9
+ @pr = PlausibleRange.new(@d)
10
+ end
11
+
12
+ it "should require an upper and lower bound" do
13
+ lambda{PlausibleRange.new}.should raise_error(/upper/)
14
+ lambda{PlausibleRange.new(:upper_bound => 10)}.should raise_error(/lower/)
15
+ lambda{PlausibleRange.new(:upper_bound => 10, :lower_bound => 1)}.should_not raise_error
16
+ end
17
+
18
+ it "should expose the upper bound and lower bound" do
19
+ @pr.upper_bound.should eql(10)
20
+ @pr.lower_bound.should eql(1)
21
+ end
22
+
23
+ it "should use min or max instead of lower_bound or upper_bound" do
24
+ pr = PlausibleRange.new(:min => 1, :max => 10)
25
+ pr.min.should eql(1)
26
+ pr.max.should eql(10)
27
+ pr.lower_bound.should eql(1)
28
+ pr.upper_bound.should eql(10)
29
+ end
30
+
31
+ it "should allow an optional hard_lower_bound" do
32
+ pr = PlausibleRange.new(@d.merge(:hard_lower_bound => 0))
33
+ pr.hard_lower_bound.should eql(0)
34
+ end
35
+
36
+ it "should set the lower_bound to the hard_lower_bound if the hard_lower_bound is greater than the lower_bound" do
37
+ pr = PlausibleRange.new(:lower_bound => 2, :hard_lower_bound => 3, :upper_bound => 4)
38
+ pr.lower_bound.should eql(3)
39
+ end
40
+
41
+ it "should allow an optional hard_upper_bound" do
42
+ pr = PlausibleRange.new(@d.merge(:hard_upper_bound => 20))
43
+ pr.hard_upper_bound.should eql(20)
44
+ end
45
+
46
+ it "should set the upper_bound to the hard_upper_bound if the hard_upper_bound is less than the upper_bound" do
47
+ pr = PlausibleRange.new(:lower_bound => 2, :hard_upper_bound => 3, :upper_bound => 4)
48
+ pr.upper_bound.should eql(3)
49
+ end
50
+
51
+ it "should have a default confidence interval of 90%" do
52
+ @pr.confidence_interval.should eql(0.9)
53
+ end
54
+
55
+ it "should be able to instantiate with confidence_interval" do
56
+ pr = PlausibleRange.new(@d.merge(:confidence_interval => 0.8))
57
+ pr.confidence_interval.should eql(0.8)
58
+ end
59
+
60
+ it "should be able to use ci instead of confidence_interval" do
61
+ pr = PlausibleRange.new(@d.merge(:ci => 0.8))
62
+ pr.ci.should eql(0.8)
63
+ pr.confidence_interval.should eql(0.8)
64
+ end
65
+
66
+ it "should be able to calculate the midpoint" do
67
+ @pr.midpoint.should eql(5.5)
68
+ end
69
+
70
+ # TODO: Make this more accurate when we start using the GSL stuff more
71
+ it "should be able to calculate the standard deviation" do
72
+ @pr.standard_deviation.should be_close(2.73556231003039, 0.00000001)
73
+ end
74
+
75
+ it "should be able to use std instead of standard_deviation" do
76
+ @pr.std.should eql(@pr.standard_deviation)
77
+ end
78
+
79
+ it "should be able to produce a random value from within the distribution" do
80
+ @pr.should be_respond_to(:rand)
81
+ @pr.rand.should be_a(Float)
82
+ end
83
+
84
+ it "should be able to produce an array of random values" do
85
+ @pr.array_of_random_values.should be_an(Array)
86
+ @pr.array_of_random_values.length.should eql(10)
87
+ @pr.array_of_random_values(13).length.should eql(13)
88
+ end
89
+
90
+ it "should alias array_of_random_values with to_a" do
91
+ @pr.to_a.should be_an(Array)
92
+ @pr.to_a.length.should eql(10)
93
+ @pr.to_a(13).length.should eql(13)
94
+ end
95
+
96
+ it "should be able to produce a vector of random values" do
97
+ @pr.vector_of_random_values.should be_a(GSL::Vector)
98
+ @pr.vector_of_random_values.length.should eql(10)
99
+ @pr.vector_of_random_values(12).length.should eql(12)
100
+ end
101
+
102
+ it "should alias vector_of_random_values with to_v" do
103
+ @pr.to_v.should be_a(GSL::Vector)
104
+ @pr.to_v.length.should eql(10)
105
+ @pr.to_v(12).length.should eql(12)
106
+ end
107
+
108
+ it "should record the name, if one is provided" do
109
+ pr = PlausibleRange.new(@d.merge(:name => 'pr1'))
110
+ pr.name.should eql('pr1')
111
+ end
112
+
113
+ it "should record the description, if one is provided" do
114
+ pr = PlausibleRange.new(@d.merge(:description => 'some description'))
115
+ pr.description.should eql('some description')
116
+ end
117
+
118
+ it "should have no problems with a range with matching lower and upper bounds" do
119
+ pr = PlausibleRange.new(:min => 1, :max => 1)
120
+ pr.rand.should eql(1.0)
121
+ pr.midpoint.should eql(1.0)
122
+ pr.std.should eql(0.0)
123
+ end
124
+
125
+ it "should have a name_sym method" do
126
+ pr = PlausibleRange.new(:min => 1, :max => 2, :name => "Demo Node")
127
+ pr.name_sym.should eql(:demo_node)
128
+ end
129
+
130
+ end
@@ -0,0 +1,70 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+
3
+ include Fathom
4
+
5
+ describe ValueDescription do
6
+
7
+ before(:all) do
8
+ @r1 = PlausibleRange.new(:min => 1, :max => 3, :name => 'r1')
9
+ @r2 = PlausibleRange.new(:min => 1, :max => 3, :name => 'r2')
10
+ @r3 = PlausibleRange.new(:min => 1, :max => 3, :name => 'r3')
11
+ @r4 = PlausibleRange.new(:min => 1, :max => 3, :name => 'r4')
12
+ end
13
+
14
+ before do
15
+ @vd = ValueDescription.new
16
+ end
17
+
18
+ it "should be able to initialize with no nodes" do
19
+ @vd.nodes.should be_empty
20
+ end
21
+
22
+ it "should be able to take a set of nodes to work with" do
23
+ vd = ValueDescription.new @r1, @r2, @r3, @r4
24
+ vd.nodes.should eql([@r1, @r2, @r3, @r4])
25
+ end
26
+
27
+ it "should respond to the names of the nodes being added" do
28
+ vd = ValueDescription.new @r1, @r2
29
+ vd.r1.should be_a(Float)
30
+ vd.r2.should be_a(Float)
31
+ end
32
+
33
+ it "should be able to add a node with add_node" do
34
+ @vd.add_node(@r3)
35
+ @vd.r3.should be_a(Float)
36
+ end
37
+
38
+ it "should be able to add a node with an alternative value method" do
39
+ @vd.add_node(@r3, :name)
40
+ @vd.r3.should eql('r3')
41
+ end
42
+
43
+ it "should be able to initialize with a hash, to define the value methods" do
44
+ vd = ValueDescription.new @r1 => :rand, @r2 => :name
45
+ vd.r1.should be_a(Float)
46
+ vd.r2.should eql('r2')
47
+ end
48
+
49
+ it "should convert node names to lower case, underscore values" do
50
+ pr = PlausibleRange.new(:min => 1, :max => 3, :name => 'Test Node')
51
+ vd = ValueDescription.new pr => :name
52
+ vd.test_node.should eql('Test Node')
53
+ end
54
+
55
+ it "should respond to process, which by default just adds up the values of the nodes" do
56
+ vd = ValueDescription.new @r1, @r2
57
+ output = vd.process
58
+ sum = vd.last_process.values.inject(0.0) {|s, e| s += e}
59
+ output.should eql(sum)
60
+ end
61
+
62
+ it "should be able to take an optional block at initialization" do
63
+ vd = ValueDescription.new(@r1 => :name) {|obj|
64
+ obj.r1
65
+ }
66
+ output = vd.process
67
+ output.should eql('r1')
68
+ end
69
+
70
+ end
@@ -0,0 +1,8 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
+ describe "Fathom" do
4
+ it "should have required gsl, but not included it (avoiding Rational conflicts)" do
5
+ Fathom.included_modules.should_not be_include(GSL)
6
+ lambda{GSL}.should_not raise_error
7
+ end
8
+ end