nimbus 0.10 → 1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,57 @@
1
+ # encoding: UTF-8
2
+ require File.dirname(__FILE__) + '/spec_helper'
3
+
4
+ describe Nimbus::Forest do
5
+ before(:each) do
6
+ @config = Nimbus::Configuration.new
7
+ @config.load fixture_file('regression_config.yml')
8
+ @config.load_training_data
9
+ @forest = ::Nimbus::Forest.new @config
10
+ end
11
+
12
+ it 'grows a forest of N trees' do
13
+ @forest.trees.should == []
14
+ @config.forest_size.should == 3
15
+ @forest.grow
16
+ @forest.trees.size.should == @config.forest_size
17
+ @forest.trees.each{|t| t.should be_kind_of Hash}
18
+ end
19
+
20
+ it 'creates averaged predictions for individuals in the training set' do
21
+ @forest.predictions.should == {}
22
+ @forest.grow
23
+ (@forest.predictions.keys - (1..800).to_a ).should == []
24
+ @forest.predictions.values.each{|v| v.should be_kind_of Numeric}
25
+ end
26
+
27
+ it 'computes averaged SNP importances for every SNP' do
28
+ @forest.snp_importances.should == {}
29
+ @forest.grow
30
+ @forest.snp_importances.keys.sort.should == (1..200).to_a
31
+ @forest.snp_importances.values.each{|v| v.should be_kind_of Numeric}
32
+ end
33
+
34
+ it 'traverses a set of testing individuals through every tree in the forest and return predictions' do
35
+ @forest = @config.load_forest
36
+ @forest.predictions.should == {}
37
+
38
+ tree_structure = YAML.load(File.open fixture_file('regression_random_forest.yml'))
39
+ expected_predictions = {}
40
+ @config.read_testing_data{|individual|
41
+ individual_prediction = 0.0
42
+ tree_structure.each do |t|
43
+ individual_prediction = (individual_prediction + Nimbus::Tree.traverse(t, individual.snp_list)).round(5)
44
+ end
45
+ expected_predictions[individual.id] = (individual_prediction / 3).round(5)
46
+ }
47
+
48
+ @forest.traverse
49
+ @forest.predictions.should == expected_predictions
50
+ end
51
+
52
+ it 'can output forest structure in YAML format' do
53
+ @forest = @config.load_forest
54
+ YAML.load(File.open fixture_file('regression_random_forest.yml')) == YAML.load(@forest.to_yaml)
55
+ end
56
+
57
+ end
@@ -0,0 +1,13 @@
1
+ # encoding: UTF-8
2
+ require File.dirname(__FILE__) + '/spec_helper'
3
+
4
+ describe Nimbus::Individual do
5
+
6
+ it "stores id, fenotype and SNPs information for an individual" do
7
+ @individual = Nimbus::Individual.new(11, 33.275, [1,0,2,1])
8
+ @individual.id.should == 11
9
+ @individual.fenotype.should == 33.275
10
+ @individual.snp_list.should == [1,0,2,1]
11
+ end
12
+
13
+ end
@@ -1,2 +1,18 @@
1
1
  # encoding: UTF-8
2
- require File.dirname(__FILE__) + '/spec_helper'
2
+ require File.dirname(__FILE__) + '/spec_helper'
3
+
4
+
5
+ describe 'Nimbus module' do
6
+
7
+ it "manages a Nimbus::Application object" do
8
+ app = Nimbus.application
9
+ app.should be_kind_of Nimbus::Application
10
+ end
11
+
12
+ it "accepts setting an external Nimbus::Application" do
13
+ app = Nimbus::Application.new
14
+ Nimbus.application = app
15
+ Nimbus.application.should == app
16
+ end
17
+
18
+ end
@@ -1,2 +1,8 @@
1
1
  # encoding: UTF-8
2
- require File.dirname(__FILE__) + '/../lib/nimbus'
2
+ require File.dirname(__FILE__) + '/../lib/nimbus'
3
+ $fixtures_path = "#{File.dirname(__FILE__)}/fixtures" # directory holding the spec fixture files
4
+ ENV['nimbus_test'] = 'running_nimbus_tests' # environment flag set while the specs run
5
+
6
+ def fixture_file(filename) #:nodoc:
7
+ return "#{$fixtures_path}/#{filename}"
8
+ end
@@ -0,0 +1,25 @@
1
+ # encoding: UTF-8
2
+ require File.dirname(__FILE__) + '/spec_helper'
3
+
4
+ describe Nimbus::TrainingSet do
5
+
6
+ it "stores individuals list and fenotype data for them" do
7
+ i1 = Nimbus::Individual.new 1, 11.0, [1,0,2,1]
8
+ i2 = Nimbus::Individual.new 2, 22.0, [2,1,2,2]
9
+ i3 = Nimbus::Individual.new 3, 33.0, [0,2,1,0]
10
+ @training_set = Nimbus::TrainingSet.new [i1, i3], {i1.id => 11.0, i3.id => 33.0}
11
+
12
+ @training_set.individuals.should == [i1, i3]
13
+ @training_set.ids_fenotypes.should == {i1.id => 11.0, i3.id => 33.0}
14
+ end
15
+
16
+ it "keeps track of ids of all individuals in the training set" do
17
+ i1 = Nimbus::Individual.new 1, 11.0, [1,0,2,1]
18
+ i2 = Nimbus::Individual.new 2, 22.0, [2,1,2,2]
19
+ i3 = Nimbus::Individual.new 3, 33.0, [0,2,1,0]
20
+ @training_set = Nimbus::TrainingSet.new [i1, i3], {i1.id => 11.0, i3.id => 33.0}
21
+
22
+ @training_set.all_ids.should == [1,3]
23
+ end
24
+
25
+ end
@@ -0,0 +1,130 @@
1
+ # encoding: UTF-8
2
+ require File.dirname(__FILE__) + '/spec_helper'
3
+
4
+ describe Nimbus::Tree do
5
+
6
+ before(:each) do
7
+ @config = Nimbus::Configuration.new
8
+ @config.load fixture_file('regression_config.yml')
9
+
10
+ @tree = Nimbus::Tree.new @config.tree
11
+ end
12
+
13
+ it "is initialized with tree config info" do
14
+ @tree.snp_total_count.should == 200
15
+ @tree.snp_sample_size.should == 60
16
+ @tree.node_min_size.should == 5
17
+ end
18
+
19
+ it "creates a tree structure when seeded with training data" do
20
+ @config.load_training_data
21
+ @tree.structure.should be_nil
22
+ @tree.seed(@config.training_set.individuals, @config.training_set.all_ids, @config.training_set.ids_fenotypes)
23
+ @tree.structure.should_not be_nil
24
+ @tree.structure.should be_kind_of Hash
25
+
26
+ @tree.structure.keys.first.should == @tree.used_snps.last
27
+ @tree.used_snps.should_not be_empty
28
+ end
29
+
30
+ it "split node in three when building a node and finds a suitable split" do
31
+ @config.load_training_data
32
+ @tree.stub!(:snps_random_sample).and_return((141..200).to_a) #189 is best split
33
+
34
+ @tree.individuals = @config.training_set.individuals
35
+ @tree.id_to_fenotype = @config.training_set.ids_fenotypes
36
+ @tree.used_snps = []
37
+ @tree.predictions = {}
38
+
39
+ branch = @tree.build_node @config.training_set.all_ids, Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
40
+ branch.keys.size.should == 1
41
+ branch.keys.first.should == 189
42
+ branch[189].size.should == 3
43
+ branch[189][0].should be_kind_of Hash
44
+ branch[189][1].should be_kind_of Hash
45
+ branch[189][2].should be_kind_of Hash
46
+ end
47
+
48
+ it "keeps track of all SNPs used for the tree" do
49
+ @config.load_training_data
50
+ snps = (131..190).to_a
51
+ @tree.stub!(:snps_random_sample).and_return(snps)
52
+ @tree.used_snps.should be_nil
53
+ @tree.seed(@config.training_set.individuals, @config.training_set.all_ids, @config.training_set.ids_fenotypes)
54
+ @tree.used_snps.size.should > 4
55
+ @tree.used_snps.each{|snp|
56
+ snps.include?(snp).should be_true
57
+ }
58
+ end
59
+
60
+ it "labels node when building a node and there is not a suitable split" do
61
+ @config.load_training_data
62
+ @tree.stub!(:snps_random_sample).and_return([33])
63
+
64
+ @tree.individuals = @config.training_set.individuals
65
+ @tree.id_to_fenotype = @config.training_set.ids_fenotypes
66
+ @tree.used_snps = []
67
+ @tree.predictions = {}
68
+
69
+ branch = @tree.build_node @config.training_set.all_ids, Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
70
+ branch[33][0].should be_kind_of Numeric
71
+ branch[33][1].should be_kind_of Numeric
72
+ branch[33][2].should be_kind_of Numeric
73
+ end
74
+
75
+ it "labels node when building a node with less individuals than the minimum node size" do
76
+ @config.load_training_data
77
+
78
+ @tree.individuals = @config.training_set.individuals
79
+ @tree.id_to_fenotype = @config.training_set.ids_fenotypes
80
+ @tree.used_snps = []
81
+ @tree.predictions = {}
82
+
83
+ label = @tree.build_node [1, 10, 33], Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
84
+ label.should be_kind_of Numeric
85
+
86
+ label = @tree.build_node [2, 10], Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
87
+ label.should be_kind_of Numeric
88
+
89
+ label = @tree.build_node [1, 10, 33], Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
90
+ label.should be_kind_of Numeric
91
+
92
+ label = @tree.build_node [108, 22, 10, 33], Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
93
+ label.should be_kind_of Numeric
94
+ end
95
+
96
+ it 'computes generalization error for the tree' do
97
+ @config.load_training_data
98
+ @tree.seed(@config.training_set.individuals, @config.training_set.all_ids, @config.training_set.ids_fenotypes)
99
+ @tree.generalization_error.should be_nil # not computed by seeding alone
100
+ @tree.generalization_error_from_oob((2..200).to_a)
101
+ @tree.generalization_error.should be_kind_of(Numeric)
102
+ @tree.generalization_error.should > 0.0
103
+ @tree.generalization_error.should < 100.0
104
+ end
105
+
106
+ it 'estimates importance for all SNPs' do
107
+ @config.load_training_data
108
+ @tree.seed(@config.training_set.individuals, @config.training_set.all_ids, @config.training_set.ids_fenotypes)
109
+ @tree.importances.should be_nil
110
+ @tree.estimate_importances((300..533).to_a)
111
+ @tree.importances.should be_kind_of Hash
112
+ @tree.importances.keys.should_not be_empty
113
+ (@tree.importances.keys - (1..200).to_a).should be_empty
114
+ end
115
+
116
+ it 'get prediction for an individual pushing it down a tree structure' do
117
+ tree_structure = YAML.load(File.open fixture_file('regression_random_forest.yml')).first
118
+ individual_data = [0]*200
119
+ prediction = Nimbus::Tree.traverse tree_structure, individual_data
120
+ prediction.should == 0.25043
121
+
122
+ individual_data[189-1] = 1
123
+ individual_data[4-1] = 1
124
+ individual_data[62-1] = 2
125
+ individual_data[146-1] = 2
126
+ prediction = Nimbus::Tree.traverse tree_structure, individual_data
127
+ prediction.should == -0.9854
128
+ end
129
+
130
+ end
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: nimbus
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: "0.10"
5
+ version: "1.0"
6
6
  platform: ruby
7
7
  authors:
8
8
  - "Juanjo Baz\xC3\xA1n"
@@ -11,7 +11,7 @@ autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
13
 
14
- date: 2011-08-24 00:00:00 Z
14
+ date: 2011-08-25 00:00:00 Z
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
17
  name: rspec
@@ -35,7 +35,6 @@ extra_rdoc_files: []
35
35
 
36
36
  files:
37
37
  - MIT-LICENSE.txt
38
- - README.rdoc
39
38
  - lib/nimbus/application.rb
40
39
  - lib/nimbus/configuration.rb
41
40
  - lib/nimbus/exceptions.rb
@@ -45,13 +44,22 @@ files:
45
44
  - lib/nimbus/training_set.rb
46
45
  - lib/nimbus/tree.rb
47
46
  - lib/nimbus.rb
48
- - spec/fixtures/config.yml
49
- - spec/fixtures/testing_regression.data
50
- - spec/fixtures/training_regression.data
47
+ - spec/configuration_spec.rb
48
+ - spec/fixtures/regression_config.yml
49
+ - spec/fixtures/regression_random_forest.yml
50
+ - spec/fixtures/regression_snp_importances.txt
51
+ - spec/fixtures/regression_testing.data
52
+ - spec/fixtures/regression_testing_file_predictions.txt
53
+ - spec/fixtures/regression_training.data
54
+ - spec/fixtures/regression_training_file_predictions.txt
55
+ - spec/forest_spec.rb
56
+ - spec/individual_spec.rb
51
57
  - spec/nimbus_spec.rb
52
58
  - spec/spec_helper.rb
59
+ - spec/training_set_spec.rb
60
+ - spec/tree_spec.rb
53
61
  - bin/nimbus
54
- homepage: http://github.com/xuanxu/nimbus
62
+ homepage: http://nimbusgem.org
55
63
  licenses: []
56
64
 
57
65
  post_install_message:
@@ -81,8 +89,17 @@ signing_key:
81
89
  specification_version: 3
82
90
  summary: Random Forest algorithm for Genomics
83
91
  test_files:
84
- - spec/fixtures/config.yml
85
- - spec/fixtures/testing_regression.data
86
- - spec/fixtures/training_regression.data
92
+ - spec/configuration_spec.rb
93
+ - spec/fixtures/regression_config.yml
94
+ - spec/fixtures/regression_random_forest.yml
95
+ - spec/fixtures/regression_snp_importances.txt
96
+ - spec/fixtures/regression_testing.data
97
+ - spec/fixtures/regression_testing_file_predictions.txt
98
+ - spec/fixtures/regression_training.data
99
+ - spec/fixtures/regression_training_file_predictions.txt
100
+ - spec/forest_spec.rb
101
+ - spec/individual_spec.rb
87
102
  - spec/nimbus_spec.rb
88
103
  - spec/spec_helper.rb
104
+ - spec/training_set_spec.rb
105
+ - spec/tree_spec.rb
@@ -1 +0,0 @@
1
- = Nimbus
@@ -1,11 +0,0 @@
1
- #Input files
2
- input:
3
- training: training_regression.data
4
- testing: testing_regression.data
5
-
6
- #Forest parameters
7
- forest:
8
- forest_size: 10 #how many trees
9
- SNP_sample_size_mtry: 60 #mtry
10
- SNP_total_count: 200
11
- node_min_size: 5