nimbus 0.10 → 1.0

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,57 @@
1
+ # encoding: UTF-8
2
+ require File.dirname(__FILE__) + '/spec_helper'
3
+
4
+ describe Nimbus::Forest do
5
+ before(:each) do
6
+ @config = Nimbus::Configuration.new
7
+ @config.load fixture_file('regression_config.yml')
8
+ @config.load_training_data
9
+ @forest = ::Nimbus::Forest.new @config
10
+ end
11
+
12
+ it 'grows a forest of N trees' do
13
+ @forest.trees.should == []
14
+ @config.forest_size.should == 3
15
+ @forest.grow
16
+ @forest.trees.size.should == @config.forest_size
17
+ @forest.trees.each{|t| t.should be_kind_of Hash}
18
+ end
19
+
20
+ it 'creates averaged predictions for individuals in the training set' do
21
+ @forest.predictions.should == {}
22
+ @forest.grow
23
+ (@forest.predictions.keys - (1..800).to_a ).should == []
24
+ @forest.predictions.values.each{|v| v.should be_kind_of Numeric}
25
+ end
26
+
27
+ it 'computes averaged SNP importances for every SNP' do
28
+ @forest.snp_importances.should == {}
29
+ @forest.grow
30
+ @forest.snp_importances.keys.sort.should == (1..200).to_a
31
+ @forest.snp_importances.values.each{|v| v.should be_kind_of Numeric}
32
+ end
33
+
34
+ it 'traverses a set of testing individuals through every tree in the forest and return predictions' do
35
+ @forest = @config.load_forest
36
+ @forest.predictions.should == {}
37
+
38
+ tree_structure = YAML.load(File.open fixture_file('regression_random_forest.yml'))
39
+ expected_predictions = {}
40
+ @config.read_testing_data{|individual|
41
+ individual_prediction = 0.0
42
+ tree_structure.each do |t|
43
+ individual_prediction = (individual_prediction + Nimbus::Tree.traverse(t, individual.snp_list)).round(5)
44
+ end
45
+ expected_predictions[individual.id] = (individual_prediction / 3).round(5)
46
+ }
47
+
48
+ @forest.traverse
49
+ @forest.predictions.should == expected_predictions
50
+ end
51
+
52
+ it 'can output forest structure in YAML format' do
53
+ @forest = @config.load_forest
54
+ YAML.load(File.open fixture_file('regression_random_forest.yml')) == YAML.load(@forest.to_yaml)
55
+ end
56
+
57
+ end
@@ -0,0 +1,13 @@
1
+ # encoding: UTF-8
2
+ require File.dirname(__FILE__) + '/spec_helper'
3
+
4
+ describe Nimbus::Individual do
5
+
6
+ it "stores id, fenotype and SNPs information for an individual" do
7
+ @individual = Nimbus::Individual.new(11, 33.275, [1,0,2,1])
8
+ @individual.id.should == 11
9
+ @individual.fenotype.should == 33.275
10
+ @individual.snp_list.should == [1,0,2,1]
11
+ end
12
+
13
+ end
@@ -1,2 +1,18 @@
1
1
  # encoding: UTF-8
2
- require File.dirname(__FILE__) + '/spec_helper'
2
+ require File.dirname(__FILE__) + '/spec_helper'
3
+
4
+
5
+ describe 'Nimbus module' do
6
+
7
+ it "manages a Nimbus::Application object" do
8
+ app = Nimbus.application
9
+ app.should be_kind_of Nimbus::Application
10
+ end
11
+
12
+ it "accepts setting an external Nimbus::Application" do
13
+ app = Nimbus::Application.new
14
+ Nimbus.application = app
15
+ Nimbus.application.should == app
16
+ end
17
+
18
+ end
@@ -1,2 +1,8 @@
1
1
  # encoding: UTF-8
2
- require File.dirname(__FILE__) + '/../lib/nimbus'
2
+ require File.dirname(__FILE__) + '/../lib/nimbus'
3
+ $fixtures_path = File.dirname(__FILE__) + '/fixtures'
4
+ ENV['nimbus_test'] = 'running_nimbus_tests'
5
+
6
+ def fixture_file(filename) #:nodoc:
7
+ return "#{$fixtures_path}/#{filename}"
8
+ end
@@ -0,0 +1,25 @@
1
+ # encoding: UTF-8
2
+ require File.dirname(__FILE__) + '/spec_helper'
3
+
4
+ describe Nimbus::TrainingSet do
5
+
6
+ it "stores individuals list and fenotype data for them" do
7
+ i1 = Nimbus::Individual.new 1, 11.0, [1,0,2,1]
8
+ i2 = Nimbus::Individual.new 2, 22.0, [2,1,2,2]
9
+ i3 = Nimbus::Individual.new 3, 33.0, [0,2,1,0]
10
+ @training_set = Nimbus::TrainingSet.new [i1, i3], {i1.id => 11.0, i3.id => 33.0}
11
+
12
+ @training_set.individuals.should == [i1, i3]
13
+ @training_set.ids_fenotypes.should == {i1.id => 11.0, i3.id => 33.0}
14
+ end
15
+
16
+ it "keeps track of ids of all individuals in the training set" do
17
+ i1 = Nimbus::Individual.new 1, 11.0, [1,0,2,1]
18
+ i2 = Nimbus::Individual.new 2, 22.0, [2,1,2,2]
19
+ i3 = Nimbus::Individual.new 3, 33.0, [0,2,1,0]
20
+ @training_set = Nimbus::TrainingSet.new [i1, i3], {i1.id => 11.0, i3.id => 33.0}
21
+
22
+ @training_set.all_ids.should == [1,3]
23
+ end
24
+
25
+ end
@@ -0,0 +1,130 @@
1
+ # encoding: UTF-8
2
+ require File.dirname(__FILE__) + '/spec_helper'
3
+
4
+ describe Nimbus::Tree do
5
+
6
+ before(:each) do
7
+ @config = Nimbus::Configuration.new
8
+ @config.load fixture_file('regression_config.yml')
9
+
10
+ @tree = Nimbus::Tree.new @config.tree
11
+ end
12
+
13
+ it "is initialized with tree config info" do
14
+ @tree.snp_total_count.should == 200
15
+ @tree.snp_sample_size.should == 60
16
+ @tree.node_min_size.should == 5
17
+ end
18
+
19
+ it "creates a tree structure when seeded with training data" do
20
+ @config.load_training_data
21
+ @tree.structure.should be_nil
22
+ @tree.seed(@config.training_set.individuals, @config.training_set.all_ids, @config.training_set.ids_fenotypes)
23
+ @tree.structure.should_not be_nil
24
+ @tree.structure.should be_kind_of Hash
25
+
26
+ @tree.structure.keys.first.should == @tree.used_snps.last
27
+ @tree.used_snps.should_not be_empty
28
+ end
29
+
30
+ it "split node in three when building a node and finds a suitable split" do
31
+ @config.load_training_data
32
+ @tree.stub!(:snps_random_sample).and_return((141..200).to_a) #189 is best split
33
+
34
+ @tree.individuals = @config.training_set.individuals
35
+ @tree.id_to_fenotype = @config.training_set.ids_fenotypes
36
+ @tree.used_snps = []
37
+ @tree.predictions = {}
38
+
39
+ branch = @tree.build_node @config.training_set.all_ids, Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
40
+ branch.keys.size.should == 1
41
+ branch.keys.first.should == 189
42
+ branch[189].size.should == 3
43
+ branch[189][0].should be_kind_of Hash
44
+ branch[189][1].should be_kind_of Hash
45
+ branch[189][2].should be_kind_of Hash
46
+ end
47
+
48
+ it "keeps track of all SNPs used for the tree" do
49
+ @config.load_training_data
50
+ snps = (131..190).to_a
51
+ @tree.stub!(:snps_random_sample).and_return(snps)
52
+ @tree.used_snps.should be_nil
53
+ @tree.seed(@config.training_set.individuals, @config.training_set.all_ids, @config.training_set.ids_fenotypes)
54
+ @tree.used_snps.size.should > 4
55
+ @tree.used_snps.each{|snp|
56
+ snps.include?(snp).should be_true
57
+ }
58
+ end
59
+
60
+ it "labels node when building a node and there is not a suitable split" do
61
+ @config.load_training_data
62
+ @tree.stub!(:snps_random_sample).and_return([33])
63
+
64
+ @tree.individuals = @config.training_set.individuals
65
+ @tree.id_to_fenotype = @config.training_set.ids_fenotypes
66
+ @tree.used_snps = []
67
+ @tree.predictions = {}
68
+
69
+ branch = @tree.build_node @config.training_set.all_ids, Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
70
+ branch[33][0].should be_kind_of Numeric
71
+ branch[33][1].should be_kind_of Numeric
72
+ branch[33][2].should be_kind_of Numeric
73
+ end
74
+
75
+ it "labels node when building a node with less individuals than the minimum node size" do
76
+ @config.load_training_data
77
+
78
+ @tree.individuals = @config.training_set.individuals
79
+ @tree.id_to_fenotype = @config.training_set.ids_fenotypes
80
+ @tree.used_snps = []
81
+ @tree.predictions = {}
82
+
83
+ label = @tree.build_node [1, 10, 33], Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
84
+ label.should be_kind_of Numeric
85
+
86
+ label = @tree.build_node [2, 10], Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
87
+ label.should be_kind_of Numeric
88
+
89
+ label = @tree.build_node [1, 10, 33], Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
90
+ label.should be_kind_of Numeric
91
+
92
+ label = @tree.build_node [108, 22, 10, 33], Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
93
+ label.should be_kind_of Numeric
94
+ end
95
+
96
+ it 'computes generalization error for the tree' do
97
+ @config.load_training_data
98
+ @tree.seed(@config.training_set.individuals, @config.training_set.all_ids, @config.training_set.ids_fenotypes)
99
+ @tree.generalization_error.should be_nil
100
+ @tree.generalization_error_from_oob((2..200).to_a)
101
+ @tree.generalization_error.should be_kind_of Numeric
102
+ @tree.generalization_error.should > 0.0
103
+ @tree.generalization_error.should < 100.0
104
+ end
105
+
106
+ it 'estimates importance for all SNPs' do
107
+ @config.load_training_data
108
+ @tree.seed(@config.training_set.individuals, @config.training_set.all_ids, @config.training_set.ids_fenotypes)
109
+ @tree.importances.should be_nil
110
+ @tree.estimate_importances((300..533).to_a)
111
+ @tree.importances.should be_kind_of Hash
112
+ @tree.importances.keys.should_not be_empty
113
+ (@tree.importances.keys - (1..200).to_a).should be_empty
114
+ end
115
+
116
+ it 'get prediction for an individual pushing it down a tree structure' do
117
+ tree_structure = YAML.load(File.open fixture_file('regression_random_forest.yml')).first
118
+ individual_data = [0]*200
119
+ prediction = Nimbus::Tree.traverse tree_structure, individual_data
120
+ prediction.should == 0.25043
121
+
122
+ individual_data[189-1] = 1
123
+ individual_data[4-1] = 1
124
+ individual_data[62-1] = 2
125
+ individual_data[146-1] = 2
126
+ prediction = Nimbus::Tree.traverse tree_structure, individual_data
127
+ prediction.should == -0.9854
128
+ end
129
+
130
+ end
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: nimbus
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: "0.10"
5
+ version: "1.0"
6
6
  platform: ruby
7
7
  authors:
8
8
  - "Juanjo Baz\xC3\xA1n"
@@ -11,7 +11,7 @@ autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
13
 
14
- date: 2011-08-24 00:00:00 Z
14
+ date: 2011-08-25 00:00:00 Z
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
17
  name: rspec
@@ -35,7 +35,6 @@ extra_rdoc_files: []
35
35
 
36
36
  files:
37
37
  - MIT-LICENSE.txt
38
- - README.rdoc
39
38
  - lib/nimbus/application.rb
40
39
  - lib/nimbus/configuration.rb
41
40
  - lib/nimbus/exceptions.rb
@@ -45,13 +44,22 @@ files:
45
44
  - lib/nimbus/training_set.rb
46
45
  - lib/nimbus/tree.rb
47
46
  - lib/nimbus.rb
48
- - spec/fixtures/config.yml
49
- - spec/fixtures/testing_regression.data
50
- - spec/fixtures/training_regression.data
47
+ - spec/configuration_spec.rb
48
+ - spec/fixtures/regression_config.yml
49
+ - spec/fixtures/regression_random_forest.yml
50
+ - spec/fixtures/regression_snp_importances.txt
51
+ - spec/fixtures/regression_testing.data
52
+ - spec/fixtures/regression_testing_file_predictions.txt
53
+ - spec/fixtures/regression_training.data
54
+ - spec/fixtures/regression_training_file_predictions.txt
55
+ - spec/forest_spec.rb
56
+ - spec/individual_spec.rb
51
57
  - spec/nimbus_spec.rb
52
58
  - spec/spec_helper.rb
59
+ - spec/training_set_spec.rb
60
+ - spec/tree_spec.rb
53
61
  - bin/nimbus
54
- homepage: http://github.com/xuanxu/nimbus
62
+ homepage: http://nimbusgem.org
55
63
  licenses: []
56
64
 
57
65
  post_install_message:
@@ -81,8 +89,17 @@ signing_key:
81
89
  specification_version: 3
82
90
  summary: Random Forest algorithm for Genomics
83
91
  test_files:
84
- - spec/fixtures/config.yml
85
- - spec/fixtures/testing_regression.data
86
- - spec/fixtures/training_regression.data
92
+ - spec/configuration_spec.rb
93
+ - spec/fixtures/regression_config.yml
94
+ - spec/fixtures/regression_random_forest.yml
95
+ - spec/fixtures/regression_snp_importances.txt
96
+ - spec/fixtures/regression_testing.data
97
+ - spec/fixtures/regression_testing_file_predictions.txt
98
+ - spec/fixtures/regression_training.data
99
+ - spec/fixtures/regression_training_file_predictions.txt
100
+ - spec/forest_spec.rb
101
+ - spec/individual_spec.rb
87
102
  - spec/nimbus_spec.rb
88
103
  - spec/spec_helper.rb
104
+ - spec/training_set_spec.rb
105
+ - spec/tree_spec.rb
@@ -1 +0,0 @@
1
- = Nimbus
@@ -1,11 +0,0 @@
1
- #Input files
2
- input:
3
- training: training_regression.data
4
- testing: testing_regression.data
5
-
6
- #Forest parameters
7
- forest:
8
- forest_size: 10 #how many trees
9
- SNP_sample_size_mtry: 60 #mtry
10
- SNP_total_count: 200
11
- node_min_size: 5