nimbus 0.10 → 1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/nimbus/application.rb +2 -1
- data/lib/nimbus/configuration.rb +15 -6
- data/lib/nimbus/tree.rb +1 -1
- data/spec/configuration_spec.rb +92 -0
- data/spec/fixtures/regression_config.yml +12 -0
- data/spec/fixtures/regression_random_forest.yml +1741 -0
- data/spec/fixtures/regression_snp_importances.txt +200 -0
- data/spec/fixtures/{testing_regression.data → regression_testing.data} +0 -0
- data/spec/fixtures/regression_testing_file_predictions.txt +200 -0
- data/spec/fixtures/{training_regression.data → regression_training.data} +0 -0
- data/spec/fixtures/regression_training_file_predictions.txt +758 -0
- data/spec/forest_spec.rb +57 -0
- data/spec/individual_spec.rb +13 -0
- data/spec/nimbus_spec.rb +17 -1
- data/spec/spec_helper.rb +7 -1
- data/spec/training_set_spec.rb +25 -0
- data/spec/tree_spec.rb +130 -0
- metadata +27 -10
- data/README.rdoc +0 -1
- data/spec/fixtures/config.yml +0 -11
data/spec/forest_spec.rb
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
require File.dirname(__FILE__) + '/spec_helper'
|
3
|
+
|
4
|
+
describe Nimbus::Forest do
|
5
|
+
before(:each) do
|
6
|
+
@config = Nimbus::Configuration.new
|
7
|
+
@config.load fixture_file('regression_config.yml')
|
8
|
+
@config.load_training_data
|
9
|
+
@forest = ::Nimbus::Forest.new @config
|
10
|
+
end
|
11
|
+
|
12
|
+
it 'grows a forest of N trees' do
|
13
|
+
@forest.trees.should == []
|
14
|
+
@config.forest_size.should == 3
|
15
|
+
@forest.grow
|
16
|
+
@forest.trees.size.should == @config.forest_size
|
17
|
+
@forest.trees.each{|t| t.should be_kind_of Hash}
|
18
|
+
end
|
19
|
+
|
20
|
+
it 'creates averaged predictions for individuals in the training set' do
|
21
|
+
@forest.predictions.should == {}
|
22
|
+
@forest.grow
|
23
|
+
(@forest.predictions.keys - (1..800).to_a ).should == []
|
24
|
+
@forest.predictions.values.each{|v| v.should be_kind_of Numeric}
|
25
|
+
end
|
26
|
+
|
27
|
+
it 'computes averaged SNP importances for every SNP' do
|
28
|
+
@forest.snp_importances.should == {}
|
29
|
+
@forest.grow
|
30
|
+
@forest.snp_importances.keys.sort.should == (1..200).to_a
|
31
|
+
@forest.snp_importances.values.each{|v| v.should be_kind_of Numeric}
|
32
|
+
end
|
33
|
+
|
34
|
+
it 'traverses a set of testing individuals through every tree in the forest and return predictions' do
|
35
|
+
@forest = @config.load_forest
|
36
|
+
@forest.predictions.should == {}
|
37
|
+
|
38
|
+
tree_structure = YAML.load(File.open fixture_file('regression_random_forest.yml'))
|
39
|
+
expected_predictions = {}
|
40
|
+
@config.read_testing_data{|individual|
|
41
|
+
individual_prediction = 0.0
|
42
|
+
tree_structure.each do |t|
|
43
|
+
individual_prediction = (individual_prediction + Nimbus::Tree.traverse(t, individual.snp_list)).round(5)
|
44
|
+
end
|
45
|
+
expected_predictions[individual.id] = (individual_prediction / 3).round(5)
|
46
|
+
}
|
47
|
+
|
48
|
+
@forest.traverse
|
49
|
+
@forest.predictions.should == expected_predictions
|
50
|
+
end
|
51
|
+
|
52
|
+
it 'can output forest structure in YAML format' do
|
53
|
+
@forest = @config.load_forest
|
54
|
+
YAML.load(File.open fixture_file('regression_random_forest.yml')) == YAML.load(@forest.to_yaml)
|
55
|
+
end
|
56
|
+
|
57
|
+
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
require File.dirname(__FILE__) + '/spec_helper'
|
3
|
+
|
4
|
+
describe Nimbus::Individual do
|
5
|
+
|
6
|
+
it "stores id, fenotype and SNPs information for an individual" do
|
7
|
+
@individual = Nimbus::Individual.new(11, 33.275, [1,0,2,1])
|
8
|
+
@individual.id.should == 11
|
9
|
+
@individual.fenotype.should == 33.275
|
10
|
+
@individual.snp_list.should == [1,0,2,1]
|
11
|
+
end
|
12
|
+
|
13
|
+
end
|
data/spec/nimbus_spec.rb
CHANGED
@@ -1,2 +1,18 @@
|
|
1
1
|
# encoding: UTF-8
|
2
|
-
require File.dirname(__FILE__) + '/spec_helper'
|
2
|
+
require File.dirname(__FILE__) + '/spec_helper'
|
3
|
+
|
4
|
+
|
5
|
+
describe 'Nimbus module' do
|
6
|
+
|
7
|
+
it "manages a Nimbus::Application object" do
|
8
|
+
app = Nimbus.application
|
9
|
+
app.should be_kind_of Nimbus::Application
|
10
|
+
end
|
11
|
+
|
12
|
+
it "accepts setting an external Nimbus::Application" do
|
13
|
+
app = Nimbus::Application.new
|
14
|
+
Nimbus.application = app
|
15
|
+
Nimbus.application.should == app
|
16
|
+
end
|
17
|
+
|
18
|
+
end
|
data/spec/spec_helper.rb
CHANGED
@@ -1,2 +1,8 @@
|
|
1
1
|
# encoding: UTF-8
|
2
|
-
require File.dirname(__FILE__) + '/../lib/nimbus'
|
2
|
+
require File.dirname(__FILE__) + '/../lib/nimbus'
|
3
|
+
$fixtures_path = File.dirname(__FILE__) + '/fixtures'
|
4
|
+
ENV['nimbus_test'] = 'running_nimbus_tests'
|
5
|
+
|
6
|
+
def fixture_file(filename) #:nodoc:
|
7
|
+
return "#{$fixtures_path}/#{filename}"
|
8
|
+
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
require File.dirname(__FILE__) + '/spec_helper'
|
3
|
+
|
4
|
+
describe Nimbus::TrainingSet do
|
5
|
+
|
6
|
+
it "stores individuals list and fenotype data for them" do
|
7
|
+
i1 = Nimbus::Individual.new 1, 11.0, [1,0,2,1]
|
8
|
+
i2 = Nimbus::Individual.new 2, 22.0, [2,1,2,2]
|
9
|
+
i3 = Nimbus::Individual.new 3, 33.0, [0,2,1,0]
|
10
|
+
@training_set = Nimbus::TrainingSet.new [i1, i3], {i1.id => 11.0, i3.id => 33.0}
|
11
|
+
|
12
|
+
@training_set.individuals.should == [i1, i3]
|
13
|
+
@training_set.ids_fenotypes.should == {i1.id => 11.0, i3.id => 33.0}
|
14
|
+
end
|
15
|
+
|
16
|
+
it "keeps track of ids of all individuals in the training set" do
|
17
|
+
i1 = Nimbus::Individual.new 1, 11.0, [1,0,2,1]
|
18
|
+
i2 = Nimbus::Individual.new 2, 22.0, [2,1,2,2]
|
19
|
+
i3 = Nimbus::Individual.new 3, 33.0, [0,2,1,0]
|
20
|
+
@training_set = Nimbus::TrainingSet.new [i1, i3], {i1.id => 11.0, i3.id => 33.0}
|
21
|
+
|
22
|
+
@training_set.all_ids.should == [1,3]
|
23
|
+
end
|
24
|
+
|
25
|
+
end
|
data/spec/tree_spec.rb
ADDED
@@ -0,0 +1,130 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
require File.dirname(__FILE__) + '/spec_helper'
|
3
|
+
|
4
|
+
describe Nimbus::Tree do
|
5
|
+
|
6
|
+
before(:each) do
|
7
|
+
@config = Nimbus::Configuration.new
|
8
|
+
@config.load fixture_file('regression_config.yml')
|
9
|
+
|
10
|
+
@tree = Nimbus::Tree.new @config.tree
|
11
|
+
end
|
12
|
+
|
13
|
+
it "is initialized with tree config info" do
|
14
|
+
@tree.snp_total_count.should == 200
|
15
|
+
@tree.snp_sample_size.should == 60
|
16
|
+
@tree.node_min_size.should == 5
|
17
|
+
end
|
18
|
+
|
19
|
+
it "creates a tree structure when seeded with training data" do
|
20
|
+
@config.load_training_data
|
21
|
+
@tree.structure.should be_nil
|
22
|
+
@tree.seed(@config.training_set.individuals, @config.training_set.all_ids, @config.training_set.ids_fenotypes)
|
23
|
+
@tree.structure.should_not be_nil
|
24
|
+
@tree.structure.should be_kind_of Hash
|
25
|
+
|
26
|
+
@tree.structure.keys.first.should == @tree.used_snps.last
|
27
|
+
@tree.used_snps.should_not be_empty
|
28
|
+
end
|
29
|
+
|
30
|
+
it "split node in three when building a node and finds a suitable split" do
|
31
|
+
@config.load_training_data
|
32
|
+
@tree.stub!(:snps_random_sample).and_return((141..200).to_a) #189 is best split
|
33
|
+
|
34
|
+
@tree.individuals = @config.training_set.individuals
|
35
|
+
@tree.id_to_fenotype = @config.training_set.ids_fenotypes
|
36
|
+
@tree.used_snps = []
|
37
|
+
@tree.predictions = {}
|
38
|
+
|
39
|
+
branch = @tree.build_node @config.training_set.all_ids, Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
|
40
|
+
branch.keys.size.should == 1
|
41
|
+
branch.keys.first.should == 189
|
42
|
+
branch[189].size.should == 3
|
43
|
+
branch[189][0].should be_kind_of Hash
|
44
|
+
branch[189][1].should be_kind_of Hash
|
45
|
+
branch[189][2].should be_kind_of Hash
|
46
|
+
end
|
47
|
+
|
48
|
+
it "keeps track of all SNPs used for the tree" do
|
49
|
+
@config.load_training_data
|
50
|
+
snps = (131..190).to_a
|
51
|
+
@tree.stub!(:snps_random_sample).and_return(snps)
|
52
|
+
@tree.used_snps.should be_nil
|
53
|
+
@tree.seed(@config.training_set.individuals, @config.training_set.all_ids, @config.training_set.ids_fenotypes)
|
54
|
+
@tree.used_snps.size.should > 4
|
55
|
+
@tree.used_snps.each{|snp|
|
56
|
+
snps.include?(snp).should be_true
|
57
|
+
}
|
58
|
+
end
|
59
|
+
|
60
|
+
it "labels node when building a node and there is not a suitable split" do
|
61
|
+
@config.load_training_data
|
62
|
+
@tree.stub!(:snps_random_sample).and_return([33])
|
63
|
+
|
64
|
+
@tree.individuals = @config.training_set.individuals
|
65
|
+
@tree.id_to_fenotype = @config.training_set.ids_fenotypes
|
66
|
+
@tree.used_snps = []
|
67
|
+
@tree.predictions = {}
|
68
|
+
|
69
|
+
branch = @tree.build_node @config.training_set.all_ids, Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
|
70
|
+
branch[33][0].should be_kind_of Numeric
|
71
|
+
branch[33][1].should be_kind_of Numeric
|
72
|
+
branch[33][2].should be_kind_of Numeric
|
73
|
+
end
|
74
|
+
|
75
|
+
it "labels node when building a node with less individuals than the minimum node size" do
|
76
|
+
@config.load_training_data
|
77
|
+
|
78
|
+
@tree.individuals = @config.training_set.individuals
|
79
|
+
@tree.id_to_fenotype = @config.training_set.ids_fenotypes
|
80
|
+
@tree.used_snps = []
|
81
|
+
@tree.predictions = {}
|
82
|
+
|
83
|
+
label = @tree.build_node [1, 10, 33], Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
|
84
|
+
label.should be_kind_of Numeric
|
85
|
+
|
86
|
+
label = @tree.build_node [2, 10], Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
|
87
|
+
label.should be_kind_of Numeric
|
88
|
+
|
89
|
+
label = @tree.build_node [1, 10, 33], Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
|
90
|
+
label.should be_kind_of Numeric
|
91
|
+
|
92
|
+
label = @tree.build_node [108, 22, 10, 33], Nimbus::LossFunctions.average(@config.training_set.all_ids, @config.training_set.ids_fenotypes)
|
93
|
+
label.should be_kind_of Numeric
|
94
|
+
end
|
95
|
+
|
96
|
+
it 'computes generalization error for the tree' do
|
97
|
+
@config.load_training_data
|
98
|
+
@tree.seed(@config.training_set.individuals, @config.training_set.all_ids, @config.training_set.ids_fenotypes)
|
99
|
+
@tree.generalization_error.should be_nil
|
100
|
+
@tree.generalization_error_from_oob((2..200).to_a)
|
101
|
+
@tree.generalization_error.should be_kind_of Numeric
|
102
|
+
@tree.generalization_error.should > 0.0
|
103
|
+
@tree.generalization_error.should < 100.0
|
104
|
+
end
|
105
|
+
|
106
|
+
it 'estimates importance for all SNPs' do
|
107
|
+
@config.load_training_data
|
108
|
+
@tree.seed(@config.training_set.individuals, @config.training_set.all_ids, @config.training_set.ids_fenotypes)
|
109
|
+
@tree.importances.should be_nil
|
110
|
+
@tree.estimate_importances((300..533).to_a)
|
111
|
+
@tree.importances.should be_kind_of Hash
|
112
|
+
@tree.importances.keys.should_not be_empty
|
113
|
+
(@tree.importances.keys - (1..200).to_a).should be_empty
|
114
|
+
end
|
115
|
+
|
116
|
+
it 'get prediction for an individual pushing it down a tree structure' do
|
117
|
+
tree_structure = YAML.load(File.open fixture_file('regression_random_forest.yml')).first
|
118
|
+
individual_data = [0]*200
|
119
|
+
prediction = Nimbus::Tree.traverse tree_structure, individual_data
|
120
|
+
prediction.should == 0.25043
|
121
|
+
|
122
|
+
individual_data[189-1] = 1
|
123
|
+
individual_data[4-1] = 1
|
124
|
+
individual_data[62-1] = 2
|
125
|
+
individual_data[146-1] = 2
|
126
|
+
prediction = Nimbus::Tree.traverse tree_structure, individual_data
|
127
|
+
prediction.should == -0.9854
|
128
|
+
end
|
129
|
+
|
130
|
+
end
|
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: nimbus
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: "0.10"
|
5
|
+
version: "1.0"
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- "Juanjo Baz\xC3\xA1n"
|
@@ -11,7 +11,7 @@ autorequire:
|
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
13
|
|
14
|
-
date: 2011-08-
|
14
|
+
date: 2011-08-25 00:00:00 Z
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
17
17
|
name: rspec
|
@@ -35,7 +35,6 @@ extra_rdoc_files: []
|
|
35
35
|
|
36
36
|
files:
|
37
37
|
- MIT-LICENSE.txt
|
38
|
-
- README.rdoc
|
39
38
|
- lib/nimbus/application.rb
|
40
39
|
- lib/nimbus/configuration.rb
|
41
40
|
- lib/nimbus/exceptions.rb
|
@@ -45,13 +44,22 @@ files:
|
|
45
44
|
- lib/nimbus/training_set.rb
|
46
45
|
- lib/nimbus/tree.rb
|
47
46
|
- lib/nimbus.rb
|
48
|
-
- spec/fixtures/config.yml
|
49
|
-
- spec/fixtures/testing_regression.data
|
50
|
-
- spec/fixtures/training_regression.data
|
47
|
+
- spec/configuration_spec.rb
|
48
|
+
- spec/fixtures/regression_config.yml
|
49
|
+
- spec/fixtures/regression_random_forest.yml
|
50
|
+
- spec/fixtures/regression_snp_importances.txt
|
51
|
+
- spec/fixtures/regression_testing.data
|
52
|
+
- spec/fixtures/regression_testing_file_predictions.txt
|
53
|
+
- spec/fixtures/regression_training.data
|
54
|
+
- spec/fixtures/regression_training_file_predictions.txt
|
55
|
+
- spec/forest_spec.rb
|
56
|
+
- spec/individual_spec.rb
|
51
57
|
- spec/nimbus_spec.rb
|
52
58
|
- spec/spec_helper.rb
|
59
|
+
- spec/training_set_spec.rb
|
60
|
+
- spec/tree_spec.rb
|
53
61
|
- bin/nimbus
|
54
|
-
homepage: http://
|
62
|
+
homepage: http://nimbusgem.org
|
55
63
|
licenses: []
|
56
64
|
|
57
65
|
post_install_message:
|
@@ -81,8 +89,17 @@ signing_key:
|
|
81
89
|
specification_version: 3
|
82
90
|
summary: Random Forest algorithm for Genomics
|
83
91
|
test_files:
|
84
|
-
- spec/fixtures/config.yml
|
85
|
-
- spec/fixtures/testing_regression.data
|
86
|
-
- spec/fixtures/training_regression.data
|
92
|
+
- spec/configuration_spec.rb
|
93
|
+
- spec/fixtures/regression_config.yml
|
94
|
+
- spec/fixtures/regression_random_forest.yml
|
95
|
+
- spec/fixtures/regression_snp_importances.txt
|
96
|
+
- spec/fixtures/regression_testing.data
|
97
|
+
- spec/fixtures/regression_testing_file_predictions.txt
|
98
|
+
- spec/fixtures/regression_training.data
|
99
|
+
- spec/fixtures/regression_training_file_predictions.txt
|
100
|
+
- spec/forest_spec.rb
|
101
|
+
- spec/individual_spec.rb
|
87
102
|
- spec/nimbus_spec.rb
|
88
103
|
- spec/spec_helper.rb
|
104
|
+
- spec/training_set_spec.rb
|
105
|
+
- spec/tree_spec.rb
|
data/README.rdoc
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
= Nimbus
|
data/spec/fixtures/config.yml
DELETED