nimbus 2.0.1 → 2.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +8 -1
- data/lib/nimbus/application.rb +1 -1
- data/lib/nimbus/configuration.rb +5 -1
- data/lib/nimbus/forest.rb +2 -2
- data/lib/nimbus/version.rb +1 -1
- data/spec/configuration_spec.rb +2 -0
- data/spec/fixtures/classification_config.yml +2 -1
- data/spec/forest_spec.rb +15 -0
- metadata +4 -4
data/README.md
CHANGED
@@ -45,6 +45,12 @@ Nimbus can be used both with regression and classification problems.
|
|
45
45
|
* The split of nodes uses the Gini index as loss function.
|
46
46
|
* Labeling of nodes is done by finding the majority phenotype class of the individuals in the node.
|
47
47
|
|
48
|
+
## Variable importances
|
49
|
+
|
50
|
+
By default Nimbus will estimate SNP importances every time a training file is run to create a forest.
|
51
|
+
|
52
|
+
You can disable this behaviour (and speed up the training process) by setting the parameter `var_importances: No` in the configuration file.
|
53
|
+
|
48
54
|
## Install
|
49
55
|
|
50
56
|
You need to have Ruby (1.9.2 or higher) and Rubygems installed on your computer. Then install nimbus with:
|
@@ -98,6 +104,7 @@ Under the forest chapter:
|
|
98
104
|
* `SNP_sample_size_mtry`: size of the random sample of SNPs to be used in every tree node.
|
99
105
|
* `SNP_total_count`: total count of SNPs in the training and/or testing files
|
100
106
|
* `node_min_size`: minimum amount of individuals in a tree node to make a split.
|
107
|
+
* `var_importances`: **optional**. If set to `No`, Nimbus will not calculate SNP importances.
|
101
108
|
|
102
109
|
|
103
110
|
## Input files
|
@@ -126,7 +133,7 @@ After training:
|
|
126
133
|
* `random_forest.yml`: A file defining the structure of the computed Random Forest. It can be used as input forest file.
|
127
134
|
* `generalization_errors.txt`: A file with the generalization error for every tree in the forest.
|
128
135
|
* `training_file_predictions.txt`: A file with predictions for every individual from the training file.
|
129
|
-
* `snp_importances.txt`: A file with the computed importance for every SNP.
|
136
|
+
* `snp_importances.txt`: A file with the computed importance for every SNP. _(unless `var_importances` is set to `No` in the config file)_
|
130
137
|
|
131
138
|
After testing:
|
132
139
|
|
data/lib/nimbus/application.rb
CHANGED
@@ -34,7 +34,7 @@ module Nimbus
|
|
34
34
|
output_random_forest_file(@forest)
|
35
35
|
output_tree_errors_file(@forest)
|
36
36
|
output_training_file_predictions(@forest)
|
37
|
-
output_snp_importances_file(@forest)
|
37
|
+
output_snp_importances_file(@forest) if @config.do_importances
|
38
38
|
end
|
39
39
|
|
40
40
|
if @config.do_testing
|
data/lib/nimbus/configuration.rb
CHANGED
@@ -25,6 +25,7 @@ module Nimbus
|
|
25
25
|
:loss_function_continuous,
|
26
26
|
:do_training,
|
27
27
|
:do_testing,
|
28
|
+
:do_importances,
|
28
29
|
:training_set,
|
29
30
|
:output_forest_file,
|
30
31
|
:output_training_file,
|
@@ -63,6 +64,7 @@ module Nimbus
|
|
63
64
|
def initialize
|
64
65
|
@do_training = false
|
65
66
|
@do_testing = false
|
67
|
+
@do_importances = true
|
66
68
|
|
67
69
|
@forest_size = DEFAULTS[:forest_size]
|
68
70
|
@tree_SNP_sample_size = DEFAULTS[:tree_SNP_sample_size]
|
@@ -137,6 +139,8 @@ module Nimbus
|
|
137
139
|
@tree_SNP_total_count = user_config_params['forest']['SNP_total_count'].to_i if user_config_params['forest']['SNP_total_count']
|
138
140
|
@tree_SNP_sample_size = user_config_params['forest']['SNP_sample_size_mtry'].to_i if user_config_params['forest']['SNP_sample_size_mtry']
|
139
141
|
@tree_node_min_size = user_config_params['forest']['node_min_size'].to_i if user_config_params['forest']['node_min_size']
|
142
|
+
@do_importances = user_config_params['forest']['var_importances'].to_s.strip.downcase
|
143
|
+
@do_importances = (@do_importances != 'no' && @do_importances != 'false')
|
140
144
|
end
|
141
145
|
|
142
146
|
check_configuration
|
@@ -170,7 +174,7 @@ module Nimbus
|
|
170
174
|
next if line.strip == ''
|
171
175
|
data_id, *snp_list = line.strip.split
|
172
176
|
raise Nimbus::InputFileError, "There are individuals with no ID, please check data in Testing file." unless (!data_id.nil? && data_id.strip != '')
|
173
|
-
raise Nimbus::InputFileError, "Individual ##{data_id} from testing set has no value for all #{@tree_SNP_total_count} SNPs
|
177
|
+
raise Nimbus::InputFileError, "Individual ##{data_id} from testing set has no value for all #{@tree_SNP_total_count} SNPs." unless snp_list.size == @tree_SNP_total_count
|
174
178
|
individual_test = Nimbus::Individual.new(data_id.to_i, nil, snp_list.map{|snp| snp.to_i})
|
175
179
|
yield individual_test
|
176
180
|
end
|
data/lib/nimbus/forest.rb
CHANGED
@@ -42,11 +42,11 @@ module Nimbus
|
|
42
42
|
tree = tree_class.new @options.tree
|
43
43
|
@trees << tree.seed(@options.training_set.individuals, tree_individuals_bag, @options.training_set.ids_fenotypes)
|
44
44
|
@tree_errors << tree.generalization_error_from_oob(tree_out_of_bag)
|
45
|
-
@tree_snp_importances << tree.estimate_importances(tree_out_of_bag)
|
45
|
+
@tree_snp_importances << tree.estimate_importances(tree_out_of_bag) if @options.do_importances
|
46
46
|
acumulate_predictions tree.predictions
|
47
47
|
Nimbus.clear_line!
|
48
48
|
end
|
49
|
-
average_snp_importances
|
49
|
+
average_snp_importances if @options.do_importances
|
50
50
|
totalize_predictions
|
51
51
|
end
|
52
52
|
|
data/lib/nimbus/version.rb
CHANGED
data/spec/configuration_spec.rb
CHANGED
@@ -11,6 +11,7 @@ describe Nimbus::Configuration do
|
|
11
11
|
config.testing_file.should == fixture_file('regression_testing.data')
|
12
12
|
config.forest_file.should == fixture_file('regression_random_forest.yml')
|
13
13
|
config.classes.should be_nil
|
14
|
+
config.do_importances.should be
|
14
15
|
|
15
16
|
config.forest_size.should == 3
|
16
17
|
config.tree_SNP_sample_size.should == 60
|
@@ -24,6 +25,7 @@ describe Nimbus::Configuration do
|
|
24
25
|
config.testing_file.should == fixture_file('classification_testing.data')
|
25
26
|
config.forest_file.should == fixture_file('classification_random_forest.yml')
|
26
27
|
config.classes.should == ['0','1']
|
28
|
+
config.do_importances.should_not be
|
27
29
|
|
28
30
|
config.forest_size.should == 3
|
29
31
|
config.tree_SNP_sample_size.should == 33
|
data/spec/forest_spec.rb
CHANGED
@@ -34,6 +34,13 @@ describe Nimbus::Forest do
|
|
34
34
|
@forest.snp_importances.values.each{|v| v.should be_kind_of Numeric}
|
35
35
|
end
|
36
36
|
|
37
|
+
it 'does not compute SNP importances if config set to false' do
|
38
|
+
@forest.snp_importances.should == {}
|
39
|
+
@forest.options.do_importances = false
|
40
|
+
@forest.grow
|
41
|
+
@forest.snp_importances.should == {}
|
42
|
+
end
|
43
|
+
|
37
44
|
it 'traverses a set of testing individuals through every tree in the forest and returns predictions' do
|
38
45
|
@forest = @config.load_forest
|
39
46
|
@forest.predictions.should == {}
|
@@ -85,11 +92,19 @@ describe Nimbus::Forest do
|
|
85
92
|
|
86
93
|
it 'computes averaged SNP importances for every SNP' do
|
87
94
|
@forest.snp_importances.should == {}
|
95
|
+
@forest.options.do_importances = true
|
88
96
|
@forest.grow
|
89
97
|
@forest.snp_importances.keys.sort.should == (1..100).to_a # 100 snps in the training file
|
90
98
|
@forest.snp_importances.values.each{|v| v.should be_kind_of Numeric}
|
91
99
|
end
|
92
100
|
|
101
|
+
it 'does not compute SNP importances if config set to false' do
|
102
|
+
@forest.snp_importances.should == {}
|
103
|
+
@forest.options.do_importances = false
|
104
|
+
@forest.grow
|
105
|
+
@forest.snp_importances.should == {}
|
106
|
+
end
|
107
|
+
|
93
108
|
it 'traverses a set of testing individuals through every tree in the forest and returns predictions' do
|
94
109
|
@forest = @config.load_forest
|
95
110
|
@forest.predictions.should == {}
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nimbus
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.1
|
4
|
+
version: 2.1.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -10,11 +10,11 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2012-07-
|
13
|
+
date: 2012-07-28 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: rspec
|
17
|
-
requirement: &
|
17
|
+
requirement: &2152757020 !ruby/object:Gem::Requirement
|
18
18
|
none: false
|
19
19
|
requirements:
|
20
20
|
- - ! '>='
|
@@ -22,7 +22,7 @@ dependencies:
|
|
22
22
|
version: 2.11.0
|
23
23
|
type: :development
|
24
24
|
prerelease: false
|
25
|
-
version_requirements: *
|
25
|
+
version_requirements: *2152757020
|
26
26
|
description: Nimbus is a Ruby gem to implement Random Forest in a genomic selection
|
27
27
|
context.
|
28
28
|
email:
|