nimbus 2.0.1 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +8 -1
- data/lib/nimbus/application.rb +1 -1
- data/lib/nimbus/configuration.rb +5 -1
- data/lib/nimbus/forest.rb +2 -2
- data/lib/nimbus/version.rb +1 -1
- data/spec/configuration_spec.rb +2 -0
- data/spec/fixtures/classification_config.yml +2 -1
- data/spec/forest_spec.rb +15 -0
- metadata +4 -4
data/README.md
CHANGED
@@ -45,6 +45,12 @@ Nimbus can be used both with regression and classification problems.
|
|
45
45
|
* The split of nodes uses the Gini index as loss function.
|
46
46
|
* Labeling of nodes is made finding the majority fenotype class of the individuals in the node.
|
47
47
|
|
48
|
+
## Variable importances
|
49
|
+
|
50
|
+
By default Nimbus will estimate SNP importances everytime a training file is run to create a forest.
|
51
|
+
|
52
|
+
You can disable this behaviour (and speed up the training process) by setting the parameter `var_importances: No` in the configuration file.
|
53
|
+
|
48
54
|
## Install
|
49
55
|
|
50
56
|
You need to have Ruby (1.9.2 or higher) and Rubygems installed in your computer. Then install nimbus with:
|
@@ -98,6 +104,7 @@ Under the forest chapter:
|
|
98
104
|
* `SNP_sample_size_mtry`: size of the random sample of SNPs to be used in every tree node.
|
99
105
|
* `SNP_total_count`: total count of SNPs in the training and/or testing files
|
100
106
|
* `node_min_size`: minimum amount of individuals in a tree node to make a split.
|
107
|
+
* `var_importances`: **optional**. If set to `No` Nimbus will not calculate SNP importances.
|
101
108
|
|
102
109
|
|
103
110
|
## Input files
|
@@ -126,7 +133,7 @@ After training:
|
|
126
133
|
* `random_forest.yml`: A file defining the structure of the computed Random Forest. It can be used as input forest file.
|
127
134
|
* `generalization_errors.txt`: A file with the generalization error for every tree in the forest.
|
128
135
|
* `training_file_predictions.txt`: A file with predictions for every individual from the training file.
|
129
|
-
* `snp_importances.txt`: A file with the computed importance for every SNP.
|
136
|
+
* `snp_importances.txt`: A file with the computed importance for every SNP. _(unless `var_importances` set to `No` in config file)_
|
130
137
|
|
131
138
|
After testing:
|
132
139
|
|
data/lib/nimbus/application.rb
CHANGED
@@ -34,7 +34,7 @@ module Nimbus
|
|
34
34
|
output_random_forest_file(@forest)
|
35
35
|
output_tree_errors_file(@forest)
|
36
36
|
output_training_file_predictions(@forest)
|
37
|
-
output_snp_importances_file(@forest)
|
37
|
+
output_snp_importances_file(@forest) if @config.do_importances
|
38
38
|
end
|
39
39
|
|
40
40
|
if @config.do_testing
|
data/lib/nimbus/configuration.rb
CHANGED
@@ -25,6 +25,7 @@ module Nimbus
|
|
25
25
|
:loss_function_continuous,
|
26
26
|
:do_training,
|
27
27
|
:do_testing,
|
28
|
+
:do_importances,
|
28
29
|
:training_set,
|
29
30
|
:output_forest_file,
|
30
31
|
:output_training_file,
|
@@ -63,6 +64,7 @@ module Nimbus
|
|
63
64
|
def initialize
|
64
65
|
@do_training = false
|
65
66
|
@do_testing = false
|
67
|
+
@do_importances = true
|
66
68
|
|
67
69
|
@forest_size = DEFAULTS[:forest_size]
|
68
70
|
@tree_SNP_sample_size = DEFAULTS[:tree_SNP_sample_size]
|
@@ -137,6 +139,8 @@ module Nimbus
|
|
137
139
|
@tree_SNP_total_count = user_config_params['forest']['SNP_total_count'].to_i if user_config_params['forest']['SNP_total_count']
|
138
140
|
@tree_SNP_sample_size = user_config_params['forest']['SNP_sample_size_mtry'].to_i if user_config_params['forest']['SNP_sample_size_mtry']
|
139
141
|
@tree_node_min_size = user_config_params['forest']['node_min_size'].to_i if user_config_params['forest']['node_min_size']
|
142
|
+
@do_importances = user_config_params['forest']['var_importances'].to_s.strip.downcase
|
143
|
+
@do_importances = (@do_importances != 'no' && @do_importances != 'false')
|
140
144
|
end
|
141
145
|
|
142
146
|
check_configuration
|
@@ -170,7 +174,7 @@ module Nimbus
|
|
170
174
|
next if line.strip == ''
|
171
175
|
data_id, *snp_list = line.strip.split
|
172
176
|
raise Nimbus::InputFileError, "There are individuals with no ID, please check data in Testing file." unless (!data_id.nil? && data_id.strip != '')
|
173
|
-
raise Nimbus::InputFileError, "Individual ##{data_id} from testing set has no value for all #{@tree_SNP_total_count} SNPs
|
177
|
+
raise Nimbus::InputFileError, "Individual ##{data_id} from testing set has no value for all #{@tree_SNP_total_count} SNPs." unless snp_list.size == @tree_SNP_total_count
|
174
178
|
individual_test = Nimbus::Individual.new(data_id.to_i, nil, snp_list.map{|snp| snp.to_i})
|
175
179
|
yield individual_test
|
176
180
|
end
|
data/lib/nimbus/forest.rb
CHANGED
@@ -42,11 +42,11 @@ module Nimbus
|
|
42
42
|
tree = tree_class.new @options.tree
|
43
43
|
@trees << tree.seed(@options.training_set.individuals, tree_individuals_bag, @options.training_set.ids_fenotypes)
|
44
44
|
@tree_errors << tree.generalization_error_from_oob(tree_out_of_bag)
|
45
|
-
@tree_snp_importances << tree.estimate_importances(tree_out_of_bag)
|
45
|
+
@tree_snp_importances << tree.estimate_importances(tree_out_of_bag) if @options.do_importances
|
46
46
|
acumulate_predictions tree.predictions
|
47
47
|
Nimbus.clear_line!
|
48
48
|
end
|
49
|
-
average_snp_importances
|
49
|
+
average_snp_importances if @options.do_importances
|
50
50
|
totalize_predictions
|
51
51
|
end
|
52
52
|
|
data/lib/nimbus/version.rb
CHANGED
data/spec/configuration_spec.rb
CHANGED
@@ -11,6 +11,7 @@ describe Nimbus::Configuration do
|
|
11
11
|
config.testing_file.should == fixture_file('regression_testing.data')
|
12
12
|
config.forest_file.should == fixture_file('regression_random_forest.yml')
|
13
13
|
config.classes.should be_nil
|
14
|
+
config.do_importances.should be
|
14
15
|
|
15
16
|
config.forest_size.should == 3
|
16
17
|
config.tree_SNP_sample_size.should == 60
|
@@ -24,6 +25,7 @@ describe Nimbus::Configuration do
|
|
24
25
|
config.testing_file.should == fixture_file('classification_testing.data')
|
25
26
|
config.forest_file.should == fixture_file('classification_random_forest.yml')
|
26
27
|
config.classes.should == ['0','1']
|
28
|
+
config.do_importances.should_not be
|
27
29
|
|
28
30
|
config.forest_size.should == 3
|
29
31
|
config.tree_SNP_sample_size.should == 33
|
data/spec/forest_spec.rb
CHANGED
@@ -34,6 +34,13 @@ describe Nimbus::Forest do
|
|
34
34
|
@forest.snp_importances.values.each{|v| v.should be_kind_of Numeric}
|
35
35
|
end
|
36
36
|
|
37
|
+
it 'does not compute SNP importances if config set to false' do
|
38
|
+
@forest.snp_importances.should == {}
|
39
|
+
@forest.options.do_importances = false
|
40
|
+
@forest.grow
|
41
|
+
@forest.snp_importances.should == {}
|
42
|
+
end
|
43
|
+
|
37
44
|
it 'traverses a set of testing individuals through every tree in the forest and returns predictions' do
|
38
45
|
@forest = @config.load_forest
|
39
46
|
@forest.predictions.should == {}
|
@@ -85,11 +92,19 @@ describe Nimbus::Forest do
|
|
85
92
|
|
86
93
|
it 'computes averaged SNP importances for every SNP' do
|
87
94
|
@forest.snp_importances.should == {}
|
95
|
+
@forest.options.do_importances = true
|
88
96
|
@forest.grow
|
89
97
|
@forest.snp_importances.keys.sort.should == (1..100).to_a # 100 snps in the training file
|
90
98
|
@forest.snp_importances.values.each{|v| v.should be_kind_of Numeric}
|
91
99
|
end
|
92
100
|
|
101
|
+
it 'does not compute SNP importances if config set to false' do
|
102
|
+
@forest.snp_importances.should == {}
|
103
|
+
@forest.options.do_importances = false
|
104
|
+
@forest.grow
|
105
|
+
@forest.snp_importances.should == {}
|
106
|
+
end
|
107
|
+
|
93
108
|
it 'traverses a set of testing individuals through every tree in the forest and returns predictions' do
|
94
109
|
@forest = @config.load_forest
|
95
110
|
@forest.predictions.should == {}
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nimbus
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.1
|
4
|
+
version: 2.1.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -10,11 +10,11 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2012-07-
|
13
|
+
date: 2012-07-28 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: rspec
|
17
|
-
requirement: &
|
17
|
+
requirement: &2152757020 !ruby/object:Gem::Requirement
|
18
18
|
none: false
|
19
19
|
requirements:
|
20
20
|
- - ! '>='
|
@@ -22,7 +22,7 @@ dependencies:
|
|
22
22
|
version: 2.11.0
|
23
23
|
type: :development
|
24
24
|
prerelease: false
|
25
|
-
version_requirements: *
|
25
|
+
version_requirements: *2152757020
|
26
26
|
description: Nimbus is a Ruby gem to implement Random Forest in a genomic selection
|
27
27
|
context.
|
28
28
|
email:
|