nimbus 2.0.1 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -45,6 +45,12 @@ Nimbus can be used both with regression and classification problems.
45
45
  * The split of nodes uses the Gini index as loss function.
46
46
  * Labeling of nodes is done by finding the majority phenotype class of the individuals in the node.
47
47
 
48
+ ## Variable importances
49
+
50
+ By default, Nimbus will estimate SNP importances every time a training file is run to create a forest.
51
+
52
+ You can disable this behaviour (and speed up the training process) by setting the parameter `var_importances: No` in the configuration file.
53
+
48
54
  ## Install
49
55
 
50
56
  You need to have Ruby (1.9.2 or higher) and Rubygems installed on your computer. Then install nimbus with:
@@ -98,6 +104,7 @@ Under the forest chapter:
98
104
  * `SNP_sample_size_mtry`: size of the random sample of SNPs to be used in every tree node.
99
105
  * `SNP_total_count`: total count of SNPs in the training and/or testing files.
100
106
  * `node_min_size`: minimum amount of individuals in a tree node to make a split.
107
+ * `var_importances`: **optional**. If set to `No`, Nimbus will not calculate SNP importances.
101
108
 
102
109
 
103
110
  ## Input files
@@ -126,7 +133,7 @@ After training:
126
133
  * `random_forest.yml`: A file defining the structure of the computed Random Forest. It can be used as input forest file.
127
134
  * `generalization_errors.txt`: A file with the generalization error for every tree in the forest.
128
135
  * `training_file_predictions.txt`: A file with predictions for every individual from the training file.
129
- * `snp_importances.txt`: A file with the computed importance for every SNP.
136
+ * `snp_importances.txt`: A file with the computed importance for every SNP. _(unless `var_importances` is set to `No` in the config file)_
130
137
 
131
138
  After testing:
132
139
 
@@ -34,7 +34,7 @@ module Nimbus
34
34
  output_random_forest_file(@forest)
35
35
  output_tree_errors_file(@forest)
36
36
  output_training_file_predictions(@forest)
37
- output_snp_importances_file(@forest)
37
+ output_snp_importances_file(@forest) if @config.do_importances
38
38
  end
39
39
 
40
40
  if @config.do_testing
@@ -25,6 +25,7 @@ module Nimbus
25
25
  :loss_function_continuous,
26
26
  :do_training,
27
27
  :do_testing,
28
+ :do_importances,
28
29
  :training_set,
29
30
  :output_forest_file,
30
31
  :output_training_file,
@@ -63,6 +64,7 @@ module Nimbus
63
64
  def initialize
64
65
  @do_training = false
65
66
  @do_testing = false
67
+ @do_importances = true
66
68
 
67
69
  @forest_size = DEFAULTS[:forest_size]
68
70
  @tree_SNP_sample_size = DEFAULTS[:tree_SNP_sample_size]
@@ -137,6 +139,8 @@ module Nimbus
137
139
  @tree_SNP_total_count = user_config_params['forest']['SNP_total_count'].to_i if user_config_params['forest']['SNP_total_count']
138
140
  @tree_SNP_sample_size = user_config_params['forest']['SNP_sample_size_mtry'].to_i if user_config_params['forest']['SNP_sample_size_mtry']
139
141
  @tree_node_min_size = user_config_params['forest']['node_min_size'].to_i if user_config_params['forest']['node_min_size']
142
+ @do_importances = user_config_params['forest']['var_importances'].to_s.strip.downcase
143
+ @do_importances = (@do_importances != 'no' && @do_importances != 'false')
140
144
  end
141
145
 
142
146
  check_configuration
@@ -170,7 +174,7 @@ module Nimbus
170
174
  next if line.strip == ''
171
175
  data_id, *snp_list = line.strip.split
172
176
  raise Nimbus::InputFileError, "There are individuals with no ID, please check data in Testing file." unless (!data_id.nil? && data_id.strip != '')
173
- raise Nimbus::InputFileError, "Individual ##{data_id} from testing set has no value for all #{@tree_SNP_total_count} SNPs, please check structure of the testing file." unless snp_list.size == @tree_SNP_total_count
177
+ raise Nimbus::InputFileError, "Individual ##{data_id} from testing set has no value for all #{@tree_SNP_total_count} SNPs." unless snp_list.size == @tree_SNP_total_count
174
178
  individual_test = Nimbus::Individual.new(data_id.to_i, nil, snp_list.map{|snp| snp.to_i})
175
179
  yield individual_test
176
180
  end
@@ -42,11 +42,11 @@ module Nimbus
42
42
  tree = tree_class.new @options.tree
43
43
  @trees << tree.seed(@options.training_set.individuals, tree_individuals_bag, @options.training_set.ids_fenotypes)
44
44
  @tree_errors << tree.generalization_error_from_oob(tree_out_of_bag)
45
- @tree_snp_importances << tree.estimate_importances(tree_out_of_bag)
45
+ @tree_snp_importances << tree.estimate_importances(tree_out_of_bag) if @options.do_importances
46
46
  acumulate_predictions tree.predictions
47
47
  Nimbus.clear_line!
48
48
  end
49
- average_snp_importances
49
+ average_snp_importances if @options.do_importances
50
50
  totalize_predictions
51
51
  end
52
52
 
@@ -1,3 +1,3 @@
1
1
  module Nimbus
2
- VERSION = "2.0.1"
2
+ VERSION = "2.1.0"
3
3
  end
@@ -11,6 +11,7 @@ describe Nimbus::Configuration do
11
11
  config.testing_file.should == fixture_file('regression_testing.data')
12
12
  config.forest_file.should == fixture_file('regression_random_forest.yml')
13
13
  config.classes.should be_nil
14
+ config.do_importances.should be
14
15
 
15
16
  config.forest_size.should == 3
16
17
  config.tree_SNP_sample_size.should == 60
@@ -24,6 +25,7 @@ describe Nimbus::Configuration do
24
25
  config.testing_file.should == fixture_file('classification_testing.data')
25
26
  config.forest_file.should == fixture_file('classification_random_forest.yml')
26
27
  config.classes.should == ['0','1']
28
+ config.do_importances.should_not be
27
29
 
28
30
  config.forest_size.should == 3
29
31
  config.tree_SNP_sample_size.should == 33
@@ -10,4 +10,5 @@ forest:
10
10
  forest_size: 3 #how many trees
11
11
  SNP_sample_size_mtry: 33 #mtry
12
12
  SNP_total_count: 100
13
- node_min_size: 5
13
+ node_min_size: 5
14
+ var_importances: NO
@@ -34,6 +34,13 @@ describe Nimbus::Forest do
34
34
  @forest.snp_importances.values.each{|v| v.should be_kind_of Numeric}
35
35
  end
36
36
 
37
+ it 'does not compute SNP importances if config set to false' do
38
+ @forest.snp_importances.should == {}
39
+ @forest.options.do_importances = false
40
+ @forest.grow
41
+ @forest.snp_importances.should == {}
42
+ end
43
+
37
44
  it 'traverses a set of testing individuals through every tree in the forest and returns predictions' do
38
45
  @forest = @config.load_forest
39
46
  @forest.predictions.should == {}
@@ -85,11 +92,19 @@ describe Nimbus::Forest do
85
92
 
86
93
  it 'computes averaged SNP importances for every SNP' do
87
94
  @forest.snp_importances.should == {}
95
+ @forest.options.do_importances = true
88
96
  @forest.grow
89
97
  @forest.snp_importances.keys.sort.should == (1..100).to_a # 100 snps in the training file
90
98
  @forest.snp_importances.values.each{|v| v.should be_kind_of Numeric}
91
99
  end
92
100
 
101
+ it 'does not compute SNP importances if config set to false' do
102
+ @forest.snp_importances.should == {}
103
+ @forest.options.do_importances = false
104
+ @forest.grow
105
+ @forest.snp_importances.should == {}
106
+ end
107
+
93
108
  it 'traverses a set of testing individuals through every tree in the forest and returns predictions' do
94
109
  @forest = @config.load_forest
95
110
  @forest.predictions.should == {}
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nimbus
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.1
4
+ version: 2.1.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -10,11 +10,11 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2012-07-27 00:00:00.000000000 Z
13
+ date: 2012-07-28 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: rspec
17
- requirement: &2160657580 !ruby/object:Gem::Requirement
17
+ requirement: &2152757020 !ruby/object:Gem::Requirement
18
18
  none: false
19
19
  requirements:
20
20
  - - ! '>='
@@ -22,7 +22,7 @@ dependencies:
22
22
  version: 2.11.0
23
23
  type: :development
24
24
  prerelease: false
25
- version_requirements: *2160657580
25
+ version_requirements: *2152757020
26
26
  description: Nimbus is a Ruby gem to implement Random Forest in a genomic selection
27
27
  context.
28
28
  email: