nimbus 0.7 → 0.8

Sign up to get free protection for your applications and to get access to all the features.
@@ -31,6 +31,7 @@ module Nimbus
31
31
  output_random_forest_file(@forest)
32
32
  output_tree_errors_file(@forest)
33
33
  output_training_file_predictions(@forest)
34
+ output_snp_importances_file(@forest)
34
35
  end
35
36
 
36
37
  if @config.do_testing
@@ -102,7 +103,7 @@ module Nimbus
102
103
  }
103
104
  }
104
105
  Nimbus.message "* Predictions for the training sample saved to:"
105
- Nimbus.message "* Output forest file: #{@config.output_training_file}"
106
+ Nimbus.message "* Output from training file: #{@config.output_training_file}"
106
107
  Nimbus.message "*" * 50
107
108
  end
108
109
 
@@ -113,7 +114,18 @@ module Nimbus
113
114
  }
114
115
  }
115
116
  Nimbus.message "* Predictions for the testing set saved to:"
116
- Nimbus.message "* Output forest file: #{@config.output_testing_file}"
117
+ Nimbus.message "* Output from testing file: #{@config.output_testing_file}"
118
+ Nimbus.message "*" * 50
119
+ end
120
+
121
# Writes the forest's SNP importances to the file configured in
# @config.output_snp_importances_file, one "SNP #<id>: <value>" line per SNP,
# sorted by SNP and with each value rounded to 5 decimals. Afterwards the
# destination path is reported through Nimbus.message.
#
# forest:: object responding to #snp_importances with a snp => importance hash.
def output_snp_importances_file(forest)
  destination = @config.output_snp_importances_file
  File.open(destination, 'w') do |file|
    forest.snp_importances.sort.each do |snp, importance|
      file.write("SNP ##{snp}: #{importance.round(5)}\n")
    end
  end
  Nimbus.message "* SNP importances for the forest saved to:"
  Nimbus.message "* Output snp importance file: #{@config.output_snp_importances_file}"
  Nimbus.message "*" * 50
end
119
131
 
@@ -17,7 +17,8 @@ module Nimbus
17
17
  :output_forest_file,
18
18
  :output_training_file,
19
19
  :output_testing_file,
20
- :output_tree_errors_file
20
+ :output_tree_errors_file,
21
+ :output_snp_importances_file
21
22
  )
22
23
 
23
24
  DEFAULTS = {
@@ -37,7 +38,8 @@ module Nimbus
37
38
  :output_forest_file => 'random_forest.yml',
38
39
  :output_training_file => 'training_file_predictions.txt',
39
40
  :output_testing_file => 'testing_file_predictions.txt',
40
- :output_tree_errors_file => 'generalization_errors.txt'
41
+ :output_tree_errors_file => 'generalization_errors.txt',
42
+ :output_snp_importances_file => 'snp_importances.txt'
41
43
  }
42
44
 
43
45
 
@@ -56,6 +58,7 @@ module Nimbus
56
58
  @output_training_file = File.expand_path(DEFAULTS[:output_training_file], Dir.pwd)
57
59
  @output_testing_file = File.expand_path(DEFAULTS[:output_testing_file], Dir.pwd)
58
60
  @output_tree_errors_file = File.expand_path(DEFAULTS[:output_tree_errors_file], Dir.pwd)
61
+ @output_snp_importances_file = File.expand_path(DEFAULTS[:output_snp_importances_file], Dir.pwd)
59
62
  end
60
63
 
61
64
  def tree
@@ -1,7 +1,7 @@
1
1
  module Nimbus
2
2
 
3
3
  class Forest
4
- attr_accessor :size, :trees, :bag, :predictions, :tree_errors
4
+ attr_accessor :size, :trees, :bag, :predictions, :tree_errors, :snp_importances
5
5
  attr_accessor :options
6
6
 
7
7
  def initialize(config)
@@ -10,7 +10,9 @@ module Nimbus
10
10
  @options = config
11
11
  @size = config.forest_size
12
12
  @predictions = {}
13
- @times_predicted =[]
13
+ @times_predicted = []
14
+ @snp_importances = {}
15
+ @tree_snp_importances = []
14
16
  raise Nimbus::ForestError, "Forest size parameter (#{@size}) is invalid. You need at least one tree." if @size < 1
15
17
  end
16
18
 
@@ -22,9 +24,11 @@ module Nimbus
22
24
  tree = Tree.new @options.tree
23
25
  @trees << tree.seed(@options.training_set.individuals, tree_individuals_bag, @options.training_set.ids_fenotypes)
24
26
  @tree_errors << tree.generalization_error_from_oob(tree_out_of_bag)
27
+ @tree_snp_importances << tree.estimate_importances(tree_out_of_bag)
25
28
  acumulate_predictions tree.predictions
26
29
  Nimbus.clear_line!
27
30
  end
31
+ average_snp_importances
28
32
  average_predictions
29
33
  end
30
34
 
@@ -76,6 +80,16 @@ module Nimbus
76
80
  }
77
81
  end
78
82
 
83
# Fills @snp_importances with the forest-wide importance of every SNP
# numbered 1..@options.tree_SNP_total_count.
#
# For each SNP the per-tree values stored in @tree_snp_importances are added
# up (trees with no value for that SNP contribute nothing) and the total is
# divided by @size.
def average_snp_importances
  1.upto(@options.tree_SNP_total_count) do |snp|
    total = @tree_snp_importances.inject(0.0) do |running, per_tree|
      per_tree[snp].nil? ? running : running + per_tree[snp]
    end
    @snp_importances[snp] = total / @size
  end
end
92
+
79
93
  end
80
94
 
81
95
  end
@@ -1,7 +1,7 @@
1
1
  module Nimbus
2
2
 
3
3
  class Tree
4
- attr_accessor :snp_sample_size, :snp_total_count, :node_min_size, :structure, :generalization_error, :predictions
4
+ attr_accessor :snp_sample_size, :snp_total_count, :node_min_size, :used_snps, :structure, :generalization_error, :predictions, :importances
5
5
  attr_accessor :individuals, :id_to_fenotype
6
6
 
7
7
  def initialize(options)
@@ -14,19 +14,10 @@ module Nimbus
14
14
  @individuals = all_individuals
15
15
  @id_to_fenotype = ids_fenotypes
16
16
  @predictions = {}
17
+ @used_snps = []
17
18
 
18
19
  @structure = build_node individuals_sample, Nimbus::LossFunctions.average(individuals_sample, @id_to_fenotype)
19
20
  end
20
-
21
- def generalization_error_from_oob(oob_ids)
22
- return nil if (@structure.nil? || @individuals.nil? || @id_to_fenotype.nil?)
23
- oob_y_hat = Nimbus::LossFunctions.average(oob_ids, @id_to_fenotype)
24
- oob_predictions = {}
25
- oob_ids.each do |oobi|
26
- oob_predictions[oobi] = Tree.traverse @structure, individuals[oobi].snp_list
27
- end
28
- @generalization_error = Nimbus::LossFunctions.quadratic_loss oob_ids, oob_predictions, oob_y_hat
29
- end
30
21
 
31
22
  def build_node(individuals_ids, y_hat)
32
23
  # General loss function value for the node
@@ -60,8 +51,35 @@ module Nimbus
60
51
  node_1 = split[1].size == 0 ? label_node(parent_y_hat, []) : build_node(split[1], y_hats[1])
61
52
  node_2 = split[2].size == 0 ? label_node(parent_y_hat, []) : build_node(split[2], y_hats[2])
62
53
 
54
+ split_by_snp(snp)
63
55
  return { snp => [node_0, node_1, node_2] }
64
56
  end
57
+
58
# Computes this tree's generalization error on the out-of-bag individuals and
# stores it in @generalization_error.
#
# Every OOB individual is run through the tree with Tree.traverse; the
# resulting predictions, together with the OOB average obtained from
# Nimbus::LossFunctions.average, are handed to
# Nimbus::LossFunctions.quadratic_loss.
#
# oob_ids:: ids of the out-of-bag individuals.
#
# Returns the computed error, or nil when the tree structure, the individuals
# or the id => fenotype map are missing.
def generalization_error_from_oob(oob_ids)
  return nil if [@structure, @individuals, @id_to_fenotype].any?(&:nil?)

  oob_y_hat = Nimbus::LossFunctions.average(oob_ids, @id_to_fenotype)
  oob_predictions = oob_ids.each_with_object({}) do |oob_id, predicted|
    predicted[oob_id] = Tree.traverse(@structure, individuals[oob_id].snp_list)
  end
  @generalization_error = Nimbus::LossFunctions.quadratic_loss(oob_ids, oob_predictions, oob_y_hat)
end
67
+
68
# Estimates the importance of every SNP this tree split on, using the
# out-of-bag (OOB) individuals.
#
# For each distinct SNP in @used_snps, every OOB individual is sent through
# the tree with traverse_with_permutation, taking the value of that SNP from
# another OOB individual (the pairing comes from a shuffled copy of oob_ids).
# The per-individual errors returned by Nimbus::LossFunctions.mean_squared_error
# are averaged over the OOB sample, and the importance of the SNP is that
# average minus @generalization_error, rounded to 5 decimals.
#
# oob_ids:: ids of the out-of-bag individuals.
#
# Returns the snp => importance hash (also stored in @importances), or nil
# when the generalization error is missing and cannot be computed.
def estimate_importances(oob_ids)
  # Compute the OOB error on demand and bail out only when that fails.
  # The previous guard lacked the trailing `.nil?`, so it returned nil
  # precisely when the error had just been computed successfully.
  return nil if @generalization_error.nil? && generalization_error_from_oob(oob_ids).nil?

  oob_individuals_count = oob_ids.size
  @importances = {}
  @used_snps.uniq.each do |current_snp|
    # A fresh shuffle per SNP decides whose value replaces each individual's.
    shuffled_ids = oob_ids.shuffle
    permutated_snp_error = 0.0
    oob_ids.each_with_index do |oobi, index|
      permutated_prediction = traverse_with_permutation(
        @structure,
        individuals[oobi].snp_list,
        current_snp,
        individuals[shuffled_ids[index]].snp_list
      )
      permutated_snp_error += Nimbus::LossFunctions.mean_squared_error([oobi], @id_to_fenotype, permutated_prediction)
    end
    @importances[current_snp] = ((permutated_snp_error / oob_individuals_count) - @generalization_error).round(5)
  end
  @importances
end
65
83
 
66
84
  def self.traverse(tree_structure, data)
67
85
  return tree_structure if tree_structure.is_a? Numeric
@@ -69,6 +87,12 @@ module Nimbus
69
87
  return self.traverse( tree_structure.values.first[ data[tree_structure.keys.first - 1].to_i], data)
70
88
  end
71
89
 
90
# Walks the tree down to a leaf like a regular traversal, except that at any
# node splitting on snp_to_permute the branch is chosen from
# individual_to_permute instead of data.
#
# tree_structure::        a Numeric leaf, or a one-entry hash { snp => [child, ...] }.
# data::                  SNP values of the individual being predicted; the
#                         value for SNP n is read at index n - 1.
# snp_to_permute::        SNP whose value is taken from the other individual.
# individual_to_permute:: SNP values of the other individual, indexed like data.
#
# Returns the Numeric stored in the leaf that is reached.
def traverse_with_permutation(tree_structure, data, snp_to_permute, individual_to_permute)
  node = tree_structure
  until node.is_a? Numeric
    snp = node.keys.first
    source = snp == snp_to_permute ? individual_to_permute : data
    node = node.values.first[source[snp - 1].to_i]
  end
  node
end
95
+
72
96
 
73
97
  private
74
98
 
@@ -92,6 +116,10 @@ module Nimbus
92
116
  raise Nimbus::TreeError, "Values for SNPs columns must be in [0, 1, 2]"
93
117
  end
94
118
 
119
# Records x in @used_snps as a SNP this tree has split on. Duplicates are
# kept; the list is returned.
def split_by_snp(x)
  @used_snps.push(x)
end
122
+
95
123
  end
96
124
 
97
125
  end
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: nimbus
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: "0.7"
5
+ version: "0.8"
6
6
  platform: ruby
7
7
  authors:
8
8
  - "Juanjo Baz\xC3\xA1n"
@@ -11,7 +11,7 @@ autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
13
 
14
- date: 2011-08-22 00:00:00 Z
14
+ date: 2011-08-23 00:00:00 Z
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
17
  name: rspec