nimbus 0.7 → 0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -31,6 +31,7 @@ module Nimbus
31
31
  output_random_forest_file(@forest)
32
32
  output_tree_errors_file(@forest)
33
33
  output_training_file_predictions(@forest)
34
+ output_snp_importances_file(@forest)
34
35
  end
35
36
 
36
37
  if @config.do_testing
@@ -102,7 +103,7 @@ module Nimbus
102
103
  }
103
104
  }
104
105
  Nimbus.message "* Predictions for the training sample saved to:"
105
- Nimbus.message "* Output forest file: #{@config.output_training_file}"
106
+ Nimbus.message "* Output from training file: #{@config.output_training_file}"
106
107
  Nimbus.message "*" * 50
107
108
  end
108
109
 
@@ -113,7 +114,18 @@ module Nimbus
113
114
  }
114
115
  }
115
116
  Nimbus.message "* Predictions for the testing set saved to:"
116
- Nimbus.message "* Output forest file: #{@config.output_testing_file}"
117
+ Nimbus.message "* Output from testing file: #{@config.output_testing_file}"
118
+ Nimbus.message "*" * 50
119
+ end
120
+
121
# Writes the SNP importances of +forest+ to the file named by
# @config.output_snp_importances_file and reports the path via Nimbus.message.
#
# The importances are sorted and written one per line as
# "SNP #<snp>: <importance>", with each importance rounded to 5 decimals.
#
# forest - object responding to #snp_importances with snp => importance pairs.
def output_snp_importances_file(forest)
  File.open(@config.output_snp_importances_file, 'w') do |file|
    forest.snp_importances.sort.each do |snp, importance|
      file.write("SNP ##{snp}: #{importance.round(5)}\n")
    end
  end
  Nimbus.message "* SNP importances for the forest saved to:"
  Nimbus.message "* Output snp importance file: #{@config.output_snp_importances_file}"
  Nimbus.message "*" * 50
end
119
131
 
@@ -17,7 +17,8 @@ module Nimbus
17
17
  :output_forest_file,
18
18
  :output_training_file,
19
19
  :output_testing_file,
20
- :output_tree_errors_file
20
+ :output_tree_errors_file,
21
+ :output_snp_importances_file
21
22
  )
22
23
 
23
24
  DEFAULTS = {
@@ -37,7 +38,8 @@ module Nimbus
37
38
  :output_forest_file => 'random_forest.yml',
38
39
  :output_training_file => 'training_file_predictions.txt',
39
40
  :output_testing_file => 'testing_file_predictions.txt',
40
- :output_tree_errors_file => 'generalization_errors.txt'
41
+ :output_tree_errors_file => 'generalization_errors.txt',
42
+ :output_snp_importances_file => 'snp_importances.txt'
41
43
  }
42
44
 
43
45
 
@@ -56,6 +58,7 @@ module Nimbus
56
58
  @output_training_file = File.expand_path(DEFAULTS[:output_training_file], Dir.pwd)
57
59
  @output_testing_file = File.expand_path(DEFAULTS[:output_testing_file], Dir.pwd)
58
60
  @output_tree_errors_file = File.expand_path(DEFAULTS[:output_tree_errors_file], Dir.pwd)
61
+ @output_snp_importances_file = File.expand_path(DEFAULTS[:output_snp_importances_file], Dir.pwd)
59
62
  end
60
63
 
61
64
  def tree
@@ -1,7 +1,7 @@
1
1
  module Nimbus
2
2
 
3
3
  class Forest
4
- attr_accessor :size, :trees, :bag, :predictions, :tree_errors
4
+ attr_accessor :size, :trees, :bag, :predictions, :tree_errors, :snp_importances
5
5
  attr_accessor :options
6
6
 
7
7
  def initialize(config)
@@ -10,7 +10,9 @@ module Nimbus
10
10
  @options = config
11
11
  @size = config.forest_size
12
12
  @predictions = {}
13
- @times_predicted =[]
13
+ @times_predicted = []
14
+ @snp_importances = {}
15
+ @tree_snp_importances = []
14
16
  raise Nimbus::ForestError, "Forest size parameter (#{@size}) is invalid. You need at least one tree." if @size < 1
15
17
  end
16
18
 
@@ -22,9 +24,11 @@ module Nimbus
22
24
  tree = Tree.new @options.tree
23
25
  @trees << tree.seed(@options.training_set.individuals, tree_individuals_bag, @options.training_set.ids_fenotypes)
24
26
  @tree_errors << tree.generalization_error_from_oob(tree_out_of_bag)
27
+ @tree_snp_importances << tree.estimate_importances(tree_out_of_bag)
25
28
  acumulate_predictions tree.predictions
26
29
  Nimbus.clear_line!
27
30
  end
31
+ average_snp_importances
28
32
  average_predictions
29
33
  end
30
34
 
@@ -76,6 +80,16 @@ module Nimbus
76
80
  }
77
81
  end
78
82
 
83
# Averages the per-tree SNP importances into @snp_importances.
#
# For every SNP index from 1 to @options.tree_SNP_total_count the values
# reported by the individual trees are summed and divided by @size.
# A tree that reports no value for a SNP contributes nothing to the sum,
# but the divisor is always @size.
#
# Entries of @tree_snp_importances may be nil, because a tree's
# estimate_importances can return nil; those entries are skipped instead
# of raising NoMethodError on nil.
def average_snp_importances
  per_tree = @tree_snp_importances.compact
  1.upto(@options.tree_SNP_total_count) do |snp|
    total = per_tree.inject(0.0) do |sum, tree_snp_importance|
      sum + (tree_snp_importance[snp] || 0.0)
    end
    @snp_importances[snp] = total / @size
  end
end
92
+
79
93
  end
80
94
 
81
95
  end
@@ -1,7 +1,7 @@
1
1
  module Nimbus
2
2
 
3
3
  class Tree
4
- attr_accessor :snp_sample_size, :snp_total_count, :node_min_size, :structure, :generalization_error, :predictions
4
+ attr_accessor :snp_sample_size, :snp_total_count, :node_min_size, :used_snps, :structure, :generalization_error, :predictions, :importances
5
5
  attr_accessor :individuals, :id_to_fenotype
6
6
 
7
7
  def initialize(options)
@@ -14,19 +14,10 @@ module Nimbus
14
14
  @individuals = all_individuals
15
15
  @id_to_fenotype = ids_fenotypes
16
16
  @predictions = {}
17
+ @used_snps = []
17
18
 
18
19
  @structure = build_node individuals_sample, Nimbus::LossFunctions.average(individuals_sample, @id_to_fenotype)
19
20
  end
20
-
21
- def generalization_error_from_oob(oob_ids)
22
- return nil if (@structure.nil? || @individuals.nil? || @id_to_fenotype.nil?)
23
- oob_y_hat = Nimbus::LossFunctions.average(oob_ids, @id_to_fenotype)
24
- oob_predictions = {}
25
- oob_ids.each do |oobi|
26
- oob_predictions[oobi] = Tree.traverse @structure, individuals[oobi].snp_list
27
- end
28
- @generalization_error = Nimbus::LossFunctions.quadratic_loss oob_ids, oob_predictions, oob_y_hat
29
- end
30
21
 
31
22
  def build_node(individuals_ids, y_hat)
32
23
  # General loss function value for the node
@@ -60,8 +51,35 @@ module Nimbus
60
51
  node_1 = split[1].size == 0 ? label_node(parent_y_hat, []) : build_node(split[1], y_hats[1])
61
52
  node_2 = split[2].size == 0 ? label_node(parent_y_hat, []) : build_node(split[2], y_hats[2])
62
53
 
54
+ split_by_snp(snp)
63
55
  return { snp => [node_0, node_1, node_2] }
64
56
  end
57
+
58
# Computes this tree's generalization error on the given out-of-bag ids.
#
# Returns nil when @structure, @individuals or @id_to_fenotype is nil.
# Otherwise every id in +oob_ids+ is predicted by traversing @structure with
# that individual's snp_list, and the result of
# Nimbus::LossFunctions.quadratic_loss over those predictions is stored in
# @generalization_error and returned.
def generalization_error_from_oob(oob_ids)
  return nil if [@structure, @individuals, @id_to_fenotype].any?(&:nil?)

  oob_y_hat = Nimbus::LossFunctions.average(oob_ids, @id_to_fenotype)
  oob_predictions = oob_ids.each_with_object({}) do |oob_id, predicted|
    predicted[oob_id] = Tree.traverse(@structure, individuals[oob_id].snp_list)
  end
  @generalization_error = Nimbus::LossFunctions.quadratic_loss(oob_ids, oob_predictions, oob_y_hat)
end
67
+
68
# Estimates an importance value for every SNP recorded in @used_snps.
#
# For each distinct used SNP, every out-of-bag individual is predicted again
# with traverse_with_permutation, which reads that SNP's value from another
# individual picked from a shuffled copy of +oob_ids+. The importance is the
# mean of the resulting per-individual errors minus @generalization_error,
# rounded to 5 decimals.
#
# oob_ids - ids of the out-of-bag individuals.
#
# Returns the snp => importance Hash (also stored in @importances), or nil
# when no generalization error is available and it cannot be computed.
def estimate_importances(oob_ids)
  # Compute the baseline error on demand; give up only if that fails too.
  # (The previous guard lacked the trailing .nil? and returned nil exactly
  # when the on-demand computation succeeded.)
  return nil if @generalization_error.nil? && generalization_error_from_oob(oob_ids).nil?

  oob_individuals_count = oob_ids.size
  @importances = {}
  @used_snps.uniq.each do |current_snp|
    shuffled_ids = oob_ids.shuffle
    permutated_snp_error = 0.0
    oob_ids.each_with_index do |oobi, index|
      permutated_prediction = traverse_with_permutation(
        @structure,
        individuals[oobi].snp_list,
        current_snp,
        individuals[shuffled_ids[index]].snp_list
      )
      permutated_snp_error += Nimbus::LossFunctions.mean_squared_error([oobi], @id_to_fenotype, permutated_prediction)
    end
    @importances[current_snp] = ((permutated_snp_error / oob_individuals_count) - @generalization_error).round(5)
  end
  @importances
end
65
83
 
66
84
  def self.traverse(tree_structure, data)
67
85
  return tree_structure if tree_structure.is_a? Numeric
@@ -69,6 +87,12 @@ module Nimbus
69
87
  return self.traverse( tree_structure.values.first[ data[tree_structure.keys.first - 1].to_i], data)
70
88
  end
71
89
 
90
# Walks +tree_structure+ down to a leaf and returns the leaf value.
#
# A node is either a Numeric (leaf) or a one-entry Hash of
# snp => [child_0, child_1, child_2]. At each node the child is selected by
# the value at position snp - 1 (converted with to_i) of +data+, except at
# nodes that split on +snp_to_permute+, where the value is read from
# +individual_to_permute+ instead.
def traverse_with_permutation(tree_structure, data, snp_to_permute, individual_to_permute)
  node = tree_structure
  until node.is_a?(Numeric)
    split_snp = node.keys.first
    children = node.values.first
    genotypes = split_snp == snp_to_permute ? individual_to_permute : data
    node = children[genotypes[split_snp - 1].to_i]
  end
  node
end
95
+
72
96
 
73
97
  private
74
98
 
@@ -92,6 +116,10 @@ module Nimbus
92
116
  raise Nimbus::TreeError, "Values for SNPs columns must be in [0, 1, 2]"
93
117
  end
94
118
 
119
# Appends +x+ to @used_snps (duplicates are kept) and returns that array.
def split_by_snp(x)
  @used_snps.push(x)
end
122
+
95
123
  end
96
124
 
97
125
  end
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: nimbus
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: "0.7"
5
+ version: "0.8"
6
6
  platform: ruby
7
7
  authors:
8
8
  - "Juanjo Baz\xC3\xA1n"
@@ -11,7 +11,7 @@ autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
13
 
14
- date: 2011-08-22 00:00:00 Z
14
+ date: 2011-08-23 00:00:00 Z
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
17
17
  name: rspec