nimbus 0.6.1 → 0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -29,6 +29,7 @@ module Nimbus
29
29
  @forest = ::Nimbus::Forest.new @config
30
30
  @forest.grow
31
31
  output_random_forest_file(@forest)
32
+ output_tree_errors_file(@forest)
32
33
  output_training_file_predictions(@forest)
33
34
  end
34
35
 
@@ -83,6 +84,17 @@ module Nimbus
83
84
  Nimbus.message "*" * 50
84
85
  end
85
86
 
87
+ def output_tree_errors_file(forest)
88
+ File.open(@config.output_tree_errors_file , 'w') {|f|
89
+ 1.upto(forest.tree_errors.size) do |te|
90
+ f.write("generalization error for tree ##{te}: #{forest.tree_errors[te-1].round(5)}\n")
91
+ end
92
+ }
93
+ Nimbus.message "* Generalization errors for every tree saved to:"
94
+ Nimbus.message "* Output tree errors file: #{@config.output_tree_errors_file}"
95
+ Nimbus.message "*" * 50
96
+ end
97
+
86
98
  def output_training_file_predictions(forest)
87
99
  File.open(@config.output_training_file , 'w') {|f|
88
100
  forest.predictions.sort.each{|p|
@@ -16,7 +16,8 @@ module Nimbus
16
16
  :training_set,
17
17
  :output_forest_file,
18
18
  :output_training_file,
19
- :output_testing_file
19
+ :output_testing_file,
20
+ :output_tree_errors_file
20
21
  )
21
22
 
22
23
  DEFAULTS = {
@@ -35,7 +36,8 @@ module Nimbus
35
36
 
36
37
  :output_forest_file => 'random_forest.yml',
37
38
  :output_training_file => 'training_file_predictions.txt',
38
- :output_testing_file => 'testing_file_predictions.txt'
39
+ :output_testing_file => 'testing_file_predictions.txt',
40
+ :output_tree_errors_file => 'generalization_errors.txt'
39
41
  }
40
42
 
41
43
 
@@ -53,6 +55,7 @@ module Nimbus
53
55
  @output_forest_file = File.expand_path(DEFAULTS[:output_forest_file], Dir.pwd)
54
56
  @output_training_file = File.expand_path(DEFAULTS[:output_training_file], Dir.pwd)
55
57
  @output_testing_file = File.expand_path(DEFAULTS[:output_testing_file], Dir.pwd)
58
+ @output_tree_errors_file = File.expand_path(DEFAULTS[:output_tree_errors_file], Dir.pwd)
56
59
  end
57
60
 
58
61
  def tree
data/lib/nimbus/forest.rb CHANGED
@@ -1,11 +1,12 @@
1
1
  module Nimbus
2
2
 
3
3
  class Forest
4
- attr_accessor :size, :trees, :bag, :predictions
4
+ attr_accessor :size, :trees, :bag, :predictions, :tree_errors
5
5
  attr_accessor :options
6
6
 
7
7
  def initialize(config)
8
8
  @trees = []
9
+ @tree_errors = []
9
10
  @options = config
10
11
  @size = config.forest_size
11
12
  @predictions = {}
@@ -20,6 +21,7 @@ module Nimbus
20
21
  tree_out_of_bag = oob tree_individuals_bag
21
22
  tree = Tree.new @options.tree
22
23
  @trees << tree.seed(@options.training_set.individuals, tree_individuals_bag, @options.training_set.ids_fenotypes)
24
+ @tree_errors << tree.generalization_error_from_oob(tree_out_of_bag)
23
25
  acumulate_predictions tree.predictions
24
26
  Nimbus.clear_line!
25
27
  end
@@ -30,7 +32,7 @@ module Nimbus
30
32
  @predictions = {}
31
33
  prediction_count = trees.size
32
34
  @options.read_testing_data{|individual|
33
- individual_prediction=0.0
35
+ individual_prediction = 0.0
34
36
  trees.each do |t|
35
37
  individual_prediction = (individual_prediction + Nimbus::Tree.traverse(t, individual.snp_list)).round(5)
36
38
  end
data/lib/nimbus/tree.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  module Nimbus
2
2
 
3
3
  class Tree
4
- attr_accessor :snp_sample_size, :snp_total_count, :node_min_size, :structure, :predictions
4
+ attr_accessor :snp_sample_size, :snp_total_count, :node_min_size, :structure, :generalization_error, :predictions
5
5
  attr_accessor :individuals, :id_to_fenotype
6
6
 
7
7
  def initialize(options)
@@ -17,6 +17,16 @@ module Nimbus
17
17
 
18
18
  @structure = build_node individuals_sample, Nimbus::LossFunctions.average(individuals_sample, @id_to_fenotype)
19
19
  end
20
+
21
+ def generalization_error_from_oob(oob_ids)
22
+ return nil if (@structure.nil? || @individuals.nil? || @id_to_fenotype.nil?)
23
+ oob_y_hat = Nimbus::LossFunctions.average(oob_ids, @id_to_fenotype)
24
+ oob_predictions = {}
25
+ oob_ids.each do |oobi|
26
+ oob_predictions[oobi] = Tree.traverse @structure, individuals[oobi].snp_list
27
+ end
28
+ @generalization_error = Nimbus::LossFunctions.quadratic_loss oob_ids, oob_predictions, oob_y_hat
29
+ end
20
30
 
21
31
  def build_node(individuals_ids, y_hat)
22
32
  # General loss function value for the node
@@ -26,7 +36,7 @@ module Nimbus
26
36
 
27
37
  # Finding the SNP that minimizes loss function
28
38
  snps = snps_random_sample
29
- min_loss, min_SNP, split, means = node_loss_function, nil, nil, nil
39
+ min_loss, min_SNP, split, means = node_loss_function, nil, nil, nil
30
40
 
31
41
  snps.each do |snp|
32
42
  individuals_split_by_snp_value = split_by_snp_value individuals_ids, snp
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: nimbus
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 0.6.1
5
+ version: "0.7"
6
6
  platform: ruby
7
7
  authors:
8
8
  - "Juanjo Baz\xC3\xA1n"