nimbus 2.2.1 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. checksums.yaml +7 -0
  2. data/CODE_OF_CONDUCT.md +7 -0
  3. data/CONTRIBUTING.md +46 -0
  4. data/MIT-LICENSE.txt +1 -1
  5. data/README.md +131 -21
  6. data/bin/nimbus +2 -2
  7. data/lib/nimbus.rb +2 -6
  8. data/lib/nimbus/classification_tree.rb +9 -12
  9. data/lib/nimbus/configuration.rb +22 -22
  10. data/lib/nimbus/forest.rb +8 -8
  11. data/lib/nimbus/loss_functions.rb +11 -0
  12. data/lib/nimbus/regression_tree.rb +8 -10
  13. data/lib/nimbus/tree.rb +54 -12
  14. data/lib/nimbus/version.rb +1 -1
  15. data/spec/classification_tree_spec.rb +47 -47
  16. data/spec/configuration_spec.rb +55 -55
  17. data/spec/fixtures/{classification_config.yml → classification/config.yml} +3 -3
  18. data/spec/fixtures/classification/random_forest.yml +1174 -0
  19. data/spec/fixtures/{classification_testing.data → classification/testing.data} +0 -0
  20. data/spec/fixtures/{classification_training.data → classification/training.data} +0 -0
  21. data/spec/fixtures/{regression_config.yml → regression/config.yml} +4 -4
  22. data/spec/fixtures/regression/random_forest.yml +2737 -0
  23. data/spec/fixtures/{regression_testing.data → regression/testing.data} +0 -0
  24. data/spec/fixtures/{regression_training.data → regression/training.data} +0 -0
  25. data/spec/forest_spec.rb +39 -39
  26. data/spec/individual_spec.rb +3 -3
  27. data/spec/loss_functions_spec.rb +31 -13
  28. data/spec/nimbus_spec.rb +2 -2
  29. data/spec/regression_tree_spec.rb +44 -44
  30. data/spec/training_set_spec.rb +3 -3
  31. data/spec/tree_spec.rb +4 -4
  32. metadata +37 -34
  33. data/spec/fixtures/classification_random_forest.yml +0 -922
  34. data/spec/fixtures/regression_random_forest.yml +0 -1741
@@ -88,6 +88,14 @@ module Nimbus
88
88
  @trees.to_yaml
89
89
  end
90
90
 
91
+ def classification?
92
+ @options.tree[:classes]
93
+ end
94
+
95
+ def regression?
96
+ @options.tree[:classes].nil?
97
+ end
98
+
91
99
  private
92
100
 
93
101
  def individuals_random_sample
@@ -140,14 +148,6 @@ module Nimbus
140
148
  }
141
149
  end
142
150
 
143
- def classification?
144
- @options.tree[:classes]
145
- end
146
-
147
- def regression?
148
- @options.tree[:classes].nil?
149
- end
150
-
151
151
  end
152
152
 
153
153
  end
@@ -35,6 +35,17 @@ module Nimbus
35
35
  def squared_difference(x,y)
36
36
  0.0 + (x-y)**2
37
37
  end
38
+
39
+ # Simplified Huber function
40
+ def pseudo_huber_error(ids, value_table, mean = nil)
41
+ mean ||= self.average ids, value_table
42
+ ids.inject(0.0){|sum, i| sum + (Math.log(Math.cosh(value_table[i] - mean))) }
43
+ end
44
+
45
+ # Simplified Huber loss function: PHE / n
46
+ def pseudo_huber_loss(ids, value_table, mean = nil)
47
+ self.pseudo_huber_error(ids, value_table, mean) / ids.size
48
+ end
38
49
 
39
50
  ## CLASSSIFICATION
40
51
 
@@ -8,7 +8,7 @@ module Nimbus
8
8
  # * 1: Calculate loss function for the individuals in the node (first node contains all the individuals).
9
9
  # * 2: Take a random sample of the SNPs (size m << total count of SNPs)
10
10
  # * 3: Compute the loss function (quadratic loss) for the split of the sample based on value of every SNP.
11
- # * 4: If the SNP with minimum loss function also minimizes the general loss of the node, split the individuals sample in three nodes, based on value for that SNP [0, 1, or 2]
11
+ # * 4: If the SNP with minimum loss function also minimizes the general loss of the node, split the individuals sample in two nodes, based on average value for that SNP [0,1][2], or [0][1,2]
12
12
  # * 5: Repeat from 1 for every node until:
13
13
  # - a) The individuals count in that node is < minimum size OR
14
14
  # - b) None of the SNP splits has a loss function smaller than the node loss function
@@ -27,8 +27,8 @@ module Nimbus
27
27
 
28
28
  # Creates a node by taking a random sample of the SNPs and computing the loss function for every split by SNP of that sample.
29
29
  #
30
- # * If SNP_min is the SNP with smaller loss function and it is < the loss function of the node, it splits the individuals sample in three:
31
- # (those with value 0 for the SNP_min, those with value 1 for the SNP_min, and those with value 2 for the SNP_min) then it builds these 3 new nodes.
30
+ # * If SNP_min is the SNP with smaller loss function and it is < the loss function of the node, it splits the individuals sample in two:
31
+ # (the average of the 0,1,2 values for the SNP_min in the individuals is computed, and they are splitted in [<=avg], [>avg]) then it builds these 2 new nodes.
32
32
  # * Otherwise every individual in the node gets labeled with the average of the fenotype values of all of them.
33
33
  def build_node(individuals_ids, y_hat)
34
34
  # General loss function value for the node
@@ -38,22 +38,20 @@ module Nimbus
38
38
 
39
39
  # Finding the SNP that minimizes loss function
40
40
  snps = snps_random_sample
41
- min_loss, min_SNP, split, means = node_loss_function, nil, nil, nil
41
+ min_loss, min_SNP, split, split_type, means = node_loss_function, nil, nil, nil, nil
42
42
 
43
43
  snps.each do |snp|
44
- individuals_split_by_snp_value = split_by_snp_value individuals_ids, snp
44
+ individuals_split_by_snp_value, node_split_type = split_by_snp_avegare_value individuals_ids, snp
45
45
  mean_0 = Nimbus::LossFunctions.average individuals_split_by_snp_value[0], @id_to_fenotype
46
46
  mean_1 = Nimbus::LossFunctions.average individuals_split_by_snp_value[1], @id_to_fenotype
47
- mean_2 = Nimbus::LossFunctions.average individuals_split_by_snp_value[2], @id_to_fenotype
48
47
  loss_0 = Nimbus::LossFunctions.mean_squared_error individuals_split_by_snp_value[0], @id_to_fenotype, mean_0
49
48
  loss_1 = Nimbus::LossFunctions.mean_squared_error individuals_split_by_snp_value[1], @id_to_fenotype, mean_1
50
- loss_2 = Nimbus::LossFunctions.mean_squared_error individuals_split_by_snp_value[2], @id_to_fenotype, mean_2
51
- loss_snp = (loss_0 + loss_1 + loss_2) / individuals_count
49
+ loss_snp = (loss_0 + loss_1) / individuals_count
52
50
 
53
- min_loss, min_SNP, split, means = loss_snp, snp, individuals_split_by_snp_value, [mean_0, mean_1, mean_2] if loss_snp < min_loss
51
+ min_loss, min_SNP, split, split_type, means = loss_snp, snp, individuals_split_by_snp_value, node_split_type, [mean_0, mean_1] if loss_snp < min_loss
54
52
  end
55
53
 
56
- return build_branch(min_SNP, split, means, y_hat) if min_loss < node_loss_function
54
+ return build_branch(min_SNP, split, split_type, means, y_hat) if min_loss < node_loss_function
57
55
  return label_node(y_hat, individuals_ids)
58
56
  end
59
57
 
@@ -18,6 +18,9 @@ module Nimbus
18
18
  attr_accessor :snp_sample_size, :snp_total_count, :node_min_size, :used_snps, :structure, :generalization_error, :predictions, :importances
19
19
  attr_accessor :individuals, :id_to_fenotype
20
20
 
21
+ NODE_SPLIT_01_2 = "zero"
22
+ NODE_SPLIT_0_12 = "two"
23
+
21
24
  # Initialize Tree object with the configuration (as in Nimbus::Configuration.tree) options received.
22
25
  def initialize(options)
23
26
  @snp_total_count = options[:snp_total_count]
@@ -53,8 +56,14 @@ module Nimbus
53
56
  # Returns the prediction for that individual (the label of the final node reached by the individual).
54
57
  def self.traverse(tree_structure, data)
55
58
  return tree_structure if tree_structure.is_a?(Numeric) || tree_structure.is_a?(String)
59
+
56
60
  raise Nimbus::TreeError, "Forest data has invalid structure. Please check your forest data (file)." if !(tree_structure.is_a?(Hash) && tree_structure.keys.size == 1)
57
- return self.traverse( tree_structure.values.first[ data[tree_structure.keys.first - 1].to_i], data)
61
+
62
+ branch = tree_structure.values.first
63
+ split_type = branch[1].to_s
64
+ datum = data_traversing_value(data[tree_structure.keys.first - 1], split_type)
65
+
66
+ return self.traverse(branch[datum], data)
58
67
  end
59
68
 
60
69
  protected
@@ -63,13 +72,12 @@ module Nimbus
63
72
  (1..@snp_total_count).to_a.sample(@snp_sample_size).sort
64
73
  end
65
74
 
66
- def build_branch(snp, split, y_hats, parent_y_hat)
67
- node_0 = split[0].size == 0 ? label_node(parent_y_hat, []) : build_node(split[0], y_hats[0])
68
- node_1 = split[1].size == 0 ? label_node(parent_y_hat, []) : build_node(split[1], y_hats[1])
69
- node_2 = split[2].size == 0 ? label_node(parent_y_hat, []) : build_node(split[2], y_hats[2])
75
+ def build_branch(snp, split, split_type, y_hats, parent_y_hat)
76
+ node_a = split[0].size == 0 ? label_node(parent_y_hat, []) : build_node(split[0], y_hats[0])
77
+ node_b = split[1].size == 0 ? label_node(parent_y_hat, []) : build_node(split[1], y_hats[1])
70
78
 
71
79
  split_by_snp(snp)
72
- return { snp => [node_0, node_1, node_2] }
80
+ return { snp => [node_a, split_type, node_b] }
73
81
  end
74
82
 
75
83
  def label_node(value, ids)
@@ -78,24 +86,58 @@ module Nimbus
78
86
  label
79
87
  end
80
88
 
81
- def split_by_snp_value(ids, snp)
82
- split = [[], [], []]
89
+ def split_by_snp_avegare_value(ids, snp)
90
+ split_012 = [[], [], []]
83
91
  ids.each do |i|
84
- split[ @individuals[i].snp_list[snp-1] ] << @individuals[i].id
92
+ split_012[ @individuals[i].snp_list[snp-1] ] << @individuals[i].id
85
93
  end
86
- split
94
+ # we split by the average number of 0,1,2 values.
95
+ # So if there are less or equal 0s than 2s the split is [0,1][2]
96
+ # and if there are more 0s than 2s the average will be <1 so the split is [0][1,2]
97
+ split_type = (split_012[0].size <= split_012[2].size ? NODE_SPLIT_01_2 : NODE_SPLIT_0_12)
98
+ split_type == NODE_SPLIT_01_2 ? split_012[0] += split_012[1] : split_012[2] += split_012[1]
99
+ split = [split_012[0], split_012[2]]
100
+ [split, split_type]
87
101
  rescue => ex
88
102
  raise Nimbus::TreeError, "Values for SNPs columns must be in [0, 1, 2]"
89
103
  end
90
104
 
105
+ def split_by_value(ids, snp, value)
106
+ split = [[], []]
107
+ ids.each do |i|
108
+ @individuals[i].snp_list[snp-1] > value ? (split[1] << @individuals[i].id) : (split[0] << @individuals[i].id)
109
+ end
110
+ split
111
+ rescue => ex
112
+ raise Nimbus::TreeError, "Values for SNPs columns must be numeric"
113
+ end
114
+
91
115
  def split_by_snp(x)
92
116
  @used_snps << x
93
117
  end
94
118
 
95
119
  def traverse_with_permutation(tree_structure, data, snp_to_permute, individual_to_permute)
96
120
  return tree_structure if tree_structure.is_a?(Numeric) || tree_structure.is_a?(String)
97
- individual_data = (tree_structure.keys.first == snp_to_permute ? individual_to_permute : data)
98
- return traverse_with_permutation( tree_structure.values.first[ individual_data[tree_structure.keys.first - 1].to_i], data, snp_to_permute, individual_to_permute)
121
+
122
+ key = tree_structure.keys.first
123
+ branch = tree_structure.values.first
124
+ individual_data = (key == snp_to_permute ? individual_to_permute : data)
125
+ split_type = branch[1]
126
+ datum = data_traversing_value(individual_data[key - 1].to_i, split_type)
127
+
128
+ return traverse_with_permutation branch[datum], data, snp_to_permute, individual_to_permute
129
+ end
130
+
131
+ def data_traversing_value(datum, split_type)
132
+ Nimbus::Tree.data_traversing_value(datum, split_type)
133
+ end
134
+
135
+ def self.data_traversing_value(datum, split_type)
136
+ if datum == 1
137
+ return 0 if split_type == NODE_SPLIT_01_2
138
+ return 2 if split_type == NODE_SPLIT_0_12
139
+ end
140
+ datum
99
141
  end
100
142
 
101
143
  end
@@ -1,3 +1,3 @@
1
1
  module Nimbus
2
- VERSION = "2.2.1"
2
+ VERSION = "2.3.0"
3
3
  end
@@ -4,34 +4,34 @@ describe Nimbus::ClassificationTree do
4
4
 
5
5
  before(:each) do
6
6
  @config = Nimbus::Configuration.new
7
- @config.load fixture_file('classification_config.yml')
7
+ @config.load fixture_file('classification/config.yml')
8
8
 
9
9
  @tree = Nimbus::ClassificationTree.new @config.tree
10
10
  end
11
11
 
12
12
  it "is initialized with tree config info" do
13
- @tree.snp_total_count.should == 100
14
- @tree.snp_sample_size.should == 33
15
- @tree.node_min_size.should == 5
16
- @tree.classes.size.should == 2
17
- @tree.classes[0].should == '0'
18
- @tree.classes[1].should == '1'
13
+ expect(@tree.snp_total_count).to eq 100
14
+ expect(@tree.snp_sample_size).to eq 33
15
+ expect(@tree.node_min_size).to eq 5
16
+ expect(@tree.classes.size).to eq 2
17
+ expect(@tree.classes[0]).to eq '0'
18
+ expect(@tree.classes[1]).to eq '1'
19
19
  end
20
20
 
21
21
  it "creates a tree structure when seeded with training data" do
22
22
  @config.load_training_data
23
- @tree.structure.should be_nil
23
+ expect(@tree.structure).to be_nil
24
24
  @tree.seed(@config.training_set.individuals, @config.training_set.all_ids, @config.training_set.ids_fenotypes)
25
- @tree.structure.should_not be_nil
26
- @tree.structure.should be_kind_of Hash
25
+ expect(@tree.structure).to_not be_nil
26
+ expect(@tree.structure).to be_kind_of Hash
27
27
 
28
- @tree.structure.keys.first.should == @tree.used_snps.last
29
- @tree.used_snps.should_not be_empty
28
+ expect(@tree.structure.keys.first).to eq @tree.used_snps.last
29
+ expect(@tree.used_snps).to_not be_empty
30
30
  end
31
31
 
32
- it "splits node in three when building a node and finds a suitable split" do
32
+ it "splits node when building a node and finds a suitable split" do
33
33
  @config.load_training_data
34
- @tree.stub!(:snps_random_sample).and_return((68..100).to_a) #97 is best split
34
+ allow_any_instance_of(Nimbus::ClassificationTree).to receive(:snps_random_sample).and_return((68..100).to_a) #97 is best split
35
35
 
36
36
  @tree.individuals = @config.training_set.individuals
37
37
  @tree.id_to_fenotype = @config.training_set.ids_fenotypes
@@ -39,29 +39,29 @@ describe Nimbus::ClassificationTree do
39
39
  @tree.predictions = {}
40
40
 
41
41
  branch = @tree.build_node @config.training_set.all_ids, Nimbus::LossFunctions.majority_class(@config.training_set.all_ids, @config.training_set.ids_fenotypes, @config.classes)
42
- branch.keys.size.should == 1
43
- branch.keys.first.should == 97
44
- branch[97].size.should == 3
45
- branch[97][0].should be_kind_of Hash
46
- branch[97][1].should be_kind_of Hash
47
- branch[97][2].should be_kind_of Hash
42
+ expect(branch.keys.size).to eq 1
43
+ expect(branch.keys.first).to eq 97
44
+ expect(branch[97].size).to eq 3
45
+ expect(branch[97][0]).to be_kind_of Hash
46
+ expect([Nimbus::Tree::NODE_SPLIT_01_2, Nimbus::Tree::NODE_SPLIT_0_12]).to include(branch[97][1])
47
+ expect(branch[97][2]).to be_kind_of Hash
48
48
  end
49
49
 
50
50
  it "keeps track of all SNPs used for the tree" do
51
51
  @config.load_training_data
52
52
  snps = (33..65).to_a
53
- @tree.stub!(:snps_random_sample).and_return(snps)
54
- @tree.used_snps.should be_nil
53
+ allow_any_instance_of(Nimbus::ClassificationTree).to receive(:snps_random_sample).and_return(snps)
54
+ expect(@tree.used_snps).to be_nil
55
55
  @tree.seed(@config.training_set.individuals, @config.training_set.all_ids, @config.training_set.ids_fenotypes)
56
- @tree.used_snps.size.should > 4
56
+ expect(@tree.used_snps.size).to be > 4
57
57
  @tree.used_snps.each{|snp|
58
- snps.include?(snp).should be_true
58
+ expect(snps.include?(snp)).to be true
59
59
  }
60
60
  end
61
61
 
62
62
  it "labels node when building a node and there is not a suitable split" do
63
63
  @config.load_training_data
64
- @tree.stub!(:snps_random_sample).and_return([33])
64
+ allow_any_instance_of(Nimbus::ClassificationTree).to receive(:snps_random_sample).and_return([11])
65
65
 
66
66
  @tree.individuals = @config.training_set.individuals
67
67
  @tree.id_to_fenotype = @config.training_set.ids_fenotypes
@@ -69,9 +69,9 @@ describe Nimbus::ClassificationTree do
69
69
  @tree.predictions = {}
70
70
 
71
71
  branch = @tree.build_node @config.training_set.all_ids, Nimbus::LossFunctions.majority_class(@config.training_set.all_ids, @config.training_set.ids_fenotypes, @config.classes)
72
- branch[33][0].should be_kind_of String
73
- branch[33][1].should be_kind_of String
74
- branch[33][2].should be_kind_of String
72
+ expect(branch[11][0]).to be_kind_of String
73
+ expect(branch[11][1]).to be_kind_of String
74
+ expect(branch[11][2]).to be_kind_of String
75
75
  end
76
76
 
77
77
  it "labels node when building a node with less individuals than the minimum node size" do
@@ -83,50 +83,50 @@ describe Nimbus::ClassificationTree do
83
83
  @tree.predictions = {}
84
84
 
85
85
  label = @tree.build_node [1, 10, 33], Nimbus::LossFunctions.majority_class(@config.training_set.all_ids, @config.training_set.ids_fenotypes, @config.classes)
86
- label.should be_kind_of String
86
+ expect(label).to be_kind_of String
87
87
 
88
88
  label = @tree.build_node [2, 10], Nimbus::LossFunctions.majority_class(@config.training_set.all_ids, @config.training_set.ids_fenotypes, @config.classes)
89
- label.should be_kind_of String
89
+ expect(label).to be_kind_of String
90
90
 
91
91
  label = @tree.build_node [1, 10, 33], Nimbus::LossFunctions.majority_class(@config.training_set.all_ids, @config.training_set.ids_fenotypes, @config.classes)
92
- label.should be_kind_of String
92
+ expect(label).to be_kind_of String
93
93
 
94
94
  label = @tree.build_node [99, 22, 10, 33], Nimbus::LossFunctions.majority_class(@config.training_set.all_ids, @config.training_set.ids_fenotypes, @config.classes)
95
- label.should be_kind_of String
95
+ expect(label).to be_kind_of String
96
96
  end
97
97
 
98
98
  it 'computes generalization error for the tree' do
99
99
  @config.load_training_data
100
100
  @tree.seed(@config.training_set.individuals, @config.training_set.all_ids, @config.training_set.ids_fenotypes)
101
- @tree.generalization_error.should be_nil
101
+ expect(@tree.generalization_error).to be_nil
102
102
  @tree.generalization_error_from_oob((3..300).to_a)
103
- @tree.generalization_error.should be_kind_of Numeric
104
- @tree.generalization_error.should > 0.0
105
- @tree.generalization_error.should < 1.0
103
+ expect(@tree.generalization_error).to be_kind_of Numeric
104
+ expect(@tree.generalization_error).to be > 0.0
105
+ expect(@tree.generalization_error).to be < 1.0
106
106
  end
107
107
 
108
108
  it 'estimates importance for all SNPs' do
109
109
  @config.load_training_data
110
110
  @tree.seed(@config.training_set.individuals, @config.training_set.all_ids, @config.training_set.ids_fenotypes)
111
- @tree.importances.should be_nil
111
+ expect(@tree.importances).to be_nil
112
112
  @tree.estimate_importances((200..533).to_a)
113
- @tree.importances.should be_kind_of Hash
114
- @tree.importances.keys.should_not be_empty
115
- (@tree.importances.keys - (1..100).to_a).should be_empty #all keys are snp indexes (100 snps in training file)
113
+ expect(@tree.importances).to be_kind_of Hash
114
+ expect(@tree.importances.keys).to_not be_empty
115
+ expect((@tree.importances.keys - (1..100).to_a)).to be_empty #all keys are snp indexes (100 snps in training file)
116
116
  end
117
117
 
118
118
  it 'get prediction for an individual pushing it down a tree structure' do
119
- tree_structure = Psych.load(File.open fixture_file('classification_random_forest.yml')).first
119
+ tree_structure = Psych.load(File.open fixture_file('classification/random_forest.yml')).first
120
120
  individual_data = [0]*100
121
121
  prediction = Nimbus::Tree.traverse tree_structure, individual_data
122
- prediction.should == '1'
122
+ expect(prediction).to eq '0'
123
123
 
124
- individual_data[26-1] = 1
125
- individual_data[57-1] = 2
126
- individual_data[98-1] = 2
127
- individual_data[8-1] = 1
124
+ individual_data[8-1] = 2
125
+ individual_data[29-1] = 0
126
+ individual_data[1-1] = 1
127
+ individual_data[7-1] = 1
128
128
  prediction = Nimbus::Tree.traverse tree_structure, individual_data
129
- prediction.should == '0'
129
+ expect(prediction).to eq '1'
130
130
  end
131
131
 
132
132
  end
@@ -5,65 +5,65 @@ describe Nimbus::Configuration do
5
5
 
6
6
  it "loads configuration options from config file" do
7
7
  config = Nimbus::Configuration.new
8
- config.load fixture_file('regression_config.yml')
8
+ config.load fixture_file('regression/config.yml')
9
9
 
10
- config.training_file.should == fixture_file('regression_training.data')
11
- config.testing_file.should == fixture_file('regression_testing.data')
12
- config.forest_file.should == fixture_file('regression_random_forest.yml')
13
- config.classes.should be_nil
14
- config.do_importances.should be
10
+ expect(config.training_file).to eq fixture_file('regression/training.data')
11
+ expect(config.testing_file).to eq fixture_file('regression/testing.data')
12
+ expect(config.forest_file).to eq fixture_file('regression/random_forest.yml')
13
+ expect(config.classes).to be_nil
14
+ expect(config.do_importances).to be
15
15
 
16
- config.forest_size.should == 3
17
- config.tree_SNP_sample_size.should == 60
18
- config.tree_SNP_total_count.should == 200
19
- config.tree_node_min_size.should == 5
16
+ expect(config.forest_size).to eq 3
17
+ expect(config.tree_SNP_sample_size).to eq 60
18
+ expect(config.tree_SNP_total_count).to eq 200
19
+ expect(config.tree_node_min_size).to eq 5
20
20
 
21
21
  config = Nimbus::Configuration.new
22
- config.load fixture_file('classification_config.yml')
23
-
24
- config.training_file.should == fixture_file('classification_training.data')
25
- config.testing_file.should == fixture_file('classification_testing.data')
26
- config.forest_file.should == fixture_file('classification_random_forest.yml')
27
- config.classes.should == ['0','1']
28
- config.do_importances.should_not be
29
-
30
- config.forest_size.should == 3
31
- config.tree_SNP_sample_size.should == 33
32
- config.tree_SNP_total_count.should == 100
33
- config.tree_node_min_size.should == 5
22
+ config.load fixture_file('classification/config.yml')
23
+
24
+ expect(config.training_file).to eq fixture_file('classification/training.data')
25
+ expect(config.testing_file).to eq fixture_file('classification/testing.data')
26
+ expect(config.forest_file).to eq fixture_file('classification/random_forest.yml')
27
+ expect(config.classes).to eq ['0','1']
28
+ expect(config.do_importances).to_not be
29
+
30
+ expect(config.forest_size).to eq 3
31
+ expect(config.tree_SNP_sample_size).to eq 33
32
+ expect(config.tree_SNP_total_count).to eq 100
33
+ expect(config.tree_node_min_size).to eq 5
34
34
  end
35
35
 
36
36
  it 'tree method return tree-related subset of options for regression trees' do
37
37
  config = Nimbus::Configuration.new
38
- config.load fixture_file('regression_config.yml')
38
+ config.load fixture_file('regression/config.yml')
39
39
  tree_options = config.tree
40
40
 
41
- tree_options[:snp_sample_size].should_not be_nil
42
- tree_options[:snp_total_count].should_not be_nil
43
- tree_options[:tree_node_min_size].should_not be_nil
44
- tree_options[:classes].should be_nil
41
+ expect(tree_options[:snp_sample_size]).to_not be_nil
42
+ expect(tree_options[:snp_total_count]).to_not be_nil
43
+ expect(tree_options[:tree_node_min_size]).to_not be_nil
44
+ expect(tree_options[:classes]).to be_nil
45
45
  end
46
46
 
47
47
  it 'tree method return tree-related subset of options for classification trees' do
48
48
  config = Nimbus::Configuration.new
49
- config.load fixture_file('classification_config.yml')
49
+ config.load fixture_file('classification/config.yml')
50
50
  tree_options = config.tree
51
51
 
52
- tree_options[:snp_sample_size].should_not be_nil
53
- tree_options[:snp_total_count].should_not be_nil
54
- tree_options[:tree_node_min_size].should_not be_nil
55
- tree_options[:classes].should_not be_nil
52
+ expect(tree_options[:snp_sample_size]).to_not be_nil
53
+ expect(tree_options[:snp_total_count]).to_not be_nil
54
+ expect(tree_options[:tree_node_min_size]).to_not be_nil
55
+ expect(tree_options[:classes]).to_not be_nil
56
56
  end
57
57
 
58
58
  it "creates a training set object from training data file" do
59
59
  config = Nimbus::Configuration.new
60
- config.load fixture_file('regression_config.yml')
61
- config.training_set.should be_nil
60
+ config.load fixture_file('regression/config.yml')
61
+ expect(config.training_set).to be_nil
62
62
  config.load_training_data
63
- config.training_set.should be_kind_of Nimbus::TrainingSet
64
- config.training_set.all_ids.sort.should == (1..800).to_a
63
+ expect(config.training_set).to be_kind_of Nimbus::TrainingSet
64
+ expect(config.training_set.all_ids.sort).to eq (1..800).to_a
65
65
 
66
- File.open(fixture_file('regression_training.data')) {|file|
66
+ File.open(fixture_file('regression/training.data')) {|file|
67
67
  feno1, id1, *snp_list_1 = file.readline.split
68
68
  feno2, id2, *snp_list_2 = file.readline.split
69
69
  feno3, id3, *snp_list_3 = file.readline.split
@@ -72,9 +72,9 @@ describe Nimbus::Configuration do
72
72
  i2 = Nimbus::Individual.new(id2.to_i, feno2.to_f, snp_list_2.map{|snp| snp.to_i})
73
73
  i3 = Nimbus::Individual.new(id3.to_i, feno3.to_f, snp_list_3.map{|snp| snp.to_i})
74
74
 
75
- config.training_set.individuals[id1.to_i].id.should == i1.id
76
- config.training_set.individuals[id2.to_i].fenotype.should == i2.fenotype
77
- config.training_set.individuals[id3.to_i].snp_list.should == i3.snp_list
75
+ expect(config.training_set.individuals[id1.to_i].id).to eq i1.id
76
+ expect(config.training_set.individuals[id2.to_i].fenotype).to eq i2.fenotype
77
+ expect(config.training_set.individuals[id3.to_i].snp_list).to eq i3.snp_list
78
78
 
79
79
  config.training_set.ids_fenotypes[id1.to_i] = feno1.to_f
80
80
  config.training_set.ids_fenotypes[id2.to_i] = feno2.to_f
@@ -84,38 +84,38 @@ describe Nimbus::Configuration do
84
84
 
85
85
  it "reads testing data and yields one individual at a time" do
86
86
  config = Nimbus::Configuration.new
87
- config.load fixture_file('regression_config.yml')
87
+ config.load fixture_file('regression/config.yml')
88
88
 
89
89
  test_individuals = []
90
- File.open(fixture_file('regression_testing.data')) {|file|
90
+ File.open(fixture_file('regression/testing.data')) {|file|
91
91
  file.each do |line|
92
92
  data_id, *snp_list = line.strip.split
93
93
  test_individuals << Nimbus::Individual.new(data_id.to_i, nil, snp_list.map{|snp| snp.to_i})
94
94
  end
95
95
  }
96
- test_individuals.size.should == 200
96
+ expect(test_individuals.size).to eq 200
97
97
  config.read_testing_data{|individual|
98
98
  test_individual = test_individuals.shift
99
- individual.id.should_not be_nil
100
- individual.id.should == test_individual.id
101
- individual.snp_list.should_not be_empty
102
- individual.snp_list.should == test_individual.snp_list
99
+ expect(individual.id).to_not be_nil
100
+ expect(individual.id).to eq test_individual.id
101
+ expect(individual.snp_list).to_not be_empty
102
+ expect(individual.snp_list).to eq test_individual.snp_list
103
103
  }
104
104
  end
105
105
 
106
106
  it "creates a forest object loading data from a yaml file" do
107
107
  config = Nimbus::Configuration.new
108
- config.load fixture_file('regression_config.yml')
108
+ config.load fixture_file('regression/config.yml')
109
109
 
110
- trees = Psych.load(File.open fixture_file('regression_random_forest.yml'))
111
- trees.first.keys.first.should == 189
112
- trees.size.should == 3
110
+ trees = Psych.load(File.open fixture_file('regression/random_forest.yml'))
111
+ expect(trees.first.keys.first).to eq 176
112
+ expect(trees.size).to eq 3
113
113
 
114
114
  forest = config.load_forest
115
- forest.should be_kind_of Nimbus::Forest
116
- forest.trees[0].should == trees.first
117
- forest.trees[1].should == trees[1]
118
- forest.trees.last.should == trees[2]
115
+ expect(forest).to be_kind_of Nimbus::Forest
116
+ expect(forest.trees[0]).to eq trees.first
117
+ expect(forest.trees[1]).to eq trees[1]
118
+ expect(forest.trees.last).to eq trees[2]
119
119
  end
120
120
 
121
121
  end