nimbus 2.2.1 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CODE_OF_CONDUCT.md +7 -0
- data/CONTRIBUTING.md +46 -0
- data/MIT-LICENSE.txt +1 -1
- data/README.md +131 -21
- data/bin/nimbus +2 -2
- data/lib/nimbus.rb +2 -6
- data/lib/nimbus/classification_tree.rb +9 -12
- data/lib/nimbus/configuration.rb +22 -22
- data/lib/nimbus/forest.rb +8 -8
- data/lib/nimbus/loss_functions.rb +11 -0
- data/lib/nimbus/regression_tree.rb +8 -10
- data/lib/nimbus/tree.rb +54 -12
- data/lib/nimbus/version.rb +1 -1
- data/spec/classification_tree_spec.rb +47 -47
- data/spec/configuration_spec.rb +55 -55
- data/spec/fixtures/{classification_config.yml → classification/config.yml} +3 -3
- data/spec/fixtures/classification/random_forest.yml +1174 -0
- data/spec/fixtures/{classification_testing.data → classification/testing.data} +0 -0
- data/spec/fixtures/{classification_training.data → classification/training.data} +0 -0
- data/spec/fixtures/{regression_config.yml → regression/config.yml} +4 -4
- data/spec/fixtures/regression/random_forest.yml +2737 -0
- data/spec/fixtures/{regression_testing.data → regression/testing.data} +0 -0
- data/spec/fixtures/{regression_training.data → regression/training.data} +0 -0
- data/spec/forest_spec.rb +39 -39
- data/spec/individual_spec.rb +3 -3
- data/spec/loss_functions_spec.rb +31 -13
- data/spec/nimbus_spec.rb +2 -2
- data/spec/regression_tree_spec.rb +44 -44
- data/spec/training_set_spec.rb +3 -3
- data/spec/tree_spec.rb +4 -4
- metadata +37 -34
- data/spec/fixtures/classification_random_forest.yml +0 -922
- data/spec/fixtures/regression_random_forest.yml +0 -1741
data/lib/nimbus/forest.rb
CHANGED
@@ -88,6 +88,14 @@ module Nimbus
       @trees.to_yaml
     end
 
+    def classification?
+      @options.tree[:classes]
+    end
+
+    def regression?
+      @options.tree[:classes].nil?
+    end
+
     private
 
     def individuals_random_sample
@@ -140,14 +148,6 @@ module Nimbus
       }
     end
 
-    def classification?
-      @options.tree[:classes]
-    end
-
-    def regression?
-      @options.tree[:classes].nil?
-    end
-
   end
 
 end
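With classification? and regression? moved above private, callers can now ask a loaded forest which mode its configuration selects. A minimal usage sketch, assuming the gem is installed; the config.yml path and the surrounding driver code are illustrative, not part of this diff:

require 'nimbus'

# Hypothetical driver: load a configuration, build a forest, then branch
# on the now-public mode predicates.
config = Nimbus::Configuration.new
config.load 'config.yml'        # illustrative path
config.load_training_data
forest = Nimbus::Forest.new(config)

if forest.classification?       # truthy when the config defines classes
  puts 'forest will predict class labels'
elsif forest.regression?        # true when no classes are configured
  puts 'forest will predict numeric phenotype values'
end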
data/lib/nimbus/loss_functions.rb
CHANGED
@@ -35,6 +35,17 @@ module Nimbus
       def squared_difference(x,y)
         0.0 + (x-y)**2
       end
+
+      # Simplified Huber function
+      def pseudo_huber_error(ids, value_table, mean = nil)
+        mean ||= self.average ids, value_table
+        ids.inject(0.0){|sum, i| sum + (Math.log(Math.cosh(value_table[i] - mean))) }
+      end
+
+      # Simplified Huber loss function: PHE / n
+      def pseudo_huber_loss(ids, value_table, mean = nil)
+        self.pseudo_huber_error(ids, value_table, mean) / ids.size
+      end
 
       ## CLASSSIFICATION
 
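The new helpers implement a simplified (pseudo-)Huber criterion: pseudo_huber_error sums log(cosh(y_i - mean)) over the ids in a node, and pseudo_huber_loss divides that sum by the number of ids. A small worked example, assuming the gem is installed and using made-up phenotype values:

require 'nimbus'

# Toy id => phenotype table (values invented for illustration).
values = { 1 => 0.5, 2 => 1.5, 3 => 2.0 }
ids    = values.keys

mean = Nimbus::LossFunctions.average(ids, values)            # ~1.333
phe  = Nimbus::LossFunctions.pseudo_huber_error(ids, values) # sum of log(cosh(v - mean))
phl  = Nimbus::LossFunctions.pseudo_huber_loss(ids, values)  # phe / 3

# Same quantity computed directly, for comparison:
manual = ids.sum { |i| Math.log(Math.cosh(values[i] - mean)) }
puts [phe, phl, manual].inspect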
data/lib/nimbus/regression_tree.rb
CHANGED
@@ -8,7 +8,7 @@ module Nimbus
   # * 1: Calculate loss function for the individuals in the node (first node contains all the individuals).
   # * 2: Take a random sample of the SNPs (size m << total count of SNPs)
   # * 3: Compute the loss function (quadratic loss) for the split of the sample based on value of every SNP.
-  # * 4: If the SNP with minimum loss function also minimizes the general loss of the node, split the individuals sample in
+  # * 4: If the SNP with minimum loss function also minimizes the general loss of the node, split the individuals sample in two nodes, based on average value for that SNP [0,1][2], or [0][1,2]
   # * 5: Repeat from 1 for every node until:
   #   - a) The individuals count in that node is < minimum size OR
   #   - b) None of the SNP splits has a loss function smaller than the node loss function
@@ -27,8 +27,8 @@ module Nimbus
 
     # Creates a node by taking a random sample of the SNPs and computing the loss function for every split by SNP of that sample.
     #
-    # * If SNP_min is the SNP with smaller loss function and it is < the loss function of the node, it splits the individuals sample in
-    # (
+    # * If SNP_min is the SNP with smaller loss function and it is < the loss function of the node, it splits the individuals sample in two:
+    # (the average of the 0,1,2 values for the SNP_min in the individuals is computed, and they are splitted in [<=avg], [>avg]) then it builds these 2 new nodes.
     # * Otherwise every individual in the node gets labeled with the average of the fenotype values of all of them.
     def build_node(individuals_ids, y_hat)
       # General loss function value for the node
@@ -38,22 +38,20 @@ module Nimbus
 
       # Finding the SNP that minimizes loss function
       snps = snps_random_sample
-      min_loss, min_SNP, split, means = node_loss_function, nil, nil, nil
+      min_loss, min_SNP, split, split_type, means = node_loss_function, nil, nil, nil, nil
 
       snps.each do |snp|
-        individuals_split_by_snp_value =
+        individuals_split_by_snp_value, node_split_type = split_by_snp_avegare_value individuals_ids, snp
         mean_0 = Nimbus::LossFunctions.average individuals_split_by_snp_value[0], @id_to_fenotype
         mean_1 = Nimbus::LossFunctions.average individuals_split_by_snp_value[1], @id_to_fenotype
-        mean_2 = Nimbus::LossFunctions.average individuals_split_by_snp_value[2], @id_to_fenotype
         loss_0 = Nimbus::LossFunctions.mean_squared_error individuals_split_by_snp_value[0], @id_to_fenotype, mean_0
         loss_1 = Nimbus::LossFunctions.mean_squared_error individuals_split_by_snp_value[1], @id_to_fenotype, mean_1
-
-        loss_snp = (loss_0 + loss_1 + loss_2) / individuals_count
+        loss_snp = (loss_0 + loss_1) / individuals_count
 
-        min_loss, min_SNP, split, means = loss_snp, snp, individuals_split_by_snp_value, [mean_0, mean_1
+        min_loss, min_SNP, split, split_type, means = loss_snp, snp, individuals_split_by_snp_value, node_split_type, [mean_0, mean_1] if loss_snp < min_loss
       end
 
-      return build_branch(min_SNP, split, means, y_hat) if min_loss < node_loss_function
+      return build_branch(min_SNP, split, split_type, means, y_hat) if min_loss < node_loss_function
       return label_node(y_hat, individuals_ids)
     end
 
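The node split changes from three branches ([0], [1], [2]) to two: heterozygotes (value 1) are merged into whichever homozygote bucket the average genotype points to, as the new comments in tree.rb describe. A standalone sketch of that rule, independent of the gem's classes (the method and variable names below are illustrative only):

# Genotypes are 0/1/2 per individual. If there are <= as many 0s as 2s,
# the average genotype is >= 1, so the split is [0,1] vs [2] ("zero");
# otherwise the average is < 1 and the split is [0] vs [1,2] ("two").
def split_two_ways(ids, genotype_for_id)
  buckets = { 0 => [], 1 => [], 2 => [] }
  ids.each { |i| buckets[genotype_for_id[i]] << i }

  if buckets[0].size <= buckets[2].size
    [[buckets[0] + buckets[1], buckets[2]], "zero"]   # like NODE_SPLIT_01_2
  else
    [[buckets[0], buckets[1] + buckets[2]], "two"]    # like NODE_SPLIT_0_12
  end
end

split, type = split_two_ways([1, 2, 3, 4], { 1 => 0, 2 => 1, 3 => 2, 4 => 2 })
# => split == [[1, 2], [3, 4]], type == "zero"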
data/lib/nimbus/tree.rb
CHANGED
@@ -18,6 +18,9 @@ module Nimbus
     attr_accessor :snp_sample_size, :snp_total_count, :node_min_size, :used_snps, :structure, :generalization_error, :predictions, :importances
     attr_accessor :individuals, :id_to_fenotype
 
+    NODE_SPLIT_01_2 = "zero"
+    NODE_SPLIT_0_12 = "two"
+
     # Initialize Tree object with the configuration (as in Nimbus::Configuration.tree) options received.
     def initialize(options)
       @snp_total_count = options[:snp_total_count]
@@ -53,8 +56,14 @@ module Nimbus
     # Returns the prediction for that individual (the label of the final node reached by the individual).
     def self.traverse(tree_structure, data)
       return tree_structure if tree_structure.is_a?(Numeric) || tree_structure.is_a?(String)
+
       raise Nimbus::TreeError, "Forest data has invalid structure. Please check your forest data (file)." if !(tree_structure.is_a?(Hash) && tree_structure.keys.size == 1)
-
+
+      branch = tree_structure.values.first
+      split_type = branch[1].to_s
+      datum = data_traversing_value(data[tree_structure.keys.first - 1], split_type)
+
+      return self.traverse(branch[datum], data)
     end
 
     protected
@@ -63,13 +72,12 @@ module Nimbus
       (1..@snp_total_count).to_a.sample(@snp_sample_size).sort
     end
 
-    def build_branch(snp, split, y_hats, parent_y_hat)
-
-
-      node_2 = split[2].size == 0 ? label_node(parent_y_hat, []) : build_node(split[2], y_hats[2])
+    def build_branch(snp, split, split_type, y_hats, parent_y_hat)
+      node_a = split[0].size == 0 ? label_node(parent_y_hat, []) : build_node(split[0], y_hats[0])
+      node_b = split[1].size == 0 ? label_node(parent_y_hat, []) : build_node(split[1], y_hats[1])
 
       split_by_snp(snp)
-      return { snp => [
+      return { snp => [node_a, split_type, node_b] }
     end
 
     def label_node(value, ids)
@@ -78,24 +86,58 @@ module Nimbus
       label
     end
 
-    def
-
+    def split_by_snp_avegare_value(ids, snp)
+      split_012 = [[], [], []]
       ids.each do |i|
-
+        split_012[ @individuals[i].snp_list[snp-1] ] << @individuals[i].id
       end
-      split
+      # we split by the average number of 0,1,2 values.
+      # So if there are less or equal 0s than 2s the split is [0,1][2]
+      # and if there are more 0s than 2s the average will be <1 so the split is [0][1,2]
+      split_type = (split_012[0].size <= split_012[2].size ? NODE_SPLIT_01_2 : NODE_SPLIT_0_12)
+      split_type == NODE_SPLIT_01_2 ? split_012[0] += split_012[1] : split_012[2] += split_012[1]
+      split = [split_012[0], split_012[2]]
+      [split, split_type]
     rescue => ex
       raise Nimbus::TreeError, "Values for SNPs columns must be in [0, 1, 2]"
     end
 
+    def split_by_value(ids, snp, value)
+      split = [[], []]
+      ids.each do |i|
+        @individuals[i].snp_list[snp-1] > value ? (split[1] << @individuals[i].id) : (split[0] << @individuals[i].id)
+      end
+      split
+    rescue => ex
+      raise Nimbus::TreeError, "Values for SNPs columns must be numeric"
+    end
+
     def split_by_snp(x)
       @used_snps << x
     end
 
     def traverse_with_permutation(tree_structure, data, snp_to_permute, individual_to_permute)
       return tree_structure if tree_structure.is_a?(Numeric) || tree_structure.is_a?(String)
-
-
+
+      key = tree_structure.keys.first
+      branch = tree_structure.values.first
+      individual_data = (key == snp_to_permute ? individual_to_permute : data)
+      split_type = branch[1]
+      datum = data_traversing_value(individual_data[key - 1].to_i, split_type)
+
+      return traverse_with_permutation branch[datum], data, snp_to_permute, individual_to_permute
+    end
+
+    def data_traversing_value(datum, split_type)
+      Nimbus::Tree.data_traversing_value(datum, split_type)
+    end
+
+    def self.data_traversing_value(datum, split_type)
+      if datum == 1
+        return 0 if split_type == NODE_SPLIT_01_2
+        return 2 if split_type == NODE_SPLIT_0_12
+      end
+      datum
     end
 
   end
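With the two-way split, each stored node takes the shape { snp => [node_a, split_type, node_b] }, and data_traversing_value decides which side a heterozygote (genotype 1) follows: index 0 for a "zero" ([0,1][2]) split, index 2 for a "two" ([0][1,2]) split. A hand-built traversal sketch, assuming the gem is installed; the node labels and SNP index are invented for illustration:

require 'nimbus'

# Minimal hand-made structure in the new node layout; real trees come
# from a trained forest file.
tree_structure = { 5 => ['low', Nimbus::Tree::NODE_SPLIT_01_2, 'high'] }

# Individual genotypes indexed by SNP number - 1; SNP 5 is heterozygous (1).
individual_data = [0, 0, 0, 0, 1, 0]

# A heterozygote is routed to index 0 under a "zero" split...
Nimbus::Tree.data_traversing_value(1, Nimbus::Tree::NODE_SPLIT_01_2)  # => 0
# ...so traversal lands on the left node's label.
puts Nimbus::Tree.traverse(tree_structure, individual_data)           # => low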
data/spec/classification_tree_spec.rb
CHANGED
@@ -4,34 +4,34 @@ describe Nimbus::ClassificationTree do
 
   before(:each) do
     @config = Nimbus::Configuration.new
-    @config.load fixture_file('
+    @config.load fixture_file('classification/config.yml')
 
     @tree = Nimbus::ClassificationTree.new @config.tree
   end
 
   it "is initialized with tree config info" do
-    @tree.snp_total_count.
-    @tree.snp_sample_size.
-    @tree.node_min_size.
-    @tree.classes.size.
-    @tree.classes[0].
-    @tree.classes[1].
+    expect(@tree.snp_total_count).to eq 100
+    expect(@tree.snp_sample_size).to eq 33
+    expect(@tree.node_min_size).to eq 5
+    expect(@tree.classes.size).to eq 2
+    expect(@tree.classes[0]).to eq '0'
+    expect(@tree.classes[1]).to eq '1'
   end
 
   it "creates a tree structure when seeded with training data" do
     @config.load_training_data
-    @tree.structure.
+    expect(@tree.structure).to be_nil
     @tree.seed(@config.training_set.individuals, @config.training_set.all_ids, @config.training_set.ids_fenotypes)
-    @tree.structure.
-    @tree.structure.
+    expect(@tree.structure).to_not be_nil
+    expect(@tree.structure).to be_kind_of Hash
 
-    @tree.structure.keys.first.
-    @tree.used_snps.
+    expect(@tree.structure.keys.first).to eq @tree.used_snps.last
+    expect(@tree.used_snps).to_not be_empty
   end
 
-  it "splits node
+  it "splits node when building a node and finds a suitable split" do
     @config.load_training_data
-
+    allow_any_instance_of(Nimbus::ClassificationTree).to receive(:snps_random_sample).and_return((68..100).to_a) #97 is best split
 
     @tree.individuals = @config.training_set.individuals
     @tree.id_to_fenotype = @config.training_set.ids_fenotypes
@@ -39,29 +39,29 @@ describe Nimbus::ClassificationTree do
     @tree.predictions = {}
 
     branch = @tree.build_node @config.training_set.all_ids, Nimbus::LossFunctions.majority_class(@config.training_set.all_ids, @config.training_set.ids_fenotypes, @config.classes)
-    branch.keys.size.
-    branch.keys.first.
-    branch[97].size.
-    branch[97][0].
-    branch[97][1]
-    branch[97][2].
+    expect(branch.keys.size).to eq 1
+    expect(branch.keys.first).to eq 97
+    expect(branch[97].size).to eq 3
+    expect(branch[97][0]).to be_kind_of Hash
+    expect([Nimbus::Tree::NODE_SPLIT_01_2, Nimbus::Tree::NODE_SPLIT_0_12]).to include(branch[97][1])
+    expect(branch[97][2]).to be_kind_of Hash
   end
 
   it "keeps track of all SNPs used for the tree" do
     @config.load_training_data
     snps = (33..65).to_a
-
-    @tree.used_snps.
+    allow_any_instance_of(Nimbus::ClassificationTree).to receive(:snps_random_sample).and_return(snps)
+    expect(@tree.used_snps).to be_nil
     @tree.seed(@config.training_set.individuals, @config.training_set.all_ids, @config.training_set.ids_fenotypes)
-    @tree.used_snps.size.
+    expect(@tree.used_snps.size).to be > 4
     @tree.used_snps.each{|snp|
-      snps.include?(snp).
+      expect(snps.include?(snp)).to be true
     }
   end
 
   it "labels node when building a node and there is not a suitable split" do
     @config.load_training_data
-
+    allow_any_instance_of(Nimbus::ClassificationTree).to receive(:snps_random_sample).and_return([11])
 
     @tree.individuals = @config.training_set.individuals
     @tree.id_to_fenotype = @config.training_set.ids_fenotypes
@@ -69,9 +69,9 @@ describe Nimbus::ClassificationTree do
     @tree.predictions = {}
 
     branch = @tree.build_node @config.training_set.all_ids, Nimbus::LossFunctions.majority_class(@config.training_set.all_ids, @config.training_set.ids_fenotypes, @config.classes)
-    branch[
-    branch[
-    branch[
+    expect(branch[11][0]).to be_kind_of String
+    expect(branch[11][1]).to be_kind_of String
+    expect(branch[11][2]).to be_kind_of String
   end
 
   it "labels node when building a node with less individuals than the minimum node size" do
@@ -83,50 +83,50 @@ describe Nimbus::ClassificationTree do
     @tree.predictions = {}
 
     label = @tree.build_node [1, 10, 33], Nimbus::LossFunctions.majority_class(@config.training_set.all_ids, @config.training_set.ids_fenotypes, @config.classes)
-    label.
+    expect(label).to be_kind_of String
 
     label = @tree.build_node [2, 10], Nimbus::LossFunctions.majority_class(@config.training_set.all_ids, @config.training_set.ids_fenotypes, @config.classes)
-    label.
+    expect(label).to be_kind_of String
 
     label = @tree.build_node [1, 10, 33], Nimbus::LossFunctions.majority_class(@config.training_set.all_ids, @config.training_set.ids_fenotypes, @config.classes)
-    label.
+    expect(label).to be_kind_of String
 
     label = @tree.build_node [99, 22, 10, 33], Nimbus::LossFunctions.majority_class(@config.training_set.all_ids, @config.training_set.ids_fenotypes, @config.classes)
-    label.
+    expect(label).to be_kind_of String
   end
 
   it 'computes generalization error for the tree' do
     @config.load_training_data
    @tree.seed(@config.training_set.individuals, @config.training_set.all_ids, @config.training_set.ids_fenotypes)
-    @tree.generalization_error.
+    expect(@tree.generalization_error).to be_nil
     @tree.generalization_error_from_oob((3..300).to_a)
-    @tree.generalization_error.
-    @tree.generalization_error.
-    @tree.generalization_error.
+    expect(@tree.generalization_error).to be_kind_of Numeric
+    expect(@tree.generalization_error).to be > 0.0
+    expect(@tree.generalization_error).to be < 1.0
   end
 
   it 'estimates importance for all SNPs' do
     @config.load_training_data
     @tree.seed(@config.training_set.individuals, @config.training_set.all_ids, @config.training_set.ids_fenotypes)
-    @tree.importances.
+    expect(@tree.importances).to be_nil
     @tree.estimate_importances((200..533).to_a)
-    @tree.importances.
-    @tree.importances.keys.
-    (@tree.importances.keys - (1..100).to_a).
+    expect(@tree.importances).to be_kind_of Hash
+    expect(@tree.importances.keys).to_not be_empty
+    expect((@tree.importances.keys - (1..100).to_a)).to be_empty #all keys are snp indexes (100 snps in training file)
  end
 
   it 'get prediction for an individual pushing it down a tree structure' do
-    tree_structure = Psych.load(File.open fixture_file('
+    tree_structure = Psych.load(File.open fixture_file('classification/random_forest.yml')).first
     individual_data = [0]*100
     prediction = Nimbus::Tree.traverse tree_structure, individual_data
-    prediction.
+    expect(prediction).to eq '0'
 
-    individual_data[
-    individual_data[
-    individual_data[
-    individual_data[
+    individual_data[8-1] = 2
+    individual_data[29-1] = 0
+    individual_data[1-1] = 1
+    individual_data[7-1] = 1
     prediction = Nimbus::Tree.traverse tree_structure, individual_data
-    prediction.
+    expect(prediction).to eq '1'
   end
 
 end
data/spec/configuration_spec.rb
CHANGED
@@ -5,65 +5,65 @@ describe Nimbus::Configuration do
 
   it "loads configuration options from config file" do
     config = Nimbus::Configuration.new
-    config.load fixture_file('
+    config.load fixture_file('regression/config.yml')
 
-    config.training_file.
-    config.testing_file.
-    config.forest_file.
-    config.classes.
-    config.do_importances.
+    expect(config.training_file).to eq fixture_file('regression/training.data')
+    expect(config.testing_file).to eq fixture_file('regression/testing.data')
+    expect(config.forest_file).to eq fixture_file('regression/random_forest.yml')
+    expect(config.classes).to be_nil
+    expect(config.do_importances).to be
 
-    config.forest_size.
-    config.tree_SNP_sample_size.
-    config.tree_SNP_total_count.
-    config.tree_node_min_size.
+    expect(config.forest_size).to eq 3
+    expect(config.tree_SNP_sample_size).to eq 60
+    expect(config.tree_SNP_total_count).to eq 200
+    expect(config.tree_node_min_size).to eq 5
 
     config = Nimbus::Configuration.new
-    config.load fixture_file('
-
-    config.training_file.
-    config.testing_file.
-    config.forest_file.
-    config.classes.
-    config.do_importances.
-
-    config.forest_size.
-    config.tree_SNP_sample_size.
-    config.tree_SNP_total_count.
-    config.tree_node_min_size.
+    config.load fixture_file('classification/config.yml')
+
+    expect(config.training_file).to eq fixture_file('classification/training.data')
+    expect(config.testing_file).to eq fixture_file('classification/testing.data')
+    expect(config.forest_file).to eq fixture_file('classification/random_forest.yml')
+    expect(config.classes).to eq ['0','1']
+    expect(config.do_importances).to_not be
+
+    expect(config.forest_size).to eq 3
+    expect(config.tree_SNP_sample_size).to eq 33
+    expect(config.tree_SNP_total_count).to eq 100
+    expect(config.tree_node_min_size).to eq 5
   end
 
   it 'tree method return tree-related subset of options for regression trees' do
     config = Nimbus::Configuration.new
-    config.load fixture_file('
+    config.load fixture_file('regression/config.yml')
     tree_options = config.tree
 
-    tree_options[:snp_sample_size].
-    tree_options[:snp_total_count].
-    tree_options[:tree_node_min_size].
-    tree_options[:classes].
+    expect(tree_options[:snp_sample_size]).to_not be_nil
+    expect(tree_options[:snp_total_count]).to_not be_nil
+    expect(tree_options[:tree_node_min_size]).to_not be_nil
+    expect(tree_options[:classes]).to be_nil
   end
 
   it 'tree method return tree-related subset of options for classification trees' do
     config = Nimbus::Configuration.new
-    config.load fixture_file('
+    config.load fixture_file('classification/config.yml')
     tree_options = config.tree
 
-    tree_options[:snp_sample_size].
-    tree_options[:snp_total_count].
-    tree_options[:tree_node_min_size].
-    tree_options[:classes].
+    expect(tree_options[:snp_sample_size]).to_not be_nil
+    expect(tree_options[:snp_total_count]).to_not be_nil
+    expect(tree_options[:tree_node_min_size]).to_not be_nil
+    expect(tree_options[:classes]).to_not be_nil
   end
 
   it "creates a training set object from training data file" do
     config = Nimbus::Configuration.new
-    config.load fixture_file('
-    config.training_set.
+    config.load fixture_file('regression/config.yml')
+    expect(config.training_set).to be_nil
     config.load_training_data
-    config.training_set.
-    config.training_set.all_ids.sort.
+    expect(config.training_set).to be_kind_of Nimbus::TrainingSet
+    expect(config.training_set.all_ids.sort).to eq (1..800).to_a
 
-    File.open(fixture_file('
+    File.open(fixture_file('regression/training.data')) {|file|
       feno1, id1, *snp_list_1 = file.readline.split
       feno2, id2, *snp_list_2 = file.readline.split
       feno3, id3, *snp_list_3 = file.readline.split
@@ -72,9 +72,9 @@ describe Nimbus::Configuration do
       i2 = Nimbus::Individual.new(id2.to_i, feno2.to_f, snp_list_2.map{|snp| snp.to_i})
       i3 = Nimbus::Individual.new(id3.to_i, feno3.to_f, snp_list_3.map{|snp| snp.to_i})
 
-      config.training_set.individuals[id1.to_i].id.
-      config.training_set.individuals[id2.to_i].fenotype.
-      config.training_set.individuals[id3.to_i].snp_list.
+      expect(config.training_set.individuals[id1.to_i].id).to eq i1.id
+      expect(config.training_set.individuals[id2.to_i].fenotype).to eq i2.fenotype
+      expect(config.training_set.individuals[id3.to_i].snp_list).to eq i3.snp_list
 
       config.training_set.ids_fenotypes[id1.to_i] = feno1.to_f
       config.training_set.ids_fenotypes[id2.to_i] = feno2.to_f
@@ -84,38 +84,38 @@ describe Nimbus::Configuration do
 
   it "reads testing data and yields one individual at a time" do
     config = Nimbus::Configuration.new
-    config.load fixture_file('
+    config.load fixture_file('regression/config.yml')
 
     test_individuals = []
-    File.open(fixture_file('
+    File.open(fixture_file('regression/testing.data')) {|file|
       file.each do |line|
        data_id, *snp_list = line.strip.split
        test_individuals << Nimbus::Individual.new(data_id.to_i, nil, snp_list.map{|snp| snp.to_i})
      end
    }
-    test_individuals.size.
+    expect(test_individuals.size).to eq 200
    config.read_testing_data{|individual|
      test_individual = test_individuals.shift
-      individual.id.
-      individual.id.
-      individual.snp_list.
-      individual.snp_list.
+      expect(individual.id).to_not be_nil
+      expect(individual.id).to eq test_individual.id
+      expect(individual.snp_list).to_not be_empty
+      expect(individual.snp_list).to eq test_individual.snp_list
    }
  end
 
   it "creates a forest object loading data from a yaml file" do
     config = Nimbus::Configuration.new
-    config.load fixture_file('
+    config.load fixture_file('regression/config.yml')
 
-    trees = Psych.load(File.open fixture_file('
-    trees.first.keys.first.
-    trees.size.
+    trees = Psych.load(File.open fixture_file('regression/random_forest.yml'))
+    expect(trees.first.keys.first).to eq 176
+    expect(trees.size).to eq 3
 
     forest = config.load_forest
-    forest.
-    forest.trees[0].
-    forest.trees[1].
-    forest.trees.last.
+    expect(forest).to be_kind_of Nimbus::Forest
+    expect(forest.trees[0]).to eq trees.first
+    expect(forest.trees[1]).to eq trees[1]
+    expect(forest.trees.last).to eq trees[2]
   end
 
 end