RubyGems - lazar - Versions diffs - 0.0.7 → 0.0.9 - Mend

lazar 0.0.7 → 0.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (61)

checksums.yaml +4 -4
data/.gitignore +3 -0
data/README.md +2 -1
data/VERSION +1 -1
data/ext/lazar/extconf.rb +15 -76
data/ext/lazar/rinstall.R +9 -0
data/lazar.gemspec +7 -7
data/lib/classification.rb +5 -78
data/lib/compound.rb +201 -44
data/lib/crossvalidation.rb +224 -121
data/lib/dataset.rb +83 -93
data/lib/error.rb +1 -1
data/lib/experiment.rb +99 -0
data/lib/feature.rb +2 -54
data/lib/lazar.rb +47 -34
data/lib/leave-one-out-validation.rb +205 -0
data/lib/model.rb +131 -76
data/lib/opentox.rb +2 -2
data/lib/overwrite.rb +37 -0
data/lib/physchem.rb +133 -0
data/lib/regression.rb +117 -189
data/lib/rest-client-wrapper.rb +4 -5
data/lib/unique_descriptors.rb +6 -7
data/lib/validation.rb +63 -69
data/test/all.rb +2 -2
data/test/classification.rb +41 -0
data/test/compound.rb +116 -7
data/test/data/LOAEL_log_mg_corrected_smiles.csv +567 -567
data/test/data/LOAEL_log_mmol_corrected_smiles.csv +566 -566
data/test/data/LOAEL_mmol_corrected_smiles.csv +568 -0
data/test/data/batch_prediction.csv +25 -0
data/test/data/batch_prediction_inchi_small.csv +4 -0
data/test/data/batch_prediction_smiles_small.csv +4 -0
data/test/data/hamster_carcinogenicity.json +3 -0
data/test/data/loael.csv +568 -0
data/test/dataset-long.rb +5 -8
data/test/dataset.rb +31 -11
data/test/default_environment.rb +11 -0
data/test/descriptor.rb +26 -41
data/test/error.rb +1 -3
data/test/experiment.rb +301 -0
data/test/feature.rb +22 -10
data/test/lazar-long.rb +43 -23
data/test/lazar-physchem-short.rb +19 -16
data/test/prediction_models.rb +20 -0
data/test/regression.rb +43 -0
data/test/setup.rb +3 -1
data/test/test_environment.rb +10 -0
data/test/validation.rb +92 -26
metadata +64 -38
data/lib/SMARTS_InteLigand.txt +0 -983
data/lib/bbrc.rb +0 -165
data/lib/descriptor.rb +0 -247
data/lib/neighbor.rb +0 -25
data/lib/similarity.rb +0 -58
data/mongoid.yml +0 -8
data/test/descriptor-long.rb +0 -26
data/test/fminer-long.rb +0 -38
data/test/fminer.rb +0 -52
data/test/lazar-fminer.rb +0 -50
data/test/lazar-regression.rb +0 -27

data/lib/dataset.rb CHANGED Viewed

@@ -5,24 +5,11 @@ module OpenTox
   class Dataset
-    attr_writer :data_entries
     # associations like has_many, belongs_to deteriorate performance
     field :feature_ids, type: Array, default: []
     field :compound_ids, type: Array, default: []
-    field :data_entries_id, type: BSON::ObjectId, default: []
+    field :data_entries, type: Array, default: []
     field :source, type: String
-    field :warnings, type: Array, default: []
-    # Save all data including data_entries
-    # Should be used instead of save
-    def save_all
-      dump = Marshal.dump(@data_entries)
-      file = Mongo::Grid::File.new(dump, :filename => "#{self.id.to_s}.data_entries")
-      data_entries_id = $gridfs.insert_one(file)
-      update(:data_entries_id => data_entries_id)
-      save
-    end
     # Readers
@@ -38,24 +25,6 @@ module OpenTox
       @features
     end
-    # Get all data_entries
-    def data_entries
-      unless @data_entries
-        t = Time.now
-        data_entry_file = $gridfs.find_one(_id: data_entries_id)
-        if data_entry_file.nil?
-          @data_entries = []
-        else
-          @data_entries = Marshal.load(data_entry_file.data)
-          bad_request_error "Data entries (#{data_entries_id}) are not a 2D-Array" unless @data_entries.is_a? Array and @data_entries.first.is_a? Array
-          bad_request_error "Data entries (#{data_entries_id}) have #{@data_entries.size} rows, but dataset (#{id}) has #{compound_ids.size} compounds" unless @data_entries.size == compound_ids.size
-          bad_request_error "Data entries (#{data_entries_id}) have #{@data_entries.first.size} columns, but dataset (#{id}) has #{feature_ids.size} features" unless @data_entries.first.size == feature_ids.size
-          $logger.debug "Retrieving data: #{Time.now-t}"
-        end
-      end
-      @data_entries
-    end
     # Find data entry values for a given compound and feature
     # @param compound [OpenTox::Compound] OpenTox Compound object
     # @param feature [OpenTox::Feature] OpenTox Feature object
@@ -84,7 +53,13 @@ module OpenTox
     # @param [Integer] number of folds
     # @return [Array] Array with folds [training_dataset,test_dataset]
     def folds n
-      len = self.compound_ids.size
+      unique_compound_data = {}
+      compound_ids.each_with_index do |cid,i|
+        unique_compound_data[cid] ||= []
+        unique_compound_data[cid] << data_entries[i]
+      end
+      unique_compound_ids = unique_compound_data.keys
+      len = unique_compound_ids.size
       indices = (0..len-1).to_a.shuffle
       mid = (len/n)
       chunks = []
@@ -93,22 +68,44 @@ module OpenTox
         last = start+mid
         last = last-1 unless len%n >= i
         test_idxs = indices[start..last] || []
-        test_cids = test_idxs.collect{|i| self.compound_ids[i]}
-        test_data_entries = test_idxs.collect{|i| self.data_entries[i]}
-        test_dataset = self.class.new(:compound_ids => test_cids, :feature_ids => self.feature_ids, :data_entries => test_data_entries)
+        test_cids = test_idxs.collect{|i| unique_compound_ids[i]}
         training_idxs = indices-test_idxs
-        training_cids = training_idxs.collect{|i| self.compound_ids[i]}
-        training_data_entries = training_idxs.collect{|i| self.data_entries[i]}
-        training_dataset = self.class.new(:compound_ids => training_cids, :feature_ids => self.feature_ids, :data_entries => training_data_entries)
-        test_dataset.save_all
-        training_dataset.save_all
-        chunks << [training_dataset,test_dataset]
+        training_cids = training_idxs.collect{|i| unique_compound_ids[i]}
+        chunk = [training_cids,test_cids].collect do |unique_cids|
+          cids = []
+          data_entries = []
+          unique_cids.each do |cid|
+            unique_compound_data[cid].each do |de|
+              cids << cid
+              data_entries << de
+            end
+          end
+          dataset = self.class.new(:compound_ids => cids, :feature_ids => self.feature_ids, :data_entries => data_entries, :source => self.id )
+          dataset.compounds.each do |compound|
+            compound.dataset_ids << dataset.id
+            compound.save
+          end
+          dataset.save
+          dataset
+        end
         start = last+1
+        chunks << chunk
       end
       chunks
     end
     # Diagnostics
+    def duplicates feature=self.features.first
+      col = feature_ids.index feature.id
+      dups = {}
+      compound_ids.each_with_index do |cid,i|
+        rows = compound_ids.each_index.select{|r| compound_ids[r] == cid }
+        values = rows.collect{|row| data_entries[row][col]}
+        dups[cid] = values if values.size > 1
+      end
+      dups
+    end
     def correlation_plot training_dataset
       # TODO: create/store svg
@@ -120,23 +117,22 @@ module OpenTox
     def density_plot
       # TODO: create/store svg
       R.assign "acts", data_entries.collect{|r| r.first }#.compact
-      R.eval "plot(density(log(acts),na.rm= TRUE), main='log(#{features.first.name})')"
+      R.eval "plot(density(-log(acts),na.rm= TRUE), main='-log(#{features.first.name})')"
     end
     # Serialisation
-    # converts dataset to csv format including compound smiles as first column, other column headers are feature titles
+    # converts dataset to csv format including compound smiles as first column, other column headers are feature names
     # @return [String]
     def to_csv(inchi=false)
       CSV.generate() do |csv| #{:force_quotes=>true}
-        csv << [inchi ? "InChI" : "SMILES"] + features.collect{|f| f.title}
+        csv << [inchi ? "InChI" : "SMILES"] + features.collect{|f| f.name}
         compounds.each_with_index do |c,i|
           csv << [inchi ? c.inchi : c.smiles] + data_entries[i]
         end
       end
     end
     # Parsers
     # Create a dataset from file (csv,sdf,...)
@@ -145,14 +141,21 @@ module OpenTox
     # TODO
     #def self.from_sdf_file
     #end
     # Create a dataset from CSV file
     # TODO: document structure
-    def self.from_csv_file file, source=nil, bioassay=true
+    def self.from_csv_file file, source=nil, bioassay=true#, layout={}
       source ||= file
-      table = CSV.read file, :skip_blanks => true
-      dataset = self.new(:source => source, :name => File.basename(file))
-      dataset.parse_table table, bioassay
+      name = File.basename(file,".*")
+      dataset = self.find_by(:source => source, :name => name)
+      if dataset
+        $logger.debug "Skipping import of #{file}, it is already in the database (id: #{dataset.id})."
+      else
+        $logger.debug "Parsing #{file}."
+        table = CSV.read file, :skip_blanks => true, :encoding => 'windows-1251:utf-8'
+        dataset = self.new(:source => source, :name => name)
+        dataset.parse_table table, bioassay#, layout
+      end
       dataset
     end
@@ -197,7 +200,7 @@ module OpenTox
             feature = NominalFeature.find_or_create_by(metadata)
           end
         end
-        feature_ids << feature.id
+        feature_ids << feature.id if feature
       end
       $logger.debug "Feature values: #{Time.now-time}"
@@ -208,11 +211,11 @@ module OpenTox
       value_time = 0
       # compounds and values
-      @data_entries = [] #Array.new(table.size){Array.new(table.first.size-1)}
+      self.data_entries = []
       table.each_with_index do |vals,i|
         ct = Time.now
-        identifier = vals.shift
+        identifier = vals.shift.strip
         warnings << "No feature values for compound at position #{i+2}." if vals.compact.empty?
         begin
           case compound_format
@@ -229,7 +232,7 @@ module OpenTox
           warnings << "Cannot parse #{compound_format} compound '#{identifier}' at position #{i+2}, all entries are ignored."
           next
         end
-        # TODO insert empty compounds to keep positions?
+        compound.dataset_ids << self.id unless compound.dataset_ids.include? self.id
         compound_time += Time.now-ct
         r += 1
@@ -239,72 +242,48 @@ module OpenTox
         end
         compound_ids << compound.id
-        @data_entries << Array.new(table.first.size-1)
+        table.first.size == 0 ?  self.data_entries << Array.new(0) : self.data_entries << Array.new(table.first.size-1)
         vals.each_with_index do |v,j|
           if v.blank?
             warnings << "Empty value for compound '#{identifier}' (row #{r+2}) and feature '#{feature_names[j]}' (column #{j+2})."
             next
           elsif numeric[j]
-            @data_entries.last[j] = v.to_f
+            v = v.to_f
           else
-            @data_entries.last[j] = v.strip
+            v = v.strip
           end
+          self.data_entries.last[j] = v
+          #i = compound.feature_ids.index feature_ids[j]
+          compound.features[feature_ids[j].to_s] ||= []
+          compound.features[feature_ids[j].to_s] << v
+          compound.save
         end
       end
       compounds.duplicates.each do |compound|
         positions = []
-        compounds.each_with_index{|c,i| positions << i+1 if !c.blank? and c.inchi == compound.inchi}
-        warnings << "Duplicate compound #{compound.inchi} at rows #{positions.join(', ')}. Entries are accepted, assuming that measurements come from independent experiments."
+        compounds.each_with_index{|c,i| positions << i+1 if !c.blank? and c.inchi and c.inchi == compound.inchi}
+        warnings << "Duplicate compound #{compound.smiles} at rows #{positions.join(', ')}. Entries are accepted, assuming that measurements come from independent experiments."
       end
       $logger.debug "Value parsing: #{Time.now-time} (Compound creation: #{compound_time})"
       time = Time.now
-      save_all
+      save
       $logger.debug "Saving: #{Time.now-time}"
     end
-=begin
-    # TODO remove
-    # Create a dataset with compounds and features
-    def self.create compounds, features, warnings=[], source=nil
-      dataset = Dataset.new(:warnings => warnings)
-      dataset.compounds = compounds
-      dataset.features = features
-      dataset
-    end
-    # merge dataset (i.e. append features)
-    def +(dataset)
-      bad_request_error "Dataset merge failed because the argument is not a OpenTox::Dataset but a #{dataset.class}" unless dataset.is_a? Dataset
-      bad_request_error "Dataset merge failed because compounds are unequal in datasets #{self.id} and #{dataset.id}" unless compound_ids == dataset.compound_ids
-      self.feature_ids ||= []
-      self.feature_ids = self.feature_ids + dataset.feature_ids
-      @data_entries ||= Array.new(compound_ids.size){[]}
-      @data_entries.each_with_index do |row,i|
-        @data_entries[i] = row + dataset.fingerprint(compounds[i])
-      end
-      self
-    end
-    def fingerprint(compound)
-      i = compound_ids.index(compound.id)
-      i.nil? ? nil : data_entries[i]
-    end
-=end
     # Fill unset data entries
     # @param any value
     def fill_nil_with n
       (0 .. compound_ids.size-1).each do |i|
-        @data_entries[i] ||= []
+        data_entries[i] ||= []
         (0 .. feature_ids.size-1).each do |j|
-          @data_entries[i][j] ||= n
+          data_entries[i][j] ||= n
         end
       end
     end
   end
   # Dataset for lazar predictions
@@ -321,6 +300,17 @@ module OpenTox
   # Dataset for descriptors (physchem)
   class DescriptorDataset < Dataset
     field :feature_calculation_algorithm, type: String
+  end
+  class ScaledDataset < DescriptorDataset
+    field :centers, type: Array, default: []
+    field :scales, type: Array, default: []
+    def original_value value, i
+      value * scales[i] + centers[i]
+    end
   end
   # Dataset for fminer descriptors

data/lib/error.rb CHANGED Viewed

@@ -58,7 +58,7 @@ module OpenTox
     OpenTox.const_set error[:class],c
     # define global methods for raising errors, eg. bad_request_error
-    Object.send(:define_method, error[:method]) do |message,uri=nil,cause=nil|
+    Object.send(:define_method, error[:method]) do |message|
       raise c.new(message)
     end
   end

data/lib/experiment.rb ADDED Viewed

@@ -0,0 +1,99 @@
+module OpenTox
+  class Experiment
+    field :dataset_ids, type: Array
+    field :model_settings, type: Array, default: []
+    field :results, type: Hash, default: {}
+    def run
+      dataset_ids.each do |dataset_id|
+        dataset = Dataset.find(dataset_id)
+        results[dataset_id.to_s] = []
+        model_settings.each do |setting|
+          setting = setting.dup
+          model_algorithm = setting.delete :model_algorithm #if setting[:model_algorithm]
+          model = Object.const_get(model_algorithm).create dataset, setting
+          $logger.debug model
+          model.save
+          repeated_crossvalidation = RepeatedCrossValidation.create model
+          results[dataset_id.to_s] << {:model_id => model.id, :repeated_crossvalidation_id => repeated_crossvalidation.id}
+        end
+      end
+      save
+    end
+    def report
+      # statistical significances http://www.r-bloggers.com/anova-and-tukeys-test-on-r/
+      report = {}
+      report[:name] = name
+      report[:experiment_id] = self.id.to_s
+      report[:results] = {}
+      parameters = []
+      dataset_ids.each do |dataset_id|
+        dataset_name = Dataset.find(dataset_id).name
+        report[:results][dataset_name] = {}
+        report[:results][dataset_name][:anova] = {}
+        report[:results][dataset_name][:data] = []
+        # TODO results[dataset_id.to_s] does not exist
+        results[dataset_id.to_s].each do |result|
+          model = Model::Lazar.find(result[:model_id])
+          repeated_cv = RepeatedCrossValidation.find(result[:repeated_crossvalidation_id])
+          crossvalidations = repeated_cv.crossvalidations
+          if crossvalidations.first.is_a? ClassificationCrossValidation
+            parameters = [:accuracy,:true_rate,:predictivity]
+          elsif crossvalidations.first.is_a? RegressionCrossValidation
+            parameters = [:rmse,:mae,:r_squared]
+          end
+          summary = {}
+          [:neighbor_algorithm, :neighbor_algorithm_parameters, :prediction_algorithm].each do |key|
+            summary[key] = model[key]
+          end
+          summary[:nr_instances] = crossvalidations.first.nr_instances
+          summary[:nr_unpredicted] = crossvalidations.collect{|cv| cv.nr_unpredicted}
+          summary[:time] = crossvalidations.collect{|cv| cv.time}
+          parameters.each do |param|
+            summary[param] = crossvalidations.collect{|cv| cv.send(param)}
+          end
+          report[:results][dataset_name][:data] << summary
+        end
+      end
+      report[:results].each do |dataset,results|
+        ([:time,:nr_unpredicted]+parameters).each do |param|
+          experiments = []
+          outcome = []
+          results[:data].each_with_index do |result,i|
+            result[param].each do |p|
+              experiments << i
+              p = nil if p.kind_of? Float and p.infinite? # TODO fix @ division by 0
+              outcome << p
+            end
+          end
+          begin
+          R.assign "experiment_nr",experiments.collect{|i| "Experiment #{i}"}
+          R.eval "experiment_nr = factor(experiment_nr)"
+          R.assign "outcome", outcome
+          R.eval "data = data.frame(experiment_nr,outcome)"
+          # one-way ANOVA
+          R.eval "fit = aov(outcome ~ experiment_nr, data=data,na.action='na.omit')"
+          # http://stackoverflow.com/questions/3366506/extract-p-value-from-aov
+          p_value = R.eval("summary(fit)[[1]][['Pr(>F)']][[1]]").to_ruby
+          # aequivalent
+          # sum = R.eval("summary(fit)")
+          #p_value = sum.to_ruby.first.last.first
+          rescue
+            p_value = nil
+          end
+          report[:results][dataset][:anova][param] = p_value
+=begin
+=end
+        end
+      end
+      report
+    end
+    def summary
+      report[:results].collect{|dataset,data| {dataset => data[:anova].select{|param,p_val| p_val < 0.1}}}
+    end
+  end
+end

data/lib/feature.rb CHANGED Viewed

@@ -2,15 +2,14 @@ module OpenTox
   # Basic feature class
   class Feature
-    field :name, as: :title, type: String
     field :nominal, type: Boolean
     field :numeric, type: Boolean
     field :measured, type: Boolean
+    field :calculated, type: Boolean
   end
   # Feature for categorical variables
   class NominalFeature < Feature
-    # TODO check if accept_values are still needed
     field :accept_values, type: Array
     def initialize params
       super params
@@ -29,69 +28,18 @@ module OpenTox
   # Feature for SMARTS fragments
   class Smarts < NominalFeature
     field :smarts, type: String
+    index "smarts" => 1
     def self.from_smarts smarts
       self.find_or_create_by :smarts => smarts
     end
   end
-  # Feature for supervised fragments from Fminer algorithm
-  class FminerSmarts < Smarts
-    field :p_value, type: Float
-    # TODO check if effect is used
-    field :effect, type: String
-    field :dataset_id
-  end
-  # Feature for database fingerprints
-  # needs count for efficient retrieval (see compound.rb)
-  class FingerprintSmarts < Smarts
-    field :count, type: Integer
-    def self.fingerprint
-      @@fp4 ||= OpenTox::FingerprintSmarts.all
-      unless @@fp4.size == 306
-        @@fp4 = []
-        # OpenBabel FP4 fingerprints
-        # OpenBabel http://open-babel.readthedocs.org/en/latest/Fingerprints/intro.html
-        # TODO investigate other types of fingerprints (MACCS)
-        # OpenBabel http://open-babel.readthedocs.org/en/latest/Fingerprints/intro.html
-        # http://www.dalkescientific.com/writings/diary/archive/2008/06/26/fingerprint_background.html
-        # OpenBabel MNA http://openbabel.org/docs/dev/FileFormats/Multilevel_Neighborhoods_of_Atoms_(MNA).html#multilevel-neighborhoods-of-atoms-mna
-        # Morgan ECFP, FCFP
-        # http://cdk.github.io/cdk/1.5/docs/api/org/openscience/cdk/fingerprint/CircularFingerprinter.html
-        # http://www.rdkit.org/docs/GettingStartedInPython.html
-        # Chemfp
-        # https://chemfp.readthedocs.org/en/latest/using-tools.html
-        # CACTVS/PubChem
-        File.open(File.join(File.dirname(__FILE__),"SMARTS_InteLigand.txt")).each do |l|
-          l.strip!
-          unless l.empty? or l.match /^#/
-            name,smarts = l.split(': ')
-            @@fp4 << OpenTox::FingerprintSmarts.find_or_create_by(:name => name, :smarts => smarts) unless smarts.nil?
-          end
-        end
-      end
-      @@fp4
-    end
-  end
-  # Feature for physico-chemical descriptors
-  class PhysChemDescriptor < NumericFeature
-    field :algorithm, type: String, default: "OpenTox::Algorithm::Descriptor.physchem"
-    field :parameters, type: Hash
-    field :creator, type: String
-  end
   # Feature for categorical bioassay results
   class NominalBioAssay < NominalFeature
-    # TODO: needed? move to dataset?
-    field :description, type: String
   end
   # Feature for quantitative bioassay results
   class NumericBioAssay < NumericFeature
-    # TODO: needed? move to dataset?
-    field :description, type: String
   end
 end

data/lib/lazar.rb CHANGED Viewed

@@ -8,43 +8,58 @@ require 'mongoid'
 require 'rserve'
 require "nokogiri"
 require "base64"
+require 'openbabel'
+# Environment setup
+ENV["LAZAR_ENV"] ||= "production"
+raise "Incorrect lazar environment variable LAZAR_ENV '#{ENV["LAZAR_ENV"]}', please set it to 'production' or 'development'." unless ENV["LAZAR_ENV"].match(/production|development/)
-# Mongo setup
-# TODO retrieve correct environment from Rack/Sinatra
-ENV["MONGOID_ENV"] ||= "development"
-# TODO remove config files, change default via ENV or directly in Mongoid class
-Mongoid.load!("#{File.expand_path(File.join(File.dirname(__FILE__),'..','mongoid.yml'))}")
-# TODO get Mongo::Client from Mongoid
-$mongo = Mongo::Client.new('mongodb://127.0.0.1:27017/opentox')
-# TODO same for GridFS
+ENV["MONGOID_ENV"] = ENV["LAZAR_ENV"]
+ENV["RACK_ENV"] = ENV["LAZAR_ENV"] # should set sinatra environment
+Mongoid.load_configuration({
+  :clients => {
+    :default => {
+      :database => ENV["LAZAR_ENV"],
+      :hosts => ["localhost:27017"],
+    }
+  }
+})
+Mongoid.raise_not_found_error = false # return nil if no document is found
+$mongo = Mongo::Client.new("mongodb://127.0.0.1:27017/#{ENV['LAZAR_ENV']}")
 $gridfs = $mongo.database.fs
-# R setup
-R = Rserve::Connection.new
 # Logger setup
+STDOUT.sync = true # for redirection, etc see http://stackoverflow.com/questions/8549443/why-doesnt-logger-output-to-stdout-get-redirected-to-files
 $logger = Logger.new STDOUT # STDERR did not work on my development machine (CH)
-$logger.level = Logger::DEBUG
-Mongo::Logger.logger = $logger
-Mongo::Logger.level = Logger::WARN
-#Mongoid.logger = $logger
-# Require sub-Repositories
-require_relative '../libfminer/libbbrc/bbrc' # include before openbabel
-require_relative '../libfminer/liblast/last' #
-require_relative '../last-utils/lu.rb'
-require_relative '../openbabel/lib/openbabel'
+case ENV["LAZAR_ENV"]
+when "production"
+  $logger.level = Logger::WARN
+  Mongo::Logger.level = Logger::WARN
+when "development"
+  $logger.level = Logger::DEBUG
+  Mongo::Logger.level = Logger::WARN
+end
-# Fminer environment variables
-ENV['FMINER_SMARTS'] = 'true'
-ENV['FMINER_NO_AROMATIC'] = 'true'
-ENV['FMINER_PVALUES'] = 'true'
-ENV['FMINER_SILENT'] = 'true'
-ENV['FMINER_NR_HITS'] = 'true'
+# R setup
+rlib = File.expand_path(File.join(File.dirname(__FILE__),"..","R"))
+# should work on POSIX including os x
+# http://stackoverflow.com/questions/19619582/number-of-processors-cores-in-command-line
+NR_CORES = `getconf _NPROCESSORS_ONLN`.to_i
+R = Rserve::Connection.new
+R.eval "
+suppressPackageStartupMessages({
+  library(ggplot2,lib=\"#{rlib}\")
+  library(grid,lib=\"#{rlib}\")
+  library(gridExtra,lib=\"#{rlib}\")
+  library(pls,lib=\"#{rlib}\")
+  library(caret,lib=\"#{rlib}\")
+  library(doMC,lib=\"#{rlib}\")
+  registerDoMC(#{NR_CORES})
+})
+"
 # OpenTox classes and includes
-CLASSES = ["Feature","Compound","Dataset","Validation","CrossValidation"]# Algorithm and Models are modules
+CLASSES = ["Feature","Compound","Dataset","Validation","CrossValidation","LeaveOneOutValidation","RepeatedCrossValidation","Experiment"]# Algorithm and Models are modules
 [ # be aware of the require sequence as it affects class/method overwrites
   "overwrite.rb",
@@ -52,18 +67,16 @@ CLASSES = ["Feature","Compound","Dataset","Validation","CrossValidation"]# Algor
   "error.rb",
   "opentox.rb",
   "feature.rb",
+  "physchem.rb",
   "compound.rb",
   "dataset.rb",
-  "descriptor.rb",
   "algorithm.rb",
-  "descriptor.rb",
-  "bbrc.rb",
   "model.rb",
-  "similarity.rb",
-  "neighbor.rb",
   "classification.rb",
   "regression.rb",
   "validation.rb",
   "crossvalidation.rb",
+  "leave-one-out-validation.rb",
+  "experiment.rb",
 ].each{ |f| require_relative f }
+OpenTox::PhysChem.descriptors # load descriptor features