RubyGems - lazar - Versions diffs - 0.9.3 → 1.0.0 - Mend

lazar 0.9.3 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (88) hide show

checksums.yaml +4 -4
data/.gitignore +1 -4
data/README.md +5 -15
data/VERSION +1 -1
data/ext/lazar/extconf.rb +1 -1
data/ext/lazar/rinstall.R +9 -7
data/java/CdkDescriptorInfo.class +0 -0
data/java/CdkDescriptorInfo.java +3 -2
data/java/CdkDescriptors.class +0 -0
data/java/CdkDescriptors.java +28 -28
data/java/Rakefile +3 -3
data/java/{cdk-1.4.19.jar → cdk-2.0-SNAPSHOT.jar} +0 -0
data/lazar.gemspec +6 -7
data/lib/algorithm.rb +2 -11
data/lib/caret.rb +96 -0
data/lib/classification.rb +14 -22
data/lib/compound.rb +21 -87
data/lib/crossvalidation.rb +80 -279
data/lib/dataset.rb +105 -174
data/lib/feature.rb +11 -18
data/lib/feature_selection.rb +42 -0
data/lib/import.rb +122 -0
data/lib/lazar.rb +14 -4
data/lib/leave-one-out-validation.rb +46 -192
data/lib/model.rb +319 -128
data/lib/nanoparticle.rb +98 -0
data/lib/opentox.rb +7 -4
data/lib/overwrite.rb +24 -3
data/lib/physchem.rb +11 -10
data/lib/regression.rb +7 -137
data/lib/rest-client-wrapper.rb +0 -6
data/lib/similarity.rb +65 -0
data/lib/substance.rb +8 -0
data/lib/train-test-validation.rb +69 -0
data/lib/validation-statistics.rb +223 -0
data/lib/validation.rb +17 -100
data/scripts/mg2mmol.rb +17 -0
data/scripts/mirror-enm2test.rb +4 -0
data/scripts/mmol2-log10.rb +32 -0
data/test/compound.rb +4 -94
data/test/data/EPAFHM.medi_log10.csv +92 -0
data/test/data/EPAFHM.mini_log10.csv +16 -0
data/test/data/EPAFHM_log10.csv +581 -0
data/test/data/loael_log10.csv +568 -0
data/test/dataset.rb +195 -133
data/test/descriptor.rb +27 -18
data/test/error.rb +2 -2
data/test/experiment.rb +4 -4
data/test/feature.rb +2 -3
data/test/gridfs.rb +10 -0
data/test/model-classification.rb +106 -0
data/test/model-nanoparticle.rb +128 -0
data/test/model-regression.rb +171 -0
data/test/model-validation.rb +19 -0
data/test/nanomaterial-model-validation.rb +55 -0
data/test/setup.rb +8 -4
data/test/validation-classification.rb +67 -0
data/test/validation-nanoparticle.rb +133 -0
data/test/validation-regression.rb +92 -0
metadata +50 -121
data/test/classification.rb +0 -41
data/test/data/CPDBAS_v5c_1547_29Apr2008part.sdf +0 -13553
data/test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_mouse_TD50.csv +0 -436
data/test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_rat_TD50.csv +0 -568
data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Hamster.csv +0 -87
data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mouse.csv +0 -978
data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall.csv +0 -1120
data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall_no_duplicates.csv +0 -1113
data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity.csv +0 -850
data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity_no_duplicates.csv +0 -829
data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Rat.csv +0 -1198
data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_SingleCellCall.csv +0 -1505
data/test/data/EPA_v4b_Fathead_Minnow_Acute_Toxicity_LC50_mmol.csv +0 -581
data/test/data/FDA_v3b_Maximum_Recommended_Daily_Dose_mmol.csv +0 -1217
data/test/data/LOAEL_log_mg_corrected_smiles.csv +0 -568
data/test/data/LOAEL_log_mmol_corrected_smiles.csv +0 -568
data/test/data/boiling_points.ext.sdf +0 -11460
data/test/data/cpdb_100.csv +0 -101
data/test/data/hamster_carcinogenicity.ntriples +0 -618
data/test/data/hamster_carcinogenicity.sdf +0 -2805
data/test/data/hamster_carcinogenicity.xls +0 -0
data/test/data/hamster_carcinogenicity.yaml +0 -352
data/test/dataset-long.rb +0 -114
data/test/lazar-long.rb +0 -92
data/test/lazar-physchem-short.rb +0 -31
data/test/prediction_models.rb +0 -20
data/test/regression.rb +0 -43
data/test/validation.rb +0 -108

data/test/validation-nanoparticle.rb ADDED Viewed

@@ -0,0 +1,133 @@
+require_relative "setup.rb"
+class NanoparticleValidationTest  < MiniTest::Test
+  include OpenTox::Validation
+  def setup
+    @training_dataset = Dataset.where(:name => "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles").first
+    @prediction_feature = @training_dataset.features.select{|f| f["name"] == 'log2(Net cell association)'}.first
+  end
+  def test_validate_default_nanoparticle_model
+    model = Model::Lazar.create training_dataset: @training_dataset, prediction_feature: @prediction_feature
+    cv = CrossValidation.create model
+    p cv.id
+    #File.open("tmp.pdf","w+"){|f| f.puts cv.correlation_plot format:"pdf"}
+    refute_nil cv.r_squared
+    refute_nil cv.rmse
+  end
+  def test_validate_pls_pchem_model
+    algorithms = {
+      :descriptors => {
+        :method => "properties",
+        :categories => ["P-CHEM"]
+      },
+      :prediction => {:method => 'Algorithm::Caret.pls' },
+      :feature_selection => {
+        :method => "Algorithm::FeatureSelection.correlation_filter",
+      },
+    }
+    model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms
+    assert_equal "Algorithm::Caret.pls", model.algorithms[:prediction][:method]
+    cv = CrossValidation.create model
+    p cv.id
+    #File.open("tmp2.pdf","w+"){|f| f.puts cv.correlation_plot format:"pdf"}
+    refute_nil cv.r_squared
+    refute_nil cv.rmse
+  end
+=begin
+  def test_validate_proteomics_pls_pchem_model
+    algorithms = {
+      :descriptors => {
+        :method => "properties",
+        :categories => ["Proteomics"]
+      },
+      :prediction => {:method => 'Algorithm::Caret.pls' },
+      :feature_selection => {
+        :method => "Algorithm::FeatureSelection.correlation_filter",
+      },
+    }
+    model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms
+    assert_equal "Algorithm::Caret.pls", model.algorithms[:prediction][:method]
+    cv = CrossValidation.create model
+    refute_nil cv.r_squared
+    refute_nil cv.rmse
+  end
+=end
+  def test_validate_proteomics_pchem_default_model
+    algorithms = {
+      :descriptors => {
+        :method => "properties",
+        :categories => ["Proteomics","P-CHEM"]
+      },
+      :feature_selection => {
+        :method => "Algorithm::FeatureSelection.correlation_filter",
+      },
+    }
+    model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms
+    cv = CrossValidation.create model
+    refute_nil cv.r_squared
+    refute_nil cv.rmse
+  end
+  def test_nanoparticle_fingerprint_model_without_feature_selection
+    algorithms = {
+      :descriptors => {
+        :method => "fingerprint",
+        :type => "MP2D",
+      },
+      :similarity => {
+        :method => "Algorithm::Similarity.tanimoto",
+        :min => 0.1
+      },
+      :feature_selection => nil
+    }
+    model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms
+    cv = CrossValidation.create model
+    refute_nil cv.r_squared
+    refute_nil cv.rmse
+  end
+  def test_nanoparticle_fingerprint_weighted_average_model_without_feature_selection
+    algorithms = {
+      :descriptors => {
+        :method => "fingerprint",
+        :type => "MP2D",
+      },
+      :similarity => {
+        :method => "Algorithm::Similarity.tanimoto",
+        :min => 0.1
+      },
+      :prediction => { :method => "OpenTox::Algorithm::Regression.weighted_average" },
+      :feature_selection => nil
+    }
+    model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms
+    cv = CrossValidation.create model
+    refute_nil cv.r_squared
+    refute_nil cv.rmse
+  end
+  def test_nanoparticle_fingerprint_model_with_feature_selection
+    algorithms = {
+      :descriptors => {
+        :method => "fingerprint",
+        :type => "MP2D",
+      },
+      :similarity => {
+        :method => "Algorithm::Similarity.tanimoto",
+        :min => 0.1
+      },
+      :feature_selection => {
+        :method => "Algorithm::FeatureSelection.correlation_filter",
+      },
+    }
+    model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms
+    cv = CrossValidation.create model
+    refute_nil cv.r_squared
+    refute_nil cv.rmse
+  end
+end

data/test/validation-regression.rb ADDED Viewed

@@ -0,0 +1,92 @@
+require_relative "setup.rb"
+class ValidationRegressionTest < MiniTest::Test
+  include OpenTox::Validation
+  # defaults
+  def test_default_regression_crossvalidation
+    dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi_log10.csv"
+    model = Model::Lazar.create training_dataset: dataset
+    cv = RegressionCrossValidation.create model
+    assert cv.rmse < 1.5, "RMSE #{cv.rmse} should be smaller than 1.5, this may occur due to unfavorable training/test set splits"
+    assert cv.mae < 1.1, "MAE #{cv.mae} should be smaller than 1.1, this may occur due to unfavorable training/test set splits"
+    assert cv.percent_within_prediction_interval > 80, "Only #{cv.percent_within_prediction_interval.round(2)}% of measurement within prediction interval. This may occur due to unfavorable training/test set splits"
+  end
+  # parameters
+  def test_regression_crossvalidation_params
+    dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi_log10.csv"
+    algorithms = {
+      :prediction => { :method => "OpenTox::Algorithm::Regression.weighted_average" },
+      :descriptors => { :type => "MACCS", },
+      :similarity => {:min => 0.7}
+    }
+    model = Model::Lazar.create training_dataset: dataset, algorithms: algorithms
+    assert_equal algorithms[:descriptors][:type], model.algorithms[:descriptors][:type]
+    cv = RegressionCrossValidation.create model
+    cv.validation_ids.each do |vid|
+      model = Model::Lazar.find(Validation.find(vid).model_id)
+      assert_equal algorithms[:descriptors][:type], model.algorithms[:descriptors][:type]
+      assert_equal algorithms[:similarity][:min], model.algorithms[:similarity][:min]
+      refute_nil model.training_dataset_id
+      refute_equal dataset.id, model.training_dataset_id
+    end
+    refute_nil cv.rmse
+    refute_nil cv.mae
+  end
+  def test_physchem_regression_crossvalidation
+    training_dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv")
+    model = Model::Lazar.create training_dataset:training_dataset
+    cv = RegressionCrossValidation.create model
+    refute_nil cv.rmse
+    refute_nil cv.mae
+  end
+  # LOO
+  def test_regression_loo_validation
+    dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv")
+    model = Model::Lazar.create training_dataset: dataset
+    loo = RegressionLeaveOneOut.create model
+    assert loo.r_squared > 0.34, "R^2 (#{loo.r_squared}) should be larger than 0.034"
+  end
+  def test_regression_loo_validation_with_feature_selection
+    dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv")
+    algorithms = {
+      :descriptors => {
+        :method => "calculate_properties",
+        :features => PhysChem.openbabel_descriptors,
+      },
+      :similarity => {
+        :method => "Algorithm::Similarity.weighted_cosine",
+        :min => 0.5
+      },
+      :feature_selection => {
+        :method => "Algorithm::FeatureSelection.correlation_filter",
+      },
+    }
+    model = Model::Lazar.create training_dataset: dataset, algorithms: algorithms
+    assert_raises OpenTox::BadRequestError do
+      loo = RegressionLeaveOneOut.create model
+    end
+  end
+  # repeated CV
+  def test_repeated_crossvalidation
+    dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv")
+    model = Model::Lazar.create training_dataset: dataset
+    repeated_cv = RepeatedCrossValidation.create model
+    repeated_cv.crossvalidations.each do |cv|
+      #assert cv.r_squared > 0.34, "R^2 (#{cv.r_squared}) should be larger than 0.034"
+      #assert_operator cv.accuracy, :>, 0.7, "model accuracy < 0.7, this may happen by chance due to an unfavorable training/test set split"
+    end
+    File.open("tmp.png","w+"){|f| f.puts repeated_cv.correlation_plot}
+  end
+end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: lazar
 version: !ruby/object:Gem::Version
-  version: 0.9.3
+  version: 1.0.0
 platform: ruby
 authors:
 - Christoph Helma, Martin Guetlein, Andreas Maunz, Micha Rautenberg, David Vorgrimmler,
@@ -9,98 +9,92 @@ authors:
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2016-04-12 00:00:00.000000000 Z
+date: 2016-12-21 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - "~>"
+    - - ">="
       - !ruby/object:Gem::Version
-        version: '1.11'
+        version: '0'
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - "~>"
+    - - ">="
       - !ruby/object:Gem::Version
-        version: '1.11'
+        version: '0'
 - !ruby/object:Gem::Dependency
   name: rest-client
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - "~>"
+    - - ">="
       - !ruby/object:Gem::Version
-        version: '1.8'
+        version: '0'
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - "~>"
+    - - ">="
       - !ruby/object:Gem::Version
-        version: '1.8'
+        version: '0'
 - !ruby/object:Gem::Dependency
   name: nokogiri
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - "~>"
+    - - ">="
       - !ruby/object:Gem::Version
-        version: '1.6'
+        version: '0'
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - "~>"
+    - - ">="
       - !ruby/object:Gem::Version
-        version: '1.6'
+        version: '0'
 - !ruby/object:Gem::Dependency
   name: rserve-client
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - "~>"
+    - - ">="
       - !ruby/object:Gem::Version
-        version: '0.3'
+        version: '0'
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - "~>"
+    - - ">="
       - !ruby/object:Gem::Version
-        version: '0.3'
+        version: '0'
 - !ruby/object:Gem::Dependency
   name: mongoid
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - "~>"
+    - - ">="
       - !ruby/object:Gem::Version
-        version: '5.0'
+        version: '0'
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - "~>"
+    - - ">="
       - !ruby/object:Gem::Version
-        version: '5.0'
+        version: '0'
 - !ruby/object:Gem::Dependency
   name: openbabel
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - "~>"
-      - !ruby/object:Gem::Version
-        version: '2.3'
     - - ">="
       - !ruby/object:Gem::Version
-        version: 2.3.2.2
+        version: '0'
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - "~>"
-      - !ruby/object:Gem::Version
-        version: '2.3'
     - - ">="
       - !ruby/object:Gem::Version
-        version: 2.3.2.2
+        version: '0'
 description: Libraries for lazy structure-activity relationships and read-across.
 email:
 - helma@in-silico.ch
@@ -129,11 +123,12 @@ files:
 - java/JoelibDescriptors.class
 - java/JoelibDescriptors.java
 - java/Rakefile
-- java/cdk-1.4.19.jar
+- java/cdk-2.0-SNAPSHOT.jar
 - java/joelib2.jar
 - java/log4j.jar
 - lazar.gemspec
 - lib/algorithm.rb
+- lib/caret.rb
 - lib/classification.rb
 - lib/compound.rb
 - lib/crossvalidation.rb
@@ -141,77 +136,72 @@ files:
 - lib/error.rb
 - lib/experiment.rb
 - lib/feature.rb
+- lib/feature_selection.rb
+- lib/import.rb
 - lib/lazar.rb
 - lib/leave-one-out-validation.rb
 - lib/model.rb
+- lib/nanoparticle.rb
 - lib/opentox.rb
 - lib/overwrite.rb
 - lib/physchem.rb
 - lib/regression.rb
 - lib/rest-client-wrapper.rb
+- lib/similarity.rb
+- lib/substance.rb
+- lib/train-test-validation.rb
 - lib/unique_descriptors.rb
+- lib/validation-statistics.rb
 - lib/validation.rb
+- scripts/mg2mmol.rb
+- scripts/mirror-enm2test.rb
+- scripts/mmol2-log10.rb
 - test/all.rb
-- test/classification.rb
 - test/compound.rb
-- test/data/CPDBAS_v5c_1547_29Apr2008part.sdf
-- test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_mouse_TD50.csv
-- test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_rat_TD50.csv
-- test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Hamster.csv
-- test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mouse.csv
-- test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall.csv
-- test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall_no_duplicates.csv
-- test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity.csv
-- test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity_no_duplicates.csv
-- test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Rat.csv
-- test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_SingleCellCall.csv
 - test/data/EPAFHM.csv
 - test/data/EPAFHM.medi.csv
+- test/data/EPAFHM.medi_log10.csv
 - test/data/EPAFHM.mini.csv
-- test/data/EPA_v4b_Fathead_Minnow_Acute_Toxicity_LC50_mmol.csv
-- test/data/FDA_v3b_Maximum_Recommended_Daily_Dose_mmol.csv
+- test/data/EPAFHM.mini_log10.csv
+- test/data/EPAFHM_log10.csv
 - test/data/ISSCAN-multi.csv
-- test/data/LOAEL_log_mg_corrected_smiles.csv
-- test/data/LOAEL_log_mmol_corrected_smiles.csv
 - test/data/LOAEL_mmol_corrected_smiles.csv
 - test/data/acetaldehyde.sdf
 - test/data/batch_prediction.csv
 - test/data/batch_prediction_inchi_small.csv
 - test/data/batch_prediction_smiles_small.csv
-- test/data/boiling_points.ext.sdf
-- test/data/cpdb_100.csv
 - test/data/hamster_carcinogenicity.csv
 - test/data/hamster_carcinogenicity.json
 - test/data/hamster_carcinogenicity.mini.bool_float.csv
 - test/data/hamster_carcinogenicity.mini.bool_int.csv
 - test/data/hamster_carcinogenicity.mini.bool_string.csv
 - test/data/hamster_carcinogenicity.mini.csv
-- test/data/hamster_carcinogenicity.ntriples
-- test/data/hamster_carcinogenicity.sdf
-- test/data/hamster_carcinogenicity.xls
-- test/data/hamster_carcinogenicity.yaml
 - test/data/hamster_carcinogenicity_with_errors.csv
 - test/data/kazius.csv
 - test/data/loael.csv
+- test/data/loael_log10.csv
 - test/data/multi_cell_call.csv
 - test/data/multi_cell_call_no_dup.csv
 - test/data/multicolumn.csv
 - test/data/rat_feature_dataset.csv
 - test/data/wrong_dataset.csv
-- test/dataset-long.rb
 - test/dataset.rb
 - test/default_environment.rb
 - test/descriptor.rb
 - test/error.rb
 - test/experiment.rb
 - test/feature.rb
-- test/lazar-long.rb
-- test/lazar-physchem-short.rb
-- test/prediction_models.rb
-- test/regression.rb
+- test/gridfs.rb
+- test/model-classification.rb
+- test/model-nanoparticle.rb
+- test/model-regression.rb
+- test/model-validation.rb
+- test/nanomaterial-model-validation.rb
 - test/setup.rb
 - test/test_environment.rb
-- test/validation.rb
+- test/validation-classification.rb
+- test/validation-nanoparticle.rb
+- test/validation-regression.rb
 homepage: http://github.com/opentox/lazar
 licenses:
 - GPL-3.0
@@ -236,65 +226,4 @@ rubygems_version: 2.5.1
 signing_key:
 specification_version: 4
 summary: Lazar framework
-test_files:
-- test/all.rb
-- test/classification.rb
-- test/compound.rb
-- test/data/CPDBAS_v5c_1547_29Apr2008part.sdf
-- test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_mouse_TD50.csv
-- test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_rat_TD50.csv
-- test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Hamster.csv
-- test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mouse.csv
-- test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall.csv
-- test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall_no_duplicates.csv
-- test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity.csv
-- test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity_no_duplicates.csv
-- test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Rat.csv
-- test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_SingleCellCall.csv
-- test/data/EPAFHM.csv
-- test/data/EPAFHM.medi.csv
-- test/data/EPAFHM.mini.csv
-- test/data/EPA_v4b_Fathead_Minnow_Acute_Toxicity_LC50_mmol.csv
-- test/data/FDA_v3b_Maximum_Recommended_Daily_Dose_mmol.csv
-- test/data/ISSCAN-multi.csv
-- test/data/LOAEL_log_mg_corrected_smiles.csv
-- test/data/LOAEL_log_mmol_corrected_smiles.csv
-- test/data/LOAEL_mmol_corrected_smiles.csv
-- test/data/acetaldehyde.sdf
-- test/data/batch_prediction.csv
-- test/data/batch_prediction_inchi_small.csv
-- test/data/batch_prediction_smiles_small.csv
-- test/data/boiling_points.ext.sdf
-- test/data/cpdb_100.csv
-- test/data/hamster_carcinogenicity.csv
-- test/data/hamster_carcinogenicity.json
-- test/data/hamster_carcinogenicity.mini.bool_float.csv
-- test/data/hamster_carcinogenicity.mini.bool_int.csv
-- test/data/hamster_carcinogenicity.mini.bool_string.csv
-- test/data/hamster_carcinogenicity.mini.csv
-- test/data/hamster_carcinogenicity.ntriples
-- test/data/hamster_carcinogenicity.sdf
-- test/data/hamster_carcinogenicity.xls
-- test/data/hamster_carcinogenicity.yaml
-- test/data/hamster_carcinogenicity_with_errors.csv
-- test/data/kazius.csv
-- test/data/loael.csv
-- test/data/multi_cell_call.csv
-- test/data/multi_cell_call_no_dup.csv
-- test/data/multicolumn.csv
-- test/data/rat_feature_dataset.csv
-- test/data/wrong_dataset.csv
-- test/dataset-long.rb
-- test/dataset.rb
-- test/default_environment.rb
-- test/descriptor.rb
-- test/error.rb
-- test/experiment.rb
-- test/feature.rb
-- test/lazar-long.rb
-- test/lazar-physchem-short.rb
-- test/prediction_models.rb
-- test/regression.rb
-- test/setup.rb
-- test/test_environment.rb
-- test/validation.rb
+test_files: []

data/test/classification.rb DELETED Viewed

@@ -1,41 +0,0 @@
-require_relative "setup.rb"
-class LazarClassificationTest < MiniTest::Test
-  def test_lazar_classification
-    training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"hamster_carcinogenicity.csv")
-    model = Model::LazarClassification.create training_dataset
-    [ {
-      :compound => OpenTox::Compound.from_inchi("InChI=1S/C6H6/c1-2-4-6-5-3-1/h1-6H"),
-      :prediction => "false",
-      :confidence => 0.25281385281385277,
-      :nr_neighbors => 11
-    },{
-      :compound => OpenTox::Compound.from_smiles("c1ccccc1NN"),
-      :prediction => "false",
-      :confidence => 0.3639589577089577,
-      :nr_neighbors => 14
-    } ].each do |example|
-      prediction = model.predict example[:compound]
-      assert_equal example[:prediction], prediction[:value]
-      #assert_equal example[:confidence], prediction[:confidence]
-      #assert_equal example[:nr_neighbors], prediction[:neighbors].size
-    end
-    compound = Compound.from_smiles "CCO"
-    prediction = model.predict compound
-    assert_equal ["false"], prediction[:database_activities]
-    assert_equal "true", prediction[:value]
-    # make a dataset prediction
-    compound_dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.mini.csv")
-    prediction = model.predict compound_dataset
-    assert_equal compound_dataset.compounds, prediction.compounds
-    assert_equal "Could not find similar compounds with experimental data in the training dataset.", prediction.data_entries[7][3]
-    assert_equal "1 compounds have been removed from neighbors, because they have the same structure as the query compound.", prediction.data_entries[14][3]
-    # cleanup
-    [training_dataset,model,compound_dataset].each{|o| o.delete}
-  end
-end