lazar 0.9.3 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -4
- data/README.md +5 -15
- data/VERSION +1 -1
- data/ext/lazar/extconf.rb +1 -1
- data/ext/lazar/rinstall.R +9 -7
- data/java/CdkDescriptorInfo.class +0 -0
- data/java/CdkDescriptorInfo.java +3 -2
- data/java/CdkDescriptors.class +0 -0
- data/java/CdkDescriptors.java +28 -28
- data/java/Rakefile +3 -3
- data/java/{cdk-1.4.19.jar → cdk-2.0-SNAPSHOT.jar} +0 -0
- data/lazar.gemspec +6 -7
- data/lib/algorithm.rb +2 -11
- data/lib/caret.rb +96 -0
- data/lib/classification.rb +14 -22
- data/lib/compound.rb +21 -87
- data/lib/crossvalidation.rb +80 -279
- data/lib/dataset.rb +105 -174
- data/lib/feature.rb +11 -18
- data/lib/feature_selection.rb +42 -0
- data/lib/import.rb +122 -0
- data/lib/lazar.rb +14 -4
- data/lib/leave-one-out-validation.rb +46 -192
- data/lib/model.rb +319 -128
- data/lib/nanoparticle.rb +98 -0
- data/lib/opentox.rb +7 -4
- data/lib/overwrite.rb +24 -3
- data/lib/physchem.rb +11 -10
- data/lib/regression.rb +7 -137
- data/lib/rest-client-wrapper.rb +0 -6
- data/lib/similarity.rb +65 -0
- data/lib/substance.rb +8 -0
- data/lib/train-test-validation.rb +69 -0
- data/lib/validation-statistics.rb +223 -0
- data/lib/validation.rb +17 -100
- data/scripts/mg2mmol.rb +17 -0
- data/scripts/mirror-enm2test.rb +4 -0
- data/scripts/mmol2-log10.rb +32 -0
- data/test/compound.rb +4 -94
- data/test/data/EPAFHM.medi_log10.csv +92 -0
- data/test/data/EPAFHM.mini_log10.csv +16 -0
- data/test/data/EPAFHM_log10.csv +581 -0
- data/test/data/loael_log10.csv +568 -0
- data/test/dataset.rb +195 -133
- data/test/descriptor.rb +27 -18
- data/test/error.rb +2 -2
- data/test/experiment.rb +4 -4
- data/test/feature.rb +2 -3
- data/test/gridfs.rb +10 -0
- data/test/model-classification.rb +106 -0
- data/test/model-nanoparticle.rb +128 -0
- data/test/model-regression.rb +171 -0
- data/test/model-validation.rb +19 -0
- data/test/nanomaterial-model-validation.rb +55 -0
- data/test/setup.rb +8 -4
- data/test/validation-classification.rb +67 -0
- data/test/validation-nanoparticle.rb +133 -0
- data/test/validation-regression.rb +92 -0
- metadata +50 -121
- data/test/classification.rb +0 -41
- data/test/data/CPDBAS_v5c_1547_29Apr2008part.sdf +0 -13553
- data/test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_mouse_TD50.csv +0 -436
- data/test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_rat_TD50.csv +0 -568
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Hamster.csv +0 -87
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mouse.csv +0 -978
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall.csv +0 -1120
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall_no_duplicates.csv +0 -1113
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity.csv +0 -850
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity_no_duplicates.csv +0 -829
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Rat.csv +0 -1198
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_SingleCellCall.csv +0 -1505
- data/test/data/EPA_v4b_Fathead_Minnow_Acute_Toxicity_LC50_mmol.csv +0 -581
- data/test/data/FDA_v3b_Maximum_Recommended_Daily_Dose_mmol.csv +0 -1217
- data/test/data/LOAEL_log_mg_corrected_smiles.csv +0 -568
- data/test/data/LOAEL_log_mmol_corrected_smiles.csv +0 -568
- data/test/data/boiling_points.ext.sdf +0 -11460
- data/test/data/cpdb_100.csv +0 -101
- data/test/data/hamster_carcinogenicity.ntriples +0 -618
- data/test/data/hamster_carcinogenicity.sdf +0 -2805
- data/test/data/hamster_carcinogenicity.xls +0 -0
- data/test/data/hamster_carcinogenicity.yaml +0 -352
- data/test/dataset-long.rb +0 -114
- data/test/lazar-long.rb +0 -92
- data/test/lazar-physchem-short.rb +0 -31
- data/test/prediction_models.rb +0 -20
- data/test/regression.rb +0 -43
- data/test/validation.rb +0 -108
| @@ -0,0 +1,133 @@ | |
| 1 | 
            +
            require_relative "setup.rb"
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            class NanoparticleValidationTest  < MiniTest::Test
         | 
| 4 | 
            +
              include OpenTox::Validation
         | 
| 5 | 
            +
             | 
| 6 | 
            +
              def setup
         | 
| 7 | 
            +
                @training_dataset = Dataset.where(:name => "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles").first
         | 
| 8 | 
            +
                @prediction_feature = @training_dataset.features.select{|f| f["name"] == 'log2(Net cell association)'}.first
         | 
| 9 | 
            +
              end
         | 
| 10 | 
            +
             | 
| 11 | 
            +
              def test_validate_default_nanoparticle_model
         | 
| 12 | 
            +
                model = Model::Lazar.create training_dataset: @training_dataset, prediction_feature: @prediction_feature
         | 
| 13 | 
            +
                cv = CrossValidation.create model
         | 
| 14 | 
            +
                p cv.id
         | 
| 15 | 
            +
                #File.open("tmp.pdf","w+"){|f| f.puts cv.correlation_plot format:"pdf"}
         | 
| 16 | 
            +
                refute_nil cv.r_squared
         | 
| 17 | 
            +
                refute_nil cv.rmse
         | 
| 18 | 
            +
              end
         | 
| 19 | 
            +
             | 
| 20 | 
            +
              def test_validate_pls_pchem_model
         | 
| 21 | 
            +
                algorithms = {
         | 
| 22 | 
            +
                  :descriptors => {
         | 
| 23 | 
            +
                    :method => "properties",
         | 
| 24 | 
            +
                    :categories => ["P-CHEM"]
         | 
| 25 | 
            +
                  },
         | 
| 26 | 
            +
                  :prediction => {:method => 'Algorithm::Caret.pls' },
         | 
| 27 | 
            +
                  :feature_selection => {
         | 
| 28 | 
            +
                    :method => "Algorithm::FeatureSelection.correlation_filter",
         | 
| 29 | 
            +
                  },
         | 
| 30 | 
            +
                }
         | 
| 31 | 
            +
                model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms
         | 
| 32 | 
            +
                assert_equal "Algorithm::Caret.pls", model.algorithms[:prediction][:method]
         | 
| 33 | 
            +
                cv = CrossValidation.create model
         | 
| 34 | 
            +
                p cv.id
         | 
| 35 | 
            +
                #File.open("tmp2.pdf","w+"){|f| f.puts cv.correlation_plot format:"pdf"}
         | 
| 36 | 
            +
                refute_nil cv.r_squared
         | 
| 37 | 
            +
                refute_nil cv.rmse
         | 
| 38 | 
            +
              end
         | 
| 39 | 
            +
             | 
| 40 | 
            +
            =begin
         | 
| 41 | 
            +
              def test_validate_proteomics_pls_pchem_model
         | 
| 42 | 
            +
                algorithms = {
         | 
| 43 | 
            +
                  :descriptors => {
         | 
| 44 | 
            +
                    :method => "properties",
         | 
| 45 | 
            +
                    :categories => ["Proteomics"]
         | 
| 46 | 
            +
                  },
         | 
| 47 | 
            +
                  :prediction => {:method => 'Algorithm::Caret.pls' },
         | 
| 48 | 
            +
                  :feature_selection => {
         | 
| 49 | 
            +
                    :method => "Algorithm::FeatureSelection.correlation_filter",
         | 
| 50 | 
            +
                  },
         | 
| 51 | 
            +
                }
         | 
| 52 | 
            +
                model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms
         | 
| 53 | 
            +
                assert_equal "Algorithm::Caret.pls", model.algorithms[:prediction][:method]
         | 
| 54 | 
            +
                cv = CrossValidation.create model
         | 
| 55 | 
            +
                refute_nil cv.r_squared
         | 
| 56 | 
            +
                refute_nil cv.rmse
         | 
| 57 | 
            +
              end
         | 
| 58 | 
            +
            =end
         | 
| 59 | 
            +
             | 
| 60 | 
            +
              def test_validate_proteomics_pchem_default_model
         | 
| 61 | 
            +
                algorithms = {
         | 
| 62 | 
            +
                  :descriptors => {
         | 
| 63 | 
            +
                    :method => "properties",
         | 
| 64 | 
            +
                    :categories => ["Proteomics","P-CHEM"]
         | 
| 65 | 
            +
                  },
         | 
| 66 | 
            +
                  :feature_selection => {
         | 
| 67 | 
            +
                    :method => "Algorithm::FeatureSelection.correlation_filter",
         | 
| 68 | 
            +
                  },
         | 
| 69 | 
            +
                }
         | 
| 70 | 
            +
                model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms
         | 
| 71 | 
            +
                cv = CrossValidation.create model
         | 
| 72 | 
            +
                refute_nil cv.r_squared
         | 
| 73 | 
            +
                refute_nil cv.rmse
         | 
| 74 | 
            +
              end
         | 
| 75 | 
            +
             | 
| 76 | 
            +
              def test_nanoparticle_fingerprint_model_without_feature_selection
         | 
| 77 | 
            +
                algorithms = {
         | 
| 78 | 
            +
                  :descriptors => {
         | 
| 79 | 
            +
                    :method => "fingerprint",
         | 
| 80 | 
            +
                    :type => "MP2D",
         | 
| 81 | 
            +
                  },
         | 
| 82 | 
            +
                  :similarity => {
         | 
| 83 | 
            +
                    :method => "Algorithm::Similarity.tanimoto",
         | 
| 84 | 
            +
                    :min => 0.1
         | 
| 85 | 
            +
                  },
         | 
| 86 | 
            +
                  :feature_selection => nil
         | 
| 87 | 
            +
                }
         | 
| 88 | 
            +
                model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms
         | 
| 89 | 
            +
                cv = CrossValidation.create model
         | 
| 90 | 
            +
                refute_nil cv.r_squared
         | 
| 91 | 
            +
                refute_nil cv.rmse
         | 
| 92 | 
            +
              end
         | 
| 93 | 
            +
             | 
| 94 | 
            +
              def test_nanoparticle_fingerprint_weighted_average_model_without_feature_selection
         | 
| 95 | 
            +
                algorithms = {
         | 
| 96 | 
            +
                  :descriptors => {
         | 
| 97 | 
            +
                    :method => "fingerprint",
         | 
| 98 | 
            +
                    :type => "MP2D",
         | 
| 99 | 
            +
                  },
         | 
| 100 | 
            +
                  :similarity => {
         | 
| 101 | 
            +
                    :method => "Algorithm::Similarity.tanimoto",
         | 
| 102 | 
            +
                    :min => 0.1
         | 
| 103 | 
            +
                  },
         | 
| 104 | 
            +
                  :prediction => { :method => "OpenTox::Algorithm::Regression.weighted_average" },
         | 
| 105 | 
            +
                  :feature_selection => nil
         | 
| 106 | 
            +
                }
         | 
| 107 | 
            +
                model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms
         | 
| 108 | 
            +
                cv = CrossValidation.create model
         | 
| 109 | 
            +
                refute_nil cv.r_squared
         | 
| 110 | 
            +
                refute_nil cv.rmse
         | 
| 111 | 
            +
              end
         | 
| 112 | 
            +
             | 
| 113 | 
            +
              def test_nanoparticle_fingerprint_model_with_feature_selection
         | 
| 114 | 
            +
                algorithms = {
         | 
| 115 | 
            +
                  :descriptors => {
         | 
| 116 | 
            +
                    :method => "fingerprint",
         | 
| 117 | 
            +
                    :type => "MP2D",
         | 
| 118 | 
            +
                  },
         | 
| 119 | 
            +
                  :similarity => {
         | 
| 120 | 
            +
                    :method => "Algorithm::Similarity.tanimoto",
         | 
| 121 | 
            +
                    :min => 0.1
         | 
| 122 | 
            +
                  },
         | 
| 123 | 
            +
                  :feature_selection => {
         | 
| 124 | 
            +
                    :method => "Algorithm::FeatureSelection.correlation_filter",
         | 
| 125 | 
            +
                  },
         | 
| 126 | 
            +
                }
         | 
| 127 | 
            +
                model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms
         | 
| 128 | 
            +
                cv = CrossValidation.create model
         | 
| 129 | 
            +
                refute_nil cv.r_squared
         | 
| 130 | 
            +
                refute_nil cv.rmse
         | 
| 131 | 
            +
              end
         | 
| 132 | 
            +
             | 
| 133 | 
            +
            end
         | 
| @@ -0,0 +1,92 @@ | |
| 1 | 
            +
            require_relative "setup.rb"
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            class ValidationRegressionTest < MiniTest::Test
         | 
| 4 | 
            +
              include OpenTox::Validation
         | 
| 5 | 
            +
             | 
| 6 | 
            +
              # defaults
         | 
| 7 | 
            +
              
         | 
| 8 | 
            +
              def test_default_regression_crossvalidation
         | 
| 9 | 
            +
                dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi_log10.csv"
         | 
| 10 | 
            +
                model = Model::Lazar.create training_dataset: dataset
         | 
| 11 | 
            +
                cv = RegressionCrossValidation.create model
         | 
| 12 | 
            +
                assert cv.rmse < 1.5, "RMSE #{cv.rmse} should be smaller than 1.5, this may occur due to unfavorable training/test set splits"
         | 
| 13 | 
            +
                assert cv.mae < 1.1, "MAE #{cv.mae} should be smaller than 1.1, this may occur due to unfavorable training/test set splits"
         | 
| 14 | 
            +
                assert cv.percent_within_prediction_interval > 80, "Only #{cv.percent_within_prediction_interval.round(2)}% of measurement within prediction interval. This may occur due to unfavorable training/test set splits"
         | 
| 15 | 
            +
              end
         | 
| 16 | 
            +
             | 
| 17 | 
            +
              # parameters
         | 
| 18 | 
            +
              
         | 
| 19 | 
            +
              def test_regression_crossvalidation_params
         | 
| 20 | 
            +
                dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi_log10.csv"
         | 
| 21 | 
            +
                algorithms = {
         | 
| 22 | 
            +
                  :prediction => { :method => "OpenTox::Algorithm::Regression.weighted_average" },
         | 
| 23 | 
            +
                  :descriptors => { :type => "MACCS", },
         | 
| 24 | 
            +
                  :similarity => {:min => 0.7}
         | 
| 25 | 
            +
                }
         | 
| 26 | 
            +
                model = Model::Lazar.create training_dataset: dataset, algorithms: algorithms
         | 
| 27 | 
            +
                assert_equal algorithms[:descriptors][:type], model.algorithms[:descriptors][:type]
         | 
| 28 | 
            +
                cv = RegressionCrossValidation.create model
         | 
| 29 | 
            +
                cv.validation_ids.each do |vid|
         | 
| 30 | 
            +
                  model = Model::Lazar.find(Validation.find(vid).model_id)
         | 
| 31 | 
            +
                  assert_equal algorithms[:descriptors][:type], model.algorithms[:descriptors][:type]
         | 
| 32 | 
            +
                  assert_equal algorithms[:similarity][:min], model.algorithms[:similarity][:min]
         | 
| 33 | 
            +
                  refute_nil model.training_dataset_id
         | 
| 34 | 
            +
                  refute_equal dataset.id, model.training_dataset_id
         | 
| 35 | 
            +
                end
         | 
| 36 | 
            +
             | 
| 37 | 
            +
                refute_nil cv.rmse
         | 
| 38 | 
            +
                refute_nil cv.mae 
         | 
| 39 | 
            +
              end
         | 
| 40 | 
            +
             | 
| 41 | 
            +
              def test_physchem_regression_crossvalidation
         | 
| 42 | 
            +
                training_dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv")
         | 
| 43 | 
            +
                model = Model::Lazar.create training_dataset:training_dataset
         | 
| 44 | 
            +
                cv = RegressionCrossValidation.create model
         | 
| 45 | 
            +
                refute_nil cv.rmse
         | 
| 46 | 
            +
                refute_nil cv.mae 
         | 
| 47 | 
            +
              end
         | 
| 48 | 
            +
             | 
| 49 | 
            +
              # LOO
         | 
| 50 | 
            +
             | 
| 51 | 
            +
              def test_regression_loo_validation
         | 
| 52 | 
            +
                dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv")
         | 
| 53 | 
            +
                model = Model::Lazar.create training_dataset: dataset
         | 
| 54 | 
            +
                loo = RegressionLeaveOneOut.create model
         | 
| 55 | 
            +
                assert loo.r_squared > 0.34, "R^2 (#{loo.r_squared}) should be larger than 0.34"
         | 
| 56 | 
            +
              end
         | 
| 57 | 
            +
             | 
| 58 | 
            +
              def test_regression_loo_validation_with_feature_selection
         | 
| 59 | 
            +
                dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv")
         | 
| 60 | 
            +
                algorithms = {
         | 
| 61 | 
            +
                  :descriptors => {
         | 
| 62 | 
            +
                    :method => "calculate_properties",
         | 
| 63 | 
            +
                    :features => PhysChem.openbabel_descriptors,
         | 
| 64 | 
            +
                  },
         | 
| 65 | 
            +
                  :similarity => {
         | 
| 66 | 
            +
                    :method => "Algorithm::Similarity.weighted_cosine",
         | 
| 67 | 
            +
                    :min => 0.5
         | 
| 68 | 
            +
                  },
         | 
| 69 | 
            +
                  :feature_selection => {
         | 
| 70 | 
            +
                    :method => "Algorithm::FeatureSelection.correlation_filter",
         | 
| 71 | 
            +
                  },
         | 
| 72 | 
            +
                }
         | 
| 73 | 
            +
                model = Model::Lazar.create training_dataset: dataset, algorithms: algorithms
         | 
| 74 | 
            +
                assert_raises OpenTox::BadRequestError do
         | 
| 75 | 
            +
                  loo = RegressionLeaveOneOut.create model
         | 
| 76 | 
            +
                end
         | 
| 77 | 
            +
              end
         | 
| 78 | 
            +
             | 
| 79 | 
            +
              # repeated CV
         | 
| 80 | 
            +
             | 
| 81 | 
            +
              def test_repeated_crossvalidation
         | 
| 82 | 
            +
                dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv")
         | 
| 83 | 
            +
                model = Model::Lazar.create training_dataset: dataset
         | 
| 84 | 
            +
                repeated_cv = RepeatedCrossValidation.create model
         | 
| 85 | 
            +
                repeated_cv.crossvalidations.each do |cv|
         | 
| 86 | 
            +
                  #assert cv.r_squared > 0.34, "R^2 (#{cv.r_squared}) should be larger than 0.34"
         | 
| 87 | 
            +
                  #assert_operator cv.accuracy, :>, 0.7, "model accuracy < 0.7, this may happen by chance due to an unfavorable training/test set split"
         | 
| 88 | 
            +
                end
         | 
| 89 | 
            +
                File.open("tmp.png","w+"){|f| f.puts repeated_cv.correlation_plot}
         | 
| 90 | 
            +
              end
         | 
| 91 | 
            +
             | 
| 92 | 
            +
            end
         | 
    
        metadata
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: lazar
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0.9.3
         | 
| 4 | 
            +
              version: 1.0.0
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - Christoph Helma, Martin Guetlein, Andreas Maunz, Micha Rautenberg, David Vorgrimmler,
         | 
| @@ -9,98 +9,92 @@ authors: | |
| 9 9 | 
             
            autorequire: 
         | 
| 10 10 | 
             
            bindir: bin
         | 
| 11 11 | 
             
            cert_chain: []
         | 
| 12 | 
            -
            date: 2016- | 
| 12 | 
            +
            date: 2016-12-21 00:00:00.000000000 Z
         | 
| 13 13 | 
             
            dependencies:
         | 
| 14 14 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 15 15 | 
             
              name: bundler
         | 
| 16 16 | 
             
              requirement: !ruby/object:Gem::Requirement
         | 
| 17 17 | 
             
                requirements:
         | 
| 18 | 
            -
                - - " | 
| 18 | 
            +
                - - ">="
         | 
| 19 19 | 
             
                  - !ruby/object:Gem::Version
         | 
| 20 | 
            -
                    version: ' | 
| 20 | 
            +
                    version: '0'
         | 
| 21 21 | 
             
              type: :runtime
         | 
| 22 22 | 
             
              prerelease: false
         | 
| 23 23 | 
             
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 24 24 | 
             
                requirements:
         | 
| 25 | 
            -
                - - " | 
| 25 | 
            +
                - - ">="
         | 
| 26 26 | 
             
                  - !ruby/object:Gem::Version
         | 
| 27 | 
            -
                    version: ' | 
| 27 | 
            +
                    version: '0'
         | 
| 28 28 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 29 29 | 
             
              name: rest-client
         | 
| 30 30 | 
             
              requirement: !ruby/object:Gem::Requirement
         | 
| 31 31 | 
             
                requirements:
         | 
| 32 | 
            -
                - - " | 
| 32 | 
            +
                - - ">="
         | 
| 33 33 | 
             
                  - !ruby/object:Gem::Version
         | 
| 34 | 
            -
                    version: ' | 
| 34 | 
            +
                    version: '0'
         | 
| 35 35 | 
             
              type: :runtime
         | 
| 36 36 | 
             
              prerelease: false
         | 
| 37 37 | 
             
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 38 38 | 
             
                requirements:
         | 
| 39 | 
            -
                - - " | 
| 39 | 
            +
                - - ">="
         | 
| 40 40 | 
             
                  - !ruby/object:Gem::Version
         | 
| 41 | 
            -
                    version: ' | 
| 41 | 
            +
                    version: '0'
         | 
| 42 42 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 43 43 | 
             
              name: nokogiri
         | 
| 44 44 | 
             
              requirement: !ruby/object:Gem::Requirement
         | 
| 45 45 | 
             
                requirements:
         | 
| 46 | 
            -
                - - " | 
| 46 | 
            +
                - - ">="
         | 
| 47 47 | 
             
                  - !ruby/object:Gem::Version
         | 
| 48 | 
            -
                    version: ' | 
| 48 | 
            +
                    version: '0'
         | 
| 49 49 | 
             
              type: :runtime
         | 
| 50 50 | 
             
              prerelease: false
         | 
| 51 51 | 
             
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 52 52 | 
             
                requirements:
         | 
| 53 | 
            -
                - - " | 
| 53 | 
            +
                - - ">="
         | 
| 54 54 | 
             
                  - !ruby/object:Gem::Version
         | 
| 55 | 
            -
                    version: ' | 
| 55 | 
            +
                    version: '0'
         | 
| 56 56 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 57 57 | 
             
              name: rserve-client
         | 
| 58 58 | 
             
              requirement: !ruby/object:Gem::Requirement
         | 
| 59 59 | 
             
                requirements:
         | 
| 60 | 
            -
                - - " | 
| 60 | 
            +
                - - ">="
         | 
| 61 61 | 
             
                  - !ruby/object:Gem::Version
         | 
| 62 | 
            -
                    version: '0 | 
| 62 | 
            +
                    version: '0'
         | 
| 63 63 | 
             
              type: :runtime
         | 
| 64 64 | 
             
              prerelease: false
         | 
| 65 65 | 
             
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 66 66 | 
             
                requirements:
         | 
| 67 | 
            -
                - - " | 
| 67 | 
            +
                - - ">="
         | 
| 68 68 | 
             
                  - !ruby/object:Gem::Version
         | 
| 69 | 
            -
                    version: '0 | 
| 69 | 
            +
                    version: '0'
         | 
| 70 70 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 71 71 | 
             
              name: mongoid
         | 
| 72 72 | 
             
              requirement: !ruby/object:Gem::Requirement
         | 
| 73 73 | 
             
                requirements:
         | 
| 74 | 
            -
                - - " | 
| 74 | 
            +
                - - ">="
         | 
| 75 75 | 
             
                  - !ruby/object:Gem::Version
         | 
| 76 | 
            -
                    version: ' | 
| 76 | 
            +
                    version: '0'
         | 
| 77 77 | 
             
              type: :runtime
         | 
| 78 78 | 
             
              prerelease: false
         | 
| 79 79 | 
             
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 80 80 | 
             
                requirements:
         | 
| 81 | 
            -
                - - " | 
| 81 | 
            +
                - - ">="
         | 
| 82 82 | 
             
                  - !ruby/object:Gem::Version
         | 
| 83 | 
            -
                    version: ' | 
| 83 | 
            +
                    version: '0'
         | 
| 84 84 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 85 85 | 
             
              name: openbabel
         | 
| 86 86 | 
             
              requirement: !ruby/object:Gem::Requirement
         | 
| 87 87 | 
             
                requirements:
         | 
| 88 | 
            -
                - - "~>"
         | 
| 89 | 
            -
                  - !ruby/object:Gem::Version
         | 
| 90 | 
            -
                    version: '2.3'
         | 
| 91 88 | 
             
                - - ">="
         | 
| 92 89 | 
             
                  - !ruby/object:Gem::Version
         | 
| 93 | 
            -
                    version:  | 
| 90 | 
            +
                    version: '0'
         | 
| 94 91 | 
             
              type: :runtime
         | 
| 95 92 | 
             
              prerelease: false
         | 
| 96 93 | 
             
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 97 94 | 
             
                requirements:
         | 
| 98 | 
            -
                - - "~>"
         | 
| 99 | 
            -
                  - !ruby/object:Gem::Version
         | 
| 100 | 
            -
                    version: '2.3'
         | 
| 101 95 | 
             
                - - ">="
         | 
| 102 96 | 
             
                  - !ruby/object:Gem::Version
         | 
| 103 | 
            -
                    version:  | 
| 97 | 
            +
                    version: '0'
         | 
| 104 98 | 
             
            description: Libraries for lazy structure-activity relationships and read-across.
         | 
| 105 99 | 
             
            email:
         | 
| 106 100 | 
             
            - helma@in-silico.ch
         | 
| @@ -129,11 +123,12 @@ files: | |
| 129 123 | 
             
            - java/JoelibDescriptors.class
         | 
| 130 124 | 
             
            - java/JoelibDescriptors.java
         | 
| 131 125 | 
             
            - java/Rakefile
         | 
| 132 | 
            -
            - java/cdk- | 
| 126 | 
            +
            - java/cdk-2.0-SNAPSHOT.jar
         | 
| 133 127 | 
             
            - java/joelib2.jar
         | 
| 134 128 | 
             
            - java/log4j.jar
         | 
| 135 129 | 
             
            - lazar.gemspec
         | 
| 136 130 | 
             
            - lib/algorithm.rb
         | 
| 131 | 
            +
            - lib/caret.rb
         | 
| 137 132 | 
             
            - lib/classification.rb
         | 
| 138 133 | 
             
            - lib/compound.rb
         | 
| 139 134 | 
             
            - lib/crossvalidation.rb
         | 
| @@ -141,77 +136,72 @@ files: | |
| 141 136 | 
             
            - lib/error.rb
         | 
| 142 137 | 
             
            - lib/experiment.rb
         | 
| 143 138 | 
             
            - lib/feature.rb
         | 
| 139 | 
            +
            - lib/feature_selection.rb
         | 
| 140 | 
            +
            - lib/import.rb
         | 
| 144 141 | 
             
            - lib/lazar.rb
         | 
| 145 142 | 
             
            - lib/leave-one-out-validation.rb
         | 
| 146 143 | 
             
            - lib/model.rb
         | 
| 144 | 
            +
            - lib/nanoparticle.rb
         | 
| 147 145 | 
             
            - lib/opentox.rb
         | 
| 148 146 | 
             
            - lib/overwrite.rb
         | 
| 149 147 | 
             
            - lib/physchem.rb
         | 
| 150 148 | 
             
            - lib/regression.rb
         | 
| 151 149 | 
             
            - lib/rest-client-wrapper.rb
         | 
| 150 | 
            +
            - lib/similarity.rb
         | 
| 151 | 
            +
            - lib/substance.rb
         | 
| 152 | 
            +
            - lib/train-test-validation.rb
         | 
| 152 153 | 
             
            - lib/unique_descriptors.rb
         | 
| 154 | 
            +
            - lib/validation-statistics.rb
         | 
| 153 155 | 
             
            - lib/validation.rb
         | 
| 156 | 
            +
            - scripts/mg2mmol.rb
         | 
| 157 | 
            +
            - scripts/mirror-enm2test.rb
         | 
| 158 | 
            +
            - scripts/mmol2-log10.rb
         | 
| 154 159 | 
             
            - test/all.rb
         | 
| 155 | 
            -
            - test/classification.rb
         | 
| 156 160 | 
             
            - test/compound.rb
         | 
| 157 | 
            -
            - test/data/CPDBAS_v5c_1547_29Apr2008part.sdf
         | 
| 158 | 
            -
            - test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_mouse_TD50.csv
         | 
| 159 | 
            -
            - test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_rat_TD50.csv
         | 
| 160 | 
            -
            - test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Hamster.csv
         | 
| 161 | 
            -
            - test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mouse.csv
         | 
| 162 | 
            -
            - test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall.csv
         | 
| 163 | 
            -
            - test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall_no_duplicates.csv
         | 
| 164 | 
            -
            - test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity.csv
         | 
| 165 | 
            -
            - test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity_no_duplicates.csv
         | 
| 166 | 
            -
            - test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Rat.csv
         | 
| 167 | 
            -
            - test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_SingleCellCall.csv
         | 
| 168 161 | 
             
            - test/data/EPAFHM.csv
         | 
| 169 162 | 
             
            - test/data/EPAFHM.medi.csv
         | 
| 163 | 
            +
            - test/data/EPAFHM.medi_log10.csv
         | 
| 170 164 | 
             
            - test/data/EPAFHM.mini.csv
         | 
| 171 | 
            -
            - test/data/ | 
| 172 | 
            -
            - test/data/ | 
| 165 | 
            +
            - test/data/EPAFHM.mini_log10.csv
         | 
| 166 | 
            +
            - test/data/EPAFHM_log10.csv
         | 
| 173 167 | 
             
            - test/data/ISSCAN-multi.csv
         | 
| 174 | 
            -
            - test/data/LOAEL_log_mg_corrected_smiles.csv
         | 
| 175 | 
            -
            - test/data/LOAEL_log_mmol_corrected_smiles.csv
         | 
| 176 168 | 
             
            - test/data/LOAEL_mmol_corrected_smiles.csv
         | 
| 177 169 | 
             
            - test/data/acetaldehyde.sdf
         | 
| 178 170 | 
             
            - test/data/batch_prediction.csv
         | 
| 179 171 | 
             
            - test/data/batch_prediction_inchi_small.csv
         | 
| 180 172 | 
             
            - test/data/batch_prediction_smiles_small.csv
         | 
| 181 | 
            -
            - test/data/boiling_points.ext.sdf
         | 
| 182 | 
            -
            - test/data/cpdb_100.csv
         | 
| 183 173 | 
             
            - test/data/hamster_carcinogenicity.csv
         | 
| 184 174 | 
             
            - test/data/hamster_carcinogenicity.json
         | 
| 185 175 | 
             
            - test/data/hamster_carcinogenicity.mini.bool_float.csv
         | 
| 186 176 | 
             
            - test/data/hamster_carcinogenicity.mini.bool_int.csv
         | 
| 187 177 | 
             
            - test/data/hamster_carcinogenicity.mini.bool_string.csv
         | 
| 188 178 | 
             
            - test/data/hamster_carcinogenicity.mini.csv
         | 
| 189 | 
            -
            - test/data/hamster_carcinogenicity.ntriples
         | 
| 190 | 
            -
            - test/data/hamster_carcinogenicity.sdf
         | 
| 191 | 
            -
            - test/data/hamster_carcinogenicity.xls
         | 
| 192 | 
            -
            - test/data/hamster_carcinogenicity.yaml
         | 
| 193 179 | 
             
            - test/data/hamster_carcinogenicity_with_errors.csv
         | 
| 194 180 | 
             
            - test/data/kazius.csv
         | 
| 195 181 | 
             
            - test/data/loael.csv
         | 
| 182 | 
            +
            - test/data/loael_log10.csv
         | 
| 196 183 | 
             
            - test/data/multi_cell_call.csv
         | 
| 197 184 | 
             
            - test/data/multi_cell_call_no_dup.csv
         | 
| 198 185 | 
             
            - test/data/multicolumn.csv
         | 
| 199 186 | 
             
            - test/data/rat_feature_dataset.csv
         | 
| 200 187 | 
             
            - test/data/wrong_dataset.csv
         | 
| 201 | 
            -
            - test/dataset-long.rb
         | 
| 202 188 | 
             
            - test/dataset.rb
         | 
| 203 189 | 
             
            - test/default_environment.rb
         | 
| 204 190 | 
             
            - test/descriptor.rb
         | 
| 205 191 | 
             
            - test/error.rb
         | 
| 206 192 | 
             
            - test/experiment.rb
         | 
| 207 193 | 
             
            - test/feature.rb
         | 
| 208 | 
            -
            - test/ | 
| 209 | 
            -
            - test/ | 
| 210 | 
            -
            - test/ | 
| 211 | 
            -
            - test/regression.rb
         | 
| 194 | 
            +
            - test/gridfs.rb
         | 
| 195 | 
            +
            - test/model-classification.rb
         | 
| 196 | 
            +
            - test/model-nanoparticle.rb
         | 
| 197 | 
            +
            - test/model-regression.rb
         | 
| 198 | 
            +
            - test/model-validation.rb
         | 
| 199 | 
            +
            - test/nanomaterial-model-validation.rb
         | 
| 212 200 | 
             
            - test/setup.rb
         | 
| 213 201 | 
             
            - test/test_environment.rb
         | 
| 214 | 
            -
            - test/validation.rb
         | 
| 202 | 
            +
            - test/validation-classification.rb
         | 
| 203 | 
            +
            - test/validation-nanoparticle.rb
         | 
| 204 | 
            +
            - test/validation-regression.rb
         | 
| 215 205 | 
             
            homepage: http://github.com/opentox/lazar
         | 
| 216 206 | 
             
            licenses:
         | 
| 217 207 | 
             
            - GPL-3.0
         | 
| @@ -236,65 +226,4 @@ rubygems_version: 2.5.1 | |
| 236 226 | 
             
            signing_key: 
         | 
| 237 227 | 
             
            specification_version: 4
         | 
| 238 228 | 
             
            summary: Lazar framework
         | 
| 239 | 
            -
            test_files:
         | 
| 240 | 
            -
            - test/all.rb
         | 
| 241 | 
            -
            - test/classification.rb
         | 
| 242 | 
            -
            - test/compound.rb
         | 
| 243 | 
            -
            - test/data/CPDBAS_v5c_1547_29Apr2008part.sdf
         | 
| 244 | 
            -
            - test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_mouse_TD50.csv
         | 
| 245 | 
            -
            - test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_rat_TD50.csv
         | 
| 246 | 
            -
            - test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Hamster.csv
         | 
| 247 | 
            -
            - test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mouse.csv
         | 
| 248 | 
            -
            - test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall.csv
         | 
| 249 | 
            -
            - test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall_no_duplicates.csv
         | 
| 250 | 
            -
            - test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity.csv
         | 
| 251 | 
            -
            - test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity_no_duplicates.csv
         | 
| 252 | 
            -
            - test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Rat.csv
         | 
| 253 | 
            -
            - test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_SingleCellCall.csv
         | 
| 254 | 
            -
            - test/data/EPAFHM.csv
         | 
| 255 | 
            -
            - test/data/EPAFHM.medi.csv
         | 
| 256 | 
            -
            - test/data/EPAFHM.mini.csv
         | 
| 257 | 
            -
            - test/data/EPA_v4b_Fathead_Minnow_Acute_Toxicity_LC50_mmol.csv
         | 
| 258 | 
            -
            - test/data/FDA_v3b_Maximum_Recommended_Daily_Dose_mmol.csv
         | 
| 259 | 
            -
            - test/data/ISSCAN-multi.csv
         | 
| 260 | 
            -
            - test/data/LOAEL_log_mg_corrected_smiles.csv
         | 
| 261 | 
            -
            - test/data/LOAEL_log_mmol_corrected_smiles.csv
         | 
| 262 | 
            -
            - test/data/LOAEL_mmol_corrected_smiles.csv
         | 
| 263 | 
            -
            - test/data/acetaldehyde.sdf
         | 
| 264 | 
            -
            - test/data/batch_prediction.csv
         | 
| 265 | 
            -
            - test/data/batch_prediction_inchi_small.csv
         | 
| 266 | 
            -
            - test/data/batch_prediction_smiles_small.csv
         | 
| 267 | 
            -
            - test/data/boiling_points.ext.sdf
         | 
| 268 | 
            -
            - test/data/cpdb_100.csv
         | 
| 269 | 
            -
            - test/data/hamster_carcinogenicity.csv
         | 
| 270 | 
            -
            - test/data/hamster_carcinogenicity.json
         | 
| 271 | 
            -
            - test/data/hamster_carcinogenicity.mini.bool_float.csv
         | 
| 272 | 
            -
            - test/data/hamster_carcinogenicity.mini.bool_int.csv
         | 
| 273 | 
            -
            - test/data/hamster_carcinogenicity.mini.bool_string.csv
         | 
| 274 | 
            -
            - test/data/hamster_carcinogenicity.mini.csv
         | 
| 275 | 
            -
            - test/data/hamster_carcinogenicity.ntriples
         | 
| 276 | 
            -
            - test/data/hamster_carcinogenicity.sdf
         | 
| 277 | 
            -
            - test/data/hamster_carcinogenicity.xls
         | 
| 278 | 
            -
            - test/data/hamster_carcinogenicity.yaml
         | 
| 279 | 
            -
            - test/data/hamster_carcinogenicity_with_errors.csv
         | 
| 280 | 
            -
            - test/data/kazius.csv
         | 
| 281 | 
            -
            - test/data/loael.csv
         | 
| 282 | 
            -
            - test/data/multi_cell_call.csv
         | 
| 283 | 
            -
            - test/data/multi_cell_call_no_dup.csv
         | 
| 284 | 
            -
            - test/data/multicolumn.csv
         | 
| 285 | 
            -
            - test/data/rat_feature_dataset.csv
         | 
| 286 | 
            -
            - test/data/wrong_dataset.csv
         | 
| 287 | 
            -
            - test/dataset-long.rb
         | 
| 288 | 
            -
            - test/dataset.rb
         | 
| 289 | 
            -
            - test/default_environment.rb
         | 
| 290 | 
            -
            - test/descriptor.rb
         | 
| 291 | 
            -
            - test/error.rb
         | 
| 292 | 
            -
            - test/experiment.rb
         | 
| 293 | 
            -
            - test/feature.rb
         | 
| 294 | 
            -
            - test/lazar-long.rb
         | 
| 295 | 
            -
            - test/lazar-physchem-short.rb
         | 
| 296 | 
            -
            - test/prediction_models.rb
         | 
| 297 | 
            -
            - test/regression.rb
         | 
| 298 | 
            -
            - test/setup.rb
         | 
| 299 | 
            -
            - test/test_environment.rb
         | 
| 300 | 
            -
            - test/validation.rb
         | 
| 229 | 
            +
            test_files: []
         | 
    
        data/test/classification.rb
    DELETED
    
    | @@ -1,41 +0,0 @@ | |
| 1 | 
            -
            require_relative "setup.rb"
         | 
| 2 | 
            -
             | 
| 3 | 
            -
            class LazarClassificationTest < MiniTest::Test
         | 
| 4 | 
            -
             | 
| 5 | 
            -
              def test_lazar_classification
         | 
| 6 | 
            -
                training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"hamster_carcinogenicity.csv")
         | 
| 7 | 
            -
                model = Model::LazarClassification.create training_dataset
         | 
| 8 | 
            -
             | 
| 9 | 
            -
                [ {
         | 
| 10 | 
            -
                  :compound => OpenTox::Compound.from_inchi("InChI=1S/C6H6/c1-2-4-6-5-3-1/h1-6H"),
         | 
| 11 | 
            -
                  :prediction => "false",
         | 
| 12 | 
            -
                  :confidence => 0.25281385281385277,
         | 
| 13 | 
            -
                  :nr_neighbors => 11
         | 
| 14 | 
            -
                },{
         | 
| 15 | 
            -
                  :compound => OpenTox::Compound.from_smiles("c1ccccc1NN"),
         | 
| 16 | 
            -
                  :prediction => "false",
         | 
| 17 | 
            -
                  :confidence => 0.3639589577089577,
         | 
| 18 | 
            -
                  :nr_neighbors => 14
         | 
| 19 | 
            -
                } ].each do |example|
         | 
| 20 | 
            -
                  prediction = model.predict example[:compound]
         | 
| 21 | 
            -
                  assert_equal example[:prediction], prediction[:value]
         | 
| 22 | 
            -
                  #assert_equal example[:confidence], prediction[:confidence]
         | 
| 23 | 
            -
                  #assert_equal example[:nr_neighbors], prediction[:neighbors].size
         | 
| 24 | 
            -
                end
         | 
| 25 | 
            -
             | 
| 26 | 
            -
                compound = Compound.from_smiles "CCO"
         | 
| 27 | 
            -
                prediction = model.predict compound
         | 
| 28 | 
            -
                assert_equal ["false"], prediction[:database_activities]
         | 
| 29 | 
            -
                assert_equal "true", prediction[:value]
         | 
| 30 | 
            -
             | 
| 31 | 
            -
                # make a dataset prediction
         | 
| 32 | 
            -
                compound_dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.mini.csv")
         | 
| 33 | 
            -
                prediction = model.predict compound_dataset
         | 
| 34 | 
            -
                assert_equal compound_dataset.compounds, prediction.compounds
         | 
| 35 | 
            -
             | 
| 36 | 
            -
                assert_equal "Could not find similar compounds with experimental data in the training dataset.", prediction.data_entries[7][3]
         | 
| 37 | 
            -
                assert_equal "1 compounds have been removed from neighbors, because they have the same structure as the query compound.", prediction.data_entries[14][3]
         | 
| 38 | 
            -
                # cleanup
         | 
| 39 | 
            -
                [training_dataset,model,compound_dataset].each{|o| o.delete}
         | 
| 40 | 
            -
              end
         | 
| 41 | 
            -
            end
         |