lazar 0.9.3 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -4
- data/README.md +5 -15
- data/VERSION +1 -1
- data/ext/lazar/extconf.rb +1 -1
- data/ext/lazar/rinstall.R +9 -7
- data/java/CdkDescriptorInfo.class +0 -0
- data/java/CdkDescriptorInfo.java +3 -2
- data/java/CdkDescriptors.class +0 -0
- data/java/CdkDescriptors.java +28 -28
- data/java/Rakefile +3 -3
- data/java/{cdk-1.4.19.jar → cdk-2.0-SNAPSHOT.jar} +0 -0
- data/lazar.gemspec +6 -7
- data/lib/algorithm.rb +2 -11
- data/lib/caret.rb +96 -0
- data/lib/classification.rb +14 -22
- data/lib/compound.rb +21 -87
- data/lib/crossvalidation.rb +80 -279
- data/lib/dataset.rb +105 -174
- data/lib/feature.rb +11 -18
- data/lib/feature_selection.rb +42 -0
- data/lib/import.rb +122 -0
- data/lib/lazar.rb +14 -4
- data/lib/leave-one-out-validation.rb +46 -192
- data/lib/model.rb +319 -128
- data/lib/nanoparticle.rb +98 -0
- data/lib/opentox.rb +7 -4
- data/lib/overwrite.rb +24 -3
- data/lib/physchem.rb +11 -10
- data/lib/regression.rb +7 -137
- data/lib/rest-client-wrapper.rb +0 -6
- data/lib/similarity.rb +65 -0
- data/lib/substance.rb +8 -0
- data/lib/train-test-validation.rb +69 -0
- data/lib/validation-statistics.rb +223 -0
- data/lib/validation.rb +17 -100
- data/scripts/mg2mmol.rb +17 -0
- data/scripts/mirror-enm2test.rb +4 -0
- data/scripts/mmol2-log10.rb +32 -0
- data/test/compound.rb +4 -94
- data/test/data/EPAFHM.medi_log10.csv +92 -0
- data/test/data/EPAFHM.mini_log10.csv +16 -0
- data/test/data/EPAFHM_log10.csv +581 -0
- data/test/data/loael_log10.csv +568 -0
- data/test/dataset.rb +195 -133
- data/test/descriptor.rb +27 -18
- data/test/error.rb +2 -2
- data/test/experiment.rb +4 -4
- data/test/feature.rb +2 -3
- data/test/gridfs.rb +10 -0
- data/test/model-classification.rb +106 -0
- data/test/model-nanoparticle.rb +128 -0
- data/test/model-regression.rb +171 -0
- data/test/model-validation.rb +19 -0
- data/test/nanomaterial-model-validation.rb +55 -0
- data/test/setup.rb +8 -4
- data/test/validation-classification.rb +67 -0
- data/test/validation-nanoparticle.rb +133 -0
- data/test/validation-regression.rb +92 -0
- metadata +50 -121
- data/test/classification.rb +0 -41
- data/test/data/CPDBAS_v5c_1547_29Apr2008part.sdf +0 -13553
- data/test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_mouse_TD50.csv +0 -436
- data/test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_rat_TD50.csv +0 -568
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Hamster.csv +0 -87
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mouse.csv +0 -978
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall.csv +0 -1120
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall_no_duplicates.csv +0 -1113
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity.csv +0 -850
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity_no_duplicates.csv +0 -829
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Rat.csv +0 -1198
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_SingleCellCall.csv +0 -1505
- data/test/data/EPA_v4b_Fathead_Minnow_Acute_Toxicity_LC50_mmol.csv +0 -581
- data/test/data/FDA_v3b_Maximum_Recommended_Daily_Dose_mmol.csv +0 -1217
- data/test/data/LOAEL_log_mg_corrected_smiles.csv +0 -568
- data/test/data/LOAEL_log_mmol_corrected_smiles.csv +0 -568
- data/test/data/boiling_points.ext.sdf +0 -11460
- data/test/data/cpdb_100.csv +0 -101
- data/test/data/hamster_carcinogenicity.ntriples +0 -618
- data/test/data/hamster_carcinogenicity.sdf +0 -2805
- data/test/data/hamster_carcinogenicity.xls +0 -0
- data/test/data/hamster_carcinogenicity.yaml +0 -352
- data/test/dataset-long.rb +0 -114
- data/test/lazar-long.rb +0 -92
- data/test/lazar-physchem-short.rb +0 -31
- data/test/prediction_models.rb +0 -20
- data/test/regression.rb +0 -43
- data/test/validation.rb +0 -108
@@ -0,0 +1,133 @@
|
|
1
|
+
require_relative "setup.rb"
|
2
|
+
|
3
|
+
class NanoparticleValidationTest < MiniTest::Test
|
4
|
+
include OpenTox::Validation
|
5
|
+
|
6
|
+
def setup
|
7
|
+
@training_dataset = Dataset.where(:name => "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles").first
|
8
|
+
@prediction_feature = @training_dataset.features.select{|f| f["name"] == 'log2(Net cell association)'}.first
|
9
|
+
end
|
10
|
+
|
11
|
+
def test_validate_default_nanoparticle_model
|
12
|
+
model = Model::Lazar.create training_dataset: @training_dataset, prediction_feature: @prediction_feature
|
13
|
+
cv = CrossValidation.create model
|
14
|
+
p cv.id
|
15
|
+
#File.open("tmp.pdf","w+"){|f| f.puts cv.correlation_plot format:"pdf"}
|
16
|
+
refute_nil cv.r_squared
|
17
|
+
refute_nil cv.rmse
|
18
|
+
end
|
19
|
+
|
20
|
+
def test_validate_pls_pchem_model
|
21
|
+
algorithms = {
|
22
|
+
:descriptors => {
|
23
|
+
:method => "properties",
|
24
|
+
:categories => ["P-CHEM"]
|
25
|
+
},
|
26
|
+
:prediction => {:method => 'Algorithm::Caret.pls' },
|
27
|
+
:feature_selection => {
|
28
|
+
:method => "Algorithm::FeatureSelection.correlation_filter",
|
29
|
+
},
|
30
|
+
}
|
31
|
+
model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms
|
32
|
+
assert_equal "Algorithm::Caret.pls", model.algorithms[:prediction][:method]
|
33
|
+
cv = CrossValidation.create model
|
34
|
+
p cv.id
|
35
|
+
#File.open("tmp2.pdf","w+"){|f| f.puts cv.correlation_plot format:"pdf"}
|
36
|
+
refute_nil cv.r_squared
|
37
|
+
refute_nil cv.rmse
|
38
|
+
end
|
39
|
+
|
40
|
+
=begin
|
41
|
+
def test_validate_proteomics_pls_pchem_model
|
42
|
+
algorithms = {
|
43
|
+
:descriptors => {
|
44
|
+
:method => "properties",
|
45
|
+
:categories => ["Proteomics"]
|
46
|
+
},
|
47
|
+
:prediction => {:method => 'Algorithm::Caret.pls' },
|
48
|
+
:feature_selection => {
|
49
|
+
:method => "Algorithm::FeatureSelection.correlation_filter",
|
50
|
+
},
|
51
|
+
}
|
52
|
+
model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms
|
53
|
+
assert_equal "Algorithm::Caret.pls", model.algorithms[:prediction][:method]
|
54
|
+
cv = CrossValidation.create model
|
55
|
+
refute_nil cv.r_squared
|
56
|
+
refute_nil cv.rmse
|
57
|
+
end
|
58
|
+
=end
|
59
|
+
|
60
|
+
def test_validate_proteomics_pchem_default_model
|
61
|
+
algorithms = {
|
62
|
+
:descriptors => {
|
63
|
+
:method => "properties",
|
64
|
+
:categories => ["Proteomics","P-CHEM"]
|
65
|
+
},
|
66
|
+
:feature_selection => {
|
67
|
+
:method => "Algorithm::FeatureSelection.correlation_filter",
|
68
|
+
},
|
69
|
+
}
|
70
|
+
model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms
|
71
|
+
cv = CrossValidation.create model
|
72
|
+
refute_nil cv.r_squared
|
73
|
+
refute_nil cv.rmse
|
74
|
+
end
|
75
|
+
|
76
|
+
def test_nanoparticle_fingerprint_model_without_feature_selection
|
77
|
+
algorithms = {
|
78
|
+
:descriptors => {
|
79
|
+
:method => "fingerprint",
|
80
|
+
:type => "MP2D",
|
81
|
+
},
|
82
|
+
:similarity => {
|
83
|
+
:method => "Algorithm::Similarity.tanimoto",
|
84
|
+
:min => 0.1
|
85
|
+
},
|
86
|
+
:feature_selection => nil
|
87
|
+
}
|
88
|
+
model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms
|
89
|
+
cv = CrossValidation.create model
|
90
|
+
refute_nil cv.r_squared
|
91
|
+
refute_nil cv.rmse
|
92
|
+
end
|
93
|
+
|
94
|
+
def test_nanoparticle_fingerprint_weighted_average_model_without_feature_selection
|
95
|
+
algorithms = {
|
96
|
+
:descriptors => {
|
97
|
+
:method => "fingerprint",
|
98
|
+
:type => "MP2D",
|
99
|
+
},
|
100
|
+
:similarity => {
|
101
|
+
:method => "Algorithm::Similarity.tanimoto",
|
102
|
+
:min => 0.1
|
103
|
+
},
|
104
|
+
:prediction => { :method => "OpenTox::Algorithm::Regression.weighted_average" },
|
105
|
+
:feature_selection => nil
|
106
|
+
}
|
107
|
+
model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms
|
108
|
+
cv = CrossValidation.create model
|
109
|
+
refute_nil cv.r_squared
|
110
|
+
refute_nil cv.rmse
|
111
|
+
end
|
112
|
+
|
113
|
+
def test_nanoparticle_fingerprint_model_with_feature_selection
|
114
|
+
algorithms = {
|
115
|
+
:descriptors => {
|
116
|
+
:method => "fingerprint",
|
117
|
+
:type => "MP2D",
|
118
|
+
},
|
119
|
+
:similarity => {
|
120
|
+
:method => "Algorithm::Similarity.tanimoto",
|
121
|
+
:min => 0.1
|
122
|
+
},
|
123
|
+
:feature_selection => {
|
124
|
+
:method => "Algorithm::FeatureSelection.correlation_filter",
|
125
|
+
},
|
126
|
+
}
|
127
|
+
model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms
|
128
|
+
cv = CrossValidation.create model
|
129
|
+
refute_nil cv.r_squared
|
130
|
+
refute_nil cv.rmse
|
131
|
+
end
|
132
|
+
|
133
|
+
end
|
@@ -0,0 +1,92 @@
|
|
1
|
+
require_relative "setup.rb"
|
2
|
+
|
3
|
+
class ValidationRegressionTest < MiniTest::Test
|
4
|
+
include OpenTox::Validation
|
5
|
+
|
6
|
+
# defaults
|
7
|
+
|
8
|
+
def test_default_regression_crossvalidation
|
9
|
+
dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi_log10.csv"
|
10
|
+
model = Model::Lazar.create training_dataset: dataset
|
11
|
+
cv = RegressionCrossValidation.create model
|
12
|
+
assert cv.rmse < 1.5, "RMSE #{cv.rmse} should be smaller than 1.5, this may occur due to unfavorable training/test set splits"
|
13
|
+
assert cv.mae < 1.1, "MAE #{cv.mae} should be smaller than 1.1, this may occur due to unfavorable training/test set splits"
|
14
|
+
assert cv.percent_within_prediction_interval > 80, "Only #{cv.percent_within_prediction_interval.round(2)}% of measurement within prediction interval. This may occur due to unfavorable training/test set splits"
|
15
|
+
end
|
16
|
+
|
17
|
+
# parameters
|
18
|
+
|
19
|
+
def test_regression_crossvalidation_params
|
20
|
+
dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi_log10.csv"
|
21
|
+
algorithms = {
|
22
|
+
:prediction => { :method => "OpenTox::Algorithm::Regression.weighted_average" },
|
23
|
+
:descriptors => { :type => "MACCS", },
|
24
|
+
:similarity => {:min => 0.7}
|
25
|
+
}
|
26
|
+
model = Model::Lazar.create training_dataset: dataset, algorithms: algorithms
|
27
|
+
assert_equal algorithms[:descriptors][:type], model.algorithms[:descriptors][:type]
|
28
|
+
cv = RegressionCrossValidation.create model
|
29
|
+
cv.validation_ids.each do |vid|
|
30
|
+
model = Model::Lazar.find(Validation.find(vid).model_id)
|
31
|
+
assert_equal algorithms[:descriptors][:type], model.algorithms[:descriptors][:type]
|
32
|
+
assert_equal algorithms[:similarity][:min], model.algorithms[:similarity][:min]
|
33
|
+
refute_nil model.training_dataset_id
|
34
|
+
refute_equal dataset.id, model.training_dataset_id
|
35
|
+
end
|
36
|
+
|
37
|
+
refute_nil cv.rmse
|
38
|
+
refute_nil cv.mae
|
39
|
+
end
|
40
|
+
|
41
|
+
def test_physchem_regression_crossvalidation
|
42
|
+
training_dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv")
|
43
|
+
model = Model::Lazar.create training_dataset:training_dataset
|
44
|
+
cv = RegressionCrossValidation.create model
|
45
|
+
refute_nil cv.rmse
|
46
|
+
refute_nil cv.mae
|
47
|
+
end
|
48
|
+
|
49
|
+
# LOO
|
50
|
+
|
51
|
+
def test_regression_loo_validation
|
52
|
+
dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv")
|
53
|
+
model = Model::Lazar.create training_dataset: dataset
|
54
|
+
loo = RegressionLeaveOneOut.create model
|
55
|
+
assert loo.r_squared > 0.34, "R^2 (#{loo.r_squared}) should be larger than 0.034"
|
56
|
+
end
|
57
|
+
|
58
|
+
def test_regression_loo_validation_with_feature_selection
|
59
|
+
dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv")
|
60
|
+
algorithms = {
|
61
|
+
:descriptors => {
|
62
|
+
:method => "calculate_properties",
|
63
|
+
:features => PhysChem.openbabel_descriptors,
|
64
|
+
},
|
65
|
+
:similarity => {
|
66
|
+
:method => "Algorithm::Similarity.weighted_cosine",
|
67
|
+
:min => 0.5
|
68
|
+
},
|
69
|
+
:feature_selection => {
|
70
|
+
:method => "Algorithm::FeatureSelection.correlation_filter",
|
71
|
+
},
|
72
|
+
}
|
73
|
+
model = Model::Lazar.create training_dataset: dataset, algorithms: algorithms
|
74
|
+
assert_raises OpenTox::BadRequestError do
|
75
|
+
loo = RegressionLeaveOneOut.create model
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
# repeated CV
|
80
|
+
|
81
|
+
def test_repeated_crossvalidation
|
82
|
+
dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv")
|
83
|
+
model = Model::Lazar.create training_dataset: dataset
|
84
|
+
repeated_cv = RepeatedCrossValidation.create model
|
85
|
+
repeated_cv.crossvalidations.each do |cv|
|
86
|
+
#assert cv.r_squared > 0.34, "R^2 (#{cv.r_squared}) should be larger than 0.034"
|
87
|
+
#assert_operator cv.accuracy, :>, 0.7, "model accuracy < 0.7, this may happen by chance due to an unfavorable training/test set split"
|
88
|
+
end
|
89
|
+
File.open("tmp.png","w+"){|f| f.puts repeated_cv.correlation_plot}
|
90
|
+
end
|
91
|
+
|
92
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lazar
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.3
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Christoph Helma, Martin Guetlein, Andreas Maunz, Micha Rautenberg, David Vorgrimmler,
|
@@ -9,98 +9,92 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2016-
|
12
|
+
date: 2016-12-21 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
16
16
|
requirement: !ruby/object:Gem::Requirement
|
17
17
|
requirements:
|
18
|
-
- - "
|
18
|
+
- - ">="
|
19
19
|
- !ruby/object:Gem::Version
|
20
|
-
version: '
|
20
|
+
version: '0'
|
21
21
|
type: :runtime
|
22
22
|
prerelease: false
|
23
23
|
version_requirements: !ruby/object:Gem::Requirement
|
24
24
|
requirements:
|
25
|
-
- - "
|
25
|
+
- - ">="
|
26
26
|
- !ruby/object:Gem::Version
|
27
|
-
version: '
|
27
|
+
version: '0'
|
28
28
|
- !ruby/object:Gem::Dependency
|
29
29
|
name: rest-client
|
30
30
|
requirement: !ruby/object:Gem::Requirement
|
31
31
|
requirements:
|
32
|
-
- - "
|
32
|
+
- - ">="
|
33
33
|
- !ruby/object:Gem::Version
|
34
|
-
version: '
|
34
|
+
version: '0'
|
35
35
|
type: :runtime
|
36
36
|
prerelease: false
|
37
37
|
version_requirements: !ruby/object:Gem::Requirement
|
38
38
|
requirements:
|
39
|
-
- - "
|
39
|
+
- - ">="
|
40
40
|
- !ruby/object:Gem::Version
|
41
|
-
version: '
|
41
|
+
version: '0'
|
42
42
|
- !ruby/object:Gem::Dependency
|
43
43
|
name: nokogiri
|
44
44
|
requirement: !ruby/object:Gem::Requirement
|
45
45
|
requirements:
|
46
|
-
- - "
|
46
|
+
- - ">="
|
47
47
|
- !ruby/object:Gem::Version
|
48
|
-
version: '
|
48
|
+
version: '0'
|
49
49
|
type: :runtime
|
50
50
|
prerelease: false
|
51
51
|
version_requirements: !ruby/object:Gem::Requirement
|
52
52
|
requirements:
|
53
|
-
- - "
|
53
|
+
- - ">="
|
54
54
|
- !ruby/object:Gem::Version
|
55
|
-
version: '
|
55
|
+
version: '0'
|
56
56
|
- !ruby/object:Gem::Dependency
|
57
57
|
name: rserve-client
|
58
58
|
requirement: !ruby/object:Gem::Requirement
|
59
59
|
requirements:
|
60
|
-
- - "
|
60
|
+
- - ">="
|
61
61
|
- !ruby/object:Gem::Version
|
62
|
-
version: '0
|
62
|
+
version: '0'
|
63
63
|
type: :runtime
|
64
64
|
prerelease: false
|
65
65
|
version_requirements: !ruby/object:Gem::Requirement
|
66
66
|
requirements:
|
67
|
-
- - "
|
67
|
+
- - ">="
|
68
68
|
- !ruby/object:Gem::Version
|
69
|
-
version: '0
|
69
|
+
version: '0'
|
70
70
|
- !ruby/object:Gem::Dependency
|
71
71
|
name: mongoid
|
72
72
|
requirement: !ruby/object:Gem::Requirement
|
73
73
|
requirements:
|
74
|
-
- - "
|
74
|
+
- - ">="
|
75
75
|
- !ruby/object:Gem::Version
|
76
|
-
version: '
|
76
|
+
version: '0'
|
77
77
|
type: :runtime
|
78
78
|
prerelease: false
|
79
79
|
version_requirements: !ruby/object:Gem::Requirement
|
80
80
|
requirements:
|
81
|
-
- - "
|
81
|
+
- - ">="
|
82
82
|
- !ruby/object:Gem::Version
|
83
|
-
version: '
|
83
|
+
version: '0'
|
84
84
|
- !ruby/object:Gem::Dependency
|
85
85
|
name: openbabel
|
86
86
|
requirement: !ruby/object:Gem::Requirement
|
87
87
|
requirements:
|
88
|
-
- - "~>"
|
89
|
-
- !ruby/object:Gem::Version
|
90
|
-
version: '2.3'
|
91
88
|
- - ">="
|
92
89
|
- !ruby/object:Gem::Version
|
93
|
-
version:
|
90
|
+
version: '0'
|
94
91
|
type: :runtime
|
95
92
|
prerelease: false
|
96
93
|
version_requirements: !ruby/object:Gem::Requirement
|
97
94
|
requirements:
|
98
|
-
- - "~>"
|
99
|
-
- !ruby/object:Gem::Version
|
100
|
-
version: '2.3'
|
101
95
|
- - ">="
|
102
96
|
- !ruby/object:Gem::Version
|
103
|
-
version:
|
97
|
+
version: '0'
|
104
98
|
description: Libraries for lazy structure-activity relationships and read-across.
|
105
99
|
email:
|
106
100
|
- helma@in-silico.ch
|
@@ -129,11 +123,12 @@ files:
|
|
129
123
|
- java/JoelibDescriptors.class
|
130
124
|
- java/JoelibDescriptors.java
|
131
125
|
- java/Rakefile
|
132
|
-
- java/cdk-1.4.19.jar
|
126
|
+
- java/cdk-2.0-SNAPSHOT.jar
|
133
127
|
- java/joelib2.jar
|
134
128
|
- java/log4j.jar
|
135
129
|
- lazar.gemspec
|
136
130
|
- lib/algorithm.rb
|
131
|
+
- lib/caret.rb
|
137
132
|
- lib/classification.rb
|
138
133
|
- lib/compound.rb
|
139
134
|
- lib/crossvalidation.rb
|
@@ -141,77 +136,72 @@ files:
|
|
141
136
|
- lib/error.rb
|
142
137
|
- lib/experiment.rb
|
143
138
|
- lib/feature.rb
|
139
|
+
- lib/feature_selection.rb
|
140
|
+
- lib/import.rb
|
144
141
|
- lib/lazar.rb
|
145
142
|
- lib/leave-one-out-validation.rb
|
146
143
|
- lib/model.rb
|
144
|
+
- lib/nanoparticle.rb
|
147
145
|
- lib/opentox.rb
|
148
146
|
- lib/overwrite.rb
|
149
147
|
- lib/physchem.rb
|
150
148
|
- lib/regression.rb
|
151
149
|
- lib/rest-client-wrapper.rb
|
150
|
+
- lib/similarity.rb
|
151
|
+
- lib/substance.rb
|
152
|
+
- lib/train-test-validation.rb
|
152
153
|
- lib/unique_descriptors.rb
|
154
|
+
- lib/validation-statistics.rb
|
153
155
|
- lib/validation.rb
|
156
|
+
- scripts/mg2mmol.rb
|
157
|
+
- scripts/mirror-enm2test.rb
|
158
|
+
- scripts/mmol2-log10.rb
|
154
159
|
- test/all.rb
|
155
|
-
- test/classification.rb
|
156
160
|
- test/compound.rb
|
157
|
-
- test/data/CPDBAS_v5c_1547_29Apr2008part.sdf
|
158
|
-
- test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_mouse_TD50.csv
|
159
|
-
- test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_rat_TD50.csv
|
160
|
-
- test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Hamster.csv
|
161
|
-
- test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mouse.csv
|
162
|
-
- test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall.csv
|
163
|
-
- test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall_no_duplicates.csv
|
164
|
-
- test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity.csv
|
165
|
-
- test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity_no_duplicates.csv
|
166
|
-
- test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Rat.csv
|
167
|
-
- test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_SingleCellCall.csv
|
168
161
|
- test/data/EPAFHM.csv
|
169
162
|
- test/data/EPAFHM.medi.csv
|
163
|
+
- test/data/EPAFHM.medi_log10.csv
|
170
164
|
- test/data/EPAFHM.mini.csv
|
171
|
-
- test/data/EPA_v4b_Fathead_Minnow_Acute_Toxicity_LC50_mmol.csv
|
172
|
-
- test/data/FDA_v3b_Maximum_Recommended_Daily_Dose_mmol.csv
|
165
|
+
- test/data/EPAFHM.mini_log10.csv
|
166
|
+
- test/data/EPAFHM_log10.csv
|
173
167
|
- test/data/ISSCAN-multi.csv
|
174
|
-
- test/data/LOAEL_log_mg_corrected_smiles.csv
|
175
|
-
- test/data/LOAEL_log_mmol_corrected_smiles.csv
|
176
168
|
- test/data/LOAEL_mmol_corrected_smiles.csv
|
177
169
|
- test/data/acetaldehyde.sdf
|
178
170
|
- test/data/batch_prediction.csv
|
179
171
|
- test/data/batch_prediction_inchi_small.csv
|
180
172
|
- test/data/batch_prediction_smiles_small.csv
|
181
|
-
- test/data/boiling_points.ext.sdf
|
182
|
-
- test/data/cpdb_100.csv
|
183
173
|
- test/data/hamster_carcinogenicity.csv
|
184
174
|
- test/data/hamster_carcinogenicity.json
|
185
175
|
- test/data/hamster_carcinogenicity.mini.bool_float.csv
|
186
176
|
- test/data/hamster_carcinogenicity.mini.bool_int.csv
|
187
177
|
- test/data/hamster_carcinogenicity.mini.bool_string.csv
|
188
178
|
- test/data/hamster_carcinogenicity.mini.csv
|
189
|
-
- test/data/hamster_carcinogenicity.ntriples
|
190
|
-
- test/data/hamster_carcinogenicity.sdf
|
191
|
-
- test/data/hamster_carcinogenicity.xls
|
192
|
-
- test/data/hamster_carcinogenicity.yaml
|
193
179
|
- test/data/hamster_carcinogenicity_with_errors.csv
|
194
180
|
- test/data/kazius.csv
|
195
181
|
- test/data/loael.csv
|
182
|
+
- test/data/loael_log10.csv
|
196
183
|
- test/data/multi_cell_call.csv
|
197
184
|
- test/data/multi_cell_call_no_dup.csv
|
198
185
|
- test/data/multicolumn.csv
|
199
186
|
- test/data/rat_feature_dataset.csv
|
200
187
|
- test/data/wrong_dataset.csv
|
201
|
-
- test/dataset-long.rb
|
202
188
|
- test/dataset.rb
|
203
189
|
- test/default_environment.rb
|
204
190
|
- test/descriptor.rb
|
205
191
|
- test/error.rb
|
206
192
|
- test/experiment.rb
|
207
193
|
- test/feature.rb
|
208
|
-
- test/lazar-long.rb
|
209
|
-
- test/lazar-physchem-short.rb
|
210
|
-
- test/prediction_models.rb
|
211
|
-
- test/regression.rb
|
194
|
+
- test/gridfs.rb
|
195
|
+
- test/model-classification.rb
|
196
|
+
- test/model-nanoparticle.rb
|
197
|
+
- test/model-regression.rb
|
198
|
+
- test/model-validation.rb
|
199
|
+
- test/nanomaterial-model-validation.rb
|
212
200
|
- test/setup.rb
|
213
201
|
- test/test_environment.rb
|
214
|
-
- test/validation.rb
|
202
|
+
- test/validation-classification.rb
|
203
|
+
- test/validation-nanoparticle.rb
|
204
|
+
- test/validation-regression.rb
|
215
205
|
homepage: http://github.com/opentox/lazar
|
216
206
|
licenses:
|
217
207
|
- GPL-3.0
|
@@ -236,65 +226,4 @@ rubygems_version: 2.5.1
|
|
236
226
|
signing_key:
|
237
227
|
specification_version: 4
|
238
228
|
summary: Lazar framework
|
239
|
-
test_files:
|
240
|
-
- test/all.rb
|
241
|
-
- test/classification.rb
|
242
|
-
- test/compound.rb
|
243
|
-
- test/data/CPDBAS_v5c_1547_29Apr2008part.sdf
|
244
|
-
- test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_mouse_TD50.csv
|
245
|
-
- test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_rat_TD50.csv
|
246
|
-
- test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Hamster.csv
|
247
|
-
- test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mouse.csv
|
248
|
-
- test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall.csv
|
249
|
-
- test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall_no_duplicates.csv
|
250
|
-
- test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity.csv
|
251
|
-
- test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity_no_duplicates.csv
|
252
|
-
- test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Rat.csv
|
253
|
-
- test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_SingleCellCall.csv
|
254
|
-
- test/data/EPAFHM.csv
|
255
|
-
- test/data/EPAFHM.medi.csv
|
256
|
-
- test/data/EPAFHM.mini.csv
|
257
|
-
- test/data/EPA_v4b_Fathead_Minnow_Acute_Toxicity_LC50_mmol.csv
|
258
|
-
- test/data/FDA_v3b_Maximum_Recommended_Daily_Dose_mmol.csv
|
259
|
-
- test/data/ISSCAN-multi.csv
|
260
|
-
- test/data/LOAEL_log_mg_corrected_smiles.csv
|
261
|
-
- test/data/LOAEL_log_mmol_corrected_smiles.csv
|
262
|
-
- test/data/LOAEL_mmol_corrected_smiles.csv
|
263
|
-
- test/data/acetaldehyde.sdf
|
264
|
-
- test/data/batch_prediction.csv
|
265
|
-
- test/data/batch_prediction_inchi_small.csv
|
266
|
-
- test/data/batch_prediction_smiles_small.csv
|
267
|
-
- test/data/boiling_points.ext.sdf
|
268
|
-
- test/data/cpdb_100.csv
|
269
|
-
- test/data/hamster_carcinogenicity.csv
|
270
|
-
- test/data/hamster_carcinogenicity.json
|
271
|
-
- test/data/hamster_carcinogenicity.mini.bool_float.csv
|
272
|
-
- test/data/hamster_carcinogenicity.mini.bool_int.csv
|
273
|
-
- test/data/hamster_carcinogenicity.mini.bool_string.csv
|
274
|
-
- test/data/hamster_carcinogenicity.mini.csv
|
275
|
-
- test/data/hamster_carcinogenicity.ntriples
|
276
|
-
- test/data/hamster_carcinogenicity.sdf
|
277
|
-
- test/data/hamster_carcinogenicity.xls
|
278
|
-
- test/data/hamster_carcinogenicity.yaml
|
279
|
-
- test/data/hamster_carcinogenicity_with_errors.csv
|
280
|
-
- test/data/kazius.csv
|
281
|
-
- test/data/loael.csv
|
282
|
-
- test/data/multi_cell_call.csv
|
283
|
-
- test/data/multi_cell_call_no_dup.csv
|
284
|
-
- test/data/multicolumn.csv
|
285
|
-
- test/data/rat_feature_dataset.csv
|
286
|
-
- test/data/wrong_dataset.csv
|
287
|
-
- test/dataset-long.rb
|
288
|
-
- test/dataset.rb
|
289
|
-
- test/default_environment.rb
|
290
|
-
- test/descriptor.rb
|
291
|
-
- test/error.rb
|
292
|
-
- test/experiment.rb
|
293
|
-
- test/feature.rb
|
294
|
-
- test/lazar-long.rb
|
295
|
-
- test/lazar-physchem-short.rb
|
296
|
-
- test/prediction_models.rb
|
297
|
-
- test/regression.rb
|
298
|
-
- test/setup.rb
|
299
|
-
- test/test_environment.rb
|
300
|
-
- test/validation.rb
|
229
|
+
test_files: []
|
data/test/classification.rb
DELETED
@@ -1,41 +0,0 @@
|
|
1
|
-
require_relative "setup.rb"
|
2
|
-
|
3
|
-
class LazarClassificationTest < MiniTest::Test
|
4
|
-
|
5
|
-
def test_lazar_classification
|
6
|
-
training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"hamster_carcinogenicity.csv")
|
7
|
-
model = Model::LazarClassification.create training_dataset
|
8
|
-
|
9
|
-
[ {
|
10
|
-
:compound => OpenTox::Compound.from_inchi("InChI=1S/C6H6/c1-2-4-6-5-3-1/h1-6H"),
|
11
|
-
:prediction => "false",
|
12
|
-
:confidence => 0.25281385281385277,
|
13
|
-
:nr_neighbors => 11
|
14
|
-
},{
|
15
|
-
:compound => OpenTox::Compound.from_smiles("c1ccccc1NN"),
|
16
|
-
:prediction => "false",
|
17
|
-
:confidence => 0.3639589577089577,
|
18
|
-
:nr_neighbors => 14
|
19
|
-
} ].each do |example|
|
20
|
-
prediction = model.predict example[:compound]
|
21
|
-
assert_equal example[:prediction], prediction[:value]
|
22
|
-
#assert_equal example[:confidence], prediction[:confidence]
|
23
|
-
#assert_equal example[:nr_neighbors], prediction[:neighbors].size
|
24
|
-
end
|
25
|
-
|
26
|
-
compound = Compound.from_smiles "CCO"
|
27
|
-
prediction = model.predict compound
|
28
|
-
assert_equal ["false"], prediction[:database_activities]
|
29
|
-
assert_equal "true", prediction[:value]
|
30
|
-
|
31
|
-
# make a dataset prediction
|
32
|
-
compound_dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.mini.csv")
|
33
|
-
prediction = model.predict compound_dataset
|
34
|
-
assert_equal compound_dataset.compounds, prediction.compounds
|
35
|
-
|
36
|
-
assert_equal "Could not find similar compounds with experimental data in the training dataset.", prediction.data_entries[7][3]
|
37
|
-
assert_equal "1 compounds have been removed from neighbors, because they have the same structure as the query compound.", prediction.data_entries[14][3]
|
38
|
-
# cleanup
|
39
|
-
[training_dataset,model,compound_dataset].each{|o| o.delete}
|
40
|
-
end
|
41
|
-
end
|