lazar 0.9.3 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -4
- data/README.md +5 -15
- data/VERSION +1 -1
- data/ext/lazar/extconf.rb +1 -1
- data/ext/lazar/rinstall.R +9 -7
- data/java/CdkDescriptorInfo.class +0 -0
- data/java/CdkDescriptorInfo.java +3 -2
- data/java/CdkDescriptors.class +0 -0
- data/java/CdkDescriptors.java +28 -28
- data/java/Rakefile +3 -3
- data/java/{cdk-1.4.19.jar → cdk-2.0-SNAPSHOT.jar} +0 -0
- data/lazar.gemspec +6 -7
- data/lib/algorithm.rb +2 -11
- data/lib/caret.rb +96 -0
- data/lib/classification.rb +14 -22
- data/lib/compound.rb +21 -87
- data/lib/crossvalidation.rb +80 -279
- data/lib/dataset.rb +105 -174
- data/lib/feature.rb +11 -18
- data/lib/feature_selection.rb +42 -0
- data/lib/import.rb +122 -0
- data/lib/lazar.rb +14 -4
- data/lib/leave-one-out-validation.rb +46 -192
- data/lib/model.rb +319 -128
- data/lib/nanoparticle.rb +98 -0
- data/lib/opentox.rb +7 -4
- data/lib/overwrite.rb +24 -3
- data/lib/physchem.rb +11 -10
- data/lib/regression.rb +7 -137
- data/lib/rest-client-wrapper.rb +0 -6
- data/lib/similarity.rb +65 -0
- data/lib/substance.rb +8 -0
- data/lib/train-test-validation.rb +69 -0
- data/lib/validation-statistics.rb +223 -0
- data/lib/validation.rb +17 -100
- data/scripts/mg2mmol.rb +17 -0
- data/scripts/mirror-enm2test.rb +4 -0
- data/scripts/mmol2-log10.rb +32 -0
- data/test/compound.rb +4 -94
- data/test/data/EPAFHM.medi_log10.csv +92 -0
- data/test/data/EPAFHM.mini_log10.csv +16 -0
- data/test/data/EPAFHM_log10.csv +581 -0
- data/test/data/loael_log10.csv +568 -0
- data/test/dataset.rb +195 -133
- data/test/descriptor.rb +27 -18
- data/test/error.rb +2 -2
- data/test/experiment.rb +4 -4
- data/test/feature.rb +2 -3
- data/test/gridfs.rb +10 -0
- data/test/model-classification.rb +106 -0
- data/test/model-nanoparticle.rb +128 -0
- data/test/model-regression.rb +171 -0
- data/test/model-validation.rb +19 -0
- data/test/nanomaterial-model-validation.rb +55 -0
- data/test/setup.rb +8 -4
- data/test/validation-classification.rb +67 -0
- data/test/validation-nanoparticle.rb +133 -0
- data/test/validation-regression.rb +92 -0
- metadata +50 -121
- data/test/classification.rb +0 -41
- data/test/data/CPDBAS_v5c_1547_29Apr2008part.sdf +0 -13553
- data/test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_mouse_TD50.csv +0 -436
- data/test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_rat_TD50.csv +0 -568
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Hamster.csv +0 -87
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mouse.csv +0 -978
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall.csv +0 -1120
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall_no_duplicates.csv +0 -1113
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity.csv +0 -850
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity_no_duplicates.csv +0 -829
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Rat.csv +0 -1198
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_SingleCellCall.csv +0 -1505
- data/test/data/EPA_v4b_Fathead_Minnow_Acute_Toxicity_LC50_mmol.csv +0 -581
- data/test/data/FDA_v3b_Maximum_Recommended_Daily_Dose_mmol.csv +0 -1217
- data/test/data/LOAEL_log_mg_corrected_smiles.csv +0 -568
- data/test/data/LOAEL_log_mmol_corrected_smiles.csv +0 -568
- data/test/data/boiling_points.ext.sdf +0 -11460
- data/test/data/cpdb_100.csv +0 -101
- data/test/data/hamster_carcinogenicity.ntriples +0 -618
- data/test/data/hamster_carcinogenicity.sdf +0 -2805
- data/test/data/hamster_carcinogenicity.xls +0 -0
- data/test/data/hamster_carcinogenicity.yaml +0 -352
- data/test/dataset-long.rb +0 -114
- data/test/lazar-long.rb +0 -92
- data/test/lazar-physchem-short.rb +0 -31
- data/test/prediction_models.rb +0 -20
- data/test/regression.rb +0 -43
- data/test/validation.rb +0 -108
@@ -0,0 +1,133 @@
|
|
1
|
+
require_relative "setup.rb"
|
2
|
+
|
3
|
+
class NanoparticleValidationTest < MiniTest::Test
|
4
|
+
include OpenTox::Validation
|
5
|
+
|
6
|
+
def setup
|
7
|
+
@training_dataset = Dataset.where(:name => "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles").first
|
8
|
+
@prediction_feature = @training_dataset.features.select{|f| f["name"] == 'log2(Net cell association)'}.first
|
9
|
+
end
|
10
|
+
|
11
|
+
def test_validate_default_nanoparticle_model
|
12
|
+
model = Model::Lazar.create training_dataset: @training_dataset, prediction_feature: @prediction_feature
|
13
|
+
cv = CrossValidation.create model
|
14
|
+
p cv.id
|
15
|
+
#File.open("tmp.pdf","w+"){|f| f.puts cv.correlation_plot format:"pdf"}
|
16
|
+
refute_nil cv.r_squared
|
17
|
+
refute_nil cv.rmse
|
18
|
+
end
|
19
|
+
|
20
|
+
def test_validate_pls_pchem_model
|
21
|
+
algorithms = {
|
22
|
+
:descriptors => {
|
23
|
+
:method => "properties",
|
24
|
+
:categories => ["P-CHEM"]
|
25
|
+
},
|
26
|
+
:prediction => {:method => 'Algorithm::Caret.pls' },
|
27
|
+
:feature_selection => {
|
28
|
+
:method => "Algorithm::FeatureSelection.correlation_filter",
|
29
|
+
},
|
30
|
+
}
|
31
|
+
model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms
|
32
|
+
assert_equal "Algorithm::Caret.pls", model.algorithms[:prediction][:method]
|
33
|
+
cv = CrossValidation.create model
|
34
|
+
p cv.id
|
35
|
+
#File.open("tmp2.pdf","w+"){|f| f.puts cv.correlation_plot format:"pdf"}
|
36
|
+
refute_nil cv.r_squared
|
37
|
+
refute_nil cv.rmse
|
38
|
+
end
|
39
|
+
|
40
|
+
=begin
|
41
|
+
def test_validate_proteomics_pls_pchem_model
|
42
|
+
algorithms = {
|
43
|
+
:descriptors => {
|
44
|
+
:method => "properties",
|
45
|
+
:categories => ["Proteomics"]
|
46
|
+
},
|
47
|
+
:prediction => {:method => 'Algorithm::Caret.pls' },
|
48
|
+
:feature_selection => {
|
49
|
+
:method => "Algorithm::FeatureSelection.correlation_filter",
|
50
|
+
},
|
51
|
+
}
|
52
|
+
model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms
|
53
|
+
assert_equal "Algorithm::Caret.pls", model.algorithms[:prediction][:method]
|
54
|
+
cv = CrossValidation.create model
|
55
|
+
refute_nil cv.r_squared
|
56
|
+
refute_nil cv.rmse
|
57
|
+
end
|
58
|
+
=end
|
59
|
+
|
60
|
+
def test_validate_proteomics_pchem_default_model
|
61
|
+
algorithms = {
|
62
|
+
:descriptors => {
|
63
|
+
:method => "properties",
|
64
|
+
:categories => ["Proteomics","P-CHEM"]
|
65
|
+
},
|
66
|
+
:feature_selection => {
|
67
|
+
:method => "Algorithm::FeatureSelection.correlation_filter",
|
68
|
+
},
|
69
|
+
}
|
70
|
+
model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms
|
71
|
+
cv = CrossValidation.create model
|
72
|
+
refute_nil cv.r_squared
|
73
|
+
refute_nil cv.rmse
|
74
|
+
end
|
75
|
+
|
76
|
+
def test_nanoparticle_fingerprint_model_without_feature_selection
|
77
|
+
algorithms = {
|
78
|
+
:descriptors => {
|
79
|
+
:method => "fingerprint",
|
80
|
+
:type => "MP2D",
|
81
|
+
},
|
82
|
+
:similarity => {
|
83
|
+
:method => "Algorithm::Similarity.tanimoto",
|
84
|
+
:min => 0.1
|
85
|
+
},
|
86
|
+
:feature_selection => nil
|
87
|
+
}
|
88
|
+
model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms
|
89
|
+
cv = CrossValidation.create model
|
90
|
+
refute_nil cv.r_squared
|
91
|
+
refute_nil cv.rmse
|
92
|
+
end
|
93
|
+
|
94
|
+
def test_nanoparticle_fingerprint_weighted_average_model_without_feature_selection
|
95
|
+
algorithms = {
|
96
|
+
:descriptors => {
|
97
|
+
:method => "fingerprint",
|
98
|
+
:type => "MP2D",
|
99
|
+
},
|
100
|
+
:similarity => {
|
101
|
+
:method => "Algorithm::Similarity.tanimoto",
|
102
|
+
:min => 0.1
|
103
|
+
},
|
104
|
+
:prediction => { :method => "OpenTox::Algorithm::Regression.weighted_average" },
|
105
|
+
:feature_selection => nil
|
106
|
+
}
|
107
|
+
model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms
|
108
|
+
cv = CrossValidation.create model
|
109
|
+
refute_nil cv.r_squared
|
110
|
+
refute_nil cv.rmse
|
111
|
+
end
|
112
|
+
|
113
|
+
def test_nanoparticle_fingerprint_model_with_feature_selection
|
114
|
+
algorithms = {
|
115
|
+
:descriptors => {
|
116
|
+
:method => "fingerprint",
|
117
|
+
:type => "MP2D",
|
118
|
+
},
|
119
|
+
:similarity => {
|
120
|
+
:method => "Algorithm::Similarity.tanimoto",
|
121
|
+
:min => 0.1
|
122
|
+
},
|
123
|
+
:feature_selection => {
|
124
|
+
:method => "Algorithm::FeatureSelection.correlation_filter",
|
125
|
+
},
|
126
|
+
}
|
127
|
+
model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms
|
128
|
+
cv = CrossValidation.create model
|
129
|
+
refute_nil cv.r_squared
|
130
|
+
refute_nil cv.rmse
|
131
|
+
end
|
132
|
+
|
133
|
+
end
|
@@ -0,0 +1,92 @@
|
|
1
|
+
require_relative "setup.rb"
|
2
|
+
|
3
|
+
class ValidationRegressionTest < MiniTest::Test
|
4
|
+
include OpenTox::Validation
|
5
|
+
|
6
|
+
# defaults
|
7
|
+
|
8
|
+
def test_default_regression_crossvalidation
|
9
|
+
dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi_log10.csv"
|
10
|
+
model = Model::Lazar.create training_dataset: dataset
|
11
|
+
cv = RegressionCrossValidation.create model
|
12
|
+
assert cv.rmse < 1.5, "RMSE #{cv.rmse} should be smaller than 1.5, this may occur due to unfavorable training/test set splits"
|
13
|
+
assert cv.mae < 1.1, "MAE #{cv.mae} should be smaller than 1.1, this may occur due to unfavorable training/test set splits"
|
14
|
+
assert cv.percent_within_prediction_interval > 80, "Only #{cv.percent_within_prediction_interval.round(2)}% of measurement within prediction interval. This may occur due to unfavorable training/test set splits"
|
15
|
+
end
|
16
|
+
|
17
|
+
# parameters
|
18
|
+
|
19
|
+
def test_regression_crossvalidation_params
|
20
|
+
dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi_log10.csv"
|
21
|
+
algorithms = {
|
22
|
+
:prediction => { :method => "OpenTox::Algorithm::Regression.weighted_average" },
|
23
|
+
:descriptors => { :type => "MACCS", },
|
24
|
+
:similarity => {:min => 0.7}
|
25
|
+
}
|
26
|
+
model = Model::Lazar.create training_dataset: dataset, algorithms: algorithms
|
27
|
+
assert_equal algorithms[:descriptors][:type], model.algorithms[:descriptors][:type]
|
28
|
+
cv = RegressionCrossValidation.create model
|
29
|
+
cv.validation_ids.each do |vid|
|
30
|
+
model = Model::Lazar.find(Validation.find(vid).model_id)
|
31
|
+
assert_equal algorithms[:descriptors][:type], model.algorithms[:descriptors][:type]
|
32
|
+
assert_equal algorithms[:similarity][:min], model.algorithms[:similarity][:min]
|
33
|
+
refute_nil model.training_dataset_id
|
34
|
+
refute_equal dataset.id, model.training_dataset_id
|
35
|
+
end
|
36
|
+
|
37
|
+
refute_nil cv.rmse
|
38
|
+
refute_nil cv.mae
|
39
|
+
end
|
40
|
+
|
41
|
+
def test_physchem_regression_crossvalidation
|
42
|
+
training_dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv")
|
43
|
+
model = Model::Lazar.create training_dataset:training_dataset
|
44
|
+
cv = RegressionCrossValidation.create model
|
45
|
+
refute_nil cv.rmse
|
46
|
+
refute_nil cv.mae
|
47
|
+
end
|
48
|
+
|
49
|
+
# LOO
|
50
|
+
|
51
|
+
def test_regression_loo_validation
|
52
|
+
dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv")
|
53
|
+
model = Model::Lazar.create training_dataset: dataset
|
54
|
+
loo = RegressionLeaveOneOut.create model
|
55
|
+
assert loo.r_squared > 0.34, "R^2 (#{loo.r_squared}) should be larger than 0.034"
|
56
|
+
end
|
57
|
+
|
58
|
+
def test_regression_loo_validation_with_feature_selection
|
59
|
+
dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv")
|
60
|
+
algorithms = {
|
61
|
+
:descriptors => {
|
62
|
+
:method => "calculate_properties",
|
63
|
+
:features => PhysChem.openbabel_descriptors,
|
64
|
+
},
|
65
|
+
:similarity => {
|
66
|
+
:method => "Algorithm::Similarity.weighted_cosine",
|
67
|
+
:min => 0.5
|
68
|
+
},
|
69
|
+
:feature_selection => {
|
70
|
+
:method => "Algorithm::FeatureSelection.correlation_filter",
|
71
|
+
},
|
72
|
+
}
|
73
|
+
model = Model::Lazar.create training_dataset: dataset, algorithms: algorithms
|
74
|
+
assert_raises OpenTox::BadRequestError do
|
75
|
+
loo = RegressionLeaveOneOut.create model
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
# repeated CV
|
80
|
+
|
81
|
+
def test_repeated_crossvalidation
|
82
|
+
dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv")
|
83
|
+
model = Model::Lazar.create training_dataset: dataset
|
84
|
+
repeated_cv = RepeatedCrossValidation.create model
|
85
|
+
repeated_cv.crossvalidations.each do |cv|
|
86
|
+
#assert cv.r_squared > 0.34, "R^2 (#{cv.r_squared}) should be larger than 0.034"
|
87
|
+
#assert_operator cv.accuracy, :>, 0.7, "model accuracy < 0.7, this may happen by chance due to an unfavorable training/test set split"
|
88
|
+
end
|
89
|
+
File.open("tmp.png","w+"){|f| f.puts repeated_cv.correlation_plot}
|
90
|
+
end
|
91
|
+
|
92
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lazar
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.3
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Christoph Helma, Martin Guetlein, Andreas Maunz, Micha Rautenberg, David Vorgrimmler,
|
@@ -9,98 +9,92 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2016-
|
12
|
+
date: 2016-12-21 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
16
16
|
requirement: !ruby/object:Gem::Requirement
|
17
17
|
requirements:
|
18
|
-
- - "
|
18
|
+
- - ">="
|
19
19
|
- !ruby/object:Gem::Version
|
20
|
-
version: '
|
20
|
+
version: '0'
|
21
21
|
type: :runtime
|
22
22
|
prerelease: false
|
23
23
|
version_requirements: !ruby/object:Gem::Requirement
|
24
24
|
requirements:
|
25
|
-
- - "
|
25
|
+
- - ">="
|
26
26
|
- !ruby/object:Gem::Version
|
27
|
-
version: '
|
27
|
+
version: '0'
|
28
28
|
- !ruby/object:Gem::Dependency
|
29
29
|
name: rest-client
|
30
30
|
requirement: !ruby/object:Gem::Requirement
|
31
31
|
requirements:
|
32
|
-
- - "
|
32
|
+
- - ">="
|
33
33
|
- !ruby/object:Gem::Version
|
34
|
-
version: '
|
34
|
+
version: '0'
|
35
35
|
type: :runtime
|
36
36
|
prerelease: false
|
37
37
|
version_requirements: !ruby/object:Gem::Requirement
|
38
38
|
requirements:
|
39
|
-
- - "
|
39
|
+
- - ">="
|
40
40
|
- !ruby/object:Gem::Version
|
41
|
-
version: '
|
41
|
+
version: '0'
|
42
42
|
- !ruby/object:Gem::Dependency
|
43
43
|
name: nokogiri
|
44
44
|
requirement: !ruby/object:Gem::Requirement
|
45
45
|
requirements:
|
46
|
-
- - "
|
46
|
+
- - ">="
|
47
47
|
- !ruby/object:Gem::Version
|
48
|
-
version: '
|
48
|
+
version: '0'
|
49
49
|
type: :runtime
|
50
50
|
prerelease: false
|
51
51
|
version_requirements: !ruby/object:Gem::Requirement
|
52
52
|
requirements:
|
53
|
-
- - "
|
53
|
+
- - ">="
|
54
54
|
- !ruby/object:Gem::Version
|
55
|
-
version: '
|
55
|
+
version: '0'
|
56
56
|
- !ruby/object:Gem::Dependency
|
57
57
|
name: rserve-client
|
58
58
|
requirement: !ruby/object:Gem::Requirement
|
59
59
|
requirements:
|
60
|
-
- - "
|
60
|
+
- - ">="
|
61
61
|
- !ruby/object:Gem::Version
|
62
|
-
version: '0
|
62
|
+
version: '0'
|
63
63
|
type: :runtime
|
64
64
|
prerelease: false
|
65
65
|
version_requirements: !ruby/object:Gem::Requirement
|
66
66
|
requirements:
|
67
|
-
- - "
|
67
|
+
- - ">="
|
68
68
|
- !ruby/object:Gem::Version
|
69
|
-
version: '0
|
69
|
+
version: '0'
|
70
70
|
- !ruby/object:Gem::Dependency
|
71
71
|
name: mongoid
|
72
72
|
requirement: !ruby/object:Gem::Requirement
|
73
73
|
requirements:
|
74
|
-
- - "
|
74
|
+
- - ">="
|
75
75
|
- !ruby/object:Gem::Version
|
76
|
-
version: '
|
76
|
+
version: '0'
|
77
77
|
type: :runtime
|
78
78
|
prerelease: false
|
79
79
|
version_requirements: !ruby/object:Gem::Requirement
|
80
80
|
requirements:
|
81
|
-
- - "
|
81
|
+
- - ">="
|
82
82
|
- !ruby/object:Gem::Version
|
83
|
-
version: '
|
83
|
+
version: '0'
|
84
84
|
- !ruby/object:Gem::Dependency
|
85
85
|
name: openbabel
|
86
86
|
requirement: !ruby/object:Gem::Requirement
|
87
87
|
requirements:
|
88
|
-
- - "~>"
|
89
|
-
- !ruby/object:Gem::Version
|
90
|
-
version: '2.3'
|
91
88
|
- - ">="
|
92
89
|
- !ruby/object:Gem::Version
|
93
|
-
version:
|
90
|
+
version: '0'
|
94
91
|
type: :runtime
|
95
92
|
prerelease: false
|
96
93
|
version_requirements: !ruby/object:Gem::Requirement
|
97
94
|
requirements:
|
98
|
-
- - "~>"
|
99
|
-
- !ruby/object:Gem::Version
|
100
|
-
version: '2.3'
|
101
95
|
- - ">="
|
102
96
|
- !ruby/object:Gem::Version
|
103
|
-
version:
|
97
|
+
version: '0'
|
104
98
|
description: Libraries for lazy structure-activity relationships and read-across.
|
105
99
|
email:
|
106
100
|
- helma@in-silico.ch
|
@@ -129,11 +123,12 @@ files:
|
|
129
123
|
- java/JoelibDescriptors.class
|
130
124
|
- java/JoelibDescriptors.java
|
131
125
|
- java/Rakefile
|
132
|
-
- java/cdk-1.4.19.jar
|
126
|
+
- java/cdk-2.0-SNAPSHOT.jar
|
133
127
|
- java/joelib2.jar
|
134
128
|
- java/log4j.jar
|
135
129
|
- lazar.gemspec
|
136
130
|
- lib/algorithm.rb
|
131
|
+
- lib/caret.rb
|
137
132
|
- lib/classification.rb
|
138
133
|
- lib/compound.rb
|
139
134
|
- lib/crossvalidation.rb
|
@@ -141,77 +136,72 @@ files:
|
|
141
136
|
- lib/error.rb
|
142
137
|
- lib/experiment.rb
|
143
138
|
- lib/feature.rb
|
139
|
+
- lib/feature_selection.rb
|
140
|
+
- lib/import.rb
|
144
141
|
- lib/lazar.rb
|
145
142
|
- lib/leave-one-out-validation.rb
|
146
143
|
- lib/model.rb
|
144
|
+
- lib/nanoparticle.rb
|
147
145
|
- lib/opentox.rb
|
148
146
|
- lib/overwrite.rb
|
149
147
|
- lib/physchem.rb
|
150
148
|
- lib/regression.rb
|
151
149
|
- lib/rest-client-wrapper.rb
|
150
|
+
- lib/similarity.rb
|
151
|
+
- lib/substance.rb
|
152
|
+
- lib/train-test-validation.rb
|
152
153
|
- lib/unique_descriptors.rb
|
154
|
+
- lib/validation-statistics.rb
|
153
155
|
- lib/validation.rb
|
156
|
+
- scripts/mg2mmol.rb
|
157
|
+
- scripts/mirror-enm2test.rb
|
158
|
+
- scripts/mmol2-log10.rb
|
154
159
|
- test/all.rb
|
155
|
-
- test/classification.rb
|
156
160
|
- test/compound.rb
|
157
|
-
- test/data/CPDBAS_v5c_1547_29Apr2008part.sdf
|
158
|
-
- test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_mouse_TD50.csv
|
159
|
-
- test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_rat_TD50.csv
|
160
|
-
- test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Hamster.csv
|
161
|
-
- test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mouse.csv
|
162
|
-
- test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall.csv
|
163
|
-
- test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall_no_duplicates.csv
|
164
|
-
- test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity.csv
|
165
|
-
- test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity_no_duplicates.csv
|
166
|
-
- test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Rat.csv
|
167
|
-
- test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_SingleCellCall.csv
|
168
161
|
- test/data/EPAFHM.csv
|
169
162
|
- test/data/EPAFHM.medi.csv
|
163
|
+
- test/data/EPAFHM.medi_log10.csv
|
170
164
|
- test/data/EPAFHM.mini.csv
|
171
|
-
- test/data/EPA_v4b_Fathead_Minnow_Acute_Toxicity_LC50_mmol.csv
|
172
|
-
- test/data/FDA_v3b_Maximum_Recommended_Daily_Dose_mmol.csv
|
165
|
+
- test/data/EPAFHM.mini_log10.csv
|
166
|
+
- test/data/EPAFHM_log10.csv
|
173
167
|
- test/data/ISSCAN-multi.csv
|
174
|
-
- test/data/LOAEL_log_mg_corrected_smiles.csv
|
175
|
-
- test/data/LOAEL_log_mmol_corrected_smiles.csv
|
176
168
|
- test/data/LOAEL_mmol_corrected_smiles.csv
|
177
169
|
- test/data/acetaldehyde.sdf
|
178
170
|
- test/data/batch_prediction.csv
|
179
171
|
- test/data/batch_prediction_inchi_small.csv
|
180
172
|
- test/data/batch_prediction_smiles_small.csv
|
181
|
-
- test/data/boiling_points.ext.sdf
|
182
|
-
- test/data/cpdb_100.csv
|
183
173
|
- test/data/hamster_carcinogenicity.csv
|
184
174
|
- test/data/hamster_carcinogenicity.json
|
185
175
|
- test/data/hamster_carcinogenicity.mini.bool_float.csv
|
186
176
|
- test/data/hamster_carcinogenicity.mini.bool_int.csv
|
187
177
|
- test/data/hamster_carcinogenicity.mini.bool_string.csv
|
188
178
|
- test/data/hamster_carcinogenicity.mini.csv
|
189
|
-
- test/data/hamster_carcinogenicity.ntriples
|
190
|
-
- test/data/hamster_carcinogenicity.sdf
|
191
|
-
- test/data/hamster_carcinogenicity.xls
|
192
|
-
- test/data/hamster_carcinogenicity.yaml
|
193
179
|
- test/data/hamster_carcinogenicity_with_errors.csv
|
194
180
|
- test/data/kazius.csv
|
195
181
|
- test/data/loael.csv
|
182
|
+
- test/data/loael_log10.csv
|
196
183
|
- test/data/multi_cell_call.csv
|
197
184
|
- test/data/multi_cell_call_no_dup.csv
|
198
185
|
- test/data/multicolumn.csv
|
199
186
|
- test/data/rat_feature_dataset.csv
|
200
187
|
- test/data/wrong_dataset.csv
|
201
|
-
- test/dataset-long.rb
|
202
188
|
- test/dataset.rb
|
203
189
|
- test/default_environment.rb
|
204
190
|
- test/descriptor.rb
|
205
191
|
- test/error.rb
|
206
192
|
- test/experiment.rb
|
207
193
|
- test/feature.rb
|
208
|
-
- test/lazar-long.rb
|
209
|
-
- test/lazar-physchem-short.rb
|
210
|
-
- test/prediction_models.rb
|
211
|
-
- test/regression.rb
|
194
|
+
- test/gridfs.rb
|
195
|
+
- test/model-classification.rb
|
196
|
+
- test/model-nanoparticle.rb
|
197
|
+
- test/model-regression.rb
|
198
|
+
- test/model-validation.rb
|
199
|
+
- test/nanomaterial-model-validation.rb
|
212
200
|
- test/setup.rb
|
213
201
|
- test/test_environment.rb
|
214
|
-
- test/validation.rb
|
202
|
+
- test/validation-classification.rb
|
203
|
+
- test/validation-nanoparticle.rb
|
204
|
+
- test/validation-regression.rb
|
215
205
|
homepage: http://github.com/opentox/lazar
|
216
206
|
licenses:
|
217
207
|
- GPL-3.0
|
@@ -236,65 +226,4 @@ rubygems_version: 2.5.1
|
|
236
226
|
signing_key:
|
237
227
|
specification_version: 4
|
238
228
|
summary: Lazar framework
|
239
|
-
test_files:
|
240
|
-
- test/all.rb
|
241
|
-
- test/classification.rb
|
242
|
-
- test/compound.rb
|
243
|
-
- test/data/CPDBAS_v5c_1547_29Apr2008part.sdf
|
244
|
-
- test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_mouse_TD50.csv
|
245
|
-
- test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_rat_TD50.csv
|
246
|
-
- test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Hamster.csv
|
247
|
-
- test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mouse.csv
|
248
|
-
- test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall.csv
|
249
|
-
- test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall_no_duplicates.csv
|
250
|
-
- test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity.csv
|
251
|
-
- test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity_no_duplicates.csv
|
252
|
-
- test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Rat.csv
|
253
|
-
- test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_SingleCellCall.csv
|
254
|
-
- test/data/EPAFHM.csv
|
255
|
-
- test/data/EPAFHM.medi.csv
|
256
|
-
- test/data/EPAFHM.mini.csv
|
257
|
-
- test/data/EPA_v4b_Fathead_Minnow_Acute_Toxicity_LC50_mmol.csv
|
258
|
-
- test/data/FDA_v3b_Maximum_Recommended_Daily_Dose_mmol.csv
|
259
|
-
- test/data/ISSCAN-multi.csv
|
260
|
-
- test/data/LOAEL_log_mg_corrected_smiles.csv
|
261
|
-
- test/data/LOAEL_log_mmol_corrected_smiles.csv
|
262
|
-
- test/data/LOAEL_mmol_corrected_smiles.csv
|
263
|
-
- test/data/acetaldehyde.sdf
|
264
|
-
- test/data/batch_prediction.csv
|
265
|
-
- test/data/batch_prediction_inchi_small.csv
|
266
|
-
- test/data/batch_prediction_smiles_small.csv
|
267
|
-
- test/data/boiling_points.ext.sdf
|
268
|
-
- test/data/cpdb_100.csv
|
269
|
-
- test/data/hamster_carcinogenicity.csv
|
270
|
-
- test/data/hamster_carcinogenicity.json
|
271
|
-
- test/data/hamster_carcinogenicity.mini.bool_float.csv
|
272
|
-
- test/data/hamster_carcinogenicity.mini.bool_int.csv
|
273
|
-
- test/data/hamster_carcinogenicity.mini.bool_string.csv
|
274
|
-
- test/data/hamster_carcinogenicity.mini.csv
|
275
|
-
- test/data/hamster_carcinogenicity.ntriples
|
276
|
-
- test/data/hamster_carcinogenicity.sdf
|
277
|
-
- test/data/hamster_carcinogenicity.xls
|
278
|
-
- test/data/hamster_carcinogenicity.yaml
|
279
|
-
- test/data/hamster_carcinogenicity_with_errors.csv
|
280
|
-
- test/data/kazius.csv
|
281
|
-
- test/data/loael.csv
|
282
|
-
- test/data/multi_cell_call.csv
|
283
|
-
- test/data/multi_cell_call_no_dup.csv
|
284
|
-
- test/data/multicolumn.csv
|
285
|
-
- test/data/rat_feature_dataset.csv
|
286
|
-
- test/data/wrong_dataset.csv
|
287
|
-
- test/dataset-long.rb
|
288
|
-
- test/dataset.rb
|
289
|
-
- test/default_environment.rb
|
290
|
-
- test/descriptor.rb
|
291
|
-
- test/error.rb
|
292
|
-
- test/experiment.rb
|
293
|
-
- test/feature.rb
|
294
|
-
- test/lazar-long.rb
|
295
|
-
- test/lazar-physchem-short.rb
|
296
|
-
- test/prediction_models.rb
|
297
|
-
- test/regression.rb
|
298
|
-
- test/setup.rb
|
299
|
-
- test/test_environment.rb
|
300
|
-
- test/validation.rb
|
229
|
+
test_files: []
|
data/test/classification.rb
DELETED
@@ -1,41 +0,0 @@
|
|
1
|
-
require_relative "setup.rb"
|
2
|
-
|
3
|
-
class LazarClassificationTest < MiniTest::Test
|
4
|
-
|
5
|
-
def test_lazar_classification
|
6
|
-
training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"hamster_carcinogenicity.csv")
|
7
|
-
model = Model::LazarClassification.create training_dataset
|
8
|
-
|
9
|
-
[ {
|
10
|
-
:compound => OpenTox::Compound.from_inchi("InChI=1S/C6H6/c1-2-4-6-5-3-1/h1-6H"),
|
11
|
-
:prediction => "false",
|
12
|
-
:confidence => 0.25281385281385277,
|
13
|
-
:nr_neighbors => 11
|
14
|
-
},{
|
15
|
-
:compound => OpenTox::Compound.from_smiles("c1ccccc1NN"),
|
16
|
-
:prediction => "false",
|
17
|
-
:confidence => 0.3639589577089577,
|
18
|
-
:nr_neighbors => 14
|
19
|
-
} ].each do |example|
|
20
|
-
prediction = model.predict example[:compound]
|
21
|
-
assert_equal example[:prediction], prediction[:value]
|
22
|
-
#assert_equal example[:confidence], prediction[:confidence]
|
23
|
-
#assert_equal example[:nr_neighbors], prediction[:neighbors].size
|
24
|
-
end
|
25
|
-
|
26
|
-
compound = Compound.from_smiles "CCO"
|
27
|
-
prediction = model.predict compound
|
28
|
-
assert_equal ["false"], prediction[:database_activities]
|
29
|
-
assert_equal "true", prediction[:value]
|
30
|
-
|
31
|
-
# make a dataset prediction
|
32
|
-
compound_dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.mini.csv")
|
33
|
-
prediction = model.predict compound_dataset
|
34
|
-
assert_equal compound_dataset.compounds, prediction.compounds
|
35
|
-
|
36
|
-
assert_equal "Could not find similar compounds with experimental data in the training dataset.", prediction.data_entries[7][3]
|
37
|
-
assert_equal "1 compounds have been removed from neighbors, because they have the same structure as the query compound.", prediction.data_entries[14][3]
|
38
|
-
# cleanup
|
39
|
-
[training_dataset,model,compound_dataset].each{|o| o.delete}
|
40
|
-
end
|
41
|
-
end
|