lazar 0.0.7 → 0.0.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +3 -0
- data/README.md +2 -1
- data/VERSION +1 -1
- data/ext/lazar/extconf.rb +15 -76
- data/ext/lazar/rinstall.R +9 -0
- data/lazar.gemspec +7 -7
- data/lib/classification.rb +5 -78
- data/lib/compound.rb +201 -44
- data/lib/crossvalidation.rb +224 -121
- data/lib/dataset.rb +83 -93
- data/lib/error.rb +1 -1
- data/lib/experiment.rb +99 -0
- data/lib/feature.rb +2 -54
- data/lib/lazar.rb +47 -34
- data/lib/leave-one-out-validation.rb +205 -0
- data/lib/model.rb +131 -76
- data/lib/opentox.rb +2 -2
- data/lib/overwrite.rb +37 -0
- data/lib/physchem.rb +133 -0
- data/lib/regression.rb +117 -189
- data/lib/rest-client-wrapper.rb +4 -5
- data/lib/unique_descriptors.rb +6 -7
- data/lib/validation.rb +63 -69
- data/test/all.rb +2 -2
- data/test/classification.rb +41 -0
- data/test/compound.rb +116 -7
- data/test/data/LOAEL_log_mg_corrected_smiles.csv +567 -567
- data/test/data/LOAEL_log_mmol_corrected_smiles.csv +566 -566
- data/test/data/LOAEL_mmol_corrected_smiles.csv +568 -0
- data/test/data/batch_prediction.csv +25 -0
- data/test/data/batch_prediction_inchi_small.csv +4 -0
- data/test/data/batch_prediction_smiles_small.csv +4 -0
- data/test/data/hamster_carcinogenicity.json +3 -0
- data/test/data/loael.csv +568 -0
- data/test/dataset-long.rb +5 -8
- data/test/dataset.rb +31 -11
- data/test/default_environment.rb +11 -0
- data/test/descriptor.rb +26 -41
- data/test/error.rb +1 -3
- data/test/experiment.rb +301 -0
- data/test/feature.rb +22 -10
- data/test/lazar-long.rb +43 -23
- data/test/lazar-physchem-short.rb +19 -16
- data/test/prediction_models.rb +20 -0
- data/test/regression.rb +43 -0
- data/test/setup.rb +3 -1
- data/test/test_environment.rb +10 -0
- data/test/validation.rb +92 -26
- metadata +64 -38
- data/lib/SMARTS_InteLigand.txt +0 -983
- data/lib/bbrc.rb +0 -165
- data/lib/descriptor.rb +0 -247
- data/lib/neighbor.rb +0 -25
- data/lib/similarity.rb +0 -58
- data/mongoid.yml +0 -8
- data/test/descriptor-long.rb +0 -26
- data/test/fminer-long.rb +0 -38
- data/test/fminer.rb +0 -52
- data/test/lazar-fminer.rb +0 -50
- data/test/lazar-regression.rb +0 -27
data/test/dataset.rb
CHANGED
@@ -8,10 +8,22 @@ class DatasetTest < MiniTest::Test
|
|
8
8
|
d1 = Dataset.new
|
9
9
|
d1.save
|
10
10
|
datasets = Dataset.all
|
11
|
-
|
11
|
+
assert datasets.first.is_a?(Dataset), "#{datasets.first} is not a Dataset."
|
12
12
|
d1.delete
|
13
13
|
end
|
14
14
|
|
15
|
+
# Imports the small SMILES and InChI batch prediction files and checks that
# each one yields a dataset with three compounds, both on the object returned
# by the import and on the copy reloaded through Dataset.find.
def test_create_without_features_smiles_and_inchi
  ["smiles", "inchi"].each do |type|
    d = Dataset.from_csv_file File.join(DATA_DIR,"batch_prediction_#{type}_small.csv")
    assert_equal Dataset, d.class
    refute_nil d.id
    assert_equal 3, d.compounds.size
    # The reloaded dataset used to be fetched but never inspected, so the
    # lookup was not actually tested. Assert on it like the other
    # reload checks in this file do.
    # NOTE(review): assumes from_csv_file persists the dataset - confirm.
    dataset = Dataset.find d.id
    refute_nil dataset, "Dataset #{d.id} could not be reloaded."
    assert_equal 3, dataset.compounds.size
    d.delete
  end
end
|
26
|
+
|
15
27
|
def test_create_empty
|
16
28
|
d = Dataset.new
|
17
29
|
assert_equal Dataset, d.class
|
@@ -57,19 +69,15 @@ class DatasetTest < MiniTest::Test
|
|
57
69
|
assert_equal 3, d.compounds.size
|
58
70
|
assert_equal 2, d.features.size
|
59
71
|
assert_equal [[1,2],[4,5],[6,7]], d.data_entries
|
60
|
-
d.
|
72
|
+
d.save
|
61
73
|
# check if dataset has been saved correctly
|
62
74
|
new_dataset = Dataset.find d.id
|
63
75
|
assert_equal 3, new_dataset.compounds.size
|
64
76
|
assert_equal 2, new_dataset.features.size
|
65
77
|
assert_equal [[1,2],[4,5],[6,7]], new_dataset.data_entries
|
66
78
|
d.delete
|
67
|
-
|
68
|
-
|
69
|
-
end
|
70
|
-
assert_raises Mongoid::Errors::DocumentNotFound do
|
71
|
-
Dataset.find new_dataset.id
|
72
|
-
end
|
79
|
+
assert_nil Dataset.find d.id
|
80
|
+
assert_nil Dataset.find new_dataset.id
|
73
81
|
end
|
74
82
|
|
75
83
|
def test_dataset_accessors
|
@@ -78,7 +86,7 @@ class DatasetTest < MiniTest::Test
|
|
78
86
|
new_dataset = Dataset.find d.id
|
79
87
|
# get metadata
|
80
88
|
assert_match "multicolumn.csv", new_dataset.source
|
81
|
-
assert_equal "multicolumn
|
89
|
+
assert_equal "multicolumn", new_dataset.name
|
82
90
|
# get features
|
83
91
|
assert_equal 6, new_dataset.features.size
|
84
92
|
assert_equal 7, new_dataset.compounds.size
|
@@ -119,7 +127,7 @@ class DatasetTest < MiniTest::Test
|
|
119
127
|
original_csv.shift
|
120
128
|
csv.each_with_index do |row,i|
|
121
129
|
compound = Compound.from_smiles row.shift
|
122
|
-
original_compound = Compound.from_smiles original_csv[i].shift
|
130
|
+
original_compound = Compound.from_smiles original_csv[i].shift.strip
|
123
131
|
assert_equal original_compound.inchi, compound.inchi
|
124
132
|
row.each_with_index do |v,j|
|
125
133
|
if v.numeric?
|
@@ -161,7 +169,7 @@ class DatasetTest < MiniTest::Test
|
|
161
169
|
def test_from_csv2
|
162
170
|
File.open("#{DATA_DIR}/temp_test.csv", "w+") { |file| file.write("SMILES,Hamster\nCC=O,true\n ,true\nO=C(N),true") }
|
163
171
|
dataset = Dataset.from_csv_file "#{DATA_DIR}/temp_test.csv"
|
164
|
-
assert_equal "Cannot parse SMILES compound '
|
172
|
+
assert_equal "Cannot parse SMILES compound '' at position 3, all entries are ignored.", dataset.warnings.join
|
165
173
|
File.delete "#{DATA_DIR}/temp_test.csv"
|
166
174
|
dataset.features.each{|f| feature = Feature.find f.id; feature.delete}
|
167
175
|
dataset.delete
|
@@ -195,5 +203,17 @@ class DatasetTest < MiniTest::Test
|
|
195
203
|
assert_equal 0.00323, d2.data_entries[5][0]
|
196
204
|
end
|
197
205
|
|
206
|
+
# Splits the loael.csv dataset into 10 folds and checks every dataset in every fold:
# one data entry per compound id, and the first dataset of a fold has at least
# as many unique compounds as the second.
# NOTE(review): fold[0]/fold[1] are presumably the training and test split -
# confirm against Dataset#folds.
def test_folds
  loael = Dataset.from_csv_file(File.join(DATA_DIR,"loael.csv"))
  loael.folds(10).each do |fold|
    fold.each do |part|
      entry_count = part.data_entries.size
      id_count = part.compound_ids.size
      assert_equal entry_count, id_count
      # duplicates are allowed, so the unique count can only be smaller or equal
      assert_operator id_count, :>=, part.compound_ids.uniq.size
    end
    first_unique = fold[0].compound_ids.uniq.size
    second_unique = fold[1].compound_ids.uniq.size
    assert_operator first_unique, :>=, second_unique
  end
end
|
217
|
+
|
198
218
|
end
|
199
219
|
|
@@ -0,0 +1,11 @@
|
|
1
|
+
require 'minitest/autorun'
|
2
|
+
require_relative '../lib/lazar.rb'
|
3
|
+
include OpenTox
|
4
|
+
# Verifies that the LAZAR_ENV, MONGOID_ENV and RACK_ENV environment variables
# and the default Mongoid client's database name are all "production".
class DefaultEnvironmentTest < MiniTest::Test
  def test_lazar_environment
    %w[LAZAR_ENV MONGOID_ENV RACK_ENV].each do |variable|
      assert_equal "production", ENV[variable]
    end
    default_client = Mongoid.clients["default"]
    assert_equal "production", default_client["database"]
  end
end
|
data/test/descriptor.rb
CHANGED
@@ -4,80 +4,65 @@ class DescriptorTest < MiniTest::Test
|
|
4
4
|
|
5
5
|
def test_list
|
6
6
|
# check available descriptors
|
7
|
-
|
8
|
-
assert_equal
|
9
|
-
|
10
|
-
assert_equal
|
11
|
-
sum = 0
|
12
|
-
[ @descriptors, @descriptor_values ].each do |desc|
|
13
|
-
{"Openbabel"=>16,"Cdk"=>(desc==@descriptors ? 50 : 295),"Joelib"=>45}.each do |k,v|
|
14
|
-
assert_equal v,desc.select{|x| x=~/^#{k}\./}.size,"wrong num #{k} descriptors"
|
15
|
-
sum += v
|
16
|
-
end
|
17
|
-
end
|
18
|
-
assert_equal (111+356),sum
|
7
|
+
assert_equal 355,PhysChem.descriptors.size,"incorrect number of physchem descriptors"
|
8
|
+
assert_equal 15,PhysChem.openbabel_descriptors.size,"incorrect number of Openbabel descriptors"
|
9
|
+
assert_equal 295,PhysChem.cdk_descriptors.size,"incorrect number of Cdk descriptors"
|
10
|
+
assert_equal 45,PhysChem.joelib_descriptors.size,"incorrect number of Joelib descriptors"
|
19
11
|
end
|
20
12
|
|
21
13
|
def test_smarts
|
22
14
|
c = OpenTox::Compound.from_smiles "N=C=C1CCC(=F=FO)C1"
|
23
15
|
File.open("tmp.png","w+"){|f| f.puts c.png}
|
24
16
|
s = Smarts.find_or_create_by(:smarts => "F=F")
|
25
|
-
result =
|
17
|
+
result = c.smarts_match [s]
|
26
18
|
assert_equal [1], result
|
27
19
|
smarts = ["CC", "C", "C=C", "CO", "F=F", "C1CCCC1", "NN"].collect{|s| Smarts.find_or_create_by(:smarts => s)}
|
28
|
-
result =
|
20
|
+
result = c.smarts_match smarts
|
29
21
|
assert_equal [1, 1, 1, 0, 1, 1, 0], result
|
30
22
|
smarts_count = [10, 6, 2, 0, 2, 10, 0]
|
31
|
-
result =
|
23
|
+
result = c.smarts_match smarts, true
|
32
24
|
assert_equal smarts_count, result
|
33
25
|
end
|
34
26
|
|
35
27
|
def test_compound_openbabel_single
|
36
28
|
c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N"
|
37
|
-
result =
|
38
|
-
assert_equal 1.12518, result.first
|
29
|
+
result = c.physchem [PhysChem.find_or_create_by(:name => "Openbabel.logP")]
|
30
|
+
assert_equal 1.12518, result.first.last.round(5)
|
39
31
|
end
|
40
32
|
|
41
33
|
def test_compound_cdk_single
|
42
34
|
c = OpenTox::Compound.from_smiles "c1ccccc1"
|
43
|
-
result =
|
44
|
-
assert_equal
|
35
|
+
result = c.physchem [PhysChem.find_or_create_by(:name => "Cdk.AtomCount.nAtom")]
|
36
|
+
assert_equal 12, result.first.last
|
45
37
|
c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N"
|
46
|
-
result =
|
47
|
-
assert_equal
|
48
|
-
result = OpenTox::Algorithm::Descriptor.physchem c, ["Cdk.CarbonTypes"]
|
38
|
+
result = c.physchem [PhysChem.find_or_create_by(:name => "Cdk.AtomCount.nAtom")]
|
39
|
+
assert_equal 17, result.first.last
|
49
40
|
c_types = {"Cdk.CarbonTypes.C1SP1"=>1, "Cdk.CarbonTypes.C2SP1"=>0, "Cdk.CarbonTypes.C1SP2"=>0, "Cdk.CarbonTypes.C2SP2"=>1, "Cdk.CarbonTypes.C3SP2"=>0, "Cdk.CarbonTypes.C1SP3"=>2, "Cdk.CarbonTypes.C2SP3"=>1, "Cdk.CarbonTypes.C3SP3"=>1, "Cdk.CarbonTypes.C4SP3"=>0}
|
50
|
-
|
41
|
+
physchem_features = c_types.collect{|t,nr| PhysChem.find_or_create_by(:name => t)}
|
42
|
+
result = c.physchem physchem_features
|
43
|
+
assert_equal [1, 0, 0, 1, 0, 2, 1, 1, 0], result.values
|
51
44
|
end
|
52
45
|
|
53
46
|
def test_compound_joelib_single
|
54
47
|
c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N"
|
55
|
-
result =
|
56
|
-
assert_equal
|
48
|
+
result = c.physchem [PhysChem.find_or_create_by(:name => "Joelib.LogP")]
|
49
|
+
assert_equal 2.65908, result.first.last
|
57
50
|
end
|
58
51
|
|
59
52
|
def test_compound_all
|
60
53
|
c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N"
|
61
|
-
result =
|
62
|
-
|
63
|
-
|
64
|
-
assert_equal
|
54
|
+
result = c.physchem PhysChem.descriptors
|
55
|
+
amr = PhysChem.find_or_create_by(:name => "Cdk.ALOGP.AMR", :library => "Cdk")
|
56
|
+
sbonds = PhysChem.find_by(:name => "Openbabel.sbonds")
|
57
|
+
assert_equal 30.8723, result[amr.id.to_s]
|
58
|
+
assert_equal 5, result[sbonds.id.to_s]
|
65
59
|
end
|
66
60
|
|
67
61
|
def test_compound_descriptor_parameters
|
68
62
|
c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N"
|
69
|
-
result =
|
70
|
-
assert_equal
|
71
|
-
assert_equal [1.12518, 17.0,
|
72
|
-
end
|
73
|
-
|
74
|
-
def test_dataset_descriptor_parameters
|
75
|
-
dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"hamster_carcinogenicity.mini.csv")
|
76
|
-
d = OpenTox::Algorithm::Descriptor.physchem dataset, [ "Openbabel.logP", "Cdk.AtomCount", "Cdk.CarbonTypes", "Joelib.LogP" ]
|
77
|
-
assert_kind_of Dataset, d
|
78
|
-
assert_equal dataset.compounds, d.compounds
|
79
|
-
assert_equal dataset.compounds.size, d.data_entries.size
|
80
|
-
assert_equal 12, d.data_entries.first.size
|
63
|
+
result = c.physchem [ "Openbabel.logP", "Cdk.AtomCount.nAtom", "Joelib.LogP" ].collect{|d| PhysChem.find_or_create_by(:name => d)}
|
64
|
+
assert_equal 3, result.size
|
65
|
+
assert_equal [1.12518, 17.0, 2.65908], result.values.collect{|v| v.round 5}
|
81
66
|
end
|
82
67
|
|
83
68
|
end
|
data/test/error.rb
CHANGED
@@ -4,9 +4,7 @@ class ErrorTest < MiniTest::Test
|
|
4
4
|
|
5
5
|
def test_bad_request
|
6
6
|
object = OpenTox::Feature.new
|
7
|
-
|
8
|
-
response = OpenTox::Feature.find(object.id)
|
9
|
-
end
|
7
|
+
assert_nil OpenTox::Feature.find(object.id)
|
10
8
|
end
|
11
9
|
|
12
10
|
def test_error_methods
|
data/test/experiment.rb
ADDED
@@ -0,0 +1,301 @@
|
|
1
|
+
require_relative "setup.rb"
|
2
|
+
|
3
|
+
# Experiments that build lazar models with different settings for one or more
# datasets and check the stored results.
#
# Every test starts with `skip`, so none of the bodies currently run.
#
# NOTE(review): the settings hashes are not uniform - some tests use
# :algorithm / :neighbor_algorithm_parameter, others :model_algorithm /
# :neighbor_algorithm_parameters, and results are read with symbol keys in
# some tests and string keys in others. The keys are kept exactly as they
# were; confirm which spelling Experiment#run expects before enabling a test.
class ExperimentTest < MiniTest::Test

  def test_regression_experiment
    skip
    datasets = [
      "EPAFHM.medi.csv",
      #"EPAFHM.csv",
      #"FDA_v3b_Maximum_Recommended_Daily_Dose_mmol.csv",
      "LOAEL_mmol_corrected_smiles.csv"
    ]
    experiment = Experiment.create(
      :name => "Default regression for datasets #{datasets}.",
      :dataset_ids => import_dataset_ids(datasets),
      :model_settings => [
        {
          :algorithm => "OpenTox::Model::LazarRegression",
        }
      ]
    )
    #experiment.run
    puts experiment.report.to_yaml
    assert_equal datasets.size, experiment.results.size
    assert_result_ids experiment, 1
  end

  def test_classification_experiment
    skip
    datasets = [ "hamster_carcinogenicity.csv" ]
    experiment = Experiment.create(
      :name => "Fminer vs fingerprint classification for datasets #{datasets}.",
      :dataset_ids => import_dataset_ids(datasets),
      :model_settings => [
        {
          :algorithm => "OpenTox::Model::LazarClassification",
        },{
          :algorithm => "OpenTox::Model::LazarClassification",
          :neighbor_algorithm_parameter => {:min_sim => 0.3}
        },
        #{
          #:algorithm => "OpenTox::Model::LazarFminerClassification",
        #}
      ]
    )
    #experiment.run
    #experiment = Experiment.find "55f944a22b72ed7de2000000"
    puts experiment.report.to_yaml
    assert_result_ids experiment, 2
  end

  def test_regression_fingerprints
    skip
    datasets = [
      "EPAFHM.medi.csv",
      #"LOAEL_mmol_corrected_smiles.csv"
    ]
    min_sims = [0.3,0.7]
    #min_sims = [0.7]
    #types = ["FP2","FP3","FP4","MACCS","MP2D"]
    types = ["MP2D","FP3"]
    experiment = Experiment.create(
      :name => "Fingerprint regression with different types for datasets #{datasets}.",
      :dataset_ids => import_dataset_ids(datasets),
    )
    add_model_settings(
      experiment,
      "OpenTox::Model::LazarRegression",
      "OpenTox::Algorithm::Regression.weighted_average",
      types, min_sims, ["fingerprint_neighbors"]
    )
    experiment.run
    #experiment = Experiment.find '56029cb92b72ed673d000000'
    p experiment.id
    assert_crossvalidation_parameters experiment
    puts experiment.report.to_yaml
    p experiment.summary
  end

  def test_mpd_fingerprints
    skip
    datasets = [
      "EPAFHM.medi.csv",
    ]
    types = ["FP2","MP2D"]
    experiment = Experiment.create(
      :name => "FP2 vs MP2D fingerprint regression for datasets #{datasets}.",
      :dataset_ids => import_dataset_ids(datasets),
    )
    # Built inline rather than through add_model_settings because this test
    # uses a different set of setting keys (see the class comment).
    types.each do |type|
      experiment.model_settings << {
        :algorithm => "OpenTox::Model::LazarRegression",
        :neighbor_algorithm => "fingerprint_neighbors",
        :neighbor_algorithm_parameter => {
          :type => type,
          :min_sim => 0.7,
        }
      }
    end
    experiment.run
    p experiment.id
    #experiment = Experiment.find '55ffd0c02b72ed123c000000'
    p experiment
    puts experiment.report.to_yaml
  end

  def test_multiple_datasets
    skip
    datasets = [
      "EPAFHM.medi.csv",
      "LOAEL_mmol_corrected_smiles.csv"
    ]
    min_sims = [0.3]
    types = ["FP2"]
    experiment = Experiment.create(
      :name => "Fingerprint regression with mutiple datasets #{datasets}.",
      :dataset_ids => import_dataset_ids(datasets),
    )
    add_model_settings(
      experiment,
      "OpenTox::Model::LazarRegression",
      "OpenTox::Algorithm::Regression.weighted_average",
      types, min_sims, ["fingerprint_neighbors"]
    )
    experiment.run
    p experiment.id
    assert_crossvalidation_parameters experiment
    puts experiment.report.to_yaml
    p experiment.summary
  end

  def test_mpd_mna_regression_fingerprints
    skip
    datasets = [
      "EPAFHM.medi.csv",
      #"hamster_carcinogenicity.csv"
    ]
    min_sims = [0.0,0.3]
    types = ["MP2D","MNA"]
    neighbor_algos = [
      "fingerprint_neighbors",
      "fingerprint_count_neighbors",
    ]
    experiment = Experiment.create(
      :name => "MNA vs MPD descriptors",
      :dataset_ids => import_dataset_ids(datasets),
    )
    add_model_settings(
      experiment,
      "OpenTox::Model::LazarRegression",
      "OpenTox::Algorithm::Regression.weighted_average",
      types, min_sims, neighbor_algos
    )
    experiment.run
    #experiment = Experiment.find '56029cb92b72ed673d000000'
    p experiment.id
    puts experiment.report.to_yaml
    #p experiment.summary
    assert_crossvalidation_parameters experiment, print_results: true
  end

  def test_mpd_mna_classification_fingerprints
    skip
    datasets = [
      #"EPAFHM.medi.csv",
      "hamster_carcinogenicity.csv"
    ]
    min_sims = [0.0,0.3]
    types = ["MP2D","MNA"]
    neighbor_algos = [
      "fingerprint_count_neighbors",
      "fingerprint_neighbors",
    ]
    experiment = Experiment.create(
      :name => "MNA vs MPD descriptors",
      :dataset_ids => import_dataset_ids(datasets),
    )
    add_model_settings(
      experiment,
      "OpenTox::Model::LazarClassification",
      "OpenTox::Algorithm::Classification.weighted_majority_vote",
      types, min_sims, neighbor_algos
    )
    experiment.run
    #experiment = Experiment.find '56029cb92b72ed673d000000'
    p experiment.id
    puts experiment.report.to_yaml
    #p experiment.summary
    assert_crossvalidation_parameters experiment
  end

  private

  # Imports every CSV file name in +files+ from DATA_DIR and returns the ids
  # of the resulting datasets, in the same order.
  def import_dataset_ids(files)
    files.collect{|f| Dataset.from_csv_file(File.join(DATA_DIR, f)).id}
  end

  # Appends one settings hash to experiment.model_settings for every
  # combination of type, min_sim and neighbor algorithm. The last list varies
  # fastest, i.e. the same order as nested each loops over types, min_sims
  # and neighbor_algorithms.
  def add_model_settings(experiment, model_algorithm, prediction_algorithm, types, min_sims, neighbor_algorithms)
    types.product(min_sims, neighbor_algorithms).each do |type, min_sim, neighbor_algorithm|
      experiment.model_settings << {
        :model_algorithm => model_algorithm,
        :prediction_algorithm => prediction_algorithm,
        :neighbor_algorithm => neighbor_algorithm,
        :neighbor_algorithm_parameters => {
          :type => type,
          :min_sim => min_sim,
        }
      }
    end
  end

  # Asserts that each dataset in experiment.results has +models_per_dataset+
  # entries and that every entry carries BSON object ids for its model and
  # its repeated crossvalidation.
  def assert_result_ids(experiment, models_per_dataset)
    experiment.results.each do |_dataset_id, result|
      assert_equal models_per_dataset, result.size
      result.each do |r|
        assert_kind_of BSON::ObjectId, r[:model_id]
        assert_kind_of BSON::ObjectId, r[:repeated_crossvalidation_id]
      end
    end
  end

  # For every result, compares the neighbor algorithm parameters of the
  # result's model with those of each model referenced by the validations of
  # its repeated crossvalidation: type and min_sim must be equal, the
  # training dataset id must differ.
  # Pass print_results: true to dump each result entry with `p` first.
  def assert_crossvalidation_parameters(experiment, print_results: false)
    experiment.results.each do |_dataset, result|
      result.each do |r|
        p r if print_results
        # TODO fix r["model_id"]
        params = Model::Lazar.find(r["model_id"])[:neighbor_algorithm_parameters]
        RepeatedCrossValidation.find(r["repeated_crossvalidation_id"]).crossvalidations.each do |cv|
          cv.validation_ids.each do |vid|
            model_params = Model::Lazar.find(Validation.find(vid).model_id)[:neighbor_algorithm_parameters]
            assert_equal params[:type], model_params[:type]
            assert_equal params[:min_sim], model_params[:min_sim]
            refute_equal params[:training_dataset_id], model_params[:training_dataset_id]
          end
        end
      end
    end
  end
end
|
data/test/feature.rb
CHANGED
@@ -26,16 +26,13 @@ class FeatureTest < MiniTest::Test
|
|
26
26
|
|
27
27
|
id = @feature2.id
|
28
28
|
@feature2.delete
|
29
|
-
|
30
|
-
OpenTox::Feature.find(id)
|
31
|
-
end
|
29
|
+
assert_nil OpenTox::Feature.find(id)
|
32
30
|
end
|
33
31
|
|
34
32
|
def test_duplicated_features
|
35
33
|
metadata = {
|
36
34
|
:name => "feature duplication test",
|
37
35
|
:nominal => true,
|
38
|
-
:description => "feature duplication test"
|
39
36
|
}
|
40
37
|
feature = NumericBioAssay.find_or_create_by metadata
|
41
38
|
dup_feature = NumericBioAssay.find_or_create_by metadata
|
@@ -44,12 +41,8 @@ class FeatureTest < MiniTest::Test
|
|
44
41
|
assert !feature.id.nil?, "No Feature ID in #{dup_feature.inspect}"
|
45
42
|
assert_equal feature.id, dup_feature.id
|
46
43
|
feature.delete
|
47
|
-
|
48
|
-
|
49
|
-
end
|
50
|
-
assert_raises Mongoid::Errors::DocumentNotFound do
|
51
|
-
OpenTox::Feature.find(dup_feature.id)
|
52
|
-
end
|
44
|
+
assert_nil OpenTox::Feature.find(feature.id)
|
45
|
+
assert_nil OpenTox::Feature.find(dup_feature.id)
|
53
46
|
end
|
54
47
|
|
55
48
|
def test_smarts_feature
|
@@ -62,4 +55,23 @@ class FeatureTest < MiniTest::Test
|
|
62
55
|
assert original.smarts, "CN"
|
63
56
|
end
|
64
57
|
|
58
|
+
# Checks the size of each PhysChem descriptor list against its expected count.
def test_physchem_description
  expected_counts = {
    :descriptors => 355,
    :openbabel_descriptors => 15,
    :cdk_descriptors => 295,
    :joelib_descriptors => 45,
    :unique_descriptors => 310,
  }
  # Hash iteration follows insertion order, so the lists are checked in the order written above.
  expected_counts.each do |list, count|
    assert_equal count, PhysChem.public_send(list).size
  end
end
|
65
|
+
|
66
|
+
# Calculates three logP descriptors (Openbabel, Joelib, Cdk) for one compound
# and compares them with reference values.
def test_physchem
  assert_equal 355, PhysChem.descriptors.size
  c = Compound.from_smiles "CC(=O)CC(C)C"
  # Descriptor values are floats, so compare within a tolerance. Exact
  # equality (previously e.g. 0.35380000000000034) breaks on the smallest
  # rounding difference between library versions or platforms.
  tolerance = 1e-5
  log_p = PhysChem.find_or_create_by :name => "Openbabel.logP"
  assert_in_delta 1.6215, log_p.calculate(c), tolerance
  joelib_log_p = PhysChem.find_or_create_by :name => "Joelib.LogP"
  assert_in_delta 3.5951, joelib_log_p.calculate(c), tolerance
  a_log_p = PhysChem.find_or_create_by :name => "Cdk.ALOGP.ALogP"
  assert_in_delta 0.3538, a_log_p.calculate(c), tolerance
end
|
76
|
+
|
65
77
|
end
|