lazar 0.0.7 → 0.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +3 -0
- data/README.md +2 -1
- data/VERSION +1 -1
- data/ext/lazar/extconf.rb +15 -76
- data/ext/lazar/rinstall.R +9 -0
- data/lazar.gemspec +7 -7
- data/lib/classification.rb +5 -78
- data/lib/compound.rb +201 -44
- data/lib/crossvalidation.rb +224 -121
- data/lib/dataset.rb +83 -93
- data/lib/error.rb +1 -1
- data/lib/experiment.rb +99 -0
- data/lib/feature.rb +2 -54
- data/lib/lazar.rb +47 -34
- data/lib/leave-one-out-validation.rb +205 -0
- data/lib/model.rb +131 -76
- data/lib/opentox.rb +2 -2
- data/lib/overwrite.rb +37 -0
- data/lib/physchem.rb +133 -0
- data/lib/regression.rb +117 -189
- data/lib/rest-client-wrapper.rb +4 -5
- data/lib/unique_descriptors.rb +6 -7
- data/lib/validation.rb +63 -69
- data/test/all.rb +2 -2
- data/test/classification.rb +41 -0
- data/test/compound.rb +116 -7
- data/test/data/LOAEL_log_mg_corrected_smiles.csv +567 -567
- data/test/data/LOAEL_log_mmol_corrected_smiles.csv +566 -566
- data/test/data/LOAEL_mmol_corrected_smiles.csv +568 -0
- data/test/data/batch_prediction.csv +25 -0
- data/test/data/batch_prediction_inchi_small.csv +4 -0
- data/test/data/batch_prediction_smiles_small.csv +4 -0
- data/test/data/hamster_carcinogenicity.json +3 -0
- data/test/data/loael.csv +568 -0
- data/test/dataset-long.rb +5 -8
- data/test/dataset.rb +31 -11
- data/test/default_environment.rb +11 -0
- data/test/descriptor.rb +26 -41
- data/test/error.rb +1 -3
- data/test/experiment.rb +301 -0
- data/test/feature.rb +22 -10
- data/test/lazar-long.rb +43 -23
- data/test/lazar-physchem-short.rb +19 -16
- data/test/prediction_models.rb +20 -0
- data/test/regression.rb +43 -0
- data/test/setup.rb +3 -1
- data/test/test_environment.rb +10 -0
- data/test/validation.rb +92 -26
- metadata +64 -38
- data/lib/SMARTS_InteLigand.txt +0 -983
- data/lib/bbrc.rb +0 -165
- data/lib/descriptor.rb +0 -247
- data/lib/neighbor.rb +0 -25
- data/lib/similarity.rb +0 -58
- data/mongoid.yml +0 -8
- data/test/descriptor-long.rb +0 -26
- data/test/fminer-long.rb +0 -38
- data/test/fminer.rb +0 -52
- data/test/lazar-fminer.rb +0 -50
- data/test/lazar-regression.rb +0 -27
data/test/dataset.rb
CHANGED
@@ -8,10 +8,22 @@ class DatasetTest < MiniTest::Test
|
|
8
8
|
d1 = Dataset.new
|
9
9
|
d1.save
|
10
10
|
datasets = Dataset.all
|
11
|
-
|
11
|
+
assert datasets.first.is_a?(Dataset), "#{datasets.first} is not a Dataset."
|
12
12
|
d1.delete
|
13
13
|
end
|
14
14
|
|
15
|
+
def test_create_without_features_smiles_and_inchi
|
16
|
+
["smiles", "inchi"].each do |type|
|
17
|
+
d = Dataset.from_csv_file File.join(DATA_DIR,"batch_prediction_#{type}_small.csv")
|
18
|
+
assert_equal Dataset, d.class
|
19
|
+
refute_nil d.id
|
20
|
+
dataset = Dataset.find d.id
|
21
|
+
#p dataset.compounds
|
22
|
+
assert_equal 3, d.compounds.size.to_i
|
23
|
+
d.delete
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
15
27
|
def test_create_empty
|
16
28
|
d = Dataset.new
|
17
29
|
assert_equal Dataset, d.class
|
@@ -57,19 +69,15 @@ class DatasetTest < MiniTest::Test
|
|
57
69
|
assert_equal 3, d.compounds.size
|
58
70
|
assert_equal 2, d.features.size
|
59
71
|
assert_equal [[1,2],[4,5],[6,7]], d.data_entries
|
60
|
-
d.
|
72
|
+
d.save
|
61
73
|
# check if dataset has been saved correctly
|
62
74
|
new_dataset = Dataset.find d.id
|
63
75
|
assert_equal 3, new_dataset.compounds.size
|
64
76
|
assert_equal 2, new_dataset.features.size
|
65
77
|
assert_equal [[1,2],[4,5],[6,7]], new_dataset.data_entries
|
66
78
|
d.delete
|
67
|
-
|
68
|
-
|
69
|
-
end
|
70
|
-
assert_raises Mongoid::Errors::DocumentNotFound do
|
71
|
-
Dataset.find new_dataset.id
|
72
|
-
end
|
79
|
+
assert_nil Dataset.find d.id
|
80
|
+
assert_nil Dataset.find new_dataset.id
|
73
81
|
end
|
74
82
|
|
75
83
|
def test_dataset_accessors
|
@@ -78,7 +86,7 @@ class DatasetTest < MiniTest::Test
|
|
78
86
|
new_dataset = Dataset.find d.id
|
79
87
|
# get metadata
|
80
88
|
assert_match "multicolumn.csv", new_dataset.source
|
81
|
-
assert_equal "multicolumn
|
89
|
+
assert_equal "multicolumn", new_dataset.name
|
82
90
|
# get features
|
83
91
|
assert_equal 6, new_dataset.features.size
|
84
92
|
assert_equal 7, new_dataset.compounds.size
|
@@ -119,7 +127,7 @@ class DatasetTest < MiniTest::Test
|
|
119
127
|
original_csv.shift
|
120
128
|
csv.each_with_index do |row,i|
|
121
129
|
compound = Compound.from_smiles row.shift
|
122
|
-
original_compound = Compound.from_smiles original_csv[i].shift
|
130
|
+
original_compound = Compound.from_smiles original_csv[i].shift.strip
|
123
131
|
assert_equal original_compound.inchi, compound.inchi
|
124
132
|
row.each_with_index do |v,j|
|
125
133
|
if v.numeric?
|
@@ -161,7 +169,7 @@ class DatasetTest < MiniTest::Test
|
|
161
169
|
def test_from_csv2
|
162
170
|
File.open("#{DATA_DIR}/temp_test.csv", "w+") { |file| file.write("SMILES,Hamster\nCC=O,true\n ,true\nO=C(N),true") }
|
163
171
|
dataset = Dataset.from_csv_file "#{DATA_DIR}/temp_test.csv"
|
164
|
-
assert_equal "Cannot parse SMILES compound '
|
172
|
+
assert_equal "Cannot parse SMILES compound '' at position 3, all entries are ignored.", dataset.warnings.join
|
165
173
|
File.delete "#{DATA_DIR}/temp_test.csv"
|
166
174
|
dataset.features.each{|f| feature = Feature.find f.id; feature.delete}
|
167
175
|
dataset.delete
|
@@ -195,5 +203,17 @@ class DatasetTest < MiniTest::Test
|
|
195
203
|
assert_equal 0.00323, d2.data_entries[5][0]
|
196
204
|
end
|
197
205
|
|
206
|
+
def test_folds
|
207
|
+
dataset = Dataset.from_csv_file File.join(DATA_DIR,"loael.csv")
|
208
|
+
dataset.folds(10).each do |fold|
|
209
|
+
fold.each do |d|
|
210
|
+
assert_equal d.data_entries.size, d.compound_ids.size
|
211
|
+
assert_operator d.compound_ids.size, :>=, d.compound_ids.uniq.size
|
212
|
+
end
|
213
|
+
assert_operator fold[0].compound_ids.uniq.size, :>=, fold[1].compound_ids.uniq.size
|
214
|
+
end
|
215
|
+
#puts dataset.folds 10
|
216
|
+
end
|
217
|
+
|
198
218
|
end
|
199
219
|
|
@@ -0,0 +1,11 @@
|
|
1
|
+
require 'minitest/autorun'
|
2
|
+
require_relative '../lib/lazar.rb'
|
3
|
+
include OpenTox
|
4
|
+
class DefaultEnvironmentTest < MiniTest::Test
|
5
|
+
def test_lazar_environment
|
6
|
+
assert_equal "production", ENV["LAZAR_ENV"]
|
7
|
+
assert_equal "production", ENV["MONGOID_ENV"]
|
8
|
+
assert_equal "production", ENV["RACK_ENV"]
|
9
|
+
assert_equal "production", Mongoid.clients["default"]["database"]
|
10
|
+
end
|
11
|
+
end
|
data/test/descriptor.rb
CHANGED
@@ -4,80 +4,65 @@ class DescriptorTest < MiniTest::Test
|
|
4
4
|
|
5
5
|
def test_list
|
6
6
|
# check available descriptors
|
7
|
-
|
8
|
-
assert_equal
|
9
|
-
|
10
|
-
assert_equal
|
11
|
-
sum = 0
|
12
|
-
[ @descriptors, @descriptor_values ].each do |desc|
|
13
|
-
{"Openbabel"=>16,"Cdk"=>(desc==@descriptors ? 50 : 295),"Joelib"=>45}.each do |k,v|
|
14
|
-
assert_equal v,desc.select{|x| x=~/^#{k}\./}.size,"wrong num #{k} descriptors"
|
15
|
-
sum += v
|
16
|
-
end
|
17
|
-
end
|
18
|
-
assert_equal (111+356),sum
|
7
|
+
assert_equal 355,PhysChem.descriptors.size,"incorrect number of physchem descriptors"
|
8
|
+
assert_equal 15,PhysChem.openbabel_descriptors.size,"incorrect number of Openbabel descriptors"
|
9
|
+
assert_equal 295,PhysChem.cdk_descriptors.size,"incorrect number of Cdk descriptors"
|
10
|
+
assert_equal 45,PhysChem.joelib_descriptors.size,"incorrect number of Joelib descriptors"
|
19
11
|
end
|
20
12
|
|
21
13
|
def test_smarts
|
22
14
|
c = OpenTox::Compound.from_smiles "N=C=C1CCC(=F=FO)C1"
|
23
15
|
File.open("tmp.png","w+"){|f| f.puts c.png}
|
24
16
|
s = Smarts.find_or_create_by(:smarts => "F=F")
|
25
|
-
result =
|
17
|
+
result = c.smarts_match [s]
|
26
18
|
assert_equal [1], result
|
27
19
|
smarts = ["CC", "C", "C=C", "CO", "F=F", "C1CCCC1", "NN"].collect{|s| Smarts.find_or_create_by(:smarts => s)}
|
28
|
-
result =
|
20
|
+
result = c.smarts_match smarts
|
29
21
|
assert_equal [1, 1, 1, 0, 1, 1, 0], result
|
30
22
|
smarts_count = [10, 6, 2, 0, 2, 10, 0]
|
31
|
-
result =
|
23
|
+
result = c.smarts_match smarts, true
|
32
24
|
assert_equal smarts_count, result
|
33
25
|
end
|
34
26
|
|
35
27
|
def test_compound_openbabel_single
|
36
28
|
c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N"
|
37
|
-
result =
|
38
|
-
assert_equal 1.12518, result.first
|
29
|
+
result = c.physchem [PhysChem.find_or_create_by(:name => "Openbabel.logP")]
|
30
|
+
assert_equal 1.12518, result.first.last.round(5)
|
39
31
|
end
|
40
32
|
|
41
33
|
def test_compound_cdk_single
|
42
34
|
c = OpenTox::Compound.from_smiles "c1ccccc1"
|
43
|
-
result =
|
44
|
-
assert_equal
|
35
|
+
result = c.physchem [PhysChem.find_or_create_by(:name => "Cdk.AtomCount.nAtom")]
|
36
|
+
assert_equal 12, result.first.last
|
45
37
|
c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N"
|
46
|
-
result =
|
47
|
-
assert_equal
|
48
|
-
result = OpenTox::Algorithm::Descriptor.physchem c, ["Cdk.CarbonTypes"]
|
38
|
+
result = c.physchem [PhysChem.find_or_create_by(:name => "Cdk.AtomCount.nAtom")]
|
39
|
+
assert_equal 17, result.first.last
|
49
40
|
c_types = {"Cdk.CarbonTypes.C1SP1"=>1, "Cdk.CarbonTypes.C2SP1"=>0, "Cdk.CarbonTypes.C1SP2"=>0, "Cdk.CarbonTypes.C2SP2"=>1, "Cdk.CarbonTypes.C3SP2"=>0, "Cdk.CarbonTypes.C1SP3"=>2, "Cdk.CarbonTypes.C2SP3"=>1, "Cdk.CarbonTypes.C3SP3"=>1, "Cdk.CarbonTypes.C4SP3"=>0}
|
50
|
-
|
41
|
+
physchem_features = c_types.collect{|t,nr| PhysChem.find_or_create_by(:name => t)}
|
42
|
+
result = c.physchem physchem_features
|
43
|
+
assert_equal [1, 0, 0, 1, 0, 2, 1, 1, 0], result.values
|
51
44
|
end
|
52
45
|
|
53
46
|
def test_compound_joelib_single
|
54
47
|
c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N"
|
55
|
-
result =
|
56
|
-
assert_equal
|
48
|
+
result = c.physchem [PhysChem.find_or_create_by(:name => "Joelib.LogP")]
|
49
|
+
assert_equal 2.65908, result.first.last
|
57
50
|
end
|
58
51
|
|
59
52
|
def test_compound_all
|
60
53
|
c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N"
|
61
|
-
result =
|
62
|
-
|
63
|
-
|
64
|
-
assert_equal
|
54
|
+
result = c.physchem PhysChem.descriptors
|
55
|
+
amr = PhysChem.find_or_create_by(:name => "Cdk.ALOGP.AMR", :library => "Cdk")
|
56
|
+
sbonds = PhysChem.find_by(:name => "Openbabel.sbonds")
|
57
|
+
assert_equal 30.8723, result[amr.id.to_s]
|
58
|
+
assert_equal 5, result[sbonds.id.to_s]
|
65
59
|
end
|
66
60
|
|
67
61
|
def test_compound_descriptor_parameters
|
68
62
|
c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N"
|
69
|
-
result =
|
70
|
-
assert_equal
|
71
|
-
assert_equal [1.12518, 17.0,
|
72
|
-
end
|
73
|
-
|
74
|
-
def test_dataset_descriptor_parameters
|
75
|
-
dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"hamster_carcinogenicity.mini.csv")
|
76
|
-
d = OpenTox::Algorithm::Descriptor.physchem dataset, [ "Openbabel.logP", "Cdk.AtomCount", "Cdk.CarbonTypes", "Joelib.LogP" ]
|
77
|
-
assert_kind_of Dataset, d
|
78
|
-
assert_equal dataset.compounds, d.compounds
|
79
|
-
assert_equal dataset.compounds.size, d.data_entries.size
|
80
|
-
assert_equal 12, d.data_entries.first.size
|
63
|
+
result = c.physchem [ "Openbabel.logP", "Cdk.AtomCount.nAtom", "Joelib.LogP" ].collect{|d| PhysChem.find_or_create_by(:name => d)}
|
64
|
+
assert_equal 3, result.size
|
65
|
+
assert_equal [1.12518, 17.0, 2.65908], result.values.collect{|v| v.round 5}
|
81
66
|
end
|
82
67
|
|
83
68
|
end
|
data/test/error.rb
CHANGED
@@ -4,9 +4,7 @@ class ErrorTest < MiniTest::Test
|
|
4
4
|
|
5
5
|
def test_bad_request
|
6
6
|
object = OpenTox::Feature.new
|
7
|
-
|
8
|
-
response = OpenTox::Feature.find(object.id)
|
9
|
-
end
|
7
|
+
assert_nil OpenTox::Feature.find(object.id)
|
10
8
|
end
|
11
9
|
|
12
10
|
def test_error_methods
|
data/test/experiment.rb
ADDED
@@ -0,0 +1,301 @@
|
|
1
|
+
require_relative "setup.rb"
|
2
|
+
|
3
|
+
class ExperimentTest < MiniTest::Test
|
4
|
+
|
5
|
+
def test_regression_experiment
|
6
|
+
skip
|
7
|
+
datasets = [
|
8
|
+
"EPAFHM.medi.csv",
|
9
|
+
#"EPAFHM.csv",
|
10
|
+
#"FDA_v3b_Maximum_Recommended_Daily_Dose_mmol.csv",
|
11
|
+
"LOAEL_mmol_corrected_smiles.csv"
|
12
|
+
]
|
13
|
+
experiment = Experiment.create(
|
14
|
+
:name => "Default regression for datasets #{datasets}.",
|
15
|
+
:dataset_ids => datasets.collect{|d| Dataset.from_csv_file(File.join(DATA_DIR, d)).id},
|
16
|
+
:model_settings => [
|
17
|
+
{
|
18
|
+
:algorithm => "OpenTox::Model::LazarRegression",
|
19
|
+
}
|
20
|
+
]
|
21
|
+
)
|
22
|
+
#experiment.run
|
23
|
+
puts experiment.report.to_yaml
|
24
|
+
assert_equal datasets.size, experiment.results.size
|
25
|
+
experiment.results.each do |dataset_id, result|
|
26
|
+
assert_equal 1, result.size
|
27
|
+
result.each do |r|
|
28
|
+
assert_kind_of BSON::ObjectId, r[:model_id]
|
29
|
+
assert_kind_of BSON::ObjectId, r[:repeated_crossvalidation_id]
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
def test_classification_experiment
|
35
|
+
|
36
|
+
skip
|
37
|
+
datasets = [ "hamster_carcinogenicity.csv" ]
|
38
|
+
experiment = Experiment.create(
|
39
|
+
:name => "Fminer vs fingerprint classification for datasets #{datasets}.",
|
40
|
+
:dataset_ids => datasets.collect{|d| Dataset.from_csv_file(File.join(DATA_DIR, d)).id},
|
41
|
+
:model_settings => [
|
42
|
+
{
|
43
|
+
:algorithm => "OpenTox::Model::LazarClassification",
|
44
|
+
},{
|
45
|
+
:algorithm => "OpenTox::Model::LazarClassification",
|
46
|
+
:neighbor_algorithm_parameter => {:min_sim => 0.3}
|
47
|
+
},
|
48
|
+
#{
|
49
|
+
#:algorithm => "OpenTox::Model::LazarFminerClassification",
|
50
|
+
#}
|
51
|
+
]
|
52
|
+
)
|
53
|
+
#experiment.run
|
54
|
+
=begin
|
55
|
+
experiment = Experiment.find "55f944a22b72ed7de2000000"
|
56
|
+
=end
|
57
|
+
puts experiment.report.to_yaml
|
58
|
+
experiment.results.each do |dataset_id, result|
|
59
|
+
assert_equal 2, result.size
|
60
|
+
result.each do |r|
|
61
|
+
assert_kind_of BSON::ObjectId, r[:model_id]
|
62
|
+
assert_kind_of BSON::ObjectId, r[:repeated_crossvalidation_id]
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
def test_regression_fingerprints
|
68
|
+
skip
|
69
|
+
#=begin
|
70
|
+
datasets = [
|
71
|
+
"EPAFHM.medi.csv",
|
72
|
+
#"LOAEL_mmol_corrected_smiles.csv"
|
73
|
+
]
|
74
|
+
min_sims = [0.3,0.7]
|
75
|
+
#min_sims = [0.7]
|
76
|
+
#types = ["FP2","FP3","FP4","MACCS","MP2D"]
|
77
|
+
types = ["MP2D","FP3"]
|
78
|
+
experiment = Experiment.create(
|
79
|
+
:name => "Fingerprint regression with different types for datasets #{datasets}.",
|
80
|
+
:dataset_ids => datasets.collect{|d| Dataset.from_csv_file(File.join(DATA_DIR, d)).id},
|
81
|
+
)
|
82
|
+
types.each do |type|
|
83
|
+
min_sims.each do |min_sim|
|
84
|
+
experiment.model_settings << {
|
85
|
+
:model_algorithm => "OpenTox::Model::LazarRegression",
|
86
|
+
:prediction_algorithm => "OpenTox::Algorithm::Regression.weighted_average",
|
87
|
+
:neighbor_algorithm => "fingerprint_neighbors",
|
88
|
+
:neighbor_algorithm_parameters => {
|
89
|
+
:type => type,
|
90
|
+
:min_sim => min_sim,
|
91
|
+
}
|
92
|
+
}
|
93
|
+
end
|
94
|
+
end
|
95
|
+
experiment.run
|
96
|
+
#=end
|
97
|
+
=begin
|
98
|
+
experiment = Experiment.find '56029cb92b72ed673d000000'
|
99
|
+
=end
|
100
|
+
p experiment.id
|
101
|
+
experiment.results.each do |dataset,result|
|
102
|
+
result.each do |r|
|
103
|
+
params = Model::Lazar.find(r["model_id"])[:neighbor_algorithm_parameters]
|
104
|
+
RepeatedCrossValidation.find(r["repeated_crossvalidation_id"]).crossvalidations.each do |cv|
|
105
|
+
cv.validation_ids.each do |vid|
|
106
|
+
model_params = Model::Lazar.find(Validation.find(vid).model_id)[:neighbor_algorithm_parameters]
|
107
|
+
assert_equal params[:type], model_params[:type]
|
108
|
+
assert_equal params[:min_sim], model_params[:min_sim]
|
109
|
+
refute_equal params[:training_dataset_id], model_params[:training_dataset_id]
|
110
|
+
end
|
111
|
+
end
|
112
|
+
end
|
113
|
+
end
|
114
|
+
puts experiment.report.to_yaml
|
115
|
+
p experiment.summary
|
116
|
+
end
|
117
|
+
|
118
|
+
def test_mpd_fingerprints
|
119
|
+
skip
|
120
|
+
datasets = [
|
121
|
+
"EPAFHM.medi.csv",
|
122
|
+
]
|
123
|
+
types = ["FP2","MP2D"]
|
124
|
+
experiment = Experiment.create(
|
125
|
+
:name => "FP2 vs MP2D fingerprint regression for datasets #{datasets}.",
|
126
|
+
:dataset_ids => datasets.collect{|d| Dataset.from_csv_file(File.join(DATA_DIR, d)).id},
|
127
|
+
)
|
128
|
+
types.each do |type|
|
129
|
+
experiment.model_settings << {
|
130
|
+
:algorithm => "OpenTox::Model::LazarRegression",
|
131
|
+
:neighbor_algorithm => "fingerprint_neighbors",
|
132
|
+
:neighbor_algorithm_parameter => {
|
133
|
+
:type => type,
|
134
|
+
:min_sim => 0.7,
|
135
|
+
}
|
136
|
+
}
|
137
|
+
end
|
138
|
+
experiment.run
|
139
|
+
p experiment.id
|
140
|
+
=begin
|
141
|
+
=end
|
142
|
+
#experiment = Experiment.find '55ffd0c02b72ed123c000000'
|
143
|
+
p experiment
|
144
|
+
puts experiment.report.to_yaml
|
145
|
+
end
|
146
|
+
|
147
|
+
def test_multiple_datasets
|
148
|
+
skip
|
149
|
+
datasets = [
|
150
|
+
"EPAFHM.medi.csv",
|
151
|
+
"LOAEL_mmol_corrected_smiles.csv"
|
152
|
+
]
|
153
|
+
min_sims = [0.3]
|
154
|
+
types = ["FP2"]
|
155
|
+
experiment = Experiment.create(
|
156
|
+
:name => "Fingerprint regression with mutiple datasets #{datasets}.",
|
157
|
+
:dataset_ids => datasets.collect{|d| Dataset.from_csv_file(File.join(DATA_DIR, d)).id},
|
158
|
+
)
|
159
|
+
types.each do |type|
|
160
|
+
min_sims.each do |min_sim|
|
161
|
+
experiment.model_settings << {
|
162
|
+
:model_algorithm => "OpenTox::Model::LazarRegression",
|
163
|
+
:prediction_algorithm => "OpenTox::Algorithm::Regression.weighted_average",
|
164
|
+
:neighbor_algorithm => "fingerprint_neighbors",
|
165
|
+
:neighbor_algorithm_parameters => {
|
166
|
+
:type => type,
|
167
|
+
:min_sim => min_sim,
|
168
|
+
}
|
169
|
+
}
|
170
|
+
end
|
171
|
+
end
|
172
|
+
experiment.run
|
173
|
+
p experiment.id
|
174
|
+
experiment.results.each do |dataset,result|
|
175
|
+
result.each do |r|
|
176
|
+
params = Model::Lazar.find(r["model_id"])[:neighbor_algorithm_parameters]
|
177
|
+
RepeatedCrossValidation.find(r["repeated_crossvalidation_id"]).crossvalidations.each do |cv|
|
178
|
+
cv.validation_ids.each do |vid|
|
179
|
+
model_params = Model::Lazar.find(Validation.find(vid).model_id)[:neighbor_algorithm_parameters]
|
180
|
+
assert_equal params[:type], model_params[:type]
|
181
|
+
assert_equal params[:min_sim], model_params[:min_sim]
|
182
|
+
refute_equal params[:training_dataset_id], model_params[:training_dataset_id]
|
183
|
+
end
|
184
|
+
end
|
185
|
+
end
|
186
|
+
end
|
187
|
+
puts experiment.report.to_yaml
|
188
|
+
p experiment.summary
|
189
|
+
end
|
190
|
+
|
191
|
+
def test_mpd_mna_regression_fingerprints
|
192
|
+
skip
|
193
|
+
datasets = [
|
194
|
+
"EPAFHM.medi.csv",
|
195
|
+
#"hamster_carcinogenicity.csv"
|
196
|
+
]
|
197
|
+
min_sims = [0.0,0.3]
|
198
|
+
types = ["MP2D","MNA"]
|
199
|
+
neighbor_algos = [
|
200
|
+
"fingerprint_neighbors",
|
201
|
+
"fingerprint_count_neighbors",
|
202
|
+
]
|
203
|
+
experiment = Experiment.create(
|
204
|
+
:name => "MNA vs MPD descriptors",
|
205
|
+
:dataset_ids => datasets.collect{|d| Dataset.from_csv_file(File.join(DATA_DIR, d)).id},
|
206
|
+
)
|
207
|
+
types.each do |type|
|
208
|
+
min_sims.each do |min_sim|
|
209
|
+
neighbor_algos.each do |neighbor_algo|
|
210
|
+
experiment.model_settings << {
|
211
|
+
:model_algorithm => "OpenTox::Model::LazarRegression",
|
212
|
+
:prediction_algorithm => "OpenTox::Algorithm::Regression.weighted_average",
|
213
|
+
:neighbor_algorithm => neighbor_algo,
|
214
|
+
:neighbor_algorithm_parameters => {
|
215
|
+
:type => type,
|
216
|
+
:min_sim => min_sim,
|
217
|
+
}
|
218
|
+
}
|
219
|
+
end
|
220
|
+
end
|
221
|
+
end
|
222
|
+
experiment.run
|
223
|
+
#=end
|
224
|
+
=begin
|
225
|
+
experiment = Experiment.find '56029cb92b72ed673d000000'
|
226
|
+
=end
|
227
|
+
p experiment.id
|
228
|
+
puts experiment.report.to_yaml
|
229
|
+
#p experiment.summary
|
230
|
+
experiment.results.each do |dataset,result|
|
231
|
+
result.each do |r|
|
232
|
+
p r
|
233
|
+
# TODO fix r["model_id"]
|
234
|
+
params = Model::Lazar.find(r["model_id"])[:neighbor_algorithm_parameters]
|
235
|
+
RepeatedCrossValidation.find(r["repeated_crossvalidation_id"]).crossvalidations.each do |cv|
|
236
|
+
cv.validation_ids.each do |vid|
|
237
|
+
model_params = Model::Lazar.find(Validation.find(vid).model_id)[:neighbor_algorithm_parameters]
|
238
|
+
assert_equal params[:type], model_params[:type]
|
239
|
+
assert_equal params[:min_sim], model_params[:min_sim]
|
240
|
+
refute_equal params[:training_dataset_id], model_params[:training_dataset_id]
|
241
|
+
end
|
242
|
+
end
|
243
|
+
end
|
244
|
+
end
|
245
|
+
end
|
246
|
+
|
247
|
+
def test_mpd_mna_classification_fingerprints
|
248
|
+
skip
|
249
|
+
datasets = [
|
250
|
+
#"EPAFHM.medi.csv",
|
251
|
+
"hamster_carcinogenicity.csv"
|
252
|
+
]
|
253
|
+
min_sims = [0.0,0.3]
|
254
|
+
types = ["MP2D","MNA"]
|
255
|
+
neighbor_algos = [
|
256
|
+
"fingerprint_count_neighbors",
|
257
|
+
"fingerprint_neighbors",
|
258
|
+
]
|
259
|
+
experiment = Experiment.create(
|
260
|
+
:name => "MNA vs MPD descriptors",
|
261
|
+
:dataset_ids => datasets.collect{|d| Dataset.from_csv_file(File.join(DATA_DIR, d)).id},
|
262
|
+
)
|
263
|
+
types.each do |type|
|
264
|
+
min_sims.each do |min_sim|
|
265
|
+
neighbor_algos.each do |neighbor_algo|
|
266
|
+
experiment.model_settings << {
|
267
|
+
:model_algorithm => "OpenTox::Model::LazarClassification",
|
268
|
+
:prediction_algorithm => "OpenTox::Algorithm::Classification.weighted_majority_vote",
|
269
|
+
:neighbor_algorithm => neighbor_algo,
|
270
|
+
:neighbor_algorithm_parameters => {
|
271
|
+
:type => type,
|
272
|
+
:min_sim => min_sim,
|
273
|
+
}
|
274
|
+
}
|
275
|
+
end
|
276
|
+
end
|
277
|
+
end
|
278
|
+
experiment.run
|
279
|
+
#=end
|
280
|
+
=begin
|
281
|
+
experiment = Experiment.find '56029cb92b72ed673d000000'
|
282
|
+
=end
|
283
|
+
p experiment.id
|
284
|
+
puts experiment.report.to_yaml
|
285
|
+
#p experiment.summary
|
286
|
+
experiment.results.each do |dataset,result|
|
287
|
+
result.each do |r|
|
288
|
+
# TODO fix r["model_id"]
|
289
|
+
params = Model::Lazar.find(r["model_id"])[:neighbor_algorithm_parameters]
|
290
|
+
RepeatedCrossValidation.find(r["repeated_crossvalidation_id"]).crossvalidations.each do |cv|
|
291
|
+
cv.validation_ids.each do |vid|
|
292
|
+
model_params = Model::Lazar.find(Validation.find(vid).model_id)[:neighbor_algorithm_parameters]
|
293
|
+
assert_equal params[:type], model_params[:type]
|
294
|
+
assert_equal params[:min_sim], model_params[:min_sim]
|
295
|
+
refute_equal params[:training_dataset_id], model_params[:training_dataset_id]
|
296
|
+
end
|
297
|
+
end
|
298
|
+
end
|
299
|
+
end
|
300
|
+
end
|
301
|
+
end
|
data/test/feature.rb
CHANGED
@@ -26,16 +26,13 @@ class FeatureTest < MiniTest::Test
|
|
26
26
|
|
27
27
|
id = @feature2.id
|
28
28
|
@feature2.delete
|
29
|
-
|
30
|
-
OpenTox::Feature.find(id)
|
31
|
-
end
|
29
|
+
assert_nil OpenTox::Feature.find(id)
|
32
30
|
end
|
33
31
|
|
34
32
|
def test_duplicated_features
|
35
33
|
metadata = {
|
36
34
|
:name => "feature duplication test",
|
37
35
|
:nominal => true,
|
38
|
-
:description => "feature duplication test"
|
39
36
|
}
|
40
37
|
feature = NumericBioAssay.find_or_create_by metadata
|
41
38
|
dup_feature = NumericBioAssay.find_or_create_by metadata
|
@@ -44,12 +41,8 @@ class FeatureTest < MiniTest::Test
|
|
44
41
|
assert !feature.id.nil?, "No Feature ID in #{dup_feature.inspect}"
|
45
42
|
assert_equal feature.id, dup_feature.id
|
46
43
|
feature.delete
|
47
|
-
|
48
|
-
|
49
|
-
end
|
50
|
-
assert_raises Mongoid::Errors::DocumentNotFound do
|
51
|
-
OpenTox::Feature.find(dup_feature.id)
|
52
|
-
end
|
44
|
+
assert_nil OpenTox::Feature.find(feature.id)
|
45
|
+
assert_nil OpenTox::Feature.find(dup_feature.id)
|
53
46
|
end
|
54
47
|
|
55
48
|
def test_smarts_feature
|
@@ -62,4 +55,23 @@ class FeatureTest < MiniTest::Test
|
|
62
55
|
assert original.smarts, "CN"
|
63
56
|
end
|
64
57
|
|
58
|
+
def test_physchem_description
|
59
|
+
assert_equal 355, PhysChem.descriptors.size
|
60
|
+
assert_equal 15, PhysChem.openbabel_descriptors.size
|
61
|
+
assert_equal 295, PhysChem.cdk_descriptors.size
|
62
|
+
assert_equal 45, PhysChem.joelib_descriptors.size
|
63
|
+
assert_equal 310, PhysChem.unique_descriptors.size
|
64
|
+
end
|
65
|
+
|
66
|
+
def test_physchem
|
67
|
+
assert_equal 355, PhysChem.descriptors.size
|
68
|
+
c = Compound.from_smiles "CC(=O)CC(C)C"
|
69
|
+
logP = PhysChem.find_or_create_by :name => "Openbabel.logP"
|
70
|
+
assert_equal 1.6215, logP.calculate(c)
|
71
|
+
jlogP = PhysChem.find_or_create_by :name => "Joelib.LogP"
|
72
|
+
assert_equal 3.5951, jlogP.calculate(c)
|
73
|
+
alogP = PhysChem.find_or_create_by :name => "Cdk.ALOGP.ALogP"
|
74
|
+
assert_equal 0.35380000000000034, alogP.calculate(c)
|
75
|
+
end
|
76
|
+
|
65
77
|
end
|