lazar 0.9.3 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -4
- data/README.md +5 -15
- data/VERSION +1 -1
- data/ext/lazar/extconf.rb +1 -1
- data/ext/lazar/rinstall.R +9 -7
- data/java/CdkDescriptorInfo.class +0 -0
- data/java/CdkDescriptorInfo.java +3 -2
- data/java/CdkDescriptors.class +0 -0
- data/java/CdkDescriptors.java +28 -28
- data/java/Rakefile +3 -3
- data/java/{cdk-1.4.19.jar → cdk-2.0-SNAPSHOT.jar} +0 -0
- data/lazar.gemspec +6 -7
- data/lib/algorithm.rb +2 -11
- data/lib/caret.rb +96 -0
- data/lib/classification.rb +14 -22
- data/lib/compound.rb +21 -87
- data/lib/crossvalidation.rb +80 -279
- data/lib/dataset.rb +105 -174
- data/lib/feature.rb +11 -18
- data/lib/feature_selection.rb +42 -0
- data/lib/import.rb +122 -0
- data/lib/lazar.rb +14 -4
- data/lib/leave-one-out-validation.rb +46 -192
- data/lib/model.rb +319 -128
- data/lib/nanoparticle.rb +98 -0
- data/lib/opentox.rb +7 -4
- data/lib/overwrite.rb +24 -3
- data/lib/physchem.rb +11 -10
- data/lib/regression.rb +7 -137
- data/lib/rest-client-wrapper.rb +0 -6
- data/lib/similarity.rb +65 -0
- data/lib/substance.rb +8 -0
- data/lib/train-test-validation.rb +69 -0
- data/lib/validation-statistics.rb +223 -0
- data/lib/validation.rb +17 -100
- data/scripts/mg2mmol.rb +17 -0
- data/scripts/mirror-enm2test.rb +4 -0
- data/scripts/mmol2-log10.rb +32 -0
- data/test/compound.rb +4 -94
- data/test/data/EPAFHM.medi_log10.csv +92 -0
- data/test/data/EPAFHM.mini_log10.csv +16 -0
- data/test/data/EPAFHM_log10.csv +581 -0
- data/test/data/loael_log10.csv +568 -0
- data/test/dataset.rb +195 -133
- data/test/descriptor.rb +27 -18
- data/test/error.rb +2 -2
- data/test/experiment.rb +4 -4
- data/test/feature.rb +2 -3
- data/test/gridfs.rb +10 -0
- data/test/model-classification.rb +106 -0
- data/test/model-nanoparticle.rb +128 -0
- data/test/model-regression.rb +171 -0
- data/test/model-validation.rb +19 -0
- data/test/nanomaterial-model-validation.rb +55 -0
- data/test/setup.rb +8 -4
- data/test/validation-classification.rb +67 -0
- data/test/validation-nanoparticle.rb +133 -0
- data/test/validation-regression.rb +92 -0
- metadata +50 -121
- data/test/classification.rb +0 -41
- data/test/data/CPDBAS_v5c_1547_29Apr2008part.sdf +0 -13553
- data/test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_mouse_TD50.csv +0 -436
- data/test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_rat_TD50.csv +0 -568
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Hamster.csv +0 -87
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mouse.csv +0 -978
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall.csv +0 -1120
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall_no_duplicates.csv +0 -1113
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity.csv +0 -850
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity_no_duplicates.csv +0 -829
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Rat.csv +0 -1198
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_SingleCellCall.csv +0 -1505
- data/test/data/EPA_v4b_Fathead_Minnow_Acute_Toxicity_LC50_mmol.csv +0 -581
- data/test/data/FDA_v3b_Maximum_Recommended_Daily_Dose_mmol.csv +0 -1217
- data/test/data/LOAEL_log_mg_corrected_smiles.csv +0 -568
- data/test/data/LOAEL_log_mmol_corrected_smiles.csv +0 -568
- data/test/data/boiling_points.ext.sdf +0 -11460
- data/test/data/cpdb_100.csv +0 -101
- data/test/data/hamster_carcinogenicity.ntriples +0 -618
- data/test/data/hamster_carcinogenicity.sdf +0 -2805
- data/test/data/hamster_carcinogenicity.xls +0 -0
- data/test/data/hamster_carcinogenicity.yaml +0 -352
- data/test/dataset-long.rb +0 -114
- data/test/lazar-long.rb +0 -92
- data/test/lazar-physchem-short.rb +0 -31
- data/test/prediction_models.rb +0 -20
- data/test/regression.rb +0 -43
- data/test/validation.rb +0 -108
data/test/dataset.rb
CHANGED
@@ -1,9 +1,16 @@
|
|
1
|
-
# TODO; check compound/data_entry sequences with missing and duplicated values
|
2
|
-
|
3
1
|
require_relative "setup.rb"
|
4
2
|
|
5
3
|
class DatasetTest < MiniTest::Test
|
6
4
|
|
5
|
+
# basics
|
6
|
+
|
7
|
+
def test_create_empty
|
8
|
+
d = Dataset.new
|
9
|
+
assert_equal Dataset, d.class
|
10
|
+
refute_nil d.id
|
11
|
+
assert_kind_of BSON::ObjectId, d.id
|
12
|
+
end
|
13
|
+
|
7
14
|
def test_all
|
8
15
|
d1 = Dataset.new
|
9
16
|
d1.save
|
@@ -12,145 +19,182 @@ class DatasetTest < MiniTest::Test
|
|
12
19
|
d1.delete
|
13
20
|
end
|
14
21
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
22
|
+
# real datasets
|
23
|
+
|
24
|
+
def test_upload_hamster
|
25
|
+
d = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv"
|
26
|
+
assert_equal Dataset, d.class
|
27
|
+
assert_equal 1, d.features.size
|
28
|
+
assert_equal 85, d.compounds.size
|
29
|
+
csv = CSV.read("#{DATA_DIR}/hamster_carcinogenicity.csv")
|
30
|
+
csv.shift
|
31
|
+
csv.each do |row|
|
32
|
+
c = Compound.from_smiles row.shift
|
33
|
+
assert_equal row, d.values(c,d.features.first)
|
24
34
|
end
|
35
|
+
d.delete
|
25
36
|
end
|
26
37
|
|
27
|
-
def
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
38
|
+
def test_upload_kazius
|
39
|
+
f = File.join DATA_DIR, "kazius.csv"
|
40
|
+
d = OpenTox::Dataset.from_csv_file f
|
41
|
+
csv = CSV.read f
|
42
|
+
assert_equal csv.size-1, d.compounds.size
|
43
|
+
assert_equal csv.first.size-1, d.features.size
|
44
|
+
assert_empty d.warnings
|
45
|
+
# 493 COC1=C(C=C(C(=C1)Cl)OC)Cl,1
|
46
|
+
c = d.compounds[491]
|
47
|
+
assert_equal c.smiles, "COc1cc(Cl)c(cc1Cl)OC"
|
48
|
+
assert_equal ["1"], d.values(c,d.features.first)
|
49
|
+
d.delete
|
32
50
|
end
|
33
51
|
|
34
|
-
def
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
d
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
+
def test_upload_multicell
|
53
|
+
duplicates = [
|
54
|
+
"InChI=1S/C6HCl5O/c7-1-2(8)4(10)6(12)5(11)3(1)9/h12H",
|
55
|
+
"InChI=1S/C12H8Cl6O/c13-8-9(14)11(16)5-3-1-2(6-7(3)19-6)4(5)10(8,15)12(11,17)18/h2-7H,1H2",
|
56
|
+
"InChI=1S/C2HCl3/c3-1-2(4)5/h1H",
|
57
|
+
"InChI=1S/C4H5Cl/c1-3-4(2)5/h3H,1-2H2",
|
58
|
+
"InChI=1S/C4H7Cl/c1-4(2)3-5/h1,3H2,2H3",
|
59
|
+
"InChI=1S/C8H14O4/c1-5-4-8(11-6(2)9)12-7(3)10-5/h5,7-8H,4H2,1-3H3",
|
60
|
+
"InChI=1S/C19H30O5/c1-3-5-7-20-8-9-21-10-11-22-14-17-13-19-18(23-15-24-19)12-16(17)6-4-2/h12-13H,3-11,14-15H2,1-2H3",
|
61
|
+
].collect{|inchi| Compound.from_inchi(inchi).smiles}
|
62
|
+
errors = ['O=P(H)(OC)OC', 'C=CCNN.HCl' ]
|
63
|
+
f = File.join DATA_DIR, "multi_cell_call.csv"
|
64
|
+
d = OpenTox::Dataset.from_csv_file f
|
65
|
+
csv = CSV.read f
|
66
|
+
assert_equal true, d.features.first.nominal?
|
67
|
+
assert_equal 1056, d.compounds.size
|
68
|
+
assert_equal csv.first.size-1, d.features.size
|
69
|
+
errors.each do |smi|
|
70
|
+
refute_empty d.warnings.grep %r{#{Regexp.escape(smi)}}
|
71
|
+
end
|
72
|
+
duplicates.each do |smi|
|
73
|
+
refute_empty d.warnings.grep %r{#{Regexp.escape(smi)}}
|
52
74
|
end
|
53
|
-
|
54
|
-
# wrong feature size
|
55
|
-
# << operator was removed for efficiency reasons (CH)
|
56
|
-
#assert_raises BadRequestError do
|
57
|
-
# d << [Compound.from_smiles("c1ccccc1NN"), 1,2,3]
|
58
|
-
#end
|
59
|
-
|
60
|
-
# manual low-level insertions without consistency checks for runtime efficiency
|
61
|
-
data_entries = []
|
62
|
-
d.compound_ids << Compound.from_smiles("c1ccccc1NN").id
|
63
|
-
data_entries << [1,2]
|
64
|
-
d.compound_ids << Compound.from_smiles("CC(C)N").id
|
65
|
-
data_entries << [4,5]
|
66
|
-
d.compound_ids << Compound.from_smiles("C1C(C)CCCC1").id
|
67
|
-
data_entries << [6,7]
|
68
|
-
d.data_entries = data_entries
|
69
|
-
assert_equal 3, d.compounds.size
|
70
|
-
assert_equal 2, d.features.size
|
71
|
-
assert_equal [[1,2],[4,5],[6,7]], d.data_entries
|
72
|
-
d.save
|
73
|
-
# check if dataset has been saved correctly
|
74
|
-
new_dataset = Dataset.find d.id
|
75
|
-
assert_equal 3, new_dataset.compounds.size
|
76
|
-
assert_equal 2, new_dataset.features.size
|
77
|
-
assert_equal [[1,2],[4,5],[6,7]], new_dataset.data_entries
|
78
75
|
d.delete
|
79
|
-
assert_nil Dataset.find d.id
|
80
|
-
assert_nil Dataset.find new_dataset.id
|
81
76
|
end
|
82
77
|
|
83
|
-
def
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
assert_equal "multicolumn", new_dataset.name
|
90
|
-
# get features
|
91
|
-
assert_equal 6, new_dataset.features.size
|
92
|
-
assert_equal 7, new_dataset.compounds.size
|
93
|
-
assert_equal ["1", nil, "false", nil, nil, 1.0], new_dataset.data_entries.last
|
78
|
+
def test_upload_isscan
|
79
|
+
f = File.join DATA_DIR, "ISSCAN-multi.csv"
|
80
|
+
d = OpenTox::Dataset.from_csv_file f
|
81
|
+
csv = CSV.read f
|
82
|
+
assert_equal csv.size-1, d.compounds.size
|
83
|
+
assert_equal csv.first.size-1, d.features.size
|
94
84
|
d.delete
|
95
85
|
end
|
96
86
|
|
97
|
-
def
|
98
|
-
|
87
|
+
def test_upload_epafhm
|
88
|
+
f = File.join DATA_DIR, "EPAFHM_log10.csv"
|
89
|
+
d = OpenTox::Dataset.from_csv_file f
|
99
90
|
assert_equal Dataset, d.class
|
100
|
-
|
101
|
-
|
102
|
-
assert_equal
|
103
|
-
d.
|
104
|
-
|
91
|
+
csv = CSV.read f
|
92
|
+
assert_equal csv.size-1, d.compounds.size
|
93
|
+
assert_equal csv.first.size-1, d.features.size
|
94
|
+
assert_match "EPAFHM_log10.csv", d.source
|
95
|
+
assert_equal "EPAFHM_log10", d.name
|
96
|
+
feature = d.features.first
|
97
|
+
assert_kind_of NumericFeature, feature
|
98
|
+
assert_equal -Math.log10(0.0113), d.values(d.compounds.first,feature).first
|
99
|
+
assert_equal -Math.log10(0.00323), d.values(d.compounds[4],feature).first
|
100
|
+
d2 = Dataset.find d.id
|
101
|
+
assert_equal -Math.log10(0.0113), d2.values(d2.compounds[0],feature).first
|
102
|
+
assert_equal -Math.log10(0.00323), d2.values(d2.compounds[4],feature).first
|
103
|
+
d.delete
|
105
104
|
end
|
106
105
|
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
106
|
+
# batch predictions
|
107
|
+
|
108
|
+
def test_create_without_features_smiles_and_inchi
|
109
|
+
["smiles", "inchi"].each do |type|
|
110
|
+
d = Dataset.from_csv_file File.join(DATA_DIR,"batch_prediction_#{type}_small.csv"), true
|
111
|
+
assert_equal Dataset, d.class
|
112
|
+
refute_nil d.id
|
113
|
+
dataset = Dataset.find d.id
|
114
|
+
assert_equal 3, d.compounds.size
|
115
|
+
d.delete
|
116
|
+
end
|
112
117
|
end
|
113
118
|
|
114
|
-
|
119
|
+
# dataset operations
|
120
|
+
|
121
|
+
def test_folds
|
122
|
+
dataset = Dataset.from_csv_file File.join(DATA_DIR,"loael.csv")
|
123
|
+
dataset.folds(10).each do |fold|
|
124
|
+
fold.each do |d|
|
125
|
+
assert_operator d.compounds.size, :>=, d.compounds.uniq.size
|
126
|
+
end
|
127
|
+
assert_operator fold[0].compounds.size, :>=, fold[1].compounds.size
|
128
|
+
assert_equal dataset.substances.size, fold.first.substances.size + fold.last.substances.size
|
129
|
+
assert_empty (fold.first.substances & fold.last.substances)
|
130
|
+
end
|
131
|
+
end
|
132
|
+
|
133
|
+
# serialisation
|
134
|
+
|
135
|
+
def test_to_csv
|
115
136
|
d = Dataset.from_csv_file "#{DATA_DIR}/multicolumn.csv"
|
116
137
|
refute_nil d.warnings
|
117
138
|
assert d.warnings.grep(/Duplicate compound/)
|
118
139
|
assert d.warnings.grep(/3, 5/)
|
119
140
|
assert_equal 6, d.features.size
|
120
|
-
assert_equal
|
141
|
+
assert_equal 5, d.compounds.uniq.size
|
121
142
|
assert_equal 5, d.compounds.collect{|c| c.inchi}.uniq.size
|
122
|
-
assert_equal [["1", "1", "true", "true", "test", 1.1], ["1", "2", "false", "7.5", "test", 0.24], ["1", "3", "true", "5", "test", 3578.239], ["0", "4", "false", "false", "test", -2.35], ["1", "2", "true", "4", "test_2", 1], ["1", "2", "false", "false", "test", -1.5], ["1", nil, "false", nil, nil, 1.0]], d.data_entries
|
123
|
-
assert_equal "c1ccc[nH]1,1,,false,,,1.0", d.to_csv.split("\n")[7]
|
124
143
|
csv = CSV.parse(d.to_csv)
|
125
144
|
original_csv = CSV.read("#{DATA_DIR}/multicolumn.csv")
|
126
145
|
csv.shift
|
127
146
|
original_csv.shift
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
147
|
+
original = {}
|
148
|
+
original_csv.each do |row|
|
149
|
+
c = Compound.from_smiles row.shift.strip
|
150
|
+
original[c.inchi] = row.collect{|v| v.strip}
|
151
|
+
end
|
152
|
+
serialized = {}
|
153
|
+
csv.each do |row|
|
154
|
+
c = Compound.from_smiles row.shift
|
155
|
+
serialized[c.inchi] = row
|
156
|
+
end
|
157
|
+
#puts serialized.to_yaml
|
158
|
+
original.each do |inchi,row|
|
159
|
+
row.each_with_index do |v,i|
|
133
160
|
if v.numeric?
|
134
|
-
assert_equal
|
161
|
+
assert_equal v.to_f, serialized[inchi][i].to_f
|
135
162
|
else
|
136
|
-
assert_equal
|
163
|
+
assert_equal v, serialized[inchi][i]
|
137
164
|
end
|
138
165
|
end
|
166
|
+
|
139
167
|
end
|
140
168
|
d.delete
|
141
169
|
end
|
142
170
|
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
csv.
|
151
|
-
assert_equal
|
152
|
-
|
153
|
-
|
171
|
+
# special cases/details
|
172
|
+
|
173
|
+
def test_dataset_accessors
|
174
|
+
d = Dataset.from_csv_file "#{DATA_DIR}/multicolumn.csv"
|
175
|
+
# create empty dataset
|
176
|
+
new_dataset = Dataset.find d.id
|
177
|
+
# get metadata
|
178
|
+
assert_match "multicolumn.csv", new_dataset.source
|
179
|
+
assert_equal "multicolumn", new_dataset.name
|
180
|
+
# get features
|
181
|
+
assert_equal 6, new_dataset.features.size
|
182
|
+
assert_equal 5, new_dataset.compounds.uniq.size
|
183
|
+
c = new_dataset.compounds.last
|
184
|
+
f = new_dataset.features.first
|
185
|
+
assert_equal ["1"], new_dataset.values(c,f)
|
186
|
+
f = new_dataset.features.last.id.to_s
|
187
|
+
assert_equal [1.0], new_dataset.values(c,f)
|
188
|
+
f = new_dataset.features[2]
|
189
|
+
assert_equal ["false"], new_dataset.values(c,f)
|
190
|
+
d.delete
|
191
|
+
end
|
192
|
+
|
193
|
+
def test_create_from_file_with_wrong_smiles_compound_entries
|
194
|
+
d = Dataset.from_csv_file File.join(DATA_DIR,"wrong_dataset.csv")
|
195
|
+
refute_nil d.warnings
|
196
|
+
assert_match /2|3|4|5|6|7|8/, d.warnings.join
|
197
|
+
d.delete
|
154
198
|
end
|
155
199
|
|
156
200
|
def test_from_csv_classification
|
@@ -158,9 +202,9 @@ class DatasetTest < MiniTest::Test
|
|
158
202
|
d = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.mini.bool_#{mode}.csv"
|
159
203
|
csv = CSV.read("#{DATA_DIR}/hamster_carcinogenicity.mini.bool_#{mode}.csv")
|
160
204
|
csv.shift
|
161
|
-
|
162
|
-
|
163
|
-
assert_equal
|
205
|
+
csv.each do |row|
|
206
|
+
c = Compound.from_smiles row.shift
|
207
|
+
assert_equal row, d.values(c,d.features.first)
|
164
208
|
end
|
165
209
|
d.delete
|
166
210
|
end
|
@@ -169,7 +213,7 @@ class DatasetTest < MiniTest::Test
|
|
169
213
|
def test_from_csv2
|
170
214
|
File.open("#{DATA_DIR}/temp_test.csv", "w+") { |file| file.write("SMILES,Hamster\nCC=O,true\n ,true\nO=C(N),true") }
|
171
215
|
dataset = Dataset.from_csv_file "#{DATA_DIR}/temp_test.csv"
|
172
|
-
assert_equal "Cannot parse SMILES compound '' at
|
216
|
+
assert_equal "Cannot parse SMILES compound '' at line 3 of /home/ist/lazar/test/data/temp_test.csv, all entries are ignored.", dataset.warnings.join
|
173
217
|
File.delete "#{DATA_DIR}/temp_test.csv"
|
174
218
|
dataset.features.each{|f| feature = Feature.find f.id; feature.delete}
|
175
219
|
dataset.delete
|
@@ -187,32 +231,50 @@ class DatasetTest < MiniTest::Test
|
|
187
231
|
datasets.each{|d| d.delete}
|
188
232
|
end
|
189
233
|
|
190
|
-
def
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
234
|
+
def test_simultanous_upload
|
235
|
+
threads = []
|
236
|
+
3.times do |t|
|
237
|
+
threads << Thread.new(t) do |up|
|
238
|
+
d = OpenTox::Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv"
|
239
|
+
assert_equal OpenTox::Dataset, d.class
|
240
|
+
assert_equal 1, d.features.size
|
241
|
+
assert_equal 85, d.compounds.size
|
242
|
+
csv = CSV.read("#{DATA_DIR}/hamster_carcinogenicity.csv")
|
243
|
+
csv.shift
|
244
|
+
csv.each do |row|
|
245
|
+
c = Compound.from_smiles(row.shift)
|
246
|
+
assert_equal row, d.values(c,d.features.first)
|
247
|
+
end
|
248
|
+
d.delete
|
249
|
+
end
|
250
|
+
end
|
251
|
+
threads.each {|aThread| aThread.join}
|
204
252
|
end
|
205
253
|
|
206
|
-
def
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
254
|
+
def test_upload_feature_dataset
|
255
|
+
skip
|
256
|
+
t = Time.now
|
257
|
+
f = File.join DATA_DIR, "rat_feature_dataset.csv"
|
258
|
+
d = Dataset.from_csv_file f
|
259
|
+
assert_equal 458, d.features.size
|
260
|
+
d.save
|
261
|
+
#p "Upload: #{Time.now-t}"
|
262
|
+
d2 = Dataset.find d.id
|
263
|
+
t = Time.now
|
264
|
+
assert_equal d.features.size, d2.features.size
|
265
|
+
csv = CSV.read f
|
266
|
+
csv.shift # remove header
|
267
|
+
assert_empty d2.warnings
|
268
|
+
assert_equal csv.size, d2.compounds.size
|
269
|
+
assert_equal csv.first.size-1, d2.features.size
|
270
|
+
d2.compounds.each_with_index do |compound,i|
|
271
|
+
row = csv[i]
|
272
|
+
row.shift # remove compound
|
273
|
+
assert_equal row, d2.data_entries[i]
|
214
274
|
end
|
215
|
-
#
|
275
|
+
#p "Dowload: #{Time.now-t}"
|
276
|
+
d2.delete
|
277
|
+
assert_nil Dataset.find d.id
|
216
278
|
end
|
217
279
|
|
218
280
|
end
|
data/test/descriptor.rb
CHANGED
@@ -4,15 +4,17 @@ class DescriptorTest < MiniTest::Test
|
|
4
4
|
|
5
5
|
def test_list
|
6
6
|
# check available descriptors
|
7
|
-
assert_equal 355,PhysChem.descriptors.size,"incorrect number of physchem descriptors"
|
8
7
|
assert_equal 15,PhysChem.openbabel_descriptors.size,"incorrect number of Openbabel descriptors"
|
9
|
-
assert_equal 295,PhysChem.cdk_descriptors.size,"incorrect number of Cdk descriptors"
|
10
8
|
assert_equal 45,PhysChem.joelib_descriptors.size,"incorrect number of Joelib descriptors"
|
9
|
+
assert_equal 286,PhysChem.cdk_descriptors.size,"incorrect number of Cdk descriptors"
|
10
|
+
assert_equal 346,PhysChem.descriptors.size,"incorrect number of physchem descriptors"
|
11
11
|
end
|
12
12
|
|
13
13
|
def test_smarts
|
14
14
|
c = OpenTox::Compound.from_smiles "N=C=C1CCC(=F=FO)C1"
|
15
|
-
File.open("tmp.png","w+"){|f| f.puts c.png}
|
15
|
+
File.open("/tmp/tmp.png","w+"){|f| f.puts c.png}
|
16
|
+
assert_match /^PNG/,`file -b /tmp/tmp.png`
|
17
|
+
File.delete "/tmp/tmp.png"
|
16
18
|
s = Smarts.find_or_create_by(:smarts => "F=F")
|
17
19
|
result = c.smarts_match [s]
|
18
20
|
assert_equal [1], result
|
@@ -26,43 +28,50 @@ class DescriptorTest < MiniTest::Test
|
|
26
28
|
|
27
29
|
def test_compound_openbabel_single
|
28
30
|
c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N"
|
29
|
-
|
30
|
-
|
31
|
+
feature = PhysChem.find_or_create_by(:name => "Openbabel.logP")
|
32
|
+
result = c.calculate_properties([feature])
|
33
|
+
assert_equal 1.12518, result.first.round(5)
|
34
|
+
assert_equal 1.12518, c.properties[feature.id.to_s].round(5)
|
31
35
|
end
|
32
36
|
|
33
37
|
def test_compound_cdk_single
|
34
38
|
c = OpenTox::Compound.from_smiles "c1ccccc1"
|
35
|
-
|
36
|
-
|
39
|
+
feature = PhysChem.find_or_create_by(:name => "Cdk.AtomCount.nAtom")
|
40
|
+
result = c.calculate_properties([feature])
|
41
|
+
assert_equal 12, result.first
|
37
42
|
c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N"
|
38
|
-
|
39
|
-
|
43
|
+
feature = PhysChem.find_or_create_by(:name => "Cdk.AtomCount.nAtom")
|
44
|
+
result = c.calculate_properties([feature])
|
45
|
+
assert_equal 17, result.first
|
40
46
|
c_types = {"Cdk.CarbonTypes.C1SP1"=>1, "Cdk.CarbonTypes.C2SP1"=>0, "Cdk.CarbonTypes.C1SP2"=>0, "Cdk.CarbonTypes.C2SP2"=>1, "Cdk.CarbonTypes.C3SP2"=>0, "Cdk.CarbonTypes.C1SP3"=>2, "Cdk.CarbonTypes.C2SP3"=>1, "Cdk.CarbonTypes.C3SP3"=>1, "Cdk.CarbonTypes.C4SP3"=>0}
|
41
47
|
physchem_features = c_types.collect{|t,nr| PhysChem.find_or_create_by(:name => t)}
|
42
|
-
result = c.
|
43
|
-
assert_equal [1, 0, 0, 1, 0, 2, 1, 1, 0], result
|
48
|
+
result = c.calculate_properties physchem_features
|
49
|
+
assert_equal [1, 0, 0, 1, 0, 2, 1, 1, 0], result
|
44
50
|
end
|
45
51
|
|
46
52
|
def test_compound_joelib_single
|
47
53
|
c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N"
|
48
|
-
result = c.
|
49
|
-
assert_equal 2.65908, result.first
|
54
|
+
result = c.calculate_properties [PhysChem.find_or_create_by(:name => "Joelib.LogP")]
|
55
|
+
assert_equal 2.65908, result.first
|
50
56
|
end
|
51
57
|
|
52
58
|
def test_compound_all
|
53
59
|
c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N"
|
54
|
-
result = c.physchem PhysChem.descriptors
|
55
60
|
amr = PhysChem.find_or_create_by(:name => "Cdk.ALOGP.AMR", :library => "Cdk")
|
56
61
|
sbonds = PhysChem.find_by(:name => "Openbabel.sbonds")
|
57
|
-
|
58
|
-
assert_equal
|
62
|
+
result = c.calculate_properties([amr,sbonds])
|
63
|
+
assert_equal 30.8723, result[0]
|
64
|
+
assert_equal 5, result[1]
|
59
65
|
end
|
60
66
|
|
61
67
|
def test_compound_descriptor_parameters
|
68
|
+
PhysChem.descriptors
|
62
69
|
c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N"
|
63
|
-
result = c.
|
70
|
+
result = c.calculate_properties [ "Openbabel.logP", "Cdk.AtomCount.nAtom", "Joelib.LogP" ].collect{|d| PhysChem.find_or_create_by(:name => d)}
|
64
71
|
assert_equal 3, result.size
|
65
|
-
assert_equal
|
72
|
+
assert_equal 1.12518, result[0].round(5)
|
73
|
+
assert_equal 17.0, result[1].round(5)
|
74
|
+
assert_equal 2.65908, result[2].round(5)
|
66
75
|
end
|
67
76
|
|
68
77
|
end
|
data/test/error.rb
CHANGED
data/test/experiment.rb
CHANGED
@@ -5,7 +5,7 @@ class ExperimentTest < MiniTest::Test
|
|
5
5
|
def test_regression_experiment
|
6
6
|
skip
|
7
7
|
datasets = [
|
8
|
-
"EPAFHM.
|
8
|
+
"EPAFHM.medi_log10.csv",
|
9
9
|
#"EPAFHM.csv",
|
10
10
|
#"FDA_v3b_Maximum_Recommended_Daily_Dose_mmol.csv",
|
11
11
|
"LOAEL_mmol_corrected_smiles.csv"
|
@@ -68,7 +68,7 @@ class ExperimentTest < MiniTest::Test
|
|
68
68
|
skip
|
69
69
|
#=begin
|
70
70
|
datasets = [
|
71
|
-
"EPAFHM.
|
71
|
+
"EPAFHM.medi_log10.csv",
|
72
72
|
#"LOAEL_mmol_corrected_smiles.csv"
|
73
73
|
]
|
74
74
|
min_sims = [0.3,0.7]
|
@@ -118,7 +118,7 @@ class ExperimentTest < MiniTest::Test
|
|
118
118
|
def test_mpd_fingerprints
|
119
119
|
skip
|
120
120
|
datasets = [
|
121
|
-
"EPAFHM.
|
121
|
+
"EPAFHM.medi_log10.csv",
|
122
122
|
]
|
123
123
|
types = ["FP2","MP2D"]
|
124
124
|
experiment = Experiment.create(
|
@@ -147,7 +147,7 @@ class ExperimentTest < MiniTest::Test
|
|
147
147
|
def test_multiple_datasets
|
148
148
|
skip
|
149
149
|
datasets = [
|
150
|
-
"EPAFHM.
|
150
|
+
"EPAFHM.medi_log10.csv",
|
151
151
|
"LOAEL_mmol_corrected_smiles.csv"
|
152
152
|
]
|
153
153
|
min_sims = [0.3]
|
data/test/feature.rb
CHANGED
@@ -32,10 +32,9 @@ class FeatureTest < MiniTest::Test
|
|
32
32
|
def test_duplicated_features
|
33
33
|
metadata = {
|
34
34
|
:name => "feature duplication test",
|
35
|
-
:nominal => true,
|
36
35
|
}
|
37
|
-
feature =
|
38
|
-
dup_feature =
|
36
|
+
feature = NumericFeature.find_or_create_by metadata
|
37
|
+
dup_feature = NumericFeature.find_or_create_by metadata
|
39
38
|
assert_kind_of Feature, feature
|
40
39
|
assert !feature.id.nil?, "No Feature ID in #{feature.inspect}"
|
41
40
|
assert !feature.id.nil?, "No Feature ID in #{dup_feature.inspect}"
|
data/test/gridfs.rb
ADDED