lazar 0.9.3 → 1.0.0

Files changed (88)
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -4
  3. data/README.md +5 -15
  4. data/VERSION +1 -1
  5. data/ext/lazar/extconf.rb +1 -1
  6. data/ext/lazar/rinstall.R +9 -7
  7. data/java/CdkDescriptorInfo.class +0 -0
  8. data/java/CdkDescriptorInfo.java +3 -2
  9. data/java/CdkDescriptors.class +0 -0
  10. data/java/CdkDescriptors.java +28 -28
  11. data/java/Rakefile +3 -3
  12. data/java/{cdk-1.4.19.jar → cdk-2.0-SNAPSHOT.jar} +0 -0
  13. data/lazar.gemspec +6 -7
  14. data/lib/algorithm.rb +2 -11
  15. data/lib/caret.rb +96 -0
  16. data/lib/classification.rb +14 -22
  17. data/lib/compound.rb +21 -87
  18. data/lib/crossvalidation.rb +80 -279
  19. data/lib/dataset.rb +105 -174
  20. data/lib/feature.rb +11 -18
  21. data/lib/feature_selection.rb +42 -0
  22. data/lib/import.rb +122 -0
  23. data/lib/lazar.rb +14 -4
  24. data/lib/leave-one-out-validation.rb +46 -192
  25. data/lib/model.rb +319 -128
  26. data/lib/nanoparticle.rb +98 -0
  27. data/lib/opentox.rb +7 -4
  28. data/lib/overwrite.rb +24 -3
  29. data/lib/physchem.rb +11 -10
  30. data/lib/regression.rb +7 -137
  31. data/lib/rest-client-wrapper.rb +0 -6
  32. data/lib/similarity.rb +65 -0
  33. data/lib/substance.rb +8 -0
  34. data/lib/train-test-validation.rb +69 -0
  35. data/lib/validation-statistics.rb +223 -0
  36. data/lib/validation.rb +17 -100
  37. data/scripts/mg2mmol.rb +17 -0
  38. data/scripts/mirror-enm2test.rb +4 -0
  39. data/scripts/mmol2-log10.rb +32 -0
  40. data/test/compound.rb +4 -94
  41. data/test/data/EPAFHM.medi_log10.csv +92 -0
  42. data/test/data/EPAFHM.mini_log10.csv +16 -0
  43. data/test/data/EPAFHM_log10.csv +581 -0
  44. data/test/data/loael_log10.csv +568 -0
  45. data/test/dataset.rb +195 -133
  46. data/test/descriptor.rb +27 -18
  47. data/test/error.rb +2 -2
  48. data/test/experiment.rb +4 -4
  49. data/test/feature.rb +2 -3
  50. data/test/gridfs.rb +10 -0
  51. data/test/model-classification.rb +106 -0
  52. data/test/model-nanoparticle.rb +128 -0
  53. data/test/model-regression.rb +171 -0
  54. data/test/model-validation.rb +19 -0
  55. data/test/nanomaterial-model-validation.rb +55 -0
  56. data/test/setup.rb +8 -4
  57. data/test/validation-classification.rb +67 -0
  58. data/test/validation-nanoparticle.rb +133 -0
  59. data/test/validation-regression.rb +92 -0
  60. metadata +50 -121
  61. data/test/classification.rb +0 -41
  62. data/test/data/CPDBAS_v5c_1547_29Apr2008part.sdf +0 -13553
  63. data/test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_mouse_TD50.csv +0 -436
  64. data/test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_rat_TD50.csv +0 -568
  65. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Hamster.csv +0 -87
  66. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mouse.csv +0 -978
  67. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall.csv +0 -1120
  68. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall_no_duplicates.csv +0 -1113
  69. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity.csv +0 -850
  70. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity_no_duplicates.csv +0 -829
  71. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Rat.csv +0 -1198
  72. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_SingleCellCall.csv +0 -1505
  73. data/test/data/EPA_v4b_Fathead_Minnow_Acute_Toxicity_LC50_mmol.csv +0 -581
  74. data/test/data/FDA_v3b_Maximum_Recommended_Daily_Dose_mmol.csv +0 -1217
  75. data/test/data/LOAEL_log_mg_corrected_smiles.csv +0 -568
  76. data/test/data/LOAEL_log_mmol_corrected_smiles.csv +0 -568
  77. data/test/data/boiling_points.ext.sdf +0 -11460
  78. data/test/data/cpdb_100.csv +0 -101
  79. data/test/data/hamster_carcinogenicity.ntriples +0 -618
  80. data/test/data/hamster_carcinogenicity.sdf +0 -2805
  81. data/test/data/hamster_carcinogenicity.xls +0 -0
  82. data/test/data/hamster_carcinogenicity.yaml +0 -352
  83. data/test/dataset-long.rb +0 -114
  84. data/test/lazar-long.rb +0 -92
  85. data/test/lazar-physchem-short.rb +0 -31
  86. data/test/prediction_models.rb +0 -20
  87. data/test/regression.rb +0 -43
  88. data/test/validation.rb +0 -108
data/test/dataset.rb CHANGED
@@ -1,9 +1,16 @@
- # TODO; check compound/data_entry sequences with missing and duplicated values
-
  require_relative "setup.rb"

  class DatasetTest < MiniTest::Test

+ # basics
+
+ def test_create_empty
+ d = Dataset.new
+ assert_equal Dataset, d.class
+ refute_nil d.id
+ assert_kind_of BSON::ObjectId, d.id
+ end
+
  def test_all
  d1 = Dataset.new
  d1.save
@@ -12,145 +19,182 @@ class DatasetTest < MiniTest::Test
  d1.delete
  end

- def test_create_without_features_smiles_and_inchi
- ["smiles", "inchi"].each do |type|
- d = Dataset.from_csv_file File.join(DATA_DIR,"batch_prediction_#{type}_small.csv")
- assert_equal Dataset, d.class
- refute_nil d.id
- dataset = Dataset.find d.id
- #p dataset.compounds
- assert_equal 3, d.compounds.size.to_i
- d.delete
+ # real datasets
+
+ def test_upload_hamster
+ d = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv"
+ assert_equal Dataset, d.class
+ assert_equal 1, d.features.size
+ assert_equal 85, d.compounds.size
+ csv = CSV.read("#{DATA_DIR}/hamster_carcinogenicity.csv")
+ csv.shift
+ csv.each do |row|
+ c = Compound.from_smiles row.shift
+ assert_equal row, d.values(c,d.features.first)
  end
+ d.delete
  end

- def test_create_empty
- d = Dataset.new
- assert_equal Dataset, d.class
- refute_nil d.id
- assert_kind_of BSON::ObjectId, d.id
+ def test_upload_kazius
+ f = File.join DATA_DIR, "kazius.csv"
+ d = OpenTox::Dataset.from_csv_file f
+ csv = CSV.read f
+ assert_equal csv.size-1, d.compounds.size
+ assert_equal csv.first.size-1, d.features.size
+ assert_empty d.warnings
+ # 493 COC1=C(C=C(C(=C1)Cl)OC)Cl,1
+ c = d.compounds[491]
+ assert_equal c.smiles, "COc1cc(Cl)c(cc1Cl)OC"
+ assert_equal ["1"], d.values(c,d.features.first)
+ d.delete
  end

- def test_client_create
- d = Dataset.new
- assert_equal Dataset, d.class
- d.name = "Create dataset test"
-
- # features not set
- # << operator was removed for efficiency reasons (CH)
- #assert_raises BadRequestError do
- # d << [Compound.from_smiles("c1ccccc1NN"), 1,2]
- #end
-
- # add data entries
- d.features = ["test1", "test2"].collect do |title|
- f = Feature.new
- f.name = title
- f.numeric = true
- f.save
- f
+ def test_upload_multicell
+ duplicates = [
+ "InChI=1S/C6HCl5O/c7-1-2(8)4(10)6(12)5(11)3(1)9/h12H",
+ "InChI=1S/C12H8Cl6O/c13-8-9(14)11(16)5-3-1-2(6-7(3)19-6)4(5)10(8,15)12(11,17)18/h2-7H,1H2",
+ "InChI=1S/C2HCl3/c3-1-2(4)5/h1H",
+ "InChI=1S/C4H5Cl/c1-3-4(2)5/h3H,1-2H2",
+ "InChI=1S/C4H7Cl/c1-4(2)3-5/h1,3H2,2H3",
+ "InChI=1S/C8H14O4/c1-5-4-8(11-6(2)9)12-7(3)10-5/h5,7-8H,4H2,1-3H3",
+ "InChI=1S/C19H30O5/c1-3-5-7-20-8-9-21-10-11-22-14-17-13-19-18(23-15-24-19)12-16(17)6-4-2/h12-13H,3-11,14-15H2,1-2H3",
+ ].collect{|inchi| Compound.from_inchi(inchi).smiles}
+ errors = ['O=P(H)(OC)OC', 'C=CCNN.HCl' ]
+ f = File.join DATA_DIR, "multi_cell_call.csv"
+ d = OpenTox::Dataset.from_csv_file f
+ csv = CSV.read f
+ assert_equal true, d.features.first.nominal?
+ assert_equal 1056, d.compounds.size
+ assert_equal csv.first.size-1, d.features.size
+ errors.each do |smi|
+ refute_empty d.warnings.grep %r{#{Regexp.escape(smi)}}
+ end
+ duplicates.each do |smi|
+ refute_empty d.warnings.grep %r{#{Regexp.escape(smi)}}
  end
-
- # wrong feature size
- # << operator was removed for efficiency reasons (CH)
- #assert_raises BadRequestError do
- # d << [Compound.from_smiles("c1ccccc1NN"), 1,2,3]
- #end
-
- # manual low-level insertions without consistency checks for runtime efficiency
- data_entries = []
- d.compound_ids << Compound.from_smiles("c1ccccc1NN").id
- data_entries << [1,2]
- d.compound_ids << Compound.from_smiles("CC(C)N").id
- data_entries << [4,5]
- d.compound_ids << Compound.from_smiles("C1C(C)CCCC1").id
- data_entries << [6,7]
- d.data_entries = data_entries
- assert_equal 3, d.compounds.size
- assert_equal 2, d.features.size
- assert_equal [[1,2],[4,5],[6,7]], d.data_entries
- d.save
- # check if dataset has been saved correctly
- new_dataset = Dataset.find d.id
- assert_equal 3, new_dataset.compounds.size
- assert_equal 2, new_dataset.features.size
- assert_equal [[1,2],[4,5],[6,7]], new_dataset.data_entries
  d.delete
- assert_nil Dataset.find d.id
- assert_nil Dataset.find new_dataset.id
  end

- def test_dataset_accessors
- d = Dataset.from_csv_file "#{DATA_DIR}/multicolumn.csv"
- # create empty dataset
- new_dataset = Dataset.find d.id
- # get metadata
- assert_match "multicolumn.csv", new_dataset.source
- assert_equal "multicolumn", new_dataset.name
- # get features
- assert_equal 6, new_dataset.features.size
- assert_equal 7, new_dataset.compounds.size
- assert_equal ["1", nil, "false", nil, nil, 1.0], new_dataset.data_entries.last
+ def test_upload_isscan
+ f = File.join DATA_DIR, "ISSCAN-multi.csv"
+ d = OpenTox::Dataset.from_csv_file f
+ csv = CSV.read f
+ assert_equal csv.size-1, d.compounds.size
+ assert_equal csv.first.size-1, d.features.size
  d.delete
  end

- def test_create_from_file
- d = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.mini.csv")
+ def test_upload_epafhm
+ f = File.join DATA_DIR, "EPAFHM_log10.csv"
+ d = OpenTox::Dataset.from_csv_file f
  assert_equal Dataset, d.class
- refute_nil d.warnings
- assert_match "EPAFHM.mini.csv", d.source
- assert_equal "EPAFHM.mini.csv", d.name
- d.delete
- #assert_equal false, URI.accessible?(d.uri)
+ csv = CSV.read f
+ assert_equal csv.size-1, d.compounds.size
+ assert_equal csv.first.size-1, d.features.size
+ assert_match "EPAFHM_log10.csv", d.source
+ assert_equal "EPAFHM_log10", d.name
+ feature = d.features.first
+ assert_kind_of NumericFeature, feature
+ assert_equal -Math.log10(0.0113), d.values(d.compounds.first,feature).first
+ assert_equal -Math.log10(0.00323), d.values(d.compounds[4],feature).first
+ d2 = Dataset.find d.id
+ assert_equal -Math.log10(0.0113), d2.values(d2.compounds[0],feature).first
+ assert_equal -Math.log10(0.00323), d2.values(d2.compounds[4],feature).first
+ d.delete
  end

- def test_create_from_file_with_wrong_smiles_compound_entries
- d = Dataset.from_csv_file File.join(DATA_DIR,"wrong_dataset.csv")
- refute_nil d.warnings
- assert_match /2|3|4|5|6|7|8/, d.warnings.join
- d.delete
+ # batch predictions
+
+ def test_create_without_features_smiles_and_inchi
+ ["smiles", "inchi"].each do |type|
+ d = Dataset.from_csv_file File.join(DATA_DIR,"batch_prediction_#{type}_small.csv"), true
+ assert_equal Dataset, d.class
+ refute_nil d.id
+ dataset = Dataset.find d.id
+ assert_equal 3, d.compounds.size
+ d.delete
+ end
  end

- def test_multicolumn_csv
+ # dataset operations
+
+ def test_folds
+ dataset = Dataset.from_csv_file File.join(DATA_DIR,"loael.csv")
+ dataset.folds(10).each do |fold|
+ fold.each do |d|
+ assert_operator d.compounds.size, :>=, d.compounds.uniq.size
+ end
+ assert_operator fold[0].compounds.size, :>=, fold[1].compounds.size
+ assert_equal dataset.substances.size, fold.first.substances.size + fold.last.substances.size
+ assert_empty (fold.first.substances & fold.last.substances)
+ end
+ end
+
+ # serialisation
+
+ def test_to_csv
  d = Dataset.from_csv_file "#{DATA_DIR}/multicolumn.csv"
  refute_nil d.warnings
  assert d.warnings.grep(/Duplicate compound/)
  assert d.warnings.grep(/3, 5/)
  assert_equal 6, d.features.size
- assert_equal 7, d.compounds.size
+ assert_equal 5, d.compounds.uniq.size
  assert_equal 5, d.compounds.collect{|c| c.inchi}.uniq.size
- assert_equal [["1", "1", "true", "true", "test", 1.1], ["1", "2", "false", "7.5", "test", 0.24], ["1", "3", "true", "5", "test", 3578.239], ["0", "4", "false", "false", "test", -2.35], ["1", "2", "true", "4", "test_2", 1], ["1", "2", "false", "false", "test", -1.5], ["1", nil, "false", nil, nil, 1.0]], d.data_entries
- assert_equal "c1ccc[nH]1,1,,false,,,1.0", d.to_csv.split("\n")[7]
  csv = CSV.parse(d.to_csv)
  original_csv = CSV.read("#{DATA_DIR}/multicolumn.csv")
  csv.shift
  original_csv.shift
- csv.each_with_index do |row,i|
- compound = Compound.from_smiles row.shift
- original_compound = Compound.from_smiles original_csv[i].shift.strip
- assert_equal original_compound.inchi, compound.inchi
- row.each_with_index do |v,j|
+ original = {}
+ original_csv.each do |row|
+ c = Compound.from_smiles row.shift.strip
+ original[c.inchi] = row.collect{|v| v.strip}
+ end
+ serialized = {}
+ csv.each do |row|
+ c = Compound.from_smiles row.shift
+ serialized[c.inchi] = row
+ end
+ #puts serialized.to_yaml
+ original.each do |inchi,row|
+ row.each_with_index do |v,i|
  if v.numeric?
- assert_equal original_csv[i][j].strip.to_f, row[j].to_f
+ assert_equal v.to_f, serialized[inchi][i].to_f
  else
- assert_equal original_csv[i][j].strip, row[j].to_s
+ assert_equal v, serialized[inchi][i]
  end
  end
+
  end
  d.delete
  end

- def test_from_csv
- d = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv"
- assert_equal Dataset, d.class
- assert_equal 1, d.features.size
- assert_equal 85, d.compounds.size
- assert_equal 85, d.data_entries.size
- csv = CSV.read("#{DATA_DIR}/hamster_carcinogenicity.csv")
- csv.shift
- assert_equal csv.collect{|r| r[1]}, d.data_entries.flatten
- d.delete
- #assert_equal false, URI.accessible?(d.uri)
+ # special cases/details
+
+ def test_dataset_accessors
+ d = Dataset.from_csv_file "#{DATA_DIR}/multicolumn.csv"
+ # create empty dataset
+ new_dataset = Dataset.find d.id
+ # get metadata
+ assert_match "multicolumn.csv", new_dataset.source
+ assert_equal "multicolumn", new_dataset.name
+ # get features
+ assert_equal 6, new_dataset.features.size
+ assert_equal 5, new_dataset.compounds.uniq.size
+ c = new_dataset.compounds.last
+ f = new_dataset.features.first
+ assert_equal ["1"], new_dataset.values(c,f)
+ f = new_dataset.features.last.id.to_s
+ assert_equal [1.0], new_dataset.values(c,f)
+ f = new_dataset.features[2]
+ assert_equal ["false"], new_dataset.values(c,f)
+ d.delete
+ end
+
+ def test_create_from_file_with_wrong_smiles_compound_entries
+ d = Dataset.from_csv_file File.join(DATA_DIR,"wrong_dataset.csv")
+ refute_nil d.warnings
+ assert_match /2|3|4|5|6|7|8/, d.warnings.join
+ d.delete
  end

  def test_from_csv_classification
@@ -158,9 +202,9 @@ class DatasetTest < MiniTest::Test
  d = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.mini.bool_#{mode}.csv"
  csv = CSV.read("#{DATA_DIR}/hamster_carcinogenicity.mini.bool_#{mode}.csv")
  csv.shift
- entries = d.data_entries.flatten
- csv.each_with_index do |r, i|
- assert_equal r[1].to_s, entries[i]
+ csv.each do |row|
+ c = Compound.from_smiles row.shift
+ assert_equal row, d.values(c,d.features.first)
  end
  d.delete
  end
@@ -169,7 +213,7 @@ class DatasetTest < MiniTest::Test
  def test_from_csv2
  File.open("#{DATA_DIR}/temp_test.csv", "w+") { |file| file.write("SMILES,Hamster\nCC=O,true\n ,true\nO=C(N),true") }
  dataset = Dataset.from_csv_file "#{DATA_DIR}/temp_test.csv"
- assert_equal "Cannot parse SMILES compound '' at position 3, all entries are ignored.", dataset.warnings.join
+ assert_equal "Cannot parse SMILES compound '' at line 3 of /home/ist/lazar/test/data/temp_test.csv, all entries are ignored.", dataset.warnings.join
  File.delete "#{DATA_DIR}/temp_test.csv"
  dataset.features.each{|f| feature = Feature.find f.id; feature.delete}
  dataset.delete
@@ -187,32 +231,50 @@ class DatasetTest < MiniTest::Test
  datasets.each{|d| d.delete}
  end

- def test_create_from_file
- d = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.mini.csv")
- assert_equal Dataset, d.class
- refute_nil d.warnings
- assert_match /row 13/, d.warnings.join
- assert_match "EPAFHM.mini.csv", d.source
- assert_equal 1, d.features.size
- feature = d.features.first
- assert_kind_of NumericBioAssay, feature
- assert_equal 0.0113, d.data_entries[0][0]
- assert_equal 0.00323, d.data_entries[5][0]
- d2 = Dataset.find d.id
- assert_equal 0.0113, d2.data_entries[0][0]
- assert_equal 0.00323, d2.data_entries[5][0]
+ def test_simultanous_upload
+ threads = []
+ 3.times do |t|
+ threads << Thread.new(t) do |up|
+ d = OpenTox::Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv"
+ assert_equal OpenTox::Dataset, d.class
+ assert_equal 1, d.features.size
+ assert_equal 85, d.compounds.size
+ csv = CSV.read("#{DATA_DIR}/hamster_carcinogenicity.csv")
+ csv.shift
+ csv.each do |row|
+ c = Compound.from_smiles(row.shift)
+ assert_equal row, d.values(c,d.features.first)
+ end
+ d.delete
+ end
+ end
+ threads.each {|aThread| aThread.join}
  end

- def test_folds
- dataset = Dataset.from_csv_file File.join(DATA_DIR,"loael.csv")
- dataset.folds(10).each do |fold|
- fold.each do |d|
- assert_equal d.data_entries.size, d.compound_ids.size
- assert_operator d.compound_ids.size, :>=, d.compound_ids.uniq.size
- end
- assert_operator fold[0].compound_ids.uniq.size, :>=, fold[1].compound_ids.uniq.size
+ def test_upload_feature_dataset
+ skip
+ t = Time.now
+ f = File.join DATA_DIR, "rat_feature_dataset.csv"
+ d = Dataset.from_csv_file f
+ assert_equal 458, d.features.size
+ d.save
+ #p "Upload: #{Time.now-t}"
+ d2 = Dataset.find d.id
+ t = Time.now
+ assert_equal d.features.size, d2.features.size
+ csv = CSV.read f
+ csv.shift # remove header
+ assert_empty d2.warnings
+ assert_equal csv.size, d2.compounds.size
+ assert_equal csv.first.size-1, d2.features.size
+ d2.compounds.each_with_index do |compound,i|
+ row = csv[i]
+ row.shift # remove compound
+ assert_equal row, d2.data_entries[i]
  end
- #puts dataset.folds 10
+ #p "Dowload: #{Time.now-t}"
+ d2.delete
+ assert_nil Dataset.find d.id
  end

  end
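
The dataset test changes above track the central API change in 1.0.0: most tests now read values per compound and feature via Dataset#values(compound, feature) instead of indexing the old data_entries matrix, and values come back as an array because a compound can occur in several CSV rows. A minimal usage sketch of that accessor, assuming the hamster_carcinogenicity.csv test file and the DATA_DIR constant from the test setup shown above:

  require_relative "setup.rb"  # test helper used throughout this suite

  # Load a CSV dataset and read each compound's values for the first feature,
  # mirroring the pattern used in test_upload_hamster.
  d = OpenTox::Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv"
  feature = d.features.first
  d.compounds.each do |compound|
    # array of values, one entry per CSV row containing this compound
    p d.values(compound, feature)
  end
  d.delete  # remove the dataset from MongoDB again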
data/test/descriptor.rb CHANGED
@@ -4,15 +4,17 @@ class DescriptorTest < MiniTest::Test

  def test_list
  # check available descriptors
- assert_equal 355,PhysChem.descriptors.size,"incorrect number of physchem descriptors"
  assert_equal 15,PhysChem.openbabel_descriptors.size,"incorrect number of Openbabel descriptors"
- assert_equal 295,PhysChem.cdk_descriptors.size,"incorrect number of Cdk descriptors"
  assert_equal 45,PhysChem.joelib_descriptors.size,"incorrect number of Joelib descriptors"
+ assert_equal 286,PhysChem.cdk_descriptors.size,"incorrect number of Cdk descriptors"
+ assert_equal 346,PhysChem.descriptors.size,"incorrect number of physchem descriptors"
  end

  def test_smarts
  c = OpenTox::Compound.from_smiles "N=C=C1CCC(=F=FO)C1"
- File.open("tmp.png","w+"){|f| f.puts c.png}
+ File.open("/tmp/tmp.png","w+"){|f| f.puts c.png}
+ assert_match /^PNG/,`file -b /tmp/tmp.png`
+ File.delete "/tmp/tmp.png"
  s = Smarts.find_or_create_by(:smarts => "F=F")
  result = c.smarts_match [s]
  assert_equal [1], result
@@ -26,43 +28,50 @@ class DescriptorTest < MiniTest::Test

  def test_compound_openbabel_single
  c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N"
- result = c.physchem [PhysChem.find_or_create_by(:name => "Openbabel.logP")]
- assert_equal 1.12518, result.first.last.round(5)
+ feature = PhysChem.find_or_create_by(:name => "Openbabel.logP")
+ result = c.calculate_properties([feature])
+ assert_equal 1.12518, result.first.round(5)
+ assert_equal 1.12518, c.properties[feature.id.to_s].round(5)
  end

  def test_compound_cdk_single
  c = OpenTox::Compound.from_smiles "c1ccccc1"
- result = c.physchem [PhysChem.find_or_create_by(:name => "Cdk.AtomCount.nAtom")]
- assert_equal 12, result.first.last
+ feature = PhysChem.find_or_create_by(:name => "Cdk.AtomCount.nAtom")
+ result = c.calculate_properties([feature])
+ assert_equal 12, result.first
  c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N"
- result = c.physchem [PhysChem.find_or_create_by(:name => "Cdk.AtomCount.nAtom")]
- assert_equal 17, result.first.last
+ feature = PhysChem.find_or_create_by(:name => "Cdk.AtomCount.nAtom")
+ result = c.calculate_properties([feature])
+ assert_equal 17, result.first
  c_types = {"Cdk.CarbonTypes.C1SP1"=>1, "Cdk.CarbonTypes.C2SP1"=>0, "Cdk.CarbonTypes.C1SP2"=>0, "Cdk.CarbonTypes.C2SP2"=>1, "Cdk.CarbonTypes.C3SP2"=>0, "Cdk.CarbonTypes.C1SP3"=>2, "Cdk.CarbonTypes.C2SP3"=>1, "Cdk.CarbonTypes.C3SP3"=>1, "Cdk.CarbonTypes.C4SP3"=>0}
  physchem_features = c_types.collect{|t,nr| PhysChem.find_or_create_by(:name => t)}
- result = c.physchem physchem_features
- assert_equal [1, 0, 0, 1, 0, 2, 1, 1, 0], result.values
+ result = c.calculate_properties physchem_features
+ assert_equal [1, 0, 0, 1, 0, 2, 1, 1, 0], result
  end

  def test_compound_joelib_single
  c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N"
- result = c.physchem [PhysChem.find_or_create_by(:name => "Joelib.LogP")]
- assert_equal 2.65908, result.first.last
+ result = c.calculate_properties [PhysChem.find_or_create_by(:name => "Joelib.LogP")]
+ assert_equal 2.65908, result.first
  end

  def test_compound_all
  c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N"
- result = c.physchem PhysChem.descriptors
  amr = PhysChem.find_or_create_by(:name => "Cdk.ALOGP.AMR", :library => "Cdk")
  sbonds = PhysChem.find_by(:name => "Openbabel.sbonds")
- assert_equal 30.8723, result[amr.id.to_s]
- assert_equal 5, result[sbonds.id.to_s]
+ result = c.calculate_properties([amr,sbonds])
+ assert_equal 30.8723, result[0]
+ assert_equal 5, result[1]
  end

  def test_compound_descriptor_parameters
+ PhysChem.descriptors
  c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N"
- result = c.physchem [ "Openbabel.logP", "Cdk.AtomCount.nAtom", "Joelib.LogP" ].collect{|d| PhysChem.find_or_create_by(:name => d)}
+ result = c.calculate_properties [ "Openbabel.logP", "Cdk.AtomCount.nAtom", "Joelib.LogP" ].collect{|d| PhysChem.find_or_create_by(:name => d)}
  assert_equal 3, result.size
- assert_equal [1.12518, 17.0, 2.65908], result.values.collect{|v| v.round 5}
+ assert_equal 1.12518, result[0].round(5)
+ assert_equal 17.0, result[1].round(5)
+ assert_equal 2.65908, result[2].round(5)
  end

  end
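
The descriptor test changes above follow the rename of Compound#physchem to Compound#calculate_properties, which now returns the values as an array in the same order as the requested PhysChem features and, as test_compound_openbabel_single checks, also records them in Compound#properties keyed by feature id. A minimal sketch of the new call, assuming the descriptor names used in the tests above:

  require_relative "setup.rb"  # test helper used throughout this suite

  c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N"
  features = ["Openbabel.logP", "Cdk.AtomCount.nAtom", "Joelib.LogP"].collect do |name|
    PhysChem.find_or_create_by(:name => name)
  end
  values = c.calculate_properties(features)    # ordered like the feature list
  features.zip(values).each { |f, v| puts "#{f.name}: #{v}" }
  p c.properties[features.first.id.to_s]       # calculated values are also kept on the compound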
data/test/error.rb CHANGED
@@ -8,8 +8,8 @@ class ErrorTest < MiniTest::Test
  end

  def test_error_methods
- assert_raises OpenTox::ResourceNotFoundError do
- resource_not_found_error "This is a test"
+ assert_raises OpenTox::NotFoundError do
+ not_found_error "This is a test"
  end
  end

data/test/experiment.rb CHANGED
@@ -5,7 +5,7 @@ class ExperimentTest < MiniTest::Test
  def test_regression_experiment
  skip
  datasets = [
- "EPAFHM.medi.csv",
+ "EPAFHM.medi_log10.csv",
  #"EPAFHM.csv",
  #"FDA_v3b_Maximum_Recommended_Daily_Dose_mmol.csv",
  "LOAEL_mmol_corrected_smiles.csv"
@@ -68,7 +68,7 @@ class ExperimentTest < MiniTest::Test
  skip
  #=begin
  datasets = [
- "EPAFHM.medi.csv",
+ "EPAFHM.medi_log10.csv",
  #"LOAEL_mmol_corrected_smiles.csv"
  ]
  min_sims = [0.3,0.7]
@@ -118,7 +118,7 @@ class ExperimentTest < MiniTest::Test
  def test_mpd_fingerprints
  skip
  datasets = [
- "EPAFHM.medi.csv",
+ "EPAFHM.medi_log10.csv",
  ]
  types = ["FP2","MP2D"]
  experiment = Experiment.create(
@@ -147,7 +147,7 @@ class ExperimentTest < MiniTest::Test
  def test_multiple_datasets
  skip
  datasets = [
- "EPAFHM.medi.csv",
+ "EPAFHM.medi_log10.csv",
  "LOAEL_mmol_corrected_smiles.csv"
  ]
  min_sims = [0.3]
data/test/feature.rb CHANGED
@@ -32,10 +32,9 @@ class FeatureTest < MiniTest::Test
  def test_duplicated_features
  metadata = {
  :name => "feature duplication test",
- :nominal => true,
  }
- feature = NumericBioAssay.find_or_create_by metadata
- dup_feature = NumericBioAssay.find_or_create_by metadata
+ feature = NumericFeature.find_or_create_by metadata
+ dup_feature = NumericFeature.find_or_create_by metadata
  assert_kind_of Feature, feature
  assert !feature.id.nil?, "No Feature ID in #{feature.inspect}"
  assert !feature.id.nil?, "No Feature ID in #{dup_feature.inspect}"
data/test/gridfs.rb ADDED
@@ -0,0 +1,10 @@
+ require_relative "setup.rb"
+
+ class GridFSTest < MiniTest::Test
+
+ def test_gridfs
+ file = Mongo::Grid::File.new("TEST", :filename => "test.txt",:content_type => "text/plain")
+ id = $gridfs.insert_one file
+ refute_nil id
+ end
+ end