lazar 0.9.3 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88):
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -4
  3. data/README.md +5 -15
  4. data/VERSION +1 -1
  5. data/ext/lazar/extconf.rb +1 -1
  6. data/ext/lazar/rinstall.R +9 -7
  7. data/java/CdkDescriptorInfo.class +0 -0
  8. data/java/CdkDescriptorInfo.java +3 -2
  9. data/java/CdkDescriptors.class +0 -0
  10. data/java/CdkDescriptors.java +28 -28
  11. data/java/Rakefile +3 -3
  12. data/java/{cdk-1.4.19.jar → cdk-2.0-SNAPSHOT.jar} +0 -0
  13. data/lazar.gemspec +6 -7
  14. data/lib/algorithm.rb +2 -11
  15. data/lib/caret.rb +96 -0
  16. data/lib/classification.rb +14 -22
  17. data/lib/compound.rb +21 -87
  18. data/lib/crossvalidation.rb +80 -279
  19. data/lib/dataset.rb +105 -174
  20. data/lib/feature.rb +11 -18
  21. data/lib/feature_selection.rb +42 -0
  22. data/lib/import.rb +122 -0
  23. data/lib/lazar.rb +14 -4
  24. data/lib/leave-one-out-validation.rb +46 -192
  25. data/lib/model.rb +319 -128
  26. data/lib/nanoparticle.rb +98 -0
  27. data/lib/opentox.rb +7 -4
  28. data/lib/overwrite.rb +24 -3
  29. data/lib/physchem.rb +11 -10
  30. data/lib/regression.rb +7 -137
  31. data/lib/rest-client-wrapper.rb +0 -6
  32. data/lib/similarity.rb +65 -0
  33. data/lib/substance.rb +8 -0
  34. data/lib/train-test-validation.rb +69 -0
  35. data/lib/validation-statistics.rb +223 -0
  36. data/lib/validation.rb +17 -100
  37. data/scripts/mg2mmol.rb +17 -0
  38. data/scripts/mirror-enm2test.rb +4 -0
  39. data/scripts/mmol2-log10.rb +32 -0
  40. data/test/compound.rb +4 -94
  41. data/test/data/EPAFHM.medi_log10.csv +92 -0
  42. data/test/data/EPAFHM.mini_log10.csv +16 -0
  43. data/test/data/EPAFHM_log10.csv +581 -0
  44. data/test/data/loael_log10.csv +568 -0
  45. data/test/dataset.rb +195 -133
  46. data/test/descriptor.rb +27 -18
  47. data/test/error.rb +2 -2
  48. data/test/experiment.rb +4 -4
  49. data/test/feature.rb +2 -3
  50. data/test/gridfs.rb +10 -0
  51. data/test/model-classification.rb +106 -0
  52. data/test/model-nanoparticle.rb +128 -0
  53. data/test/model-regression.rb +171 -0
  54. data/test/model-validation.rb +19 -0
  55. data/test/nanomaterial-model-validation.rb +55 -0
  56. data/test/setup.rb +8 -4
  57. data/test/validation-classification.rb +67 -0
  58. data/test/validation-nanoparticle.rb +133 -0
  59. data/test/validation-regression.rb +92 -0
  60. metadata +50 -121
  61. data/test/classification.rb +0 -41
  62. data/test/data/CPDBAS_v5c_1547_29Apr2008part.sdf +0 -13553
  63. data/test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_mouse_TD50.csv +0 -436
  64. data/test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_rat_TD50.csv +0 -568
  65. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Hamster.csv +0 -87
  66. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mouse.csv +0 -978
  67. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall.csv +0 -1120
  68. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall_no_duplicates.csv +0 -1113
  69. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity.csv +0 -850
  70. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity_no_duplicates.csv +0 -829
  71. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Rat.csv +0 -1198
  72. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_SingleCellCall.csv +0 -1505
  73. data/test/data/EPA_v4b_Fathead_Minnow_Acute_Toxicity_LC50_mmol.csv +0 -581
  74. data/test/data/FDA_v3b_Maximum_Recommended_Daily_Dose_mmol.csv +0 -1217
  75. data/test/data/LOAEL_log_mg_corrected_smiles.csv +0 -568
  76. data/test/data/LOAEL_log_mmol_corrected_smiles.csv +0 -568
  77. data/test/data/boiling_points.ext.sdf +0 -11460
  78. data/test/data/cpdb_100.csv +0 -101
  79. data/test/data/hamster_carcinogenicity.ntriples +0 -618
  80. data/test/data/hamster_carcinogenicity.sdf +0 -2805
  81. data/test/data/hamster_carcinogenicity.xls +0 -0
  82. data/test/data/hamster_carcinogenicity.yaml +0 -352
  83. data/test/dataset-long.rb +0 -114
  84. data/test/lazar-long.rb +0 -92
  85. data/test/lazar-physchem-short.rb +0 -31
  86. data/test/prediction_models.rb +0 -20
  87. data/test/regression.rb +0 -43
  88. data/test/validation.rb +0 -108
data/test/dataset.rb CHANGED
@@ -1,9 +1,16 @@
1
- # TODO; check compound/data_entry sequences with missing and duplicated values
2
-
3
1
  require_relative "setup.rb"
4
2
 
5
3
  class DatasetTest < MiniTest::Test
6
4
 
5
+ # basics
6
+
7
+ def test_create_empty
8
+ d = Dataset.new
9
+ assert_equal Dataset, d.class
10
+ refute_nil d.id
11
+ assert_kind_of BSON::ObjectId, d.id
12
+ end
13
+
7
14
  def test_all
8
15
  d1 = Dataset.new
9
16
  d1.save
@@ -12,145 +19,182 @@ class DatasetTest < MiniTest::Test
12
19
  d1.delete
13
20
  end
14
21
 
15
- def test_create_without_features_smiles_and_inchi
16
- ["smiles", "inchi"].each do |type|
17
- d = Dataset.from_csv_file File.join(DATA_DIR,"batch_prediction_#{type}_small.csv")
18
- assert_equal Dataset, d.class
19
- refute_nil d.id
20
- dataset = Dataset.find d.id
21
- #p dataset.compounds
22
- assert_equal 3, d.compounds.size.to_i
23
- d.delete
22
+ # real datasets
23
+
24
+ def test_upload_hamster
25
+ d = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv"
26
+ assert_equal Dataset, d.class
27
+ assert_equal 1, d.features.size
28
+ assert_equal 85, d.compounds.size
29
+ csv = CSV.read("#{DATA_DIR}/hamster_carcinogenicity.csv")
30
+ csv.shift
31
+ csv.each do |row|
32
+ c = Compound.from_smiles row.shift
33
+ assert_equal row, d.values(c,d.features.first)
24
34
  end
35
+ d.delete
25
36
  end
26
37
 
27
- def test_create_empty
28
- d = Dataset.new
29
- assert_equal Dataset, d.class
30
- refute_nil d.id
31
- assert_kind_of BSON::ObjectId, d.id
38
+ def test_upload_kazius
39
+ f = File.join DATA_DIR, "kazius.csv"
40
+ d = OpenTox::Dataset.from_csv_file f
41
+ csv = CSV.read f
42
+ assert_equal csv.size-1, d.compounds.size
43
+ assert_equal csv.first.size-1, d.features.size
44
+ assert_empty d.warnings
45
+ # 493 COC1=C(C=C(C(=C1)Cl)OC)Cl,1
46
+ c = d.compounds[491]
47
+ assert_equal c.smiles, "COc1cc(Cl)c(cc1Cl)OC"
48
+ assert_equal ["1"], d.values(c,d.features.first)
49
+ d.delete
32
50
  end
33
51
 
34
- def test_client_create
35
- d = Dataset.new
36
- assert_equal Dataset, d.class
37
- d.name = "Create dataset test"
38
-
39
- # features not set
40
- # << operator was removed for efficiency reasons (CH)
41
- #assert_raises BadRequestError do
42
- # d << [Compound.from_smiles("c1ccccc1NN"), 1,2]
43
- #end
44
-
45
- # add data entries
46
- d.features = ["test1", "test2"].collect do |title|
47
- f = Feature.new
48
- f.name = title
49
- f.numeric = true
50
- f.save
51
- f
52
+ def test_upload_multicell
53
+ duplicates = [
54
+ "InChI=1S/C6HCl5O/c7-1-2(8)4(10)6(12)5(11)3(1)9/h12H",
55
+ "InChI=1S/C12H8Cl6O/c13-8-9(14)11(16)5-3-1-2(6-7(3)19-6)4(5)10(8,15)12(11,17)18/h2-7H,1H2",
56
+ "InChI=1S/C2HCl3/c3-1-2(4)5/h1H",
57
+ "InChI=1S/C4H5Cl/c1-3-4(2)5/h3H,1-2H2",
58
+ "InChI=1S/C4H7Cl/c1-4(2)3-5/h1,3H2,2H3",
59
+ "InChI=1S/C8H14O4/c1-5-4-8(11-6(2)9)12-7(3)10-5/h5,7-8H,4H2,1-3H3",
60
+ "InChI=1S/C19H30O5/c1-3-5-7-20-8-9-21-10-11-22-14-17-13-19-18(23-15-24-19)12-16(17)6-4-2/h12-13H,3-11,14-15H2,1-2H3",
61
+ ].collect{|inchi| Compound.from_inchi(inchi).smiles}
62
+ errors = ['O=P(H)(OC)OC', 'C=CCNN.HCl' ]
63
+ f = File.join DATA_DIR, "multi_cell_call.csv"
64
+ d = OpenTox::Dataset.from_csv_file f
65
+ csv = CSV.read f
66
+ assert_equal true, d.features.first.nominal?
67
+ assert_equal 1056, d.compounds.size
68
+ assert_equal csv.first.size-1, d.features.size
69
+ errors.each do |smi|
70
+ refute_empty d.warnings.grep %r{#{Regexp.escape(smi)}}
71
+ end
72
+ duplicates.each do |smi|
73
+ refute_empty d.warnings.grep %r{#{Regexp.escape(smi)}}
52
74
  end
53
-
54
- # wrong feature size
55
- # << operator was removed for efficiency reasons (CH)
56
- #assert_raises BadRequestError do
57
- # d << [Compound.from_smiles("c1ccccc1NN"), 1,2,3]
58
- #end
59
-
60
- # manual low-level insertions without consistency checks for runtime efficiency
61
- data_entries = []
62
- d.compound_ids << Compound.from_smiles("c1ccccc1NN").id
63
- data_entries << [1,2]
64
- d.compound_ids << Compound.from_smiles("CC(C)N").id
65
- data_entries << [4,5]
66
- d.compound_ids << Compound.from_smiles("C1C(C)CCCC1").id
67
- data_entries << [6,7]
68
- d.data_entries = data_entries
69
- assert_equal 3, d.compounds.size
70
- assert_equal 2, d.features.size
71
- assert_equal [[1,2],[4,5],[6,7]], d.data_entries
72
- d.save
73
- # check if dataset has been saved correctly
74
- new_dataset = Dataset.find d.id
75
- assert_equal 3, new_dataset.compounds.size
76
- assert_equal 2, new_dataset.features.size
77
- assert_equal [[1,2],[4,5],[6,7]], new_dataset.data_entries
78
75
  d.delete
79
- assert_nil Dataset.find d.id
80
- assert_nil Dataset.find new_dataset.id
81
76
  end
82
77
 
83
- def test_dataset_accessors
84
- d = Dataset.from_csv_file "#{DATA_DIR}/multicolumn.csv"
85
- # create empty dataset
86
- new_dataset = Dataset.find d.id
87
- # get metadata
88
- assert_match "multicolumn.csv", new_dataset.source
89
- assert_equal "multicolumn", new_dataset.name
90
- # get features
91
- assert_equal 6, new_dataset.features.size
92
- assert_equal 7, new_dataset.compounds.size
93
- assert_equal ["1", nil, "false", nil, nil, 1.0], new_dataset.data_entries.last
78
+ def test_upload_isscan
79
+ f = File.join DATA_DIR, "ISSCAN-multi.csv"
80
+ d = OpenTox::Dataset.from_csv_file f
81
+ csv = CSV.read f
82
+ assert_equal csv.size-1, d.compounds.size
83
+ assert_equal csv.first.size-1, d.features.size
94
84
  d.delete
95
85
  end
96
86
 
97
- def test_create_from_file
98
- d = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.mini.csv")
87
+ def test_upload_epafhm
88
+ f = File.join DATA_DIR, "EPAFHM_log10.csv"
89
+ d = OpenTox::Dataset.from_csv_file f
99
90
  assert_equal Dataset, d.class
100
- refute_nil d.warnings
101
- assert_match "EPAFHM.mini.csv", d.source
102
- assert_equal "EPAFHM.mini.csv", d.name
103
- d.delete
104
- #assert_equal false, URI.accessible?(d.uri)
91
+ csv = CSV.read f
92
+ assert_equal csv.size-1, d.compounds.size
93
+ assert_equal csv.first.size-1, d.features.size
94
+ assert_match "EPAFHM_log10.csv", d.source
95
+ assert_equal "EPAFHM_log10", d.name
96
+ feature = d.features.first
97
+ assert_kind_of NumericFeature, feature
98
+ assert_equal -Math.log10(0.0113), d.values(d.compounds.first,feature).first
99
+ assert_equal -Math.log10(0.00323), d.values(d.compounds[4],feature).first
100
+ d2 = Dataset.find d.id
101
+ assert_equal -Math.log10(0.0113), d2.values(d2.compounds[0],feature).first
102
+ assert_equal -Math.log10(0.00323), d2.values(d2.compounds[4],feature).first
103
+ d.delete
105
104
  end
106
105
 
107
- def test_create_from_file_with_wrong_smiles_compound_entries
108
- d = Dataset.from_csv_file File.join(DATA_DIR,"wrong_dataset.csv")
109
- refute_nil d.warnings
110
- assert_match /2|3|4|5|6|7|8/, d.warnings.join
111
- d.delete
106
+ # batch predictions
107
+
108
+ def test_create_without_features_smiles_and_inchi
109
+ ["smiles", "inchi"].each do |type|
110
+ d = Dataset.from_csv_file File.join(DATA_DIR,"batch_prediction_#{type}_small.csv"), true
111
+ assert_equal Dataset, d.class
112
+ refute_nil d.id
113
+ dataset = Dataset.find d.id
114
+ assert_equal 3, d.compounds.size
115
+ d.delete
116
+ end
112
117
  end
113
118
 
114
- def test_multicolumn_csv
119
+ # dataset operations
120
+
121
+ def test_folds
122
+ dataset = Dataset.from_csv_file File.join(DATA_DIR,"loael.csv")
123
+ dataset.folds(10).each do |fold|
124
+ fold.each do |d|
125
+ assert_operator d.compounds.size, :>=, d.compounds.uniq.size
126
+ end
127
+ assert_operator fold[0].compounds.size, :>=, fold[1].compounds.size
128
+ assert_equal dataset.substances.size, fold.first.substances.size + fold.last.substances.size
129
+ assert_empty (fold.first.substances & fold.last.substances)
130
+ end
131
+ end
132
+
133
+ # serialisation
134
+
135
+ def test_to_csv
115
136
  d = Dataset.from_csv_file "#{DATA_DIR}/multicolumn.csv"
116
137
  refute_nil d.warnings
117
138
  assert d.warnings.grep(/Duplicate compound/)
118
139
  assert d.warnings.grep(/3, 5/)
119
140
  assert_equal 6, d.features.size
120
- assert_equal 7, d.compounds.size
141
+ assert_equal 5, d.compounds.uniq.size
121
142
  assert_equal 5, d.compounds.collect{|c| c.inchi}.uniq.size
122
- assert_equal [["1", "1", "true", "true", "test", 1.1], ["1", "2", "false", "7.5", "test", 0.24], ["1", "3", "true", "5", "test", 3578.239], ["0", "4", "false", "false", "test", -2.35], ["1", "2", "true", "4", "test_2", 1], ["1", "2", "false", "false", "test", -1.5], ["1", nil, "false", nil, nil, 1.0]], d.data_entries
123
- assert_equal "c1ccc[nH]1,1,,false,,,1.0", d.to_csv.split("\n")[7]
124
143
  csv = CSV.parse(d.to_csv)
125
144
  original_csv = CSV.read("#{DATA_DIR}/multicolumn.csv")
126
145
  csv.shift
127
146
  original_csv.shift
128
- csv.each_with_index do |row,i|
129
- compound = Compound.from_smiles row.shift
130
- original_compound = Compound.from_smiles original_csv[i].shift.strip
131
- assert_equal original_compound.inchi, compound.inchi
132
- row.each_with_index do |v,j|
147
+ original = {}
148
+ original_csv.each do |row|
149
+ c = Compound.from_smiles row.shift.strip
150
+ original[c.inchi] = row.collect{|v| v.strip}
151
+ end
152
+ serialized = {}
153
+ csv.each do |row|
154
+ c = Compound.from_smiles row.shift
155
+ serialized[c.inchi] = row
156
+ end
157
+ #puts serialized.to_yaml
158
+ original.each do |inchi,row|
159
+ row.each_with_index do |v,i|
133
160
  if v.numeric?
134
- assert_equal original_csv[i][j].strip.to_f, row[j].to_f
161
+ assert_equal v.to_f, serialized[inchi][i].to_f
135
162
  else
136
- assert_equal original_csv[i][j].strip, row[j].to_s
163
+ assert_equal v, serialized[inchi][i]
137
164
  end
138
165
  end
166
+
139
167
  end
140
168
  d.delete
141
169
  end
142
170
 
143
- def test_from_csv
144
- d = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv"
145
- assert_equal Dataset, d.class
146
- assert_equal 1, d.features.size
147
- assert_equal 85, d.compounds.size
148
- assert_equal 85, d.data_entries.size
149
- csv = CSV.read("#{DATA_DIR}/hamster_carcinogenicity.csv")
150
- csv.shift
151
- assert_equal csv.collect{|r| r[1]}, d.data_entries.flatten
152
- d.delete
153
- #assert_equal false, URI.accessible?(d.uri)
171
+ # special cases/details
172
+
173
+ def test_dataset_accessors
174
+ d = Dataset.from_csv_file "#{DATA_DIR}/multicolumn.csv"
175
+ # create empty dataset
176
+ new_dataset = Dataset.find d.id
177
+ # get metadata
178
+ assert_match "multicolumn.csv", new_dataset.source
179
+ assert_equal "multicolumn", new_dataset.name
180
+ # get features
181
+ assert_equal 6, new_dataset.features.size
182
+ assert_equal 5, new_dataset.compounds.uniq.size
183
+ c = new_dataset.compounds.last
184
+ f = new_dataset.features.first
185
+ assert_equal ["1"], new_dataset.values(c,f)
186
+ f = new_dataset.features.last.id.to_s
187
+ assert_equal [1.0], new_dataset.values(c,f)
188
+ f = new_dataset.features[2]
189
+ assert_equal ["false"], new_dataset.values(c,f)
190
+ d.delete
191
+ end
192
+
193
+ def test_create_from_file_with_wrong_smiles_compound_entries
194
+ d = Dataset.from_csv_file File.join(DATA_DIR,"wrong_dataset.csv")
195
+ refute_nil d.warnings
196
+ assert_match /2|3|4|5|6|7|8/, d.warnings.join
197
+ d.delete
154
198
  end
155
199
 
156
200
  def test_from_csv_classification
@@ -158,9 +202,9 @@ class DatasetTest < MiniTest::Test
158
202
  d = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.mini.bool_#{mode}.csv"
159
203
  csv = CSV.read("#{DATA_DIR}/hamster_carcinogenicity.mini.bool_#{mode}.csv")
160
204
  csv.shift
161
- entries = d.data_entries.flatten
162
- csv.each_with_index do |r, i|
163
- assert_equal r[1].to_s, entries[i]
205
+ csv.each do |row|
206
+ c = Compound.from_smiles row.shift
207
+ assert_equal row, d.values(c,d.features.first)
164
208
  end
165
209
  d.delete
166
210
  end
@@ -169,7 +213,7 @@ class DatasetTest < MiniTest::Test
169
213
  def test_from_csv2
170
214
  File.open("#{DATA_DIR}/temp_test.csv", "w+") { |file| file.write("SMILES,Hamster\nCC=O,true\n ,true\nO=C(N),true") }
171
215
  dataset = Dataset.from_csv_file "#{DATA_DIR}/temp_test.csv"
172
- assert_equal "Cannot parse SMILES compound '' at position 3, all entries are ignored.", dataset.warnings.join
216
+ assert_equal "Cannot parse SMILES compound '' at line 3 of /home/ist/lazar/test/data/temp_test.csv, all entries are ignored.", dataset.warnings.join
173
217
  File.delete "#{DATA_DIR}/temp_test.csv"
174
218
  dataset.features.each{|f| feature = Feature.find f.id; feature.delete}
175
219
  dataset.delete
@@ -187,32 +231,50 @@ class DatasetTest < MiniTest::Test
187
231
  datasets.each{|d| d.delete}
188
232
  end
189
233
 
190
- def test_create_from_file
191
- d = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.mini.csv")
192
- assert_equal Dataset, d.class
193
- refute_nil d.warnings
194
- assert_match /row 13/, d.warnings.join
195
- assert_match "EPAFHM.mini.csv", d.source
196
- assert_equal 1, d.features.size
197
- feature = d.features.first
198
- assert_kind_of NumericBioAssay, feature
199
- assert_equal 0.0113, d.data_entries[0][0]
200
- assert_equal 0.00323, d.data_entries[5][0]
201
- d2 = Dataset.find d.id
202
- assert_equal 0.0113, d2.data_entries[0][0]
203
- assert_equal 0.00323, d2.data_entries[5][0]
234
+ def test_simultanous_upload
235
+ threads = []
236
+ 3.times do |t|
237
+ threads << Thread.new(t) do |up|
238
+ d = OpenTox::Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv"
239
+ assert_equal OpenTox::Dataset, d.class
240
+ assert_equal 1, d.features.size
241
+ assert_equal 85, d.compounds.size
242
+ csv = CSV.read("#{DATA_DIR}/hamster_carcinogenicity.csv")
243
+ csv.shift
244
+ csv.each do |row|
245
+ c = Compound.from_smiles(row.shift)
246
+ assert_equal row, d.values(c,d.features.first)
247
+ end
248
+ d.delete
249
+ end
250
+ end
251
+ threads.each {|aThread| aThread.join}
204
252
  end
205
253
 
206
- def test_folds
207
- dataset = Dataset.from_csv_file File.join(DATA_DIR,"loael.csv")
208
- dataset.folds(10).each do |fold|
209
- fold.each do |d|
210
- assert_equal d.data_entries.size, d.compound_ids.size
211
- assert_operator d.compound_ids.size, :>=, d.compound_ids.uniq.size
212
- end
213
- assert_operator fold[0].compound_ids.uniq.size, :>=, fold[1].compound_ids.uniq.size
254
+ def test_upload_feature_dataset
255
+ skip
256
+ t = Time.now
257
+ f = File.join DATA_DIR, "rat_feature_dataset.csv"
258
+ d = Dataset.from_csv_file f
259
+ assert_equal 458, d.features.size
260
+ d.save
261
+ #p "Upload: #{Time.now-t}"
262
+ d2 = Dataset.find d.id
263
+ t = Time.now
264
+ assert_equal d.features.size, d2.features.size
265
+ csv = CSV.read f
266
+ csv.shift # remove header
267
+ assert_empty d2.warnings
268
+ assert_equal csv.size, d2.compounds.size
269
+ assert_equal csv.first.size-1, d2.features.size
270
+ d2.compounds.each_with_index do |compound,i|
271
+ row = csv[i]
272
+ row.shift # remove compound
273
+ assert_equal row, d2.data_entries[i]
214
274
  end
215
- #puts dataset.folds 10
275
+ #p "Dowload: #{Time.now-t}"
276
+ d2.delete
277
+ assert_nil Dataset.find d.id
216
278
  end
217
279
 
218
280
  end
data/test/descriptor.rb CHANGED
@@ -4,15 +4,17 @@ class DescriptorTest < MiniTest::Test
4
4
 
5
5
  def test_list
6
6
  # check available descriptors
7
- assert_equal 355,PhysChem.descriptors.size,"incorrect number of physchem descriptors"
8
7
  assert_equal 15,PhysChem.openbabel_descriptors.size,"incorrect number of Openbabel descriptors"
9
- assert_equal 295,PhysChem.cdk_descriptors.size,"incorrect number of Cdk descriptors"
10
8
  assert_equal 45,PhysChem.joelib_descriptors.size,"incorrect number of Joelib descriptors"
9
+ assert_equal 286,PhysChem.cdk_descriptors.size,"incorrect number of Cdk descriptors"
10
+ assert_equal 346,PhysChem.descriptors.size,"incorrect number of physchem descriptors"
11
11
  end
12
12
 
13
13
  def test_smarts
14
14
  c = OpenTox::Compound.from_smiles "N=C=C1CCC(=F=FO)C1"
15
- File.open("tmp.png","w+"){|f| f.puts c.png}
15
+ File.open("/tmp/tmp.png","w+"){|f| f.puts c.png}
16
+ assert_match /^PNG/,`file -b /tmp/tmp.png`
17
+ File.delete "/tmp/tmp.png"
16
18
  s = Smarts.find_or_create_by(:smarts => "F=F")
17
19
  result = c.smarts_match [s]
18
20
  assert_equal [1], result
@@ -26,43 +28,50 @@ class DescriptorTest < MiniTest::Test
26
28
 
27
29
  def test_compound_openbabel_single
28
30
  c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N"
29
- result = c.physchem [PhysChem.find_or_create_by(:name => "Openbabel.logP")]
30
- assert_equal 1.12518, result.first.last.round(5)
31
+ feature = PhysChem.find_or_create_by(:name => "Openbabel.logP")
32
+ result = c.calculate_properties([feature])
33
+ assert_equal 1.12518, result.first.round(5)
34
+ assert_equal 1.12518, c.properties[feature.id.to_s].round(5)
31
35
  end
32
36
 
33
37
  def test_compound_cdk_single
34
38
  c = OpenTox::Compound.from_smiles "c1ccccc1"
35
- result = c.physchem [PhysChem.find_or_create_by(:name => "Cdk.AtomCount.nAtom")]
36
- assert_equal 12, result.first.last
39
+ feature = PhysChem.find_or_create_by(:name => "Cdk.AtomCount.nAtom")
40
+ result = c.calculate_properties([feature])
41
+ assert_equal 12, result.first
37
42
  c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N"
38
- result = c.physchem [PhysChem.find_or_create_by(:name => "Cdk.AtomCount.nAtom")]
39
- assert_equal 17, result.first.last
43
+ feature = PhysChem.find_or_create_by(:name => "Cdk.AtomCount.nAtom")
44
+ result = c.calculate_properties([feature])
45
+ assert_equal 17, result.first
40
46
  c_types = {"Cdk.CarbonTypes.C1SP1"=>1, "Cdk.CarbonTypes.C2SP1"=>0, "Cdk.CarbonTypes.C1SP2"=>0, "Cdk.CarbonTypes.C2SP2"=>1, "Cdk.CarbonTypes.C3SP2"=>0, "Cdk.CarbonTypes.C1SP3"=>2, "Cdk.CarbonTypes.C2SP3"=>1, "Cdk.CarbonTypes.C3SP3"=>1, "Cdk.CarbonTypes.C4SP3"=>0}
41
47
  physchem_features = c_types.collect{|t,nr| PhysChem.find_or_create_by(:name => t)}
42
- result = c.physchem physchem_features
43
- assert_equal [1, 0, 0, 1, 0, 2, 1, 1, 0], result.values
48
+ result = c.calculate_properties physchem_features
49
+ assert_equal [1, 0, 0, 1, 0, 2, 1, 1, 0], result
44
50
  end
45
51
 
46
52
  def test_compound_joelib_single
47
53
  c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N"
48
- result = c.physchem [PhysChem.find_or_create_by(:name => "Joelib.LogP")]
49
- assert_equal 2.65908, result.first.last
54
+ result = c.calculate_properties [PhysChem.find_or_create_by(:name => "Joelib.LogP")]
55
+ assert_equal 2.65908, result.first
50
56
  end
51
57
 
52
58
  def test_compound_all
53
59
  c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N"
54
- result = c.physchem PhysChem.descriptors
55
60
  amr = PhysChem.find_or_create_by(:name => "Cdk.ALOGP.AMR", :library => "Cdk")
56
61
  sbonds = PhysChem.find_by(:name => "Openbabel.sbonds")
57
- assert_equal 30.8723, result[amr.id.to_s]
58
- assert_equal 5, result[sbonds.id.to_s]
62
+ result = c.calculate_properties([amr,sbonds])
63
+ assert_equal 30.8723, result[0]
64
+ assert_equal 5, result[1]
59
65
  end
60
66
 
61
67
  def test_compound_descriptor_parameters
68
+ PhysChem.descriptors
62
69
  c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N"
63
- result = c.physchem [ "Openbabel.logP", "Cdk.AtomCount.nAtom", "Joelib.LogP" ].collect{|d| PhysChem.find_or_create_by(:name => d)}
70
+ result = c.calculate_properties [ "Openbabel.logP", "Cdk.AtomCount.nAtom", "Joelib.LogP" ].collect{|d| PhysChem.find_or_create_by(:name => d)}
64
71
  assert_equal 3, result.size
65
- assert_equal [1.12518, 17.0, 2.65908], result.values.collect{|v| v.round 5}
72
+ assert_equal 1.12518, result[0].round(5)
73
+ assert_equal 17.0, result[1].round(5)
74
+ assert_equal 2.65908, result[2].round(5)
66
75
  end
67
76
 
68
77
  end
data/test/error.rb CHANGED
@@ -8,8 +8,8 @@ class ErrorTest < MiniTest::Test
8
8
  end
9
9
 
10
10
  def test_error_methods
11
- assert_raises OpenTox::ResourceNotFoundError do
12
- resource_not_found_error "This is a test"
11
+ assert_raises OpenTox::NotFoundError do
12
+ not_found_error "This is a test"
13
13
  end
14
14
  end
15
15
 
data/test/experiment.rb CHANGED
@@ -5,7 +5,7 @@ class ExperimentTest < MiniTest::Test
5
5
  def test_regression_experiment
6
6
  skip
7
7
  datasets = [
8
- "EPAFHM.medi.csv",
8
+ "EPAFHM.medi_log10.csv",
9
9
  #"EPAFHM.csv",
10
10
  #"FDA_v3b_Maximum_Recommended_Daily_Dose_mmol.csv",
11
11
  "LOAEL_mmol_corrected_smiles.csv"
@@ -68,7 +68,7 @@ class ExperimentTest < MiniTest::Test
68
68
  skip
69
69
  #=begin
70
70
  datasets = [
71
- "EPAFHM.medi.csv",
71
+ "EPAFHM.medi_log10.csv",
72
72
  #"LOAEL_mmol_corrected_smiles.csv"
73
73
  ]
74
74
  min_sims = [0.3,0.7]
@@ -118,7 +118,7 @@ class ExperimentTest < MiniTest::Test
118
118
  def test_mpd_fingerprints
119
119
  skip
120
120
  datasets = [
121
- "EPAFHM.medi.csv",
121
+ "EPAFHM.medi_log10.csv",
122
122
  ]
123
123
  types = ["FP2","MP2D"]
124
124
  experiment = Experiment.create(
@@ -147,7 +147,7 @@ class ExperimentTest < MiniTest::Test
147
147
  def test_multiple_datasets
148
148
  skip
149
149
  datasets = [
150
- "EPAFHM.medi.csv",
150
+ "EPAFHM.medi_log10.csv",
151
151
  "LOAEL_mmol_corrected_smiles.csv"
152
152
  ]
153
153
  min_sims = [0.3]
data/test/feature.rb CHANGED
@@ -32,10 +32,9 @@ class FeatureTest < MiniTest::Test
32
32
  def test_duplicated_features
33
33
  metadata = {
34
34
  :name => "feature duplication test",
35
- :nominal => true,
36
35
  }
37
- feature = NumericBioAssay.find_or_create_by metadata
38
- dup_feature = NumericBioAssay.find_or_create_by metadata
36
+ feature = NumericFeature.find_or_create_by metadata
37
+ dup_feature = NumericFeature.find_or_create_by metadata
39
38
  assert_kind_of Feature, feature
40
39
  assert !feature.id.nil?, "No Feature ID in #{feature.inspect}"
41
40
  assert !feature.id.nil?, "No Feature ID in #{dup_feature.inspect}"
data/test/gridfs.rb ADDED
@@ -0,0 +1,10 @@
1
+ require_relative "setup.rb"
2
+
3
+ class GridFSTest < MiniTest::Test
4
+
5
+ def test_gridfs
6
+ file = Mongo::Grid::File.new("TEST", :filename => "test.txt",:content_type => "text/plain")
7
+ id = $gridfs.insert_one file
8
+ refute_nil id
9
+ end
10
+ end