lazar 0.0.1

Files changed (98)
  1. checksums.yaml +7 -0
  2. data/.gitignore +10 -0
  3. data/.yardopts +4 -0
  4. data/Gemfile +2 -0
  5. data/LICENSE +674 -0
  6. data/README.md +44 -0
  7. data/Rakefile +1 -0
  8. data/VERSION +1 -0
  9. data/ext/lazar/extconf.rb +87 -0
  10. data/java/CdkDescriptorInfo.class +0 -0
  11. data/java/CdkDescriptorInfo.java +22 -0
  12. data/java/CdkDescriptors.class +0 -0
  13. data/java/CdkDescriptors.java +141 -0
  14. data/java/Jmol.jar +0 -0
  15. data/java/JoelibDescriptorInfo.class +0 -0
  16. data/java/JoelibDescriptorInfo.java +15 -0
  17. data/java/JoelibDescriptors.class +0 -0
  18. data/java/JoelibDescriptors.java +60 -0
  19. data/java/Rakefile +15 -0
  20. data/java/cdk-1.4.19.jar +0 -0
  21. data/java/joelib2.jar +0 -0
  22. data/java/log4j.jar +0 -0
  23. data/lazar.gemspec +29 -0
  24. data/lib/SMARTS_InteLigand.txt +983 -0
  25. data/lib/algorithm.rb +21 -0
  26. data/lib/bbrc.rb +165 -0
  27. data/lib/classification.rb +107 -0
  28. data/lib/compound.rb +254 -0
  29. data/lib/crossvalidation.rb +187 -0
  30. data/lib/dataset.rb +334 -0
  31. data/lib/descriptor.rb +247 -0
  32. data/lib/error.rb +66 -0
  33. data/lib/feature.rb +97 -0
  34. data/lib/lazar-model.rb +170 -0
  35. data/lib/lazar.rb +69 -0
  36. data/lib/neighbor.rb +25 -0
  37. data/lib/opentox.rb +22 -0
  38. data/lib/overwrite.rb +119 -0
  39. data/lib/regression.rb +199 -0
  40. data/lib/rest-client-wrapper.rb +98 -0
  41. data/lib/similarity.rb +58 -0
  42. data/lib/unique_descriptors.rb +120 -0
  43. data/lib/validation.rb +114 -0
  44. data/mongoid.yml +8 -0
  45. data/test/all.rb +5 -0
  46. data/test/compound.rb +100 -0
  47. data/test/data/CPDBAS_v5c_1547_29Apr2008part.sdf +13553 -0
  48. data/test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_mouse_TD50.csv +436 -0
  49. data/test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_rat_TD50.csv +568 -0
  50. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Hamster.csv +87 -0
  51. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mouse.csv +978 -0
  52. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall.csv +1120 -0
  53. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall_no_duplicates.csv +1113 -0
  54. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity.csv +850 -0
  55. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity_no_duplicates.csv +829 -0
  56. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Rat.csv +1198 -0
  57. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_SingleCellCall.csv +1505 -0
  58. data/test/data/EPAFHM.csv +618 -0
  59. data/test/data/EPAFHM.medi.csv +100 -0
  60. data/test/data/EPAFHM.mini.csv +22 -0
  61. data/test/data/EPA_v4b_Fathead_Minnow_Acute_Toxicity_LC50_mmol.csv +581 -0
  62. data/test/data/FDA_v3b_Maximum_Recommended_Daily_Dose_mmol.csv +1217 -0
  63. data/test/data/ISSCAN-multi.csv +59 -0
  64. data/test/data/LOAEL_log_mg_corrected_smiles.csv +568 -0
  65. data/test/data/LOAEL_log_mmol_corrected_smiles.csv +568 -0
  66. data/test/data/acetaldehyde.sdf +14 -0
  67. data/test/data/boiling_points.ext.sdf +11460 -0
  68. data/test/data/cpdb_100.csv +101 -0
  69. data/test/data/hamster_carcinogenicity.csv +86 -0
  70. data/test/data/hamster_carcinogenicity.mini.bool_float.csv +11 -0
  71. data/test/data/hamster_carcinogenicity.mini.bool_int.csv +11 -0
  72. data/test/data/hamster_carcinogenicity.mini.bool_string.csv +11 -0
  73. data/test/data/hamster_carcinogenicity.mini.csv +11 -0
  74. data/test/data/hamster_carcinogenicity.ntriples +618 -0
  75. data/test/data/hamster_carcinogenicity.sdf +2805 -0
  76. data/test/data/hamster_carcinogenicity.xls +0 -0
  77. data/test/data/hamster_carcinogenicity.yaml +352 -0
  78. data/test/data/hamster_carcinogenicity_with_errors.csv +88 -0
  79. data/test/data/kazius.csv +4070 -0
  80. data/test/data/multi_cell_call.csv +1067 -0
  81. data/test/data/multi_cell_call_no_dup.csv +1057 -0
  82. data/test/data/multicolumn.csv +8 -0
  83. data/test/data/rat_feature_dataset.csv +1179 -0
  84. data/test/data/wrong_dataset.csv +8 -0
  85. data/test/dataset-long.rb +117 -0
  86. data/test/dataset.rb +199 -0
  87. data/test/descriptor-long.rb +26 -0
  88. data/test/descriptor.rb +83 -0
  89. data/test/error.rb +24 -0
  90. data/test/feature.rb +65 -0
  91. data/test/fminer-long.rb +38 -0
  92. data/test/fminer.rb +52 -0
  93. data/test/lazar-fminer.rb +50 -0
  94. data/test/lazar-long.rb +72 -0
  95. data/test/lazar-physchem-short.rb +27 -0
  96. data/test/setup.rb +6 -0
  97. data/test/validation.rb +41 -0
  98. metadata +212 -0
data/test/data/wrong_dataset.csv ADDED
@@ -0,0 +1,8 @@
+ SMILES,Wrong Dataset
+ Tost,0
+ Is,1
+ A,0
+ Wrong,1
+ Dataset,0
+ Entry,1
+ O[C@@H]8[C@@H](O)[C@@H]1O[C@H](CO)[C@H]8O[C@H]7O[C@H](CO)[C@@H](O[C@H]6O[C@H](CO)[C@@H](O[C@H]5O[C@H](CO)[C@@H](O[C@H]4O[C@H](CO)[C@@H](O[C@H]3O[C@H](CO)[C@@H](O[C@H]2O[C@H](CO)[C@@H](O1)[C@H](O)[C@H]2O)[C@H](O)[C@H]3O)[C@H](O)[C@H]4O)[C@H](O)[C@H]5O)[C, 0
data/test/dataset-long.rb ADDED
@@ -0,0 +1,117 @@
+ require_relative "setup.rb"
+
+ class DatasetLongTest < MiniTest::Test
+
+   def test_01_upload_epafhm
+     f = File.join DATA_DIR, "EPAFHM.csv"
+     d = OpenTox::Dataset.from_csv_file f
+     csv = CSV.read f
+     assert_equal csv.size-1, d.compounds.size
+     assert_equal csv.first.size-1, d.features.size
+     assert_equal csv.size-1, d.data_entries.size
+     d.delete
+   end
+
+ =begin
+   # TODO: catch OpenBabel segfaults and identify/remove cause
+   def test_02_upload_multicell
+     duplicates = [
+       "http://localhost:8082/compound/InChI=1S/C6HCl5O/c7-1-2(8)4(10)6(12)5(11)3(1)9/h12H",
+       "http://localhost:8082/compound/InChI=1S/C12H8Cl6O/c13-8-9(14)11(16)5-3-1-2(6-7(3)19-6)4(5)10(8,15)12(11,17)18/h2-7H,1H2",
+       "http://localhost:8082/compound/InChI=1S/C2HCl3/c3-1-2(4)5/h1H",
+       "http://localhost:8082/compound/InChI=1S/C4H5Cl/c1-3-4(2)5/h3H,1-2H2",
+       "http://localhost:8082/compound/InChI=1S/C4H7Cl/c1-4(2)3-5/h1,3H2,2H3",
+       "http://localhost:8082/compound/InChI=1S/C8H14O4/c1-5-4-8(11-6(2)9)12-7(3)10-5/h5,7-8H,4H2,1-3H3",
+       "http://localhost:8082/compound/InChI=1S/C19H30O5/c1-3-5-7-20-8-9-21-10-11-22-14-17-13-19-18(23-15-24-19)12-16(17)6-4-2/h12-13H,3-11,14-15H2,1-2H3",
+     ]
+     errors = ['O=P(H)(OC)OC', 'C=CCNN.HCl']
+     f = File.join DATA_DIR, "multi_cell_call.csv"
+     d = OpenTox::Dataset.from_csv_file f
+     csv = CSV.read f
+     assert_equal true, d.features.first.nominal
+     assert_nil d["index"]
+     assert_equal csv.size-1-errors.size, d.compounds.size
+     assert_equal csv.first.size-1, d.features.size
+     assert_equal csv.size-1-errors.size, d.data_entries.size
+     p d.warnings
+     (duplicates+errors).each do |uri|
+       assert d.warnings.grep %r{#{uri}}
+     end
+     d.delete
+   end
+ =end
+
+   def test_03_upload_isscan
+     f = File.join DATA_DIR, "ISSCAN-multi.csv"
+     d = OpenTox::Dataset.from_csv_file f
+     csv = CSV.read f
+     assert_equal csv.size-1, d.compounds.size
+     assert_equal csv.first.size-1, d.features.size
+     assert_equal csv.size-1, d.data_entries.size
+     d.delete
+     #assert_equal false, URI.accessible?(d.uri)
+   end
+
+   def test_04_simultaneous_upload
+     threads = []
+     3.times do |t|
+       threads << Thread.new(t) do |up|
+         d = OpenTox::Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv"
+         assert_equal OpenTox::Dataset, d.class
+         assert_equal 1, d.features.size
+         assert_equal 85, d.compounds.size
+         assert_equal 85, d.data_entries.size
+         csv = CSV.read("#{DATA_DIR}/hamster_carcinogenicity.csv")
+         csv.shift
+         assert_equal csv.collect{|r| r[1]}, d.data_entries.flatten
+         d.delete
+       end
+     end
+     threads.each {|thread| thread.join}
+   end
+
+   def test_05_upload_kazius
+     f = File.join DATA_DIR, "kazius.csv"
+     d = OpenTox::Dataset.from_csv_file f
+     csv = CSV.read f
+     assert_equal csv.size-1, d.compounds.size
+     assert_equal csv.first.size-1, d.features.size
+     assert_equal csv.size-1, d.data_entries.size
+     assert_empty d.warnings
+     # 493 COC1=C(C=C(C(=C1)Cl)OC)Cl,1
+     c = d.compounds[491]
+     assert_equal c.smiles, "COc1cc(Cl)c(cc1Cl)OC"
+     assert_equal d.data_entries[491][0], "1"
+     d.delete
+   end
+
+   def test_upload_feature_dataset
+     t = Time.now
+     f = File.join DATA_DIR, "rat_feature_dataset.csv"
+     d = Dataset.from_csv_file f
+     assert_equal 458, d.features.size
+     d.save
+     p "Upload: #{Time.now-t}"
+     d2 = Dataset.find d.id
+     t = Time.now
+     assert_equal d.features.size, d2.features.size
+     csv = CSV.read f
+     csv.delete_at(248) # remove entry with InChI segfault
+     csv.shift # remove header
+     refute_empty d2.warnings
+     assert_match /249/, d2.warnings.join
+     assert_equal csv.size, d2.compounds.size
+     assert_equal csv.first.size-1, d2.features.size
+     d2.compounds.each_with_index do |compound,i|
+       row = csv[i]
+       row.shift # remove compound
+       assert_equal row, d2.data_entries[i]
+     end
+     p "Download: #{Time.now-t}"
+     d2.delete
+     assert_raises Mongoid::Errors::DocumentNotFound do
+       Dataset.find d.id
+     end
+   end
+
+ end
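
Note: the tests above exercise the CSV import path end to end. As a quick orientation, this is a minimal usage sketch (not part of the gem's files), assuming the gem is installed, a local MongoDB instance is configured as in mongoid.yml, and the CSV has a SMILES column followed by feature columns; the path points at the test data bundled with the gem and may need adjusting.

require 'lazar'
include OpenTox

# Parse a CSV file (SMILES column + feature columns) into a dataset,
# as the tests above do with the bundled EPAFHM.csv file.
d = Dataset.from_csv_file "test/data/EPAFHM.csv"

puts d.compounds.size     # one compound per parsable CSV row
puts d.features.size      # one feature per column after the SMILES column
puts d.data_entries.size  # one row of values per compound
puts d.warnings           # parser warnings, e.g. unparsable SMILES or duplicates

# Remove the dataset from MongoDB again.
d.delete
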
data/test/dataset.rb ADDED
@@ -0,0 +1,199 @@
+ # TODO: check compound/data_entry sequences with missing and duplicated values
+
+ require_relative "setup.rb"
+
+ class DatasetTest < MiniTest::Test
+
+   def test_all
+     d1 = Dataset.new
+     d1.save
+     datasets = Dataset.all
+     assert_equal Dataset, datasets.first.class
+     d1.delete
+   end
+
+   def test_create_empty
+     d = Dataset.new
+     assert_equal Dataset, d.class
+     refute_nil d.id
+     assert_kind_of BSON::ObjectId, d.id
+   end
+
+   def test_client_create
+     d = Dataset.new
+     assert_equal Dataset, d.class
+     d.name = "Create dataset test"
+
+     # features not set
+     # << operator was removed for efficiency reasons (CH)
+     #assert_raises BadRequestError do
+     #  d << [Compound.from_smiles("c1ccccc1NN"), 1,2]
+     #end
+
+     # add data entries
+     d.features = ["test1", "test2"].collect do |title|
+       f = Feature.new
+       f.name = title
+       f.numeric = true
+       f.save
+       f
+     end
+
+     # wrong feature size
+     # << operator was removed for efficiency reasons (CH)
+     #assert_raises BadRequestError do
+     #  d << [Compound.from_smiles("c1ccccc1NN"), 1,2,3]
+     #end
+
+     # manual low-level insertions without consistency checks for runtime efficiency
+     data_entries = []
+     d.compound_ids << Compound.from_smiles("c1ccccc1NN").id
+     data_entries << [1,2]
+     d.compound_ids << Compound.from_smiles("CC(C)N").id
+     data_entries << [4,5]
+     d.compound_ids << Compound.from_smiles("C1C(C)CCCC1").id
+     data_entries << [6,7]
+     d.data_entries = data_entries
+     assert_equal 3, d.compounds.size
+     assert_equal 2, d.features.size
+     assert_equal [[1,2],[4,5],[6,7]], d.data_entries
+     d.save_all
+     # check if dataset has been saved correctly
+     new_dataset = Dataset.find d.id
+     assert_equal 3, new_dataset.compounds.size
+     assert_equal 2, new_dataset.features.size
+     assert_equal [[1,2],[4,5],[6,7]], new_dataset.data_entries
+     d.delete
+     assert_raises Mongoid::Errors::DocumentNotFound do
+       Dataset.find d.id
+     end
+     assert_raises Mongoid::Errors::DocumentNotFound do
+       Dataset.find new_dataset.id
+     end
+   end
+
+   def test_dataset_accessors
+     d = Dataset.from_csv_file "#{DATA_DIR}/multicolumn.csv"
+     # create empty dataset
+     new_dataset = Dataset.find d.id
+     # get metadata
+     assert_match "multicolumn.csv", new_dataset.source
+     assert_equal "multicolumn.csv", new_dataset.title
+     # get features
+     assert_equal 6, new_dataset.features.size
+     assert_equal 7, new_dataset.compounds.size
+     assert_equal ["1", nil, "false", nil, nil, 1.0], new_dataset.data_entries.last
+     d.delete
+   end
+
+   def test_create_from_file
+     d = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.mini.csv")
+     assert_equal Dataset, d.class
+     refute_nil d.warnings
+     assert_match "EPAFHM.mini.csv", d.source
+     assert_equal "EPAFHM.mini.csv", d.name
+     d.delete
+     #assert_equal false, URI.accessible?(d.uri)
+   end
+
+   def test_create_from_file_with_wrong_smiles_compound_entries
+     d = Dataset.from_csv_file File.join(DATA_DIR,"wrong_dataset.csv")
+     refute_nil d.warnings
+     assert_match /2|3|4|5|6|7|8/, d.warnings.join
+     d.delete
+   end
+
+   def test_multicolumn_csv
+     d = Dataset.from_csv_file "#{DATA_DIR}/multicolumn.csv"
+     refute_nil d.warnings
+     assert d.warnings.grep(/Duplicate compound/)
+     assert d.warnings.grep(/3, 5/)
+     assert_equal 6, d.features.size
+     assert_equal 7, d.compounds.size
+     assert_equal 5, d.compounds.collect{|c| c.inchi}.uniq.size
+     assert_equal [["1", "1", "true", "true", "test", 1.1], ["1", "2", "false", "7.5", "test", 0.24], ["1", "3", "true", "5", "test", 3578.239], ["0", "4", "false", "false", "test", -2.35], ["1", "2", "true", "4", "test_2", 1], ["1", "2", "false", "false", "test", -1.5], ["1", nil, "false", nil, nil, 1.0]], d.data_entries
+     assert_equal "c1ccc[nH]1,1,,false,,,1.0", d.to_csv.split("\n")[7]
+     csv = CSV.parse(d.to_csv)
+     original_csv = CSV.read("#{DATA_DIR}/multicolumn.csv")
+     csv.shift
+     original_csv.shift
+     csv.each_with_index do |row,i|
+       compound = Compound.from_smiles row.shift
+       original_compound = Compound.from_smiles original_csv[i].shift
+       assert_equal original_compound.inchi, compound.inchi
+       row.each_with_index do |v,j|
+         if v.numeric?
+           assert_equal original_csv[i][j].strip.to_f, row[j].to_f
+         else
+           assert_equal original_csv[i][j].strip, row[j].to_s
+         end
+       end
+     end
+     d.delete
+   end
+
+   def test_from_csv
+     d = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv"
+     assert_equal Dataset, d.class
+     assert_equal 1, d.features.size
+     assert_equal 85, d.compounds.size
+     assert_equal 85, d.data_entries.size
+     csv = CSV.read("#{DATA_DIR}/hamster_carcinogenicity.csv")
+     csv.shift
+     assert_equal csv.collect{|r| r[1]}, d.data_entries.flatten
+     d.delete
+     #assert_equal false, URI.accessible?(d.uri)
+   end
+
+   def test_from_csv_classification
+     ["int", "float", "string"].each do |mode|
+       d = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.mini.bool_#{mode}.csv"
+       csv = CSV.read("#{DATA_DIR}/hamster_carcinogenicity.mini.bool_#{mode}.csv")
+       csv.shift
+       entries = d.data_entries.flatten
+       csv.each_with_index do |r, i|
+         assert_equal r[1].to_s, entries[i]
+       end
+       d.delete
+     end
+   end
+
+   def test_from_csv2
+     File.open("#{DATA_DIR}/temp_test.csv", "w+") { |file| file.write("SMILES,Hamster\nCC=O,true\n ,true\nO=C(N),true") }
+     dataset = Dataset.from_csv_file "#{DATA_DIR}/temp_test.csv"
+     assert_equal "Cannot parse SMILES compound ' ' at position 3, all entries are ignored.", dataset.warnings.join
+     File.delete "#{DATA_DIR}/temp_test.csv"
+     dataset.features.each{|f| feature = Feature.find f.id; feature.delete}
+     dataset.delete
+   end
+
+   def test_same_feature
+     datasets = []
+     features = []
+     2.times do |i|
+       d = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.mini.csv"
+       features << d.features.first
+       assert features[0].id==features[-1].id, "re-upload should find the old feature, but created a new one"
+       datasets << d
+     end
+     datasets.each{|d| d.delete}
+   end
+
+   def test_create_from_file
+     d = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.mini.csv")
+     assert_equal Dataset, d.class
+     refute_nil d.warnings
+     assert_match /row 13/, d.warnings.join
+     assert_match "EPAFHM.mini.csv", d.source
+     assert_equal 1, d.features.size
+     feature = d.features.first
+     assert_kind_of NumericBioAssay, feature
+     assert_equal 0.0113, d.data_entries[0][0]
+     assert_equal 0.00323, d.data_entries[5][0]
+     d2 = Dataset.find d.id
+     assert_equal 0.0113, d2.data_entries[0][0]
+     assert_equal 0.00323, d2.data_entries[5][0]
+   end
+
+ end
+
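
Note: test_client_create above builds a dataset by hand instead of importing a CSV. The following condensed sketch of that low-level path makes the same assumptions as the sketch above (lazar loaded, MongoDB available); compound_ids and data_entries are filled in parallel without consistency checks, exactly as the test comments state.

require 'lazar'
include OpenTox

d = Dataset.new
d.name = "Manually constructed dataset"

# Features must be created and saved before data entries refer to them.
d.features = ["test1", "test2"].collect do |title|
  f = Feature.new
  f.name = title
  f.numeric = true
  f.save
  f
end

# Low-level insertion: compound ids and value rows are kept in parallel arrays.
d.compound_ids << Compound.from_smiles("c1ccccc1NN").id
d.compound_ids << Compound.from_smiles("CC(C)N").id
d.data_entries = [[1, 2], [4, 5]]

d.save_all              # persist, as in test_client_create above
d2 = Dataset.find d.id  # reload from MongoDB
d2.delete
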
data/test/descriptor-long.rb ADDED
@@ -0,0 +1,26 @@
+ require_relative "setup.rb"
+ class DescriptorLongTest < MiniTest::Test
+
+   def test_dataset_all
+     # TODO: improve CDK descriptor calculation speed or add timeout
+     skip "CDK descriptor calculation takes too long for some compounds"
+     dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"hamster_carcinogenicity.mini.csv")
+     d = OpenTox::Algorithm::Descriptor.physchem dataset
+     assert_equal dataset.compounds, d.compounds
+     assert_equal 332, d.features.size
+     assert_equal 332, d.data_entries.first.size
+     d.delete
+   end
+
+   def test_dataset_openbabel
+     # TODO: improve CDK descriptor calculation speed or add timeout
+     dataset = Dataset.from_csv_file File.join(DATA_DIR,"hamster_carcinogenicity.mini.csv")
+     d = Algorithm::Descriptor.physchem dataset, Algorithm::Descriptor::OBDESCRIPTORS.keys
+     assert_equal dataset.compounds, d.compounds
+     size = Algorithm::Descriptor::OBDESCRIPTORS.keys.size
+     assert_equal size, d.features.size
+     assert_equal size, d.data_entries.first.size
+     d.delete
+   end
+
+ end
data/test/descriptor.rb ADDED
@@ -0,0 +1,83 @@
+ require_relative "setup.rb"
+
+ class DescriptorTest < MiniTest::Test
+
+   def test_list
+     # check available descriptors
+     @descriptors = OpenTox::Algorithm::Descriptor::DESCRIPTORS.keys
+     assert_equal 111, @descriptors.size, "wrong number of physchem descriptors"
+     @descriptor_values = OpenTox::Algorithm::Descriptor::DESCRIPTOR_VALUES
+     assert_equal 356, @descriptor_values.size, "wrong number of physchem descriptor values"
+     sum = 0
+     [ @descriptors, @descriptor_values ].each do |desc|
+       {"Openbabel"=>16,"Cdk"=>(desc==@descriptors ? 50 : 295),"Joelib"=>45}.each do |k,v|
+         assert_equal v, desc.select{|x| x=~/^#{k}\./}.size, "wrong number of #{k} descriptors"
+         sum += v
+       end
+     end
+     assert_equal 111+356, sum
+   end
+
+   def test_smarts
+     c = OpenTox::Compound.from_smiles "N=C=C1CCC(=F=FO)C1"
+     File.open("tmp.png","w+"){|f| f.puts c.png}
+     s = Smarts.find_or_create_by(:smarts => "F=F")
+     result = OpenTox::Algorithm::Descriptor.smarts_match c, s
+     assert_equal [1], result
+     smarts = ["CC", "C", "C=C", "CO", "F=F", "C1CCCC1", "NN"].collect{|s| Smarts.find_or_create_by(:smarts => s)}
+     result = OpenTox::Algorithm::Descriptor.smarts_match c, smarts
+     assert_equal [1, 1, 1, 0, 1, 1, 0], result
+     smarts_count = [10, 6, 2, 0, 2, 10, 0]
+     result = OpenTox::Algorithm::Descriptor.smarts_count c, smarts
+     assert_equal smarts_count, result
+   end
+
+   def test_compound_openbabel_single
+     c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N"
+     result = OpenTox::Algorithm::Descriptor.physchem c, ["Openbabel.logP"]
+     assert_equal 1.12518, result.first
+   end
+
+   def test_compound_cdk_single
+     c = OpenTox::Compound.from_smiles "c1ccccc1"
+     result = OpenTox::Algorithm::Descriptor.physchem c, ["Cdk.AtomCount"]
+     assert_equal [12], result
+     c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N"
+     result = OpenTox::Algorithm::Descriptor.physchem c, ["Cdk.AtomCount"]
+     assert_equal [17], result
+     result = OpenTox::Algorithm::Descriptor.physchem c, ["Cdk.CarbonTypes"]
+     c_types = {"Cdk.CarbonTypes.C1SP1"=>1, "Cdk.CarbonTypes.C2SP1"=>0, "Cdk.CarbonTypes.C1SP2"=>0, "Cdk.CarbonTypes.C2SP2"=>1, "Cdk.CarbonTypes.C3SP2"=>0, "Cdk.CarbonTypes.C1SP3"=>2, "Cdk.CarbonTypes.C2SP3"=>1, "Cdk.CarbonTypes.C3SP3"=>1, "Cdk.CarbonTypes.C4SP3"=>0}
+     assert_equal [1, 0, 0, 1, 0, 2, 1, 1, 0], result
+   end
+
+   def test_compound_joelib_single
+     c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N"
+     result = OpenTox::Algorithm::Descriptor.physchem c, ["Joelib.LogP"]
+     assert_equal [2.65908], result
+   end
+
+   def test_compound_all
+     c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N"
+     result = OpenTox::Algorithm::Descriptor.physchem c
+     assert_equal 332, result.size
+     assert_equal 30.8723, result[2]
+     assert_equal 1.12518, result[328]
+   end
+
+   def test_compound_descriptor_parameters
+     c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N"
+     result = OpenTox::Algorithm::Descriptor.physchem c, [ "Openbabel.logP", "Cdk.AtomCount", "Cdk.CarbonTypes", "Joelib.LogP" ]#, true
+     assert_equal 12, result.size
+     assert_equal [1.12518, 17.0, 1, 0, 0, 1, 0, 2, 1, 1, 0, 2.65908], result#.last
+   end
+
+   def test_dataset_descriptor_parameters
+     dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"hamster_carcinogenicity.mini.csv")
+     d = OpenTox::Algorithm::Descriptor.physchem dataset, [ "Openbabel.logP", "Cdk.AtomCount", "Cdk.CarbonTypes", "Joelib.LogP" ]
+     assert_kind_of Dataset, d
+     assert_equal dataset.compounds, d.compounds
+     assert_equal dataset.compounds.size, d.data_entries.size
+     assert_equal 12, d.data_entries.first.size
+   end
+
+ end
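
Note: the descriptor tests above drive a single entry point for OpenBabel, CDK and JOELib descriptors. A minimal sketch of that call pattern, assuming the Java components built by ext/lazar/extconf.rb are available; the descriptor names are taken from the tests and availability depends on the local build.

require 'lazar'
include OpenTox

c = Compound.from_smiles "CC(=O)CC(C)C#N"

# A single named descriptor returns an array of values.
logp = Algorithm::Descriptor.physchem c, ["Openbabel.logP"]

# Several descriptors at once; multi-valued descriptors such as
# Cdk.CarbonTypes expand into one value per sub-descriptor.
values = Algorithm::Descriptor.physchem c, ["Openbabel.logP", "Cdk.AtomCount", "Joelib.LogP"]

# SMARTS matching against a compound, as in test_smarts above.
smarts = ["CC", "C=C"].collect { |s| Smarts.find_or_create_by(:smarts => s) }
hits = Algorithm::Descriptor.smarts_match c, smarts

puts logp.inspect, values.inspect, hits.inspect
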
data/test/error.rb ADDED
@@ -0,0 +1,24 @@
+ require_relative "setup.rb"
+
+ class ErrorTest < MiniTest::Test
+
+   def test_bad_request
+     object = OpenTox::Feature.new
+     assert_raises Mongoid::Errors::DocumentNotFound do
+       response = OpenTox::Feature.find(object.id)
+     end
+   end
+
+   def test_error_methods
+     assert_raises OpenTox::ResourceNotFoundError do
+       resource_not_found_error "This is a test"
+     end
+   end
+
+   def test_exception
+     assert_raises Exception do
+       raise Exception.new "Basic Exception"
+     end
+   end
+
+ end
data/test/feature.rb ADDED
@@ -0,0 +1,65 @@
+ require_relative "setup.rb"
+
+ class FeatureTest < MiniTest::Test
+
+   def test_opentox_feature
+     @feature = OpenTox::Feature.create(:name => "tost")
+     assert_equal true, OpenTox::Feature.where(name: "tost").exists?, "#{@feature.id} is not accessible."
+     assert_equal true, OpenTox::Feature.where(id: @feature.id).exists?, "#{@feature.id} is not accessible."
+
+     list = OpenTox::Feature.all
+     listsize1 = list.length
+     assert_equal true, list.collect{|f| f.id}.include?(@feature.id)
+     # modify feature
+     @feature2 = OpenTox::Feature.find(@feature.id)
+     assert_equal "tost", @feature2[:name]
+     assert_equal "tost", @feature2.name
+     assert_kind_of Feature, @feature2
+
+     @feature2[:name] = "feature2"
+     @feature2.save
+     list = OpenTox::Feature.all
+     listsize2 = list.length
+     assert_match "feature2", @feature2.name
+     refute_match "tost", @feature2.name
+     assert_equal listsize1, listsize2
+
+     id = @feature2.id
+     @feature2.delete
+     assert_raises Mongoid::Errors::DocumentNotFound do
+       OpenTox::Feature.find(id)
+     end
+   end
+
+   def test_duplicated_features
+     metadata = {
+       :name => "feature duplication test",
+       :nominal => true,
+       :description => "feature duplication test"
+     }
+     feature = NumericBioAssay.find_or_create_by metadata
+     dup_feature = NumericBioAssay.find_or_create_by metadata
+     assert_kind_of Feature, feature
+     assert !feature.id.nil?, "No Feature ID in #{feature.inspect}"
+     assert !dup_feature.id.nil?, "No Feature ID in #{dup_feature.inspect}"
+     assert_equal feature.id, dup_feature.id
+     feature.delete
+     assert_raises Mongoid::Errors::DocumentNotFound do
+       OpenTox::Feature.find(feature.id)
+     end
+     assert_raises Mongoid::Errors::DocumentNotFound do
+       OpenTox::Feature.find(dup_feature.id)
+     end
+   end
+
+   def test_smarts_feature
+     feature = Smarts.find_or_create_by(:smarts => "CN")
+     assert feature.smarts, "CN"
+     assert_kind_of Smarts, feature
+     feature.smarts = 'cc'
+     assert feature.smarts, "cc"
+     original = Feature.where(:smarts => 'CN').first
+     assert original.smarts, "CN"
+   end
+
+ end
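
Note: features are Mongoid documents, so find_or_create_by deduplicates them by their metadata; that is what test_duplicated_features relies on. A short sketch under the same assumptions as above (the names and values here are illustrative only).

require 'lazar'
include OpenTox

metadata = { :name => "example feature", :description => "illustrative metadata" }

# Re-running find_or_create_by with identical metadata returns the same
# document instead of creating a duplicate.
f1 = NumericBioAssay.find_or_create_by metadata
f2 = NumericBioAssay.find_or_create_by metadata
puts f1.id == f2.id   # => true

f1.delete
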
data/test/fminer-long.rb ADDED
@@ -0,0 +1,38 @@
+ require_relative "setup.rb"
+
+ class FminerTest < MiniTest::Test
+
+   def test_fminer_multicell
+     #skip "multicell segfaults"
+     # TODO: aborts, probably an fminer
+     # or OpenBabel segfault
+     dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"multi_cell_call.csv")
+     feature_dataset = OpenTox::Algorithm::Fminer.bbrc(dataset)#, :min_frequency => 15)
+     p feature_dataset.training_parameters
+     assert_equal dataset.compound_ids, feature_dataset.compound_ids
+     dataset.delete
+     feature_dataset.delete
+   end
+
+   def test_fminer_isscan
+     dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"ISSCAN-multi.csv")
+     feature_dataset = OpenTox::Algorithm::Fminer.bbrc(dataset)#, :min_frequency => 15)
+     assert_equal feature_dataset.compounds.size, dataset.compounds.size
+     p feature_dataset.features.size
+     p feature_dataset.training_parameters
+     dataset.delete
+     feature_dataset.delete
+   end
+
+   def test_fminer_kazius
+     dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"kazius.csv")
+     # TODO: reactivate default settings
+     feature_dataset = OpenTox::Algorithm::Fminer.bbrc(dataset, :min_frequency => 20)
+     assert_equal feature_dataset.compounds.size, dataset.compounds.size
+     feature_dataset = Dataset.find feature_dataset.id
+     assert feature_dataset.data_entries.size, dataset.compounds.size
+     dataset.delete
+     feature_dataset.delete
+   end
+
+ end
data/test/fminer.rb ADDED
@@ -0,0 +1,52 @@
+ require_relative "setup.rb"
+
+ class FminerTest < MiniTest::Test
+
+   def test_fminer_bbrc
+     dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"hamster_carcinogenicity.csv")
+     refute_nil dataset.id
+     feature_dataset = OpenTox::Algorithm::Fminer.bbrc dataset
+     feature_dataset = Dataset.find feature_dataset.id
+     assert_equal dataset.compounds.size, feature_dataset.compounds.size
+     # TODO: fminer calculates 62 instead of 54 features
+     # it is unclear which commit changed the numbers (occurs with the old libraries/mongodb branch too)
+     # modification of Compound to use SMILES instead of InChIs seems to have no effect
+     #assert_equal 54, feature_dataset.features.size
+     #assert_equal "C-C-C=C", feature_dataset.features.first.smarts
+     compounds = feature_dataset.compounds
+     smarts = feature_dataset.features
+     smarts.each do |smart|
+       assert smart.p_value.round(2) >= 0.95
+     end
+     match = OpenTox::Algorithm::Descriptor.smarts_match compounds, smarts
+     feature_dataset.data_entries.each_with_index do |fingerprint,i|
+       assert_equal match[i], fingerprint
+     end
+
+     dataset.delete
+     feature_dataset.delete
+   end
+
+   def test_fminer_last
+     skip "last features have to be activated"
+     dataset = OpenTox::Dataset.new
+     dataset.upload File.join(DATA_DIR,"hamster_carcinogenicity.csv")
+     feature_dataset = OpenTox::Algorithm::Fminer.last :dataset => dataset
+     assert_equal dataset.compounds.size, feature_dataset.compounds.size
+     assert_equal 21, feature_dataset.features.size
+     assert_equal '[#6&A]-[#6&a]:[#6&a]:[#6&a]:[#6&a]:[#6&a]', feature_dataset.features.first.smarts
+
+     compounds = feature_dataset.compounds
+     smarts = feature_dataset.features.collect{|f| f.smarts}
+     match = OpenTox::Algorithm::Descriptor.smarts_match compounds, smarts
+     compounds.each_with_index do |c,i|
+       smarts.each_with_index do |s,j|
+         assert_equal match[i][j], feature_dataset.data_entries[i][j].to_i
+       end
+     end
+
+     dataset.delete
+     feature_dataset.delete
+   end
+
+ end
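
Note: the fminer tests above mine BBRC substructure features from a training dataset and re-match them with the SMARTS matcher. A compact sketch of that round trip, assuming the fminer native extension compiled during gem installation; :min_frequency is the only parameter the tests pass explicitly.

require 'lazar'
include OpenTox

dataset = Dataset.from_csv_file "test/data/hamster_carcinogenicity.csv"

# Mine class-correlated substructures (BBRC).
feature_dataset = Algorithm::Fminer.bbrc dataset, :min_frequency => 5

# Each feature is a SMARTS pattern with an associated p-value.
feature_dataset.features.each { |f| puts "#{f.smarts}\t#{f.p_value}" }

# Rows of the feature dataset are per-compound substructure fingerprints,
# reproducible with the SMARTS matcher as test_fminer_bbrc does.
match = Algorithm::Descriptor.smarts_match feature_dataset.compounds, feature_dataset.features
puts match.first.inspect

dataset.delete
feature_dataset.delete
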
data/test/lazar-fminer.rb ADDED
@@ -0,0 +1,50 @@
+ require_relative "setup.rb"
+
+ class LazarFminerTest < MiniTest::Test
+
+   def test_lazar_fminer
+     training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"hamster_carcinogenicity.csv")
+     model = Model::LazarFminerClassification.create training_dataset#, feature_dataset
+     feature_dataset = Dataset.find model.neighbor_algorithm_parameters[:feature_dataset_id]
+     assert_equal training_dataset.compounds.size, feature_dataset.compounds.size
+     #TODO check fminer features, see fminer.rb
+     #assert_equal 54, feature_dataset.features.size
+     feature_dataset.data_entries.each do |e|
+       assert_equal e.size, feature_dataset.features.size
+     end
+     #assert_equal 'C-C-C=C', feature_dataset.features.first.smarts
+
+     [ {
+       :compound => OpenTox::Compound.from_inchi("InChI=1S/C6H6/c1-2-4-6-5-3-1/h1-6H"),
+       :prediction => "false",
+       :confidence => 0.25281385281385277,
+       :nr_neighbors => 11
+     },{
+       :compound => OpenTox::Compound.from_smiles("c1ccccc1NN"),
+       :prediction => "false",
+       :confidence => 0.3639589577089577,
+       :nr_neighbors => 14
+     }, {
+       :compound => Compound.from_smiles('OCCCCCCCC\C=C/CCCCCCCC'),
+       :prediction => "false",
+       :confidence => 0.5555555555555556,
+       :nr_neighbors => 1
+     }].each do |example|
+       prediction = model.predict example[:compound]
+
+       assert_equal example[:prediction], prediction[:value]
+       #assert_equal example[:confidence], prediction[:confidence]
+       #assert_equal example[:nr_neighbors], prediction[:neighbors].size
+     end
+
+     # make a dataset prediction
+     compound_dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.mini.csv")
+     prediction = model.predict compound_dataset
+     assert_equal compound_dataset.compounds, prediction.compounds
+
+     assert_equal "Cound not find similar compounds.", prediction.data_entries[7][2]
+     assert_equal "measured", prediction.data_entries[14][1]
+     # cleanup
+     [training_dataset,model,feature_dataset,compound_dataset].each{|o| o.delete}
+   end
+ end
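
Note: the model test above ties the pieces together: BBRC substructure features, neighbor lookup, and a classification of the query compound. A minimal end-to-end sketch, again assuming a working local installation; :value is the prediction key asserted in the test.

require 'lazar'
include OpenTox

training_dataset = Dataset.from_csv_file "test/data/hamster_carcinogenicity.csv"

# Train a lazar classification model with fminer (BBRC) features.
model = Model::LazarFminerClassification.create training_dataset

# Predict a single compound; the result is a hash with at least :value.
prediction = model.predict Compound.from_smiles("c1ccccc1NN")
puts prediction[:value]

# Predicting a whole dataset returns a dataset with one prediction row per compound.
compound_dataset = Dataset.from_csv_file "test/data/EPAFHM.mini.csv"
batch = model.predict compound_dataset
puts batch.data_entries.first.inspect

[training_dataset, model, compound_dataset].each { |o| o.delete }
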