lazar 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +10 -0
  3. data/.yardopts +4 -0
  4. data/Gemfile +2 -0
  5. data/LICENSE +674 -0
  6. data/README.md +44 -0
  7. data/Rakefile +1 -0
  8. data/VERSION +1 -0
  9. data/ext/lazar/extconf.rb +87 -0
  10. data/java/CdkDescriptorInfo.class +0 -0
  11. data/java/CdkDescriptorInfo.java +22 -0
  12. data/java/CdkDescriptors.class +0 -0
  13. data/java/CdkDescriptors.java +141 -0
  14. data/java/Jmol.jar +0 -0
  15. data/java/JoelibDescriptorInfo.class +0 -0
  16. data/java/JoelibDescriptorInfo.java +15 -0
  17. data/java/JoelibDescriptors.class +0 -0
  18. data/java/JoelibDescriptors.java +60 -0
  19. data/java/Rakefile +15 -0
  20. data/java/cdk-1.4.19.jar +0 -0
  21. data/java/joelib2.jar +0 -0
  22. data/java/log4j.jar +0 -0
  23. data/lazar.gemspec +29 -0
  24. data/lib/SMARTS_InteLigand.txt +983 -0
  25. data/lib/algorithm.rb +21 -0
  26. data/lib/bbrc.rb +165 -0
  27. data/lib/classification.rb +107 -0
  28. data/lib/compound.rb +254 -0
  29. data/lib/crossvalidation.rb +187 -0
  30. data/lib/dataset.rb +334 -0
  31. data/lib/descriptor.rb +247 -0
  32. data/lib/error.rb +66 -0
  33. data/lib/feature.rb +97 -0
  34. data/lib/lazar-model.rb +170 -0
  35. data/lib/lazar.rb +69 -0
  36. data/lib/neighbor.rb +25 -0
  37. data/lib/opentox.rb +22 -0
  38. data/lib/overwrite.rb +119 -0
  39. data/lib/regression.rb +199 -0
  40. data/lib/rest-client-wrapper.rb +98 -0
  41. data/lib/similarity.rb +58 -0
  42. data/lib/unique_descriptors.rb +120 -0
  43. data/lib/validation.rb +114 -0
  44. data/mongoid.yml +8 -0
  45. data/test/all.rb +5 -0
  46. data/test/compound.rb +100 -0
  47. data/test/data/CPDBAS_v5c_1547_29Apr2008part.sdf +13553 -0
  48. data/test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_mouse_TD50.csv +436 -0
  49. data/test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_rat_TD50.csv +568 -0
  50. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Hamster.csv +87 -0
  51. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mouse.csv +978 -0
  52. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall.csv +1120 -0
  53. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall_no_duplicates.csv +1113 -0
  54. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity.csv +850 -0
  55. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity_no_duplicates.csv +829 -0
  56. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Rat.csv +1198 -0
  57. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_SingleCellCall.csv +1505 -0
  58. data/test/data/EPAFHM.csv +618 -0
  59. data/test/data/EPAFHM.medi.csv +100 -0
  60. data/test/data/EPAFHM.mini.csv +22 -0
  61. data/test/data/EPA_v4b_Fathead_Minnow_Acute_Toxicity_LC50_mmol.csv +581 -0
  62. data/test/data/FDA_v3b_Maximum_Recommended_Daily_Dose_mmol.csv +1217 -0
  63. data/test/data/ISSCAN-multi.csv +59 -0
  64. data/test/data/LOAEL_log_mg_corrected_smiles.csv +568 -0
  65. data/test/data/LOAEL_log_mmol_corrected_smiles.csv +568 -0
  66. data/test/data/acetaldehyde.sdf +14 -0
  67. data/test/data/boiling_points.ext.sdf +11460 -0
  68. data/test/data/cpdb_100.csv +101 -0
  69. data/test/data/hamster_carcinogenicity.csv +86 -0
  70. data/test/data/hamster_carcinogenicity.mini.bool_float.csv +11 -0
  71. data/test/data/hamster_carcinogenicity.mini.bool_int.csv +11 -0
  72. data/test/data/hamster_carcinogenicity.mini.bool_string.csv +11 -0
  73. data/test/data/hamster_carcinogenicity.mini.csv +11 -0
  74. data/test/data/hamster_carcinogenicity.ntriples +618 -0
  75. data/test/data/hamster_carcinogenicity.sdf +2805 -0
  76. data/test/data/hamster_carcinogenicity.xls +0 -0
  77. data/test/data/hamster_carcinogenicity.yaml +352 -0
  78. data/test/data/hamster_carcinogenicity_with_errors.csv +88 -0
  79. data/test/data/kazius.csv +4070 -0
  80. data/test/data/multi_cell_call.csv +1067 -0
  81. data/test/data/multi_cell_call_no_dup.csv +1057 -0
  82. data/test/data/multicolumn.csv +8 -0
  83. data/test/data/rat_feature_dataset.csv +1179 -0
  84. data/test/data/wrong_dataset.csv +8 -0
  85. data/test/dataset-long.rb +117 -0
  86. data/test/dataset.rb +199 -0
  87. data/test/descriptor-long.rb +26 -0
  88. data/test/descriptor.rb +83 -0
  89. data/test/error.rb +24 -0
  90. data/test/feature.rb +65 -0
  91. data/test/fminer-long.rb +38 -0
  92. data/test/fminer.rb +52 -0
  93. data/test/lazar-fminer.rb +50 -0
  94. data/test/lazar-long.rb +72 -0
  95. data/test/lazar-physchem-short.rb +27 -0
  96. data/test/setup.rb +6 -0
  97. data/test/validation.rb +41 -0
  98. metadata +212 -0
@@ -0,0 +1,8 @@
1
+ SMILES,Wrong Dataset
2
+ Tost,0
3
+ Is,1
4
+ A,0
5
+ Wrong,1
6
+ Dataset,0
7
+ Entry,1
8
+ O[C@@H]8[C@@H](O)[C@@H]1O[C@H](CO)[C@H]8O[C@H]7O[C@H](CO)[C@@H](O[C@H]6O[C@H](CO)[C@@H](O[C@H]5O[C@H](CO)[C@@H](O[C@H]4O[C@H](CO)[C@@H](O[C@H]3O[C@H](CO)[C@@H](O[C@H]2O[C@H](CO)[C@@H](O1)[C@H](O)[C@H]2O)[C@H](O)[C@H]3O)[C@H](O)[C@H]4O)[C@H](O)[C@H]5O)[C, 0
require_relative "setup.rb"

# Long-running integration tests for OpenTox::Dataset:
# CSV upload round-trips against the larger bundled datasets.
class DatasetLongTest < MiniTest::Test

  def test_01_upload_epafhm
    f = File.join DATA_DIR, "EPAFHM.csv"
    d = OpenTox::Dataset.from_csv_file f
    csv = CSV.read f
    # header row is not a compound/data entry
    assert_equal csv.size-1, d.compounds.size
    assert_equal csv.first.size-1, d.features.size
    assert_equal csv.size-1, d.data_entries.size
    d.delete
  end

=begin
  # TODO catch OpenBabel segfaults and identify/remove cause
  def test_02_upload_multicell
    duplicates = [
      "http://localhost:8082/compound/InChI=1S/C6HCl5O/c7-1-2(8)4(10)6(12)5(11)3(1)9/h12H",
      "http://localhost:8082/compound/InChI=1S/C12H8Cl6O/c13-8-9(14)11(16)5-3-1-2(6-7(3)19-6)4(5)10(8,15)12(11,17)18/h2-7H,1H2",
      "http://localhost:8082/compound/InChI=1S/C2HCl3/c3-1-2(4)5/h1H",
      "http://localhost:8082/compound/InChI=1S/C4H5Cl/c1-3-4(2)5/h3H,1-2H2",
      "http://localhost:8082/compound/InChI=1S/C4H7Cl/c1-4(2)3-5/h1,3H2,2H3",
      "http://localhost:8082/compound/InChI=1S/C8H14O4/c1-5-4-8(11-6(2)9)12-7(3)10-5/h5,7-8H,4H2,1-3H3",
      "http://localhost:8082/compound/InChI=1S/C19H30O5/c1-3-5-7-20-8-9-21-10-11-22-14-17-13-19-18(23-15-24-19)12-16(17)6-4-2/h12-13H,3-11,14-15H2,1-2H3",
    ]
    errors = ['O=P(H)(OC)OC', 'C=CCNN.HCl' ]
    f = File.join DATA_DIR, "multi_cell_call.csv"
    d = OpenTox::Dataset.from_csv_file f
    csv = CSV.read f
    assert_equal true, d.features.first.nominal
    assert_nil d["index"]
    assert_equal csv.size-1-errors.size, d.compounds.size
    assert_equal csv.first.size-1, d.features.size
    assert_equal csv.size-1-errors.size, d.data_entries.size
    p d.warnings
    (duplicates+errors).each do |uri|
      assert d.warnings.grep %r{#{uri}}
    end
    d.delete
  end
=end

  def test_03_upload_isscan
    f = File.join DATA_DIR, "ISSCAN-multi.csv"
    d = OpenTox::Dataset.from_csv_file f
    csv = CSV.read f
    assert_equal csv.size-1, d.compounds.size
    assert_equal csv.first.size-1, d.features.size
    assert_equal csv.size-1, d.data_entries.size
    d.delete
    #assert_equal false, URI.accessible?(d.uri)
  end

  # Uploads the same CSV from three threads to check for race conditions.
  def test_04_simultanous_upload
    threads = []
    3.times do |t|
      threads << Thread.new(t) do |up|
        d = OpenTox::Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv"
        assert_equal OpenTox::Dataset, d.class
        assert_equal 1, d.features.size
        assert_equal 85, d.compounds.size
        assert_equal 85, d.data_entries.size
        csv = CSV.read("#{DATA_DIR}/hamster_carcinogenicity.csv")
        csv.shift
        assert_equal csv.collect{|r| r[1]}, d.data_entries.flatten
        d.delete
      end
    end
    threads.each {|aThread| aThread.join}
  end

  def test_05_upload_kazius
    f = File.join DATA_DIR, "kazius.csv"
    d = OpenTox::Dataset.from_csv_file f
    csv = CSV.read f
    assert_equal csv.size-1, d.compounds.size
    assert_equal csv.first.size-1, d.features.size
    assert_equal csv.size-1, d.data_entries.size
    assert_empty d.warnings
    # 493 COC1=C(C=C(C(=C1)Cl)OC)Cl,1
    c = d.compounds[491]
    assert_equal c.smiles, "COc1cc(Cl)c(cc1Cl)OC"
    assert_equal d.data_entries[491][0], "1"
    d.delete
  end

  # Round-trips a large feature dataset and compares the stored entries
  # against the original CSV (minus one known-broken row).
  def test_upload_feature_dataset
    t = Time.now
    f = File.join DATA_DIR, "rat_feature_dataset.csv"
    d = Dataset.from_csv_file f
    assert_equal 458, d.features.size
    d.save
    p "Upload: #{Time.now-t}"
    d2 = Dataset.find d.id
    t = Time.now
    assert_equal d.features.size, d2.features.size
    csv = CSV.read f
    csv.delete_at(248) # remove entry with InChi segfault
    csv.shift # remove header
    refute_empty d2.warnings
    # parenthesized to avoid Ruby's "ambiguous first argument" regexp warning
    assert_match(/249/, d2.warnings.join)
    assert_equal csv.size, d2.compounds.size
    assert_equal csv.first.size-1, d2.features.size
    d2.compounds.each_with_index do |compound,i|
      row = csv[i]
      row.shift # remove compound
      assert_equal row, d2.data_entries[i]
    end
    p "Download: #{Time.now-t}" # fixed typo: was "Dowload"
    d2.delete
    assert_raises Mongoid::Errors::DocumentNotFound do
      Dataset.find d.id
    end
  end

end
data/test/dataset.rb ADDED
# TODO: check compound/data_entry sequences with missing and duplicated values

require_relative "setup.rb"

# Unit tests for OpenTox::Dataset CRUD and CSV import/export.
class DatasetTest < MiniTest::Test

  def test_all
    d1 = Dataset.new
    d1.save
    datasets = Dataset.all
    assert_equal Dataset, datasets.first.class
    d1.delete
  end

  def test_create_empty
    d = Dataset.new
    assert_equal Dataset, d.class
    refute_nil d.id
    assert_kind_of BSON::ObjectId, d.id
  end

  def test_client_create
    d = Dataset.new
    assert_equal Dataset, d.class
    d.name = "Create dataset test"

    # features not set
    # << operator was removed for efficiency reasons (CH)
    #assert_raises BadRequestError do
    #  d << [Compound.from_smiles("c1ccccc1NN"), 1,2]
    #end

    # add data entries
    d.features = ["test1", "test2"].collect do |title|
      f = Feature.new
      f.name = title
      f.numeric = true
      f.save
      f
    end

    # wrong feature size
    # << operator was removed for efficiency reasons (CH)
    #assert_raises BadRequestError do
    #  d << [Compound.from_smiles("c1ccccc1NN"), 1,2,3]
    #end

    # manual low-level insertions without consistency checks for runtime efficiency
    data_entries = []
    d.compound_ids << Compound.from_smiles("c1ccccc1NN").id
    data_entries << [1,2]
    d.compound_ids << Compound.from_smiles("CC(C)N").id
    data_entries << [4,5]
    d.compound_ids << Compound.from_smiles("C1C(C)CCCC1").id
    data_entries << [6,7]
    d.data_entries = data_entries
    assert_equal 3, d.compounds.size
    assert_equal 2, d.features.size
    assert_equal [[1,2],[4,5],[6,7]], d.data_entries
    d.save_all
    # check if dataset has been saved correctly
    new_dataset = Dataset.find d.id
    assert_equal 3, new_dataset.compounds.size
    assert_equal 2, new_dataset.features.size
    assert_equal [[1,2],[4,5],[6,7]], new_dataset.data_entries
    d.delete
    assert_raises Mongoid::Errors::DocumentNotFound do
      Dataset.find d.id
    end
    assert_raises Mongoid::Errors::DocumentNotFound do
      Dataset.find new_dataset.id
    end
  end

  def test_dataset_accessors
    d = Dataset.from_csv_file "#{DATA_DIR}/multicolumn.csv"
    # create empty dataset
    new_dataset = Dataset.find d.id
    # get metadata
    assert_match "multicolumn.csv", new_dataset.source
    assert_equal "multicolumn.csv", new_dataset.title
    # get features
    assert_equal 6, new_dataset.features.size
    assert_equal 7, new_dataset.compounds.size
    assert_equal ["1", nil, "false", nil, nil, 1.0], new_dataset.data_entries.last
    d.delete
  end

  # NOTE(review): this method was originally also named test_create_from_file,
  # the same name as the later, more detailed test below. Ruby silently lets the
  # second definition override the first, so this one never ran. Renamed so both
  # tests execute.
  def test_create_from_file_metadata
    d = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.mini.csv")
    assert_equal Dataset, d.class
    refute_nil d.warnings
    assert_match "EPAFHM.mini.csv", d.source
    assert_equal "EPAFHM.mini.csv", d.name
    d.delete
    #assert_equal false, URI.accessible?(d.uri)
  end

  def test_create_from_file_with_wrong_smiles_compound_entries
    d = Dataset.from_csv_file File.join(DATA_DIR,"wrong_dataset.csv")
    refute_nil d.warnings
    # parenthesized to avoid Ruby's "ambiguous first argument" regexp warning;
    # NOTE(review): the alternation matches any one of these digits anywhere in
    # the joined warnings — a weak assertion, kept for behavioral compatibility
    assert_match(/2|3|4|5|6|7|8/, d.warnings.join)
    d.delete
  end

  def test_multicolumn_csv
    d = Dataset.from_csv_file "#{DATA_DIR}/multicolumn.csv"
    refute_nil d.warnings
    assert d.warnings.grep(/Duplicate compound/)
    assert d.warnings.grep(/3, 5/)
    assert_equal 6, d.features.size
    assert_equal 7, d.compounds.size
    assert_equal 5, d.compounds.collect{|c| c.inchi}.uniq.size
    assert_equal [["1", "1", "true", "true", "test", 1.1], ["1", "2", "false", "7.5", "test", 0.24], ["1", "3", "true", "5", "test", 3578.239], ["0", "4", "false", "false", "test", -2.35], ["1", "2", "true", "4", "test_2", 1], ["1", "2", "false", "false", "test", -1.5], ["1", nil, "false", nil, nil, 1.0]], d.data_entries
    assert_equal "c1ccc[nH]1,1,,false,,,1.0", d.to_csv.split("\n")[7]
    csv = CSV.parse(d.to_csv)
    original_csv = CSV.read("#{DATA_DIR}/multicolumn.csv")
    csv.shift
    original_csv.shift
    csv.each_with_index do |row,i|
      compound = Compound.from_smiles row.shift
      original_compound = Compound.from_smiles original_csv[i].shift
      assert_equal original_compound.inchi, compound.inchi
      row.each_with_index do |v,j|
        if v.numeric?
          assert_equal original_csv[i][j].strip.to_f, row[j].to_f
        else
          assert_equal original_csv[i][j].strip, row[j].to_s
        end
      end
    end
    d.delete
  end

  def test_from_csv
    d = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv"
    assert_equal Dataset, d.class
    assert_equal 1, d.features.size
    assert_equal 85, d.compounds.size
    assert_equal 85, d.data_entries.size
    csv = CSV.read("#{DATA_DIR}/hamster_carcinogenicity.csv")
    csv.shift
    assert_equal csv.collect{|r| r[1]}, d.data_entries.flatten
    d.delete
    #assert_equal false, URI.accessible?(d.uri)
  end

  def test_from_csv_classification
    ["int", "float", "string"].each do |mode|
      d = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.mini.bool_#{mode}.csv"
      csv = CSV.read("#{DATA_DIR}/hamster_carcinogenicity.mini.bool_#{mode}.csv")
      csv.shift
      entries = d.data_entries.flatten
      csv.each_with_index do |r, i|
        assert_equal r[1].to_s, entries[i]
      end
      d.delete
    end
  end

  def test_from_csv2
    File.open("#{DATA_DIR}/temp_test.csv", "w+") { |file| file.write("SMILES,Hamster\nCC=O,true\n ,true\nO=C(N),true") }
    dataset = Dataset.from_csv_file "#{DATA_DIR}/temp_test.csv"
    assert_equal "Cannot parse SMILES compound ' ' at position 3, all entries are ignored.", dataset.warnings.join
    File.delete "#{DATA_DIR}/temp_test.csv"
    dataset.features.each{|f| feature = Feature.find f.id; feature.delete}
    dataset.delete
  end

  def test_same_feature
    datasets = []
    features = []
    2.times do |i|
      d = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.mini.csv"
      features << d.features.first
      assert features[0].id==features[-1].id,"re-upload should find old feature, but created new one"
      datasets << d
    end
    datasets.each{|d| d.delete}
  end

  def test_create_from_file
    d = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.mini.csv")
    assert_equal Dataset, d.class
    refute_nil d.warnings
    assert_match(/row 13/, d.warnings.join)
    assert_match "EPAFHM.mini.csv", d.source
    assert_equal 1, d.features.size
    feature = d.features.first
    assert_kind_of NumericBioAssay, feature
    assert_equal 0.0113, d.data_entries[0][0]
    assert_equal 0.00323, d.data_entries[5][0]
    d2 = Dataset.find d.id
    assert_equal 0.0113, d2.data_entries[0][0]
    assert_equal 0.00323, d2.data_entries[5][0]
  end

end
require_relative "setup.rb"

# Slow descriptor-calculation tests over whole datasets.
class DescriptorLongTest < MiniTest::Test

  def test_dataset_all
    # TODO: improve CDK descriptor calculation speed or add timeout
    skip "CDK descriptor calculation takes too long for some compounds"
    input = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"hamster_carcinogenicity.mini.csv")
    result = OpenTox::Algorithm::Descriptor.physchem input
    # the result dataset keeps the input compounds and gains one column per descriptor
    assert_equal input.compounds, result.compounds
    assert_equal 332, result.features.size
    assert_equal 332, result.data_entries.first.size
    result.delete
  end

  def test_dataset_openbabel
    # TODO: improve CDK descriptor calculation speed or add timeout
    input = Dataset.from_csv_file File.join(DATA_DIR,"hamster_carcinogenicity.mini.csv")
    descriptor_names = Algorithm::Descriptor::OBDESCRIPTORS.keys
    result = Algorithm::Descriptor.physchem input, descriptor_names
    assert_equal input.compounds, result.compounds
    # one feature and one data-entry column per OpenBabel descriptor
    expected = descriptor_names.size
    assert_equal expected, result.features.size
    assert_equal expected, result.data_entries.first.size
    result.delete
  end

end
require_relative "setup.rb"

# Tests for physico-chemical descriptor calculation (OpenBabel, CDK, JOELib)
# and SMARTS substructure matching on single compounds and datasets.
class DescriptorTest < MiniTest::Test

  def test_list
    # check available descriptors
    @descriptors = OpenTox::Algorithm::Descriptor::DESCRIPTORS.keys
    assert_equal 111,@descriptors.size,"wrong num physchem descriptors"
    @descriptor_values = OpenTox::Algorithm::Descriptor::DESCRIPTOR_VALUES
    assert_equal 356,@descriptor_values.size,"wrong num physchem descriptors"
    sum = 0
    [ @descriptors, @descriptor_values ].each do |desc|
      {"Openbabel"=>16,"Cdk"=>(desc==@descriptors ? 50 : 295),"Joelib"=>45}.each do |k,v|
        assert_equal v,desc.select{|x| x=~/^#{k}\./}.size,"wrong num #{k} descriptors"
        sum += v
      end
    end
    # no space before the parenthesis: avoids the "(...) interpreted as grouped
    # expression" warning of the original `assert_equal (111+356),sum`
    assert_equal 111+356, sum
  end

  def test_smarts
    c = OpenTox::Compound.from_smiles "N=C=C1CCC(=F=FO)C1"
    File.open("tmp.png","w+"){|f| f.puts c.png}
    s = Smarts.find_or_create_by(:smarts => "F=F")
    result = OpenTox::Algorithm::Descriptor.smarts_match c, s
    assert_equal [1], result
    # block param renamed (was `s`, shadowing the outer local `s` above)
    smarts = ["CC", "C", "C=C", "CO", "F=F", "C1CCCC1", "NN"].collect{|sm| Smarts.find_or_create_by(:smarts => sm)}
    result = OpenTox::Algorithm::Descriptor.smarts_match c, smarts
    assert_equal [1, 1, 1, 0, 1, 1, 0], result
    smarts_count = [10, 6, 2, 0, 2, 10, 0]
    result = OpenTox::Algorithm::Descriptor.smarts_count c, smarts
    assert_equal smarts_count, result
  end

  def test_compound_openbabel_single
    c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N"
    result = OpenTox::Algorithm::Descriptor.physchem c, ["Openbabel.logP"]
    assert_equal 1.12518, result.first
  end

  def test_compound_cdk_single
    c = OpenTox::Compound.from_smiles "c1ccccc1"
    result = OpenTox::Algorithm::Descriptor.physchem c, ["Cdk.AtomCount"]
    assert_equal [12], result
    c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N"
    result = OpenTox::Algorithm::Descriptor.physchem c, ["Cdk.AtomCount"]
    assert_equal [17], result
    result = OpenTox::Algorithm::Descriptor.physchem c, ["Cdk.CarbonTypes"]
    # c_types documents which CarbonTypes sub-descriptor each value belongs to;
    # it was previously an unused local — now drives the assertion directly
    # (Hash#values preserves insertion order)
    c_types = {"Cdk.CarbonTypes.C1SP1"=>1, "Cdk.CarbonTypes.C2SP1"=>0, "Cdk.CarbonTypes.C1SP2"=>0, "Cdk.CarbonTypes.C2SP2"=>1, "Cdk.CarbonTypes.C3SP2"=>0, "Cdk.CarbonTypes.C1SP3"=>2, "Cdk.CarbonTypes.C2SP3"=>1, "Cdk.CarbonTypes.C3SP3"=>1, "Cdk.CarbonTypes.C4SP3"=>0}
    assert_equal c_types.values, result
  end

  def test_compound_joelib_single
    c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N"
    result = OpenTox::Algorithm::Descriptor.physchem c, ["Joelib.LogP"]
    assert_equal [2.65908], result
  end

  def test_compound_all
    c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N"
    result = OpenTox::Algorithm::Descriptor.physchem c
    assert_equal 332, result.size
    # NOTE(review): exact float equality — brittle if descriptor libraries change
    assert_equal 30.8723, result[2]
    assert_equal 1.12518, result[328]
  end

  def test_compound_descriptor_parameters
    c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N"
    result = OpenTox::Algorithm::Descriptor.physchem c, [ "Openbabel.logP", "Cdk.AtomCount", "Cdk.CarbonTypes", "Joelib.LogP" ]#, true
    assert_equal 12, result.size
    assert_equal [1.12518, 17.0, 1, 0, 0, 1, 0, 2, 1, 1, 0, 2.65908], result#.last
  end

  def test_dataset_descriptor_parameters
    dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"hamster_carcinogenicity.mini.csv")
    d = OpenTox::Algorithm::Descriptor.physchem dataset, [ "Openbabel.logP", "Cdk.AtomCount", "Cdk.CarbonTypes", "Joelib.LogP" ]
    assert_kind_of Dataset, d
    assert_equal dataset.compounds, d.compounds
    assert_equal dataset.compounds.size, d.data_entries.size
    assert_equal 12, d.data_entries.first.size
  end

end
data/test/error.rb ADDED
require_relative "setup.rb"

# Exercises the error classes and error-raising helper methods.
class ErrorTest < MiniTest::Test

  def test_bad_request
    unsaved = OpenTox::Feature.new
    # an unsaved document must not be findable by id
    assert_raises Mongoid::Errors::DocumentNotFound do
      OpenTox::Feature.find(unsaved.id)
    end
  end

  def test_error_methods
    assert_raises OpenTox::ResourceNotFoundError do
      resource_not_found_error "This is a test"
    end
  end

  def test_exception
    assert_raises Exception do
      raise Exception.new "Basic Exception"
    end
  end

end
data/test/feature.rb ADDED
require_relative "setup.rb"

# CRUD and deduplication tests for OpenTox::Feature and its subclasses.
class FeatureTest < MiniTest::Test

  def test_opentox_feature
    @feature = OpenTox::Feature.create(:name => "tost")
    assert_equal true, OpenTox::Feature.where(name: "tost").exists?, "#{@feature.id} is not accessible."
    assert_equal true, OpenTox::Feature.where(id: @feature.id).exists?, "#{@feature.id} is not accessible."

    list = OpenTox::Feature.all
    listsize1 = list.length
    assert_equal true, list.collect{|f| f.id}.include?(@feature.id)
    # modify feature
    @feature2 = OpenTox::Feature.find(@feature.id)
    assert_equal "tost", @feature2[:name]
    assert_equal "tost", @feature2.name
    assert_kind_of Feature, @feature2

    @feature2[:name] = "feature2"
    @feature2.save
    list = OpenTox::Feature.all
    listsize2 = list.length
    assert_match "feature2", @feature2.name
    refute_match "tost", @feature2.name
    # renaming must not create an additional document
    assert_equal listsize1, listsize2

    id = @feature2.id
    @feature2.delete
    assert_raises Mongoid::Errors::DocumentNotFound do
      OpenTox::Feature.find(id)
    end
  end

  def test_duplicated_features
    metadata = {
      :name => "feature duplication test",
      :nominal => true,
      :description => "feature duplication test"
    }
    feature = NumericBioAssay.find_or_create_by metadata
    dup_feature = NumericBioAssay.find_or_create_by metadata
    assert_kind_of Feature, feature
    assert !feature.id.nil?, "No Feature ID in #{feature.inspect}"
    # fixed: originally re-checked feature.id instead of dup_feature.id
    assert !dup_feature.id.nil?, "No Feature ID in #{dup_feature.inspect}"
    assert_equal feature.id, dup_feature.id
    feature.delete
    assert_raises Mongoid::Errors::DocumentNotFound do
      OpenTox::Feature.find(feature.id)
    end
    assert_raises Mongoid::Errors::DocumentNotFound do
      OpenTox::Feature.find(dup_feature.id)
    end
  end

  def test_smarts_feature
    feature = Smarts.find_or_create_by(:smarts => "CN")
    # fixed: `assert feature.smarts, "CN"` only checked truthiness — the second
    # argument of assert is the failure message, not an expected value
    assert_equal "CN", feature.smarts
    assert_kind_of Smarts, feature
    feature.smarts = 'cc'
    assert_equal "cc", feature.smarts
    original = Feature.where(:smarts => 'CN').first
    assert_equal "CN", original.smarts
  end

end
require_relative "setup.rb"

# Long-running fminer (BBRC) feature-mining tests on larger datasets.
class FminerTest < MiniTest::Test

  def test_fminer_multicell
    #skip "multicell segfaults"
    # TODO aborts, probably fminer
    # or OpenBabel segfault
    dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"multi_cell_call.csv")
    feature_dataset = OpenTox::Algorithm::Fminer.bbrc(dataset)#, :min_frequency => 15)
    p feature_dataset.training_parameters
    assert_equal dataset.compound_ids, feature_dataset.compound_ids
    dataset.delete
    feature_dataset.delete
  end

  def test_fminer_isscan
    dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"ISSCAN-multi.csv")
    feature_dataset = OpenTox::Algorithm::Fminer.bbrc(dataset)#, :min_frequency => 15)
    assert_equal feature_dataset.compounds.size, dataset.compounds.size
    p feature_dataset.features.size
    p feature_dataset.training_parameters
    dataset.delete
    feature_dataset.delete
  end

  def test_fminer_kazius
    dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"kazius.csv")
    # TODO reactivate default settings
    feature_dataset = OpenTox::Algorithm::Fminer.bbrc(dataset, :min_frequency => 20)
    assert_equal feature_dataset.compounds.size, dataset.compounds.size
    feature_dataset = Dataset.find feature_dataset.id
    # fixed: was `assert feature_dataset.data_entries.size, dataset.compounds.size`,
    # which only checked truthiness (the second arg of assert is the failure message)
    assert_equal dataset.compounds.size, feature_dataset.data_entries.size
    dataset.delete
    feature_dataset.delete
  end

end
data/test/fminer.rb ADDED
require_relative "setup.rb"

# fminer (BBRC/LAST) feature-mining tests on the hamster dataset.
class FminerTest < MiniTest::Test

  def test_fminer_bbrc
    training = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"hamster_carcinogenicity.csv")
    refute_nil training.id
    fragments = OpenTox::Algorithm::Fminer.bbrc training
    # reload from the database to check persistence
    fragments = Dataset.find fragments.id
    assert_equal training.compounds.size, fragments.compounds.size
    # TODO: fminer calculates 62 instead of 54 features
    # it is unclear which commit changed the numbers (occurs with old libraries/mongodb branch too
    # modification of Compound to use smiles instead of inchis seems to have no effect
    #assert_equal 54, fragments.features.size
    #assert_equal "C-C-C=C", fragments.features.first.smarts
    mined_compounds = fragments.compounds
    mined_smarts = fragments.features
    # every mined fragment must be statistically significant
    mined_smarts.each do |fragment|
      assert(fragment.p_value.round(2) >= 0.95)
    end
    # fingerprints stored in the dataset must agree with a fresh SMARTS match
    rematch = OpenTox::Algorithm::Descriptor.smarts_match mined_compounds, mined_smarts
    fragments.data_entries.each_with_index do |fingerprint,row|
      assert_equal rematch[row], fingerprint
    end

    training.delete
    fragments.delete
  end

  def test_fminer_last
    skip "last features have to be activated"
    training = OpenTox::Dataset.new
    training.upload File.join(DATA_DIR,"hamster_carcinogenicity.csv")
    fragments = OpenTox::Algorithm::Fminer.last :dataset => training
    assert_equal training.compounds.size, fragments.compounds.size
    assert_equal 21, fragments.features.size
    assert_equal '[#6&A]-[#6&a]:[#6&a]:[#6&a]:[#6&a]:[#6&a]', fragments.features.first.smarts

    mined_compounds = fragments.compounds
    mined_smarts = fragments.features.collect{|f| f.smarts}
    rematch = OpenTox::Algorithm::Descriptor.smarts_match mined_compounds, mined_smarts
    mined_compounds.each_with_index do |compound,row|
      mined_smarts.each_with_index do |smart,col|
        assert_equal rematch[row][col], fragments.data_entries[row][col].to_i
      end
    end

    training.delete
    fragments.delete
  end

end
require_relative "setup.rb"

# End-to-end test for the lazar classification model built on fminer fragments.
class LazarFminerTest < MiniTest::Test

  def test_lazar_fminer
    training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"hamster_carcinogenicity.csv")
    model = Model::LazarFminerClassification.create training_dataset#, feature_dataset
    feature_dataset = Dataset.find model.neighbor_algorithm_parameters[:feature_dataset_id]
    assert_equal training_dataset.compounds.size, feature_dataset.compounds.size
    #TODO check fminer features, see fminer.rb
    #assert_equal 54, feature_dataset.features.size
    # every fingerprint row must be as wide as the feature list
    feature_dataset.data_entries.each do |row|
      assert_equal row.size, feature_dataset.features.size
    end
    #assert_equal 'C-C-C=C', feature_dataset.features.first.smarts

    # single-compound predictions with expected outcomes
    expectations = [
      {
        :compound => OpenTox::Compound.from_inchi("InChI=1S/C6H6/c1-2-4-6-5-3-1/h1-6H"),
        :prediction => "false",
        :confidence => 0.25281385281385277,
        :nr_neighbors => 11
      },{
        :compound => OpenTox::Compound.from_smiles("c1ccccc1NN"),
        :prediction => "false",
        :confidence => 0.3639589577089577,
        :nr_neighbors => 14
      }, {
        :compound => Compound.from_smiles('OCCCCCCCC\C=C/CCCCCCCC'),
        :prediction => "false",
        :confidence => 0.5555555555555556,
        :nr_neighbors => 1
      }
    ]
    expectations.each do |expected|
      prediction = model.predict expected[:compound]
      assert_equal expected[:prediction], prediction[:value]
      #assert_equal expected[:confidence], prediction[:confidence]
      #assert_equal expected[:nr_neighbors], prediction[:neighbors].size
    end

    # make a dataset prediction
    compound_dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.mini.csv")
    prediction = model.predict compound_dataset
    assert_equal compound_dataset.compounds, prediction.compounds

    # NOTE(review): "Cound" appears to be a typo, but the assertion must match
    # the message actually emitted by the library — verify upstream before changing
    assert_equal "Cound not find similar compounds.", prediction.data_entries[7][2]
    assert_equal "measured", prediction.data_entries[14][1]
    # cleanup
    [training_dataset,model,feature_dataset,compound_dataset].each{|o| o.delete}
  end
end