lazar 0.9.3 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -4
  3. data/README.md +5 -15
  4. data/VERSION +1 -1
  5. data/ext/lazar/extconf.rb +1 -1
  6. data/ext/lazar/rinstall.R +9 -7
  7. data/java/CdkDescriptorInfo.class +0 -0
  8. data/java/CdkDescriptorInfo.java +3 -2
  9. data/java/CdkDescriptors.class +0 -0
  10. data/java/CdkDescriptors.java +28 -28
  11. data/java/Rakefile +3 -3
  12. data/java/{cdk-1.4.19.jar → cdk-2.0-SNAPSHOT.jar} +0 -0
  13. data/lazar.gemspec +6 -7
  14. data/lib/algorithm.rb +2 -11
  15. data/lib/caret.rb +96 -0
  16. data/lib/classification.rb +14 -22
  17. data/lib/compound.rb +21 -87
  18. data/lib/crossvalidation.rb +80 -279
  19. data/lib/dataset.rb +105 -174
  20. data/lib/feature.rb +11 -18
  21. data/lib/feature_selection.rb +42 -0
  22. data/lib/import.rb +122 -0
  23. data/lib/lazar.rb +14 -4
  24. data/lib/leave-one-out-validation.rb +46 -192
  25. data/lib/model.rb +319 -128
  26. data/lib/nanoparticle.rb +98 -0
  27. data/lib/opentox.rb +7 -4
  28. data/lib/overwrite.rb +24 -3
  29. data/lib/physchem.rb +11 -10
  30. data/lib/regression.rb +7 -137
  31. data/lib/rest-client-wrapper.rb +0 -6
  32. data/lib/similarity.rb +65 -0
  33. data/lib/substance.rb +8 -0
  34. data/lib/train-test-validation.rb +69 -0
  35. data/lib/validation-statistics.rb +223 -0
  36. data/lib/validation.rb +17 -100
  37. data/scripts/mg2mmol.rb +17 -0
  38. data/scripts/mirror-enm2test.rb +4 -0
  39. data/scripts/mmol2-log10.rb +32 -0
  40. data/test/compound.rb +4 -94
  41. data/test/data/EPAFHM.medi_log10.csv +92 -0
  42. data/test/data/EPAFHM.mini_log10.csv +16 -0
  43. data/test/data/EPAFHM_log10.csv +581 -0
  44. data/test/data/loael_log10.csv +568 -0
  45. data/test/dataset.rb +195 -133
  46. data/test/descriptor.rb +27 -18
  47. data/test/error.rb +2 -2
  48. data/test/experiment.rb +4 -4
  49. data/test/feature.rb +2 -3
  50. data/test/gridfs.rb +10 -0
  51. data/test/model-classification.rb +106 -0
  52. data/test/model-nanoparticle.rb +128 -0
  53. data/test/model-regression.rb +171 -0
  54. data/test/model-validation.rb +19 -0
  55. data/test/nanomaterial-model-validation.rb +55 -0
  56. data/test/setup.rb +8 -4
  57. data/test/validation-classification.rb +67 -0
  58. data/test/validation-nanoparticle.rb +133 -0
  59. data/test/validation-regression.rb +92 -0
  60. metadata +50 -121
  61. data/test/classification.rb +0 -41
  62. data/test/data/CPDBAS_v5c_1547_29Apr2008part.sdf +0 -13553
  63. data/test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_mouse_TD50.csv +0 -436
  64. data/test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_rat_TD50.csv +0 -568
  65. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Hamster.csv +0 -87
  66. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mouse.csv +0 -978
  67. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall.csv +0 -1120
  68. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall_no_duplicates.csv +0 -1113
  69. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity.csv +0 -850
  70. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity_no_duplicates.csv +0 -829
  71. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Rat.csv +0 -1198
  72. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_SingleCellCall.csv +0 -1505
  73. data/test/data/EPA_v4b_Fathead_Minnow_Acute_Toxicity_LC50_mmol.csv +0 -581
  74. data/test/data/FDA_v3b_Maximum_Recommended_Daily_Dose_mmol.csv +0 -1217
  75. data/test/data/LOAEL_log_mg_corrected_smiles.csv +0 -568
  76. data/test/data/LOAEL_log_mmol_corrected_smiles.csv +0 -568
  77. data/test/data/boiling_points.ext.sdf +0 -11460
  78. data/test/data/cpdb_100.csv +0 -101
  79. data/test/data/hamster_carcinogenicity.ntriples +0 -618
  80. data/test/data/hamster_carcinogenicity.sdf +0 -2805
  81. data/test/data/hamster_carcinogenicity.xls +0 -0
  82. data/test/data/hamster_carcinogenicity.yaml +0 -352
  83. data/test/dataset-long.rb +0 -114
  84. data/test/lazar-long.rb +0 -92
  85. data/test/lazar-physchem-short.rb +0 -31
  86. data/test/prediction_models.rb +0 -20
  87. data/test/regression.rb +0 -43
  88. data/test/validation.rb +0 -108
Binary file
@@ -1,352 +0,0 @@
1
- --- !ruby/object:OpenTox::Dataset
2
- compounds:
3
- - http://localhost/compound/InChI=1S/C2H4O/c1-2-3/h2H,1H3
4
- - http://localhost/compound/InChI=1S/C15H13NO/c1-10(17)16-13-6-7-15-12(9-13)8-11-4-2-3-5-14(11)15/h2-7,9H,8H2,1H3,(H,16,17)
5
- - http://localhost/compound/InChI=1S/C11H8N2O5/c12-11(14)8(9-2-1-5-17-9)6-7-3-4-10(18-7)13(15)16/h1-6H,(H2,12,14)
6
- - http://localhost/compound/InChI=1S/C2H4N4/c3-2-4-1-5-6-2/h1H,(H3,3,4,5,6)
7
- - http://localhost/compound/InChI=1S/BrHO3.K/c2-1(3)4;/h(H,2,3,4);/q;+1/p-1
8
- - http://localhost/compound/InChI=1S/Cd.2ClH/h;2*1H/q+2;;/p-2
9
- - http://localhost/compound/InChI=1S/Cd.H2O4S/c;1-5(2,3)4/h;(H2,1,2,3,4)/q+2;/p-2
10
- - http://localhost/compound/InChI=1S/C14H14ClN3O2S/c1-8-4-3-5-10(9(8)2)16-12-6-11(15)17-14(18-12)21-7-13(19)20/h3-6H,7H2,1-2H3,(H,19,20)(H,16,17,18)
11
- - http://localhost/compound/InChI=1S/C2H5ClO/c1-4-2-3/h2H2,1H3
12
- - http://localhost/compound/InChI=1S/C4H5Cl/c1-3-4(2)5/h3H,1-2H2
13
- - http://localhost/compound/InChI=1S/C17H17ClO3/c1-17(2,16(19)20)21-11-12-3-5-13(6-4-12)14-7-9-15(18)10-8-14/h3-10H,11H2,1-2H3,(H,19,20)
14
- - http://localhost/compound/InChI=1S/C9H6O2/c10-9-6-5-7-3-1-2-4-8(7)11-9/h1-6H
15
- - http://localhost/compound/InChI=1S/C14H8Cl4/c15-11-5-1-9(2-6-11)13(14(17)18)10-3-7-12(16)8-4-10/h1-8H
16
- - http://localhost/compound/InChI=1S/C14H9Cl5/c15-11-5-1-9(2-6-11)13(14(17,18)19)10-3-7-12(16)8-4-10/h1-8,13H
17
- - http://localhost/compound/InChI=1S/C6H10N2O/c1-3-5-8(7-9)6-4-2/h3-4H,1-2,5-6H2
18
- - http://localhost/compound/InChI=1S/C12H8Cl6O/c13-8-9(14)11(16)5-3-1-2(6-7(3)19-6)4(5)10(8,15)12(11,17)18/h2-7H,1H2
19
- - http://localhost/compound/InChI=1S/C3H6ClNO/c1-5(2)3(4)6/h1-2H3
20
- - http://localhost/compound/InChI=1S/C2H8N2/c1-4(2)3/h3H2,1-2H3
21
- - http://localhost/compound/InChI=1S/C2H8N2.2ClH/c1-3-4-2;;/h3-4H,1-2H3;2*1H
22
- - http://localhost/compound/InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3
23
- - http://localhost/compound/InChI=1S/C5H11N3O3/c1-2-8(7-11)5(10)6-3-4-9/h9H,2-4H2,1H3,(H,6,10)
24
- - http://localhost/compound/InChI=1S/C6H11N3O3/c1-3-9(8-12)6(11)7-4-5(2)10/h3-4H2,1-2H3,(H,7,11)
25
- - http://localhost/compound/InChI=1S/CH2O/c1-2/h1H2
26
- - http://localhost/compound/InChI=1S/C8H6N4O4S/c13-4-9-11-8-10-5(3-17-8)6-1-2-7(16-6)12(14)15/h1-4H,(H,9,13)(H,10,11)
27
- - http://localhost/compound/InChI=1S/C5H4O2/c6-4-5-2-1-3-7-5/h1-4H
28
- - http://localhost/compound/InChI=1S/C3H6O2/c4-1-3-2-5-3/h3-4H,1-2H2
29
- - http://localhost/compound/InChI=1S/C17H17ClO6/c1-8-5-9(19)6-12(23-4)17(8)16(20)13-10(21-2)7-11(22-3)14(18)15(13)24-17/h6-8H,5H2,1-4H3/t8-,17?/m1/s1
30
- - http://localhost/compound/InChI=1S/C6Cl6/c7-1-2(8)4(10)6(12)5(11)3(1)9
31
- - http://localhost/compound/InChI=1S/H4N2/c1-2/h1-2H2
32
- - http://localhost/compound/InChI=1S/H4N2.H2O4S/c1-2;1-5(2,3)4/h1-2H2;(H2,1,2,3,4)
33
- - http://localhost/compound/InChI=1S/C15H13NO2/c1-10(17)16(18)13-6-7-15-12(9-13)8-11-4-2-3-5-14(11)15/h2-7,9,18H,8H2,1H3
34
- - http://localhost/compound/InChI=1S/C2H8N2O/c3-4-1-2-5/h4-5H,1-3H2
35
- - http://localhost/compound/InChI=1S/C6H7N3O/c7-9-6(10)5-1-3-8-4-2-5/h1-4H,7H2,(H,9,10)
36
- - http://localhost/compound/InChI=1S/C6H5NO2/c8-6(9)5-1-3-7-4-2-5/h1-4H,(H,8,9)
37
- - http://localhost/compound/InChI=1S/C10H12ClNO2/c1-7(2)14-10(13)12-9-5-3-4-8(11)6-9/h3-7H,1-2H3,(H,12,13)
38
- - http://localhost/compound/InChI=1S/C10H13NO2/c1-8(2)13-10(12)11-9-6-4-3-5-7-9/h3-8H,1-2H3,(H,11,12)
39
- - http://localhost/compound/InChI=1S/2C2H4O2.4H2O.3Pb/c2*1-2(3)4;;;;;;;/h2*1H3,(H,3,4);4*1H2;;;/q;;;;;;3*+2/p-6
40
- - http://localhost/compound/InChI=1S/C14H19N3S.ClH/c1-16(2)9-10-17(12-13-6-5-11-18-13)14-7-3-4-8-15-14;/h3-8,11H,9-10,12H2,1-2H3;1H
41
- - http://localhost/compound/InChI=1S/C20H22N8O5/c1-28(9-11-8-23-17-15(24-11)16(21)26-20(22)27-17)12-4-2-10(3-5-12)18(31)25-13(19(32)33)6-7-14(29)30/h2-5,8,13H,6-7,9H2,1H3,(H,25,31)(H,29,30)(H,32,33)(H4,21,22,23,26,27)/t13-/m0/s1
42
- - http://localhost/compound/InChI=1S/C2H6N2O/c1-4(3)2-5/h2H,3H2,1H3
43
- - http://localhost/compound/InChI=1S/C5H8O2/c1-4(2)5(6)7-3/h1H2,2-3H3
44
- - http://localhost/compound/InChI=1S/CH6N2/c1-3-2/h3H,2H2,1H3
45
- - http://localhost/compound/InChI=1S/C10H13N3O2/c1-13(12-15)7-3-5-10(14)9-4-2-6-11-8-9/h2,4,6,8H,3,5,7H2,1H3
46
- - http://localhost/compound/InChI=1S/C5H6N2OS/c1-3-2-4(8)7-5(9)6-3/h2H,1H3,(H2,6,7,8,9)
47
- - http://localhost/compound/InChI=1S/C20H22O3/c1-20(2,19(21)22)23-16-12-10-15(11-13-16)18-9-5-7-14-6-3-4-8-17(14)18/h3-4,6,8,10-13,18H,5,7,9H2,1-2H3,(H,21,22)
48
- - http://localhost/compound/InChI=1S/HNO2.Na/c2-1-3;/h(H,2,3);/q;+1/p-1
49
- - http://localhost/compound/InChI=1S/C9H7N3O4S/c1-5(13)10-9-11-6(4-17-9)7-2-3-8(16-7)12(14)15/h2-4H,1H3,(H,10,11,13)
50
- - http://localhost/compound/InChI=1S/C8H5N3O4S/c12-4-9-8-10-5(3-16-8)6-1-2-7(15-6)11(13)14/h1-4H,(H,9,10,12)
51
- - http://localhost/compound/InChI=1S/C12H9NO2/c14-13(15)11-7-6-9-5-4-8-2-1-3-10(11)12(8)9/h1-3,6-7H,4-5H2
52
- - http://localhost/compound/InChI=1S/C6H14N2O4/c1-5(10)2-8(7-12)3-6(11)4-9/h5-6,9-11H,2-4H2,1H3
53
- - http://localhost/compound/InChI=1S/C6H12N2O4/c1-5(10)2-8(7-12)3-6(11)4-9/h6,9,11H,2-4H2,1H3
54
- - http://localhost/compound/InChI=1S/C5H12N2O4/c8-2-1-7(6-11)3-5(10)4-9/h5,8-10H,1-4H2
55
- - http://localhost/compound/InChI=1S/C5H10N2O3/c1-5(9)4-7(6-10)2-3-8/h8H,2-4H2,1H3
56
- - http://localhost/compound/InChI=1S/C7H15N3O/c1-6-4-10(8-11)5-7(2)9(6)3/h6-7H,4-5H2,1-3H3
57
- - http://localhost/compound/InChI=1S/C6H10N2O2/c1-3-4-8(7-10)5-6(2)9/h3H,1,4-5H2,2H3
58
- - http://localhost/compound/InChI=1S/C4H10N2O3/c1-6(5-9)2-4(8)3-7/h4,7-8H,2-3H2,1H3
59
- - http://localhost/compound/InChI=1S/C4H8N2O2/c7-5-6-1-3-8-4-2-6/h1-4H2
60
- - http://localhost/compound/InChI=1S/C9H11N3O/c13-11-12-6-2-4-9(12)8-3-1-5-10-7-8/h1,3,5,7,9H,2,4,6H2
61
- - http://localhost/compound/InChI=1S/C9H11N3O2/c13-10-12-6-2-4-9(12)8-3-1-5-11(14)7-8/h1,3,5,7,9H,2,4,6H2
62
- - http://localhost/compound/InChI=1S/C5H10N2O/c8-6-7-4-2-1-3-5-7/h1-5H2
63
- - http://localhost/compound/InChI=1S/C4H8N2O/c7-5-6-3-1-2-4-6/h1-4H2
64
- - http://localhost/compound/InChI=1S/C6H10ClN3O3/c1-5(11)4-10(9-13)6(12)8-3-2-7/h2-4H2,1H3,(H,8,12)
65
- - http://localhost/compound/InChI=1S/C4H7N3O3/c1-3(8)2-7(6-10)4(5)9/h2H2,1H3,(H2,5,9)
66
- - http://localhost/compound/InChI=1S/C9H9NS/c11-8-10-7-6-9-4-2-1-3-5-9/h1-5H,6-7H2
67
- - http://localhost/compound/InChI=1S/C12H12N2O3/c1-2-12(8-6-4-3-5-7-8)9(15)13-11(17)14-10(12)16/h3-7H,2H2,1H3,(H2,13,14,15,16,17)
68
- - http://localhost/compound/InChI=1S/C16H13N/c1-2-8-15(9-3-1)17-16-11-10-13-6-4-5-7-14(13)12-16/h1-12,17H
69
- - http://localhost/compound/InChI=1S/C19H24N2O2/c22-18-13-20(19(23)15-7-2-1-3-8-15)12-17-16-9-5-4-6-14(16)10-11-21(17)18/h4-6,9,15,17H,1-3,7-8,10-13H2
70
- - http://localhost/compound/InChI=1S/C7H6O4/c8-5-2-1-4(7(10)11)3-6(5)9/h1-3,8-9H,(H,10,11)
71
- - http://localhost/compound/InChI=1S/C15H10O7.2H2O/c16-7-4-10(19)12-11(5-7)22-15(14(21)13(12)20)6-1-2-8(17)9(18)3-6;;/h1-5,16-19,21H;2*1H2
72
- - http://localhost/compound/InChI=1S/C20H19N3.ClH/c1-13-12-16(6-11-19(13)23)20(14-2-7-17(21)8-3-14)15-4-9-18(22)10-5-15;/h2-12,21H,22-23H2,1H3;1H
73
- - http://localhost/compound/InChI=1S/C19H17N3.ClH/c20-16-7-1-13(2-8-16)19(14-3-9-17(21)10-4-14)15-5-11-18(22)12-6-15;/h1-12,20H,21-22H2;1H
74
- - http://localhost/compound/InChI=1S/C27H30O16/c1-8-17(32)20(35)22(37)26(40-8)39-7-15-18(33)21(36)23(38)27(42-15)43-25-19(34)16-13(31)5-10(28)6-14(16)41-24(25)9-2-3-11(29)12(30)4-9/h2-6,8,15,17-18,20-23,26-33,35-38H,7H2,1H3/t8-,15+,17-,18+,20+,21-,22+,23+,26+,27?/m0/s1
75
- - http://localhost/compound/InChI=1S/C2HCl3/c3-1-2(4)5/h1H
76
- - http://localhost/compound/InChI=1S/C3H7NO2/c1-2-6-3(4)5/h2H2,1H3,(H2,4,5)
77
- - http://localhost/compound/InChI=1S/C2H3Cl/c1-2-3/h2H,1H2
78
- - http://localhost/compound/InChI=1S/C6H5N2.BF4/c7-8-6-4-2-1-3-5-6;2-1(3,4)5/h1-5H;/q+1;-1
79
- - http://localhost/compound/InChI=1S/C6H12N4O2/c1-5-3-9(7-11)4-6(2)10(5)8-12/h5-6H,3-4H2,1-2H3
80
- - http://localhost/compound/InChI=1S/C5H13N3O/c1-7(2)4-5-8(3)6-9/h4-5H2,1-3H3
81
- - http://localhost/compound/InChI=1S/C6H12N2O2/c1-5-3-8(7-9)4-6(2)10-5/h5-6H,3-4H2,1-2H3
82
- - http://localhost/compound/InChI=1S/C4H6N2O3/c1-3-2-6(5-8)4(7)9-3/h3H,2H2,1H3
83
- - http://localhost/compound/InChI=1S/C4H8N2O3/c1-3-9-4(7)6(2)5-8/h3H2,1-2H3
84
- - http://localhost/compound/InChI=1S/C3H6N2O2/c6-4-5-1-2-7-3-5/h1-3H2
85
- - http://localhost/compound/InChI=1S/C9H11N3O2/c10-9(13)12(11-14)7-6-8-4-2-1-3-5-8/h1-5H,6-7H2,(H2,10,13)
86
- - http://localhost/compound/InChI=1S/C3H6N2O/c6-4-5-2-1-3-5/h1-3H2
87
- - http://localhost/compound/InChI=1S/BF4.Na/c2-1(3,4)5;/q-1;+1
88
- data_entries:
89
- http://localhost/compound/InChI=1S/C14H8Cl4/c15-11-5-1-9(2-6-11)13(14(17)18)10-3-7-12(16)8-4-10/h1-8H:
90
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
91
- - true
92
- http://localhost/compound/InChI=1S/C3H6ClNO/c1-5(2)3(4)6/h1-2H3:
93
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
94
- - true
95
- http://localhost/compound/InChI=1S/C2H8N2O/c3-4-1-2-5/h4-5H,1-3H2:
96
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
97
- - false
98
- http://localhost/compound/InChI=1S/C4H10N2O3/c1-6(5-9)2-4(8)3-7/h4,7-8H,2-3H2,1H3:
99
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
100
- - true
101
- http://localhost/compound/InChI=1S/CH2O/c1-2/h1H2:
102
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
103
- - false
104
- http://localhost/compound/InChI=1S/C5H12N2O4/c8-2-1-7(6-11)3-5(10)4-9/h5,8-10H,1-4H2:
105
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
106
- - false
107
- http://localhost/compound/InChI=1S/C7H15N3O/c1-6-4-10(8-11)5-7(2)9(6)3/h6-7H,4-5H2,1-3H3:
108
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
109
- - true
110
- http://localhost/compound/InChI=1S/C4H8N2O2/c7-5-6-1-3-8-4-2-6/h1-4H2:
111
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
112
- - true
113
- http://localhost/compound/InChI=1S/C16H13N/c1-2-8-15(9-3-1)17-16-11-10-13-6-4-5-7-14(13)12-16/h1-12,17H:
114
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
115
- - false
116
- http://localhost/compound/InChI=1S/C3H6O2/c4-1-3-2-5-3/h3-4H,1-2H2:
117
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
118
- - true
119
- http://localhost/compound/InChI=1S/C4H6N2O3/c1-3-2-6(5-8)4(7)9-3/h3H,2H2,1H3:
120
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
121
- - true
122
- http://localhost/compound/InChI=1S/C6H5NO2/c8-6(9)5-1-3-7-4-2-5/h1-4H,(H,8,9):
123
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
124
- - false
125
- http://localhost/compound/InChI=1S/2C2H4O2.4H2O.3Pb/c2*1-2(3)4;;;;;;;/h2*1H3,(H,3,4);4*1H2;;;/q;;;;;;3*+2/p-6:
126
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
127
- - false
128
- http://localhost/compound/InChI=1S/C17H17ClO6/c1-8-5-9(19)6-12(23-4)17(8)16(20)13-10(21-2)7-11(22-3)14(18)15(13)24-17/h6-8H,5H2,1-4H3/t8-,17?/m1/s1:
129
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
130
- - false
131
- http://localhost/compound/InChI=1S/C3H6N2O2/c6-4-5-1-2-7-3-5/h1-3H2:
132
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
133
- - true
134
- http://localhost/compound/InChI=1S/C3H7NO2/c1-2-6-3(4)5/h2H2,1H3,(H2,4,5):
135
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
136
- - true
137
- http://localhost/compound/InChI=1S/C5H8O2/c1-4(2)5(6)7-3/h1H2,2-3H3:
138
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
139
- - false
140
- http://localhost/compound/InChI=1S/C2H6N2O/c1-4(3)2-5/h2H,3H2,1H3:
141
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
142
- - true
143
- http://localhost/compound/InChI=1S/C6H12N2O4/c1-5(10)2-8(7-12)3-6(11)4-9/h6,9,11H,2-4H2,1H3:
144
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
145
- - true
146
- http://localhost/compound/InChI=1S/C5H4O2/c6-4-5-2-1-3-7-5/h1-4H:
147
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
148
- - false
149
- http://localhost/compound/InChI=1S/C4H8N2O/c7-5-6-3-1-2-4-6/h1-4H2:
150
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
151
- - true
152
- http://localhost/compound/InChI=1S/C9H11N3O2/c10-9(13)12(11-14)7-6-8-4-2-1-3-5-8/h1-5H,6-7H2,(H2,10,13):
153
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
154
- - true
155
- http://localhost/compound/InChI=1S/C14H14ClN3O2S/c1-8-4-3-5-10(9(8)2)16-12-6-11(15)17-14(18-12)21-7-13(19)20/h3-6H,7H2,1-2H3,(H,19,20)(H,16,17,18):
156
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
157
- - false
158
- http://localhost/compound/InChI=1S/H4N2.H2O4S/c1-2;1-5(2,3)4/h1-2H2;(H2,1,2,3,4):
159
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
160
- - true
161
- http://localhost/compound/InChI=1S/C5H10N2O/c8-6-7-4-2-1-3-5-7/h1-5H2:
162
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
163
- - true
164
- http://localhost/compound/InChI=1S/C10H13N3O2/c1-13(12-15)7-3-5-10(14)9-4-2-6-11-8-9/h2,4,6,8H,3,5,7H2,1H3:
165
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
166
- - false
167
- http://localhost/compound/InChI=1S/C3H6N2O/c6-4-5-2-1-3-5/h1-3H2:
168
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
169
- - true
170
- http://localhost/compound/InChI=1S/C4H8N2O3/c1-3-9-4(7)6(2)5-8/h3H2,1-2H3:
171
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
172
- - true
173
- http://localhost/compound/InChI=1S/C6H10N2O2/c1-3-4-8(7-10)5-6(2)9/h3H,1,4-5H2,2H3:
174
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
175
- - true
176
- http://localhost/compound/InChI=1S/C14H9Cl5/c15-11-5-1-9(2-6-11)13(14(17,18)19)10-3-7-12(16)8-4-10/h1-8,13H:
177
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
178
- - false
179
- http://localhost/compound/InChI=1S/BrHO3.K/c2-1(3)4;/h(H,2,3,4);/q;+1/p-1:
180
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
181
- - true
182
- http://localhost/compound/InChI=1S/C2H5ClO/c1-4-2-3/h2H2,1H3:
183
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
184
- - true
185
- http://localhost/compound/InChI=1S/C10H12ClNO2/c1-7(2)14-10(13)12-9-5-3-4-8(11)6-9/h3-7H,1-2H3,(H,12,13):
186
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
187
- - false
188
- http://localhost/compound/InChI=1S/C8H5N3O4S/c12-4-9-8-10-5(3-16-8)6-1-2-7(15-6)11(13)14/h1-4H,(H,9,10,12):
189
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
190
- - true
191
- http://localhost/compound/InChI=1S/Cd.2ClH/h;2*1H/q+2;;/p-2:
192
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
193
- - false
194
- http://localhost/compound/InChI=1S/C20H19N3.ClH/c1-13-12-16(6-11-19(13)23)20(14-2-7-17(21)8-3-14)15-4-9-18(22)10-5-15;/h2-12,21H,22-23H2,1H3;1H:
195
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
196
- - false
197
- http://localhost/compound/InChI=1S/BF4.Na/c2-1(3,4)5;/q-1;+1:
198
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
199
- - false
200
- http://localhost/compound/InChI=1S/C6H5N2.BF4/c7-8-6-4-2-1-3-5-6;2-1(3,4)5/h1-5H;/q+1;-1:
201
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
202
- - false
203
- http://localhost/compound/InChI=1S/C2H4N4/c3-2-4-1-5-6-2/h1H,(H3,3,4,5,6):
204
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
205
- - false
206
- http://localhost/compound/InChI=1S/C9H6O2/c10-9-6-5-7-3-1-2-4-8(7)11-9/h1-6H:
207
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
208
- - false
209
- http://localhost/compound/InChI=1S/C2HCl3/c3-1-2(4)5/h1H:
210
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
211
- - false
212
- http://localhost/compound/InChI=1S/C2H8N2/c1-4(2)3/h3H2,1-2H3:
213
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
214
- - true
215
- http://localhost/compound/InChI=1S/C6H7N3O/c7-9-6(10)5-1-3-8-4-2-5/h1-4H,7H2,(H,9,10):
216
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
217
- - false
218
- http://localhost/compound/InChI=1S/C12H8Cl6O/c13-8-9(14)11(16)5-3-1-2(6-7(3)19-6)4(5)10(8,15)12(11,17)18/h2-7H,1H2:
219
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
220
- - false
221
- http://localhost/compound/InChI=1S/Cd.H2O4S/c;1-5(2,3)4/h;(H2,1,2,3,4)/q+2;/p-2:
222
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
223
- - false
224
- http://localhost/compound/InChI=1S/C5H10N2O3/c1-5(9)4-7(6-10)2-3-8/h8H,2-4H2,1H3:
225
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
226
- - true
227
- http://localhost/compound/InChI=1S/C27H30O16/c1-8-17(32)20(35)22(37)26(40-8)39-7-15-18(33)21(36)23(38)27(42-15)43-25-19(34)16-13(31)5-10(28)6-14(16)41-24(25)9-2-3-11(29)12(30)4-9/h2-6,8,15,17-18,20-23,26-33,35-38H,7H2,1H3/t8-,15+,17-,18+,20+,21-,22+,23+,26+,27?/m0/s1:
228
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
229
- - false
230
- http://localhost/compound/InChI=1S/C12H12N2O3/c1-2-12(8-6-4-3-5-7-8)9(15)13-11(17)14-10(12)16/h3-7H,2H2,1H3,(H2,13,14,15,16,17):
231
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
232
- - false
233
- http://localhost/compound/InChI=1S/C8H6N4O4S/c13-4-9-11-8-10-5(3-17-8)6-1-2-7(16-6)12(14)15/h1-4H,(H,9,13)(H,10,11):
234
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
235
- - true
236
- http://localhost/compound/InChI=1S/C9H7N3O4S/c1-5(13)10-9-11-6(4-17-9)7-2-3-8(16-7)12(14)15/h2-4H,1H3,(H,10,11,13):
237
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
238
- - true
239
- http://localhost/compound/InChI=1S/CH6N2/c1-3-2/h3H,2H2,1H3:
240
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
241
- - true
242
- http://localhost/compound/InChI=1S/C12H9NO2/c14-13(15)11-7-6-9-5-4-8-2-1-3-10(11)12(8)9/h1-3,6-7H,4-5H2:
243
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
244
- - false
245
- http://localhost/compound/InChI=1S/C15H10O7.2H2O/c16-7-4-10(19)12-11(5-7)22-15(14(21)13(12)20)6-1-2-8(17)9(18)3-6;;/h1-5,16-19,21H;2*1H2:
246
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
247
- - false
248
- http://localhost/compound/InChI=1S/C7H6O4/c8-5-2-1-4(7(10)11)3-6(5)9/h1-3,8-9H,(H,10,11):
249
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
250
- - false
251
- http://localhost/compound/InChI=1S/C9H9NS/c11-8-10-7-6-9-4-2-1-3-5-9/h1-5H,6-7H2:
252
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
253
- - false
254
- http://localhost/compound/InChI=1S/C20H22O3/c1-20(2,19(21)22)23-16-12-10-15(11-13-16)18-9-5-7-14-6-3-4-8-17(14)18/h3-4,6,8,10-13,18H,5,7,9H2,1-2H3,(H,21,22):
255
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
256
- - false
257
- http://localhost/compound/InChI=1S/C6H12N2O2/c1-5-3-8(7-9)4-6(2)10-5/h5-6H,3-4H2,1-2H3:
258
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
259
- - true
260
- http://localhost/compound/InChI=1S/C10H13NO2/c1-8(2)13-10(12)11-9-6-4-3-5-7-9/h3-8H,1-2H3,(H,11,12):
261
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
262
- - false
263
- http://localhost/compound/InChI=1S/C6H14N2O4/c1-5(10)2-8(7-12)3-6(11)4-9/h5-6,9-11H,2-4H2,1H3:
264
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
265
- - true
266
- http://localhost/compound/InChI=1S/C19H24N2O2/c22-18-13-20(19(23)15-7-2-1-3-8-15)12-17-16-9-5-4-6-14(16)10-11-21(17)18/h4-6,9,15,17H,1-3,7-8,10-13H2:
267
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
268
- - false
269
- http://localhost/compound/InChI=1S/C5H11N3O3/c1-2-8(7-11)5(10)6-3-4-9/h9H,2-4H2,1H3,(H,6,10):
270
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
271
- - true
272
- http://localhost/compound/InChI=1S/C14H19N3S.ClH/c1-16(2)9-10-17(12-13-6-5-11-18-13)14-7-3-4-8-15-14;/h3-8,11H,9-10,12H2,1-2H3;1H:
273
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
274
- - false
275
- http://localhost/compound/InChI=1S/H4N2/c1-2/h1-2H2:
276
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
277
- - true
278
- http://localhost/compound/InChI=1S/C4H5Cl/c1-3-4(2)5/h3H,1-2H2:
279
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
280
- - false
281
- http://localhost/compound/InChI=1S/C17H17ClO3/c1-17(2,16(19)20)21-11-12-3-5-13(6-4-12)14-7-9-15(18)10-8-14/h3-10H,11H2,1-2H3,(H,19,20):
282
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
283
- - false
284
- http://localhost/compound/InChI=1S/C2H8N2.2ClH/c1-3-4-2;;/h3-4H,1-2H3;2*1H:
285
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
286
- - true
287
- http://localhost/compound/InChI=1S/C6H10ClN3O3/c1-5(11)4-10(9-13)6(12)8-3-2-7/h2-4H2,1H3,(H,8,12):
288
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
289
- - true
290
- http://localhost/compound/InChI=1S/C6H11N3O3/c1-3-9(8-12)6(11)7-4-5(2)10/h3-4H2,1-2H3,(H,7,11):
291
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
292
- - true
293
- http://localhost/compound/InChI=1S/C11H8N2O5/c12-11(14)8(9-2-1-5-17-9)6-7-3-4-10(18-7)13(15)16/h1-6H,(H2,12,14):
294
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
295
- - true
296
- http://localhost/compound/InChI=1S/C2H6O/c1-2-3/h3H,2H2,1H3:
297
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
298
- - false
299
- http://localhost/compound/InChI=1S/C5H13N3O/c1-7(2)4-5-8(3)6-9/h4-5H2,1-3H3:
300
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
301
- - true
302
- http://localhost/compound/InChI=1S/C15H13NO/c1-10(17)16-13-6-7-15-12(9-13)8-11-4-2-3-5-14(11)15/h2-7,9H,8H2,1H3,(H,16,17):
303
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
304
- - true
305
- http://localhost/compound/InChI=1S/C5H6N2OS/c1-3-2-4(8)7-5(9)6-3/h2H,1H3,(H2,6,7,8,9):
306
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
307
- - true
308
- http://localhost/compound/InChI=1S/C9H11N3O/c13-11-12-6-2-4-9(12)8-3-1-5-10-7-8/h1,3,5,7,9H,2,4,6H2:
309
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
310
- - true
311
- http://localhost/compound/InChI=1S/C6H12N4O2/c1-5-3-9(7-11)4-6(2)10(5)8-12/h5-6H,3-4H2,1-2H3:
312
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
313
- - true
314
- http://localhost/compound/InChI=1S/C19H17N3.ClH/c20-16-7-1-13(2-8-16)19(14-3-9-17(21)10-4-14)15-5-11-18(22)12-6-15;/h1-12,20H,21-22H2;1H:
315
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
316
- - false
317
- http://localhost/compound/InChI=1S/HNO2.Na/c2-1-3;/h(H,2,3);/q;+1/p-1:
318
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
319
- - false
320
- http://localhost/compound/InChI=1S/C6Cl6/c7-1-2(8)4(10)6(12)5(11)3(1)9:
321
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
322
- - true
323
- http://localhost/compound/InChI=1S/C2H3Cl/c1-2-3/h2H,1H2:
324
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
325
- - true
326
- http://localhost/compound/InChI=1S/C6H10N2O/c1-3-5-8(7-9)6-4-2/h3-4H,1-2,5-6H2:
327
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
328
- - true
329
- http://localhost/compound/InChI=1S/C9H11N3O2/c13-10-12-6-2-4-9(12)8-3-1-5-11(14)7-8/h1,3,5,7,9H,2,4,6H2:
330
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
331
- - false
332
- http://localhost/compound/InChI=1S/C15H13NO2/c1-10(17)16(18)13-6-7-15-12(9-13)8-11-4-2-3-5-14(11)15/h2-7,9,18H,8H2,1H3:
333
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
334
- - true
335
- http://localhost/compound/InChI=1S/C20H22N8O5/c1-28(9-11-8-23-17-15(24-11)16(21)26-20(22)27-17)12-4-2-10(3-5-12)18(31)25-13(19(32)33)6-7-14(29)30/h2-5,8,13H,6-7,9H2,1H3,(H,25,31)(H,29,30)(H,32,33)(H4,21,22,23,26,27)/t13-/m0/s1:
336
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
337
- - false
338
- http://localhost/compound/InChI=1S/C4H7N3O3/c1-3(8)2-7(6-10)4(5)9/h2H2,1H3,(H2,5,9):
339
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
340
- - true
341
- http://localhost/compound/InChI=1S/C2H4O/c1-2-3/h2H,1H3:
342
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
343
- - true
344
- features:
345
- http://localhost/dataset/1/feature/hamster_carcinogenicity:
346
- http://www.opentox.org/api/1.1#hasSource: hamster_carcinogenicity.csv
347
- http://purl.org/dc/elements/1.1/title: hamster_carcinogenicity
348
- metadata:
349
- http://www.opentox.org/api/1.1#hasSource: hamster_carcinogenicity.csv
350
- http://purl.org/dc/elements/1.1/title: hamster_carcinogenicity
351
- http://www.w3.org/2001/XMLSchema#anyUri: http://localhost/dataset/1
352
- uri: http://localhost/dataset/1
data/test/dataset-long.rb DELETED
@@ -1,114 +0,0 @@
1
- require_relative "setup.rb"
2
-
3
- class DatasetLongTest < MiniTest::Test
4
-
5
- def test_01_upload_epafhm
6
- f = File.join DATA_DIR, "EPAFHM.csv"
7
- d = OpenTox::Dataset.from_csv_file f
8
- csv = CSV.read f
9
- assert_equal csv.size-1, d.compounds.size
10
- assert_equal csv.first.size-1, d.features.size
11
- assert_equal csv.size-1, d.data_entries.size
12
- d.delete
13
- end
14
-
15
- =begin
16
- # TODO catch OpenBabel segfaults and identify/remove cause
17
- def test_02_upload_multicell
18
- duplicates = [
19
- "http://localhost:8082/compound/InChI=1S/C6HCl5O/c7-1-2(8)4(10)6(12)5(11)3(1)9/h12H",
20
- "http://localhost:8082/compound/InChI=1S/C12H8Cl6O/c13-8-9(14)11(16)5-3-1-2(6-7(3)19-6)4(5)10(8,15)12(11,17)18/h2-7H,1H2",
21
- "http://localhost:8082/compound/InChI=1S/C2HCl3/c3-1-2(4)5/h1H",
22
- "http://localhost:8082/compound/InChI=1S/C4H5Cl/c1-3-4(2)5/h3H,1-2H2",
23
- "http://localhost:8082/compound/InChI=1S/C4H7Cl/c1-4(2)3-5/h1,3H2,2H3",
24
- "http://localhost:8082/compound/InChI=1S/C8H14O4/c1-5-4-8(11-6(2)9)12-7(3)10-5/h5,7-8H,4H2,1-3H3",
25
- "http://localhost:8082/compound/InChI=1S/C19H30O5/c1-3-5-7-20-8-9-21-10-11-22-14-17-13-19-18(23-15-24-19)12-16(17)6-4-2/h12-13H,3-11,14-15H2,1-2H3",
26
- ]
27
- errors = ['O=P(H)(OC)OC', 'C=CCNN.HCl' ]
28
- f = File.join DATA_DIR, "multi_cell_call.csv"
29
- d = OpenTox::Dataset.from_csv_file f
30
- csv = CSV.read f
31
- assert_equal true, d.features.first.nominal
32
- assert_nil d["index"]
33
- assert_equal csv.size-1-errors.size, d.compounds.size
34
- assert_equal csv.first.size-1, d.features.size
35
- assert_equal csv.size-1-errors.size, d.data_entries.size
36
- p d.warnings
37
- (duplicates+errors).each do |uri|
38
- assert d.warnings.grep %r{#{uri}}
39
- end
40
- d.delete
41
- end
42
- =end
43
-
44
- def test_03_upload_isscan
45
- f = File.join DATA_DIR, "ISSCAN-multi.csv"
46
- d = OpenTox::Dataset.from_csv_file f
47
- csv = CSV.read f
48
- assert_equal csv.size-1, d.compounds.size
49
- assert_equal csv.first.size-1, d.features.size
50
- assert_equal csv.size-1, d.data_entries.size
51
- d.delete
52
- #assert_equal false, URI.accessible?(d.uri)
53
- end
54
-
55
- def test_04_simultanous_upload
56
- threads = []
57
- 3.times do |t|
58
- threads << Thread.new(t) do |up|
59
- d = OpenTox::Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv"
60
- assert_equal OpenTox::Dataset, d.class
61
- assert_equal 1, d.features.size
62
- assert_equal 85, d.compounds.size
63
- assert_equal 85, d.data_entries.size
64
- csv = CSV.read("#{DATA_DIR}/hamster_carcinogenicity.csv")
65
- csv.shift
66
- assert_equal csv.collect{|r| r[1]}, d.data_entries.flatten
67
- d.delete
68
- end
69
- end
70
- threads.each {|aThread| aThread.join}
71
- end
72
-
73
- def test_05_upload_kazius
74
- f = File.join DATA_DIR, "kazius.csv"
75
- d = OpenTox::Dataset.from_csv_file f
76
- csv = CSV.read f
77
- assert_equal csv.size-1, d.compounds.size
78
- assert_equal csv.first.size-1, d.features.size
79
- assert_equal csv.size-1, d.data_entries.size
80
- assert_empty d.warnings
81
- # 493 COC1=C(C=C(C(=C1)Cl)OC)Cl,1
82
- c = d.compounds[491]
83
- assert_equal c.smiles, "COc1cc(Cl)c(cc1Cl)OC"
84
- assert_equal d.data_entries[491][0], "1"
85
- d.delete
86
- end
87
-
88
- def test_upload_feature_dataset
89
- skip
90
- t = Time.now
91
- f = File.join DATA_DIR, "rat_feature_dataset.csv"
92
- d = Dataset.from_csv_file f
93
- assert_equal 458, d.features.size
94
- d.save
95
- #p "Upload: #{Time.now-t}"
96
- d2 = Dataset.find d.id
97
- t = Time.now
98
- assert_equal d.features.size, d2.features.size
99
- csv = CSV.read f
100
- csv.shift # remove header
101
- assert_empty d2.warnings
102
- assert_equal csv.size, d2.compounds.size
103
- assert_equal csv.first.size-1, d2.features.size
104
- d2.compounds.each_with_index do |compound,i|
105
- row = csv[i]
106
- row.shift # remove compound
107
- assert_equal row, d2.data_entries[i]
108
- end
109
- #p "Dowload: #{Time.now-t}"
110
- d2.delete
111
- assert_nil Dataset.find d.id
112
- end
113
-
114
- end
data/test/lazar-long.rb DELETED
@@ -1,92 +0,0 @@
1
- require_relative "setup.rb"
2
-
3
- class LazarExtendedTest < MiniTest::Test
4
-
5
- def test_lazar_bbrc_ham_minfreq
6
- skip
7
- dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"hamster_carcinogenicity.csv")
8
- model = Model::LazarFminerClassification.create(dataset, :min_frequency => 5)
9
- feature_dataset = Dataset.find model.neighbor_algorithm_parameters[:feature_dataset_id]
10
- assert_equal dataset.compounds.size, feature_dataset.compounds.size
11
- assert_equal model.feature_calculation_parameters, {"min_frequency"=>5}
12
- #TODO check frequencies, features and confidence
13
- #assert_equal 41, feature_dataset.features.size
14
- #assert_equal 'N-C=N', feature_dataset.features.first.smarts
15
- compound = OpenTox::Compound.from_inchi("InChI=1S/C6H6/c1-2-4-6-5-3-1/h1-6H")
16
- prediction = model.predict compound
17
- assert_equal "false", prediction[:value]
18
- #assert_equal 0.12380952380952381, prediction[:confidence]
19
- dataset.delete
20
- model.delete
21
- feature_dataset.delete
22
- end
23
-
24
- def test_lazar_bbrc_large_ds
25
- skip
26
- dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"multi_cell_call_no_dup.csv")
27
- model = Model::LazarFminerClassification.create dataset
28
- feature_dataset = Dataset.find model.neighbor_algorithm_parameters[:feature_dataset_id]
29
- model.save
30
- p model.id
31
- assert_equal dataset.compounds.size, feature_dataset.compounds.size
32
- #assert_equal 52, feature_dataset.features.size
33
- #assert_equal '[#17&A]-[#6&A]', feature_dataset.features.first.name
34
- compound = OpenTox::Compound.from_inchi("InChI=1S/C10H9NO2S/c1-8-2-4-9(5-3-8)13-6-10(12)11-7-14/h2-5H,6H2,1H3")
35
- prediction = model.predict compound
36
- assert_equal "1", prediction[:value]
37
- #p prediction
38
- #prediction = prediction_dataset.data_entries.first
39
- #assert_in_delta 0.025, prediction[:confidence], 0.001
40
- #assert_equal 0.025885845574483608, prediction[:confidence]
41
- # with compound change in training_dataset see:
42
- # https://github.com/opentox/opentox-test/commit/0e78c9c59d087adbd4cc58bab60fb29cbe0c1da0
43
- #assert_equal 0.02422364949075546, prediction[:confidence]
44
- dataset.delete
45
- model.delete
46
- feature_dataset.delete
47
- end
48
-
49
- def test_lazar_fminer_kazius
50
- skip
51
- t = Time.now
52
- dataset = Dataset.from_csv_file File.join(DATA_DIR,"kazius.csv")
53
- p "Dataset upload: #{Time.now-t}"
54
- t = Time.now
55
- model = Model::LazarFminerClassification.create(dataset, :min_frequency => 100)
56
- p "Feature mining: #{Time.now-t}"
57
- t = Time.now
58
- feature_dataset = Dataset.find model.neighbor_algorithm_parameters[:feature_dataset_id]
59
- assert_equal feature_dataset.compounds.size, dataset.compounds.size
60
- #model = Model::Lazar.find('55bcf5bf7a7838381200017e')
61
- #p model.id
62
- #prediction_times = []
63
- 2.times do
64
- compound = Compound.from_smiles("Clc1ccccc1NN")
65
- prediction = model.predict compound
66
- p prediction
67
- #assert_equal "1", prediction[:value]
68
- #assert_in_delta 0.019858401199860445, prediction[:confidence], 0.001
69
- end
70
- #dataset.delete
71
- #feature_dataset.delete
72
- end
73
-
74
- def test_lazar_kazius
75
- t = Time.now
76
- dataset = Dataset.from_csv_file File.join(DATA_DIR,"kazius.csv")
77
- p "Dataset upload: #{Time.now-t}"
78
- t = Time.now
79
- model = Model::LazarClassification.create(dataset)
80
- p "Feature mining: #{Time.now-t}"
81
- t = Time.now
82
- 2.times do
83
- compound = Compound.from_smiles("Clc1ccccc1NN")
84
- prediction = model.predict compound
85
- #p prediction
86
- assert_equal "1", prediction[:value]
87
- #assert_in_delta 0.019858401199860445, prediction[:confidence], 0.001
88
- end
89
- dataset.delete
90
- end
91
-
92
- end
@@ -1,31 +0,0 @@
1
- require_relative "setup.rb"
2
-
3
- class LazarPhyschemDescriptorTest < MiniTest::Test
4
- def test_epafhm
5
-
6
- skip
7
- @descriptors = OpenTox::Algorithm::Descriptor::OBDESCRIPTORS.keys
8
- refute_empty @descriptors
9
-
10
- # UPLOAD DATA
11
- training_dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi.csv")
12
- feature_dataset = Algorithm::Descriptor.physchem training_dataset, @descriptors
13
- scaled_feature_dataset = feature_dataset.scale
14
- model = Model::LazarRegression.create training_dataset
15
- model.neighbor_algorithm = "physchem_neighbors"
16
- model.neighbor_algorithm_parameters = {
17
- :feature_calculation_algorithm => "OpenTox::Algorithm::Descriptor.physchem",
18
- :descriptors => @descriptors,
19
- :feature_dataset_id => scaled_feature_dataset.id,
20
- :min_sim => 0.3
21
- }
22
- model.save
23
- compound = Compound.from_smiles "CC(C)(C)CN"
24
- prediction = model.predict compound
25
- refute_nil prediction[:value]
26
- refute_nil prediction[:confidence]
27
- prediction[:neighbors].each do |line|
28
- assert_operator line[1], :>, 0.3
29
- end
30
- end
31
- end
@@ -1,20 +0,0 @@
1
- require_relative "setup.rb"
2
-
3
- class PredictionModelTest < MiniTest::Test
4
-
5
- def test_prediction_model
6
- pm = Model::Prediction.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv"
7
- [:endpoint,:species,:source].each do |p|
8
- refute_empty pm[p]
9
- end
10
- assert pm.classification?
11
- refute pm.regression?
12
- pm.crossvalidations.each do |cv|
13
- p cv
14
- assert cv.accuracy > 0.74, "Crossvalidation accuracy (#{cv.accuracy}) should be larger than 0.75. This may happen due to an unfavorable training/test set split."
15
- end
16
- prediction = pm.predict Compound.from_smiles("CCCC(NN)C")
17
- assert_equal "true", prediction[:value]
18
- pm.delete
19
- end
20
- end