lazar 0.9.3 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (88) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -4
  3. data/README.md +5 -15
  4. data/VERSION +1 -1
  5. data/ext/lazar/extconf.rb +1 -1
  6. data/ext/lazar/rinstall.R +9 -7
  7. data/java/CdkDescriptorInfo.class +0 -0
  8. data/java/CdkDescriptorInfo.java +3 -2
  9. data/java/CdkDescriptors.class +0 -0
  10. data/java/CdkDescriptors.java +28 -28
  11. data/java/Rakefile +3 -3
  12. data/java/{cdk-1.4.19.jar → cdk-2.0-SNAPSHOT.jar} +0 -0
  13. data/lazar.gemspec +6 -7
  14. data/lib/algorithm.rb +2 -11
  15. data/lib/caret.rb +96 -0
  16. data/lib/classification.rb +14 -22
  17. data/lib/compound.rb +21 -87
  18. data/lib/crossvalidation.rb +80 -279
  19. data/lib/dataset.rb +105 -174
  20. data/lib/feature.rb +11 -18
  21. data/lib/feature_selection.rb +42 -0
  22. data/lib/import.rb +122 -0
  23. data/lib/lazar.rb +14 -4
  24. data/lib/leave-one-out-validation.rb +46 -192
  25. data/lib/model.rb +319 -128
  26. data/lib/nanoparticle.rb +98 -0
  27. data/lib/opentox.rb +7 -4
  28. data/lib/overwrite.rb +24 -3
  29. data/lib/physchem.rb +11 -10
  30. data/lib/regression.rb +7 -137
  31. data/lib/rest-client-wrapper.rb +0 -6
  32. data/lib/similarity.rb +65 -0
  33. data/lib/substance.rb +8 -0
  34. data/lib/train-test-validation.rb +69 -0
  35. data/lib/validation-statistics.rb +223 -0
  36. data/lib/validation.rb +17 -100
  37. data/scripts/mg2mmol.rb +17 -0
  38. data/scripts/mirror-enm2test.rb +4 -0
  39. data/scripts/mmol2-log10.rb +32 -0
  40. data/test/compound.rb +4 -94
  41. data/test/data/EPAFHM.medi_log10.csv +92 -0
  42. data/test/data/EPAFHM.mini_log10.csv +16 -0
  43. data/test/data/EPAFHM_log10.csv +581 -0
  44. data/test/data/loael_log10.csv +568 -0
  45. data/test/dataset.rb +195 -133
  46. data/test/descriptor.rb +27 -18
  47. data/test/error.rb +2 -2
  48. data/test/experiment.rb +4 -4
  49. data/test/feature.rb +2 -3
  50. data/test/gridfs.rb +10 -0
  51. data/test/model-classification.rb +106 -0
  52. data/test/model-nanoparticle.rb +128 -0
  53. data/test/model-regression.rb +171 -0
  54. data/test/model-validation.rb +19 -0
  55. data/test/nanomaterial-model-validation.rb +55 -0
  56. data/test/setup.rb +8 -4
  57. data/test/validation-classification.rb +67 -0
  58. data/test/validation-nanoparticle.rb +133 -0
  59. data/test/validation-regression.rb +92 -0
  60. metadata +50 -121
  61. data/test/classification.rb +0 -41
  62. data/test/data/CPDBAS_v5c_1547_29Apr2008part.sdf +0 -13553
  63. data/test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_mouse_TD50.csv +0 -436
  64. data/test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_rat_TD50.csv +0 -568
  65. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Hamster.csv +0 -87
  66. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mouse.csv +0 -978
  67. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall.csv +0 -1120
  68. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall_no_duplicates.csv +0 -1113
  69. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity.csv +0 -850
  70. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity_no_duplicates.csv +0 -829
  71. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Rat.csv +0 -1198
  72. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_SingleCellCall.csv +0 -1505
  73. data/test/data/EPA_v4b_Fathead_Minnow_Acute_Toxicity_LC50_mmol.csv +0 -581
  74. data/test/data/FDA_v3b_Maximum_Recommended_Daily_Dose_mmol.csv +0 -1217
  75. data/test/data/LOAEL_log_mg_corrected_smiles.csv +0 -568
  76. data/test/data/LOAEL_log_mmol_corrected_smiles.csv +0 -568
  77. data/test/data/boiling_points.ext.sdf +0 -11460
  78. data/test/data/cpdb_100.csv +0 -101
  79. data/test/data/hamster_carcinogenicity.ntriples +0 -618
  80. data/test/data/hamster_carcinogenicity.sdf +0 -2805
  81. data/test/data/hamster_carcinogenicity.xls +0 -0
  82. data/test/data/hamster_carcinogenicity.yaml +0 -352
  83. data/test/dataset-long.rb +0 -114
  84. data/test/lazar-long.rb +0 -92
  85. data/test/lazar-physchem-short.rb +0 -31
  86. data/test/prediction_models.rb +0 -20
  87. data/test/regression.rb +0 -43
  88. data/test/validation.rb +0 -108
data/lib/validation.rb CHANGED
@@ -1,108 +1,25 @@
1
1
  module OpenTox
2
2
 
3
- class Validation
4
-
5
- field :model_id, type: BSON::ObjectId
6
- field :prediction_dataset_id, type: BSON::ObjectId
7
- field :crossvalidation_id, type: BSON::ObjectId
8
- field :test_dataset_id, type: BSON::ObjectId
9
- field :nr_instances, type: Integer
10
- field :nr_unpredicted, type: Integer
11
- field :predictions, type: Array
12
-
13
- def prediction_dataset
14
- Dataset.find prediction_dataset_id
15
- end
16
-
17
- def test_dataset
18
- Dataset.find test_dataset_id
19
- end
20
-
21
- def model
22
- Model::Lazar.find model_id
23
- end
24
-
25
- def self.create model, training_set, test_set, crossvalidation=nil
26
-
27
- atts = model.attributes.dup # do not modify attributes from original model
28
- atts["_id"] = BSON::ObjectId.new
29
- atts[:training_dataset_id] = training_set.id
30
- validation_model = model.class.create training_set, atts
31
- validation_model.save
32
- cids = test_set.compound_ids
33
-
34
- test_set_without_activities = Dataset.new(:compound_ids => cids.uniq) # remove duplicates and make sure that activities cannot be used
35
- prediction_dataset = validation_model.predict test_set_without_activities
36
- predictions = []
37
- nr_unpredicted = 0
38
- activities = test_set.data_entries.collect{|de| de.first}
39
- prediction_dataset.data_entries.each_with_index do |de,i|
40
- if de[0] #and de[1]
41
- cid = prediction_dataset.compound_ids[i]
42
- rows = cids.each_index.select{|r| cids[r] == cid }
43
- activities = rows.collect{|r| test_set.data_entries[r][0]}
44
- prediction = de.first
45
- confidence = de[1]
46
- predictions << [prediction_dataset.compound_ids[i], activities, prediction, de[1]]
47
- else
48
- nr_unpredicted += 1
49
- end
3
+ module Validation
4
+
5
+ class Validation
6
+ include OpenTox
7
+ include Mongoid::Document
8
+ include Mongoid::Timestamps
9
+ store_in collection: "validations"
10
+ field :name, type: String
11
+ field :model_id, type: BSON::ObjectId
12
+ field :nr_instances, type: Integer, default: 0
13
+ field :nr_unpredicted, type: Integer, default: 0
14
+ field :predictions, type: Hash, default: {}
15
+ field :finished_at, type: Time
16
+
17
+ def model
18
+ Model::Lazar.find model_id
50
19
  end
51
- validation = self.new(
52
- :model_id => validation_model.id,
53
- :prediction_dataset_id => prediction_dataset.id,
54
- :test_dataset_id => test_set.id,
55
- :nr_instances => test_set.compound_ids.size,
56
- :nr_unpredicted => nr_unpredicted,
57
- :predictions => predictions#.sort{|a,b| p a; b[3] <=> a[3]} # sort according to confidence
58
- )
59
- validation.crossvalidation_id = crossvalidation.id if crossvalidation
60
- validation.save
61
- validation
62
- end
63
-
64
- end
65
-
66
- class ClassificationValidation < Validation
67
- end
68
20
 
69
- class RegressionValidation < Validation
70
-
71
- def statistics
72
- rmse = 0
73
- weighted_rmse = 0
74
- rse = 0
75
- weighted_rse = 0
76
- mae = 0
77
- weighted_mae = 0
78
- confidence_sum = 0
79
- predictions.each do |pred|
80
- compound_id,activity,prediction,confidence = pred
81
- if activity and prediction
82
- error = Math.log10(prediction)-Math.log10(activity.median)
83
- rmse += error**2
84
- weighted_rmse += confidence*error**2
85
- mae += error.abs
86
- weighted_mae += confidence*error.abs
87
- confidence_sum += confidence
88
- else
89
- warnings << "No training activities for #{Compound.find(compound_id).smiles} in training dataset #{model.training_dataset_id}."
90
- $logger.debug "No training activities for #{Compound.find(compound_id).smiles} in training dataset #{model.training_dataset_id}."
91
- end
92
- end
93
- x = predictions.collect{|p| p[1].median}
94
- y = predictions.collect{|p| p[2]}
95
- R.assign "measurement", x
96
- R.assign "prediction", y
97
- R.eval "r <- cor(-log(measurement),-log(prediction),use='complete')"
98
- r = R.eval("r").to_ruby
99
-
100
- mae = mae/predictions.size
101
- weighted_mae = weighted_mae/confidence_sum
102
- rmse = Math.sqrt(rmse/predictions.size)
103
- weighted_rmse = Math.sqrt(weighted_rmse/confidence_sum)
104
- { "R^2" => r**2, "RMSE" => rmse, "MAE" => mae }
105
21
  end
22
+
106
23
  end
107
24
 
108
25
  end
@@ -0,0 +1,17 @@
1
+ #!/usr/bin/env ruby
2
+ require_relative '../lazar/lib/lazar'
3
+ include OpenTox
4
+ newfile = ARGV[0].sub(/.csv/,"_mmol.csv")
5
+ p newfile
6
+ CSV.open(newfile, "wb") do |csv|
7
+ CSV.read(ARGV[0]).each do |line|
8
+ smi,mg = line
9
+ if mg.numeric?
10
+ c = Compound.from_smiles smi
11
+ mmol = c.mg_to_mmol mg.to_f
12
+ csv << [smi, mmol]
13
+ else
14
+ csv << [smi, mg.gsub(/mg/,'mmol')]
15
+ end
16
+ end
17
+ end
@@ -0,0 +1,4 @@
1
+ #!/usr/bin/env ruby
2
+ require_relative '../lib/lazar'
3
+ include OpenTox
4
+ Import::Enanomapper.mirror File.join(File.dirname(__FILE__),"..","test","data","enm")
@@ -0,0 +1,32 @@
1
+ #!/usr/bin/env ruby
2
+ require_relative '../lib/lazar'
3
+ include OpenTox
4
+
5
+ newfile = ARGV[0].sub(/.csv/,"_log10.csv")
6
+ p newfile
7
+ CSV.open(newfile, "wb") do |csv|
8
+ i = 1
9
+ CSV.read(ARGV[0]).each do |line|
10
+ type,mmol = line
11
+ if i == 1
12
+ @type = type
13
+ csv << ["SMILES", "-log10(#{mmol})"]
14
+ else
15
+ if mmol.numeric?
16
+ if @type =~ /smiles/i
17
+ c = Compound.from_smiles type
18
+ elsif @type =~ /inchi/i
19
+ c = Compound.from_inchi type
20
+ type = c.smiles
21
+ else
22
+ p "Unknown type '#{type}' at line 1."
23
+ end
24
+ mmol = -Math.log10(mmol.to_f)
25
+ csv << [type, mmol]
26
+ else
27
+ p "Line #{i}: '#{mmol}' is not a numeric value."
28
+ end
29
+ end
30
+ i += 1
31
+ end
32
+ end
data/test/compound.rb CHANGED
@@ -2,19 +2,16 @@ require_relative "setup.rb"
2
2
 
3
3
  class CompoundTest < MiniTest::Test
4
4
 
5
- def test_0_compound_from_smiles
5
+ def test_compound_from_smiles
6
6
  c = OpenTox::Compound.from_smiles "F[B-](F)(F)F.[Na+]"
7
7
  assert_equal "InChI=1S/BF4.Na/c2-1(3,4)5;/q-1;+1", c.inchi.chomp
8
8
  assert_equal "F[B-](F)(F)F.[Na+]", c.smiles, "A failure here might be caused by a compound webservice running on 64bit architectures using an outdated version of OpenBabel. Please install OpenBabel version 2.3.2 or higher." # seems to be fixed in 2.3.2
9
9
  end
10
10
 
11
- def test_1_compound_from_smiles
11
+ def test_compound_from_smiles
12
12
  c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N"
13
13
  assert_equal "InChI=1S/C6H9NO/c1-5(4-7)3-6(2)8/h5H,3H2,1-2H3", c.inchi
14
14
  assert_equal "CC(C#N)CC(=O)C", c.smiles
15
- end
16
-
17
- def test_2_compound_from_smiles
18
15
  c = OpenTox::Compound.from_smiles "N#[N+]C1=CC=CC=C1.F[B-](F)(F)F"
19
16
  assert_equal "InChI=1S/C6H5N2.BF4/c7-8-6-4-2-1-3-5-6;2-1(3,4)5/h1-5H;/q+1;-1", c.inchi
20
17
  assert_equal "F[B-](F)(F)F.N#[N+]c1ccccc1", c.smiles
@@ -79,16 +76,6 @@ print c.sdf
79
76
  assert_equal 9, c.fingerprint("FP4").size
80
77
  end
81
78
 
82
- def test_neighbors
83
- d = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.csv")
84
- d.compounds.each do |c|
85
- refute_nil c.fingerprint("MP2D")
86
- end
87
- c = d.compounds[371]
88
- n = c.fingerprint_neighbors({:type => "FP4", :min_sim => 0.7, :training_dataset_id => d.id })
89
- assert n.size >= 18, "Neighbors size (#{n.size}) should be larger than 17"
90
- end
91
-
92
79
  def test_openbabel_segfault
93
80
  inchi = "InChI=1S/C19H27NO7/c1-11-9-19(12(2)27-19)17(23)26-14-6-8-20(4)7-5-13(15(14)21)10-25-16(22)18(11,3)24/h5,11-12,14,24H,6-10H2,1-4H3/b13-5-/t11-,12-,14-,18-,19?/m1/s1"
94
81
 
@@ -107,25 +94,6 @@ print c.sdf
107
94
  end
108
95
  end
109
96
 
110
- def test_fingerprint_neighbors
111
- types = ["FP2", "FP3", "FP4", "MACCS"]
112
- min_sim = 0.7
113
- training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.csv")
114
- [
115
- "CC(=O)CC(C)C#N",
116
- "CC(=O)CC(C)C",
117
- "C(=O)CC(C)C#N",
118
- ].each do |smi|
119
- c = OpenTox::Compound.from_smiles smi
120
- types.each do |type|
121
- neighbors = c.fingerprint_neighbors({:type => type, :training_dataset_id => training_dataset.id, :min_sim => min_sim})
122
- unless type == "FP2" and smi == "CC(=O)CC(C)C#N" or smi == "C(=O)CC(C)C#N" and (type == "FP2" or type == "MACCS")
123
- refute_empty neighbors
124
- end
125
- end
126
- end
127
- end
128
-
129
97
  def test_mna
130
98
  c = OpenTox::Compound.from_smiles "N#[N+]C1=CC=CC=C1.F[B-](F)(F)F"
131
99
  assert_equal 18, c.fingerprint("MNA").size
@@ -138,72 +106,14 @@ print c.sdf
138
106
  assert 7, c.fingerprint("MP2D").uniq.size
139
107
  end
140
108
 
141
- def test_fingerprint_count_neighbors
142
- types = ["MP2D", "MNA"]
143
- min_sim = 0.0
144
- training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.csv")
145
- [
146
- "CC(=O)CC(C)C#N",
147
- "CC(=O)CC(C)C",
148
- "C(=O)CC(C)C#N",
149
- ].each do |smi|
150
- c = OpenTox::Compound.from_smiles smi
151
- types.each do |type|
152
- neighbors = c.fingerprint_count_neighbors({:type => type, :training_dataset_id => training_dataset.id, :min_sim => min_sim})
153
- if type == "FP4"
154
- fp4_neighbors = c.neighbors
155
- neighbors.each do |n|
156
- assert_includes fp4_neighbors, n
157
- end
158
- end
159
- end
160
- end
161
- end
162
-
163
- def test_fingerprint_db_neighbors
164
- #skip
165
- training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.csv")
166
- [
167
- "CC(=O)CC(C)C#N",
168
- "CC(=O)CC(C)C",
169
- "C(=O)CC(C)C#N",
170
- ].each do |smi|
171
- c = OpenTox::Compound.from_smiles smi
172
- t = Time.now
173
- neighbors = c.db_neighbors(:training_dataset_id => training_dataset.id, :min_sim => 0.2)
174
- p Time.now - t
175
- t = Time.now
176
- neighbors2 = c.fingerprint_neighbors({:type => "MP2D", :training_dataset_id => training_dataset.id, :min_sim => 0.2})
177
- p Time.now - t
178
- p neighbors.size
179
- p neighbors2.size
180
- #p neighbors
181
- #p neighbors2
182
- #p neighbors2 - neighbors
183
- #assert_equal neighbors, neighbors2
184
- end
185
- end
186
-
187
109
  def test_molecular_weight
188
110
  c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C"
189
111
  assert_equal 100.15888, c.molecular_weight
190
112
  end
191
113
 
192
- def test_mg_conversions
193
- # TODO fix!
194
- skip
195
- c = OpenTox::Compound.from_smiles "O"
196
- mw = c.molecular_weight
197
- assert_equal 18.01528, mw
198
- assert_equal 0.8105107141417474, c.logmmol_to_mg(4.34688225631145, mw)
199
- assert_equal 9007.64, c.mmol_to_mg(500, mw)
200
- assert_equal 2437.9999984148976, c.logmg_to_mg(3.387033701)
201
- end
202
-
203
114
  def test_physchem
204
115
  c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C"
205
- assert_equal PhysChem::OBDESCRIPTORS.size, c.physchem.size
206
- assert_equal PhysChem::OBDESCRIPTORS.size, c.physchem(PhysChem.openbabel_descriptors).size
207
- assert_equal PhysChem::unique_descriptors.size, c.physchem(PhysChem.unique_descriptors).size
116
+ properties = c.calculate_properties(PhysChem.openbabel_descriptors)
117
+ assert_equal PhysChem::OPENBABEL.size, properties.size
208
118
  end
209
119
  end
@@ -0,0 +1,92 @@
1
+ STRUCTURE_SMILES,-log10(LC50_mmol)
2
+ C1=CC(C=O)=CC(OC)=C1OCCCCCC,1.9469215565165803
3
+ C1(OC)=C([N+]([O-])=O)C(C=O)=CC(Br)=C1O,0.575118363368933
4
+ C1=CC(Cl)=CC=C1OC2=C([N+](=O)[O-])C=CC=C2,2.114073660198569
5
+ CC1=C(NC=O)C=CC=C1Cl,0.5606673061697374
6
+ CCCCOC(=O)C1=CC=CC(C(=O)OCCCC)=C1,2.490797477668897
7
+ C(C1=CC=CC=C1)(C2=CC=CC=C2)(O)C#C,1.2732727909734278
8
+ CCCSCCSCCC,1.3746875490383261
9
+ CCCSCCCCSCCC,1.8386319977650252
10
+ CCCSCCCCSCCC,1.7328282715969863
11
+ ClCCOC(=O)NC1CCCCC1,0.7695510786217261
12
+ O=C1C(C2=CC=CC=C2)(C(=O)NC(=O)N1)CC,-0.31806333496276157
13
+ OC1=C(C=C(C=C1)[N+](=O)[O-])[N+](=O)[O-],1.2276782932770802
14
+ NC(=O)OCC,-1.7693773260761385
15
+ [O-]C(C1=CC=CC=C1O)=O.[Na+],-1.0969100130080565
16
+ C1=CC=CC=C1C(=O)N,-0.7371926427047373
17
+ CC[N+](CC)(CC)CC1(=CC=CC=C1).[Cl-],0.1505805862031006
18
+ CN(C)N,0.8827287043442358
19
+ CC(C(C(NC([O-])=N1)=O)(C1=O)CC)CCC.[Na+],0.7011469235902933
20
+ N1C(=O)C(CC)(CCC(C)C)C(=O)NC1=O,0.42365864979420714
21
+ O=C1C2=C(N=CN2C)N(C(=O)N1C)C,0.10902040301031106
22
+ C1=CC=C2C(=C1)C(=O)C(C)=CC2=O,3.1944991418415998
23
+ OC1=C(Cl)C(Cl)=C(Cl)C=C1Cl,2.3526170298853804
24
+ OC1=CC(C)=C(Cl)C=C1,1.4156687756324693
25
+ O=S(O)(O)=O.C1(=CC=CC=C1CC(N)C).C2=CC=CC=C2CC(N)C,1.106793246940152
26
+ O(CC)CC,-1.5378190950732742
27
+ NC1=CC=CC=C1,-0.05307844348341968
28
+ O=C(OC1=C2C(=CC=C1)C=CC=C2)NC,1.3615107430453628
29
+ CCO,-2.503790683057181
30
+ C1(=NC=CC=C1C2CCCN2C).OS(O)(=O)=O,1.275724130399211
31
+ C1(O)=CC=CC=C1C(=O)N,0.13312218566250114
32
+ CCCCCC=O,0.7569619513137056
33
+ O=C1OC2=CC=CC=C2C(O)=C1CC3=C(O)C4=CC=CC=C4OC3=O,1.8181564120552274
34
+ C1(C=O)=CC=C(OC2=CC=CC=C2)C=C1,1.6345120151091004
35
+ CO,-2.962369335670021
36
+ OC(C)C,-2.1583624920952498
37
+ CC(=O)C,-2.089905111439398
38
+ ClC(Cl)Cl,0.22767829327708025
39
+ CS(=O)C,-2.6384892569546374
40
+ ClC(C(Cl)(Cl)Cl)(Cl)Cl,2.221848749616356
41
+ OC1=C(C=C(C(=C1CC2=C(C(=CC(=C2Cl)Cl)Cl)O)Cl)Cl)Cl,4.287350298372789
42
+ C1=CC(=CC=C1N)C(=O)CC,0.009217308196862182
43
+ OCCC,-1.8790958795000727
44
+ CCCCO,-1.3673559210260189
45
+ CCCCCO,-0.72916478969277
46
+ C1=CC=CC=C1,0.6478174818886375
47
+ CC(Cl)(Cl)Cl,0.4497716469449059
48
+ [S-]C1=NC(C(C(C)CCC)(CC)C(N1)=O)=O.[Na+],1.0039263455147247
49
+ CC#N,-1.6031443726201824
50
+ CC=O,0.11520463605101904
51
+ ClCCl,-0.5899496013257077
52
+ IC(I)I,2.129596094720973
53
+ [N+](C)(C)(C)C.[Cl-],-0.6253124509616739
54
+ CC(C)(C)O,-1.9370161074648142
55
+ C(F)(F)(F)CO,-0.07554696139253074
56
+ CC(=O)C(C)(C)C,0.06098022355133353
57
+ ClC(C(Cl)Cl)(Cl)Cl,1.4294570601181025
58
+ CC1(C)NC(=O)NC1=O,-2.110589710299249
59
+ CCC(O)(C)CC,-0.8182258936139555
60
+ C#CC(O)(C)CC,-1.0934216851622351
61
+ C1CCCC(C#C)(O)C1,-0.31386722036915343
62
+ CCCCOCCOP(=O)(OCCOCCCC)OCCOCCCC,1.5512936800949202
63
+ CCCCOCCOP(=O)(OCCOCCCC)OCCOCCCC,1.7423214251308154
64
+ OCC(C)C,-1.2855573090077739
65
+ CC(Cl)CCl,-0.04921802267018165
66
+ NCC(N)C,-1.1335389083702174
67
+ CC(O)CC,-1.6946051989335686
68
+ CCC(=O)C,-1.6503075231319364
69
+ OC(C)CN,-1.526339277389844
70
+ ClC(CCl)Cl,0.21324857785443882
71
+ ClC(=CCl)Cl,0.4736607226101559
72
+ CC(=O)OC,-0.6830470382388496
73
+ ClC(C(Cl)Cl)Cl,0.9172146296835499
74
+ C1(C)(C)CCCC(C)=C1C=CC(C)=O,1.576754126063192
75
+ ClC1=C(O)C(Cl)=CC(=C1)C(C2=CC(Cl)=C(O)C(=C2)Cl)(C)C,2.4400933749638876
76
+ C(C1C=CC(=CC=1)O)(CC)(C)C,1.8013429130455774
77
+ C1CC(CCC1(N)C)C(C)(N)C,0.41680122603137726
78
+ ClC(Cl)C1=C(Cl)C=CC=C1Cl,2.374687549038326
79
+ C1=CC=C2C=CC=C3C2=C1CC3,1.9507819773298183
80
+ CC1=CNC2=C1C=CC=C2,1.1713401034646802
81
+ C1=CC=CC=C1OC(=O)C2=CC=CC=C2C(=O)OC3=CC=CC=C3,3.600326278518962
82
+ CCOC(=O)C1=CC=CC=C1C(=O)OCC,0.8446639625349383
83
+ C1=CC=C(C(=O)OCCCC)C(=C1)C(=O)OCCCC,2.444905551421681
84
+ O=C1C2=C(C=CC=C2)N=NN1CSP(=S)(OC)OC,3.694648630553376
85
+ C1=CC=CC=C1NC(=O)C2=C(O)C=CC=C2,1.7328282715969863
86
+ Cl\C(Cl)=C(Cl)/C(Cl)=C(Cl)\Cl,3.462180904926726
87
+ OC1=C(C(=C(C(=C1Cl)Cl)Cl)Cl)Cl,3.040005161671584
88
+ OC1=C(C=C(C=C1Cl)Cl)Cl,1.6055483191737838
89
+ OC1=CC(C(F)(F)F)=C([N+]([O-])=O)C=C1,1.3555614105321614
90
+ C1(N)=CC=CC=C1C(=O)N,-0.4623979978989561
91
+ C1(N)=CC=CC=C1C(=O)N,-0.3979400086720376
92
+ OC1=C([N+]([O-])=O)C=CC=C1,-0.06069784035361165
@@ -0,0 +1,16 @@
1
+ STRUCTURE_SMILES,-log10(LC50_mmol)
2
+ C1=CC(C=O)=CC(OC)=C1OCCCCCC,1.9469215565165803
3
+ C1(OC)=C([N+]([O-])=O)C(C=O)=CC(Br)=C1O,0.575118363368933
4
+ C1=CC(Cl)=CC=C1OC2=C([N+](=O)[O-])C=CC=C2,2.114073660198569
5
+ CC1=C(NC=O)C=CC=C1Cl,0.5606673061697374
6
+ CCCCOC(=O)C1=CC=CC(C(=O)OCCCC)=C1,2.490797477668897
7
+ C(C1=CC=CC=C1)(C2=CC=CC=C2)(O)C#C,1.2732727909734278
8
+ CCCSCCSCCC,1.3746875490383261
9
+ CCCSCCCCSCCC,1.8386319977650252
10
+ ClCCOC(=O)NC1CCCCC1,0.7695510786217261
11
+ O=C1C(C2=CC=CC=C2)(C(=O)NC(=O)N1)CC,-0.31806333496276157
12
+ OC1=C(C=C(C=C1)[N+](=O)[O-])[N+](=O)[O-],1.2276782932770802
13
+ NC(=O)OCC,-1.7693773260761385
14
+ [O-]C(C1=CC=CC=C1O)=O.[Na+],-1.0969100130080565
15
+ C1=CC=CC=C1C(=O)N,-0.7371926427047373
16
+ CC[N+](CC)(CC)CC1(=CC=CC=C1).[Cl-],0.1505805862031006