lazar 0.9.3 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -4
  3. data/README.md +5 -15
  4. data/VERSION +1 -1
  5. data/ext/lazar/extconf.rb +1 -1
  6. data/ext/lazar/rinstall.R +9 -7
  7. data/java/CdkDescriptorInfo.class +0 -0
  8. data/java/CdkDescriptorInfo.java +3 -2
  9. data/java/CdkDescriptors.class +0 -0
  10. data/java/CdkDescriptors.java +28 -28
  11. data/java/Rakefile +3 -3
  12. data/java/{cdk-1.4.19.jar → cdk-2.0-SNAPSHOT.jar} +0 -0
  13. data/lazar.gemspec +6 -7
  14. data/lib/algorithm.rb +2 -11
  15. data/lib/caret.rb +96 -0
  16. data/lib/classification.rb +14 -22
  17. data/lib/compound.rb +21 -87
  18. data/lib/crossvalidation.rb +80 -279
  19. data/lib/dataset.rb +105 -174
  20. data/lib/feature.rb +11 -18
  21. data/lib/feature_selection.rb +42 -0
  22. data/lib/import.rb +122 -0
  23. data/lib/lazar.rb +14 -4
  24. data/lib/leave-one-out-validation.rb +46 -192
  25. data/lib/model.rb +319 -128
  26. data/lib/nanoparticle.rb +98 -0
  27. data/lib/opentox.rb +7 -4
  28. data/lib/overwrite.rb +24 -3
  29. data/lib/physchem.rb +11 -10
  30. data/lib/regression.rb +7 -137
  31. data/lib/rest-client-wrapper.rb +0 -6
  32. data/lib/similarity.rb +65 -0
  33. data/lib/substance.rb +8 -0
  34. data/lib/train-test-validation.rb +69 -0
  35. data/lib/validation-statistics.rb +223 -0
  36. data/lib/validation.rb +17 -100
  37. data/scripts/mg2mmol.rb +17 -0
  38. data/scripts/mirror-enm2test.rb +4 -0
  39. data/scripts/mmol2-log10.rb +32 -0
  40. data/test/compound.rb +4 -94
  41. data/test/data/EPAFHM.medi_log10.csv +92 -0
  42. data/test/data/EPAFHM.mini_log10.csv +16 -0
  43. data/test/data/EPAFHM_log10.csv +581 -0
  44. data/test/data/loael_log10.csv +568 -0
  45. data/test/dataset.rb +195 -133
  46. data/test/descriptor.rb +27 -18
  47. data/test/error.rb +2 -2
  48. data/test/experiment.rb +4 -4
  49. data/test/feature.rb +2 -3
  50. data/test/gridfs.rb +10 -0
  51. data/test/model-classification.rb +106 -0
  52. data/test/model-nanoparticle.rb +128 -0
  53. data/test/model-regression.rb +171 -0
  54. data/test/model-validation.rb +19 -0
  55. data/test/nanomaterial-model-validation.rb +55 -0
  56. data/test/setup.rb +8 -4
  57. data/test/validation-classification.rb +67 -0
  58. data/test/validation-nanoparticle.rb +133 -0
  59. data/test/validation-regression.rb +92 -0
  60. metadata +50 -121
  61. data/test/classification.rb +0 -41
  62. data/test/data/CPDBAS_v5c_1547_29Apr2008part.sdf +0 -13553
  63. data/test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_mouse_TD50.csv +0 -436
  64. data/test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_rat_TD50.csv +0 -568
  65. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Hamster.csv +0 -87
  66. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mouse.csv +0 -978
  67. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall.csv +0 -1120
  68. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall_no_duplicates.csv +0 -1113
  69. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity.csv +0 -850
  70. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity_no_duplicates.csv +0 -829
  71. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Rat.csv +0 -1198
  72. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_SingleCellCall.csv +0 -1505
  73. data/test/data/EPA_v4b_Fathead_Minnow_Acute_Toxicity_LC50_mmol.csv +0 -581
  74. data/test/data/FDA_v3b_Maximum_Recommended_Daily_Dose_mmol.csv +0 -1217
  75. data/test/data/LOAEL_log_mg_corrected_smiles.csv +0 -568
  76. data/test/data/LOAEL_log_mmol_corrected_smiles.csv +0 -568
  77. data/test/data/boiling_points.ext.sdf +0 -11460
  78. data/test/data/cpdb_100.csv +0 -101
  79. data/test/data/hamster_carcinogenicity.ntriples +0 -618
  80. data/test/data/hamster_carcinogenicity.sdf +0 -2805
  81. data/test/data/hamster_carcinogenicity.xls +0 -0
  82. data/test/data/hamster_carcinogenicity.yaml +0 -352
  83. data/test/dataset-long.rb +0 -114
  84. data/test/lazar-long.rb +0 -92
  85. data/test/lazar-physchem-short.rb +0 -31
  86. data/test/prediction_models.rb +0 -20
  87. data/test/regression.rb +0 -43
  88. data/test/validation.rb +0 -108
data/lib/validation.rb CHANGED
@@ -1,108 +1,25 @@
1
1
  module OpenTox
2
2
 
3
- class Validation
4
-
5
- field :model_id, type: BSON::ObjectId
6
- field :prediction_dataset_id, type: BSON::ObjectId
7
- field :crossvalidation_id, type: BSON::ObjectId
8
- field :test_dataset_id, type: BSON::ObjectId
9
- field :nr_instances, type: Integer
10
- field :nr_unpredicted, type: Integer
11
- field :predictions, type: Array
12
-
13
- def prediction_dataset
14
- Dataset.find prediction_dataset_id
15
- end
16
-
17
- def test_dataset
18
- Dataset.find test_dataset_id
19
- end
20
-
21
- def model
22
- Model::Lazar.find model_id
23
- end
24
-
25
- def self.create model, training_set, test_set, crossvalidation=nil
26
-
27
- atts = model.attributes.dup # do not modify attributes from original model
28
- atts["_id"] = BSON::ObjectId.new
29
- atts[:training_dataset_id] = training_set.id
30
- validation_model = model.class.create training_set, atts
31
- validation_model.save
32
- cids = test_set.compound_ids
33
-
34
- test_set_without_activities = Dataset.new(:compound_ids => cids.uniq) # remove duplicates and make sure that activities cannot be used
35
- prediction_dataset = validation_model.predict test_set_without_activities
36
- predictions = []
37
- nr_unpredicted = 0
38
- activities = test_set.data_entries.collect{|de| de.first}
39
- prediction_dataset.data_entries.each_with_index do |de,i|
40
- if de[0] #and de[1]
41
- cid = prediction_dataset.compound_ids[i]
42
- rows = cids.each_index.select{|r| cids[r] == cid }
43
- activities = rows.collect{|r| test_set.data_entries[r][0]}
44
- prediction = de.first
45
- confidence = de[1]
46
- predictions << [prediction_dataset.compound_ids[i], activities, prediction, de[1]]
47
- else
48
- nr_unpredicted += 1
49
- end
3
+ module Validation
4
+
5
+ class Validation
6
+ include OpenTox
7
+ include Mongoid::Document
8
+ include Mongoid::Timestamps
9
+ store_in collection: "validations"
10
+ field :name, type: String
11
+ field :model_id, type: BSON::ObjectId
12
+ field :nr_instances, type: Integer, default: 0
13
+ field :nr_unpredicted, type: Integer, default: 0
14
+ field :predictions, type: Hash, default: {}
15
+ field :finished_at, type: Time
16
+
17
+ def model
18
+ Model::Lazar.find model_id
50
19
  end
51
- validation = self.new(
52
- :model_id => validation_model.id,
53
- :prediction_dataset_id => prediction_dataset.id,
54
- :test_dataset_id => test_set.id,
55
- :nr_instances => test_set.compound_ids.size,
56
- :nr_unpredicted => nr_unpredicted,
57
- :predictions => predictions#.sort{|a,b| p a; b[3] <=> a[3]} # sort according to confidence
58
- )
59
- validation.crossvalidation_id = crossvalidation.id if crossvalidation
60
- validation.save
61
- validation
62
- end
63
-
64
- end
65
-
66
- class ClassificationValidation < Validation
67
- end
68
20
 
69
- class RegressionValidation < Validation
70
-
71
- def statistics
72
- rmse = 0
73
- weighted_rmse = 0
74
- rse = 0
75
- weighted_rse = 0
76
- mae = 0
77
- weighted_mae = 0
78
- confidence_sum = 0
79
- predictions.each do |pred|
80
- compound_id,activity,prediction,confidence = pred
81
- if activity and prediction
82
- error = Math.log10(prediction)-Math.log10(activity.median)
83
- rmse += error**2
84
- weighted_rmse += confidence*error**2
85
- mae += error.abs
86
- weighted_mae += confidence*error.abs
87
- confidence_sum += confidence
88
- else
89
- warnings << "No training activities for #{Compound.find(compound_id).smiles} in training dataset #{model.training_dataset_id}."
90
- $logger.debug "No training activities for #{Compound.find(compound_id).smiles} in training dataset #{model.training_dataset_id}."
91
- end
92
- end
93
- x = predictions.collect{|p| p[1].median}
94
- y = predictions.collect{|p| p[2]}
95
- R.assign "measurement", x
96
- R.assign "prediction", y
97
- R.eval "r <- cor(-log(measurement),-log(prediction),use='complete')"
98
- r = R.eval("r").to_ruby
99
-
100
- mae = mae/predictions.size
101
- weighted_mae = weighted_mae/confidence_sum
102
- rmse = Math.sqrt(rmse/predictions.size)
103
- weighted_rmse = Math.sqrt(weighted_rmse/confidence_sum)
104
- { "R^2" => r**2, "RMSE" => rmse, "MAE" => mae }
105
21
  end
22
+
106
23
  end
107
24
 
108
25
  end
@@ -0,0 +1,17 @@
1
+ #!/usr/bin/env ruby
2
+ require_relative '../lazar/lib/lazar'
3
+ include OpenTox
4
+ newfile = ARGV[0].sub(/.csv/,"_mmol.csv")
5
+ p newfile
6
+ CSV.open(newfile, "wb") do |csv|
7
+ CSV.read(ARGV[0]).each do |line|
8
+ smi,mg = line
9
+ if mg.numeric?
10
+ c = Compound.from_smiles smi
11
+ mmol = c.mg_to_mmol mg.to_f
12
+ csv << [smi, mmol]
13
+ else
14
+ csv << [smi, mg.gsub(/mg/,'mmol')]
15
+ end
16
+ end
17
+ end
@@ -0,0 +1,4 @@
1
+ #!/usr/bin/env ruby
2
+ require_relative '../lib/lazar'
3
+ include OpenTox
4
+ Import::Enanomapper.mirror File.join(File.dirname(__FILE__),"..","test","data","enm")
@@ -0,0 +1,32 @@
1
+ #!/usr/bin/env ruby
2
+ require_relative '../lib/lazar'
3
+ include OpenTox
4
+
5
+ newfile = ARGV[0].sub(/.csv/,"_log10.csv")
6
+ p newfile
7
+ CSV.open(newfile, "wb") do |csv|
8
+ i = 1
9
+ CSV.read(ARGV[0]).each do |line|
10
+ type,mmol = line
11
+ if i == 1
12
+ @type = type
13
+ csv << ["SMILES", "-log10(#{mmol})"]
14
+ else
15
+ if mmol.numeric?
16
+ if @type =~ /smiles/i
17
+ c = Compound.from_smiles type
18
+ elsif @type =~ /inchi/i
19
+ c = Compound.from_inchi type
20
+ type = c.smiles
21
+ else
22
+ p "Unknown type '#{type}' at line 1."
23
+ end
24
+ mmol = -Math.log10(mmol.to_f)
25
+ csv << [type, mmol]
26
+ else
27
+ p "Line #{i}: '#{mmol}' is not a numeric value."
28
+ end
29
+ end
30
+ i += 1
31
+ end
32
+ end
data/test/compound.rb CHANGED
@@ -2,19 +2,16 @@ require_relative "setup.rb"
2
2
 
3
3
  class CompoundTest < MiniTest::Test
4
4
 
5
- def test_0_compound_from_smiles
5
+ def test_compound_from_smiles
6
6
  c = OpenTox::Compound.from_smiles "F[B-](F)(F)F.[Na+]"
7
7
  assert_equal "InChI=1S/BF4.Na/c2-1(3,4)5;/q-1;+1", c.inchi.chomp
8
8
  assert_equal "F[B-](F)(F)F.[Na+]", c.smiles, "A failure here might be caused by a compound webservice running on 64bit architectures using an outdated version of OpenBabel. Please install OpenBabel version 2.3.2 or higher." # seems to be fixed in 2.3.2
9
9
  end
10
10
 
11
- def test_1_compound_from_smiles
11
+ def test_compound_from_smiles
12
12
  c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N"
13
13
  assert_equal "InChI=1S/C6H9NO/c1-5(4-7)3-6(2)8/h5H,3H2,1-2H3", c.inchi
14
14
  assert_equal "CC(C#N)CC(=O)C", c.smiles
15
- end
16
-
17
- def test_2_compound_from_smiles
18
15
  c = OpenTox::Compound.from_smiles "N#[N+]C1=CC=CC=C1.F[B-](F)(F)F"
19
16
  assert_equal "InChI=1S/C6H5N2.BF4/c7-8-6-4-2-1-3-5-6;2-1(3,4)5/h1-5H;/q+1;-1", c.inchi
20
17
  assert_equal "F[B-](F)(F)F.N#[N+]c1ccccc1", c.smiles
@@ -79,16 +76,6 @@ print c.sdf
79
76
  assert_equal 9, c.fingerprint("FP4").size
80
77
  end
81
78
 
82
- def test_neighbors
83
- d = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.csv")
84
- d.compounds.each do |c|
85
- refute_nil c.fingerprint("MP2D")
86
- end
87
- c = d.compounds[371]
88
- n = c.fingerprint_neighbors({:type => "FP4", :min_sim => 0.7, :training_dataset_id => d.id })
89
- assert n.size >= 18, "Neighbors size (#{n.size}) should be larger than 17"
90
- end
91
-
92
79
  def test_openbabel_segfault
93
80
  inchi = "InChI=1S/C19H27NO7/c1-11-9-19(12(2)27-19)17(23)26-14-6-8-20(4)7-5-13(15(14)21)10-25-16(22)18(11,3)24/h5,11-12,14,24H,6-10H2,1-4H3/b13-5-/t11-,12-,14-,18-,19?/m1/s1"
94
81
 
@@ -107,25 +94,6 @@ print c.sdf
107
94
  end
108
95
  end
109
96
 
110
- def test_fingerprint_neighbors
111
- types = ["FP2", "FP3", "FP4", "MACCS"]
112
- min_sim = 0.7
113
- training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.csv")
114
- [
115
- "CC(=O)CC(C)C#N",
116
- "CC(=O)CC(C)C",
117
- "C(=O)CC(C)C#N",
118
- ].each do |smi|
119
- c = OpenTox::Compound.from_smiles smi
120
- types.each do |type|
121
- neighbors = c.fingerprint_neighbors({:type => type, :training_dataset_id => training_dataset.id, :min_sim => min_sim})
122
- unless type == "FP2" and smi == "CC(=O)CC(C)C#N" or smi == "C(=O)CC(C)C#N" and (type == "FP2" or type == "MACCS")
123
- refute_empty neighbors
124
- end
125
- end
126
- end
127
- end
128
-
129
97
  def test_mna
130
98
  c = OpenTox::Compound.from_smiles "N#[N+]C1=CC=CC=C1.F[B-](F)(F)F"
131
99
  assert_equal 18, c.fingerprint("MNA").size
@@ -138,72 +106,14 @@ print c.sdf
138
106
  assert 7, c.fingerprint("MP2D").uniq.size
139
107
  end
140
108
 
141
- def test_fingerprint_count_neighbors
142
- types = ["MP2D", "MNA"]
143
- min_sim = 0.0
144
- training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.csv")
145
- [
146
- "CC(=O)CC(C)C#N",
147
- "CC(=O)CC(C)C",
148
- "C(=O)CC(C)C#N",
149
- ].each do |smi|
150
- c = OpenTox::Compound.from_smiles smi
151
- types.each do |type|
152
- neighbors = c.fingerprint_count_neighbors({:type => type, :training_dataset_id => training_dataset.id, :min_sim => min_sim})
153
- if type == "FP4"
154
- fp4_neighbors = c.neighbors
155
- neighbors.each do |n|
156
- assert_includes fp4_neighbors, n
157
- end
158
- end
159
- end
160
- end
161
- end
162
-
163
- def test_fingerprint_db_neighbors
164
- #skip
165
- training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.csv")
166
- [
167
- "CC(=O)CC(C)C#N",
168
- "CC(=O)CC(C)C",
169
- "C(=O)CC(C)C#N",
170
- ].each do |smi|
171
- c = OpenTox::Compound.from_smiles smi
172
- t = Time.now
173
- neighbors = c.db_neighbors(:training_dataset_id => training_dataset.id, :min_sim => 0.2)
174
- p Time.now - t
175
- t = Time.now
176
- neighbors2 = c.fingerprint_neighbors({:type => "MP2D", :training_dataset_id => training_dataset.id, :min_sim => 0.2})
177
- p Time.now - t
178
- p neighbors.size
179
- p neighbors2.size
180
- #p neighbors
181
- #p neighbors2
182
- #p neighbors2 - neighbors
183
- #assert_equal neighbors, neighbors2
184
- end
185
- end
186
-
187
109
  def test_molecular_weight
188
110
  c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C"
189
111
  assert_equal 100.15888, c.molecular_weight
190
112
  end
191
113
 
192
- def test_mg_conversions
193
- # TODO fix!
194
- skip
195
- c = OpenTox::Compound.from_smiles "O"
196
- mw = c.molecular_weight
197
- assert_equal 18.01528, mw
198
- assert_equal 0.8105107141417474, c.logmmol_to_mg(4.34688225631145, mw)
199
- assert_equal 9007.64, c.mmol_to_mg(500, mw)
200
- assert_equal 2437.9999984148976, c.logmg_to_mg(3.387033701)
201
- end
202
-
203
114
  def test_physchem
204
115
  c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C"
205
- assert_equal PhysChem::OBDESCRIPTORS.size, c.physchem.size
206
- assert_equal PhysChem::OBDESCRIPTORS.size, c.physchem(PhysChem.openbabel_descriptors).size
207
- assert_equal PhysChem::unique_descriptors.size, c.physchem(PhysChem.unique_descriptors).size
116
+ properties = c.calculate_properties(PhysChem.openbabel_descriptors)
117
+ assert_equal PhysChem::OPENBABEL.size, properties.size
208
118
  end
209
119
  end
@@ -0,0 +1,92 @@
1
+ STRUCTURE_SMILES,-log10(LC50_mmol)
2
+ C1=CC(C=O)=CC(OC)=C1OCCCCCC,1.9469215565165803
3
+ C1(OC)=C([N+]([O-])=O)C(C=O)=CC(Br)=C1O,0.575118363368933
4
+ C1=CC(Cl)=CC=C1OC2=C([N+](=O)[O-])C=CC=C2,2.114073660198569
5
+ CC1=C(NC=O)C=CC=C1Cl,0.5606673061697374
6
+ CCCCOC(=O)C1=CC=CC(C(=O)OCCCC)=C1,2.490797477668897
7
+ C(C1=CC=CC=C1)(C2=CC=CC=C2)(O)C#C,1.2732727909734278
8
+ CCCSCCSCCC,1.3746875490383261
9
+ CCCSCCCCSCCC,1.8386319977650252
10
+ CCCSCCCCSCCC,1.7328282715969863
11
+ ClCCOC(=O)NC1CCCCC1,0.7695510786217261
12
+ O=C1C(C2=CC=CC=C2)(C(=O)NC(=O)N1)CC,-0.31806333496276157
13
+ OC1=C(C=C(C=C1)[N+](=O)[O-])[N+](=O)[O-],1.2276782932770802
14
+ NC(=O)OCC,-1.7693773260761385
15
+ [O-]C(C1=CC=CC=C1O)=O.[Na+],-1.0969100130080565
16
+ C1=CC=CC=C1C(=O)N,-0.7371926427047373
17
+ CC[N+](CC)(CC)CC1(=CC=CC=C1).[Cl-],0.1505805862031006
18
+ CN(C)N,0.8827287043442358
19
+ CC(C(C(NC([O-])=N1)=O)(C1=O)CC)CCC.[Na+],0.7011469235902933
20
+ N1C(=O)C(CC)(CCC(C)C)C(=O)NC1=O,0.42365864979420714
21
+ O=C1C2=C(N=CN2C)N(C(=O)N1C)C,0.10902040301031106
22
+ C1=CC=C2C(=C1)C(=O)C(C)=CC2=O,3.1944991418415998
23
+ OC1=C(Cl)C(Cl)=C(Cl)C=C1Cl,2.3526170298853804
24
+ OC1=CC(C)=C(Cl)C=C1,1.4156687756324693
25
+ O=S(O)(O)=O.C1(=CC=CC=C1CC(N)C).C2=CC=CC=C2CC(N)C,1.106793246940152
26
+ O(CC)CC,-1.5378190950732742
27
+ NC1=CC=CC=C1,-0.05307844348341968
28
+ O=C(OC1=C2C(=CC=C1)C=CC=C2)NC,1.3615107430453628
29
+ CCO,-2.503790683057181
30
+ C1(=NC=CC=C1C2CCCN2C).OS(O)(=O)=O,1.275724130399211
31
+ C1(O)=CC=CC=C1C(=O)N,0.13312218566250114
32
+ CCCCCC=O,0.7569619513137056
33
+ O=C1OC2=CC=CC=C2C(O)=C1CC3=C(O)C4=CC=CC=C4OC3=O,1.8181564120552274
34
+ C1(C=O)=CC=C(OC2=CC=CC=C2)C=C1,1.6345120151091004
35
+ CO,-2.962369335670021
36
+ OC(C)C,-2.1583624920952498
37
+ CC(=O)C,-2.089905111439398
38
+ ClC(Cl)Cl,0.22767829327708025
39
+ CS(=O)C,-2.6384892569546374
40
+ ClC(C(Cl)(Cl)Cl)(Cl)Cl,2.221848749616356
41
+ OC1=C(C=C(C(=C1CC2=C(C(=CC(=C2Cl)Cl)Cl)O)Cl)Cl)Cl,4.287350298372789
42
+ C1=CC(=CC=C1N)C(=O)CC,0.009217308196862182
43
+ OCCC,-1.8790958795000727
44
+ CCCCO,-1.3673559210260189
45
+ CCCCCO,-0.72916478969277
46
+ C1=CC=CC=C1,0.6478174818886375
47
+ CC(Cl)(Cl)Cl,0.4497716469449059
48
+ [S-]C1=NC(C(C(C)CCC)(CC)C(N1)=O)=O.[Na+],1.0039263455147247
49
+ CC#N,-1.6031443726201824
50
+ CC=O,0.11520463605101904
51
+ ClCCl,-0.5899496013257077
52
+ IC(I)I,2.129596094720973
53
+ [N+](C)(C)(C)C.[Cl-],-0.6253124509616739
54
+ CC(C)(C)O,-1.9370161074648142
55
+ C(F)(F)(F)CO,-0.07554696139253074
56
+ CC(=O)C(C)(C)C,0.06098022355133353
57
+ ClC(C(Cl)Cl)(Cl)Cl,1.4294570601181025
58
+ CC1(C)NC(=O)NC1=O,-2.110589710299249
59
+ CCC(O)(C)CC,-0.8182258936139555
60
+ C#CC(O)(C)CC,-1.0934216851622351
61
+ C1CCCC(C#C)(O)C1,-0.31386722036915343
62
+ CCCCOCCOP(=O)(OCCOCCCC)OCCOCCCC,1.5512936800949202
63
+ CCCCOCCOP(=O)(OCCOCCCC)OCCOCCCC,1.7423214251308154
64
+ OCC(C)C,-1.2855573090077739
65
+ CC(Cl)CCl,-0.04921802267018165
66
+ NCC(N)C,-1.1335389083702174
67
+ CC(O)CC,-1.6946051989335686
68
+ CCC(=O)C,-1.6503075231319364
69
+ OC(C)CN,-1.526339277389844
70
+ ClC(CCl)Cl,0.21324857785443882
71
+ ClC(=CCl)Cl,0.4736607226101559
72
+ CC(=O)OC,-0.6830470382388496
73
+ ClC(C(Cl)Cl)Cl,0.9172146296835499
74
+ C1(C)(C)CCCC(C)=C1C=CC(C)=O,1.576754126063192
75
+ ClC1=C(O)C(Cl)=CC(=C1)C(C2=CC(Cl)=C(O)C(=C2)Cl)(C)C,2.4400933749638876
76
+ C(C1C=CC(=CC=1)O)(CC)(C)C,1.8013429130455774
77
+ C1CC(CCC1(N)C)C(C)(N)C,0.41680122603137726
78
+ ClC(Cl)C1=C(Cl)C=CC=C1Cl,2.374687549038326
79
+ C1=CC=C2C=CC=C3C2=C1CC3,1.9507819773298183
80
+ CC1=CNC2=C1C=CC=C2,1.1713401034646802
81
+ C1=CC=CC=C1OC(=O)C2=CC=CC=C2C(=O)OC3=CC=CC=C3,3.600326278518962
82
+ CCOC(=O)C1=CC=CC=C1C(=O)OCC,0.8446639625349383
83
+ C1=CC=C(C(=O)OCCCC)C(=C1)C(=O)OCCCC,2.444905551421681
84
+ O=C1C2=C(C=CC=C2)N=NN1CSP(=S)(OC)OC,3.694648630553376
85
+ C1=CC=CC=C1NC(=O)C2=C(O)C=CC=C2,1.7328282715969863
86
+ Cl\C(Cl)=C(Cl)/C(Cl)=C(Cl)\Cl,3.462180904926726
87
+ OC1=C(C(=C(C(=C1Cl)Cl)Cl)Cl)Cl,3.040005161671584
88
+ OC1=C(C=C(C=C1Cl)Cl)Cl,1.6055483191737838
89
+ OC1=CC(C(F)(F)F)=C([N+]([O-])=O)C=C1,1.3555614105321614
90
+ C1(N)=CC=CC=C1C(=O)N,-0.4623979978989561
91
+ C1(N)=CC=CC=C1C(=O)N,-0.3979400086720376
92
+ OC1=C([N+]([O-])=O)C=CC=C1,-0.06069784035361165
@@ -0,0 +1,16 @@
1
+ STRUCTURE_SMILES,-log10(LC50_mmol)
2
+ C1=CC(C=O)=CC(OC)=C1OCCCCCC,1.9469215565165803
3
+ C1(OC)=C([N+]([O-])=O)C(C=O)=CC(Br)=C1O,0.575118363368933
4
+ C1=CC(Cl)=CC=C1OC2=C([N+](=O)[O-])C=CC=C2,2.114073660198569
5
+ CC1=C(NC=O)C=CC=C1Cl,0.5606673061697374
6
+ CCCCOC(=O)C1=CC=CC(C(=O)OCCCC)=C1,2.490797477668897
7
+ C(C1=CC=CC=C1)(C2=CC=CC=C2)(O)C#C,1.2732727909734278
8
+ CCCSCCSCCC,1.3746875490383261
9
+ CCCSCCCCSCCC,1.8386319977650252
10
+ ClCCOC(=O)NC1CCCCC1,0.7695510786217261
11
+ O=C1C(C2=CC=CC=C2)(C(=O)NC(=O)N1)CC,-0.31806333496276157
12
+ OC1=C(C=C(C=C1)[N+](=O)[O-])[N+](=O)[O-],1.2276782932770802
13
+ NC(=O)OCC,-1.7693773260761385
14
+ [O-]C(C1=CC=CC=C1O)=O.[Na+],-1.0969100130080565
15
+ C1=CC=CC=C1C(=O)N,-0.7371926427047373
16
+ CC[N+](CC)(CC)CC1(=CC=CC=C1).[Cl-],0.1505805862031006