lazar 0.9.3 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -4
- data/README.md +5 -15
- data/VERSION +1 -1
- data/ext/lazar/extconf.rb +1 -1
- data/ext/lazar/rinstall.R +9 -7
- data/java/CdkDescriptorInfo.class +0 -0
- data/java/CdkDescriptorInfo.java +3 -2
- data/java/CdkDescriptors.class +0 -0
- data/java/CdkDescriptors.java +28 -28
- data/java/Rakefile +3 -3
- data/java/{cdk-1.4.19.jar → cdk-2.0-SNAPSHOT.jar} +0 -0
- data/lazar.gemspec +6 -7
- data/lib/algorithm.rb +2 -11
- data/lib/caret.rb +96 -0
- data/lib/classification.rb +14 -22
- data/lib/compound.rb +21 -87
- data/lib/crossvalidation.rb +80 -279
- data/lib/dataset.rb +105 -174
- data/lib/feature.rb +11 -18
- data/lib/feature_selection.rb +42 -0
- data/lib/import.rb +122 -0
- data/lib/lazar.rb +14 -4
- data/lib/leave-one-out-validation.rb +46 -192
- data/lib/model.rb +319 -128
- data/lib/nanoparticle.rb +98 -0
- data/lib/opentox.rb +7 -4
- data/lib/overwrite.rb +24 -3
- data/lib/physchem.rb +11 -10
- data/lib/regression.rb +7 -137
- data/lib/rest-client-wrapper.rb +0 -6
- data/lib/similarity.rb +65 -0
- data/lib/substance.rb +8 -0
- data/lib/train-test-validation.rb +69 -0
- data/lib/validation-statistics.rb +223 -0
- data/lib/validation.rb +17 -100
- data/scripts/mg2mmol.rb +17 -0
- data/scripts/mirror-enm2test.rb +4 -0
- data/scripts/mmol2-log10.rb +32 -0
- data/test/compound.rb +4 -94
- data/test/data/EPAFHM.medi_log10.csv +92 -0
- data/test/data/EPAFHM.mini_log10.csv +16 -0
- data/test/data/EPAFHM_log10.csv +581 -0
- data/test/data/loael_log10.csv +568 -0
- data/test/dataset.rb +195 -133
- data/test/descriptor.rb +27 -18
- data/test/error.rb +2 -2
- data/test/experiment.rb +4 -4
- data/test/feature.rb +2 -3
- data/test/gridfs.rb +10 -0
- data/test/model-classification.rb +106 -0
- data/test/model-nanoparticle.rb +128 -0
- data/test/model-regression.rb +171 -0
- data/test/model-validation.rb +19 -0
- data/test/nanomaterial-model-validation.rb +55 -0
- data/test/setup.rb +8 -4
- data/test/validation-classification.rb +67 -0
- data/test/validation-nanoparticle.rb +133 -0
- data/test/validation-regression.rb +92 -0
- metadata +50 -121
- data/test/classification.rb +0 -41
- data/test/data/CPDBAS_v5c_1547_29Apr2008part.sdf +0 -13553
- data/test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_mouse_TD50.csv +0 -436
- data/test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_rat_TD50.csv +0 -568
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Hamster.csv +0 -87
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mouse.csv +0 -978
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall.csv +0 -1120
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall_no_duplicates.csv +0 -1113
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity.csv +0 -850
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity_no_duplicates.csv +0 -829
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Rat.csv +0 -1198
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_SingleCellCall.csv +0 -1505
- data/test/data/EPA_v4b_Fathead_Minnow_Acute_Toxicity_LC50_mmol.csv +0 -581
- data/test/data/FDA_v3b_Maximum_Recommended_Daily_Dose_mmol.csv +0 -1217
- data/test/data/LOAEL_log_mg_corrected_smiles.csv +0 -568
- data/test/data/LOAEL_log_mmol_corrected_smiles.csv +0 -568
- data/test/data/boiling_points.ext.sdf +0 -11460
- data/test/data/cpdb_100.csv +0 -101
- data/test/data/hamster_carcinogenicity.ntriples +0 -618
- data/test/data/hamster_carcinogenicity.sdf +0 -2805
- data/test/data/hamster_carcinogenicity.xls +0 -0
- data/test/data/hamster_carcinogenicity.yaml +0 -352
- data/test/dataset-long.rb +0 -114
- data/test/lazar-long.rb +0 -92
- data/test/lazar-physchem-short.rb +0 -31
- data/test/prediction_models.rb +0 -20
- data/test/regression.rb +0 -43
- data/test/validation.rb +0 -108
data/lib/validation.rb
CHANGED
@@ -1,108 +1,25 @@
|
|
1
1
|
module OpenTox
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
end
|
20
|
-
|
21
|
-
def model
|
22
|
-
Model::Lazar.find model_id
|
23
|
-
end
|
24
|
-
|
25
|
-
def self.create model, training_set, test_set, crossvalidation=nil
|
26
|
-
|
27
|
-
atts = model.attributes.dup # do not modify attributes from original model
|
28
|
-
atts["_id"] = BSON::ObjectId.new
|
29
|
-
atts[:training_dataset_id] = training_set.id
|
30
|
-
validation_model = model.class.create training_set, atts
|
31
|
-
validation_model.save
|
32
|
-
cids = test_set.compound_ids
|
33
|
-
|
34
|
-
test_set_without_activities = Dataset.new(:compound_ids => cids.uniq) # remove duplicates and make sure that activities cannot be used
|
35
|
-
prediction_dataset = validation_model.predict test_set_without_activities
|
36
|
-
predictions = []
|
37
|
-
nr_unpredicted = 0
|
38
|
-
activities = test_set.data_entries.collect{|de| de.first}
|
39
|
-
prediction_dataset.data_entries.each_with_index do |de,i|
|
40
|
-
if de[0] #and de[1]
|
41
|
-
cid = prediction_dataset.compound_ids[i]
|
42
|
-
rows = cids.each_index.select{|r| cids[r] == cid }
|
43
|
-
activities = rows.collect{|r| test_set.data_entries[r][0]}
|
44
|
-
prediction = de.first
|
45
|
-
confidence = de[1]
|
46
|
-
predictions << [prediction_dataset.compound_ids[i], activities, prediction, de[1]]
|
47
|
-
else
|
48
|
-
nr_unpredicted += 1
|
49
|
-
end
|
3
|
+
module Validation
|
4
|
+
|
5
|
+
class Validation
|
6
|
+
include OpenTox
|
7
|
+
include Mongoid::Document
|
8
|
+
include Mongoid::Timestamps
|
9
|
+
store_in collection: "validations"
|
10
|
+
field :name, type: String
|
11
|
+
field :model_id, type: BSON::ObjectId
|
12
|
+
field :nr_instances, type: Integer, default: 0
|
13
|
+
field :nr_unpredicted, type: Integer, default: 0
|
14
|
+
field :predictions, type: Hash, default: {}
|
15
|
+
field :finished_at, type: Time
|
16
|
+
|
17
|
+
def model
|
18
|
+
Model::Lazar.find model_id
|
50
19
|
end
|
51
|
-
validation = self.new(
|
52
|
-
:model_id => validation_model.id,
|
53
|
-
:prediction_dataset_id => prediction_dataset.id,
|
54
|
-
:test_dataset_id => test_set.id,
|
55
|
-
:nr_instances => test_set.compound_ids.size,
|
56
|
-
:nr_unpredicted => nr_unpredicted,
|
57
|
-
:predictions => predictions#.sort{|a,b| p a; b[3] <=> a[3]} # sort according to confidence
|
58
|
-
)
|
59
|
-
validation.crossvalidation_id = crossvalidation.id if crossvalidation
|
60
|
-
validation.save
|
61
|
-
validation
|
62
|
-
end
|
63
|
-
|
64
|
-
end
|
65
|
-
|
66
|
-
class ClassificationValidation < Validation
|
67
|
-
end
|
68
20
|
|
69
|
-
class RegressionValidation < Validation
|
70
|
-
|
71
|
-
def statistics
|
72
|
-
rmse = 0
|
73
|
-
weighted_rmse = 0
|
74
|
-
rse = 0
|
75
|
-
weighted_rse = 0
|
76
|
-
mae = 0
|
77
|
-
weighted_mae = 0
|
78
|
-
confidence_sum = 0
|
79
|
-
predictions.each do |pred|
|
80
|
-
compound_id,activity,prediction,confidence = pred
|
81
|
-
if activity and prediction
|
82
|
-
error = Math.log10(prediction)-Math.log10(activity.median)
|
83
|
-
rmse += error**2
|
84
|
-
weighted_rmse += confidence*error**2
|
85
|
-
mae += error.abs
|
86
|
-
weighted_mae += confidence*error.abs
|
87
|
-
confidence_sum += confidence
|
88
|
-
else
|
89
|
-
warnings << "No training activities for #{Compound.find(compound_id).smiles} in training dataset #{model.training_dataset_id}."
|
90
|
-
$logger.debug "No training activities for #{Compound.find(compound_id).smiles} in training dataset #{model.training_dataset_id}."
|
91
|
-
end
|
92
|
-
end
|
93
|
-
x = predictions.collect{|p| p[1].median}
|
94
|
-
y = predictions.collect{|p| p[2]}
|
95
|
-
R.assign "measurement", x
|
96
|
-
R.assign "prediction", y
|
97
|
-
R.eval "r <- cor(-log(measurement),-log(prediction),use='complete')"
|
98
|
-
r = R.eval("r").to_ruby
|
99
|
-
|
100
|
-
mae = mae/predictions.size
|
101
|
-
weighted_mae = weighted_mae/confidence_sum
|
102
|
-
rmse = Math.sqrt(rmse/predictions.size)
|
103
|
-
weighted_rmse = Math.sqrt(weighted_rmse/confidence_sum)
|
104
|
-
{ "R^2" => r**2, "RMSE" => rmse, "MAE" => mae }
|
105
21
|
end
|
22
|
+
|
106
23
|
end
|
107
24
|
|
108
25
|
end
|
data/scripts/mg2mmol.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require_relative '../lazar/lib/lazar'
|
3
|
+
include OpenTox
|
4
|
+
newfile = ARGV[0].sub(/.csv/,"_mmol.csv")
|
5
|
+
p newfile
|
6
|
+
CSV.open(newfile, "wb") do |csv|
|
7
|
+
CSV.read(ARGV[0]).each do |line|
|
8
|
+
smi,mg = line
|
9
|
+
if mg.numeric?
|
10
|
+
c = Compound.from_smiles smi
|
11
|
+
mmol = c.mg_to_mmol mg.to_f
|
12
|
+
csv << [smi, mmol]
|
13
|
+
else
|
14
|
+
csv << [smi, mg.gsub(/mg/,'mmol')]
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
data/scripts/mmol2-log10.rb
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require_relative '../lib/lazar'
|
3
|
+
include OpenTox
|
4
|
+
|
5
|
+
newfile = ARGV[0].sub(/.csv/,"_log10.csv")
|
6
|
+
p newfile
|
7
|
+
CSV.open(newfile, "wb") do |csv|
|
8
|
+
i = 1
|
9
|
+
CSV.read(ARGV[0]).each do |line|
|
10
|
+
type,mmol = line
|
11
|
+
if i == 1
|
12
|
+
@type = type
|
13
|
+
csv << ["SMILES", "-log10(#{mmol})"]
|
14
|
+
else
|
15
|
+
if mmol.numeric?
|
16
|
+
if @type =~ /smiles/i
|
17
|
+
c = Compound.from_smiles type
|
18
|
+
elsif @type =~ /inchi/i
|
19
|
+
c = Compound.from_inchi type
|
20
|
+
type = c.smiles
|
21
|
+
else
|
22
|
+
p "Unknown type '#{type}' at line 1."
|
23
|
+
end
|
24
|
+
mmol = -Math.log10(mmol.to_f)
|
25
|
+
csv << [type, mmol]
|
26
|
+
else
|
27
|
+
p "Line #{i}: '#{mmol}' is not a numeric value."
|
28
|
+
end
|
29
|
+
end
|
30
|
+
i += 1
|
31
|
+
end
|
32
|
+
end
|
data/test/compound.rb
CHANGED
@@ -2,19 +2,16 @@ require_relative "setup.rb"
|
|
2
2
|
|
3
3
|
class CompoundTest < MiniTest::Test
|
4
4
|
|
5
|
-
def
|
5
|
+
def test_compound_from_smiles
|
6
6
|
c = OpenTox::Compound.from_smiles "F[B-](F)(F)F.[Na+]"
|
7
7
|
assert_equal "InChI=1S/BF4.Na/c2-1(3,4)5;/q-1;+1", c.inchi.chomp
|
8
8
|
assert_equal "F[B-](F)(F)F.[Na+]", c.smiles, "A failure here might be caused by a compound webservice running on 64bit architectures using an outdated version of OpenBabel. Please install OpenBabel version 2.3.2 or higher." # seems to be fixed in 2.3.2
|
9
9
|
end
|
10
10
|
|
11
|
-
def
|
11
|
+
def test_compound_from_smiles
|
12
12
|
c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N"
|
13
13
|
assert_equal "InChI=1S/C6H9NO/c1-5(4-7)3-6(2)8/h5H,3H2,1-2H3", c.inchi
|
14
14
|
assert_equal "CC(C#N)CC(=O)C", c.smiles
|
15
|
-
end
|
16
|
-
|
17
|
-
def test_2_compound_from_smiles
|
18
15
|
c = OpenTox::Compound.from_smiles "N#[N+]C1=CC=CC=C1.F[B-](F)(F)F"
|
19
16
|
assert_equal "InChI=1S/C6H5N2.BF4/c7-8-6-4-2-1-3-5-6;2-1(3,4)5/h1-5H;/q+1;-1", c.inchi
|
20
17
|
assert_equal "F[B-](F)(F)F.N#[N+]c1ccccc1", c.smiles
|
@@ -79,16 +76,6 @@ print c.sdf
|
|
79
76
|
assert_equal 9, c.fingerprint("FP4").size
|
80
77
|
end
|
81
78
|
|
82
|
-
def test_neighbors
|
83
|
-
d = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.csv")
|
84
|
-
d.compounds.each do |c|
|
85
|
-
refute_nil c.fingerprint("MP2D")
|
86
|
-
end
|
87
|
-
c = d.compounds[371]
|
88
|
-
n = c.fingerprint_neighbors({:type => "FP4", :min_sim => 0.7, :training_dataset_id => d.id })
|
89
|
-
assert n.size >= 18, "Neighbors size (#{n.size}) should be larger than 17"
|
90
|
-
end
|
91
|
-
|
92
79
|
def test_openbabel_segfault
|
93
80
|
inchi = "InChI=1S/C19H27NO7/c1-11-9-19(12(2)27-19)17(23)26-14-6-8-20(4)7-5-13(15(14)21)10-25-16(22)18(11,3)24/h5,11-12,14,24H,6-10H2,1-4H3/b13-5-/t11-,12-,14-,18-,19?/m1/s1"
|
94
81
|
|
@@ -107,25 +94,6 @@ print c.sdf
|
|
107
94
|
end
|
108
95
|
end
|
109
96
|
|
110
|
-
def test_fingerprint_neighbors
|
111
|
-
types = ["FP2", "FP3", "FP4", "MACCS"]
|
112
|
-
min_sim = 0.7
|
113
|
-
training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.csv")
|
114
|
-
[
|
115
|
-
"CC(=O)CC(C)C#N",
|
116
|
-
"CC(=O)CC(C)C",
|
117
|
-
"C(=O)CC(C)C#N",
|
118
|
-
].each do |smi|
|
119
|
-
c = OpenTox::Compound.from_smiles smi
|
120
|
-
types.each do |type|
|
121
|
-
neighbors = c.fingerprint_neighbors({:type => type, :training_dataset_id => training_dataset.id, :min_sim => min_sim})
|
122
|
-
unless type == "FP2" and smi == "CC(=O)CC(C)C#N" or smi == "C(=O)CC(C)C#N" and (type == "FP2" or type == "MACCS")
|
123
|
-
refute_empty neighbors
|
124
|
-
end
|
125
|
-
end
|
126
|
-
end
|
127
|
-
end
|
128
|
-
|
129
97
|
def test_mna
|
130
98
|
c = OpenTox::Compound.from_smiles "N#[N+]C1=CC=CC=C1.F[B-](F)(F)F"
|
131
99
|
assert_equal 18, c.fingerprint("MNA").size
|
@@ -138,72 +106,14 @@ print c.sdf
|
|
138
106
|
assert 7, c.fingerprint("MP2D").uniq.size
|
139
107
|
end
|
140
108
|
|
141
|
-
def test_fingerprint_count_neighbors
|
142
|
-
types = ["MP2D", "MNA"]
|
143
|
-
min_sim = 0.0
|
144
|
-
training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.csv")
|
145
|
-
[
|
146
|
-
"CC(=O)CC(C)C#N",
|
147
|
-
"CC(=O)CC(C)C",
|
148
|
-
"C(=O)CC(C)C#N",
|
149
|
-
].each do |smi|
|
150
|
-
c = OpenTox::Compound.from_smiles smi
|
151
|
-
types.each do |type|
|
152
|
-
neighbors = c.fingerprint_count_neighbors({:type => type, :training_dataset_id => training_dataset.id, :min_sim => min_sim})
|
153
|
-
if type == "FP4"
|
154
|
-
fp4_neighbors = c.neighbors
|
155
|
-
neighbors.each do |n|
|
156
|
-
assert_includes fp4_neighbors, n
|
157
|
-
end
|
158
|
-
end
|
159
|
-
end
|
160
|
-
end
|
161
|
-
end
|
162
|
-
|
163
|
-
def test_fingerprint_db_neighbors
|
164
|
-
#skip
|
165
|
-
training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.csv")
|
166
|
-
[
|
167
|
-
"CC(=O)CC(C)C#N",
|
168
|
-
"CC(=O)CC(C)C",
|
169
|
-
"C(=O)CC(C)C#N",
|
170
|
-
].each do |smi|
|
171
|
-
c = OpenTox::Compound.from_smiles smi
|
172
|
-
t = Time.now
|
173
|
-
neighbors = c.db_neighbors(:training_dataset_id => training_dataset.id, :min_sim => 0.2)
|
174
|
-
p Time.now - t
|
175
|
-
t = Time.now
|
176
|
-
neighbors2 = c.fingerprint_neighbors({:type => "MP2D", :training_dataset_id => training_dataset.id, :min_sim => 0.2})
|
177
|
-
p Time.now - t
|
178
|
-
p neighbors.size
|
179
|
-
p neighbors2.size
|
180
|
-
#p neighbors
|
181
|
-
#p neighbors2
|
182
|
-
#p neighbors2 - neighbors
|
183
|
-
#assert_equal neighbors, neighbors2
|
184
|
-
end
|
185
|
-
end
|
186
|
-
|
187
109
|
def test_molecular_weight
|
188
110
|
c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C"
|
189
111
|
assert_equal 100.15888, c.molecular_weight
|
190
112
|
end
|
191
113
|
|
192
|
-
def test_mg_conversions
|
193
|
-
# TODO fix!
|
194
|
-
skip
|
195
|
-
c = OpenTox::Compound.from_smiles "O"
|
196
|
-
mw = c.molecular_weight
|
197
|
-
assert_equal 18.01528, mw
|
198
|
-
assert_equal 0.8105107141417474, c.logmmol_to_mg(4.34688225631145, mw)
|
199
|
-
assert_equal 9007.64, c.mmol_to_mg(500, mw)
|
200
|
-
assert_equal 2437.9999984148976, c.logmg_to_mg(3.387033701)
|
201
|
-
end
|
202
|
-
|
203
114
|
def test_physchem
|
204
115
|
c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C"
|
205
|
-
|
206
|
-
assert_equal PhysChem::
|
207
|
-
assert_equal PhysChem::unique_descriptors.size, c.physchem(PhysChem.unique_descriptors).size
|
116
|
+
properties = c.calculate_properties(PhysChem.openbabel_descriptors)
|
117
|
+
assert_equal PhysChem::OPENBABEL.size, properties.size
|
208
118
|
end
|
209
119
|
end
|
data/test/data/EPAFHM.medi_log10.csv
ADDED
@@ -0,0 +1,92 @@
|
|
1
|
+
STRUCTURE_SMILES,-log10(LC50_mmol)
|
2
|
+
C1=CC(C=O)=CC(OC)=C1OCCCCCC,1.9469215565165803
|
3
|
+
C1(OC)=C([N+]([O-])=O)C(C=O)=CC(Br)=C1O,0.575118363368933
|
4
|
+
C1=CC(Cl)=CC=C1OC2=C([N+](=O)[O-])C=CC=C2,2.114073660198569
|
5
|
+
CC1=C(NC=O)C=CC=C1Cl,0.5606673061697374
|
6
|
+
CCCCOC(=O)C1=CC=CC(C(=O)OCCCC)=C1,2.490797477668897
|
7
|
+
C(C1=CC=CC=C1)(C2=CC=CC=C2)(O)C#C,1.2732727909734278
|
8
|
+
CCCSCCSCCC,1.3746875490383261
|
9
|
+
CCCSCCCCSCCC,1.8386319977650252
|
10
|
+
CCCSCCCCSCCC,1.7328282715969863
|
11
|
+
ClCCOC(=O)NC1CCCCC1,0.7695510786217261
|
12
|
+
O=C1C(C2=CC=CC=C2)(C(=O)NC(=O)N1)CC,-0.31806333496276157
|
13
|
+
OC1=C(C=C(C=C1)[N+](=O)[O-])[N+](=O)[O-],1.2276782932770802
|
14
|
+
NC(=O)OCC,-1.7693773260761385
|
15
|
+
[O-]C(C1=CC=CC=C1O)=O.[Na+],-1.0969100130080565
|
16
|
+
C1=CC=CC=C1C(=O)N,-0.7371926427047373
|
17
|
+
CC[N+](CC)(CC)CC1(=CC=CC=C1).[Cl-],0.1505805862031006
|
18
|
+
CN(C)N,0.8827287043442358
|
19
|
+
CC(C(C(NC([O-])=N1)=O)(C1=O)CC)CCC.[Na+],0.7011469235902933
|
20
|
+
N1C(=O)C(CC)(CCC(C)C)C(=O)NC1=O,0.42365864979420714
|
21
|
+
O=C1C2=C(N=CN2C)N(C(=O)N1C)C,0.10902040301031106
|
22
|
+
C1=CC=C2C(=C1)C(=O)C(C)=CC2=O,3.1944991418415998
|
23
|
+
OC1=C(Cl)C(Cl)=C(Cl)C=C1Cl,2.3526170298853804
|
24
|
+
OC1=CC(C)=C(Cl)C=C1,1.4156687756324693
|
25
|
+
O=S(O)(O)=O.C1(=CC=CC=C1CC(N)C).C2=CC=CC=C2CC(N)C,1.106793246940152
|
26
|
+
O(CC)CC,-1.5378190950732742
|
27
|
+
NC1=CC=CC=C1,-0.05307844348341968
|
28
|
+
O=C(OC1=C2C(=CC=C1)C=CC=C2)NC,1.3615107430453628
|
29
|
+
CCO,-2.503790683057181
|
30
|
+
C1(=NC=CC=C1C2CCCN2C).OS(O)(=O)=O,1.275724130399211
|
31
|
+
C1(O)=CC=CC=C1C(=O)N,0.13312218566250114
|
32
|
+
CCCCCC=O,0.7569619513137056
|
33
|
+
O=C1OC2=CC=CC=C2C(O)=C1CC3=C(O)C4=CC=CC=C4OC3=O,1.8181564120552274
|
34
|
+
C1(C=O)=CC=C(OC2=CC=CC=C2)C=C1,1.6345120151091004
|
35
|
+
CO,-2.962369335670021
|
36
|
+
OC(C)C,-2.1583624920952498
|
37
|
+
CC(=O)C,-2.089905111439398
|
38
|
+
ClC(Cl)Cl,0.22767829327708025
|
39
|
+
CS(=O)C,-2.6384892569546374
|
40
|
+
ClC(C(Cl)(Cl)Cl)(Cl)Cl,2.221848749616356
|
41
|
+
OC1=C(C=C(C(=C1CC2=C(C(=CC(=C2Cl)Cl)Cl)O)Cl)Cl)Cl,4.287350298372789
|
42
|
+
C1=CC(=CC=C1N)C(=O)CC,0.009217308196862182
|
43
|
+
OCCC,-1.8790958795000727
|
44
|
+
CCCCO,-1.3673559210260189
|
45
|
+
CCCCCO,-0.72916478969277
|
46
|
+
C1=CC=CC=C1,0.6478174818886375
|
47
|
+
CC(Cl)(Cl)Cl,0.4497716469449059
|
48
|
+
[S-]C1=NC(C(C(C)CCC)(CC)C(N1)=O)=O.[Na+],1.0039263455147247
|
49
|
+
CC#N,-1.6031443726201824
|
50
|
+
CC=O,0.11520463605101904
|
51
|
+
ClCCl,-0.5899496013257077
|
52
|
+
IC(I)I,2.129596094720973
|
53
|
+
[N+](C)(C)(C)C.[Cl-],-0.6253124509616739
|
54
|
+
CC(C)(C)O,-1.9370161074648142
|
55
|
+
C(F)(F)(F)CO,-0.07554696139253074
|
56
|
+
CC(=O)C(C)(C)C,0.06098022355133353
|
57
|
+
ClC(C(Cl)Cl)(Cl)Cl,1.4294570601181025
|
58
|
+
CC1(C)NC(=O)NC1=O,-2.110589710299249
|
59
|
+
CCC(O)(C)CC,-0.8182258936139555
|
60
|
+
C#CC(O)(C)CC,-1.0934216851622351
|
61
|
+
C1CCCC(C#C)(O)C1,-0.31386722036915343
|
62
|
+
CCCCOCCOP(=O)(OCCOCCCC)OCCOCCCC,1.5512936800949202
|
63
|
+
CCCCOCCOP(=O)(OCCOCCCC)OCCOCCCC,1.7423214251308154
|
64
|
+
OCC(C)C,-1.2855573090077739
|
65
|
+
CC(Cl)CCl,-0.04921802267018165
|
66
|
+
NCC(N)C,-1.1335389083702174
|
67
|
+
CC(O)CC,-1.6946051989335686
|
68
|
+
CCC(=O)C,-1.6503075231319364
|
69
|
+
OC(C)CN,-1.526339277389844
|
70
|
+
ClC(CCl)Cl,0.21324857785443882
|
71
|
+
ClC(=CCl)Cl,0.4736607226101559
|
72
|
+
CC(=O)OC,-0.6830470382388496
|
73
|
+
ClC(C(Cl)Cl)Cl,0.9172146296835499
|
74
|
+
C1(C)(C)CCCC(C)=C1C=CC(C)=O,1.576754126063192
|
75
|
+
ClC1=C(O)C(Cl)=CC(=C1)C(C2=CC(Cl)=C(O)C(=C2)Cl)(C)C,2.4400933749638876
|
76
|
+
C(C1C=CC(=CC=1)O)(CC)(C)C,1.8013429130455774
|
77
|
+
C1CC(CCC1(N)C)C(C)(N)C,0.41680122603137726
|
78
|
+
ClC(Cl)C1=C(Cl)C=CC=C1Cl,2.374687549038326
|
79
|
+
C1=CC=C2C=CC=C3C2=C1CC3,1.9507819773298183
|
80
|
+
CC1=CNC2=C1C=CC=C2,1.1713401034646802
|
81
|
+
C1=CC=CC=C1OC(=O)C2=CC=CC=C2C(=O)OC3=CC=CC=C3,3.600326278518962
|
82
|
+
CCOC(=O)C1=CC=CC=C1C(=O)OCC,0.8446639625349383
|
83
|
+
C1=CC=C(C(=O)OCCCC)C(=C1)C(=O)OCCCC,2.444905551421681
|
84
|
+
O=C1C2=C(C=CC=C2)N=NN1CSP(=S)(OC)OC,3.694648630553376
|
85
|
+
C1=CC=CC=C1NC(=O)C2=C(O)C=CC=C2,1.7328282715969863
|
86
|
+
Cl\C(Cl)=C(Cl)/C(Cl)=C(Cl)\Cl,3.462180904926726
|
87
|
+
OC1=C(C(=C(C(=C1Cl)Cl)Cl)Cl)Cl,3.040005161671584
|
88
|
+
OC1=C(C=C(C=C1Cl)Cl)Cl,1.6055483191737838
|
89
|
+
OC1=CC(C(F)(F)F)=C([N+]([O-])=O)C=C1,1.3555614105321614
|
90
|
+
C1(N)=CC=CC=C1C(=O)N,-0.4623979978989561
|
91
|
+
C1(N)=CC=CC=C1C(=O)N,-0.3979400086720376
|
92
|
+
OC1=C([N+]([O-])=O)C=CC=C1,-0.06069784035361165
|
data/test/data/EPAFHM.mini_log10.csv
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
STRUCTURE_SMILES,-log10(LC50_mmol)
|
2
|
+
C1=CC(C=O)=CC(OC)=C1OCCCCCC,1.9469215565165803
|
3
|
+
C1(OC)=C([N+]([O-])=O)C(C=O)=CC(Br)=C1O,0.575118363368933
|
4
|
+
C1=CC(Cl)=CC=C1OC2=C([N+](=O)[O-])C=CC=C2,2.114073660198569
|
5
|
+
CC1=C(NC=O)C=CC=C1Cl,0.5606673061697374
|
6
|
+
CCCCOC(=O)C1=CC=CC(C(=O)OCCCC)=C1,2.490797477668897
|
7
|
+
C(C1=CC=CC=C1)(C2=CC=CC=C2)(O)C#C,1.2732727909734278
|
8
|
+
CCCSCCSCCC,1.3746875490383261
|
9
|
+
CCCSCCCCSCCC,1.8386319977650252
|
10
|
+
ClCCOC(=O)NC1CCCCC1,0.7695510786217261
|
11
|
+
O=C1C(C2=CC=CC=C2)(C(=O)NC(=O)N1)CC,-0.31806333496276157
|
12
|
+
OC1=C(C=C(C=C1)[N+](=O)[O-])[N+](=O)[O-],1.2276782932770802
|
13
|
+
NC(=O)OCC,-1.7693773260761385
|
14
|
+
[O-]C(C1=CC=CC=C1O)=O.[Na+],-1.0969100130080565
|
15
|
+
C1=CC=CC=C1C(=O)N,-0.7371926427047373
|
16
|
+
CC[N+](CC)(CC)CC1(=CC=CC=C1).[Cl-],0.1505805862031006
|