lazar 0.9.3 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -4
- data/README.md +5 -15
- data/VERSION +1 -1
- data/ext/lazar/extconf.rb +1 -1
- data/ext/lazar/rinstall.R +9 -7
- data/java/CdkDescriptorInfo.class +0 -0
- data/java/CdkDescriptorInfo.java +3 -2
- data/java/CdkDescriptors.class +0 -0
- data/java/CdkDescriptors.java +28 -28
- data/java/Rakefile +3 -3
- data/java/{cdk-1.4.19.jar → cdk-2.0-SNAPSHOT.jar} +0 -0
- data/lazar.gemspec +6 -7
- data/lib/algorithm.rb +2 -11
- data/lib/caret.rb +96 -0
- data/lib/classification.rb +14 -22
- data/lib/compound.rb +21 -87
- data/lib/crossvalidation.rb +80 -279
- data/lib/dataset.rb +105 -174
- data/lib/feature.rb +11 -18
- data/lib/feature_selection.rb +42 -0
- data/lib/import.rb +122 -0
- data/lib/lazar.rb +14 -4
- data/lib/leave-one-out-validation.rb +46 -192
- data/lib/model.rb +319 -128
- data/lib/nanoparticle.rb +98 -0
- data/lib/opentox.rb +7 -4
- data/lib/overwrite.rb +24 -3
- data/lib/physchem.rb +11 -10
- data/lib/regression.rb +7 -137
- data/lib/rest-client-wrapper.rb +0 -6
- data/lib/similarity.rb +65 -0
- data/lib/substance.rb +8 -0
- data/lib/train-test-validation.rb +69 -0
- data/lib/validation-statistics.rb +223 -0
- data/lib/validation.rb +17 -100
- data/scripts/mg2mmol.rb +17 -0
- data/scripts/mirror-enm2test.rb +4 -0
- data/scripts/mmol2-log10.rb +32 -0
- data/test/compound.rb +4 -94
- data/test/data/EPAFHM.medi_log10.csv +92 -0
- data/test/data/EPAFHM.mini_log10.csv +16 -0
- data/test/data/EPAFHM_log10.csv +581 -0
- data/test/data/loael_log10.csv +568 -0
- data/test/dataset.rb +195 -133
- data/test/descriptor.rb +27 -18
- data/test/error.rb +2 -2
- data/test/experiment.rb +4 -4
- data/test/feature.rb +2 -3
- data/test/gridfs.rb +10 -0
- data/test/model-classification.rb +106 -0
- data/test/model-nanoparticle.rb +128 -0
- data/test/model-regression.rb +171 -0
- data/test/model-validation.rb +19 -0
- data/test/nanomaterial-model-validation.rb +55 -0
- data/test/setup.rb +8 -4
- data/test/validation-classification.rb +67 -0
- data/test/validation-nanoparticle.rb +133 -0
- data/test/validation-regression.rb +92 -0
- metadata +50 -121
- data/test/classification.rb +0 -41
- data/test/data/CPDBAS_v5c_1547_29Apr2008part.sdf +0 -13553
- data/test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_mouse_TD50.csv +0 -436
- data/test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_rat_TD50.csv +0 -568
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Hamster.csv +0 -87
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mouse.csv +0 -978
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall.csv +0 -1120
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall_no_duplicates.csv +0 -1113
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity.csv +0 -850
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity_no_duplicates.csv +0 -829
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Rat.csv +0 -1198
- data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_SingleCellCall.csv +0 -1505
- data/test/data/EPA_v4b_Fathead_Minnow_Acute_Toxicity_LC50_mmol.csv +0 -581
- data/test/data/FDA_v3b_Maximum_Recommended_Daily_Dose_mmol.csv +0 -1217
- data/test/data/LOAEL_log_mg_corrected_smiles.csv +0 -568
- data/test/data/LOAEL_log_mmol_corrected_smiles.csv +0 -568
- data/test/data/boiling_points.ext.sdf +0 -11460
- data/test/data/cpdb_100.csv +0 -101
- data/test/data/hamster_carcinogenicity.ntriples +0 -618
- data/test/data/hamster_carcinogenicity.sdf +0 -2805
- data/test/data/hamster_carcinogenicity.xls +0 -0
- data/test/data/hamster_carcinogenicity.yaml +0 -352
- data/test/dataset-long.rb +0 -114
- data/test/lazar-long.rb +0 -92
- data/test/lazar-physchem-short.rb +0 -31
- data/test/prediction_models.rb +0 -20
- data/test/regression.rb +0 -43
- data/test/validation.rb +0 -108
data/lib/validation.rb
CHANGED
@@ -1,108 +1,25 @@
|
|
1
1
|
module OpenTox
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
end
|
20
|
-
|
21
|
-
def model
|
22
|
-
Model::Lazar.find model_id
|
23
|
-
end
|
24
|
-
|
25
|
-
def self.create model, training_set, test_set, crossvalidation=nil
|
26
|
-
|
27
|
-
atts = model.attributes.dup # do not modify attributes from original model
|
28
|
-
atts["_id"] = BSON::ObjectId.new
|
29
|
-
atts[:training_dataset_id] = training_set.id
|
30
|
-
validation_model = model.class.create training_set, atts
|
31
|
-
validation_model.save
|
32
|
-
cids = test_set.compound_ids
|
33
|
-
|
34
|
-
test_set_without_activities = Dataset.new(:compound_ids => cids.uniq) # remove duplicates and make sure that activities cannot be used
|
35
|
-
prediction_dataset = validation_model.predict test_set_without_activities
|
36
|
-
predictions = []
|
37
|
-
nr_unpredicted = 0
|
38
|
-
activities = test_set.data_entries.collect{|de| de.first}
|
39
|
-
prediction_dataset.data_entries.each_with_index do |de,i|
|
40
|
-
if de[0] #and de[1]
|
41
|
-
cid = prediction_dataset.compound_ids[i]
|
42
|
-
rows = cids.each_index.select{|r| cids[r] == cid }
|
43
|
-
activities = rows.collect{|r| test_set.data_entries[r][0]}
|
44
|
-
prediction = de.first
|
45
|
-
confidence = de[1]
|
46
|
-
predictions << [prediction_dataset.compound_ids[i], activities, prediction, de[1]]
|
47
|
-
else
|
48
|
-
nr_unpredicted += 1
|
49
|
-
end
|
3
|
+
module Validation
|
4
|
+
|
5
|
+
class Validation
|
6
|
+
include OpenTox
|
7
|
+
include Mongoid::Document
|
8
|
+
include Mongoid::Timestamps
|
9
|
+
store_in collection: "validations"
|
10
|
+
field :name, type: String
|
11
|
+
field :model_id, type: BSON::ObjectId
|
12
|
+
field :nr_instances, type: Integer, default: 0
|
13
|
+
field :nr_unpredicted, type: Integer, default: 0
|
14
|
+
field :predictions, type: Hash, default: {}
|
15
|
+
field :finished_at, type: Time
|
16
|
+
|
17
|
+
def model
|
18
|
+
Model::Lazar.find model_id
|
50
19
|
end
|
51
|
-
validation = self.new(
|
52
|
-
:model_id => validation_model.id,
|
53
|
-
:prediction_dataset_id => prediction_dataset.id,
|
54
|
-
:test_dataset_id => test_set.id,
|
55
|
-
:nr_instances => test_set.compound_ids.size,
|
56
|
-
:nr_unpredicted => nr_unpredicted,
|
57
|
-
:predictions => predictions#.sort{|a,b| p a; b[3] <=> a[3]} # sort according to confidence
|
58
|
-
)
|
59
|
-
validation.crossvalidation_id = crossvalidation.id if crossvalidation
|
60
|
-
validation.save
|
61
|
-
validation
|
62
|
-
end
|
63
|
-
|
64
|
-
end
|
65
|
-
|
66
|
-
class ClassificationValidation < Validation
|
67
|
-
end
|
68
20
|
|
69
|
-
class RegressionValidation < Validation
|
70
|
-
|
71
|
-
def statistics
|
72
|
-
rmse = 0
|
73
|
-
weighted_rmse = 0
|
74
|
-
rse = 0
|
75
|
-
weighted_rse = 0
|
76
|
-
mae = 0
|
77
|
-
weighted_mae = 0
|
78
|
-
confidence_sum = 0
|
79
|
-
predictions.each do |pred|
|
80
|
-
compound_id,activity,prediction,confidence = pred
|
81
|
-
if activity and prediction
|
82
|
-
error = Math.log10(prediction)-Math.log10(activity.median)
|
83
|
-
rmse += error**2
|
84
|
-
weighted_rmse += confidence*error**2
|
85
|
-
mae += error.abs
|
86
|
-
weighted_mae += confidence*error.abs
|
87
|
-
confidence_sum += confidence
|
88
|
-
else
|
89
|
-
warnings << "No training activities for #{Compound.find(compound_id).smiles} in training dataset #{model.training_dataset_id}."
|
90
|
-
$logger.debug "No training activities for #{Compound.find(compound_id).smiles} in training dataset #{model.training_dataset_id}."
|
91
|
-
end
|
92
|
-
end
|
93
|
-
x = predictions.collect{|p| p[1].median}
|
94
|
-
y = predictions.collect{|p| p[2]}
|
95
|
-
R.assign "measurement", x
|
96
|
-
R.assign "prediction", y
|
97
|
-
R.eval "r <- cor(-log(measurement),-log(prediction),use='complete')"
|
98
|
-
r = R.eval("r").to_ruby
|
99
|
-
|
100
|
-
mae = mae/predictions.size
|
101
|
-
weighted_mae = weighted_mae/confidence_sum
|
102
|
-
rmse = Math.sqrt(rmse/predictions.size)
|
103
|
-
weighted_rmse = Math.sqrt(weighted_rmse/confidence_sum)
|
104
|
-
{ "R^2" => r**2, "RMSE" => rmse, "MAE" => mae }
|
105
21
|
end
|
22
|
+
|
106
23
|
end
|
107
24
|
|
108
25
|
end
|
data/scripts/mg2mmol.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require_relative '../lazar/lib/lazar'
|
3
|
+
include OpenTox
|
4
|
+
newfile = ARGV[0].sub(/.csv/,"_mmol.csv")
|
5
|
+
p newfile
|
6
|
+
CSV.open(newfile, "wb") do |csv|
|
7
|
+
CSV.read(ARGV[0]).each do |line|
|
8
|
+
smi,mg = line
|
9
|
+
if mg.numeric?
|
10
|
+
c = Compound.from_smiles smi
|
11
|
+
mmol = c.mg_to_mmol mg.to_f
|
12
|
+
csv << [smi, mmol]
|
13
|
+
else
|
14
|
+
csv << [smi, mg.gsub(/mg/,'mmol')]
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
require_relative '../lib/lazar'
|
3
|
+
include OpenTox
|
4
|
+
|
5
|
+
newfile = ARGV[0].sub(/.csv/,"_log10.csv")
|
6
|
+
p newfile
|
7
|
+
CSV.open(newfile, "wb") do |csv|
|
8
|
+
i = 1
|
9
|
+
CSV.read(ARGV[0]).each do |line|
|
10
|
+
type,mmol = line
|
11
|
+
if i == 1
|
12
|
+
@type = type
|
13
|
+
csv << ["SMILES", "-log10(#{mmol})"]
|
14
|
+
else
|
15
|
+
if mmol.numeric?
|
16
|
+
if @type =~ /smiles/i
|
17
|
+
c = Compound.from_smiles type
|
18
|
+
elsif @type =~ /inchi/i
|
19
|
+
c = Compound.from_inchi type
|
20
|
+
type = c.smiles
|
21
|
+
else
|
22
|
+
p "Unknown type '#{type}' at line 1."
|
23
|
+
end
|
24
|
+
mmol = -Math.log10(mmol.to_f)
|
25
|
+
csv << [type, mmol]
|
26
|
+
else
|
27
|
+
p "Line #{i}: '#{mmol}' is not a numeric value."
|
28
|
+
end
|
29
|
+
end
|
30
|
+
i += 1
|
31
|
+
end
|
32
|
+
end
|
data/test/compound.rb
CHANGED
@@ -2,19 +2,16 @@ require_relative "setup.rb"
|
|
2
2
|
|
3
3
|
class CompoundTest < MiniTest::Test
|
4
4
|
|
5
|
-
def
|
5
|
+
def test_compound_from_smiles
|
6
6
|
c = OpenTox::Compound.from_smiles "F[B-](F)(F)F.[Na+]"
|
7
7
|
assert_equal "InChI=1S/BF4.Na/c2-1(3,4)5;/q-1;+1", c.inchi.chomp
|
8
8
|
assert_equal "F[B-](F)(F)F.[Na+]", c.smiles, "A failure here might be caused by a compound webservice running on 64bit architectures using an outdated version of OpenBabel. Please install OpenBabel version 2.3.2 or higher." # seems to be fixed in 2.3.2
|
9
9
|
end
|
10
10
|
|
11
|
-
def
|
11
|
+
def test_compound_from_smiles
|
12
12
|
c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N"
|
13
13
|
assert_equal "InChI=1S/C6H9NO/c1-5(4-7)3-6(2)8/h5H,3H2,1-2H3", c.inchi
|
14
14
|
assert_equal "CC(C#N)CC(=O)C", c.smiles
|
15
|
-
end
|
16
|
-
|
17
|
-
def test_2_compound_from_smiles
|
18
15
|
c = OpenTox::Compound.from_smiles "N#[N+]C1=CC=CC=C1.F[B-](F)(F)F"
|
19
16
|
assert_equal "InChI=1S/C6H5N2.BF4/c7-8-6-4-2-1-3-5-6;2-1(3,4)5/h1-5H;/q+1;-1", c.inchi
|
20
17
|
assert_equal "F[B-](F)(F)F.N#[N+]c1ccccc1", c.smiles
|
@@ -79,16 +76,6 @@ print c.sdf
|
|
79
76
|
assert_equal 9, c.fingerprint("FP4").size
|
80
77
|
end
|
81
78
|
|
82
|
-
def test_neighbors
|
83
|
-
d = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.csv")
|
84
|
-
d.compounds.each do |c|
|
85
|
-
refute_nil c.fingerprint("MP2D")
|
86
|
-
end
|
87
|
-
c = d.compounds[371]
|
88
|
-
n = c.fingerprint_neighbors({:type => "FP4", :min_sim => 0.7, :training_dataset_id => d.id })
|
89
|
-
assert n.size >= 18, "Neighbors size (#{n.size}) should be larger than 17"
|
90
|
-
end
|
91
|
-
|
92
79
|
def test_openbabel_segfault
|
93
80
|
inchi = "InChI=1S/C19H27NO7/c1-11-9-19(12(2)27-19)17(23)26-14-6-8-20(4)7-5-13(15(14)21)10-25-16(22)18(11,3)24/h5,11-12,14,24H,6-10H2,1-4H3/b13-5-/t11-,12-,14-,18-,19?/m1/s1"
|
94
81
|
|
@@ -107,25 +94,6 @@ print c.sdf
|
|
107
94
|
end
|
108
95
|
end
|
109
96
|
|
110
|
-
def test_fingerprint_neighbors
|
111
|
-
types = ["FP2", "FP3", "FP4", "MACCS"]
|
112
|
-
min_sim = 0.7
|
113
|
-
training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.csv")
|
114
|
-
[
|
115
|
-
"CC(=O)CC(C)C#N",
|
116
|
-
"CC(=O)CC(C)C",
|
117
|
-
"C(=O)CC(C)C#N",
|
118
|
-
].each do |smi|
|
119
|
-
c = OpenTox::Compound.from_smiles smi
|
120
|
-
types.each do |type|
|
121
|
-
neighbors = c.fingerprint_neighbors({:type => type, :training_dataset_id => training_dataset.id, :min_sim => min_sim})
|
122
|
-
unless type == "FP2" and smi == "CC(=O)CC(C)C#N" or smi == "C(=O)CC(C)C#N" and (type == "FP2" or type == "MACCS")
|
123
|
-
refute_empty neighbors
|
124
|
-
end
|
125
|
-
end
|
126
|
-
end
|
127
|
-
end
|
128
|
-
|
129
97
|
def test_mna
|
130
98
|
c = OpenTox::Compound.from_smiles "N#[N+]C1=CC=CC=C1.F[B-](F)(F)F"
|
131
99
|
assert_equal 18, c.fingerprint("MNA").size
|
@@ -138,72 +106,14 @@ print c.sdf
|
|
138
106
|
assert 7, c.fingerprint("MP2D").uniq.size
|
139
107
|
end
|
140
108
|
|
141
|
-
def test_fingerprint_count_neighbors
|
142
|
-
types = ["MP2D", "MNA"]
|
143
|
-
min_sim = 0.0
|
144
|
-
training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.csv")
|
145
|
-
[
|
146
|
-
"CC(=O)CC(C)C#N",
|
147
|
-
"CC(=O)CC(C)C",
|
148
|
-
"C(=O)CC(C)C#N",
|
149
|
-
].each do |smi|
|
150
|
-
c = OpenTox::Compound.from_smiles smi
|
151
|
-
types.each do |type|
|
152
|
-
neighbors = c.fingerprint_count_neighbors({:type => type, :training_dataset_id => training_dataset.id, :min_sim => min_sim})
|
153
|
-
if type == "FP4"
|
154
|
-
fp4_neighbors = c.neighbors
|
155
|
-
neighbors.each do |n|
|
156
|
-
assert_includes fp4_neighbors, n
|
157
|
-
end
|
158
|
-
end
|
159
|
-
end
|
160
|
-
end
|
161
|
-
end
|
162
|
-
|
163
|
-
def test_fingerprint_db_neighbors
|
164
|
-
#skip
|
165
|
-
training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.csv")
|
166
|
-
[
|
167
|
-
"CC(=O)CC(C)C#N",
|
168
|
-
"CC(=O)CC(C)C",
|
169
|
-
"C(=O)CC(C)C#N",
|
170
|
-
].each do |smi|
|
171
|
-
c = OpenTox::Compound.from_smiles smi
|
172
|
-
t = Time.now
|
173
|
-
neighbors = c.db_neighbors(:training_dataset_id => training_dataset.id, :min_sim => 0.2)
|
174
|
-
p Time.now - t
|
175
|
-
t = Time.now
|
176
|
-
neighbors2 = c.fingerprint_neighbors({:type => "MP2D", :training_dataset_id => training_dataset.id, :min_sim => 0.2})
|
177
|
-
p Time.now - t
|
178
|
-
p neighbors.size
|
179
|
-
p neighbors2.size
|
180
|
-
#p neighbors
|
181
|
-
#p neighbors2
|
182
|
-
#p neighbors2 - neighbors
|
183
|
-
#assert_equal neighbors, neighbors2
|
184
|
-
end
|
185
|
-
end
|
186
|
-
|
187
109
|
def test_molecular_weight
|
188
110
|
c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C"
|
189
111
|
assert_equal 100.15888, c.molecular_weight
|
190
112
|
end
|
191
113
|
|
192
|
-
def test_mg_conversions
|
193
|
-
# TODO fix!
|
194
|
-
skip
|
195
|
-
c = OpenTox::Compound.from_smiles "O"
|
196
|
-
mw = c.molecular_weight
|
197
|
-
assert_equal 18.01528, mw
|
198
|
-
assert_equal 0.8105107141417474, c.logmmol_to_mg(4.34688225631145, mw)
|
199
|
-
assert_equal 9007.64, c.mmol_to_mg(500, mw)
|
200
|
-
assert_equal 2437.9999984148976, c.logmg_to_mg(3.387033701)
|
201
|
-
end
|
202
|
-
|
203
114
|
def test_physchem
|
204
115
|
c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C"
|
205
|
-
|
206
|
-
assert_equal PhysChem::
|
207
|
-
assert_equal PhysChem::unique_descriptors.size, c.physchem(PhysChem.unique_descriptors).size
|
116
|
+
properties = c.calculate_properties(PhysChem.openbabel_descriptors)
|
117
|
+
assert_equal PhysChem::OPENBABEL.size, properties.size
|
208
118
|
end
|
209
119
|
end
|
@@ -0,0 +1,92 @@
|
|
1
|
+
STRUCTURE_SMILES,-log10(LC50_mmol)
|
2
|
+
C1=CC(C=O)=CC(OC)=C1OCCCCCC,1.9469215565165803
|
3
|
+
C1(OC)=C([N+]([O-])=O)C(C=O)=CC(Br)=C1O,0.575118363368933
|
4
|
+
C1=CC(Cl)=CC=C1OC2=C([N+](=O)[O-])C=CC=C2,2.114073660198569
|
5
|
+
CC1=C(NC=O)C=CC=C1Cl,0.5606673061697374
|
6
|
+
CCCCOC(=O)C1=CC=CC(C(=O)OCCCC)=C1,2.490797477668897
|
7
|
+
C(C1=CC=CC=C1)(C2=CC=CC=C2)(O)C#C,1.2732727909734278
|
8
|
+
CCCSCCSCCC,1.3746875490383261
|
9
|
+
CCCSCCCCSCCC,1.8386319977650252
|
10
|
+
CCCSCCCCSCCC,1.7328282715969863
|
11
|
+
ClCCOC(=O)NC1CCCCC1,0.7695510786217261
|
12
|
+
O=C1C(C2=CC=CC=C2)(C(=O)NC(=O)N1)CC,-0.31806333496276157
|
13
|
+
OC1=C(C=C(C=C1)[N+](=O)[O-])[N+](=O)[O-],1.2276782932770802
|
14
|
+
NC(=O)OCC,-1.7693773260761385
|
15
|
+
[O-]C(C1=CC=CC=C1O)=O.[Na+],-1.0969100130080565
|
16
|
+
C1=CC=CC=C1C(=O)N,-0.7371926427047373
|
17
|
+
CC[N+](CC)(CC)CC1(=CC=CC=C1).[Cl-],0.1505805862031006
|
18
|
+
CN(C)N,0.8827287043442358
|
19
|
+
CC(C(C(NC([O-])=N1)=O)(C1=O)CC)CCC.[Na+],0.7011469235902933
|
20
|
+
N1C(=O)C(CC)(CCC(C)C)C(=O)NC1=O,0.42365864979420714
|
21
|
+
O=C1C2=C(N=CN2C)N(C(=O)N1C)C,0.10902040301031106
|
22
|
+
C1=CC=C2C(=C1)C(=O)C(C)=CC2=O,3.1944991418415998
|
23
|
+
OC1=C(Cl)C(Cl)=C(Cl)C=C1Cl,2.3526170298853804
|
24
|
+
OC1=CC(C)=C(Cl)C=C1,1.4156687756324693
|
25
|
+
O=S(O)(O)=O.C1(=CC=CC=C1CC(N)C).C2=CC=CC=C2CC(N)C,1.106793246940152
|
26
|
+
O(CC)CC,-1.5378190950732742
|
27
|
+
NC1=CC=CC=C1,-0.05307844348341968
|
28
|
+
O=C(OC1=C2C(=CC=C1)C=CC=C2)NC,1.3615107430453628
|
29
|
+
CCO,-2.503790683057181
|
30
|
+
C1(=NC=CC=C1C2CCCN2C).OS(O)(=O)=O,1.275724130399211
|
31
|
+
C1(O)=CC=CC=C1C(=O)N,0.13312218566250114
|
32
|
+
CCCCCC=O,0.7569619513137056
|
33
|
+
O=C1OC2=CC=CC=C2C(O)=C1CC3=C(O)C4=CC=CC=C4OC3=O,1.8181564120552274
|
34
|
+
C1(C=O)=CC=C(OC2=CC=CC=C2)C=C1,1.6345120151091004
|
35
|
+
CO,-2.962369335670021
|
36
|
+
OC(C)C,-2.1583624920952498
|
37
|
+
CC(=O)C,-2.089905111439398
|
38
|
+
ClC(Cl)Cl,0.22767829327708025
|
39
|
+
CS(=O)C,-2.6384892569546374
|
40
|
+
ClC(C(Cl)(Cl)Cl)(Cl)Cl,2.221848749616356
|
41
|
+
OC1=C(C=C(C(=C1CC2=C(C(=CC(=C2Cl)Cl)Cl)O)Cl)Cl)Cl,4.287350298372789
|
42
|
+
C1=CC(=CC=C1N)C(=O)CC,0.009217308196862182
|
43
|
+
OCCC,-1.8790958795000727
|
44
|
+
CCCCO,-1.3673559210260189
|
45
|
+
CCCCCO,-0.72916478969277
|
46
|
+
C1=CC=CC=C1,0.6478174818886375
|
47
|
+
CC(Cl)(Cl)Cl,0.4497716469449059
|
48
|
+
[S-]C1=NC(C(C(C)CCC)(CC)C(N1)=O)=O.[Na+],1.0039263455147247
|
49
|
+
CC#N,-1.6031443726201824
|
50
|
+
CC=O,0.11520463605101904
|
51
|
+
ClCCl,-0.5899496013257077
|
52
|
+
IC(I)I,2.129596094720973
|
53
|
+
[N+](C)(C)(C)C.[Cl-],-0.6253124509616739
|
54
|
+
CC(C)(C)O,-1.9370161074648142
|
55
|
+
C(F)(F)(F)CO,-0.07554696139253074
|
56
|
+
CC(=O)C(C)(C)C,0.06098022355133353
|
57
|
+
ClC(C(Cl)Cl)(Cl)Cl,1.4294570601181025
|
58
|
+
CC1(C)NC(=O)NC1=O,-2.110589710299249
|
59
|
+
CCC(O)(C)CC,-0.8182258936139555
|
60
|
+
C#CC(O)(C)CC,-1.0934216851622351
|
61
|
+
C1CCCC(C#C)(O)C1,-0.31386722036915343
|
62
|
+
CCCCOCCOP(=O)(OCCOCCCC)OCCOCCCC,1.5512936800949202
|
63
|
+
CCCCOCCOP(=O)(OCCOCCCC)OCCOCCCC,1.7423214251308154
|
64
|
+
OCC(C)C,-1.2855573090077739
|
65
|
+
CC(Cl)CCl,-0.04921802267018165
|
66
|
+
NCC(N)C,-1.1335389083702174
|
67
|
+
CC(O)CC,-1.6946051989335686
|
68
|
+
CCC(=O)C,-1.6503075231319364
|
69
|
+
OC(C)CN,-1.526339277389844
|
70
|
+
ClC(CCl)Cl,0.21324857785443882
|
71
|
+
ClC(=CCl)Cl,0.4736607226101559
|
72
|
+
CC(=O)OC,-0.6830470382388496
|
73
|
+
ClC(C(Cl)Cl)Cl,0.9172146296835499
|
74
|
+
C1(C)(C)CCCC(C)=C1C=CC(C)=O,1.576754126063192
|
75
|
+
ClC1=C(O)C(Cl)=CC(=C1)C(C2=CC(Cl)=C(O)C(=C2)Cl)(C)C,2.4400933749638876
|
76
|
+
C(C1C=CC(=CC=1)O)(CC)(C)C,1.8013429130455774
|
77
|
+
C1CC(CCC1(N)C)C(C)(N)C,0.41680122603137726
|
78
|
+
ClC(Cl)C1=C(Cl)C=CC=C1Cl,2.374687549038326
|
79
|
+
C1=CC=C2C=CC=C3C2=C1CC3,1.9507819773298183
|
80
|
+
CC1=CNC2=C1C=CC=C2,1.1713401034646802
|
81
|
+
C1=CC=CC=C1OC(=O)C2=CC=CC=C2C(=O)OC3=CC=CC=C3,3.600326278518962
|
82
|
+
CCOC(=O)C1=CC=CC=C1C(=O)OCC,0.8446639625349383
|
83
|
+
C1=CC=C(C(=O)OCCCC)C(=C1)C(=O)OCCCC,2.444905551421681
|
84
|
+
O=C1C2=C(C=CC=C2)N=NN1CSP(=S)(OC)OC,3.694648630553376
|
85
|
+
C1=CC=CC=C1NC(=O)C2=C(O)C=CC=C2,1.7328282715969863
|
86
|
+
Cl\C(Cl)=C(Cl)/C(Cl)=C(Cl)\Cl,3.462180904926726
|
87
|
+
OC1=C(C(=C(C(=C1Cl)Cl)Cl)Cl)Cl,3.040005161671584
|
88
|
+
OC1=C(C=C(C=C1Cl)Cl)Cl,1.6055483191737838
|
89
|
+
OC1=CC(C(F)(F)F)=C([N+]([O-])=O)C=C1,1.3555614105321614
|
90
|
+
C1(N)=CC=CC=C1C(=O)N,-0.4623979978989561
|
91
|
+
C1(N)=CC=CC=C1C(=O)N,-0.3979400086720376
|
92
|
+
OC1=C([N+]([O-])=O)C=CC=C1,-0.06069784035361165
|
@@ -0,0 +1,16 @@
|
|
1
|
+
STRUCTURE_SMILES,-log10(LC50_mmol)
|
2
|
+
C1=CC(C=O)=CC(OC)=C1OCCCCCC,1.9469215565165803
|
3
|
+
C1(OC)=C([N+]([O-])=O)C(C=O)=CC(Br)=C1O,0.575118363368933
|
4
|
+
C1=CC(Cl)=CC=C1OC2=C([N+](=O)[O-])C=CC=C2,2.114073660198569
|
5
|
+
CC1=C(NC=O)C=CC=C1Cl,0.5606673061697374
|
6
|
+
CCCCOC(=O)C1=CC=CC(C(=O)OCCCC)=C1,2.490797477668897
|
7
|
+
C(C1=CC=CC=C1)(C2=CC=CC=C2)(O)C#C,1.2732727909734278
|
8
|
+
CCCSCCSCCC,1.3746875490383261
|
9
|
+
CCCSCCCCSCCC,1.8386319977650252
|
10
|
+
ClCCOC(=O)NC1CCCCC1,0.7695510786217261
|
11
|
+
O=C1C(C2=CC=CC=C2)(C(=O)NC(=O)N1)CC,-0.31806333496276157
|
12
|
+
OC1=C(C=C(C=C1)[N+](=O)[O-])[N+](=O)[O-],1.2276782932770802
|
13
|
+
NC(=O)OCC,-1.7693773260761385
|
14
|
+
[O-]C(C1=CC=CC=C1O)=O.[Na+],-1.0969100130080565
|
15
|
+
C1=CC=CC=C1C(=O)N,-0.7371926427047373
|
16
|
+
CC[N+](CC)(CC)CC1(=CC=CC=C1).[Cl-],0.1505805862031006
|