lazar 0.0.5 → 0.0.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (64) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +10 -0
  3. data/.yardopts +4 -0
  4. data/Gemfile +2 -0
  5. data/LICENSE +674 -0
  6. data/README.md +44 -0
  7. data/Rakefile +1 -0
  8. data/VERSION +1 -0
  9. data/ext/lazar/Makefile +5 -0
  10. data/java/CdkDescriptorInfo.class +0 -0
  11. data/java/CdkDescriptorInfo.java +22 -0
  12. data/java/CdkDescriptors.class +0 -0
  13. data/java/CdkDescriptors.java +141 -0
  14. data/java/Jmol.jar +0 -0
  15. data/java/JoelibDescriptorInfo.class +0 -0
  16. data/java/JoelibDescriptorInfo.java +15 -0
  17. data/java/JoelibDescriptors.class +0 -0
  18. data/java/JoelibDescriptors.java +60 -0
  19. data/java/Rakefile +15 -0
  20. data/java/cdk-1.4.19.jar +0 -0
  21. data/java/joelib2.jar +0 -0
  22. data/java/log4j.jar +0 -0
  23. data/lazar.gemspec +28 -0
  24. data/lib/SMARTS_InteLigand.txt +983 -0
  25. data/mongoid.yml +8 -0
  26. data/test/data/CPDBAS_v5c_1547_29Apr2008part.sdf +13553 -0
  27. data/test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_mouse_TD50.csv +436 -0
  28. data/test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_rat_TD50.csv +568 -0
  29. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Hamster.csv +87 -0
  30. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mouse.csv +978 -0
  31. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall.csv +1120 -0
  32. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall_no_duplicates.csv +1113 -0
  33. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity.csv +850 -0
  34. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity_no_duplicates.csv +829 -0
  35. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Rat.csv +1198 -0
  36. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_SingleCellCall.csv +1505 -0
  37. data/test/data/EPAFHM.csv +618 -0
  38. data/test/data/EPAFHM.medi.csv +100 -0
  39. data/test/data/EPAFHM.mini.csv +22 -0
  40. data/test/data/EPA_v4b_Fathead_Minnow_Acute_Toxicity_LC50_mmol.csv +581 -0
  41. data/test/data/FDA_v3b_Maximum_Recommended_Daily_Dose_mmol.csv +1217 -0
  42. data/test/data/ISSCAN-multi.csv +59 -0
  43. data/test/data/LOAEL_log_mg_corrected_smiles.csv +568 -0
  44. data/test/data/LOAEL_log_mmol_corrected_smiles.csv +568 -0
  45. data/test/data/acetaldehyde.sdf +14 -0
  46. data/test/data/boiling_points.ext.sdf +11460 -0
  47. data/test/data/cpdb_100.csv +101 -0
  48. data/test/data/hamster_carcinogenicity.csv +86 -0
  49. data/test/data/hamster_carcinogenicity.mini.bool_float.csv +11 -0
  50. data/test/data/hamster_carcinogenicity.mini.bool_int.csv +11 -0
  51. data/test/data/hamster_carcinogenicity.mini.bool_string.csv +11 -0
  52. data/test/data/hamster_carcinogenicity.mini.csv +11 -0
  53. data/test/data/hamster_carcinogenicity.ntriples +618 -0
  54. data/test/data/hamster_carcinogenicity.sdf +2805 -0
  55. data/test/data/hamster_carcinogenicity.xls +0 -0
  56. data/test/data/hamster_carcinogenicity.yaml +352 -0
  57. data/test/data/hamster_carcinogenicity_with_errors.csv +88 -0
  58. data/test/data/kazius.csv +4070 -0
  59. data/test/data/multi_cell_call.csv +1067 -0
  60. data/test/data/multi_cell_call_no_dup.csv +1057 -0
  61. data/test/data/multicolumn.csv +8 -0
  62. data/test/data/rat_feature_dataset.csv +1179 -0
  63. data/test/data/wrong_dataset.csv +8 -0
  64. metadata +108 -8
@@ -0,0 +1,983 @@
1
+ #
2
+ # SMARTS Patterns for Functional Group Classification
3
+ #
4
+ # written by Christian Laggner
5
+ # Copyright 2005 Inte:Ligand Software-Entwicklungs und Consulting GmbH
6
+ #
7
+ # Released under the Lesser General Public License (LGPL license)
8
+ # see http://www.gnu.org/copyleft/lesser.html
9
+ # Modified from Version 221105
10
+ #####################################################################################################
11
+
12
+ # General Stuff:
13
+ # These patterns were written in an attempt to represent the classification of organic compounds
14
+ # from the viewpoint of an organic chemist.
15
+ # They are often very restrictive. This may be generally a good thing, but it also takes some time
16
+ # for filtering/indexing large compound sets.
17
+ # For filtering undesired groups (in druglike compounds) one will want to have more general patterns
18
+ # (e.g. you don't want *any* halide of *any* acid, *neither* aldehyde *nor* formyl esters and amides, ...).
19
+ #
20
+
21
+ # Part I: Carbon
22
+ # ==============
23
+
24
+
25
+ # I.1: Carbon-Carbon Bonds
26
+ # ------------------------
27
+
28
+ # I.1.1 Alkanes:
29
+
30
+ Primary_carbon: [CX4H3][#6]
31
+
32
+ Secondary_carbon: [CX4H2]([#6])[#6]
33
+
34
+ Tertiary_carbon: [CX4H1]([#6])([#6])[#6]
35
+
36
+ Quaternary_carbon: [CX4]([#6])([#6])([#6])[#6]
37
+
38
+
39
+ # I.1.2 C-C double and Triple Bonds
40
+
41
+ Alkene: [CX3;$([H2]),$([H1][#6]),$(C([#6])[#6])]=[CX3;$([H2]),$([H1][#6]),$(C([#6])[#6])]
42
+ # sp2 C may be substituted only by C or H -
43
+ # does not hit ketenes and allenes, nor enamines, enols and the like
44
+
45
+ Alkyne: [CX2]#[CX2]
46
+ # non-carbon substituents (e.g. alkynol ethers) are rather rare, thus no further discrimination
47
+
48
+ Allene: [CX3]=[CX2]=[CX3]
49
+
50
+
51
+ # I.2: One Carbon-Hetero Bond
52
+ # ---------------------------
53
+
54
+
55
+ # I.2.1 Alkyl Halogenides
56
+
57
+ Alkylchloride: [ClX1][CX4]
58
+ # will also hit chloromethylethers and the like, but no chloroalkenes, -alkynes or -aromats
59
+ # a more restrictive version can be obtained by modifying the Alcohol string.
60
+
61
+ Alkylfluoride: [FX1][CX4]
62
+
63
+ Alkylbromide: [BrX1][CX4]
64
+
65
+ Alkyliodide: [IX1][CX4]
66
+
67
+
68
+ # I.2.2 Alcohols and Ethers
69
+
70
+ Alcohol: [OX2H][CX4;!$(C([OX2H])[O,S,#7,#15])]
71
+ # nonspecific definition, no acetals, aminals, and the like
72
+
73
+ Primary_alcohol: [OX2H][CX4H2;!$(C([OX2H])[O,S,#7,#15])]
74
+
75
+ Secondary_alcohol: [OX2H][CX4H;!$(C([OX2H])[O,S,#7,#15])]
76
+
77
+ Tertiary_alcohol: [OX2H][CX4D4;!$(C([OX2H])[O,S,#7,#15])]
78
+
79
+ Dialkylether: [OX2]([CX4;!$(C([OX2])[O,S,#7,#15,F,Cl,Br,I])])[CX4;!$(C([OX2])[O,S,#7,#15])]
80
+ # no acetals and the like; no enolethers
81
+
82
+ Dialkylthioether: [SX2]([CX4;!$(C([SX2])[O,S,#7,#15,F,Cl,Br,I])])[CX4;!$(C([SX2])[O,S,#7,#15])]
82
+ # no thioacetals and the like; no thioenolethers (exclusions are anchored on the thioether S, as in the I.2.4 definition)
84
+
85
+ Alkylarylether: [OX2](c)[CX4;!$(C([OX2])[O,S,#7,#15,F,Cl,Br,I])]
86
+ # no acetals and the like; no enolethers
87
+
88
+ Diarylether: [c][OX2][c]
89
+
90
+ Alkylarylthioether: [SX2](c)[CX4;!$(C([SX2])[O,S,#7,#15,F,Cl,Br,I])]
91
+
92
+ Diarylthioether: [c][SX2][c]
93
+
94
+ Oxonium: [O+;!$([O]~[!#6]);!$([O]*~[#7,#8,#15,#16])]
94
+ # can't be aromatic, thus O and not #8; only C/H substituents, and no N, O, P or S one atom further away
96
+
97
+ # I.2.3 Amines
98
+
99
+ Amine: [NX3+0,NX4+;!$([N]~[!#6]);!$([N]*~[#7,#8,#15,#16])]
100
+ # hits all amines (prim/sec/tert/quart), including ammonium salts, also enamines, but not amides, imides, aminals, ...
101
+
102
+ # the following amines include also the protonated forms
103
+
104
+ Primary_aliph_amine: [NX3H2+0,NX4H3+;!$([N][!C]);!$([N]*~[#7,#8,#15,#16])]
105
+
106
+ Secondary_aliph_amine: [NX3H1+0,NX4H2+;!$([N][!C]);!$([N]*~[#7,#8,#15,#16])]
107
+
108
+ Tertiary_aliph_amine: [NX3H0+0,NX4H1+;!$([N][!C]);!$([N]*~[#7,#8,#15,#16])]
109
+
110
+ Quaternary_aliph_ammonium: [NX4H0+;!$([N][!C]);!$([N]*~[#7,#8,#15,#16])]
111
+
112
+ Primary_arom_amine: [NX3H2+0,NX4H3+]c
113
+
114
+ Secondary_arom_amine: [NX3H1+0,NX4H2+;!$([N][!c]);!$([N]*~[#7,#8,#15,#16])]
115
+
116
+ Tertiary_arom_amine: [NX3H0+0,NX4H1+;!$([N][!c]);!$([N]*~[#7,#8,#15,#16])]
117
+
118
+ Quaternary_arom_ammonium: [NX4H0+;!$([N][!c]);!$([N]*~[#7,#8,#15,#16])]
119
+
120
+ Secondary_mixed_amine: [NX3H1+0,NX4H2+;$([N]([c])[C]);!$([N]*~[#7,#8,#15,#16])]
121
+
122
+ Tertiary_mixed_amine: [NX3H0+0,NX4H1+;$([N]([c])([C])[#6]);!$([N]*~[#7,#8,#15,#16])]
123
+
124
+ Quaternary_mixed_ammonium: [NX4H0+;$([N]([c])([C])([#6])[#6]);!$([N]*~[#7,#8,#15,#16])]
125
+
126
+ Ammonium: [N+;!$([N]~[!#6]);!$(N=*);!$([N]*~[#7,#8,#15,#16])]
127
+ # only C and H substituents allowed. Quaternary or protonated amines
128
+ # NX4+ or Nv4+ is not recognized by Daylight's depictmatch if less than four C are present
129
+
130
+
131
+ # I.2.4 Others
132
+
133
+ Alkylthiol: [SX2H][CX4;!$(C([SX2H])~[O,S,#7,#15])]
134
+
135
+ Dialkylthioether: [SX2]([CX4;!$(C([SX2])[O,S,#7,#15,F,Cl,Br,I])])[CX4;!$(C([SX2])[O,S,#7,#15])]
136
+
137
+ Alkylarylthioether: [SX2](c)[CX4;!$(C([SX2])[O,S,#7,#15])]
138
+
139
+ Disulfide: [SX2D2][SX2D2]
140
+
141
+ 1,2-Aminoalcohol: [OX2H][CX4;!$(C([OX2H])[O,S,#7,#15,F,Cl,Br,I])][CX4;!$(C([N])[O,S,#7,#15])][NX3;!$(NC=[O,S,N])]
142
+ # does not hit alpha-amino acids, enaminoalcohols, 1,2-aminoacetals, o-aminophenols, etc.
143
+
144
+ 1,2-Diol: [OX2H][CX4;!$(C([OX2H])[O,S,#7,#15])][CX4;!$(C([OX2H])[O,S,#7,#15])][OX2H]
145
+ # does not hit alpha-hydroxy acids, enolalcohols, 1,2-hydroxyacetals, 1,2-diphenols, etc.
146
+
147
+ 1,1-Diol: [OX2H][CX4;!$(C([OX2H])([OX2H])[O,S,#7,#15])][OX2H]
148
+
149
+ Hydroperoxide: [OX2H][OX2]
150
+ # does not necessarily have to be connected to a carbon atom; also includes hydrotrioxides
151
+
152
+ Peroxo: [OX2D2][OX2D2]
153
+
154
+ Organolithium_compounds: [LiX1][#6,#14]
155
+
156
+ Organomagnesium_compounds: [MgX2][#6,#14]
157
+ # not restricted to Grignard compounds, also dialkyl Mg
158
+
159
+ Organometallic_compounds: [!#1;!#5;!#6;!#7;!#8;!#9;!#14;!#15;!#16;!#17;!#33;!#34;!#35;!#52;!#53;!#85]~[#6;!-]
160
+ # very general, includes all metals covalently bound to carbon
161
+
162
+
163
+ # I.3: Two Carbon-Hetero Bonds (Carbonyl and Derivatives)
164
+ # ----------------------------
165
+
166
+ # I.3.1 Double Bond to Hetero
167
+
168
+ Aldehyde: [$([CX3H][#6]),$([CX3H2])]=[OX1]
169
+ # hits aldehydes including formaldehyde
170
+
171
+ Ketone: [#6][CX3](=[OX1])[#6]
172
+ # does not include oxo-groups connected to a (hetero-) aromatic ring
173
+
174
+ Thioaldehyde: [$([CX3H][#6]),$([CX3H2])]=[SX1]
175
+
176
+ Thioketone: [#6][CX3](=[SX1])[#6]
177
+ # does not include thioxo-groups connected to a (hetero-) aromatic ring
178
+
179
+ Imine: [NX2;$([N][#6]),$([NH]);!$([N][CX3]=[#7,#8,#15,#16])]=[CX3;$([CH2]),$([CH][#6]),$([C]([#6])[#6])]
180
+ # nitrogen is not part of an amide-like structure, nor of an aromatic ring, but can be part of an aminal or similar
181
+
182
+ Immonium: [NX3+;!$([N][!#6]);!$([N][CX3]=[#7,#8,#15,#16])]
183
+
184
+ Oxime: [NX2](=[CX3;$([CH2]),$([CH][#6]),$([C]([#6])[#6])])[OX2H]
185
+
186
+ Oximether: [NX2](=[CX3;$([CH2]),$([CH][#6]),$([C]([#6])[#6])])[OX2][#6;!$(C=[#7,#8])]
187
+ # ether, not ester or amide; does not hit isoxazole
188
+
189
+
190
+ # I.3.2. Two Single Bonds to Hetero
191
+
192
+ Acetal: [OX2]([#6;!$(C=[O,S,N])])[CX4;!$(C(O)(O)[!#6])][OX2][#6;!$(C=[O,S,N])]
193
+ # does not hit hydroxy-methylesters, ketenacetals, hemiacetals, orthoesters, etc.
194
+
195
+ Hemiacetal: [OX2H][CX4;!$(C(O)(O)[!#6])][OX2][#6;!$(C=[O,S,N])]
196
+
197
+ Aminal: [NX3v3;!$(NC=[#7,#8,#15,#16])]([#6])[CX4;!$(C(N)(N)[!#6])][NX3v3;!$(NC=[#7,#8,#15,#16])][#6]
198
+ # Ns are not part of an amide or similar. v3 is to exclude nitro and similar groups
199
+
200
+ Hemiaminal: [NX3v3;!$(NC=[#7,#8,#15,#16])]([#6])[CX4;!$(C(N)(N)[!#6])][OX2H]
201
+
202
+ Thioacetal: [SX2]([#6;!$(C=[O,S,N])])[CX4;!$(C(S)(S)[!#6])][SX2][#6;!$(C=[O,S,N])]
203
+
204
+ Thiohemiacetal: [SX2]([#6;!$(C=[O,S,N])])[CX4;!$(C(S)(S)[!#6])][OX2H]
205
+
206
+ Halogen_acetal_like: [NX3v3,SX2,OX2;!$(*C=[#7,#8,#15,#16])][CX4;!$(C([N,S,O])([N,S,O])[!#6])][FX1,ClX1,BrX1,IX1]
207
+ # hits chloromethylenethers and other reactive alkylating agents
208
+
209
+ Acetal_like: [NX3v3,SX2,OX2;!$(*C=[#7,#8,#15,#16])][CX4;!$(C([N,S,O])([N,S,O])[!#6])][FX1,ClX1,BrX1,IX1,NX3v3,SX2,OX2;!$(*C=[#7,#8,#15,#16])]
210
+ # includes all of the above and other combinations (S-C-N, hydrates, ...), but still no aminomethylenesters and similar
211
+
212
+ Halogenmethylen_ester_and_similar: [NX3v3,SX2,OX2;$(**=[#7,#8,#15,#16])][CX4;!$(C([N,S,O])([N,S,O])[!#6])][FX1,ClX1,BrX1,IX1]
213
+ # also reactive alkylating agents. Acid does not have to be carboxylic acid, also S- and P-based acids allowed
214
+
215
+ NOS_methylen_ester_and_similar: [NX3v3,SX2,OX2;$(**=[#7,#8,#15,#16])][CX4;!$(C([N,S,O])([N,S,O])[!#6])][NX3v3,SX2,OX2;!$(*C=[#7,#8,#15,#16])]
216
+ # Same as above, but N,O or S instead of halogen. Ester/amide allowed only on one side
217
+
218
+ Hetero_methylen_ester_and_similar: [NX3v3,SX2,OX2;$(**=[#7,#8,#15,#16])][CX4;!$(C([N,S,O])([N,S,O])[!#6])][FX1,ClX1,BrX1,IX1,NX3v3,SX2,OX2;!$(*C=[#7,#8,#15,#16])]
219
+ # Combination of the last two patterns
220
+
221
+ Cyanhydrine: [NX1]#[CX2][CX4;$([CH2]),$([CH]([CX2])[#6]),$(C([CX2])([#6])[#6])][OX2H]
222
+
223
+
224
+ # I.3.3 Single Bond to Hetero, C=C Double Bond (Enols and Similar)
225
+
226
+ Chloroalkene: [ClX1][CX3]=[CX3]
227
+
228
+ Fluoroalkene: [FX1][CX3]=[CX3]
229
+
230
+ Bromoalkene: [BrX1][CX3]=[CX3]
231
+
232
+ Iodoalkene: [IX1][CX3]=[CX3]
233
+
234
+ Enol: [OX2H][CX3;$([H1]),$(C[#6])]=[CX3]
235
+ # no phenols
236
+
237
+ Endiol: [OX2H][CX3;$([H1]),$(C[#6])]=[CX3;$([H1]),$(C[#6])][OX2H]
238
+ # no 1,2-diphenols, ketenacetals, ...
239
+
240
+ Enolether: [OX2]([#6;!$(C=[N,O,S])])[CX3;$([H0][#6]),$([H1])]=[CX3]
241
+ # also finds enediol diethers, but not enol esters; no aromatics
242
+
243
+ Enolester: [OX2]([CX3]=[OX1])[#6X3;$([#6][#6]),$([H1])]=[#6X3;!$(C[OX2H])]
244
+
245
+
246
+ Enamine: [NX3;$([NH2][CX3]),$([NH1]([CX3])[#6]),$([N]([CX3])([#6])[#6]);!$([N]*=[#7,#8,#15,#16])][CX3;$([CH]),$([C][#6])]=[CX3]
247
+ # does not hit amines attached to aromatic rings, nor may the nitrogen be aromatic
248
+
249
+ Thioenol: [SX2H][CX3;$([H1]),$(C[#6])]=[CX3]
250
+
251
+ Thioenolether: [SX2]([#6;!$(C=[N,O,S])])[CX3;$(C[#6]),$([CH])]=[CX3]
252
+
253
+
254
+ # I.4: Three Carbon-Hetero Bonds (Carboxyl and Derivatives)
255
+ # ------------------------------
256
+
257
+ Acylchloride: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[ClX1]
258
+
259
+ Acylfluoride: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[FX1]
260
+
261
+ Acylbromide: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[BrX1]
262
+
263
+ Acyliodide: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[IX1]
264
+
265
+ Acylhalide: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[FX1,ClX1,BrX1,IX1]
266
+ # all of the above
267
+
268
+
269
+ # The following contains all simple carboxylic combinations of O, N, S, & Hal -
270
+ # - acids, esters, amides, ... as well as a few extra cases (anhydride, hydrazide...)
271
+ # Cyclic structures (including aromats) like lactones, lactames, ... got their own
272
+ # definitions. Structures where both heteroatoms are part of an aromatic ring
273
+ # (oxazoles, imidazoles, ...) were excluded.
274
+
275
+ Carboxylic_acid: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[$([OX2H]),$([OX1-])]
276
+ # includes carboxylate anions
277
+
278
+ Carboxylic_ester: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[OX2][#6;!$(C=[O,N,S])]
279
+ # does not hit anhydrides or lactones
280
+
281
+ Lactone: [#6][#6X3R](=[OX1])[#8X2][#6;!$(C=[O,N,S])]
282
+ # may also be aromatic
283
+
284
+ Carboxylic_anhydride: [CX3;$([H0][#6]),$([H1])](=[OX1])[#8X2][CX3;$([H0][#6]),$([H1])](=[OX1])
285
+ # anhydride formed by two carboxylic acids, no mixed anhydrides (e.g. between carboxylic acid and sulfuric acid); may be part of a ring, even aromatic
286
+
287
+ Carboxylic_acid_derivative: [$([#6X3H0][#6]),$([#6X3H])](=[!#6])[!#6]
288
+ # includes most of the structures of I.4 and many more, also 1,3-heteroaromatics such as isoxazole
289
+
290
+ Carbothioic_acid: [CX3;!R;$([C][#6]),$([CH]);$([C](=[OX1])[$([SX2H]),$([SX1-])]),$([C](=[SX1])[$([OX2H]),$([OX1-])])]
291
+ # hits both tautomeric forms, as well as anions
292
+
293
+ Carbothioic_S_ester: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[SX2][#6;!$(C=[O,N,S])]
294
+
295
+ Carbothioic_S_lactone: [#6][#6X3R](=[OX1])[#16X2][#6;!$(C=[O,N,S])]
296
+ # may also be aromatic
297
+
298
+ Carbothioic_O_ester: [CX3;$([H0][#6]),$([H1])](=[SX1])[OX2][#6;!$(C=[O,N,S])]
299
+
300
+ Carbothioic_O_lactone: [#6][#6X3R](=[SX1])[#8X2][#6;!$(C=[O,N,S])]
301
+
302
+ Carbothioic_halide: [CX3;$([H0][#6]),$([H1])](=[SX1])[FX1,ClX1,BrX1,IX1]
303
+
304
+ Carbodithioic_acid: [CX3;!R;$([C][#6]),$([CH]);$([C](=[SX1])[SX2H])]
305
+
306
+ Carbodithioic_ester: [CX3;!R;$([C][#6]),$([CH]);$([C](=[SX1])[SX2][#6;!$(C=[O,N,S])])]
307
+
308
+ Carbodithiolactone: [#6][#6X3R](=[SX1])[#16X2][#6;!$(C=[O,N,S])]
309
+
310
+
311
+ Amide: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
312
+ # does not hit lactames
313
+
314
+ Primary_amide: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[NX3H2]
315
+
316
+ Secondary_amide: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[#7X3H1][#6;!$(C=[O,N,S])]
317
+
318
+ Tertiary_amide: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[#7X3H0]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])]
319
+
320
+ Lactam: [#6R][#6X3R](=[OX1])[#7X3;$([H1][#6;!$(C=[O,N,S])]),$([H0]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
321
+ # cyclic amides, may also be aromatic
322
+
323
+ Alkyl_imide: [#6X3;$([H0][#6]),$([H1])](=[OX1])[#7X3H0]([#6])[#6X3;$([H0][#6]),$([H1])](=[OX1])
324
+ # may be part of a ring, even aromatic. only C allowed at central N. May also be triacyl amide
325
+
326
+ N_hetero_imide: [#6X3;$([H0][#6]),$([H1])](=[OX1])[#7X3H0]([!#6])[#6X3;$([H0][#6]),$([H1])](=[OX1])
327
+ # everything else than H or C at central N
328
+
329
+ Imide_acidic: [#6X3;$([H0][#6]),$([H1])](=[OX1])[#7X3H1][#6X3;$([H0][#6]),$([H1])](=[OX1])
330
+ # can be deprotonated
331
+
332
+ Thioamide: [$([CX3;!R][#6]),$([CX3H;!R])](=[SX1])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
333
+ # does not hit thiolactames
334
+
335
+ Thiolactam: [#6R][#6X3R](=[SX1])[#7X3;$([H1][#6;!$(C=[O,N,S])]),$([H0]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
336
+ # cyclic thioamides, may also be aromatic
337
+
338
+
339
+ Oximester: [#6X3;$([H0][#6]),$([H1])](=[OX1])[#8X2][#7X2]=,:[#6X3;$([H0]([#6])[#6]),$([H1][#6]),$([H2])]
340
+ # may also be part of a ring / aromatic
341
+
342
+ Amidine: [NX3;!$(NC=[O,S])][CX3;$([CH]),$([C][#6])]=[NX2;!$(NC=[O,S])]
343
+ # only basic amidines, not as part of aromatic ring (e.g. imidazole)
344
+
345
+ Hydroxamic_acid: [CX3;$([H0][#6]),$([H1])](=[OX1])[#7X3;$([H1]),$([H0][#6;!$(C=[O,N,S])])][$([OX2H]),$([OX1-])]
346
+
347
+ Hydroxamic_acid_ester: [CX3;$([H0][#6]),$([H1])](=[OX1])[#7X3;$([H1]),$([H0][#6;!$(C=[O,N,S])])][OX2][#6;!$(C=[O,N,S])]
348
+ # does not hit anhydrides of carboxylic acids with hydroxamic acids
349
+
350
+
351
+ Imidoacid: [CX3R0;$([H0][#6]),$([H1])](=[NX2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[$([OX2H]),$([OX1-])]
352
+ # not cyclic
353
+
354
+ Imidoacid_cyclic: [#6R][#6X3R](=,:[#7X2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[$([OX2H]),$([OX1-])]
355
+ # the enamide-form of lactames. may be aromatic like 2-hydroxypyridine
356
+
357
+ Imidoester: [CX3R0;$([H0][#6]),$([H1])](=[NX2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[OX2][#6;!$(C=[O,N,S])]
358
+ # esters of the above structures. no anhydrides.
359
+
360
+ Imidolactone: [#6R][#6X3R](=,:[#7X2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[OX2][#6;!$(C=[O,N,S])]
361
+ # no oxazoles and similar
362
+
363
+ Imidothioacid: [CX3R0;$([H0][#6]),$([H1])](=[NX2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[$([SX2H]),$([SX1-])]
364
+ # not cyclic
365
+
366
+ Imidothioacid_cyclic: [#6R][#6X3R](=,:[#7X2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[$([SX2H]),$([SX1-])]
367
+ # the enamide-form of thiolactames. may be aromatic like 2-thiopyridine
368
+
369
+ Imidothioester: [CX3R0;$([H0][#6]),$([H1])](=[NX2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[SX2][#6;!$(C=[O,N,S])]
370
+ # thioesters of the above structures. no anhydrides.
371
+
372
+ Imidothiolactone: [#6R][#6X3R](=,:[#7X2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[SX2][#6;!$(C=[O,N,S])]
373
+ # no thioxazoles and similar
374
+
375
+ Amidine: [#7X3v3;!$(N([#6X3]=[#7X2])C=[O,S])][CX3R0;$([H1]),$([H0][#6])]=[NX2v3;!$(N(=[#6X3][#7X3])C=[O,S])]
376
+ # only basic amidines, not substituted by carbonyl or thiocarbonyl, not as part of a ring
377
+
378
+ Imidolactam: [#6][#6X3R;$([H0](=[NX2;!$(N(=[#6X3][#7X3])C=[O,S])])[#7X3;!$(N([#6X3]=[#7X2])C=[O,S])]),$([H0](-[NX3;!$(N([#6X3]=[#7X2])C=[O,S])])=,:[#7X2;!$(N(=[#6X3][#7X3])C=[O,S])])]
379
+ # one of the two C~N bonds is part of a ring (may be aromatic), but not both - thus no imidazole
380
+
381
+ Imidoylhalide: [CX3R0;$([H0][#6]),$([H1])](=[NX2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[FX1,ClX1,BrX1,IX1]
382
+ # not cyclic
383
+
384
+ Imidoylhalide_cyclic: [#6R][#6X3R](=,:[#7X2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[FX1,ClX1,BrX1,IX1]
385
+ # may also be aromatic
386
+
387
+ # may be ring, aromatic, substituted with carbonyls, hetero, ...
388
+ # (everything else would get too complicated)
389
+
390
+ Amidrazone: [$([$([#6X3][#6]),$([#6X3H])](=[#7X2v3])[#7X3v3][#7X3v3]),$([$([#6X3][#6]),$([#6X3H])]([#7X3v3])=[#7X2v3][#7X3v3])]
391
+ # hits both tautomers. as above, it may be ring, aromatic, substituted with carbonyls, hetero, ...
392
+
393
+
394
+ Alpha_aminoacid: [NX3,NX4+;!$([N]~[!#6]);!$([N]*~[#7,#8,#15,#16])][C][CX3](=[OX1])[OX2H,OX1-]
395
+ # N may be alkylated, but not part of an amide (as in peptides), ionic forms are included
396
+ # includes also non-natural aminoacids with double-bonded or two aliph./arom. substituents at alpha-C
397
+ # N may not be aromatic as in 1H-pyrrole-2-carboxylic acid
398
+
399
+ Alpha_hydroxyacid: [OX2H][C][CX3](=[OX1])[OX2H,OX1-]
400
+
401
+ Peptide_middle: [NX3;$([N][CX3](=[OX1])[C][NX3,NX4+])][C][CX3](=[OX1])[NX3;$([N][C][CX3](=[OX1])[NX3,OX2,OX1-])]
402
+ # finds peptidic structures which are neither C- nor N-terminal. Both neighbours must be amino-acids/peptides
403
+
404
+ Peptide_C_term: [NX3;$([N][CX3](=[OX1])[C][NX3,NX4+])][C][CX3](=[OX1])[OX2H,OX1-]
405
+ # finds C-terminal amino acids
406
+
407
+ Peptide_N_term: [NX3,NX4+;!$([N]~[!#6]);!$([N]*~[#7,#8,#15,#16])][C][CX3](=[OX1])[NX3;$([N][C][CX3](=[OX1])[NX3,OX2,OX1-])]
408
+ # finds N-terminal amino acids. As above, N may be substituted, but not part of an amide-bond.
409
+
410
+
411
+ Carboxylic_orthoester: [#6][OX2][CX4;$(C[#6]),$([CH])]([OX2][#6])[OX2][#6]
412
+ # also hits anhydride-like structures (e.g. HC(OMe)2-OC=O residues)
413
+
414
+ Ketene: [CX3]=[CX2]=[OX1]
415
+
416
+ Ketenacetal: [#7X3,#8X2,#16X2;$(*[#6,#14])][#6X3]([#7X3,#8X2,#16X2;$(*[#6,#14])])=[#6X3]
417
+ # includes aminals, silylacetals, ketenesters, etc. C=C DB is not aromatic, everything else may be
418
+
419
+ Nitrile: [NX1]#[CX2]
420
+ # includes cyanhydrines
421
+
422
+ Isonitrile: [CX1-]#[NX2+]
423
+
424
+
425
+ Vinylogous_carbonyl_or_carboxyl_derivative: [#6X3](=[OX1])[#6X3]=,:[#6X3][#7,#8,#16,F,Cl,Br,I]
426
+ # may be part of a ring, even aromatic
427
+
428
+ Vinylogous_acid: [#6X3](=[OX1])[#6X3]=,:[#6X3][$([OX2H]),$([OX1-])]
429
+
430
+ Vinylogous_ester: [#6X3](=[OX1])[#6X3]=,:[#6X3][#8X2][#6;!$(C=[O,N,S])]
431
+
432
+ Vinylogous_amide: [#6X3](=[OX1])[#6X3]=,:[#6X3][#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
433
+
434
+ Vinylogous_halide: [#6X3](=[OX1])[#6X3]=,:[#6X3][FX1,ClX1,BrX1,IX1]
435
+
436
+
437
+
438
+ # I.5: Four Carbon-Hetero Bonds (Carbonic Acid and Derivatives)
439
+ # -----------------------------
440
+
441
+ Carbonic_acid_dieester: [#6;!$(C=[O,N,S])][#8X2][#6X3](=[OX1])[#8X2][#6;!$(C=[O,N,S])]
442
+ # may be part of a ring, even aromatic
443
+
444
+ Carbonic_acid_esterhalide: [#6;!$(C=[O,N,S])][OX2;!R][CX3](=[OX1])[FX1,ClX1,BrX1,IX1]
445
+
446
+ Carbonic_acid_monoester: [#6;!$(C=[O,N,S])][OX2;!R][CX3](=[OX1])[$([OX2H]),$([OX1-])]
447
+ # unstable
448
+
449
+ Carbonic_acid_derivatives: [!#6][#6X3](=[!#6])[!#6]
450
+
451
+
452
+ Thiocarbonic_acid_dieester: [#6;!$(C=[O,N,S])][#8X2][#6X3](=[SX1])[#8X2][#6;!$(C=[O,N,S])]
453
+ # may be part of a ring, even aromatic
454
+
455
+ Thiocarbonic_acid_esterhalide: [#6;!$(C=[O,N,S])][OX2;!R][CX3](=[SX1])[FX1,ClX1,BrX1,IX1]
456
+
457
+ Thiocarbonic_acid_monoester: [#6;!$(C=[O,N,S])][OX2;!R][CX3](=[SX1])[$([OX2H]),$([OX1-])]
458
+
459
+
460
+ Urea: [#7X3;!$([#7][!#6])][#6X3](=[OX1])[#7X3;!$([#7][!#6])]
461
+ # no check whether part of imide, biuret, etc. Aromatic structures are only hit if
462
+ # both N share no double bonds, like in the dioxo-form of uracil
463
+
464
+ Thiourea: [#7X3;!$([#7][!#6])][#6X3](=[SX1])[#7X3;!$([#7][!#6])]
465
+
466
+ Isourea: [#7X2;!$([#7][!#6])]=,:[#6X3]([#8X2&!$([#8][!#6]),OX1-])[#7X3;!$([#7][!#6])]
467
+ # O may be substituted. no check whether further amide-like bonds are present. Aromatic
468
+ # structures are only hit if single bonded N shares no additional double bond, like in
469
+ # the 1-hydroxy-3-oxo form of uracil
470
+
471
+ Isothiourea: [#7X2;!$([#7][!#6])]=,:[#6X3]([#16X2&!$([#16][!#6]),SX1-])[#7X3;!$([#7][!#6])]
472
+
473
+ Guanidine: [N;v3X3,v4X4+][CX3](=[N;v3X2,v4X3+])[N;v3X3,v4X4+]
474
+ # also hits guanidinium salts. v3 and v4 to avoid nitroamidines
475
+
476
+ Carbaminic_acid: [NX3]C(=[OX1])[O;X2H,X1-]
477
+ # quite unstable, unlikely to be found. Also hits salts
478
+
479
+ Urethan: [#7X3][#6](=[OX1])[#8X2][#6]
480
+ # also hits when part of a ring, no check whether the last C is part of carbonyl
481
+
482
+ Biuret: [#7X3][#6](=[OX1])[#7X3][#6](=[OX1])[#7X3]
483
+
484
+ Semicarbazide: [#7X3][#7X3][#6X3]([#7X3;!$([#7][#7])])=[OX1]
485
+
486
+ Carbazide: [#7X3][#7X3][#6X3]([#7X3][#7X3])=[OX1]
487
+
488
+ Semicarbazone: [#7X2](=[#6])[#7X3][#6X3]([#7X3;!$([#7][#7])])=[OX1]
489
+
490
+ Carbazone: [#7X2](=[#6])[#7X3][#6X3]([#7X3][#7X3])=[OX1]
491
+
492
+ Thiosemicarbazide: [#7X3][#7X3][#6X3]([#7X3;!$([#7][#7])])=[SX1]
493
+
494
+ Thiocarbazide: [#7X3][#7X3][#6X3]([#7X3][#7X3])=[SX1]
495
+
496
+ Thiosemicarbazone: [#7X2](=[#6])[#7X3][#6X3]([#7X3;!$([#7][#7])])=[SX1]
497
+
498
+ Thiocarbazone: [#7X2](=[#6])[#7X3][#6X3]([#7X3][#7X3])=[SX1]
499
+
500
+
501
+ Isocyanate: [NX2]=[CX2]=[OX1]
502
+
503
+ Cyanate: [OX2][CX2]#[NX1]
504
+
505
+ Isothiocyanate: [NX2]=[CX2]=[SX1]
506
+
507
+ Thiocyanate: [SX2][CX2]#[NX1]
508
+
509
+ Carbodiimide: [NX2]=[CX2]=[NX2]
510
+
511
+ Orthocarbonic_derivatives: [CX4H0]([O,S,#7])([O,S,#7])([O,S,#7])[O,S,#7,F,Cl,Br,I]
512
+ # halogen allowed just once, to avoid mapping to -OCF3 and similar groups (much more
513
+ # stable than, for example, C(OCH3)4)
514
+
515
+
516
+ # I.6 Aromatics
517
+ # -------------
518
+
519
+ # I know that this classification is not very logical, arylamines are found under I.2 ...
520
+
521
+ Phenol: [OX2H][c]
522
+
523
+ 1,2-Diphenol: [OX2H][c][c][OX2H]
524
+
525
+ Arylchloride: [Cl][c]
526
+
527
+ Arylfluoride: [F][c]
528
+
529
+ Arylbromide: [Br][c]
530
+
531
+ Aryliodide: [I][c]
532
+
533
+ Arylthiol: [SX2H][c]
534
+
535
+ Iminoarene: [c]=[NX2;$([H1]),$([H0][#6;!$([C]=[N,S,O])])]
536
+ # N may be substituted with H or C, but not carbonyl or similar
537
+ # aromatic atom is always C, not S or P (these are not planar when substituted)
538
+
539
+ Oxoarene: [c]=[OX1]
540
+
541
+ Thioarene: [c]=[SX1]
542
+
543
+ Hetero_N_basic_H: [nX3H1+0]
544
+ # as in pyrrole. uncharged to exclude pyridinium ions
545
+
546
+ Hetero_N_basic_no_H: [nX3H0+0]
547
+ # as in N-methylpyrrole. uncharged to exclude pyridinium ions
548
+
549
+ Hetero_N_nonbasic: [nX2,nX3+]
550
+ # as in pyridine, pyridinium
551
+
552
+ Hetero_O: [o]
553
+
554
+ Hetero_S: [sX2]
555
+ # X2 because Daylight's depictmatch falsely describes C1=CS(=O)C=C1 as aromatic
556
+ # (is not planar because of lonepair at S)
557
+
558
+ Heteroaromatic: [a;!c]
559
+
560
+
561
+ # Part II: N, S, P, Si, B
562
+ # =======================
563
+
564
+
565
+ # II.1 Nitrogen
566
+ # -------------
567
+
568
+ Nitrite: [NX2](=[OX1])[O;$([X2]),$([X1-])]
569
+ # hits nitrous acid, its anion, esters, and other O-substituted derivatives
570
+
571
+ Thionitrite: [SX2][NX2]=[OX1]
572
+
573
+ Nitrate: [$([NX3](=[OX1])(=[OX1])[O;$([X2]),$([X1-])]),$([NX3+]([OX1-])(=[OX1])[O;$([X2]),$([X1-])])]
574
+ # hits nitric acid, its anion, esters, and other O-substituted derivatives
575
+
576
+ Nitro: [$([NX3](=O)=O),$([NX3+](=O)[O-])][!#8]
577
+ # hits nitro groups attached to C,N, ... but not nitrates
578
+
579
+ Nitroso: [NX2](=[OX1])[!#7;!#8]
580
+ # no nitrites, no nitrosamines
581
+
582
+ Azide: [NX1]~[NX2]~[NX2,NX1]
583
+ # hits both mesomeric forms, also anion
584
+
585
+ Acylazide: [CX3](=[OX1])[NX2]~[NX2]~[NX1]
586
+
587
+ Diazo: [$([#6]=[NX2+]=[NX1-]),$([#6-]-[NX2+]#[NX1])]
588
+
589
+ Diazonium: [#6][NX2+]#[NX1]
590
+
591
+ Nitrosamine: [#7;!$(N*=O)][NX2]=[OX1]
592
+
593
+ Nitrosamide: [NX2](=[OX1])N-*=O
594
+ # includes nitrososulfonamides
595
+
596
+ N-Oxide: [$([#7+][OX1-]),$([#7v5]=[OX1]);!$([#7](~[O])~[O]);!$([#7]=[#7])]
597
+ # Hits both forms. Won't hit azoxy, nitro, nitroso, or nitrate.
598
+
599
+
600
+ Hydrazine: [NX3;$([H2]),$([H1][#6]),$([H0]([#6])[#6]);!$(NC=[O,N,S])][NX3;$([H2]),$([H1][#6]),$([H0]([#6])[#6]);!$(NC=[O,N,S])]
601
+ # no hydrazides
602
+
603
+ Hydrazone: [NX3;$([H2]),$([H1][#6]),$([H0]([#6])[#6]);!$(NC=[O,N,S])][NX2]=[#6]
604
+
605
+ Hydroxylamine: [NX3;$([H2]),$([H1][#6]),$([H0]([#6])[#6]);!$(NC=[O,N,S])][OX2;$([H1]),$(O[#6;!$(C=[N,O,S])])]
606
+ # no discrimination between O-, N-, and O,N-substitution
607
+
608
+
609
+ # II.2 Sulfur
610
+ # -----------
611
+
612
+ Sulfon: [$([SX4](=[OX1])(=[OX1])([#6])[#6]),$([SX4+2]([OX1-])([OX1-])([#6])[#6])]
613
+ # can't be aromatic, thus S and not #16
614
+
615
+ Sulfoxide: [$([SX3](=[OX1])([#6])[#6]),$([SX3+]([OX1-])([#6])[#6])]
616
+
617
+ Sulfonium: [S+;!$([S]~[!#6]);!$([S]*~[#7,#8,#15,#16])]
618
+ # can't be aromatic, thus S and not #16
619
+
620
+ Sulfuric_acid: [SX4](=[OX1])(=[OX1])([$([OX2H]),$([OX1-])])[$([OX2H]),$([OX1-])]
621
+ # includes anions
622
+
623
+ Sulfuric_monoester: [SX4](=[OX1])(=[OX1])([$([OX2H]),$([OX1-])])[OX2][#6;!$(C=[O,N,S])]
624
+
625
+ Sulfuric_diester: [SX4](=[OX1])(=[OX1])([OX2][#6;!$(C=[O,N,S])])[OX2][#6;!$(C=[O,N,S])]
626
+
627
+ Sulfuric_monoamide: [SX4](=[OX1])(=[OX1])([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])[$([OX2H]),$([OX1-])]
628
+
629
+ Sulfuric_diamide: [SX4](=[OX1])(=[OX1])([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
630
+
631
+ Sulfuric_esteramide: [SX4](=[OX1])(=[OX1])([#7X3][#6;!$(C=[O,N,S])])[OX2][#6;!$(C=[O,N,S])]
632
+
633
+ Sulfuric_derivative: [SX4D4](=[!#6])(=[!#6])([!#6])[!#6]
634
+ # everything else (would not be a "true" derivative of sulfuric acid, if one of the substituents were less electronegative
635
+ # than sulfur, but this should be very very rare, anyway)
636
+
637
+
638
+
639
+ #### sulfurous acid and derivatives missing!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
640
+
641
+
642
+
643
+
644
+ Sulfonic_acid: [SX4;$([H1]),$([H0][#6])](=[OX1])(=[OX1])[$([OX2H]),$([OX1-])]
645
+
646
+ Sulfonamide: [SX4;$([H1]),$([H0][#6])](=[OX1])(=[OX1])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
647
+
648
+ Sulfonic_ester: [SX4;$([H1]),$([H0][#6])](=[OX1])(=[OX1])[OX2][#6;!$(C=[O,N,S])]
649
+
650
+ Sulfonic_halide: [SX4;$([H1]),$([H0][#6])](=[OX1])(=[OX1])[FX1,ClX1,BrX1,IX1]
651
+
652
+ Sulfonic_derivative: [SX4;$([H1]),$([H0][#6])](=[!#6])(=[!#6])[!#6]
653
+ # includes all of the above and many more
654
+ # for comparison: this is what "all sulfonic derivatives but not the ones above" would look like:
655
+ # [$([SX4;$([H1]),$([H0][#6])](=[!#6])(=[!#6;!O])[!#6]),$([SX4;$([H1]),$([H0][#6])](=[OX1])(=[OX1])[!$([FX1,ClX1,BrX1,IX1]);!$([#6]);!$([OX2H]);!$([OX1-]);!$([OX2][#6;!$(C=[O,N,S])]);!$([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])])]
656
+
657
+
658
+ Sulfinic_acid: [SX3;$([H1]),$([H0][#6])](=[OX1])[$([OX2H]),$([OX1-])]
659
+
660
+ Sulfinic_amide: [SX3;$([H1]),$([H0][#6])](=[OX1])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
661
+
662
+ Sulfinic_ester: [SX3;$([H1]),$([H0][#6])](=[OX1])[OX2][#6;!$(C=[O,N,S])]
663
+
664
+ Sulfinic_halide: [SX3;$([H1]),$([H0][#6])](=[OX1])[FX1,ClX1,BrX1,IX1]
665
+
666
+ Sulfinic_derivative: [SX3;$([H1]),$([H0][#6])](=[!#6])[!#6]
667
+
668
+ Sulfenic_acid: [SX2;$([H1]),$([H0][#6])][$([OX2H]),$([OX1-])]
669
+
670
+ Sulfenic_amide: [SX2;$([H1]),$([H0][#6])][#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
671
+
672
+ Sulfenic_ester: [SX2;$([H1]),$([H0][#6])][OX2][#6;!$(C=[O,N,S])]
673
+
674
+ Sulfenic_halide: [SX2;$([H1]),$([H0][#6])][FX1,ClX1,BrX1,IX1]
675
+
676
+ Sulfenic_derivative: [SX2;$([H1]),$([H0][#6])][!#6]
677
+
678
+
679
+ # II.3 Phosphorus
680
+ # ---------------
681
+
682
+ Phosphine: [PX3;$([H3]),$([H2][#6]),$([H1]([#6])[#6]),$([H0]([#6])([#6])[#6])]
683
+ # similar to amine, but less restrictive: includes also amide- and aminal-analogues
684
+
685
+ Phosphine_oxide: [PX4;$([H3]=[OX1]),$([H2](=[OX1])[#6]),$([H1](=[OX1])([#6])[#6]),$([H0](=[OX1])([#6])([#6])[#6])]
686
+
687
+ Phosphonium: [P+;!$([P]~[!#6]);!$([P]*~[#7,#8,#15,#16])]
688
+ # similar to Ammonium
689
+
690
+ Phosphorylen: [PX4;$([H3]=[CX3]),$([H2](=[CX3])[#6]),$([H1](=[CX3])([#6])[#6]),$([H0](=[CX3])([#6])([#6])[#6])]
691
+
692
+
693
+ # conventions for the following acids and derivatives:
694
+ # acids find protonated and deprotonated acids
695
+ # esters do not find mixed anhydrides ( ...P-O-C(=O))
696
+ # derivatives: substituents which go in place of the OH and =O are not H or C (may also be O,
697
+ # thus including acids and esters)
698
+
699
+ Phosphonic_acid: [PX4;$([H1]),$([H0][#6])](=[OX1])([$([OX2H]),$([OX1-])])[$([OX2H]),$([OX1-])]
700
+ # includes anions
701
+
702
+ Phosphonic_monoester: [PX4;$([H1]),$([H0][#6])](=[OX1])([$([OX2H]),$([OX1-])])[OX2][#6;!$(C=[O,N,S])]
703
+
704
+ Phosphonic_diester: [PX4;$([H1]),$([H0][#6])](=[OX1])([OX2][#6;!$(C=[O,N,S])])[OX2][#6;!$(C=[O,N,S])]
705
+
706
+ Phosphonic_monoamide: [PX4;$([H1]),$([H0][#6])](=[OX1])([$([OX2H]),$([OX1-])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
707
+
708
+ Phosphonic_diamide: [PX4;$([H1]),$([H0][#6])](=[OX1])([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
709
+
710
+ Phosphonic_esteramide: [PX4;$([H1]),$([H0][#6])](=[OX1])([OX2][#6;!$(C=[O,N,S])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
711
+
712
+ Phosphonic_acid_derivative: [PX4;$([H1]),$([H0][#6])](=[!#6])([!#6])[!#6]
713
+ # all of the above and much more
714
+
715
+
716
+ Phosphoric_acid: [PX4D4](=[OX1])([$([OX2H]),$([OX1-])])([$([OX2H]),$([OX1-])])[$([OX2H]),$([OX1-])]
717
+ # includes anions
718
+
719
+ Phosphoric_monoester: [PX4D4](=[OX1])([$([OX2H]),$([OX1-])])([$([OX2H]),$([OX1-])])[OX2][#6;!$(C=[O,N,S])]
720
+
721
+ Phosphoric_diester: [PX4D4](=[OX1])([$([OX2H]),$([OX1-])])([OX2][#6;!$(C=[O,N,S])])[OX2][#6;!$(C=[O,N,S])]
722
+
723
+ Phosphoric_triester: [PX4D4](=[OX1])([OX2][#6;!$(C=[O,N,S])])([OX2][#6;!$(C=[O,N,S])])[OX2][#6;!$(C=[O,N,S])]
724
+
725
+ Phosphoric_monoamide: [PX4D4](=[OX1])([$([OX2H]),$([OX1-])])([$([OX2H]),$([OX1-])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
726
+
727
+ Phosphoric_diamide: [PX4D4](=[OX1])([$([OX2H]),$([OX1-])])([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
728
+
729
+ Phosphoric_triamide: [PX4D4](=[OX1])([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
730
+
731
+ Phosphoric_monoestermonoamide: [PX4D4](=[OX1])([$([OX2H]),$([OX1-])])([OX2][#6;!$(C=[O,N,S])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
732
+
733
+ Phosphoric_diestermonoamide: [PX4D4](=[OX1])([OX2][#6;!$(C=[O,N,S])])([OX2][#6;!$(C=[O,N,S])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
734
+
735
+ Phosphoric_monoesterdiamide: [PX4D4](=[OX1])([OX2][#6;!$(C=[O,N,S])])([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
736
+
737
+ Phosphoric_acid_derivative: [PX4D4](=[!#6])([!#6])([!#6])[!#6]
738
+
739
+
740
+ Phosphinic_acid: [PX4;$([H2]),$([H1][#6]),$([H0]([#6])[#6])](=[OX1])[$([OX2H]),$([OX1-])]
741
+
742
+ Phosphinic_ester: [PX4;$([H2]),$([H1][#6]),$([H0]([#6])[#6])](=[OX1])[OX2][#6;!$(C=[O,N,S])]
743
+
744
+ Phosphinic_amide: [PX4;$([H2]),$([H1][#6]),$([H0]([#6])[#6])](=[OX1])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
745
+
746
+ Phosphinic_acid_derivative: [PX4;$([H2]),$([H1][#6]),$([H0]([#6])[#6])](=[!#6])[!#6]
747
+
748
+
749
+ Phosphonous_acid: [PX3;$([H1]),$([H0][#6])]([$([OX2H]),$([OX1-])])[$([OX2H]),$([OX1-])]
750
+
751
+ Phosphonous_monoester: [PX3;$([H1]),$([H0][#6])]([$([OX2H]),$([OX1-])])[OX2][#6;!$(C=[O,N,S])]
752
+
753
+ Phosphonous_diester: [PX3;$([H1]),$([H0][#6])]([OX2][#6;!$(C=[O,N,S])])[OX2][#6;!$(C=[O,N,S])]
754
+
755
+ Phosphonous_monoamide: [PX3;$([H1]),$([H0][#6])]([$([OX2H]),$([OX1-])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
756
+
757
+ Phosphonous_diamide: [PX3;$([H1]),$([H0][#6])]([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
758
+
759
+ Phosphonous_esteramide: [PX3;$([H1]),$([H0][#6])]([OX2][#6;!$(C=[O,N,S])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
760
+
761
+ Phosphonous_derivatives: [PX3;$([D2]),$([D3][#6])]([!#6])[!#6]
762
+
763
+
764
+ Phosphinous_acid: [PX3;$([H2]),$([H1][#6]),$([H0]([#6])[#6])][$([OX2H]),$([OX1-])]
765
+
766
+ Phosphinous_ester: [PX3;$([H2]),$([H1][#6]),$([H0]([#6])[#6])][OX2][#6;!$(C=[O,N,S])]
767
+
768
+ Phosphinous_amide: [PX3;$([H2]),$([H1][#6]),$([H0]([#6])[#6])][#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
769
+
770
+ Phosphinous_derivatives: [PX3;$([H2]),$([H1][#6]),$([H0]([#6])[#6])][!#6]
771
+
772
+
773
+ # II.4 Silicon
774
+ # ------------
775
+
776
+ Quart_silane: [SiX4]([#6])([#6])([#6])[#6]
777
+ # four C-substituents. non-reactive, non-toxic, in experimental phase for drug development
778
+
779
+ Non-quart_silane: [SiX4;$([H1]([#6])([#6])[#6]),$([H2]([#6])[#6]),$([H3][#6]),$([H4])]
780
+ # has 1-4 hydride(s), reactive. Daylight's depictmatch does not add hydrogens automatically to
781
+ # the free positions at Si, thus Hs had to be added implicitly
782
+
783
+ Silylmonohalide: [SiX4]([FX1,ClX1,BrX1,IX1])([#6])([#6])[#6]
784
+ # reagents for inserting protection groups
785
+
786
+ Het_trialkylsilane: [SiX4]([!#6])([#6])([#6])[#6]
787
+ # mostly acid-labile protection groups such as trimethylsilyl-ethers
788
+
789
+ Dihet_dialkylsilane: [SiX4]([!#6])([!#6])([#6])[#6]
790
+
791
+ Trihet_alkylsilane: [SiX4]([!#6])([!#6])([!#6])[#6]
792
+
793
+ Silicic_acid_derivative: [SiX4]([!#6])([!#6])([!#6])[!#6]
794
+ # four substituents that are neither C nor H
795
+
796
+
797
+ # II.5 Boron
798
+ # ----------
799
+
800
+ Trialkylborane: [BX3]([#6])([#6])[#6]
801
+ # also carbonyls allowed
802
+
803
+ Boric_acid_derivatives: [BX3]([!#6])([!#6])[!#6]
804
+ # includes acids, esters, amides, ... H-substituent at B is very rare.
805
+
806
+ Boronic_acid_derivative: [BX3]([!#6])([!#6])[!#6]
807
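+ # includes acids, esters, amides, ... NOTE(review): this pattern is identical to Boric_acid_derivatives; a boronic acid derivative would be expected to have one C substituent on B - confirm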
808
+
809
+ Borohydride: [BH1,BH2,BH3,BH4]
810
+ # at least one H attached to B
811
+
812
+ Quaternary_boron: [BX4]
813
+ # mostly borates (negative charge), in complex with Lewis-base
814
+
815
+
816
+
817
+ # Part III: Some Special Patterns
818
+ # ===============================
819
+
820
+
821
+ # III.1 Chains
822
+ # ------------
823
+
824
+ # some simple chains
825
+
826
+
827
+
828
+ # III.2 Rings
829
+ # -----------
830
+
831
+ Aromatic: a
832
+
833
+ Heterocyclic: [!#6;!R0]
834
+ # may be aromatic or not
835
+
836
+ Epoxide: [OX2r3]1[#6r3][#6r3]1
837
+ # toxic/reactive. may be annelated to an aromatic ring, but must not be aromatic itself (oxirane-2,3-dione)
838
+
839
+ NH_aziridine: [NX3H1r3]1[#6r3][#6r3]1
840
+ # toxic/reactive according to Maybridge's garbage filter
841
+
842
+ Spiro: [D4R;$(*(@*)(@*)(@*)@*)]
843
+ # at least two different rings can be found which are sharing just one atom.
844
+ # these two rings can be connected by a third ring, so it matches also some
845
+ # bridged systems, like morphine
846
+
847
+ Annelated_rings: [R;$(*(@*)(@*)@*);!$([R2;$(*(@*)(@*)(@*)@*)])]@[R;$(*(@*)(@*)@*);!$([R2;$(*(@*)(@*)(@*)@*)])]
848
+ # two different rings sharing exactly two atoms
849
+
850
+ Bridged_rings: [R;$(*(@*)(@*)@*);!$([D4R;$(*(@*)(@*)(@*)@*)]);!$([R;$(*(@*)(@*)@*);!$([R2;$(*(@*)(@*)(@*)@*)])]@[R;$(*(@*)(@*)@*);!$([R2;$(*(@*)(@*)(@*)@*)])])]
851
+ # part of two or more rings, not spiro, not annelated -> finds bridgehead atoms,
852
+ # but only if they are not annelated at the same time - otherwise impossible (?)
853
+ # to distinguish from non-bridgehead annelated atoms
854
+
855
+ # some basic ring-patterns (just size, no other information):
856
+
857
+
858
+
859
+
860
+
861
+ # III.3 Sugars and Nucleosides/Nucleotides, Steroids
862
+ # --------------------------------------------------
863
+
864
+ # because of the large variety of sugar derivatives, different patterns can be applied.
865
+ # The choice of patterns and their combinations will depend on the contents of the database
866
+ # e.g. natural products, nucleoside analogues with modified sugars, ... as well as on the
867
+ # desired restriction
868
+
869
+
870
+ Sugar_pattern_1: [OX2;$([r5]1@C@C@C(O)@C1),$([r6]1@C@C@C(O)@C(O)@C1)]
871
+ # 5 or 6-membered ring containing one O and at least one (r5) or two (r6) oxygen-substituents.
872
+
873
+ Sugar_pattern_2: [OX2;$([r5]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C1),$([r6]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C@C1)]
874
+ # 5 or 6-membered ring containing one O and an acetal-like bond at position 2.
875
+
876
+ Sugar_pattern_combi: [OX2;$([r5]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C(O)@C1),$([r6]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C(O)@C(O)@C1)]
877
+ # combination of the two above
878
+
879
+ Sugar_pattern_2_reducing: [OX2;$([r5]1@C(!@[OX2H1])@C@C@C1),$([r6]1@C(!@[OX2H1])@C@C@C@C1)]
880
+ # 5 or 6-membered cyclic hemi-acetal
881
+
882
+ Sugar_pattern_2_alpha: [OX2;$([r5]1@[C@@](!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C1),$([r6]1@[C@@](!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C@C1)]
883
+ # 5 or 6-membered ring containing one O and an acetal-like bond at position 2, with the configuration at that carbon specified as @@
884
+
885
+ Sugar_pattern_2_beta: [OX2;$([r5]1@[C@](!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C1),$([r6]1@[C@](!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C@C1)]
886
+ # 5 or 6-membered ring containing one O and an acetal-like bond at position 2, with the configuration at that carbon specified as @
887
+
888
+ ##Poly_sugar_1: ([OX2;$([r5]1@C@C@C(O)@C1),$([r6]1@C@C@C(O)@C(O)@C1)].[OX2;$([r5]1@C@C@C(O)@C1),$([r6]1@C@C@C(O)@C(O)@C1)])
889
+ # pattern1 occurs more than once (in same molecule, but moieties don't have to be adjacent!)
890
+
891
+ ##Poly_sugar_2: ([OX2;$([r5]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C1),$([r6]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C@C1)].[OX2;$([r5]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C1),$([r6]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C@C1)])
892
+ # pattern2 occurs more than once (in same molecule, but moieties don't have to be adjacent!)
893
+
894
+
895
+ # III.4 Everything else...
896
+ # ------------------------
897
+
898
+ Conjugated_double_bond: *=*[*]=,#,:[*]
899
+
900
+ Conjugated_tripple_bond: *#*[*]=,#,:[*]
901
+
902
+ Cis_double_bond: */[D2]=[D2]\*
903
+ # only one single-bonded substituent on each DB-atom. no aromats.
904
+ # only found when character of DB is explicitly stated.
905
+
906
+ Trans_double_bond: */[D2]=[D2]/*
907
+ # analogous to Cis_double_bond, but with the two substituents on opposite sides
908
+
909
+ Mixed_anhydrides: [$(*=O),$([#16,#14,#5]),$([#7]([#6]=[OX1]))][#8X2][$(*=O),$([#16,#14,#5]),$([#7]([#6]=[OX1]))]
910
+ # should hit all combinations of two acids
911
+
912
+ Halogen_on_hetero: [FX1,ClX1,BrX1,IX1][!#6]
913
+
914
+ Halogen_multi_subst: [F,Cl,Br,I;!$([X1]);!$([X0-])]
915
+ # Halogen which is neither mono-substituted nor an anion, e.g. chlorate.
916
+ # Most of these cases should be also filtered by Halogen_on_hetero.
917
+
918
+ Trifluoromethyl: [FX1][CX4;!$([H0][Cl,Br,I]);!$([F][C]([F])([F])[F])]([FX1])([FX1])
919
+ # C with three F attached, connected to anything which is not another halogen
920
+
921
+ C_ONS_bond: [#6]~[#7,#8,#16]
922
+ # probably all drug-like molecules have at least one O, N, or S connected to a C -> nice filter
923
+
924
+ ## Mixture: (*).(*)
925
+ # two or more separate parts, may also be salt
926
+ # component-level grouping is not yet supported in Open Babel Version 2.0
927
+
928
+
929
+ Charged: [!+0]
930
+
931
+ Anion: [-1,-2,-3,-4,-5,-6,-7]
932
+
933
+ Kation: [+1,+2,+3,+4,+5,+6,+7]
934
+
935
+ Salt: ([-1,-2,-3,-4,-5,-6,-7]).([+1,+2,+3,+4,+5,+6,+7])
936
+ # two or more separate components with opposite charges
937
+
938
+ ##Zwitterion: ([-1,-2,-3,-4,-5,-6,-7].[+1,+2,+3,+4,+5,+6,+7])
939
+ # both negative and positive charges somewhere within the same molecule.
940
+
941
+ 1,3-Tautomerizable: [$([#7X2,OX1,SX1]=*[!H0;!$([a;!n])]),$([#7X3,OX2,SX2;!H0]*=*),$([#7X3,OX2,SX2;!H0]*:n)]
942
+ # 1,3 migration of H allowed. Includes keto/enol and amide/enamide.
943
+ # Aromatic rings must stay aromatic - no keto form of phenol
944
+
945
+ 1,5-Tautomerizable: [$([#7X2,OX1,SX1]=,:**=,:*[!H0;!$([a;!n])]),$([#7X3,OX2,SX2;!H0]*=**=*),$([#7X3,OX2,SX2;!H0]*=,:**:n)]
946
+
947
+ Rotatable_bond: [!$(*#*)&!D1]-!@[!$(*#*)&!D1]
948
+ # taken from http://www.daylight.com/support/contrib/smarts/content.html
949
+
950
+ Michael_acceptor: [CX3]=[CX3][$([CX3]=[O,N,S]),$(C#[N]),$([S,P]=[OX1]),$([NX3]=O),$([NX3+](=O)[O-])]
951
+ # the classical case: C=C near carbonyl, nitrile, nitro, or similar
952
+ # Oxo-heteroaromats and similar are not included.
953
+
954
+ Dicarbodiazene: [CX3](=[OX1])[NX2]=[NX2][CX3](=[OX1])
955
+ # Michael-like acceptor, see Mitsunobu reaction
956
+
957
+ # H-Bond_donor:
958
+
959
+ # H-Bond_acceptor:
960
+
961
+ # Pos_ionizable:
962
+
963
+ # Neg_ionizable:
964
+
965
+ # Unlikely_ions:
966
+ # O+,N-,C+,C-, ...
967
+
968
+ CH-acidic: [$([CX4;!$([H0]);!$(C[!#6;!$([P,S]=O);!$(N(~O)~O)])][$([CX3]=[O,N,S]),$(C#[N]),$([S,P]=[OX1]),$([NX3]=O),$([NX3+](=O)[O-]);!$(*[S,O,N;H1,H2]);!$([*+0][S,O;X1-])]),$([CX4;!$([H0])]1[CX3]=[CX3][CX3]=[CX3]1)]
969
+ # C-H alpha to carbonyl, nitro or similar, C is not double-bonded, only C, H, S,P=O and nitro substituents allowed.
970
+ # pentadiene is included. acids, their salts, prim./sec. amides, and imides are excluded.
971
+ # hits also CH-acidic_strong
972
+
973
+ CH-acidic_strong: [CX4;!$([H0]);!$(C[!#6;!$([P,S]=O);!$(N(~O)~O)])]([$([CX3]=[O,N,S]),$(C#[N]),$([S,P]=[OX1]),$([NX3]=O),$([NX3+](=O)[O-]);!$(*[S,O,N;H1,H2]);!$([*+0][S,O;X1-])])[$([CX3]=[O,N,S]),$(C#[N]),$([S,P]=[OX1]),$([NX3]=O),$([NX3+](=O)[O-]);!$(*[S,O,N;H1,H2]);!$([*+0][S,O;X1-])]
974
+ # same as above (without pentadiene), but carbonyl or similar on two or three sides
975
+
976
+ Chiral_center_specified: [$([*@](~*)(~*)(*)*),$([*@H](*)(*)*),$([*@](~*)(*)*),$([*@H](~*)~*)]
977
+ # Hits atoms with tetrahedral chirality, if chiral center is specified in the SMILES string
978
+ # depictmatch does not find oxonium, sulfonium, or sulfoxides!
979
+
980
+ # Chiral_center_unspecified: [$([*@?](~*)(~*)(*)*),$([*@?H](*)(*)*),$([*@?](~*)(*)*),$([*@?H](~*)~*)]
981
+ # Hits atoms with tetrahedral chirality, if chiral center is not specified in the SMILES string
982
+ # "@?" (unspecified chirality) is not yet supported in Open Babel Version 2.0
983
+