lazar 0.0.4 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (64) hide show
  1. checksums.yaml +4 -4
  2. metadata +18 -65
  3. data/.gitignore +0 -10
  4. data/.yardopts +0 -4
  5. data/Gemfile +0 -2
  6. data/LICENSE +0 -674
  7. data/README.md +0 -44
  8. data/Rakefile +0 -1
  9. data/VERSION +0 -1
  10. data/ext/lazar/Makefile +0 -5
  11. data/java/CdkDescriptorInfo.class +0 -0
  12. data/java/CdkDescriptorInfo.java +0 -22
  13. data/java/CdkDescriptors.class +0 -0
  14. data/java/CdkDescriptors.java +0 -141
  15. data/java/Jmol.jar +0 -0
  16. data/java/JoelibDescriptorInfo.class +0 -0
  17. data/java/JoelibDescriptorInfo.java +0 -15
  18. data/java/JoelibDescriptors.class +0 -0
  19. data/java/JoelibDescriptors.java +0 -60
  20. data/java/Rakefile +0 -15
  21. data/java/cdk-1.4.19.jar +0 -0
  22. data/java/joelib2.jar +0 -0
  23. data/java/log4j.jar +0 -0
  24. data/lazar.gemspec +0 -29
  25. data/lib/SMARTS_InteLigand.txt +0 -983
  26. data/mongoid.yml +0 -8
  27. data/test/data/CPDBAS_v5c_1547_29Apr2008part.sdf +0 -13553
  28. data/test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_mouse_TD50.csv +0 -436
  29. data/test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_rat_TD50.csv +0 -568
  30. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Hamster.csv +0 -87
  31. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mouse.csv +0 -978
  32. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall.csv +0 -1120
  33. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall_no_duplicates.csv +0 -1113
  34. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity.csv +0 -850
  35. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity_no_duplicates.csv +0 -829
  36. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Rat.csv +0 -1198
  37. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_SingleCellCall.csv +0 -1505
  38. data/test/data/EPAFHM.csv +0 -618
  39. data/test/data/EPAFHM.medi.csv +0 -100
  40. data/test/data/EPAFHM.mini.csv +0 -22
  41. data/test/data/EPA_v4b_Fathead_Minnow_Acute_Toxicity_LC50_mmol.csv +0 -581
  42. data/test/data/FDA_v3b_Maximum_Recommended_Daily_Dose_mmol.csv +0 -1217
  43. data/test/data/ISSCAN-multi.csv +0 -59
  44. data/test/data/LOAEL_log_mg_corrected_smiles.csv +0 -568
  45. data/test/data/LOAEL_log_mmol_corrected_smiles.csv +0 -568
  46. data/test/data/acetaldehyde.sdf +0 -14
  47. data/test/data/boiling_points.ext.sdf +0 -11460
  48. data/test/data/cpdb_100.csv +0 -101
  49. data/test/data/hamster_carcinogenicity.csv +0 -86
  50. data/test/data/hamster_carcinogenicity.mini.bool_float.csv +0 -11
  51. data/test/data/hamster_carcinogenicity.mini.bool_int.csv +0 -11
  52. data/test/data/hamster_carcinogenicity.mini.bool_string.csv +0 -11
  53. data/test/data/hamster_carcinogenicity.mini.csv +0 -11
  54. data/test/data/hamster_carcinogenicity.ntriples +0 -618
  55. data/test/data/hamster_carcinogenicity.sdf +0 -2805
  56. data/test/data/hamster_carcinogenicity.xls +0 -0
  57. data/test/data/hamster_carcinogenicity.yaml +0 -352
  58. data/test/data/hamster_carcinogenicity_with_errors.csv +0 -88
  59. data/test/data/kazius.csv +0 -4070
  60. data/test/data/multi_cell_call.csv +0 -1067
  61. data/test/data/multi_cell_call_no_dup.csv +0 -1057
  62. data/test/data/multicolumn.csv +0 -8
  63. data/test/data/rat_feature_dataset.csv +0 -1179
  64. data/test/data/wrong_dataset.csv +0 -8
data/README.md DELETED
@@ -1,44 +0,0 @@
1
- lazar
2
- =====
3
-
4
- Ruby libraries for the lazar framework
5
-
6
- Dependencies
7
- ------------
8
-
9
- lazar depends on a couple of external programs and libraries. On Debian 7 "Wheezy" systems you can install them with
10
-
11
- `sudo apt-get install build-essential ruby ruby-dev git cmake swig r-base r-base-dev r-cran-rserve openjdk-7-jre libgsl0-dev libxml2-dev zlib1g-dev libcairo2-dev`
12
-
13
- You will also need at least mongodb version 3.0, but Debian "Wheezy" provides version 2.4. Please follow the instructions at http://docs.mongodb.org/manual/tutorial/install-mongodb-on-debian/:
14
-
15
- ```
16
- sudo apt-key adv --keyserver keyserver.ubuntu.com --recv 7F0CEB10
17
- echo "deb http://repo.mongodb.org/apt/debian wheezy/mongodb-org/3.0 main" | sudo tee /etc/apt/sources.list.d/mongodb-org-3.0.list
18
- sudo apt-get update
19
- sudo apt-get install -y mongodb-org
20
- ```
21
-
22
- Installation
23
- ------------
24
-
25
- `gem install lazar`
26
-
27
- Please be patient, the compilation of OpenBabel and Fminer libraries can be very time consuming. If installation fails you can try to install manually:
28
-
29
- ```
30
- git clone https://github.com/opentox/lazar.git
31
- cd lazar
32
- ruby ext/lazar/extconf.rb
33
- bundle install
34
- ```
35
-
36
- The output should give you more verbose information that can help in debugging (e.g. to identify missing libraries).
37
-
38
- Documentation
39
- -------------
40
- * [API documentation](http://rdoc.info/gems/lazar)
41
-
42
- Copyright
43
- ---------
44
- Copyright (c) 2009-2015 Christoph Helma, Martin Guetlein, Micha Rautenberg, Andreas Maunz, David Vorgrimmler, Denis Gebele. See LICENSE for details.
data/Rakefile DELETED
@@ -1 +0,0 @@
1
- require "bundler/gem_tasks"
data/VERSION DELETED
@@ -1 +0,0 @@
1
- 0.0.4
data/ext/lazar/Makefile DELETED
@@ -1,5 +0,0 @@
1
- all:
2
- true
3
-
4
- install:
5
- true
Binary file
@@ -1,22 +0,0 @@
1
- import java.util.*;
2
- import org.openscience.cdk.qsar.descriptors.molecular.*;
3
- import org.openscience.cdk.qsar.*;
4
-
5
- class CdkDescriptorInfo {
6
- public static void main(String[] args) {
7
-
8
- DescriptorEngine engine = new DescriptorEngine(DescriptorEngine.MOLECULAR);
9
-
10
- for (Iterator<IDescriptor> it = engine.getDescriptorInstances().iterator(); it.hasNext(); ) {
11
- IDescriptor descriptor = it.next();
12
- String cdk_class = descriptor.getClass().toString().replaceAll("class ","");
13
- System.out.println("- :java_class: \""+cdk_class+"\"");
14
- String description = engine.getDictionaryDefinition(cdk_class).replaceAll("^\\s+", "" ).replaceAll("\\s+$", "").replaceAll("\\s+", " ");
15
- System.out.println(" :description: \""+description+"\"");
16
- System.out.println(" :names:");
17
- for (String name : descriptor.getDescriptorNames()) {
18
- System.out.println(" - \""+name+"\"");
19
- }
20
- }
21
- }
22
- }
Binary file
@@ -1,141 +0,0 @@
1
- import java.util.*;
2
- import java.io.*;
3
- import org.openscience.cdk.DefaultChemObjectBuilder;
4
- import org.openscience.cdk.interfaces.IMolecule;
5
- import org.openscience.cdk.io.iterator.IteratingMDLReader;
6
- import org.openscience.cdk.qsar.*;
7
- import org.openscience.cdk.qsar.DescriptorValue;
8
- import org.openscience.cdk.aromaticity.CDKHueckelAromaticityDetector;
9
- import org.openscience.cdk.tools.manipulator.AtomContainerManipulator;
10
- import org.openscience.cdk.exception.NoSuchAtomTypeException;
11
-
12
- class CdkDescriptors {
13
- public static void main(String[] args) {
14
-
15
- if (args==null || args.length<2) {
16
- System.err.println("required params: <sd-file> <descriptor1> <descriptor2(optional)> <descriptor3(optional)> ...");
17
- System.exit(1);
18
- }
19
- if (! new File(args[0]).exists()){
20
- System.err.println("file not found "+args[0]);
21
- System.exit(1);
22
- }
23
-
24
- // command line descriptor params can be either "descriptorName" or "descriptorValueName"
25
- // terminology:
26
- // A descriptor can calculate several values, e.g., ALOGP produces ALOGP.ALogP, ALOGP.ALogp2, ALOGP.AMR
27
- // "descriptorName" ALOGP
28
- // "valueName" AMR
29
- // "descriptorValueName" ALOGP.AMR
30
- DescriptorEngine engine;
31
- Set<String> classNames = new LinkedHashSet<String>(); // descriptors to be computed
32
- Set<String> descriptorNames = new LinkedHashSet<String>(); // all values of this descriptor will be printed
33
- Set<String> descriptorValueNames = new LinkedHashSet<String>(); // only these values of a descriptor will be printed
34
- for (int i =1; i < args.length; i++) {
35
- String descriptorName;
36
- if (args[i].indexOf(".")!=-1) {
37
- descriptorValueNames.add(args[i]);
38
- descriptorName = args[i].substring(0,args[i].indexOf("."));
39
- }
40
- else {
41
- descriptorNames.add(args[i]);
42
- descriptorName = args[i];
43
- }
44
- classNames.add(getDescriptorClassName(descriptorName));
45
- }
46
-
47
- engine = new DescriptorEngine(new ArrayList<String>(classNames));
48
- List<IDescriptor> instances = engine.instantiateDescriptors(new ArrayList<String>(classNames));
49
- List<DescriptorSpecification> specs = engine.initializeSpecifications(instances);
50
- engine.setDescriptorInstances(instances);
51
- engine.setDescriptorSpecifications(specs);
52
-
53
- try {
54
- BufferedReader br = new BufferedReader(new FileReader(args[0]));
55
- PrintWriter yaml = new PrintWriter(new FileWriter(args[0]+"cdk.yaml"));
56
- // parse 3d sdf from file and calculate descriptors
57
- IteratingMDLReader reader = new IteratingMDLReader( br, DefaultChemObjectBuilder.getInstance());
58
- int c = 0;
59
- while (reader.hasNext()) {
60
- try {
61
- System.out.println("computing "+(args.length-1)+" descriptors for compound "+(++c));
62
- IMolecule molecule = (IMolecule)reader.next();
63
- molecule = (IMolecule) AtomContainerManipulator.removeHydrogens(molecule);
64
- try {
65
- AtomContainerManipulator.percieveAtomTypesAndConfigureAtoms(molecule);
66
- }
67
- catch (NoSuchAtomTypeException e) {
68
- e.printStackTrace();
69
- }
70
- CDKHueckelAromaticityDetector.detectAromaticity(molecule);
71
-
72
- engine.process(molecule);
73
- Map<Object,Object> properties = molecule.getProperties();
74
- Boolean first = true;
75
- for (Map.Entry<Object, Object> entry : properties.entrySet()) {
76
- try {
77
- if ((entry.getKey() instanceof DescriptorSpecification) && (entry.getValue() instanceof DescriptorValue)) {
78
- DescriptorSpecification property = (DescriptorSpecification)entry.getKey();
79
- DescriptorValue value = (DescriptorValue)entry.getValue();
80
- String[] values = value.getValue().toString().split(",");
81
- for (int i = 0; i < values.length; i++) {
82
- String cdk_class = property.getImplementationTitle();
83
- String descriptorName = cdk_class.substring(cdk_class.lastIndexOf(".")+1).replace("Descriptor","");
84
- String descriptorValueName = descriptorName + "." + value.getNames()[i];
85
- if (descriptorNames.contains(descriptorName) || descriptorValueNames.contains(descriptorValueName)) {
86
- if (first) { yaml.print("- "); first = false; }
87
- else { yaml.print(" "); }
88
- yaml.println("Cdk." + descriptorValueName + ": " + values[i]);
89
- }
90
- }
91
- }
92
- }
93
- catch (ClassCastException e) { } // sdf properties are stored as molecules properties (strings), ignore them
94
- catch (Exception e) { e.printStackTrace(); } // output nothing to yaml
95
- }
96
- }
97
- catch (Exception e) {
98
- yaml.println("- {}");
99
- e.printStackTrace();
100
- continue;
101
- }
102
- }
103
- yaml.close();
104
- }
105
- catch (Exception e) { e.printStackTrace(); }
106
- }
107
-
108
-
109
- /** HACK to find the class for a descriptor
110
- * problem: Descriptor is not always at the end of the class (APolDescriptor), but may be in the middle (AutocorrelationDescriptorPolarizability)
111
- * this method makes a class-lookup using trial and error */
112
- static String getDescriptorClassName(String descriptorName) {
113
- String split = splitCamelCase(descriptorName)+" "; // space mark possible positions for 'Descriptor'
114
- for(int i = split.length()-1; i>0; i--) {
115
- if (split.charAt(i)==' ') { // iterate over all spaces, starting with the trailing one
116
- String test = split.substring(0,i)+"Descriptor"+split.substring(i+1,split.length()); // replace current space with 'Descriptor' ..
117
- test = test.replaceAll("\\s",""); // .. and remove other spaces
118
- String className = "org.openscience.cdk.qsar.descriptors.molecular." + test;
119
- try {
120
- Class.forName(className);
121
- return className;
122
- } catch (ClassNotFoundException e) {}
123
- }
124
- }
125
- System.err.println("Descriptor not found: "+descriptorName);
126
- System.exit(1);
127
- return null;
128
- }
129
-
130
- /** inserts space in between camel words */
131
- static String splitCamelCase(String s) {
132
- return s.replaceAll(
133
- String.format("%s|%s|%s",
134
- "(?<=[A-Z])(?=[A-Z][a-z])",
135
- "(?<=[^A-Z])(?=[A-Z])",
136
- "(?<=[A-Za-z])(?=[^A-Za-z])"
137
- ),
138
- " "
139
- );
140
- }
141
- }
data/java/Jmol.jar DELETED
Binary file
Binary file
@@ -1,15 +0,0 @@
1
- import joelib2.feature.FeatureHelper;
2
-
3
- class JoelibDescriptorInfo {
4
- public static void main(String[] args) {
5
- FeatureHelper helper = FeatureHelper.instance();
6
- System.out.println("---"); // document separator for Joelib debug messages
7
- for (Object feature : helper.getNativeFeatures() ) {
8
- System.out.println("- :java_class: \""+feature.toString()+"\"");
9
- // methods for accessing feature descriptions e.g. with
10
- // FeatureFactory.instance().getFeature(feature.toString()).getDescription().getText() or
11
- // FeatureFactory.instance().getFeature(feature.toString()).getDescription().getHtml()
12
- // are defunct
13
- }
14
- }
15
- }
Binary file
@@ -1,60 +0,0 @@
1
- import java.util.*;
2
- import java.io.*;
3
- import joelib2.feature.Feature;
4
- import joelib2.feature.FeatureHelper;
5
- import joelib2.feature.FeatureFactory;
6
- import joelib2.feature.FeatureResult;
7
- import joelib2.io.BasicIOType;
8
- import joelib2.io.BasicIOTypeHolder;
9
- import joelib2.io.BasicReader;
10
- import joelib2.io.MoleculeFileHelper;
11
- import joelib2.io.MoleculeFileIO;
12
- import joelib2.io.MoleculeIOException;
13
- import joelib2.molecule.BasicConformerMolecule;
14
-
15
- class JoelibDescriptors {
16
- public static void main(String[] args) {
17
-
18
- String[] features = null;
19
- features = new String[args.length-1];
20
- System.arraycopy(args,1,features,0,args.length-1);
21
-
22
- FeatureFactory factory = FeatureFactory.instance();
23
- MoleculeFileIO loader = null;
24
- String line = new String();
25
- String sdf = new String();
26
- try {
27
- // parse 3d sdf from file and calculate descriptors
28
- InputStream is = new FileInputStream(args[0]);
29
- PrintWriter yaml = new PrintWriter(new FileWriter(args[0]+"joelib.yaml"));
30
- BasicIOType inType = BasicIOTypeHolder.instance().getIOType("SDF");
31
- loader = MoleculeFileHelper.getMolReader(is, inType);
32
- BasicConformerMolecule mol = new BasicConformerMolecule(inType, inType);
33
- while (true) {
34
- try {
35
- Boolean success = loader.read(mol);
36
- if (!success) { break; } // last molecule
37
- for (int i =0; i < features.length; i++) {
38
- String name = "joelib2.feature.types." + features[i];
39
- Feature feature = factory.getFeature(name);
40
- FeatureResult result = feature.calculate(mol);
41
- if (i == 0) { yaml.print("- "); }
42
- else { yaml.print(" "); }
43
- yaml.print( "Joelib."+features[i]+": " );
44
- yaml.println( result.toString() );
45
- }
46
-
47
- }
48
- catch (Exception e) {
49
- System.err.println(e.toString());
50
- e.printStackTrace();
51
- }
52
- }
53
- yaml.close();
54
- }
55
- catch (Exception e) {
56
- System.err.println(e.toString());
57
- e.printStackTrace();
58
- }
59
- }
60
- }
data/java/Rakefile DELETED
@@ -1,15 +0,0 @@
1
- # Java class, classpath
2
- java_classes = [
3
- ["CdkDescriptors", "cdk-1.4.19.jar"],
4
- ["CdkDescriptorInfo", "cdk-1.4.19.jar"],
5
- ["JoelibDescriptors", "joelib2.jar:."],
6
- ["JoelibDescriptorInfo", "joelib2.jar:."],
7
- ]
8
-
9
- task :default => java_classes.collect{|c| "#{c.first}.class"}
10
-
11
- java_classes.each do |c|
12
- file "#{c.first}.class" => "#{c.first}.java" do
13
- puts `javac -classpath #{c.last} #{c.first}.java`
14
- end
15
- end
data/java/cdk-1.4.19.jar DELETED
Binary file
data/java/joelib2.jar DELETED
Binary file
data/java/log4j.jar DELETED
Binary file
data/lazar.gemspec DELETED
@@ -1,29 +0,0 @@
1
- # -*- encoding: utf-8 -*-
2
- $:.push File.expand_path("../lib", __FILE__)
3
-
4
- Gem::Specification.new do |s|
5
- s.name = "lazar"
6
- s.version = File.read("./VERSION").strip
7
- s.authors = ["Christoph Helma, Martin Guetlein, Andreas Maunz, Micha Rautenberg, David Vorgrimmler, Denis Gebele"]
8
- s.email = ["helma@in-silico.ch"]
9
- s.homepage = "http://github.com/opentox/lazar"
10
- s.summary = %q{Lazar framework}
11
- s.description = %q{Libraries for lazy structure-activity relationships and read-across.}
12
- s.license = 'GPL-3'
13
-
14
- s.rubyforge_project = "lazar"
15
-
16
- s.files = `git ls-files`.split("\n")
17
- s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
18
- s.extensions = %w[ext/lazar/extconf.rb]
19
- s.require_paths = ["lib"]
20
-
21
- # specify any dependencies here; for example:
22
- s.add_runtime_dependency "bundler"
23
- s.add_runtime_dependency "rest-client"
24
- s.add_runtime_dependency 'nokogiri'
25
- #s.add_runtime_dependency "openbabel"
26
- s.add_runtime_dependency 'rserve-client'
27
- s.add_runtime_dependency "mongoid", '~> 5.0beta'
28
-
29
- end
@@ -1,983 +0,0 @@
1
- #
2
- # SMARTS Patterns for Functional Group Classification
3
- #
4
- # written by Christian Laggner
5
- # Copyright 2005 Inte:Ligand Software-Entwicklungs und Consulting GmbH
6
- #
7
- # Released under the Lesser General Public License (LGPL license)
8
- # see http://www.gnu.org/copyleft/lesser.html
9
- # Modified from Version 221105
10
- #####################################################################################################
11
-
12
- # General Stuff:
13
- # These patters were written in an attempt to represent the classification of organic compounds
14
- # from the viewpoint of an organic chemist.
15
- # They are often very restrictive. This may be generally a good thing, but it also takes some time
16
- # for filtering/indexing large compound sets.
17
- # For filtering undesired groups (in druglike compounds) one will want to have more general patterns
18
- # (e.g. you don't want *any* halide of *any* acid, *neither* aldehyde *nor* formyl esters and amides, ...).
19
- #
20
-
21
- # Part I: Carbon
22
- # ==============
23
-
24
-
25
- # I.1: Carbon-Carbon Bonds
26
- # ------------------------
27
-
28
- # I.1.1 Alkanes:
29
-
30
- Primary_carbon: [CX4H3][#6]
31
-
32
- Secondary_carbon: [CX4H2]([#6])[#6]
33
-
34
- Tertiary_carbon: [CX4H1]([#6])([#6])[#6]
35
-
36
- Quaternary_carbon: [CX4]([#6])([#6])([#6])[#6]
37
-
38
-
39
- # I.1.2 C-C double and Triple Bonds
40
-
41
- Alkene: [CX3;$([H2]),$([H1][#6]),$(C([#6])[#6])]=[CX3;$([H2]),$([H1][#6]),$(C([#6])[#6])]
42
- # sp2 C may be substituted only by C or H -
43
- # does not hit ketenes and allenes, nor enamines, enols and the like
44
-
45
- Alkyne: [CX2]#[CX2]
46
- # non-carbon substituents (e.g. alkynol ethers) are rather rare, thus no further discrimination
47
-
48
- Allene: [CX3]=[CX2]=[CX3]
49
-
50
-
51
- # I.2: One Carbon-Hetero Bond
52
- # ---------------------------
53
-
54
-
55
- # I.2.1 Alkyl Halogenides
56
-
57
- Alkylchloride: [ClX1][CX4]
58
- # will also hit chloromethylethers and the like, but no chloroalkenes, -alkynes or -aromats
59
- # a more restrictive version can be obtained by modifying the Alcohol string.
60
-
61
- Alkylfluoride: [FX1][CX4]
62
-
63
- Alkylbromide: [BrX1][CX4]
64
-
65
- Alkyliodide: [IX1][CX4]
66
-
67
-
68
- # I.2.2 Alcohols and Ethers
69
-
70
- Alcohol: [OX2H][CX4;!$(C([OX2H])[O,S,#7,#15])]
71
- # nonspecific definition, no acetals, aminals, and the like
72
-
73
- Primary_alcohol: [OX2H][CX4H2;!$(C([OX2H])[O,S,#7,#15])]
74
-
75
- Secondary_alcohol: [OX2H][CX4H;!$(C([OX2H])[O,S,#7,#15])]
76
-
77
- Tertiary_alcohol: [OX2H][CX4D4;!$(C([OX2H])[O,S,#7,#15])]
78
-
79
- Dialkylether: [OX2]([CX4;!$(C([OX2])[O,S,#7,#15,F,Cl,Br,I])])[CX4;!$(C([OX2])[O,S,#7,#15])]
80
- # no acetals and the like; no enolethers
81
-
82
- Dialkylthioether: [SX2]([CX4;!$(C([OX2])[O,S,#7,#15,F,Cl,Br,I])])[CX4;!$(C([OX2])[O,S,#7,#15])]
83
- # no acetals and the like; no enolethers
84
-
85
- Alkylarylether: [OX2](c)[CX4;!$(C([OX2])[O,S,#7,#15,F,Cl,Br,I])]
86
- # no acetals and the like; no enolethers
87
-
88
- Diarylether: [c][OX2][c]
89
-
90
- Alkylarylthioether: [SX2](c)[CX4;!$(C([OX2])[O,S,#7,#15,F,Cl,Br,I])]
91
-
92
- Diarylthioether: [c][SX2][c]
93
-
94
- Oxonium: [O+;!$([O]~[!#6]);!$([S]*~[#7,#8,#15,#16])]
95
- # can't be aromatic, thus O and not #8
96
-
97
- # I.2.3 Amines
98
-
99
- Amine: [NX3+0,NX4+;!$([N]~[!#6]);!$([N]*~[#7,#8,#15,#16])]
100
- # hits all amines (prim/sec/tert/quart), including ammonium salts, also enamines, but not amides, imides, aminals, ...
101
-
102
- # the following amines include also the protonated forms
103
-
104
- Primary_aliph_amine: [NX3H2+0,NX4H3+;!$([N][!C]);!$([N]*~[#7,#8,#15,#16])]
105
-
106
- Secondary_aliph_amine: [NX3H1+0,NX4H2+;!$([N][!C]);!$([N]*~[#7,#8,#15,#16])]
107
-
108
- Tertiary_aliph_amine: [NX3H0+0,NX4H1+;!$([N][!C]);!$([N]*~[#7,#8,#15,#16])]
109
-
110
- Quaternary_aliph_ammonium: [NX4H0+;!$([N][!C]);!$([N]*~[#7,#8,#15,#16])]
111
-
112
- Primary_arom_amine: [NX3H2+0,NX4H3+]c
113
-
114
- Secondary_arom_amine: [NX3H1+0,NX4H2+;!$([N][!c]);!$([N]*~[#7,#8,#15,#16])]
115
-
116
- Tertiary_arom_amine: [NX3H0+0,NX4H1+;!$([N][!c]);!$([N]*~[#7,#8,#15,#16])]
117
-
118
- Quaternary_arom_ammonium: [NX4H0+;!$([N][!c]);!$([N]*~[#7,#8,#15,#16])]
119
-
120
- Secondary_mixed_amine: [NX3H1+0,NX4H2+;$([N]([c])[C]);!$([N]*~[#7,#8,#15,#16])]
121
-
122
- Tertiary_mixed_amine: [NX3H0+0,NX4H1+;$([N]([c])([C])[#6]);!$([N]*~[#7,#8,#15,#16])]
123
-
124
- Quaternary_mixed_ammonium: [NX4H0+;$([N]([c])([C])[#6][#6]);!$([N]*~[#7,#8,#15,#16])]
125
-
126
- Ammonium: [N+;!$([N]~[!#6]);!$(N=*);!$([N]*~[#7,#8,#15,#16])]
127
- # only C and H substituents allowed. Quaternary or protonated amines
128
- # NX4+ or Nv4+ is not recognized by Daylight's depictmatch if less than four C are present
129
-
130
-
131
- # I.2.4 Others
132
-
133
- Alkylthiol: [SX2H][CX4;!$(C([SX2H])~[O,S,#7,#15])]
134
-
135
- Dialkylthioether: [SX2]([CX4;!$(C([SX2])[O,S,#7,#15,F,Cl,Br,I])])[CX4;!$(C([SX2])[O,S,#7,#15])]
136
-
137
- Alkylarylthioether: [SX2](c)[CX4;!$(C([SX2])[O,S,#7,#15])]
138
-
139
- Disulfide: [SX2D2][SX2D2]
140
-
141
- 1,2-Aminoalcohol: [OX2H][CX4;!$(C([OX2H])[O,S,#7,#15,F,Cl,Br,I])][CX4;!$(C([N])[O,S,#7,#15])][NX3;!$(NC=[O,S,N])]
142
- # does not hit alpha-amino acids, enaminoalcohols, 1,2-aminoacetals, o-aminophenols, etc.
143
-
144
- 1,2-Diol: [OX2H][CX4;!$(C([OX2H])[O,S,#7,#15])][CX4;!$(C([OX2H])[O,S,#7,#15])][OX2H]
145
- # does not hit alpha-hydroxy acids, enolalcohols, 1,2-hydroxyacetals, 1,2-diphenols, etc.
146
-
147
- 1,1-Diol: [OX2H][CX4;!$(C([OX2H])([OX2H])[O,S,#7,#15])][OX2H]
148
-
149
- Hydroperoxide: [OX2H][OX2]
150
- #does not neccessarily have to be connected to a carbon atom, includes also hydrotrioxides
151
-
152
- Peroxo: [OX2D2][OX2D2]
153
-
154
- Organolithium_compounds: [LiX1][#6,#14]
155
-
156
- Organomagnesium_compounds: [MgX2][#6,#14]
157
- # not restricted to Grignard compounds, also dialkyl Mg
158
-
159
- Organometallic_compounds: [!#1;!#5;!#6;!#7;!#8;!#9;!#14;!#15;!#16;!#17;!#33;!#34;!#35;!#52;!#53;!#85]~[#6;!-]
160
- # very general, includes all metals covalently bound to carbon
161
-
162
-
163
- # I.3: Two Carbon-Hetero Bonds (Carbonyl and Derivatives)
164
- # ----------------------------
165
-
166
- # I.3.1 Double Bond to Hetero
167
-
168
- Aldehyde: [$([CX3H][#6]),$([CX3H2])]=[OX1]
169
- # hits aldehydes including formaldehyde
170
-
171
- Ketone: [#6][CX3](=[OX1])[#6]
172
- # does not include oxo-groups connected to a (hetero-) aromatic ring
173
-
174
- Thioaldehyde: [$([CX3H][#6]),$([CX3H2])]=[SX1]
175
-
176
- Thioketone: [#6][CX3](=[SX1])[#6]
177
- # does not include thioxo-groups connected to a (hetero-) aromatic ring
178
-
179
- Imine: [NX2;$([N][#6]),$([NH]);!$([N][CX3]=[#7,#8,#15,#16])]=[CX3;$([CH2]),$([CH][#6]),$([C]([#6])[#6])]
180
- # nitrogen is not part of an amidelike strukture, nor of an aromatic ring, but can be part of an aminal or similar
181
-
182
- Immonium: [NX3+;!$([N][!#6]);!$([N][CX3]=[#7,#8,#15,#16])]
183
-
184
- Oxime: [NX2](=[CX3;$([CH2]),$([CH][#6]),$([C]([#6])[#6])])[OX2H]
185
-
186
- Oximether: [NX2](=[CX3;$([CH2]),$([CH][#6]),$([C]([#6])[#6])])[OX2][#6;!$(C=[#7,#8])]
187
- # ether, not ester or amide; does not hit isoxazole
188
-
189
-
190
- # I.3.2. Two Single Bonds to Hetero
191
-
192
- Acetal: [OX2]([#6;!$(C=[O,S,N])])[CX4;!$(C(O)(O)[!#6])][OX2][#6;!$(C=[O,S,N])]
193
- # does not hit hydroxy-methylesters, ketenacetals, hemiacetals, orthoesters, etc.
194
-
195
- Hemiacetal: [OX2H][CX4;!$(C(O)(O)[!#6])][OX2][#6;!$(C=[O,S,N])]
196
-
197
- Aminal: [NX3v3;!$(NC=[#7,#8,#15,#16])]([#6])[CX4;!$(C(N)(N)[!#6])][NX3v3;!$(NC=[#7,#8,#15,#16])][#6]
198
- # Ns are not part of an amide or similar. v3 ist to exclude nitro and similar groups
199
-
200
- Hemiaminal: [NX3v3;!$(NC=[#7,#8,#15,#16])]([#6])[CX4;!$(C(N)(N)[!#6])][OX2H]
201
-
202
- Thioacetal: [SX2]([#6;!$(C=[O,S,N])])[CX4;!$(C(S)(S)[!#6])][SX2][#6;!$(C=[O,S,N])]
203
-
204
- Thiohemiacetal: [SX2]([#6;!$(C=[O,S,N])])[CX4;!$(C(S)(S)[!#6])][OX2H]
205
-
206
- Halogen_acetal_like: [NX3v3,SX2,OX2;!$(*C=[#7,#8,#15,#16])][CX4;!$(C([N,S,O])([N,S,O])[!#6])][FX1,ClX1,BrX1,IX1]
207
- # hits chloromethylenethers and other reactive alkylating agents
208
-
209
- Acetal_like: [NX3v3,SX2,OX2;!$(*C=[#7,#8,#15,#16])][CX4;!$(C([N,S,O])([N,S,O])[!#6])][FX1,ClX1,BrX1,IX1,NX3v3,SX2,OX2;!$(*C=[#7,#8,#15,#16])]
210
- # includes all of the above and other combinations (S-C-N, hydrates, ...), but still no aminomethylenesters and similar
211
-
212
- Halogenmethylen_ester_and_similar: [NX3v3,SX2,OX2;$(**=[#7,#8,#15,#16])][CX4;!$(C([N,S,O])([N,S,O])[!#6])][FX1,ClX1,BrX1,IX1]
213
- # also reactive alkylating agents. Acid does not have to be carboxylic acid, also S- and P-based acids allowed
214
-
215
- NOS_methylen_ester_and_similar: [NX3v3,SX2,OX2;$(**=[#7,#8,#15,#16])][CX4;!$(C([N,S,O])([N,S,O])[!#6])][NX3v3,SX2,OX2;!$(*C=[#7,#8,#15,#16])]
216
- # Same as above, but N,O or S instead of halogen. Ester/amide allowed only on one side
217
-
218
- Hetero_methylen_ester_and_similar: [NX3v3,SX2,OX2;$(**=[#7,#8,#15,#16])][CX4;!$(C([N,S,O])([N,S,O])[!#6])][FX1,ClX1,BrX1,IX1,NX3v3,SX2,OX2;!$(*C=[#7,#8,#15,#16])]
219
- # Combination of the last two patterns
220
-
221
- Cyanhydrine: [NX1]#[CX2][CX4;$([CH2]),$([CH]([CX2])[#6]),$(C([CX2])([#6])[#6])][OX2H]
222
-
223
-
224
- # I.3.3 Single Bond to Hetero, C=C Double Bond (Enols and Similar)
225
-
226
- Chloroalkene: [ClX1][CX3]=[CX3]
227
-
228
- Fluoroalkene: [FX1][CX3]=[CX3]
229
-
230
- Bromoalkene: [BrX1][CX3]=[CX3]
231
-
232
- Iodoalkene: [IX1][CX3]=[CX3]
233
-
234
- Enol: [OX2H][CX3;$([H1]),$(C[#6])]=[CX3]
235
- # no phenols
236
-
237
- Endiol: [OX2H][CX3;$([H1]),$(C[#6])]=[CX3;$([H1]),$(C[#6])][OX2H]
238
- # no 1,2-diphenols, ketenacetals, ...
239
-
240
- Enolether: [OX2]([#6;!$(C=[N,O,S])])[CX3;$([H0][#6]),$([H1])]=[CX3]
241
- # finds also endiodiethers, but not enolesters, no aromats
242
-
243
- Enolester: [OX2]([CX3]=[OX1])[#6X3;$([#6][#6]),$([H1])]=[#6X3;!$(C[OX2H])]
244
-
245
-
246
- Enamine: [NX3;$([NH2][CX3]),$([NH1]([CX3])[#6]),$([N]([CX3])([#6])[#6]);!$([N]*=[#7,#8,#15,#16])][CX3;$([CH]),$([C][#6])]=[CX3]
247
- # does not hit amines attached to aromatic rings, nor may the nitrogen be aromatic
248
-
249
- Thioenol: [SX2H][CX3;$([H1]),$(C[#6])]=[CX3]
250
-
251
- Thioenolether: [SX2]([#6;!$(C=[N,O,S])])[CX3;$(C[#6]),$([CH])]=[CX3]
252
-
253
-
254
- # I.4: Three Carbon-Hetero Bonds (Carboxyl and Derivatives)
255
- # ------------------------------
256
-
257
- Acylchloride: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[ClX1]
258
-
259
- Acylfluoride: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[FX1]
260
-
261
- Acylbromide: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[BrX1]
262
-
263
- Acyliodide: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[IX1]
264
-
265
- Acylhalide: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[FX1,ClX1,BrX1,IX1]
266
- # all of the above
267
-
268
-
269
- # The following contains all simple carboxylic combinations of O, N, S, & Hal -
270
- # - acids, esters, amides, ... as well as a few extra cases (anhydride, hydrazide...)
271
- # Cyclic structures (including aromats) like lactones, lactames, ... got their own
272
- # definitions. Structures where both heteroatoms are part of an aromatic ring
273
- # (oxazoles, imidazoles, ...) were excluded.
274
-
275
- Carboxylic_acid: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[$([OX2H]),$([OX1-])]
276
- # includes carboxylate anions
277
-
278
- Carboxylic_ester: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[OX2][#6;!$(C=[O,N,S])]
279
- # does not hit anhydrides or lactones
280
-
281
- Lactone: [#6][#6X3R](=[OX1])[#8X2][#6;!$(C=[O,N,S])]
282
- # may also be aromatic
283
-
284
- Carboxylic_anhydride: [CX3;$([H0][#6]),$([H1])](=[OX1])[#8X2][CX3;$([H0][#6]),$([H1])](=[OX1])
285
- # anhydride formed by two carboxylic acids, no mixed anhydrides (e.g. between carboxylic acid and sulfuric acid); may be part of a ring, even aromatic
286
-
287
- Carboxylic_acid_derivative: [$([#6X3H0][#6]),$([#6X3H])](=[!#6])[!#6]
288
- # includes most of the structures of I.4 and many more, also 1,3-heteroaromatics such as isoxazole
289
-
290
- Carbothioic_acid: [CX3;!R;$([C][#6]),$([CH]);$([C](=[OX1])[$([SX2H]),$([SX1-])]),$([C](=[SX1])[$([OX2H]),$([OX1-])])]
291
- # hits both tautomeric forms, as well as anions
292
-
293
- Carbothioic_S_ester: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[SX2][#6;!$(C=[O,N,S])]
294
-
295
- Carbothioic_S_lactone: [#6][#6X3R](=[OX1])[#16X2][#6;!$(C=[O,N,S])]
296
- # may also be aromatic
297
-
298
- Carbothioic_O_ester: [CX3;$([H0][#6]),$([H1])](=[SX1])[OX2][#6;!$(C=[O,N,S])]
299
-
300
- Carbothioic_O_lactone: [#6][#6X3R](=[SX1])[#8X2][#6;!$(C=[O,N,S])]
301
-
302
- Carbothioic_halide: [CX3;$([H0][#6]),$([H1])](=[SX1])[FX1,ClX1,BrX1,IX1]
303
-
304
- Carbodithioic_acid: [CX3;!R;$([C][#6]),$([CH]);$([C](=[SX1])[SX2H])]
305
-
306
- Carbodithioic_ester: [CX3;!R;$([C][#6]),$([CH]);$([C](=[SX1])[SX2][#6;!$(C=[O,N,S])])]
307
-
308
- Carbodithiolactone: [#6][#6X3R](=[SX1])[#16X2][#6;!$(C=[O,N,S])]
309
-
310
-
311
- Amide: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
312
- # does not hit lactames
313
-
314
- Primary_amide: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[NX3H2]
315
-
316
- Secondary_amide: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[#7X3H1][#6;!$(C=[O,N,S])]
317
-
318
- Tertiary_amide: [CX3;$([R0][#6]),$([H1R0])](=[OX1])[#7X3H0]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])]
319
-
320
- Lactam: [#6R][#6X3R](=[OX1])[#7X3;$([H1][#6;!$(C=[O,N,S])]),$([H0]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
321
- # cyclic amides, may also be aromatic
322
-
323
- Alkyl_imide: [#6X3;$([H0][#6]),$([H1])](=[OX1])[#7X3H0]([#6])[#6X3;$([H0][#6]),$([H1])](=[OX1])
324
- # may be part of a ring, even aromatic. only C allowed at central N. May also be triacyl amide
325
-
326
- N_hetero_imide: [#6X3;$([H0][#6]),$([H1])](=[OX1])[#7X3H0]([!#6])[#6X3;$([H0][#6]),$([H1])](=[OX1])
327
- # everything else than H or C at central N
328
-
329
- Imide_acidic: [#6X3;$([H0][#6]),$([H1])](=[OX1])[#7X3H1][#6X3;$([H0][#6]),$([H1])](=[OX1])
330
- # can be deprotonated
331
-
332
- Thioamide: [$([CX3;!R][#6]),$([CX3H;!R])](=[SX1])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
333
- # does not hit thiolactames
334
-
335
- Thiolactam: [#6R][#6X3R](=[SX1])[#7X3;$([H1][#6;!$(C=[O,N,S])]),$([H0]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
336
- # cyclic thioamides, may also be aromatic
337
-
338
-
339
- Oximester: [#6X3;$([H0][#6]),$([H1])](=[OX1])[#8X2][#7X2]=,:[#6X3;$([H0]([#6])[#6]),$([H1][#6]),$([H2])]
340
- # may also be part of a ring / aromatic
341
-
342
- Amidine: [NX3;!$(NC=[O,S])][CX3;$([CH]),$([C][#6])]=[NX2;!$(NC=[O,S])]
343
- # only basic amidines, not as part of aromatic ring (e.g. imidazole)
344
-
345
- Hydroxamic_acid: [CX3;$([H0][#6]),$([H1])](=[OX1])[#7X3;$([H1]),$([H0][#6;!$(C=[O,N,S])])][$([OX2H]),$([OX1-])]
346
-
347
- Hydroxamic_acid_ester: [CX3;$([H0][#6]),$([H1])](=[OX1])[#7X3;$([H1]),$([H0][#6;!$(C=[O,N,S])])][OX2][#6;!$(C=[O,N,S])]
348
- # does not hit anhydrides of carboxylic acids with hydroxamic acids
349
-
350
-
351
- Imidoacid: [CX3R0;$([H0][#6]),$([H1])](=[NX2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[$([OX2H]),$([OX1-])]
352
- # not cyclic
353
-
354
- Imidoacid_cyclic: [#6R][#6X3R](=,:[#7X2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[$([OX2H]),$([OX1-])]
355
- # the enamide-form of lactams. may be aromatic like 2-hydroxypyridine
356
-
357
- Imidoester: [CX3R0;$([H0][#6]),$([H1])](=[NX2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[OX2][#6;!$(C=[O,N,S])]
358
- # esters of the above structures. no anhydrides.
359
-
360
- Imidolactone: [#6R][#6X3R](=,:[#7X2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[OX2][#6;!$(C=[O,N,S])]
361
- # no oxazoles and similar
362
-
363
- Imidothioacid: [CX3R0;$([H0][#6]),$([H1])](=[NX2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[$([SX2H]),$([SX1-])]
364
- # not cyclic
365
-
366
- Imidothioacid_cyclic: [#6R][#6X3R](=,:[#7X2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[$([SX2H]),$([SX1-])]
367
- # the enamide-form of thiolactams. may be aromatic like 2-thiopyridine
368
-
369
- Imidothioester: [CX3R0;$([H0][#6]),$([H1])](=[NX2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[SX2][#6;!$(C=[O,N,S])]
370
- # thioesters of the above structures. no anhydrides.
371
-
372
- Imidothiolactone: [#6R][#6X3R](=,:[#7X2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[SX2][#6;!$(C=[O,N,S])]
373
- # no thioxazoles and similar
374
-
375
- Amidine: [#7X3v3;!$(N([#6X3]=[#7X2])C=[O,S])][CX3R0;$([H1]),$([H0][#6])]=[NX2v3;!$(N(=[#6X3][#7X3])C=[O,S])]
376
- # only basic amidines, not substituted by carbonyl or thiocarbonyl, not as part of a ring
377
-
378
- Imidolactam: [#6][#6X3R;$([H0](=[NX2;!$(N(=[#6X3][#7X3])C=[O,S])])[#7X3;!$(N([#6X3]=[#7X2])C=[O,S])]),$([H0](-[NX3;!$(N([#6X3]=[#7X2])C=[O,S])])=,:[#7X2;!$(N(=[#6X3][#7X3])C=[O,S])])]
379
- # one of the two C~N bonds is part of a ring (may be aromatic), but not both - thus no imidazole
380
-
381
- Imidoylhalide: [CX3R0;$([H0][#6]),$([H1])](=[NX2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[FX1,ClX1,BrX1,IX1]
382
- # not cyclic
383
-
384
- Imidoylhalide_cyclic: [#6R][#6X3R](=,:[#7X2;$([H1]),$([H0][#6;!$(C=[O,N,S])])])[FX1,ClX1,BrX1,IX1]
385
- # may also be aromatic
386
-
387
- # may be ring, aromatic, substituted with carbonyls, hetero, ...
388
- # (everything else would get too complicated)
389
-
390
- Amidrazone: [$([$([#6X3][#6]),$([#6X3H])](=[#7X2v3])[#7X3v3][#7X3v3]),$([$([#6X3][#6]),$([#6X3H])]([#7X3v3])=[#7X2v3][#7X3v3])]
391
- # hits both tautomers. as above, it may be ring, aromatic, substituted with carbonyls, hetero, ...
392
-
393
-
394
- Alpha_aminoacid: [NX3,NX4+;!$([N]~[!#6]);!$([N]*~[#7,#8,#15,#16])][C][CX3](=[OX1])[OX2H,OX1-]
395
- # N may be alkylated, but not part of an amide (as in peptides), ionic forms are included
396
- # includes also non-natural aminoacids with double-bonded or two aliph./arom. substituents at alpha-C
397
- # N may not be aromatic as in 1H-pyrrole-2-carboxylic acid
398
-
399
- Alpha_hydroxyacid: [OX2H][C][CX3](=[OX1])[OX2H,OX1-]
400
-
401
- Peptide_middle: [NX3;$([N][CX3](=[OX1])[C][NX3,NX4+])][C][CX3](=[OX1])[NX3;$([N][C][CX3](=[OX1])[NX3,OX2,OX1-])]
402
- # finds peptidic structures which are neither C- nor N-terminal. Both neighbours must be amino-acids/peptides
403
-
404
- Peptide_C_term: [NX3;$([N][CX3](=[OX1])[C][NX3,NX4+])][C][CX3](=[OX1])[OX2H,OX1-]
405
- # finds C-terminal amino acids
406
-
407
- Peptide_N_term: [NX3,NX4+;!$([N]~[!#6]);!$([N]*~[#7,#8,#15,#16])][C][CX3](=[OX1])[NX3;$([N][C][CX3](=[OX1])[NX3,OX2,OX1-])]
408
- # finds N-terminal amino acids. As above, N may be substituted, but not part of an amide-bond.
409
-
410
-
411
- Carboxylic_orthoester: [#6][OX2][CX4;$(C[#6]),$([CH])]([OX2][#6])[OX2][#6]
412
- # also hits anhydride-like structures (e.g. HC(OMe)2-OC=O residues)
413
-
414
- Ketene: [CX3]=[CX2]=[OX1]
415
-
416
- Ketenacetal: [#7X2,#8X3,#16X2;$(*[#6,#14])][#6X3]([#7X2,#8X3,#16X2;$(*[#6,#14])])=[#6X3]
417
- # includes aminals, silylacetals, ketenesters, etc. C=C DB is not aromatic, everything else may be
418
-
419
- Nitrile: [NX1]#[CX2]
420
- # includes cyanohydrins
421
-
422
- Isonitrile: [CX1-]#[NX2+]
423
-
424
-
425
- Vinylogous_carbonyl_or_carboxyl_derivative: [#6X3](=[OX1])[#6X3]=,:[#6X3][#7,#8,#16,F,Cl,Br,I]
426
- # may be part of a ring, even aromatic
427
-
428
- Vinylogous_acid: [#6X3](=[OX1])[#6X3]=,:[#6X3][$([OX2H]),$([OX1-])]
429
-
430
- Vinylogous_ester: [#6X3](=[OX1])[#6X3]=,:[#6X3][#6;!$(C=[O,N,S])]
431
-
432
- Vinylogous_amide: [#6X3](=[OX1])[#6X3]=,:[#6X3][#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
433
-
434
- Vinylogous_halide: [#6X3](=[OX1])[#6X3]=,:[#6X3][FX1,ClX1,BrX1,IX1]
435
-
436
-
437
-
438
- # I.5: Four Carbon-Hetero Bonds (Carbonic Acid and Derivatives)
439
- # -----------------------------
440
-
441
- Carbonic_acid_dieester: [#6;!$(C=[O,N,S])][#8X2][#6X3](=[OX1])[#8X2][#6;!$(C=[O,N,S])]
442
- # may be part of a ring, even aromatic
443
-
444
- Carbonic_acid_esterhalide: [#6;!$(C=[O,N,S])][OX2;!R][CX3](=[OX1])[OX2][FX1,ClX1,BrX1,IX1]
445
-
446
- Carbonic_acid_monoester: [#6;!$(C=[O,N,S])][OX2;!R][CX3](=[OX1])[$([OX2H]),$([OX1-])]
447
- # unstable
448
-
449
- Carbonic_acid_derivatives: [!#6][#6X3](=[!#6])[!#6]
450
-
451
-
452
- Thiocarbonic_acid_dieester: [#6;!$(C=[O,N,S])][#8X2][#6X3](=[SX1])[#8X2][#6;!$(C=[O,N,S])]
453
- # may be part of a ring, even aromatic
454
-
455
- Thiocarbonic_acid_esterhalide: [#6;!$(C=[O,N,S])][OX2;!R][CX3](=[SX1])[OX2][FX1,ClX1,BrX1,IX1]
456
-
457
- Thiocarbonic_acid_monoester: [#6;!$(C=[O,N,S])][OX2;!R][CX3](=[SX1])[$([OX2H]),$([OX1-])]
458
-
459
-
460
- Urea:[#7X3;!$([#7][!#6])][#6X3](=[OX1])[#7X3;!$([#7][!#6])]
461
- # no check whether part of imide, biuret, etc. Aromatic structures are only hit if
462
- # both N share no double bonds, like in the dioxo-form of uracil
463
-
464
- Thiourea: [#7X3;!$([#7][!#6])][#6X3](=[SX1])[#7X3;!$([#7][!#6])]
465
-
466
- Isourea: [#7X2;!$([#7][!#6])]=,:[#6X3]([#8X2&!$([#8][!#6]),OX1-])[#7X3;!$([#7][!#6])]
467
- # O may be substituted. no check whether further amide-like bonds are present. Aromatic
468
- # structures are only hit if single bonded N shares no additional double bond, like in
469
- # the 1-hydroxy-3-oxo form of uracil
470
-
471
- Isothiourea: [#7X2;!$([#7][!#6])]=,:[#6X3]([#16X2&!$([#16][!#6]),SX1-])[#7X3;!$([#7][!#6])]
472
-
473
- Guanidine: [N;v3X3,v4X4+][CX3](=[N;v3X2,v4X3+])[N;v3X3,v4X4+]
474
- # also hits guanidinium salts. v3 and v4 to avoid nitroamidines
475
-
476
- Carbaminic_acid: [NX3]C(=[OX1])[O;X2H,X1-]
477
- # quite unstable, unlikely to be found. Also hits salts
478
-
479
- Urethan: [#7X3][#6](=[OX1])[#8X2][#6]
480
- # also hits when part of a ring, no check whether the last C is part of carbonyl
481
-
482
- Biuret: [#7X3][#6](=[OX1])[#7X3][#6](=[OX1])[#7X3]
483
-
484
- Semicarbazide: [#7X3][#7X3][#6X3]([#7X3;!$([#7][#7])])=[OX1]
485
-
486
- Carbazide: [#7X3][#7X3][#6X3]([#7X3][#7X3])=[OX1]
487
-
488
- Semicarbazone: [#7X2](=[#6])[#7X3][#6X3]([#7X3;!$([#7][#7])])=[OX1]
489
-
490
- Carbazone: [#7X2](=[#6])[#7X3][#6X3]([#7X3][#7X3])=[OX1]
491
-
492
- Thiosemicarbazide: [#7X3][#7X3][#6X3]([#7X3;!$([#7][#7])])=[SX1]
493
-
494
- Thiocarbazide: [#7X3][#7X3][#6X3]([#7X3][#7X3])=[SX1]
495
-
496
- Thiosemicarbazone: [#7X2](=[#6])[#7X3][#6X3]([#7X3;!$([#7][#7])])=[SX1]
497
-
498
- Thiocarbazone: [#7X2](=[#6])[#7X3][#6X3]([#7X3][#7X3])=[SX1]
499
-
500
-
501
- Isocyanate: [NX2]=[CX2]=[OX1]
502
-
503
- Cyanate: [OX2][CX2]#[NX1]
504
-
505
- Isothiocyanate: [NX2]=[CX2]=[SX1]
506
-
507
- Thiocyanate: [SX2][CX2]#[NX1]
508
-
509
- Carbodiimide: [NX2]=[CX2]=[NX2]
510
-
511
- Orthocarbonic_derivatives: [CX4H0]([O,S,#7])([O,S,#7])([O,S,#7])[O,S,#7,F,Cl,Br,I]
512
- # halogen allowed just once, to avoid mapping to -OCF3 and similar groups (much more
513
- # stable than, for example, C(OCH3)4)
514
-
515
-
516
- # I.6 Aromatics
517
- # -------------
518
-
519
- # I know that this classification is not very logical, arylamines are found under I.2 ...
520
-
521
- Phenol: [OX2H][c]
522
-
523
- 1,2-Diphenol: [OX2H][c][c][OX2H]
524
-
525
- Arylchloride: [Cl][c]
526
-
527
- Arylfluoride: [F][c]
528
-
529
- Arylbromide: [Br][c]
530
-
531
- Aryliodide: [I][c]
532
-
533
- Arylthiol: [SX2H][c]
534
-
535
- Iminoarene: [c]=[NX2;$([H1]),$([H0][#6;!$([C]=[N,S,O])])]
536
- # N may be substituted with H or C, but not carbonyl or similar
537
- # aromatic atom is always C, not S or P (these are not planar when substituted)
538
-
539
- Oxoarene: [c]=[OX1]
540
-
541
- Thioarene: [c]=[SX1]
542
-
543
- Hetero_N_basic_H: [nX3H1+0]
544
- # as in pyrrole. uncharged to exclude pyridinium ions
545
-
546
- Hetero_N_basic_no_H: [nX3H0+0]
547
- # as in N-methylpyrrole. uncharged to exclude pyridinium ions
548
-
549
- Hetero_N_nonbasic: [nX2,nX3+]
550
- # as in pyridine, pyridinium
551
-
552
- Hetero_O: [o]
553
-
554
- Hetero_S: [sX2]
555
- # X2 because Daylight's depictmatch falsely describes C1=CS(=O)C=C1 as aromatic
556
- # (is not planar because of lone pair at S)
557
-
558
- Heteroaromatic: [a;!c]
559
-
560
-
561
- # Part II: N, S, P, Si, B
562
- # =======================
563
-
564
-
565
- # II.1 Nitrogen
566
- # -------------
567
-
568
- Nitrite: [NX2](=[OX1])[O;$([X2]),$([X1-])]
569
- # hits nitrous acid, its anion, esters, and other O-substituted derivatives
570
-
571
- Thionitrite: [SX2][NX2]=[OX1]
572
-
573
- Nitrate: [$([NX3](=[OX1])(=[OX1])[O;$([X2]),$([X1-])]),$([NX3+]([OX1-])(=[OX1])[O;$([X2]),$([X1-])])]
574
- # hits nitric acid, its anion, esters, and other O-substituted derivatives
575
-
576
- Nitro: [$([NX3](=O)=O),$([NX3+](=O)[O-])][!#8]
577
- # hits nitro groups attached to C,N, ... but not nitrates
578
-
579
- Nitroso: [NX2](=[OX1])[!#7;!#8]
580
- # no nitrites, no nitrosamines
581
-
582
- Azide: [NX1]~[NX2]~[NX2,NX1]
583
- # hits both mesomeric forms, also anion
584
-
585
- Acylazide: [CX3](=[OX1])[NX2]~[NX2]~[NX1]
586
-
587
- Diazo: [$([#6]=[NX2+]=[NX1-]),$([#6-]-[NX2+]#[NX1])]
588
-
589
- Diazonium: [#6][NX2+]#[NX1]
590
-
591
- Nitrosamine: [#7;!$(N*=O)][NX2]=[OX1]
592
-
593
- Nitrosamide: [NX2](=[OX1])N-*=O
594
- # includes nitrososulfonamides
595
-
596
- N-Oxide: [$([#7+][OX1-]),$([#7v5]=[OX1]);!$([#7](~[O])~[O]);!$([#7]=[#7])]
597
- # Hits both forms. Won't hit azoxy, nitro, nitroso, or nitrate.
598
-
599
-
600
- Hydrazine: [NX3;$([H2]),$([H1][#6]),$([H0]([#6])[#6]);!$(NC=[O,N,S])][NX3;$([H2]),$([H1][#6]),$([H0]([#6])[#6]);!$(NC=[O,N,S])]
601
- # no hydrazides
602
-
603
- Hydrazone: [NX3;$([H2]),$([H1][#6]),$([H0]([#6])[#6]);!$(NC=[O,N,S])][NX2]=[#6]
604
-
605
- Hydroxylamine: [NX3;$([H2]),$([H1][#6]),$([H0]([#6])[#6]);!$(NC=[O,N,S])][OX2;$([H1]),$(O[#6;!$(C=[N,O,S])])]
606
- # no discrimination between O-, N-, and O,N-substitution
607
-
608
-
609
- # II.2 Sulfur
610
- # -----------
611
-
612
- Sulfon: [$([SX4](=[OX1])(=[OX1])([#6])[#6]),$([SX4+2]([OX1-])([OX1-])([#6])[#6])]
613
- # can't be aromatic, thus S and not #16
614
-
615
- Sulfoxide: [$([SX3](=[OX1])([#6])[#6]),$([SX3+]([OX1-])([#6])[#6])]
616
-
617
- Sulfonium: [S+;!$([S]~[!#6]);!$([S]*~[#7,#8,#15,#16])]
618
- # can't be aromatic, thus S and not #16
619
-
620
- Sulfuric_acid: [SX4](=[OX1])(=[OX1])([$([OX2H]),$([OX1-])])[$([OX2H]),$([OX1-])]
621
- # includes anions
622
-
623
- Sulfuric_monoester: [SX4](=[OX1])(=[OX1])([$([OX2H]),$([OX1-])])[OX2][#6;!$(C=[O,N,S])]
624
-
625
- Sulfuric_diester: [SX4](=[OX1])(=[OX1])([OX2][#6;!$(C=[O,N,S])])[OX2][#6;!$(C=[O,N,S])]
626
-
627
- Sulfuric_monoamide: [SX4](=[OX1])(=[OX1])([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])[$([OX2H]),$([OX1-])]
628
-
629
- Sulfuric_diamide: [SX4](=[OX1])(=[OX1])([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
630
-
631
- Sulfuric_esteramide: [SX4](=[OX1])(=[OX1])([#7X3][#6;!$(C=[O,N,S])])[OX2][#6;!$(C=[O,N,S])]
632
-
633
- Sulfuric_derivative: [SX4D4](=[!#6])(=[!#6])([!#6])[!#6]
634
- # everything else (would not be a "true" derivative of sulfuric acid, if one of the substituents were less electronegative
635
- # than sulfur, but this should be very very rare, anyway)
636
-
637
-
638
-
639
- #### sulfurous acid and derivatives missing!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
640
-
641
-
642
-
643
-
644
- Sulfonic_acid: [SX4;$([H1]),$([H0][#6])](=[OX1])(=[OX1])[$([OX2H]),$([OX1-])]
645
-
646
- Sulfonamide: [SX4;$([H1]),$([H0][#6])](=[OX1])(=[OX1])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
647
-
648
- Sulfonic_ester: [SX4;$([H1]),$([H0][#6])](=[OX1])(=[OX1])[OX2][#6;!$(C=[O,N,S])]
649
-
650
- Sulfonic_halide: [SX4;$([H1]),$([H0][#6])](=[OX1])(=[OX1])[FX1,ClX1,BrX1,IX1]
651
-
652
- Sulfonic_derivative: [SX4;$([H1]),$([H0][#6])](=[!#6])(=[!#6])[!#6]
653
- # includes all of the above and many more
654
- # for comparison: this is what "all sulfonic derivatives but not the ones above" would look like:
655
- # [$([SX4;$([H1]),$([H0][#6])](=[!#6])(=[!#6;!O])[!#6]),$([SX4;$([H1]),$([H0][#6])](=[OX1])(=[OX1])[!$([FX1,ClX1,BrX1,IX1]);!$([#6]);!$([OX2H]);!$([OX1-]);!$([OX2][#6;!$(C=[O,N,S])]);!$([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])])]
656
-
657
-
658
- Sulfinic_acid: [SX3;$([H1]),$([H0][#6])](=[OX1])[$([OX2H]),$([OX1-])]
659
-
660
- Sulfinic_amide: [SX3;$([H1]),$([H0][#6])](=[OX1])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
661
-
662
- Sulfinic_ester: [SX3;$([H1]),$([H0][#6])](=[OX1])[OX2][#6;!$(C=[O,N,S])]
663
-
664
- Sulfinic_halide: [SX3;$([H1]),$([H0][#6])](=[OX1])[FX1,ClX1,BrX1,IX1]
665
-
666
- Sulfinic_derivative: [SX3;$([H1]),$([H0][#6])](=[!#6])[!#6]
667
-
668
- Sulfenic_acid: [SX2;$([H1]),$([H0][#6])][$([OX2H]),$([OX1-])]
669
-
670
- Sulfenic_amide: [SX2;$([H1]),$([H0][#6])][#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
671
-
672
- Sulfenic_ester: [SX2;$([H1]),$([H0][#6])][OX2][#6;!$(C=[O,N,S])]
673
-
674
- Sulfenic_halide: [SX2;$([H1]),$([H0][#6])][FX1,ClX1,BrX1,IX1]
675
-
676
- Sulfenic_derivative: [SX2;$([H1]),$([H0][#6])][!#6]
677
-
678
-
679
- # II.3 Phosphorous
680
- # ----------------
681
-
682
- Phosphine: [PX3;$([H3]),$([H2][#6]),$([H1]([#6])[#6]),$([H0]([#6])([#6])[#6])]
683
- # similar to amine, but less restrictive: includes also amide- and aminal-analogues
684
-
685
- Phosphine_oxide: [PX4;$([H3]=[OX1]),$([H2](=[OX1])[#6]),$([H1](=[OX1])([#6])[#6]),$([H0](=[OX1])([#6])([#6])[#6])]
686
-
687
- Phosphonium: [P+;!$([P]~[!#6]);!$([P]*~[#7,#8,#15,#16])]
688
- # similar to Ammonium
689
-
690
- Phosphorylen: [PX4;$([H3]=[CX3]),$([H2](=[CX3])[#6]),$([H1](=[CX3])([#6])[#6]),$([H0](=[CX3])([#6])([#6])[#6])]
691
-
692
-
693
- # conventions for the following acids and derivatives:
694
- # acids find protonated and deprotonated acids
695
- # esters do not find mixed anhydrides ( ...P-O-C(=O))
696
- # derivatives: substituents which go in place of the OH and =O are not H or C (may also be O,
697
- # thus including acids and esters)
698
-
699
- Phosphonic_acid: [PX4;$([H1]),$([H0][#6])](=[OX1])([$([OX2H]),$([OX1-])])[$([OX2H]),$([OX1-])]
700
- # includes anions
701
-
702
- Phosphonic_monoester: [PX4;$([H1]),$([H0][#6])](=[OX1])([$([OX2H]),$([OX1-])])[OX2][#6;!$(C=[O,N,S])]
703
-
704
- Phosphonic_diester: [PX4;$([H1]),$([H0][#6])](=[OX1])([OX2][#6;!$(C=[O,N,S])])[OX2][#6;!$(C=[O,N,S])]
705
-
706
- Phosphonic_monoamide: [PX4;$([H1]),$([H0][#6])](=[OX1])([$([OX2H]),$([OX1-])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
707
-
708
- Phosphonic_diamide: [PX4;$([H1]),$([H0][#6])](=[OX1])([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
709
-
710
- Phosphonic_esteramide: [PX4;$([H1]),$([H0][#6])](=[OX1])([OX2][#6;!$(C=[O,N,S])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
711
-
712
- Phosphonic_acid_derivative: [PX4;$([H1]),$([H0][#6])](=[!#6])([!#6])[!#6]
713
- # all of the above and much more
714
-
715
-
716
- Phosphoric_acid: [PX4D4](=[OX1])([$([OX2H]),$([OX1-])])([$([OX2H]),$([OX1-])])[$([OX2H]),$([OX1-])]
717
- # includes anions
718
-
719
- Phosphoric_monoester: [PX4D4](=[OX1])([$([OX2H]),$([OX1-])])([$([OX2H]),$([OX1-])])[OX2][#6;!$(C=[O,N,S])]
720
-
721
- Phosphoric_diester: [PX4D4](=[OX1])([$([OX2H]),$([OX1-])])([OX2][#6;!$(C=[O,N,S])])[OX2][#6;!$(C=[O,N,S])]
722
-
723
- Phosphoric_triester: [PX4D4](=[OX1])([OX2][#6;!$(C=[O,N,S])])([OX2][#6;!$(C=[O,N,S])])[OX2][#6;!$(C=[O,N,S])]
724
-
725
- Phosphoric_monoamide: [PX4D4](=[OX1])([$([OX2H]),$([OX1-])])([$([OX2H]),$([OX1-])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
726
-
727
- Phosphoric_diamide: [PX4D4](=[OX1])([$([OX2H]),$([OX1-])])([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
728
-
729
- Phosphoric_triamide: [PX4D4](=[OX1])([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
730
-
731
- Phosphoric_monoestermonoamide: [PX4D4](=[OX1])([$([OX2H]),$([OX1-])])([OX2][#6;!$(C=[O,N,S])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
732
-
733
- Phosphoric_diestermonoamide: [PX4D4](=[OX1])([OX2][#6;!$(C=[O,N,S])])([OX2][#6;!$(C=[O,N,S])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
734
-
735
- Phosphoric_monoesterdiamide: [PX4D4](=[OX1])([OX2][#6;!$(C=[O,N,S])])([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
736
-
737
- Phosphoric_acid_derivative: [PX4D4](=[!#6])([!#6])([!#6])[!#6]
738
-
739
-
740
- Phosphinic_acid: [PX4;$([H2]),$([H1][#6]),$([H0]([#6])[#6])](=[OX1])[$([OX2H]),$([OX1-])]
741
-
742
- Phosphinic_ester: [PX4;$([H2]),$([H1][#6]),$([H0]([#6])[#6])](=[OX1])[OX2][#6;!$(C=[O,N,S])]
743
-
744
- Phosphinic_amide: [PX4;$([H2]),$([H1][#6]),$([H0]([#6])[#6])](=[OX1])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
745
-
746
- Phosphinic_acid_derivative: [PX4;$([H2]),$([H1][#6]),$([H0]([#6])[#6])](=[!#6])[!#6]
747
-
748
-
749
- Phosphonous_acid: [PX3;$([H1]),$([H0][#6])]([$([OX2H]),$([OX1-])])[$([OX2H]),$([OX1-])]
750
-
751
- Phosphonous_monoester: [PX3;$([H1]),$([H0][#6])]([$([OX2H]),$([OX1-])])[OX2][#6;!$(C=[O,N,S])]
752
-
753
- Phosphonous_diester: [PX3;$([H1]),$([H0][#6])]([OX2][#6;!$(C=[O,N,S])])[OX2][#6;!$(C=[O,N,S])]
754
-
755
- Phosphonous_monoamide: [PX3;$([H1]),$([H0][#6])]([$([OX2H]),$([OX1-])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
756
-
757
- Phosphonous_diamide: [PX3;$([H1]),$([H0][#6])]([#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
758
-
759
- Phosphonous_esteramide: [PX3;$([H1]),$([H0][#6])]([OX2][#6;!$(C=[O,N,S])])[#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
760
-
761
- Phosphonous_derivatives: [PX3;$([D2]),$([D3][#6])]([!#6])[!#6]
762
-
763
-
764
- Phosphinous_acid: [PX3;$([H2]),$([H1][#6]),$([H0]([#6])[#6])][$([OX2H]),$([OX1-])]
765
-
766
- Phosphinous_ester: [PX3;$([H2]),$([H1][#6]),$([H0]([#6])[#6])][OX2][#6;!$(C=[O,N,S])]
767
-
768
- Phosphinous_amide: [PX3;$([H2]),$([H1][#6]),$([H0]([#6])[#6])][#7X3;$([H2]),$([H1][#6;!$(C=[O,N,S])]),$([#7]([#6;!$(C=[O,N,S])])[#6;!$(C=[O,N,S])])]
769
-
770
- Phosphinous_derivatives: [PX3;$([H2]),$([H1][#6]),$([H0]([#6])[#6])][!#6]
771
-
772
-
773
- # II.4 Silicon
774
- # ------------
775
-
776
- Quart_silane: [SiX4]([#6])([#6])([#6])[#6]
777
- # four C-substituents. non-reactive, non-toxic, in experimental phase for drug development
778
-
779
- Non-quart_silane: [SiX4;$([H1]([#6])([#6])[#6]),$([H2]([#6])[#6]),$([H3][#6]),$([H4])]
780
- # has 1-4 hydride(s), reactive. Daylight's depictmatch does not add hydrogens automatically to
781
- # the free positions at Si, thus Hs had to be added implicitly
782
-
783
- Silylmonohalide: [SiX4]([FX1,ClX1,BrX1,IX1])([#6])([#6])[#6]
784
- # reagents for inserting protection groups
785
-
786
- Het_trialkylsilane: [SiX4]([!#6])([#6])([#6])[#6]
787
- # mostly acid-labile protection groups such as trimethylsilyl-ethers
788
-
789
- Dihet_dialkylsilane: [SiX4]([!#6])([!#6])([#6])[#6]
790
-
791
- Trihet_alkylsilane: [SiX4]([!#6])([!#6])([!#6])[#6]
792
-
793
- Silicic_acid_derivative: [SiX4]([!#6])([!#6])([!#6])[!#6]
794
- # four substituents which are neither C nor H
795
-
796
-
797
- # II.5 Boron
798
- # ----------
799
-
800
- Trialkylborane: [BX3]([#6])([#6])[#6]
801
- # also carbonyls allowed
802
-
803
- Boric_acid_derivatives: [BX3]([!#6])([!#6])[!#6]
804
- # includes acids, esters, amides, ... H-substituent at B is very rare.
805
-
806
- Boronic_acid_derivative: [BX3]([!#6])([!#6])[!#6]
807
- # includes acids, esters, amides, ...
808
-
809
- Borohydride: [BH1,BH2,BH3,BH4]
810
- # at least one H attached to B
811
-
812
- Quaternary_boron: [BX4]
813
- # mostly borates (negative charge), in complex with Lewis-base
814
-
815
-
816
-
817
- # Part III: Some Special Patterns
818
- # ===============================
819
-
820
-
821
- # III.1 Chains
822
- # ------------
823
-
824
- # some simple chains
825
-
826
-
827
-
828
- # III.2 Rings
829
- # -----------
830
-
831
- Aromatic: a
832
-
833
- Heterocyclic: [!#6;!R0]
834
- # may be aromatic or not
835
-
836
- Epoxide: [OX2r3]1[#6r3][#6r3]1
837
- # toxic/reactive. may be annelated to aromat, but must not be aromatic itself (oxirane-2,3-dione)
838
-
839
- NH_aziridine: [NX3H1r3]1[#6r3][#6r3]1
840
- # toxic/reactive according to Maybridge's garbage filter
841
-
842
- Spiro: [D4R;$(*(@*)(@*)(@*)@*)]
843
- # at least two different rings can be found which are sharing just one atom.
844
- # these two rings can be connected by a third ring, so it matches also some
845
- # bridged systems, like morphine
846
-
847
- Annelated_rings: [R;$(*(@*)(@*)@*);!$([R2;$(*(@*)(@*)(@*)@*)])]@[R;$(*(@*)(@*)@*);!$([R2;$(*(@*)(@*)(@*)@*)])]
848
- # two different rings sharing exactly two atoms
849
-
850
- Bridged_rings: [R;$(*(@*)(@*)@*);!$([D4R;$(*(@*)(@*)(@*)@*)]);!$([R;$(*(@*)(@*)@*);!$([R2;$(*(@*)(@*)(@*)@*)])]@[R;$(*(@*)(@*)@*);!$([R2;$(*(@*)(@*)(@*)@*)])])]
851
- # part of two or more rings, not spiro, not annelated -> finds bridgehead atoms,
852
- # but only if they are not annelated at the same time - otherwise impossible (?)
853
- # to distinguish from non-bridgehead annelated atoms
854
-
855
- # some basic ring-patterns (just size, no other information):
856
-
857
-
858
-
859
-
860
-
861
- # III.3 Sugars and Nucleosides/Nucleotides, Steroids
862
- # --------------------------------------------------
863
-
864
- # because of the large variety of sugar derivatives, different patterns can be applied.
865
- # The choice of patterns and their combinations will depend on the contents of the database
866
- # e.g. natural products, nucleoside analogues with modified sugars, ... as well as on the
867
- # desired restriction
868
-
869
-
870
- Sugar_pattern_1: [OX2;$([r5]1@C@C@C(O)@C1),$([r6]1@C@C@C(O)@C(O)@C1)]
871
- # 5 or 6-membered ring containing one O and at least one (r5) or two (r6) oxygen-substituents.
872
-
873
- Sugar_pattern_2: [OX2;$([r5]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C1),$([r6]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C@C1)]
874
- # 5 or 6-membered ring containing one O and an acetal-like bond at position 2.
875
-
876
- Sugar_pattern_combi: [OX2;$([r5]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C(O)@C1),$([r6]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C(O)@C(O)@C1)]
877
- # combination of the two above
878
-
879
- Sugar_pattern_2_reducing: [OX2;$([r5]1@C(!@[OX2H1])@C@C@C1),$([r6]1@C(!@[OX2H1])@C@C@C@C1)]
880
- # 5 or 6-membered cyclic hemi-acetal
881
-
882
- Sugar_pattern_2_alpha: [OX2;$([r5]1@[C@@](!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C1),$([r6]1@[C@@](!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C@C1)]
883
- # 5 or 6-membered cyclic hemi-acetal
884
-
885
- Sugar_pattern_2_beta: [OX2;$([r5]1@[C@](!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C1),$([r6]1@[C@](!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C@C1)]
886
- # 5 or 6-membered cyclic hemi-acetal
887
-
888
- ##Poly_sugar_1: ([OX2;$([r5]1@C@C@C(O)@C1),$([r6]1@C@C@C(O)@C(O)@C1)].[OX2;$([r5]1@C@C@C(O)@C1),$([r6]1@C@C@C(O)@C(O)@C1)])
889
- # pattern1 occurs more than once (in same molecule, but moieties don't have to be adjacent!)
890
-
891
- ##Poly_sugar_2: ([OX2;$([r5]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C1),$([r6]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C@C1)].[OX2;$([r5]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C1),$([r6]1@C(!@[OX2,NX3,SX2,FX1,ClX1,BrX1,IX1])@C@C@C@C1)])
892
- # pattern2 occurs more than once (in same molecule, but moieties don't have to be adjacent!)
893
-
894
-
895
- # III.4 Everything else...
896
- # ------------------------
897
-
898
- Conjugated_double_bond: *=*[*]=,#,:[*]
899
-
900
- Conjugated_tripple_bond: *#*[*]=,#,:[*]
901
-
902
- Cis_double_bond: */[D2]=[D2]\*
903
- # only one single-bonded substituent on each DB-atom. no aromats.
904
- # only found when character of DB is explicitly stated.
905
-
906
- Trans_double_bond: */[D2]=[D2]/*
907
- # analog
908
-
909
- Mixed_anhydrides: [$(*=O),$([#16,#14,#5]),$([#7]([#6]=[OX1]))][#8X2][$(*=O),$([#16,#14,#5]),$([#7]([#6]=[OX1]))]
910
- # should hit all combinations of two acids
911
-
912
- Halogen_on_hetero: [FX1,ClX1,BrX1,IX1][!#6]
913
-
914
- Halogen_multi_subst: [F,Cl,Br,I;!$([X1]);!$([X0-])]
915
- # Halogen which is not mono-substituted nor an anion, e.g. chlorate.
916
- # Most of these cases should be also filtered by Halogen_on_hetero.
917
-
918
- Trifluoromethyl: [FX1][CX4;!$([H0][Cl,Br,I]);!$([F][C]([F])([F])[F])]([FX1])([FX1])
919
- # C with three F attached, connected to anything which is not another halogen
920
-
921
- C_ONS_bond: [#6]~[#7,#8,#16]
922
- # probably all drug-like molecules have at least one O, N, or S connected to a C -> nice filter
923
-
924
- ## Mixture: (*).(*)
925
- # two or more separate parts, may also be salt
926
- # component-level grouping is not yet supported in Open Babel Version 2.0
927
-
928
-
929
- Charged: [!+0]
930
-
931
- Anion: [-1,-2,-3,-4,-5,-6,-7]
932
-
933
- Kation: [+1,+2,+3,+4,+5,+6,+7]
934
-
935
- Salt: ([-1,-2,-3,-4,-5,-6,-7]).([+1,+2,+3,+4,+5,+6,+7])
936
- # two or more separate components with opposite charges
937
-
938
- ##Zwitterion: ([-1,-2,-3,-4,-5,-6,-7].[+1,+2,+3,+4,+5,+6,+7])
939
- # both negative and positive charges somewhere within the same molecule.
940
-
941
- 1,3-Tautomerizable: [$([#7X2,OX1,SX1]=*[!H0;!$([a;!n])]),$([#7X3,OX2,SX2;!H0]*=*),$([#7X3,OX2,SX2;!H0]*:n)]
942
- # 1,3 migration of H allowed. Includes keto/enol and amide/enamide.
943
- # Aromatic rings must stay aromatic - no keto form of phenol
944
-
945
- 1,5-Tautomerizable: [$([#7X2,OX1,SX1]=,:**=,:*[!H0;!$([a;!n])]),$([#7X3,OX2,SX2;!H0]*=**=*),$([#7X3,OX2,SX2;!H0]*=,:**:n)]
946
-
947
- Rotatable_bond: [!$(*#*)&!D1]-!@[!$(*#*)&!D1]
948
- # taken from http://www.daylight.com/support/contrib/smarts/content.html
949
-
950
- Michael_acceptor: [CX3]=[CX3][$([CX3]=[O,N,S]),$(C#[N]),$([S,P]=[OX1]),$([NX3]=O),$([NX3+](=O)[O-])]
951
- # the classical case: C=C near carbonyl, nitrile, nitro, or similar
952
- # Oxo-heteroaromats and similar are not included.
953
-
954
- Dicarbodiazene: [CX3](=[OX1])[NX2]=[NX2][CX3](=[OX1])
955
- # Michael-like acceptor, see Mitsunobu reaction
956
-
957
- # H-Bond_donor:
958
-
959
- # H-Bond_acceptor:
960
-
961
- # Pos_ionizable:
962
-
963
- # Neg_ionizable:
964
-
965
- # Unlikely_ions:
966
- # O+,N-,C+,C-, ...
967
-
968
- CH-acidic: [$([CX4;!$([H0]);!$(C[!#6;!$([P,S]=O);!$(N(~O)~O)])][$([CX3]=[O,N,S]),$(C#[N]),$([S,P]=[OX1]),$([NX3]=O),$([NX3+](=O)[O-]);!$(*[S,O,N;H1,H2]);!$([*+0][S,O;X1-])]),$([CX4;!$([H0])]1[CX3]=[CX3][CX3]=[CX3]1)]
969
- # C-H alpha to carbonyl, nitro or similar, C is not double-bonded, only C, H, S,P=O and nitro substituents allowed.
970
- # pentadiene is included. acids, their salts, prim./sec. amides, and imides are excluded.
971
- # hits also CH-acidic_strong
972
-
973
- CH-acidic_strong: [CX4;!$([H0]);!$(C[!#6;!$([P,S]=O);!$(N(~O)~O)])]([$([CX3]=[O,N,S]),$(C#[N]),$([S,P]=[OX1]),$([NX3]=O),$([NX3+](=O)[O-]);!$(*[S,O,N;H1,H2]);!$([*+0][S,O;X1-])])[$([CX3]=[O,N,S]),$(C#[N]),$([S,P]=[OX1]),$([NX3]=O),$([NX3+](=O)[O-]);!$(*[S,O,N;H1,H2]);!$([*+0][S,O;X1-])]
974
- # same as above (without pentadiene), but carbonyl or similar on two or three sides
975
-
976
- Chiral_center_specified: [$([*@](~*)(~*)(*)*),$([*@H](*)(*)*),$([*@](~*)(*)*),$([*@H](~*)~*)]
977
- # Hits atoms with tetrahedral chirality, if chiral center is specified in the SMILES string
978
- # depictmatch does not find oxonium, sulfonium, or sulfoxides!
979
-
980
- # Chiral_center_unspecified: [$([*@?](~*)(~*)(*)*),$([*@?H](*)(*)*),$([*@?](~*)(*)*),$([*@?H](~*)~*)]
981
- # Hits atoms with tetrahedral chirality, if chiral center is not specified in the SMILES string
982
- # "@?" (unspecified chirality) is not yet supported in Open Babel Version 2.0
983
-