PyPI - molcraft - Versions diffs - 0.1.0rc9__py3-none-any.whl - Mend

molcraft 0.1.0rc9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of molcraft might be problematic. Click here for more details.

Files changed (19) hide show

molcraft/__init__.py +18 -0
molcraft/callbacks.py +100 -0
molcraft/chem.py +714 -0
molcraft/datasets.py +132 -0
molcraft/descriptors.py +149 -0
molcraft/features.py +379 -0
molcraft/featurizers.py +624 -0
molcraft/layers.py +1910 -0
molcraft/losses.py +37 -0
molcraft/models.py +623 -0
molcraft/ops.py +195 -0
molcraft/records.py +187 -0
molcraft/tensors.py +561 -0
molcraft/trainers.py +212 -0
molcraft-0.1.0rc9.dist-info/METADATA +118 -0
molcraft-0.1.0rc9.dist-info/RECORD +19 -0
molcraft-0.1.0rc9.dist-info/WHEEL +5 -0
molcraft-0.1.0rc9.dist-info/licenses/LICENSE +21 -0
molcraft-0.1.0rc9.dist-info/top_level.txt +1 -0

molcraft/datasets.py ADDED Viewed

@@ -0,0 +1,132 @@
+import warnings
+import numpy as np
+import pandas as pd
+import typing
+def split(
+    data: pd.DataFrame | np.ndarray,
+    *,
+    train_size: float | None = None,
+    validation_size: float | None = None,
+    test_size: float | None = None,
+    groups: str | np.ndarray = None,
+    shuffle: bool = False,
+    random_seed: int | None = None,
+) -> tuple[np.ndarray | pd.DataFrame, ...]:
+    """Splits the dataset into subsets.
+    Args:
+        data:
+            A pd.DataFrame or np.ndarray object.
+        train_size:
+            The size of the train set.
+        validation_size:
+            The size of the validation set.
+        test_size:
+            The size of the test set.
+        groups:
+            The groups to perform the splitting on.
+        shuffle:
+            Whether the dataset should be shuffled prior to splitting.
+        random_seed:
+            The random state/seed. Only applicable if shuffling.
+    """
+    if not isinstance(data, (pd.DataFrame, np.ndarray)):
+        raise ValueError(f'Unsupported `data` type ({type(data)}).')
+    if isinstance(groups, str):
+        groups = data[groups].values
+    elif groups is None:
+        groups = np.arange(len(data))
+    indices = np.unique(groups)
+    size = len(indices)
+    if not train_size and not test_size:
+        raise ValueError(
+            f'Found both `train_size` and `test_size` to be `None`, '
+            f'specify at least one of them.'
+        )
+    if isinstance(test_size, float):
+        test_size = int(size * test_size)
+    if isinstance(train_size, float):
+        train_size = int(size * train_size)
+    if isinstance(validation_size, float):
+        validation_size = int(size * validation_size)
+    elif not validation_size:
+        validation_size = 0
+    if not train_size:
+        train_size = (size - test_size - validation_size)
+    if not test_size:
+        test_size = (size - train_size - validation_size)
+    remainder = size - (train_size + validation_size + test_size)
+    if remainder < 0:
+        raise ValueError(
+            f'subset sizes added up to more than the data size.'
+        )
+    train_size += remainder
+    if shuffle:
+        np.random.seed(random_seed)
+        np.random.shuffle(indices)
+    train_mask = np.isin(groups, indices[:train_size])
+    test_mask = np.isin(groups, indices[-test_size:])
+    if not validation_size:
+        return data[train_mask], data[test_mask]
+    validation_mask = np.isin(groups, indices[train_size:-test_size])
+    return data[train_mask], data[validation_mask], data[test_mask]
+def cv_split(
+    data: pd.DataFrame | np.ndarray,
+    num_splits: int = 10,
+    groups: str | np.ndarray = None,
+    shuffle: bool = False,
+    random_seed: int | None = None,
+) -> typing.Iterator[
+        tuple[np.ndarray | pd.DataFrame, np.ndarray | pd.DataFrame]
+    ]:
+    """Splits the dataset into cross-validation folds.
+    Args:
+        data:
+            A pd.DataFrame or np.ndarray object.
+        num_splits:
+            The number of cross-validation folds.
+        groups:
+            The groups to perform the splitting on.
+        shuffle:
+            Whether the dataset should be shuffled prior to splitting.
+        random_seed:
+            The random state/seed. Only applicable if shuffling.
+    """
+    if not isinstance(data, (pd.DataFrame, np.ndarray)):
+        raise ValueError(f'Unsupported `data` type ({type(data)}).')
+    if isinstance(groups, str):
+        groups = data[groups].values
+    elif groups is None:
+        groups = np.arange(len(data))
+    indices = np.unique(groups)
+    size = len(indices)
+    if num_splits > size:
+        raise ValueError(
+            f'`num_splits` ({num_splits}) must not be greater than'
+            f'the data size or the number of groups ({size}).'
+        )
+    if shuffle:
+        np.random.seed(random_seed)
+        np.random.shuffle(indices)
+    indices_splits = np.array_split(indices, num_splits)
+    for k in range(num_splits):
+        test_indices = indices_splits[k]
+        test_mask = np.isin(groups, test_indices)
+        train_mask = ~test_mask
+        yield data[train_mask], data[test_mask]

molcraft/descriptors.py ADDED Viewed

@@ -0,0 +1,149 @@
+import warnings
+import keras
+import numpy as np
+from rdkit.Chem import rdMolDescriptors
+from molcraft import chem
+from molcraft import features
@keras.saving.register_keras_serializable(package='molcraft')
class Descriptor(features.Feature):
    """Feature computed once for a whole molecule.

    `call` may return a single value or a tuple/list/array of values. Each
    value is encoded with the inherited categorical encoder (when a vocab is
    set) or floating encoder (otherwise), and the encodings are concatenated
    into one 1-D array.
    """

    def __call__(self, mol: chem.Mol) -> np.ndarray:
        """Computes and encodes the descriptor of `mol`.

        Raises:
            ValueError: If `mol` is not a `chem.Mol` object.
        """
        if not isinstance(mol, chem.Mol):
            raise ValueError(
                f'Input to {self.name} must be a `chem.Mol` object.'
            )
        values = self.call(mol)
        # A scalar descriptor is treated as a one-element sequence.
        if not isinstance(values, (tuple, list, np.ndarray)):
            values = [values]
        if self.vocab:
            encode = self._featurize_categorical
        else:
            encode = self._featurize_floating
        return np.concatenate([encode(value) for value in values])
@keras.saving.register_keras_serializable(package='molcraft')
class Descriptor3D(Descriptor):
    """Descriptor that requires the molecule to carry a conformer."""

    def __call__(self, mol: chem.Mol) -> np.ndarray:
        """Validates `mol` and delegates to `Descriptor.__call__`.

        Raises:
            ValueError: If `mol` is not a `chem.Mol` object or reports
                zero conformers.
        """
        if not isinstance(mol, chem.Mol):
            raise ValueError(
                f'Input to {self.name} must be a `chem.Mol` object.'
            )
        has_conformer = mol.num_conformers != 0
        if has_conformer:
            return super().__call__(mol)
        raise ValueError(
            f'The inputted `chem.Mol` to {self.name} must embed a conformer. '
            f'It is recommended that {self.name} is used as a molecule feature '
            'for `MolGraphFeaturizer3D`, which by default embeds a conformer.'
        )
@keras.saving.register_keras_serializable(package='molcraft')
class MolWeight(Descriptor):
    """Exact molecular weight (`rdMolDescriptors.CalcExactMolWt`)."""

    def call(self, mol: chem.Mol) -> float:
        # Returns a scalar; `Descriptor.__call__` wraps and encodes it.
        return rdMolDescriptors.CalcExactMolWt(mol)
@keras.saving.register_keras_serializable(package='molcraft')
class TotalPolarSurfaceArea(Descriptor):
    """Topological polar surface area (`rdMolDescriptors.CalcTPSA`)."""

    def call(self, mol: chem.Mol) -> float:
        # Returns a scalar; `Descriptor.__call__` wraps and encodes it.
        return rdMolDescriptors.CalcTPSA(mol)
@keras.saving.register_keras_serializable(package='molcraft')
class LogP(Descriptor):
    """Crippen logP."""

    def call(self, mol: chem.Mol) -> float:
        # First element of `CalcCrippenDescriptors`; a scalar that
        # `Descriptor.__call__` wraps and encodes.
        return rdMolDescriptors.CalcCrippenDescriptors(mol)[0]
@keras.saving.register_keras_serializable(package='molcraft')
class MolarRefractivity(Descriptor):
    """Crippen molar refractivity."""

    def call(self, mol: chem.Mol) -> float:
        # Second element of `CalcCrippenDescriptors`; a scalar that
        # `Descriptor.__call__` wraps and encodes.
        return rdMolDescriptors.CalcCrippenDescriptors(mol)[1]
@keras.saving.register_keras_serializable(package='molcraft')
class NumAtoms(Descriptor):
    """Number of atoms (`rdMolDescriptors.CalcNumAtoms`)."""

    def call(self, mol: chem.Mol) -> int:
        # Returns a scalar; `Descriptor.__call__` wraps and encodes it.
        return rdMolDescriptors.CalcNumAtoms(mol)
@keras.saving.register_keras_serializable(package='molcraft')
class NumHeavyAtoms(Descriptor):
    """Number of heavy atoms (`rdMolDescriptors.CalcNumHeavyAtoms`)."""

    def call(self, mol: chem.Mol) -> int:
        # Returns a scalar; `Descriptor.__call__` wraps and encodes it.
        return rdMolDescriptors.CalcNumHeavyAtoms(mol)
@keras.saving.register_keras_serializable(package='molcraft')
class NumHeteroatoms(Descriptor):
    """Number of heteroatoms (`rdMolDescriptors.CalcNumHeteroatoms`)."""

    def call(self, mol: chem.Mol) -> int:
        # Returns a scalar; `Descriptor.__call__` wraps and encodes it.
        return rdMolDescriptors.CalcNumHeteroatoms(mol)
@keras.saving.register_keras_serializable(package='molcraft')
class NumHydrogenDonors(Descriptor):
    """Number of hydrogen-bond donors (`rdMolDescriptors.CalcNumHBD`)."""

    def call(self, mol: chem.Mol) -> int:
        # Returns a scalar; `Descriptor.__call__` wraps and encodes it.
        return rdMolDescriptors.CalcNumHBD(mol)
@keras.saving.register_keras_serializable(package='molcraft')
class NumHydrogenAcceptors(Descriptor):
    """Number of hydrogen-bond acceptors (`rdMolDescriptors.CalcNumHBA`)."""

    def call(self, mol: chem.Mol) -> int:
        # Returns a scalar; `Descriptor.__call__` wraps and encodes it.
        return rdMolDescriptors.CalcNumHBA(mol)
@keras.saving.register_keras_serializable(package='molcraft')
class NumRotatableBonds(Descriptor):
    """Number of rotatable bonds (`rdMolDescriptors.CalcNumRotatableBonds`)."""

    def call(self, mol: chem.Mol) -> int:
        # Returns a scalar; `Descriptor.__call__` wraps and encodes it.
        return rdMolDescriptors.CalcNumRotatableBonds(mol)
@keras.saving.register_keras_serializable(package='molcraft')
class NumRings(Descriptor):
    """Number of rings (`rdMolDescriptors.CalcNumRings`)."""

    def call(self, mol: chem.Mol) -> int:
        # Returns a scalar; `Descriptor.__call__` wraps and encodes it.
        return rdMolDescriptors.CalcNumRings(mol)
@keras.saving.register_keras_serializable(package='molcraft')
class NumAromaticRings(Descriptor):
    """Number of aromatic rings (`rdMolDescriptors.CalcNumAromaticRings`)."""

    def call(self, mol: chem.Mol) -> int:
        # Returns a scalar; `Descriptor.__call__` wraps and encodes it.
        return rdMolDescriptors.CalcNumAromaticRings(mol)
@keras.saving.register_keras_serializable(package='molcraft')
class AtomCount(Descriptor):
    """Number of atoms in the molecule whose symbol equals `atom_type`.

    Args:
        atom_type:
            The atom symbol to count, compared against `atom.GetSymbol()`.
        **kwargs:
            Forwarded to the `Descriptor` constructor.
    """

    def __init__(self, atom_type: str, **kwargs):
        super().__init__(**kwargs)
        self.atom_type = atom_type

    def call(self, mol: chem.Mol) -> int:
        # Returns a scalar; `Descriptor.__call__` wraps and encodes it.
        return sum(
            1 for atom in mol.atoms if atom.GetSymbol() == self.atom_type
        )

    def get_config(self) -> dict:
        """Returns the base config extended with `atom_type`."""
        config = super().get_config()
        config['atom_type'] = self.atom_type
        return config
@keras.saving.register_keras_serializable(package='molcraft')
class ForceFieldEnergy(Descriptor3D):
    """Universal Force Field (UFF) Energy."""

    def call(self, mol: chem.Mol) -> np.ndarray:
        # NOTE(review): the return shape of `chem.conformer_energies` is not
        # visible here — presumably one energy per conformer; confirm.
        energies = chem.conformer_energies(mol, method="UFF")
        return energies

molcraft/features.py ADDED Viewed

@@ -0,0 +1,379 @@
+import warnings
+import abc
+import math
+import keras
+import numpy as np
+from molcraft import chem
@keras.saving.register_keras_serializable(package='molcraft')
class Feature(abc.ABC):
    """Base class for features computed per atom or per bond of a molecule.

    Subclasses implement `call`, returning one raw value per atom or per
    bond. With a vocabulary, each raw value is one-hot encoded; without one,
    each raw value is encoded as a single floating point number.

    Args:
        vocab:
            The categorical values to one-hot encode. If empty or `None`,
            `default_vocabulary` is consulted using the class name; if that
            also yields nothing, the feature is treated as floating point.
        allow_oov:
            Whether values missing from `vocab` are tolerated. If `False`,
            such values raise a `ValueError`.
        encode_oov:
            Whether to reserve a dedicated one-hot slot (`'<oov>'`) for
            values missing from `vocab`. If `False`, tolerated OOV values
            are encoded as all zeros.
        dtype:
            The dtype of the encoded arrays.
    """

    def __init__(
        self,
        vocab: set[int | str] | list[int | str] | None = None,
        allow_oov: bool = True,
        encode_oov: bool = False,
        dtype: str = 'float32'
    ) -> None:
        self.encode_oov = encode_oov
        self.allow_oov = allow_oov
        self.oov_token = '<oov>'
        self.dtype = dtype
        if not vocab:
            vocab = default_vocabulary.get(self.name, None)
        if vocab:
            if isinstance(vocab, set):
                # Sets are unordered; sort for a deterministic encoding.
                vocab = sorted(
                    vocab, key=lambda x: x if x is not None else ""
                )
            else:
                # Always work on a copy: appending the OOV token below must
                # not mutate the caller's list or the shared module-level
                # `default_vocabulary` entry.
                vocab = list(vocab)
            if self.encode_oov and self.oov_token not in vocab:
                vocab.append(self.oov_token)
            onehot_encodings = np.eye(len(vocab), dtype=self.dtype)
            self.feature_to_onehot = dict(zip(vocab, onehot_encodings))
        self.vocab = vocab

    @abc.abstractmethod
    def call(self, mol: chem.Mol) -> list[float | int | bool | str]:
        """Returns one raw feature value per atom or per bond of `mol`."""
        pass

    def __call__(self, mol: chem.Mol) -> np.ndarray:
        """Computes and encodes the features of `mol`.

        Returns:
            An array with one row per value returned by `call` and
            `output_dim` columns.

        Raises:
            TypeError: If `mol` is not a `chem.Mol` object.
            ValueError: If the number of values returned by `call` matches
                neither the number of atoms nor the number of bonds.
        """
        if not isinstance(mol, chem.Mol):
            raise TypeError(f'Input to {self.name} must be a `chem.Mol` object.')
        features = self.call(mol)
        if len(features) != mol.num_atoms and len(features) != mol.num_bonds:
            raise ValueError(
                f'The number of features computed by {self.name} does not '
                'match the number of atoms or bonds of the `chem.Mol` object. '
                'Make sure to iterate over `atoms` or `bonds` of the `chem.Mol` '
                'object when computing features.'
            )
        if len(features) == 0:
            # Edge case: no atoms or bonds in the molecule.
            return np.zeros((0, self.output_dim), dtype=self.dtype)
        func = (
            self._featurize_categorical if self.vocab else
            self._featurize_floating
        )
        return np.stack([func(x) for x in features])

    def get_config(self) -> dict:
        """Returns the constructor arguments needed to rebuild the feature."""
        config = {
            'vocab': self.vocab,
            'allow_oov': self.allow_oov,
            'encode_oov': self.encode_oov,
            'dtype': self.dtype
        }
        return config

    @classmethod
    def from_config(cls, config: dict) -> 'Feature':
        """Builds a feature from a dict produced by `get_config`."""
        return cls(**config)

    @property
    def name(self) -> str:
        """The class name; also the key used to look up a default vocab."""
        return self.__class__.__name__

    @property
    def output_dim(self) -> int:
        """The encoding width: the vocab size, or 1 without a vocab."""
        return 1 if not self.vocab else len(self.vocab)

    def _featurize_categorical(self, feature: str | int) -> np.ndarray:
        """One-hot encodes `feature`, applying the configured OOV policy."""
        encoding = self.feature_to_onehot.get(feature, None)
        if encoding is not None:
            return encoding
        if not self.allow_oov:
            raise ValueError(
                f'{feature} could not be encoded, as it was not found in `vocab`. '
                'To allow OOV features, set `allow_oov` or `encode_oov` to True.'
            )
        oov_encoding = self.feature_to_onehot.get(self.oov_token, None)
        if oov_encoding is None:
            # No dedicated OOV slot: encode as all zeros.
            oov_encoding = np.zeros([self.output_dim], dtype=self.dtype)
        return oov_encoding

    def _featurize_floating(self, value: float | int | bool) -> np.ndarray:
        """Encodes a numeric `value` as a length-1 array.

        Non-finite values are replaced by 0 with a warning.

        Raises:
            ValueError: If `value` is not an `int`, `float` or `bool`.
        """
        if not isinstance(value, (int, float, bool)):
            raise ValueError(
                f'{self.name} produced a value of type {type(value)}. '
                'If it represents a categorical feature, please provide a `vocab` '
                'to the constructor. If it represents a floating point feature, '
                'please make sure its `call` method returns a list of values of '
                'type `float`, `int` or `bool`.'
            )
        if not math.isfinite(value):
            warnings.warn(
                f'Found value of {self.name} to be non-finite. '
                f'Value received: {value}. Converting it to a value of 0.',
            )
            value = 0.0
        return np.asarray([value], dtype=self.dtype)
@keras.saving.register_keras_serializable(package='molcraft')
class AtomType(Feature):
    """Symbol of each atom (`atom.GetSymbol()`)."""

    def call(self, mol: chem.Mol) -> list[float | int | bool | str]:
        return [atom.GetSymbol() for atom in mol.atoms]
@keras.saving.register_keras_serializable(package='molcraft')
class Degree(Feature):
    """Degree of each atom (`atom.GetDegree()`)."""

    def call(self, mol: chem.Mol) -> list[float | int | bool | str]:
        return [atom.GetDegree() for atom in mol.atoms]
@keras.saving.register_keras_serializable(package='molcraft')
class NumHydrogens(Feature):
    """Total number of hydrogens of each atom (`atom.GetTotalNumHs()`)."""

    # NOTE(review): `default_vocabulary` is looked up by class name and has
    # a 'TotalNumHs' key but no 'NumHydrogens' key, so no default vocab is
    # picked up unless `vocab` is passed explicitly — confirm intent.
    def call(self, mol: chem.Mol) -> list[float | int | bool | str]:
        return [atom.GetTotalNumHs() for atom in mol.atoms]
@keras.saving.register_keras_serializable(package='molcraft')
class Valence(Feature):
    """Total valence of each atom (`atom.GetTotalValence()`)."""

    # NOTE(review): `default_vocabulary` is looked up by class name and has
    # a 'TotalValence' key but no 'Valence' key, so no default vocab is
    # picked up unless `vocab` is passed explicitly — confirm intent.
    def call(self, mol: chem.Mol) -> list[float | int | bool | str]:
        return [atom.GetTotalValence() for atom in mol.atoms]
@keras.saving.register_keras_serializable(package='molcraft')
class AtomicWeight(Feature):
    """Atomic weight of each atom, looked up by symbol in the periodic table."""

    def call(self, mol: chem.Mol) -> list[float | int | bool | str]:
        pt = chem.get_periodic_table()
        return [pt.GetAtomicWeight(atom.GetSymbol()) for atom in mol.atoms]
@keras.saving.register_keras_serializable(package='molcraft')
class Hybridization(Feature):
    """Lower-cased string form of each atom's `GetHybridization()` value."""

    def call(self, mol: chem.Mol) -> list[float | int | bool | str]:
        return [str(atom.GetHybridization()).lower() for atom in mol.atoms]
@keras.saving.register_keras_serializable(package='molcraft')
class CIPCode(Feature):
    """The `_CIPCode` property of each atom, or the string "None" if unset."""

    def call(self, mol: chem.Mol) -> list[float | int | bool | str]:
        return [
            atom.GetProp("_CIPCode") if atom.HasProp("_CIPCode") else "None"
            for atom in mol.atoms
        ]
@keras.saving.register_keras_serializable(package='molcraft')
class RingSize(Feature):
    """Smallest ring size (searched from 3 upwards) each atom is part of.

    Atoms that are not in a ring get the value -1.
    """

    def call(self, mol: chem.Mol) -> list[float | int | bool | str]:
        def ring_size(atom) -> int:
            if not atom.IsInRing():
                return -1
            # Probe increasing sizes; the first hit is the smallest ring.
            size = 3
            while not atom.IsInRingSize(size):
                size += 1
            return size
        return [ring_size(atom) for atom in mol.atoms]
@keras.saving.register_keras_serializable(package='molcraft')
class FormalCharge(Feature):
    """Formal charge of each atom (`atom.GetFormalCharge()`)."""

    def call(self, mol: chem.Mol) -> list[float | int | bool | str]:
        return [atom.GetFormalCharge() for atom in mol.atoms]
@keras.saving.register_keras_serializable(package='molcraft')
class IsChiralityPossible(Feature):
    """Whether each atom carries the `_ChiralityPossible` property."""

    def call(self, mol: chem.Mol) -> list[float | int | bool | str]:
        return [atom.HasProp("_ChiralityPossible") for atom in mol.atoms]
@keras.saving.register_keras_serializable(package='molcraft')
class NumRadicalElectrons(Feature):
    """Number of radical electrons of each atom."""

    def call(self, mol: chem.Mol) -> list[float | int | bool | str]:
        return [atom.GetNumRadicalElectrons() for atom in mol.atoms]
@keras.saving.register_keras_serializable(package='molcraft')
class IsAromatic(Feature):
    """Aromaticity flag of each atom (`atom.GetIsAromatic()`)."""

    def call(self, mol: chem.Mol) -> list[float | int | bool | str]:
        return [atom.GetIsAromatic() for atom in mol.atoms]
@keras.saving.register_keras_serializable(package='molcraft')
class IsHeteroatom(Feature):
    """Per-atom values computed by `chem.hetero_atoms`."""

    def call(self, mol: chem.Mol) -> list[float | int | bool | str]:
        # NOTE(review): presumably one flag per atom — confirm in `chem`.
        return chem.hetero_atoms(mol)
@keras.saving.register_keras_serializable(package='molcraft')
class IsHydrogenDonor(Feature):
    """Per-atom values computed by `chem.hydrogen_donors`."""

    def call(self, mol: chem.Mol) -> list[float | int | bool | str]:
        # NOTE(review): presumably one flag per atom — confirm in `chem`.
        return chem.hydrogen_donors(mol)
@keras.saving.register_keras_serializable(package='molcraft')
class IsHydrogenAcceptor(Feature):
    """Per-atom values computed by `chem.hydrogen_acceptors`."""

    def call(self, mol: chem.Mol) -> list[float | int | bool | str]:
        # NOTE(review): presumably one flag per atom — confirm in `chem`.
        return chem.hydrogen_acceptors(mol)
@keras.saving.register_keras_serializable(package='molcraft')
class IsInRing(Feature):
    """Ring-membership flag of each atom (`atom.IsInRing()`)."""

    def call(self, mol: chem.Mol) -> list[float | int | bool | str]:
        return [atom.IsInRing() for atom in mol.atoms]
@keras.saving.register_keras_serializable(package='molcraft')
class PartialCharge(Feature):
    """Gasteiger partial charge."""

    def call(self, mol: chem.Mol) -> list[float | int | bool | str]:
        # Delegates to `chem.partial_charges`.
        return chem.partial_charges(mol)
@keras.saving.register_keras_serializable(package='molcraft')
class TotalPolarSurfaceAreaContribution(Feature):
    """Total polar surface area (TPSA) contribution."""

    def call(self, mol: chem.Mol) -> list[float | int | bool | str]:
        # Delegates to `chem.total_polar_surface_area_contributions`.
        return chem.total_polar_surface_area_contributions(mol)
@keras.saving.register_keras_serializable(package='molcraft')
class AccessibleSurfaceAreaContribution(Feature):
    """Labute accessible surface area (ASA) contribution."""

    def call(self, mol: chem.Mol) -> list[float | int | bool | str]:
        # Delegates to `chem.accessible_surface_area_contributions`.
        return chem.accessible_surface_area_contributions(mol)
@keras.saving.register_keras_serializable(package='molcraft')
class LogPContribution(Feature):
    """Crippen logP contribution."""

    def call(self, mol: chem.Mol) -> list[float | int | bool | str]:
        # Delegates to `chem.logp_contributions`.
        return chem.logp_contributions(mol)
@keras.saving.register_keras_serializable(package='molcraft')
class MolarRefractivityContribution(Feature):
    """Crippen molar refractivity contribution."""

    def call(self, mol: chem.Mol) -> list[float | int | bool | str]:
        # Delegates to `chem.molar_refractivity_contributions`.
        return chem.molar_refractivity_contributions(mol)
@keras.saving.register_keras_serializable(package='molcraft')
class BondType(Feature):
    """Lower-cased string form of each bond's `GetBondType()` value."""

    def call(self, mol: chem.Mol) -> list[float | int | bool | str]:
        return [str(bond.GetBondType()).lower() for bond in mol.bonds]
@keras.saving.register_keras_serializable(package='molcraft')
class Stereo(Feature):
    """Stereo label of each bond.

    The label is `str(bond.GetStereo())` with the substring 'STEREO'
    removed and the result capitalized.
    """

    def call(self, mol: chem.Mol) -> list[float | int | bool | str]:
        return [
            str(bond.GetStereo()).replace('STEREO', '').capitalize()
            for bond in mol.bonds
        ]
@keras.saving.register_keras_serializable(package='molcraft')
class IsConjugated(Feature):
    """Conjugation flag of each bond (`bond.GetIsConjugated()`)."""

    def call(self, mol: chem.Mol) -> list[float | int | bool | str]:
        return [bond.GetIsConjugated() for bond in mol.bonds]
@keras.saving.register_keras_serializable(package='molcraft')
class IsRotatable(Feature):
    """Per-bond values computed by `chem.rotatable_bonds`."""

    def call(self, mol: chem.Mol) -> list[float | int | bool | str]:
        # NOTE(review): presumably one flag per bond — confirm in `chem`.
        return chem.rotatable_bonds(mol)
@keras.saving.register_keras_serializable(package='molcraft')
class PairFeature(Feature):
    """Base class for features computed per ordered pair of atoms.

    `call` must return `num_atoms ** 2` values, ordered as
    `[(0, 0), (0, 1), ..., (0, N), (1, 0), ..., (N, N)]`.
    """

    def __call__(self, mol: chem.Mol) -> np.ndarray:
        """Computes and encodes the pair features of `mol`.

        Returns:
            An array with one row per atom pair and `output_dim` columns.

        Raises:
            TypeError: If `mol` is not a `chem.Mol` instance.
            ValueError: If `call` does not return one value per atom pair.
        """
        if not isinstance(mol, chem.Mol):
            raise TypeError(f'Input to {self.name} must be a `chem.Mol` instance.')
        features = self.call(mol)
        if len(features) != int(mol.num_atoms**2):
            raise ValueError(
                f'The number of features computed by {self.name} does not '
                'match the number of node/atom pairs in the `chem.Mol` object. '
                f'Make sure the list of items returned by {self.name}(input) '
                'correspond to node/atom pairs: '
                '[(0, 0), (0, 1), ..., (0, N), (1, 0), ... (N, N)], '
                'where N denotes the number of nodes/atoms.'
            )
        if len(features) == 0:
            # Edge case: no atoms in the molecule. Keep the 2-D shape that
            # `Feature.__call__` produces for empty input.
            return np.zeros((0, self.output_dim), dtype=self.dtype)
        func = (
            self._featurize_categorical if self.vocab else
            self._featurize_floating
        )
        return np.asarray([func(x) for x in features], dtype=self.dtype)
@keras.saving.register_keras_serializable(package='molcraft')
class PairDistance(PairFeature):
    """Integer distance between every pair of atoms, one-hot encoded.

    Distances come from `chem.get_distances` and are truncated to `int`.

    Args:
        max_distance:
            The largest distance with its own one-hot slot; the vocab is
            `0..max_distance`. Defaults to 10. Ignored when a non-empty
            `vocab` is passed via `kwargs`.
        allow_oov:
            Whether distances outside the vocab are tolerated.
        encode_oov:
            Whether distances outside the vocab get a dedicated slot.
        **kwargs:
            Forwarded to the `Feature` constructor (e.g. `vocab`, `dtype`).
    """

    def __init__(
        self,
        max_distance: int | None = None,
        allow_oov: bool = True,
        encode_oov: bool = True,
        **kwargs,
    ) -> None:
        vocab = kwargs.pop('vocab', None)
        if not vocab:
            if max_distance is None:
                max_distance = 10
            vocab = list(range(max_distance + 1))
        super().__init__(
            vocab=vocab,
            allow_oov=allow_oov,
            encode_oov=encode_oov,
            **kwargs
        )

    def call(self, mol: chem.Mol) -> list[int]:
        # Flatten the distance matrix row by row to match the pair order
        # expected by `PairFeature.__call__`.
        return [int(x) for x in chem.get_distances(mol).reshape(-1)]
# Default vocabularies, looked up by `Feature.__init__` via the feature's
# class name when no explicit `vocab` is passed.
default_vocabulary = {
    # '*' followed by the element symbols; the list index of an element
    # equals its position in this sequence ('H' is 1, ..., 'Og' is 118).
    'AtomType': [
        '*',
        'H', 'He',
        'Li', 'Be', 'B', 'C', 'N', 'O', 'F', 'Ne',
        'Na', 'Mg', 'Al', 'Si', 'P', 'S', 'Cl', 'Ar',
        'K', 'Ca', 'Sc', 'Ti', 'V', 'Cr', 'Mn', 'Fe', 'Co', 'Ni', 'Cu',
        'Zn', 'Ga', 'Ge', 'As', 'Se', 'Br', 'Kr',
        'Rb', 'Sr', 'Y', 'Zr', 'Nb', 'Mo', 'Tc', 'Ru', 'Rh', 'Pd', 'Ag',
        'Cd', 'In', 'Sn', 'Sb', 'Te', 'I', 'Xe',
        'Cs', 'Ba', 'La', 'Ce', 'Pr', 'Nd', 'Pm', 'Sm', 'Eu', 'Gd', 'Tb',
        'Dy', 'Ho', 'Er', 'Tm', 'Yb', 'Lu', 'Hf', 'Ta', 'W', 'Re', 'Os',
        'Ir', 'Pt', 'Au', 'Hg', 'Tl', 'Pb', 'Bi', 'Po', 'At', 'Rn',
        'Fr', 'Ra', 'Ac', 'Th', 'Pa', 'U', 'Np', 'Pu', 'Am', 'Cm', 'Bk',
        'Cf', 'Es', 'Fm', 'Md', 'No', 'Lr', 'Rf', 'Db', 'Sg', 'Bh', 'Hs',
        'Mt', 'Ds', 'Rg', 'Cn', 'Nh', 'Fl', 'Mc', 'Lv', 'Ts', 'Og',
    ],
    'Degree': list(range(9)),
    # NOTE(review): no class named `TotalNumHs` or `TotalValence` is visible
    # in this file; the corresponding features are `NumHydrogens` and
    # `Valence`, so these two entries look stale — confirm before renaming,
    # since doing so changes the output dimension of those features.
    'TotalNumHs': list(range(5)),
    'TotalValence': list(range(9)),
    'Hybridization': [
        's', 'sp', 'sp2', 'sp3', 'sp3d', 'sp3d2', 'unspecified',
    ],
    'CIPCode': ['R', 'S', 'None'],
    'FormalCharge': list(range(-3, 4)),
    'NumRadicalElectrons': list(range(5)),
    # -1 marks atoms that are not part of any ring.
    'RingSize': [-1, *range(3, 9)],
    'BondType': ['zero', 'single', 'double', 'triple', 'aromatic'],
    'Stereo': ['E', 'Z', 'Any', 'None'],
}