PyPI - nerdd-module - Versions diffs - 0.2.4__tar.gz → 0.2.6__tar.gz - Mend

nerdd-module 0.2.4 (tar.gz) → 0.2.6 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (81)

{nerdd-module-0.2.4 → nerdd_module-0.2.6}/PKG-INFO RENAMED Viewed

@@ -1,8 +1,8 @@
 Metadata-Version: 2.1
 Name: nerdd-module
-Version: 0.2.4
+Version: 0.2.6
 Summary: Base package to create NERDD modules
-Home-page: https://github.com/molinfo-vienna/nerdd-module.git
+Home-page: https://github.com/molinfo-vienna/nerdd-module
 Maintainer: Steffen Hirte
 Maintainer-email: steffen.hirte@univie.ac.at
 License: BSD 3-Clause License
@@ -33,8 +33,11 @@ Requires-Dist: stringcase>=1.2.0
 Requires-Dist: decorator>=5.1.1
 Requires-Dist: importlib-resources>=5; python_version < "3.10"
 Requires-Dist: importlib-metadata>=4.6; python_version < "3.10"
-Requires-Dist: chembl_structure_pipeline>=1.0.0
 Provides-Extra: dev
+Requires-Dist: black; extra == "dev"
+Requires-Dist: isort; extra == "dev"
+Provides-Extra: csp
+Requires-Dist: chembl_structure_pipeline>=1.0.0; extra == "csp"
 Provides-Extra: test
 Requires-Dist: pytest; extra == "test"
 Requires-Dist: pytest-sugar; extra == "test"

{nerdd-module-0.2.4 → nerdd_module-0.2.6}/nerdd_module/abstract_model.py RENAMED Viewed

@@ -1,13 +1,13 @@
 from abc import ABC, abstractmethod
-from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union
+from typing import Callable, Iterable, List, Tuple, Union
 import pandas as pd
-from rdkit.Chem import Mol, MolToSmiles
+from rdkit.Chem import Mol
 from .config import AutoConfiguration, Configuration
-from .io import DepthFirstExplorer, MoleculeEntry
+from .input import DepthFirstExplorer, MoleculeEntry
 from .preprocessing import Pipeline, Step, registry
-from .problem import Problem
+from .problem import Problem, UnknownProblem
 __all__ = ["AbstractModel"]
@@ -156,17 +156,20 @@ class AbstractModel(ABC):
         #    (and we assume that the order of the molecules is the same)
         if "mol_id" in df_predictions.columns:
             # check that mol_id contains only valid ids
-            assert set(df_predictions.mol_id).issubset(
-                set(df_valid_subset.mol_id)
-            ), "The mol_id column must only contain valid ids!"
+            assert set(df_predictions.mol_id).issubset(set(df_valid_subset.mol_id)), (
+                f"The mol_id column contains invalid ids: "
+                f"{set(df_predictions.mol_id).difference(set(df_valid_subset.mol_id))}."
+            )
             # use mol_id as index
             df_predictions.set_index("mol_id", drop=True, inplace=True)
         elif "mol" in df_predictions.columns:
             # check that molecule names contain only valid ids
             names = df_predictions.mol.apply(lambda mol: int(mol.GetProp("_Name")))
-            assert set(names).issubset(
-                set(df_preprocess.mol_id)
-            ), "The molecule names must only contain valid ids!"
+            assert set(names).issubset(set(df_preprocess.mol_id)), (
+                f"The mol_id column contains invalid ids: "
+                f"{set(df_predictions.mol_id).difference(set(df_valid_subset.mol_id))}."
+            )
             # use mol_id as index
             df_predictions.set_index(
@@ -184,6 +187,8 @@ class AbstractModel(ABC):
                 df_valid_subset.index.astype("int64"), inplace=True
             )
+        # TODO: check derivative_id or atom_id
         # add column that indicates whether a molecule was missing
         missing_mol_ids = set(df_preprocess.mol_id).difference(df_predictions.index)
         df_preprocess["missing"] = df_preprocess.mol_id.isin(missing_mol_ids)
@@ -212,7 +217,7 @@ class AbstractModel(ABC):
         else:
             df_result["errors"] = df_result.preprocessing_errors
         df_result["errors"] = df_result.errors + df_result.missing.map(
-            lambda x: ["!1"] if x else []
+            lambda x: [UnknownProblem()] if x else []
         )
         df_result.drop(columns=["missing", "preprocessing_errors"], inplace=True)

{nerdd-module-0.2.4 → nerdd_module-0.2.6}/nerdd_module/cli.py RENAMED Viewed

@@ -4,7 +4,7 @@ import sys
 import rich_click as click
 from decorator import decorator
-from nerdd_module.io import WriterRegistry
+from nerdd_module.output import WriterRegistry
 from stringcase import spinalcase
 __all__ = ["auto_cli"]

nerdd_module-0.2.6/nerdd_module/config/default_configuration.py ADDED Viewed

@@ -0,0 +1,41 @@
+from stringcase import snakecase
+from ..polyfills import version
+from .configuration import Configuration
+__all__ = ["DefaultConfiguration"]
+class DefaultConfiguration(Configuration):
+    def __init__(self, nerdd_module):
+        super().__init__()
+        # generate a name from the module name
+        class_name = nerdd_module.__class__.__name__
+        if class_name.endswith("Model"):
+            # remove the "Model" suffix
+            # e.g. SkinDoctorModel -> SkinDoctor
+            class_name = class_name[: -len("Model")]
+        # convert the class name to snake case
+        # e.g. SkinDoctor -> skin_doctor
+        name = snakecase(class_name)
+        # append version to the configuration
+        try:
+            module = nerdd_module.__module__
+            root_module = module.split(".", 1)[0]
+            version_ = version(root_module)
+        except ModuleNotFoundError:
+            pass
+        self.config = dict(
+            name=name,
+            version=version_,
+            task="molecular_property_prediction",
+            job_parameters=[],
+            result_properties=[],
+        )
+    def _get_dict(self):
+        return self.config

{nerdd-module-0.2.4 → nerdd_module-0.2.6}/nerdd_module/config/merged_configuration.py RENAMED Viewed

@@ -9,6 +9,8 @@ class MergedConfiguration(Configuration):
         self.config = dict()
+        # merge all configurations starting from the first one
+        # --> last configuration has the highest priority
         for c in configs:
             self.config.update(c._get_dict())

{nerdd-module-0.2.4/nerdd_module/io → nerdd_module-0.2.6/nerdd_module/input}/__init__.py RENAMED Viewed

@@ -1,4 +1,3 @@
-from .csv_writer import *
 from .depth_first_explorer import *
 from .file_reader import *
 from .gzip_reader import *
@@ -8,10 +7,7 @@ from .mol_reader import *
 from .reader import *
 from .reader_registry import *
 from .sdf_reader import *
-from .sdf_writer import *
 from .smiles_reader import *
 from .string_reader import *
 from .tar_reader import *
-from .writer import *
-from .writer_registry import *
 from .zip_reader import *

{nerdd-module-0.2.4/nerdd_module/io → nerdd_module-0.2.6/nerdd_module/input}/file_reader.py RENAMED Viewed

@@ -1,6 +1,5 @@
-import os
 from pathlib import Path
-from typing import Generator
+from typing import Generator, Tuple
 from .reader import MoleculeEntry, Reader
 from .reader_registry import register_reader
@@ -8,30 +7,42 @@ from .reader_registry import register_reader
 __all__ = ["FileReader"]
-@register_reader
+@register_reader("data_dir")
 class FileReader(Reader):
-    def __init__(self, data_dir="."):
+    def __init__(self, data_dir=None):
         super().__init__()
-        self.data_dir = Path(data_dir)
+        self.data_dir = data_dir
+        if self.data_dir is not None:
+            self.data_dir = Path(self.data_dir)
     def read(self, filename, explore) -> Generator[MoleculeEntry, None, None]:
         assert isinstance(filename, str), "input must be a string"
+        # convert filename to path
         try:
             path = Path(filename)
-            if not path.is_absolute():
-                path = self.data_dir / path
         except:
             raise ValueError("input must be a valid path")
-        assert self.data_dir in path.parents, "input must be a relative path"
+        # convert to absolute path
+        if not path.is_absolute():
+            if self.data_dir is not None:
+                path = self.data_dir / path
+            else:
+                path = Path(".") / path
+        # check that the file is within the data_dir
+        assert (
+            self.data_dir is None or self.data_dir in path.parents
+        ), "input must be a relative path"
+        # check that the file exists
         assert path.exists(), "input must be a valid file"
         with open(path, "rb") as f:
             for entry in explore(f):
                 if len(entry.source) == 1 and entry.source[0] == "raw_input":
-                    source = tuple()
+                    source: Tuple[str, ...] = tuple()
                 else:
                     source = entry.source
                 yield entry._replace(source=tuple([filename, *source]))

nerdd_module-0.2.6/nerdd_module/input/reader_registry.py ADDED Viewed

@@ -0,0 +1,64 @@
+from functools import lru_cache
+from typing import Dict, Generator, List, Tuple, Type
+from .reader import Reader
+__all__ = ["ReaderRegistry", "register_reader"]
+# lru_cache makes the registry a singleton
+@lru_cache(maxsize=1)
+class ReaderRegistry:
+    def __init__(self):
+        self._factories: List[Tuple[Type[Reader], Tuple[str, ...], Dict[str, str]]] = []
+        self._config = {}
+    def _create_reader(self, ReaderClass: Type[Reader], *args, **kwargs) -> Reader:
+        # translate all args
+        args = tuple(self._config.get(arg, None) for arg in args)
+        # translate all kwargs
+        kwargs = {
+            k: self._config.get(v, None) for k, v in kwargs.items() if v in self._config
+        }
+        return ReaderClass(*args, **kwargs)
+    def register(self, ReaderClass: Type[Reader], *args: str, **kwargs: str):
+        assert issubclass(ReaderClass, Reader)
+        assert all([isinstance(arg, str) for arg in args])
+        assert all(
+            [isinstance(k, str) and isinstance(v, str) for k, v in kwargs.items()]
+        )
+        self._factories.append((ReaderClass, args, kwargs))
+    def readers(self) -> Generator[Reader, None, None]:
+        for reader, args, kwargs in self._factories:
+            yield self._create_reader(reader, *args, **kwargs)
+    def __iter__(self):
+        return iter(self.readers())
+def register_reader(*args, **kwargs):
+    def wrapper(cls, *args, **kwargs):
+        ReaderRegistry().register(cls, *args, **kwargs)
+        return cls
+    # Case 1: first argument is a class
+    # --> decorator is used without arguments
+    # @register_reader
+    # class F:
+    #     ...
+    if len(args) > 0 and isinstance(args[0], type):
+        return wrapper(args[0], *args[1:], **kwargs)
+    # Case 2: first argument is a not a class
+    # --> decorator is used with arguments
+    # @register_reader("blah")
+    # class F:
+    #     ...
+    def inner(cls):
+        assert isinstance(cls, type), "Decorator must be used with a class"
+        return wrapper(cls, *args, **kwargs)
+    return inner

nerdd_module-0.2.6/nerdd_module/output/__init__.py ADDED Viewed

@@ -0,0 +1 @@
+from .writer_registry import *

{nerdd-module-0.2.4/nerdd_module/io → nerdd_module-0.2.6/nerdd_module/output}/csv_writer.py RENAMED Viewed

@@ -26,5 +26,5 @@ class CsvWriter(Writer):
         for entry in chain([first_entry], entry_iter):
             for key, value in entry.items():
                 if isinstance(value, Mol):
-                    entry[key] = MolToSmiles(value)
+                    entry[key] = MolToSmiles(value, canonical=False)
             writer.writerow(entry)

{nerdd-module-0.2.4 → nerdd_module-0.2.6}/nerdd_module/polyfills/__init__.py RENAMED Viewed

@@ -1,2 +1,3 @@
 from .files import *
 from .get_entry_points import *
+from .version import *

{nerdd-module-0.2.4/nerdd_module → nerdd_module-0.2.6/nerdd_module/polyfills}/version.py RENAMED Viewed

@@ -1,10 +1,8 @@
 import sys
+__all__ = ["version"]
 if sys.version_info < (3, 10):
     from importlib_metadata import version
 else:
     from importlib.metadata import version
-__all__ = ["__version__"]
-__version__ = version(__package__)

{nerdd-module-0.2.4 → nerdd_module-0.2.6}/nerdd_module/preprocessing/__init__.py RENAMED Viewed

@@ -4,6 +4,7 @@ from .empty_pipeline import *
 from .filter_by_element import *
 from .filter_by_weight import *
 from .pipeline import *
-from ..problem import *
 from .registry import *
+from .remove_stereochemistry import *
+from .sanitize import *
 from .step import *

{nerdd-module-0.2.4 → nerdd_module-0.2.6}/nerdd_module/preprocessing/check_valid_smiles.py RENAMED Viewed

@@ -1,8 +1,8 @@
-from typing import List, Tuple
+from typing import List, Optional, Tuple
 from rdkit.Chem import Mol, MolFromSmiles, MolToSmiles
-from ..problem import Problem
+from ..problem import InvalidSmiles, Problem
 from .step import Step
 __all__ = ["CheckValidSmiles"]
@@ -14,15 +14,13 @@ class CheckValidSmiles(Step):
     def __init__(self):
         super().__init__()
-    def _run(self, mol: Mol) -> Tuple[Mol, List[Problem]]:
+    def _run(self, mol: Mol) -> Tuple[Optional[Mol], List[Problem]]:
         errors = []
         smi = MolToSmiles(mol, True)
         check_mol = MolFromSmiles(smi)
         if check_mol is None:
-            errors.append(
-                Problem("invalid_smiles", "Cannot convert molecule to SMILES")
-            )
+            errors.append(InvalidSmiles())
             mol = None
         return mol, errors

{nerdd-module-0.2.4 → nerdd_module-0.2.6}/nerdd_module/preprocessing/chembl_structure_pipeline.py RENAMED Viewed

@@ -1,5 +1,5 @@
 import warnings
-from typing import List, Tuple
+from typing import List, Optional, Tuple
 from rdkit.Chem import Mol
 from rdkit.rdBase import BlockLogs
@@ -41,7 +41,7 @@ class StandardizeWithCsp(Step):
         if import_error is not None:
             raise import_error
-    def _run(self, mol: Mol) -> Tuple[Mol, List[Problem]]:
+    def _run(self, mol: Mol) -> Tuple[Optional[Mol], List[Problem]]:
         errors = []
         # chembl structure pipeline cannot handle molecules with 3D coordinates
@@ -65,7 +65,7 @@ class GetParentMol(Step):
         if import_error is not None:
             raise import_error
-    def _run(self, mol: Mol) -> Tuple[Mol, List[Problem]]:
+    def _run(self, mol: Mol) -> Tuple[Optional[Mol], List[Problem]]:
         errors = []
         # chembl structure pipeline cannot handle molecules with 3D coordinates

{nerdd-module-0.2.4 → nerdd_module-0.2.6}/nerdd_module/preprocessing/filter_by_element.py RENAMED Viewed

@@ -1,4 +1,4 @@
-from typing import Iterable, List, Tuple
+from typing import Iterable, List, Optional, Tuple
 from rdkit.Chem import Mol
@@ -14,7 +14,7 @@ class FilterByElement(Step):
         self.allowed_elements = set(allowed_elements)
         self.remove_invalid_molecules = remove_invalid_molecules
-    def _run(self, mol: Mol) -> Tuple[Mol, List[Problem]]:
+    def _run(self, mol: Mol) -> Tuple[Optional[Mol], List[Problem]]:
         errors = []
         result_mol = mol

{nerdd-module-0.2.4 → nerdd_module-0.2.6}/nerdd_module/preprocessing/filter_by_weight.py RENAMED Viewed

@@ -1,4 +1,4 @@
-from typing import List, Tuple
+from typing import List, Optional, Tuple
 from rdkit.Chem import Mol
 from rdkit.Chem.Descriptors import MolWt
@@ -14,7 +14,7 @@ class FilterByWeight(Step):
         self.max_weight = max_weight
         self.remove_invalid_molecules = remove_invalid_molecules
-    def _run(self, mol: Mol) -> Tuple[Mol, List[Problem]]:
+    def _run(self, mol: Mol) -> Tuple[Optional[Mol], List[Problem]]:
         errors = []
         weight = MolWt(mol)
@@ -25,9 +25,11 @@ class FilterByWeight(Step):
                 result_mol = mol
             errors.append(
                 Problem(
-                    "invalid_weight",
-                    f"Molecular weight {weight:.2f} out of range "
-                    f"[{self.min_weight}, {self.max_weight}]",
+                    type="invalid_weight",
+                    message=(
+                        f"Molecular weight {weight:.2f} out of range "
+                        f"[{self.min_weight}, {self.max_weight}]"
+                    ),
                 )
             )
         else:

nerdd_module-0.2.6/nerdd_module/preprocessing/sanitize.py ADDED Viewed

@@ -0,0 +1,18 @@
+from rdkit.Chem import SanitizeMol
+from .step import Step
+__all__ = ["Sanitize"]
+class Sanitize(Step):
+    def __init__(self):
+        super().__init__()
+    def _run(self, mol):
+        errors = []
+        # sanitize molecule
+        SanitizeMol(mol)
+        return mol, errors

{nerdd-module-0.2.4 → nerdd_module-0.2.6}/nerdd_module/preprocessing/step.py RENAMED Viewed

@@ -1,5 +1,5 @@
 from abc import ABC, abstractmethod
-from typing import List, Tuple
+from typing import List, Optional, Tuple
 from rdkit.Chem import Mol
@@ -12,14 +12,14 @@ class Step(ABC):
     def __init__(self):
         pass
-    def run(self, mol: Mol) -> Tuple[Mol, List[Problem]]:
+    def run(self, mol: Mol) -> Tuple[Optional[Mol], List[Problem]]:
         """
         Runs the step on a molecule.
         """
         return self._run(mol)
     @abstractmethod
-    def _run(self, mol: Mol) -> Tuple[Mol, List[Problem]]:
+    def _run(self, mol: Mol) -> Tuple[Optional[Mol], List[Problem]]:
         """
         Runs the step on a molecule.
         """

nerdd_module-0.2.6/nerdd_module/problem.py ADDED Viewed

@@ -0,0 +1,13 @@
+from typing import NamedTuple
+__all__ = ["Problem", "InvalidSmiles", "UnknownProblem"]
+class Problem(NamedTuple):
+    type: str
+    message: str
+InvalidSmiles = lambda: Problem(type="invalid_smiles", message="Invalid SMILES string")
+UnknownProblem = lambda: Problem(type="unknown", message="Unknown error occurred")

{nerdd-module-0.2.4 → nerdd_module-0.2.6}/nerdd_module/tests/checks.py RENAMED Viewed

@@ -1,4 +1,5 @@
 import json
+from ast import literal_eval
 import numpy as np
 import pandas as pd
@@ -39,14 +40,17 @@ def check_column_range(subset, column_name, low, high):
 @then(parsers.parse("the value in column '{column_name}' should be '{expected_value}'"))
-def check_column_value(predictions, column_name, expected_value):
-    value = predictions[column_name].iloc[0]
+def check_column_value(subset, column_name, expected_value):
+    if len(subset) == 0:
+        return
+    value = subset[column_name].iloc[0]
     # expected value is always provided as string
     # try to convert to float if possible
     try:
-        expected_value = float(expected_value)
-    except ValueError:
+        expected_value = literal_eval(expected_value)
+    except:
         pass
     if expected_value == "(none)":
@@ -132,3 +136,49 @@ def check_column_length(subset, column_name, length):
     assert (
         subset[column_name].map(lambda x: len(x) > length)
     ).all(), f"Column {column_name} has unexpected length"
+@then(
+    parsers.parse(
+        "when '{condition_column_name}' is '{condition_value}' "
+        "the value in column '{column_name}' should be '{expected_value}'"
+    )
+)
+def check_conditional_column_value(
+    subset, condition_column_name, condition_value, column_name, expected_value
+):
+    # expected value is always provided as string
+    # try to convert to float if possible
+    try:
+        expected_value = literal_eval(expected_value)
+    except:
+        pass
+    # same for condition value
+    try:
+        condition_value = literal_eval(condition_value)
+    except:
+        pass
+    # condition value can be (none) to indicate None
+    if condition_value == "(none)":
+        subset = subset[pd.isnull(subset[condition_column_name])]
+    else:
+        subset = subset[subset[condition_column_name] == condition_value]
+    value = subset[column_name]
+    assert (
+        len(value) > 0
+    ), f"No rows found for condition {condition_column_name} == {condition_value}"
+    # expected value can be (none) to indicate None
+    if expected_value == "(none)":
+        # if expected_value is the magic string "(none)", we expect None
+        assert pd.isnull(
+            value
+        ).all(), f"Column {column_name} is assigned to {value} != None"
+    else:
+        # otherwise, we expect the value to be equal to the expected value
+        assert (
+            value == expected_value
+        ).all(), f"Column {column_name} is assigned to {value} != {expected_value}"

{nerdd-module-0.2.4 → nerdd_module-0.2.6}/nerdd_module/tests/representations.py RENAMED Viewed

@@ -45,7 +45,7 @@ def representations_from_molecules(molecules, input_type):
     ),
     target_fixture="molecules",
 )
-def molecules(num, num_none, random_seed):
+def molecules(num, num_none, random_seed=0):
     result = None
     # pytest-bdd and hypothesis don't play well together (yet)

nerdd_module-0.2.6/nerdd_module/version.py ADDED Viewed

@@ -0,0 +1,5 @@
+from .polyfills import version
+__all__ = ["__version__"]
+__version__ = version(__package__)

{nerdd-module-0.2.4 → nerdd_module-0.2.6}/nerdd_module.egg-info/PKG-INFO RENAMED Viewed

@@ -1,8 +1,8 @@
 Metadata-Version: 2.1
 Name: nerdd-module
-Version: 0.2.4
+Version: 0.2.6
 Summary: Base package to create NERDD modules
-Home-page: https://github.com/molinfo-vienna/nerdd-module.git
+Home-page: https://github.com/molinfo-vienna/nerdd-module
 Maintainer: Steffen Hirte
 Maintainer-email: steffen.hirte@univie.ac.at
 License: BSD 3-Clause License
@@ -33,8 +33,11 @@ Requires-Dist: stringcase>=1.2.0
 Requires-Dist: decorator>=5.1.1
 Requires-Dist: importlib-resources>=5; python_version < "3.10"
 Requires-Dist: importlib-metadata>=4.6; python_version < "3.10"
-Requires-Dist: chembl_structure_pipeline>=1.0.0
 Provides-Extra: dev
+Requires-Dist: black; extra == "dev"
+Requires-Dist: isort; extra == "dev"
+Provides-Extra: csp
+Requires-Dist: chembl_structure_pipeline>=1.0.0; extra == "csp"
 Provides-Extra: test
 Requires-Dist: pytest; extra == "test"
 Requires-Dist: pytest-sugar; extra == "test"

{nerdd-module-0.2.4 → nerdd_module-0.2.6}/nerdd_module.egg-info/SOURCES.txt RENAMED Viewed

@@ -19,28 +19,30 @@ nerdd_module/config/dict_configuration.py
 nerdd_module/config/merged_configuration.py
 nerdd_module/config/package_configuration.py
 nerdd_module/config/yaml_configuration.py
-nerdd_module/io/__init__.py
-nerdd_module/io/csv_writer.py
-nerdd_module/io/depth_first_explorer.py
-nerdd_module/io/explorer.py
-nerdd_module/io/file_reader.py
-nerdd_module/io/gzip_reader.py
-nerdd_module/io/inchi_reader.py
-nerdd_module/io/list_reader.py
-nerdd_module/io/mol_reader.py
-nerdd_module/io/reader.py
-nerdd_module/io/reader_registry.py
-nerdd_module/io/sdf_reader.py
-nerdd_module/io/sdf_writer.py
-nerdd_module/io/smiles_reader.py
-nerdd_module/io/string_reader.py
-nerdd_module/io/tar_reader.py
-nerdd_module/io/writer.py
-nerdd_module/io/writer_registry.py
-nerdd_module/io/zip_reader.py
+nerdd_module/input/__init__.py
+nerdd_module/input/depth_first_explorer.py
+nerdd_module/input/explorer.py
+nerdd_module/input/file_reader.py
+nerdd_module/input/gzip_reader.py
+nerdd_module/input/inchi_reader.py
+nerdd_module/input/list_reader.py
+nerdd_module/input/mol_reader.py
+nerdd_module/input/reader.py
+nerdd_module/input/reader_registry.py
+nerdd_module/input/sdf_reader.py
+nerdd_module/input/smiles_reader.py
+nerdd_module/input/string_reader.py
+nerdd_module/input/tar_reader.py
+nerdd_module/input/zip_reader.py
+nerdd_module/output/__init__.py
+nerdd_module/output/csv_writer.py
+nerdd_module/output/sdf_writer.py
+nerdd_module/output/writer.py
+nerdd_module/output/writer_registry.py
 nerdd_module/polyfills/__init__.py
 nerdd_module/polyfills/files.py
 nerdd_module/polyfills/get_entry_points.py
+nerdd_module/polyfills/version.py
 nerdd_module/preprocessing/__init__.py
 nerdd_module/preprocessing/check_valid_smiles.py
 nerdd_module/preprocessing/chembl_structure_pipeline.py
@@ -50,6 +52,7 @@ nerdd_module/preprocessing/filter_by_weight.py
 nerdd_module/preprocessing/pipeline.py
 nerdd_module/preprocessing/registry.py
 nerdd_module/preprocessing/remove_stereochemistry.py
+nerdd_module/preprocessing/sanitize.py
 nerdd_module/preprocessing/step.py
 nerdd_module/tests/__init__.py
 nerdd_module/tests/checks.py
@@ -68,6 +71,5 @@ tests/models/MolWeightModelWithExplicitMols.py
 tests/models/__init__.py
 tests/steps/__init__.py
 tests/steps/checks.py
-tests/steps/molecules.py
 tests/steps/predictors.py
 tests/steps/preprocessing.py

{nerdd-module-0.2.4 → nerdd_module-0.2.6}/nerdd_module.egg-info/requires.txt RENAMED Viewed

@@ -5,13 +5,17 @@ filetype~=1.2.0
 rich-click>=1.7.1
 stringcase>=1.2.0
 decorator>=5.1.1
-chembl_structure_pipeline>=1.0.0
 [:python_version < "3.10"]
 importlib-resources>=5
 importlib-metadata>=4.6
+[csp]
+chembl_structure_pipeline>=1.0.0
 [dev]
+black
+isort
 [docs]
 mkdocs

{nerdd-module-0.2.4 → nerdd_module-0.2.6}/setup.py RENAMED Viewed

@@ -16,11 +16,11 @@ rdkit_requirement = ["rdkit>=2022.3.3"] if not rdkit_installed else []
 setup(
     name="nerdd-module",
-    version="0.2.4",
+    version="0.2.6",
     maintainer="Steffen Hirte",
     maintainer_email="steffen.hirte@univie.ac.at",
     packages=find_packages(),
-    url="https://github.com/molinfo-vienna/nerdd-module.git",
+    url="https://github.com/molinfo-vienna/nerdd-module",
     description="Base package to create NERDD modules",
     license="BSD 3-Clause License",
     long_description=open("README.md").read(),
@@ -36,12 +36,19 @@ setup(
         # install importlib-resources and importlib-metadata for old Python versions
         "importlib-resources>=5; python_version<'3.10'",
         "importlib-metadata>=4.6; python_version<'3.10'",
-        # note: version 1.0.0 of chembl_structure_pipeline is not available on pypi,
-        # but it could potentially be installed from github
-        "chembl_structure_pipeline>=1.0.0",
     ],
     extras_require={
-        "dev": [],
+        "dev": [
+            "black",
+            "isort",
+        ],
+        "csp": [
+            # note: version 1.0.0 of chembl_structure_pipeline is not available on pypi
+            # BUT: maybe it was already installed in the current environment manually
+            # other note: chembl_structure_pipeline *always* installs a recent version
+            #   of rdkit
+            "chembl_structure_pipeline>=1.0.0"
+        ],
         "test": [
             "pytest",
             "pytest-sugar",

{nerdd-module-0.2.4 → nerdd_module-0.2.6}/tests/conftest.py RENAMED Viewed

@@ -4,4 +4,4 @@
 # from .steps import *
 #
 # instead, we use pytest_plugins to make this work
-pytest_plugins = ["tests.steps"]
+pytest_plugins = ["tests.steps", "nerdd_module.tests"]

{nerdd-module-0.2.4 → nerdd_module-0.2.6}/tests/models/MolWeightModel.py RENAMED Viewed

@@ -1,20 +1,17 @@
 import pandas as pd
 from nerdd_module import AbstractModel
+from nerdd_module.preprocessing import Sanitize
 from rdkit.Chem.Descriptors import MolWt
 __all__ = ["MolWeightModel"]
 class MolWeightModel(AbstractModel):
-    def __init__(self, preprocessing_pipeline="chembl_structure_pipeline", **kwargs):
+    def __init__(self, preprocessing_pipeline=[Sanitize()], **kwargs):
         super().__init__(preprocessing_pipeline, **kwargs)
     def _predict_mols(self, mols, multiplier):
-        return pd.DataFrame(
-            {
-                "weight": [MolWt(m) * multiplier for m in mols],
-            }
-        )
+        return pd.DataFrame({"weight": [MolWt(m) * multiplier for m in mols]})
     def _get_config(self):
         return {

{nerdd-module-0.2.4 → nerdd_module-0.2.6}/tests/models/MolWeightModelWithExplicitMolIds.py RENAMED Viewed

@@ -1,12 +1,13 @@
 import pandas as pd
 from nerdd_module import AbstractModel
+from nerdd_module.preprocessing import Sanitize
 from rdkit.Chem.Descriptors import MolWt
 __all__ = ["MolWeightModelWithExplicitMolIds"]
 class MolWeightModelWithExplicitMolIds(AbstractModel):
-    def __init__(self, preprocessing_pipeline="chembl_structure_pipeline", **kwargs):
+    def __init__(self, preprocessing_pipeline=[Sanitize()], **kwargs):
         super().__init__(preprocessing_pipeline, **kwargs)
     def _predict_mols(self, mols, multiplier):

{nerdd-module-0.2.4 → nerdd_module-0.2.6}/tests/models/MolWeightModelWithExplicitMols.py RENAMED Viewed

@@ -1,20 +1,18 @@
 import pandas as pd
 from nerdd_module import AbstractModel
+from nerdd_module.preprocessing import Sanitize
 from rdkit.Chem.Descriptors import MolWt
 __all__ = ["MolWeightModelWithExplicitMols"]
 class MolWeightModelWithExplicitMols(AbstractModel):
-    def __init__(self, preprocessing_pipeline="chembl_structure_pipeline", **kwargs):
+    def __init__(self, preprocessing_pipeline=[Sanitize()], **kwargs):
         super().__init__(preprocessing_pipeline, **kwargs)
     def _predict_mols(self, mols, multiplier):
         return pd.DataFrame(
-            {
-                "mol": mols,
-                "weight": [MolWt(m) * multiplier for m in mols],
-            }
+            {"mol": mols, "weight": [MolWt(m) * multiplier for m in mols]}
         )
     def _get_config(self):

{nerdd-module-0.2.4 → nerdd_module-0.2.6}/tests/steps/__init__.py RENAMED Viewed

@@ -1,4 +1,3 @@
 from .checks import *
-from .molecules import *
 from .predictors import *
 from .preprocessing import *

nerdd-module-0.2.4/nerdd_module/config/default_configuration.py DELETED Viewed

@@ -1,17 +0,0 @@
-from .configuration import Configuration
-__all__ = ["DefaultConfiguration"]
-class DefaultConfiguration(Configuration):
-    def __init__(self, nerdd_module):
-        super().__init__()
-        self.config = dict(
-            task="molecular_property_prediction",
-            job_parameters=[],
-            result_properties=[],
-        )
-    def _get_dict(self):
-        return self.config

nerdd-module-0.2.4/nerdd_module/io/reader_registry.py DELETED Viewed

@@ -1,30 +0,0 @@
-from functools import lru_cache
-from typing import Generator, Type
-from .reader import Reader
-__all__ = ["ReaderRegistry", "register_reader"]
-# lru_cache makes the registry a singleton
-@lru_cache(maxsize=1)
-class ReaderRegistry:
-    def __init__(self):
-        self._factories = []
-    def register(self, ReaderClass: Type[Reader], *args, **kwargs):
-        assert issubclass(ReaderClass, Reader)
-        self._factories.append(lambda: ReaderClass(*args, **kwargs))
-    def readers(self) -> Generator[Reader, None, None]:
-        for reader in self._factories:
-            yield reader()
-    def __iter__(self):
-        return iter(map(lambda f: f(), self._factories))
-def register_reader(clazz, *args, **kwargs):
-    # TODO: implement both decorator modes
-    ReaderRegistry().register(clazz, *args, **kwargs)
-    return clazz

nerdd-module-0.2.4/nerdd_module/problem.py DELETED Viewed

@@ -1,8 +0,0 @@
-from typing import NamedTuple
-__all__ = ["Problem"]
-class Problem(NamedTuple):
-    type: str
-    message: str

nerdd-module-0.2.4/tests/steps/molecules.py DELETED Viewed

@@ -1,54 +0,0 @@
-import numpy as np
-from hypothesis import given as hgiven
-from hypothesis import settings
-from hypothesis import strategies as st
-from hypothesis_rdkit import mols
-from pytest_bdd import given, parsers
-from rdkit.Chem import MolToInchi, MolToMolBlock, MolToSmiles
-@given(
-    parsers.parse(
-        "a list of {num:d} random molecules, where {num_none:d} entries are None"
-    ),
-    target_fixture="molecules",
-)
-def molecules(num, num_none):
-    result = None
-    @hgiven(st.lists(mols(), min_size=num, max_size=num, unique_by=MolToSmiles))
-    @settings(max_examples=1, deadline=None)
-    def generate(mols):
-        nonlocal result
-        # ensure that all molecules are valid
-        result = mols
-    generate()
-    # replace random entries with None
-    indices = np.random.choice(num, num_none, replace=False)
-    for i in indices:
-        result[i] = None
-    return result
-@given(
-    parsers.parse("the representations of the molecules as {input_type}"),
-    target_fixture="representations",
-)
-def representations(molecules, input_type):
-    if input_type == "smiles":
-        converter = MolToSmiles
-    elif input_type == "mol_block":
-        converter = MolToMolBlock
-    elif input_type == "inchi":
-        converter = MolToInchi
-    elif input_type == "rdkit_mol":
-        converter = lambda mol: mol
-    else:
-        raise ValueError(f"Unknown input_type: {input_type}")
-    result = [converter(mol) if mol is not None else None for mol in molecules]
-    return result