PyPI - nerdd-module - Versions diffs - 0.3.12__tar.gz → 0.3.14__tar.gz - Mend

nerdd-module 0.3.12__tar.gz → 0.3.14__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (102) hide show

{nerdd_module-0.3.12 → nerdd_module-0.3.14}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: nerdd-module
-Version: 0.3.12
+Version: 0.3.14
 Summary: Base package to create NERDD modules
 Author-email: Steffen Hirte <steffen.hirte@univie.ac.at>
 Maintainer-email: Steffen Hirte <steffen.hirte@univie.ac.at>
@@ -62,14 +62,14 @@ Requires-Dist: importlib-resources>=5; python_version < "3.9"
 Requires-Dist: importlib-metadata>=4.6; python_version < "3.10"
 Requires-Dist: typing_extensions>=4.0.1; python_version < "3.8"
 Provides-Extra: dev
-Requires-Dist: mypy==1.13.0; extra == "dev"
+Requires-Dist: mypy>=1; extra == "dev"
 Requires-Dist: ruff==0.7.1; extra == "dev"
 Requires-Dist: pandas-stubs; extra == "dev"
 Requires-Dist: rdkit-stubs; extra == "dev"
 Requires-Dist: types-PyYAML; extra == "dev"
 Requires-Dist: types-decorator; extra == "dev"
 Requires-Dist: types-setuptools; extra == "dev"
-Requires-Dist: pre-commit==3.5.0; extra == "dev"
+Requires-Dist: pre-commit>=2; extra == "dev"
 Provides-Extra: rdkit
 Requires-Dist: rdkit>=2022.3.3; extra == "rdkit"
 Provides-Extra: csp
@@ -79,7 +79,7 @@ Requires-Dist: pytest; extra == "test"
 Requires-Dist: pytest-sugar; extra == "test"
 Requires-Dist: pytest-cov; extra == "test"
 Requires-Dist: pytest-asyncio; extra == "test"
-Requires-Dist: pytest-bdd==7.3.0; extra == "test"
+Requires-Dist: pytest-bdd<8; extra == "test"
 Requires-Dist: pytest-mock; extra == "test"
 Requires-Dist: pytest-watcher; extra == "test"
 Requires-Dist: hypothesis; extra == "test"

{nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/__init__.py RENAMED Viewed

@@ -1,4 +1,5 @@
 from .cli import *
+from .converters import *
 from .model import *
 from .output import *
 from .polyfills import get_entry_points

{nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/config/models.py RENAMED Viewed

@@ -71,12 +71,16 @@ class ResultProperty(BaseModel):
     name: str
     type: str
     visible_name: Optional[str] = None
+    visible: bool = True
     help_text: Optional[str] = None
     sortable: bool = False
     group: Optional[str] = None
     level: Level = "molecule"
     formats: Union[FormatSpec, IncludeExcludeFormatSpec, None] = None
     representation: Optional[str] = None
+    from_property: Optional[str] = None
+    image_width: Optional[int] = None
+    image_height: Optional[int] = None
     def is_visible(self, output_format: str) -> bool:
         formats = self.formats

nerdd_module-0.3.14/nerdd_module/converters/__init__.py ADDED Viewed

@@ -0,0 +1,8 @@
+from .basic_type_converter import *
+from .converter import *
+from .converter_config import *
+from .mol_converter import *
+from .mol_to_image_converter import *
+from .problem_list_converter import *
+from .representation_converter import *
+from .void_converter import *

nerdd_module-0.3.14/nerdd_module/converters/basic_type_converter.py ADDED Viewed

@@ -0,0 +1,23 @@
+from typing import Any
+from .converter import Converter
+from .converter_config import ALL, ConverterConfig
+__all__ = ["BasicTypeConverter", "basic_data_types"]
+basic_data_types = [
+    "int",
+    "float",
+    "string",
+    "bool",
+]
+class BasicTypeConverter(Converter):
+    def _convert(self, input: Any, context: dict) -> Any:
+        return input
+    config = ConverterConfig(
+        data_types=basic_data_types,
+        output_formats=ALL,
+    )

nerdd_module-0.3.14/nerdd_module/converters/converter.py ADDED Viewed

@@ -0,0 +1,100 @@
+from __future__ import annotations
+import logging
+from abc import ABC, abstractmethod
+from typing import Any, Callable, Dict, Tuple, Union
+from ..config import ResultProperty
+from ..util import call_with_mappings
+from .converter_config import ALL, ALL_TYPE
+__all__ = ["Converter"]
+logger = logging.getLogger(__name__)
+_factories: Dict[
+    Tuple[Union[str, ALL_TYPE], Union[str, ALL_TYPE]], Callable[[dict], Converter]
+] = {}
+class Converter(ABC):
+    # a special symbol to indicate that a property should be hidden
+    HIDE = object()
+    def __init__(self, result_property: ResultProperty, output_format: str, **kwargs: Any) -> None:
+        super().__init__()
+        self.result_property = result_property
+        self.output_format = output_format
+    @classmethod
+    def __init_subclass__(
+        cls,
+        is_abstract: bool = False,
+        **kwargs: Any,
+    ) -> None:
+        super().__init_subclass__(**kwargs)
+        if hasattr(cls, "config"):
+            data_types = cls.config["data_types"]
+            output_formats = cls.config["output_formats"]
+        else:
+            data_types = None
+            output_formats = None
+        if not is_abstract:
+            if isinstance(data_types, str) or data_types is ALL:
+                data_types_list = [data_types]
+            else:
+                data_types_list = data_types
+            if isinstance(output_formats, str) or output_formats is ALL:
+                output_formats_list = [output_formats]
+            else:
+                output_formats_list = output_formats
+            for output_format in output_formats_list:
+                for data_type in data_types_list:
+                    logger.debug(f"Registering converter {cls} for {data_type} -> {output_format}")
+                    _factories[(data_type, output_format)] = cls
+    @abstractmethod
+    def _convert(self, input: Any, context: dict) -> Any:
+        pass
+    def convert(self, input: Any, context: dict) -> Any:
+        return self._convert(input, context)
+    @classmethod
+    def get_converter(
+        cls,
+        result_property: ResultProperty,
+        output_format: str,
+        return_default: bool = True,
+        **kwargs: Any,
+    ) -> Converter:
+        data_type = result_property.type
+        if (data_type, output_format) not in _factories:
+            ConverterFunc = None
+            if return_default:
+                if (data_type, ALL) in _factories:
+                    ConverterFunc = _factories[(data_type, ALL)]
+                elif (ALL, output_format) in _factories:
+                    ConverterFunc = _factories[(ALL, output_format)]
+                elif (ALL, ALL) in _factories:
+                    ConverterFunc = _factories[(ALL, ALL)]
+            if ConverterFunc is None:
+                raise ValueError(
+                    f"Unknown data type '{data_type}' or output format '{output_format}'"
+                )
+        else:
+            ConverterFunc = _factories[(data_type, output_format)]
+        # kwargs will be passed to the constructor of the converter
+        # --> add data_type and output_format to the kwargs
+        kwargs["result_property"] = result_property
+        kwargs["output_format"] = output_format
+        return call_with_mappings(ConverterFunc, kwargs)

nerdd_module-0.3.14/nerdd_module/converters/converter_config.py ADDED Viewed

@@ -0,0 +1,15 @@
+from typing import List, Optional, Union
+from ..polyfills import Literal, TypedDict
+__all__ = ["ConverterConfig", "ALL", "ALL_TYPE"]
+# a special symbol to indicate that all data types / output formats are considered
+ALL_TYPE = Literal["ALL"]
+ALL: ALL_TYPE = "ALL"
+class ConverterConfig(TypedDict):
+    data_types: Optional[Union[str, List[str], ALL_TYPE]]
+    output_formats: Optional[Union[str, List[str], ALL_TYPE]]

nerdd_module-0.3.14/nerdd_module/converters/mol_converter.py ADDED Viewed

@@ -0,0 +1,24 @@
+from typing import Any
+from ..config import ResultProperty
+from .converter import Converter
+from .converter_config import ConverterConfig
+__all__ = ["MolConverter"]
+class MolConverter(Converter):
+    def __init__(self, result_property: ResultProperty, output_format: str, **kwargs: Any) -> None:
+        super().__init__(result_property, output_format, **kwargs)
+    def _convert(self, input: Any, context: dict) -> Any:
+        if self.output_format == "sdf" and self.result_property.name != "input_mol":
+            # in an SDF, the main molecule (input_mol) can be a Mol object
+            return Converter.HIDE
+        elif self.output_format in ["pandas", "record_list", "iterator"]:
+            return input
+    config = ConverterConfig(
+        data_types="mol",
+        output_formats=["sdf", "pandas", "record_list", "iterator"],
+    )

nerdd_module-0.3.14/nerdd_module/converters/mol_to_image_converter.py ADDED Viewed

@@ -0,0 +1,62 @@
+from typing import Any
+from xml.dom import minidom
+from rdkit.Chem import Mol
+from rdkit.Chem.Draw import MolDraw2DSVG
+from .converter import Converter
+from .converter_config import ConverterConfig
+__all__ = ["MolToImageConverter"]
+default_width = 300
+default_height = 180
+class MolToImageConverter(Converter):
+    def _convert(self, input: Any, context: dict) -> Any:
+        width = self.result_property.image_width
+        height = self.result_property.image_height
+        if width is None:
+            width = default_width
+        if height is None:
+            height = default_height
+        mol = input
+        if mol is None:
+            return None
+        assert isinstance(mol, Mol), f"Expected RDKit Mol object, but got {type(mol)}"
+        svg = MolDraw2DSVG(width, height)
+        # remove background
+        opts = svg.drawOptions()
+        opts.clearBackground = False
+        # add highlight circles around atoms during drawing
+        # (we will remove them later in post processing)
+        atoms = range(mol.GetNumAtoms())
+        colors = [[(0.8, 1, 1)]] * mol.GetNumAtoms()
+        radii = [0.5] * mol.GetNumAtoms()
+        atom_highlight = dict(zip(atoms, colors))
+        atom_radii = dict(zip(atoms, radii))
+        svg.DrawMoleculeWithHighlights(mol, "", atom_highlight, {}, atom_radii, [])
+        svg.FinishDrawing()
+        # post process SVG
+        xml = svg.GetDrawingText()
+        tree = minidom.parseString(xml)
+        root = tree.getElementsByTagName("svg")[0]
+        # make highlight circles invisible
+        for ellipse in root.getElementsByTagName("ellipse"):
+            ellipse.setAttribute("style", "opacity:0")
+        xml = tree.toxml()
+        return xml
+    # TODO: move to nerdd-link
+    config = ConverterConfig(data_types="mol", output_formats="json")

nerdd_module-0.3.14/nerdd_module/converters/problem_list_converter.py ADDED Viewed

@@ -0,0 +1,21 @@
+from typing import Any, List, cast
+from ..problem import Problem
+from .converter import Converter
+from .converter_config import ALL, ConverterConfig
+__all__ = ["ProblemListConverter"]
+class ProblemListConverter(Converter):
+    def _convert(self, input: Any, context: dict) -> Any:
+        if self.output_format in ["pandas", "iterator", "record_list"]:
+            return input
+        else:
+            problem_list: List[Problem] = cast(List[Problem], input)
+            return "; ".join([f"{problem.type}: {problem.message}" for problem in problem_list])
+    config = ConverterConfig(
+        data_types="problem_list",
+        output_formats=ALL,
+    )

nerdd_module-0.3.14/nerdd_module/converters/representation_converter.py ADDED Viewed

@@ -0,0 +1,42 @@
+from typing import Any
+from rdkit.Chem import MolToInchi, MolToSmiles
+from ..config import ResultProperty
+from .converter import Converter
+from .converter_config import ALL, ConverterConfig
+__all__ = ["RepresentationConverter"]
+class RepresentationConverter(Converter):
+    def __init__(self, result_property: ResultProperty, output_format: str, **kwargs: Any) -> None:
+        super().__init__(result_property, output_format, **kwargs)
+        representation = result_property.representation or "smiles"
+        if representation == "inchi":
+            self._serialize = MolToInchi
+        elif representation == "smiles":
+            self._serialize = MolToSmiles
+        else:
+            raise ValueError(f"Unsupported representation: {representation}")
+    def _convert(self, input: Any, context: dict) -> Any:
+        from_property = self.result_property.from_property
+        if from_property is None:
+            actual_input = input
+        else:
+            actual_input = context[from_property]
+        try:
+            representation = self._serialize(actual_input)
+        except:  # noqa: E722 (allow bare except, because RDKit is unpredictable)
+            representation = None
+        return representation
+    config = ConverterConfig(
+        data_types="representation",
+        output_formats=ALL,
+    )

nerdd_module-0.3.14/nerdd_module/converters/void_converter.py ADDED Viewed

@@ -0,0 +1,17 @@
+from typing import Any
+from .converter import Converter
+from .converter_config import ALL, ConverterConfig
+__all__ = ["VoidConverter"]
+class VoidConverter(Converter):
+    def _convert(self, input: Any, context: dict) -> Any:
+        return Converter.HIDE
+    # by default, all data types will be hidden for all output formats
+    config = ConverterConfig(
+        data_types=ALL,
+        output_formats=ALL,
+    )

{nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/input/depth_first_explorer.py RENAMED Viewed

@@ -107,7 +107,7 @@ class DepthFirstExplorer(Explorer):
             except Exception:
                 pass
-            # clean up tree
+            # clean up stack
             while len(self._state_stack) > depth:
                 self._state_stack.pop()
             generator = None
@@ -115,12 +115,11 @@ class DepthFirstExplorer(Explorer):
         if generator is None:
             if best_reader is None:
                 generator = self._read(InvalidInputReader(), input)
-                sample = []
             else:
                 generator = self._read(best_reader, input)
-                sample = list(islice(generator, self._num_test_entries))
+            sample = []
         else:
-            if best_mode is not None and best_mode != "guess":
+            if best_mode == "builtin":
                 parent["first_guess"].append(best_reader)
         yield from sample

{nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/input/file_reader.py RENAMED Viewed

@@ -16,11 +16,16 @@ class FileReader(Reader):
             self.data_dir = Path(self.data_dir)
     def read(self, filename: Any, explore: ExploreCallable) -> Iterator[MoleculeEntry]:
-        assert isinstance(filename, str), "input must be a string"
+        assert isinstance(filename, (str, bytes)), "input must be a string or bytes"
+        if isinstance(filename, bytes):
+            filename_str = filename.decode("utf-8")
+        else:
+            filename_str = filename
         # convert filename to path
         try:
-            path = Path(filename)
+            path = Path(filename_str)
         except TypeError as e:
             raise ValueError("input must be a valid path") from e
@@ -45,7 +50,7 @@ class FileReader(Reader):
                     source: Tuple[str, ...] = tuple()
                 else:
                     source = entry.source
-                yield entry._replace(source=(filename, *source))
+                yield entry._replace(source=(filename_str, *source))
     def __repr__(self) -> str:
         return f"FileReader(data_dir={self.data_dir})"

nerdd_module-0.3.14/nerdd_module/model/convert_representations_step.py ADDED Viewed

@@ -0,0 +1,28 @@
+from typing import Any, List
+from ..config import ResultProperty
+from ..converters import Converter
+from ..steps import MapStep
+__all__ = ["ConvertRepresentationsStep"]
+class ConvertRepresentationsStep(MapStep):
+    def __init__(
+        self, result_properties: List[ResultProperty], output_format: str, **kwargs: Any
+    ) -> None:
+        super().__init__()
+        self._result_properties = result_properties
+        self._converter_map = {
+            p.name: Converter.get_converter(p, output_format, **kwargs) for p in result_properties
+        }
+    def _process(self, record: dict) -> dict:
+        result = {
+            k.name: self._converter_map[k.name].convert(
+                input=record.get(k.name, None), context=record
+            )
+            for k in self._result_properties
+        }
+        return {k: v for k, v in result.items() if v is not Converter.HIDE}

{nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/model/simple_model.py RENAMED Viewed

@@ -113,22 +113,60 @@ class SimpleModel(Model):
         task_based_property = []
         if task == "atom_property_prediction":
             task_based_property = [
-                {"name": "atom_id", "type": "integer"},
+                {"name": "atom_id", "type": "int", "visible": False},
             ]
         elif task == "derivative_property_prediction":
             task_based_property = [
-                {"name": "derivative_id", "type": "integer"},
+                {"name": "derivative_id", "type": "int", "visible": False},
             ]
         default_properties_start = [
-            {"name": "mol_id", "type": "integer"},
+            {"name": "mol_id", "type": "int", "visible": False},
             *task_based_property,
-            {"name": "input_text", "type": "string"},
-            {"name": "input_type", "type": "string"},
-            {"name": "source", "type": "string"},
-            {"name": "name", "type": "string"},
-            {"name": "input_mol", "type": "mol"},
-            {"name": "preprocessed_mol", "type": "mol"},
+            {
+                "name": "input_text",
+                "visible_name": "Input text",
+                "type": "string",
+                "visible": False,
+            },
+            {
+                "name": "input_type",
+                "visible_name": "Input type",
+                "type": "string",
+                "visible": False,
+            },
+            {
+                "name": "source",
+                "visible_name": "Source",
+                "type": "string",
+                "visible": False,
+            },
+            {"name": "name", "visible_name": "Name", "type": "string"},
+            {
+                "name": "input_mol",
+                "visible_name": "Input Structure",
+                "type": "mol",
+                "visible": False,
+            },
+            {
+                "name": "input_smiles",
+                "visible_name": "Input SMILES",
+                "type": "representation",
+                "from_property": "input_mol",
+                "visible": False,
+            },
+            {
+                "name": "preprocessed_mol",
+                "visible_name": "Preprocessed Structure",
+                "type": "mol",
+            },
+            {
+                "name": "preprocessed_smiles",
+                "visible_name": "Preprocessed SMILES",
+                "type": "representation",
+                "from_property": "preprocessed_mol",
+                "visible": False,
+            },
         ]
         default_properties_end = [

{nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/output/sdf_writer.py RENAMED Viewed

@@ -1,6 +1,6 @@
 from typing import IO, Any, Dict, Iterable
-from rdkit.Chem import SDWriter
+from rdkit.Chem import Mol, SDWriter
 from .file_writer import FileLike, FileWriter
@@ -18,13 +18,22 @@ class SdfWriter(FileWriter, output_format="sdf"):
                 # assume that there is a mol object
                 mol = entry["input_mol"]
+                # if the molecule is erroneous, use an empty molecule
+                if mol is None:
+                    mol = Mol()
                 # write (almost) all properties to the mol object
                 for key, value in entry.items():
-                    value_as_str = str(value)
-                    if "\n" in value_as_str:
-                        # SDF can't write multi-line strings
+                    # skip "input_mol" key, because we use it as the main molecule
+                    if key == "input_mol":
                         continue
+                    value_as_str = str(value)
+                    # SDF can't write multi-line strings
+                    # -> replace newline with space
+                    value_as_str = value_as_str.replace("\n", " ")
                     mol.SetProp(key, value_as_str)
                 # write molecule

{nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/preprocessing/chembl_structure_pipeline.py RENAMED Viewed

@@ -14,17 +14,23 @@ warnings.filterwarnings(
     module="rdkit.Chem.MolStandardize",
 )
+# We check if chembl_structure_pipeline is installed. Since importing this library already logs
+# messages, we suppress them using RDKit's BlockLogs. We would like to use
+#   with BlockLogs(): ...
+# but this does not work with old versions of RDKit. Therefore, we create an instance of
+# BlockLogs that will suppress log messages as long as it exists. When it is deleted (in the
+# "finally" block), logs are enabled again.
+block_logs = BlockLogs()
 try:
-    # importing chembl_structure_pipeline already logs messages
-    # --> suppress them temporarily
-    with BlockLogs():
-        from chembl_structure_pipeline import get_parent_mol, standardize_mol
+    from chembl_structure_pipeline import get_parent_mol, standardize_mol
     import_error = None
 except ImportError as e:
     # raise ImportError later when using this class
     # --> this allows to use the rest of the package without chembl_structure_pipeline
     import_error = e
+finally:
+    del block_logs
 __all__ = ["GetParentMolWithCsp", "StandardizeWithCsp"]

{nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/tests/__init__.py RENAMED Viewed

@@ -1,3 +1,4 @@
 from .checks import *
+from .files import *
 from .predictions import *
 from .representations import *

nerdd-module 0.3.12__tar.gz → 0.3.14__tar.gz

nerdd-module 0.3.12__tar.gz → 0.3.14__tar.gz