PyPI - nerdd-module - Versions diffs - 0.3.46__tar.gz → 0.3.48__tar.gz - Mend

nerdd-module 0.3.46.tar.gz → 0.3.48.tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (101) hide show

{nerdd_module-0.3.46 → nerdd_module-0.3.48}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: nerdd-module
-Version: 0.3.46
+Version: 0.3.48
 Summary: Base package to create NERDD modules
 Author-email: Steffen Hirte <steffen.hirte@univie.ac.at>
 Maintainer-email: Steffen Hirte <steffen.hirte@univie.ac.at>

{nerdd_module-0.3.46 → nerdd_module-0.3.48}/nerdd_module/input/depth_first_explorer.py RENAMED Viewed

@@ -85,11 +85,9 @@ class DepthFirstExplorer(Explorer):
                 if (
                     score > best_score
-                    # if the score is the same, prefer the reader with higher ratio
-                    # of valid entries
+                    # if the score is the same, prefer the reader with higher ratio of valid entries
                     or (score == best_score and ratio > best_ratio)
-                    # if the ratio is the same, prefer the reader with less invalid
-                    # results
+                    # if the ratio is the same, prefer the reader with less invalid results
                     or (
                         score == best_score
                         and ratio == best_ratio

{nerdd_module-0.3.46 → nerdd_module-0.3.48}/nerdd_module/input/inchi_reader.py RENAMED Viewed

@@ -12,8 +12,9 @@ __all__ = ["InchiReader"]
 class InchiReader(StreamReader):
-    def __init__(self) -> None:
+    def __init__(self, max_length_inchi: int = 10_000) -> None:
         super().__init__()
+        self._max_length_inchi = max_length_inchi
     def _read_stream(self, input_stream: Any, explore: ExploreCallable) -> Iterator[MoleculeEntry]:
         # suppress RDKit warnings
@@ -27,6 +28,23 @@ class InchiReader(StreamReader):
                 if line.strip().startswith("#"):
                     continue
+                # avoid long InChI strings, because they might take veeeeery long to parse
+                if len(line) > self._max_length_inchi:
+                    errors = [
+                        Problem(
+                            "line_too_long",
+                            f"Line exceeds max length of {self._max_length_inchi} characters",
+                        )
+                    ]
+                    yield MoleculeEntry(
+                        raw_input=line.strip("\n")[: self._max_length_inchi - 3] + "...",
+                        input_type="inchi",
+                        source=("raw_input",),
+                        mol=None,
+                        errors=errors,
+                    )
+                    continue
                 try:
                     mol = MolFromInchi(line, sanitize=False)
                 except:  # noqa: E722 (allow bare except, because RDKit is unpredictable)
@@ -46,10 +64,12 @@ class InchiReader(StreamReader):
                 )
     def __repr__(self) -> str:
-        return "InchiReader()"
+        return f"InchiReader(max_length_inchi={self._max_length_inchi})"
     config = ReaderConfig(
         examples=[
-            "InChI=1S/C18H16O3/c1-2-13(12-8-4-3-5-9-12)16-17(19)14-10-6-7-11-15(14)21-18(16)20/h3-11,13,19H,2H2,1H3"
+            # (this is one InChI string, split into two lines)
+            "InChI=1S/C18H16O3/c1-2-13(12-8-4-3-5-9-12)16-17(19)14-10-6-7-11-15(14)21-18(16)20"
+            "/h3-11,13,19H,2H2,1H3"
         ]
     )

{nerdd_module-0.3.46 → nerdd_module-0.3.48}/nerdd_module/input/sdf_reader.py RENAMED Viewed

@@ -11,7 +11,7 @@ __all__ = ["SdfReader"]
 class SdfReader(StreamReader):
-    def __init__(self, max_num_lines_mol_block: int = 10000) -> None:
+    def __init__(self, max_num_lines_mol_block: int = 10_000) -> None:
         super().__init__()
         self.max_num_lines_mol_block = max_num_lines_mol_block

{nerdd_module-0.3.46 → nerdd_module-0.3.48}/nerdd_module/input/smiles_reader.py RENAMED Viewed

@@ -12,8 +12,9 @@ __all__ = ["SmilesReader"]
 class SmilesReader(StreamReader):
-    def __init__(self) -> None:
+    def __init__(self, max_length_smiles: int = 10_000) -> None:
         super().__init__()
+        self._max_length_smiles = max_length_smiles
     def _read_stream(self, input_stream: Any, explore: ExploreCallable) -> Iterator[MoleculeEntry]:
         # suppress RDKit warnings
@@ -27,6 +28,25 @@ class SmilesReader(StreamReader):
                 if line.strip().startswith("#"):
                     continue
+                line = line.strip("\n")
+                # avoid long smiles strings, because they might take veeeeery long to parse
+                if len(line) > self._max_length_smiles:
+                    errors = [
+                        Problem(
+                            "line_too_long",
+                            f"Line exceeds max length of {self._max_length_smiles} characters",
+                        )
+                    ]
+                    yield MoleculeEntry(
+                        raw_input=line[: self._max_length_smiles - 3] + "...",
+                        input_type="smiles",
+                        source=("raw_input",),
+                        mol=None,
+                        errors=errors,
+                    )
+                    continue
                 try:
                     mol = MolFromSmiles(line, sanitize=False)
                 except:  # noqa: E722 (allow bare except, because RDKit is unpredictable)
@@ -48,7 +68,7 @@ class SmilesReader(StreamReader):
                     errors = []
                 yield MoleculeEntry(
-                    raw_input=line.strip("\n"),
+                    raw_input=line,
                     input_type="smiles",
                     source=("raw_input",),
                     mol=mol,
@@ -56,6 +76,6 @@ class SmilesReader(StreamReader):
                 )
     def __repr__(self) -> str:
-        return "SmilesReader()"
+        return f"SmilesReader(max_length={self._max_length_smiles})"
     config = ReaderConfig(examples=["C1=NC2=C(N1COCCO)N=C(NC2=O)N"])

{nerdd_module-0.3.46 → nerdd_module-0.3.48}/nerdd_module/tests/checks.py RENAMED Viewed

@@ -46,7 +46,7 @@ def check_column_value_equality(subset, column_name, expected_value):
     # try to convert to float if possible
     try:
         expected_value = literal_eval(expected_value)
-    except:
+    except:  # noqa: E722
         pass
     if expected_value is None:
@@ -72,7 +72,7 @@ def check_column_value_inequality(subset, column_name, forbidden_value):
     # try to convert to float if possible
     try:
         forbidden_value = literal_eval(forbidden_value)
-    except:
+    except:  # noqa: E722
         pass
     if forbidden_value is None:
@@ -156,13 +156,13 @@ def check_conditional_column_value(
     # try to convert to float if possible
     try:
         expected_value = literal_eval(expected_value)
-    except:
+    except:  # noqa: E722
         pass
     # same for condition value
     try:
         condition_value = literal_eval(condition_value)
-    except:
+    except:  # noqa: E722
         pass
     # condition value can be (none) to indicate None

{nerdd_module-0.3.46 → nerdd_module-0.3.48}/nerdd_module/tests/files.py RENAMED Viewed

@@ -39,9 +39,7 @@ def representation_files(molecules, input_type, num_files):
     # choose num_files-1 numbers to split the representations into num_files parts
     # the while loop makes sure that each part contains at least one valid molecule
     while True:
-        split_indices = np.random.choice(
-            len(representations), size=num_files - 1, replace=False
-        )
+        split_indices = np.random.choice(len(representations), size=num_files - 1, replace=False)
         split_indices = np.sort(split_indices)
         # split the representations
@@ -57,13 +55,16 @@ def representation_files(molecules, input_type, num_files):
     # write the representations to files
     representations_files = []
-    for _, split_representation in enumerate(split_representations):
+    for split_representation in split_representations:
         with NamedTemporaryFile("w", delete=False) as f:
             for representation in split_representation:
+                # write representation
                 if representation is None:
                     f.write("None")
                 else:
                     f.write(representation)
+                # write separator
                 if input_type in ["smiles", "inchi"]:
                     f.write("\n")
                 elif input_type == "mol_block":

{nerdd_module-0.3.46 → nerdd_module-0.3.48}/nerdd_module/tests/models/AtomicMassModel.py RENAMED Viewed

@@ -8,7 +8,9 @@ allowed_versions = ["mol_ids", "mols", "iterator", "error"]
 class AtomicMassModel(Model):
-    def __init__(self, preprocessing_steps=[Sanitize()], version="mol_ids", **kwargs):
+    def __init__(self, preprocessing_steps=None, version="mol_ids", **kwargs):
+        if preprocessing_steps is None:
+            preprocessing_steps = [Sanitize()]
         assert (
             version in allowed_versions
         ), f"version must be one of {allowed_versions}, got {version}"
@@ -64,5 +66,3 @@ class AtomicMassModel(Model):
                 {"name": "mass", "type": "float", "level": "atom"},
             ],
         }

{nerdd_module-0.3.46 → nerdd_module-0.3.48}/nerdd_module/tests/models/MolWeightModel.py RENAMED Viewed

@@ -9,7 +9,9 @@ allowed_versions = ["order_based", "mol_ids", "mols", "iterator", "error"]
 class MolWeightModel(Model):
-    def __init__(self, preprocessing_steps=[Sanitize()], version="order_based", **kwargs):
+    def __init__(self, preprocessing_steps=None, version="order_based", **kwargs):
+        if preprocessing_steps is None:
+            preprocessing_steps = [Sanitize()]
         assert (
             version in allowed_versions
         ), f"version must be one of {allowed_versions}, got {version}"
@@ -48,4 +50,4 @@ class MolWeightModel(Model):
             "result_properties": [
                 {"name": "weight", "type": "float"},
             ],
-        }
+        }

{nerdd_module-0.3.46 → nerdd_module-0.3.48}/nerdd_module/tests/predictions.py RENAMED Viewed

@@ -21,7 +21,8 @@ def mol_weight_model(version):
 @when(
     parsers.parse(
-        "the mol weight model (version '{version}') generates predictions for the molecule representations"
+        "the mol weight model (version '{version}') generates predictions for the molecule "
+        "representations"
     ),
     target_fixture="predictions",
 )
@@ -36,7 +37,8 @@ def predictions_mol_weight_model(representations, version, multiplier):
 @when(
     parsers.parse(
-        "the atomic mass model (version '{version}') generates predictions for the molecule representations"
+        "the atomic mass model (version '{version}') generates predictions for the molecule "
+        "representations"
     ),
     target_fixture="predictions",
 )
@@ -48,6 +50,7 @@ def predictions_atomic_mass_model(representations, version, multiplier):
         output_format="record_list",
     )
 @when(
     "all results are considered",
     target_fixture="subset",
@@ -55,6 +58,7 @@ def predictions_atomic_mass_model(representations, version, multiplier):
 def all_results(predictions):
     return predictions
 @when(
     "the subset of the result where the input was not None is considered",
     target_fixture="subset",
@@ -63,10 +67,11 @@ def subset_without_input_none(predictions):
     # remove None entries
     return [p for p in predictions if p["input_mol"] is not None]
 @when(
     "the subset of the result where the preprocessed mol was not None is considered",
     target_fixture="subset",
 )
 def subset_without_preprocessed_none(predictions):
     # remove None entries
-    return [p for p in predictions if p["preprocessed_mol"] is not None]
+    return [p for p in predictions if p["preprocessed_mol"] is not None]

{nerdd_module-0.3.46 → nerdd_module-0.3.48}/nerdd_module/tests/representations.py RENAMED Viewed

@@ -1,3 +1,6 @@
+import re
+from functools import reduce
 import numpy as np
 from hypothesis import given as hgiven
 from hypothesis import seed, settings
@@ -5,10 +8,9 @@ from hypothesis import strategies as st
 from hypothesis_rdkit import mols
 from pytest_bdd import given, parsers
 from rdkit.Chem import MolToInchi, MolToMolBlock, MolToSmiles
-from ..polyfills import BlockLogs
 from rdkit.Chem.rdMolDescriptors import CalcExactMolWt
-import re
-from functools import reduce
+from ..polyfills import BlockLogs
 @given(parsers.parse("a random seed set to {seed:d}"), target_fixture="random_seed")
@@ -38,7 +40,9 @@ def representations_from_molecules(molecules, input_type):
     elif input_type == "inchi":
         converter = MolToInchi
     elif input_type == "rdkit_mol":
-        converter = lambda mol: mol
+        def converter(mol):
+            return mol
     else:
         raise ValueError(f"Unknown input_type: {input_type}")
@@ -49,9 +53,7 @@ def representations_from_molecules(molecules, input_type):
 @given(
-    parsers.re(
-        r"a list of (?P<num>\d+) random molecules(?:, where(?P<conditions>[\s\S]*))?"
-    ),
+    parsers.re(r"a list of (?P<num>\d+) random molecules(?:, where(?P<conditions>[\s\S]*))?"),
     target_fixture="molecules",
 )
 def molecules(num, conditions, random_seed=0):
@@ -61,6 +63,7 @@ def molecules(num, conditions, random_seed=0):
     maps = []
     if conditions is not None:
         def filter_weight(min_weight, max_weight):
             min_weight = float(min_weight)
             max_weight = float(max_weight)
@@ -76,15 +79,19 @@ def molecules(num, conditions, random_seed=0):
         expressions = [
             # filters are functions that return True if the molecule should be kept
-            ("filter", r"each mol has a weight between (?P<min_weight>\d+) and (?P<max_weight>\d+)", filter_weight),
+            (
+                "filter",
+                r"each mol has a weight between (?P<min_weight>\d+) and (?P<max_weight>\d+)",
+                filter_weight,
+            ),
             # maps are functions that modify the molecule
-            ("map", r"(?P<num_none>\d+) entries are None", map_to_none)
+            ("map", r"(?P<num_none>\d+) entries are None", map_to_none),
         ]
         conditions_list = [c for c in conditions.split("\n") if c.strip() != ""]
         for condition in conditions_list:
-            for kind, expression, f in expressions:
+            for kind, expression, f in expressions:  # noqa: B007
                 # conditions might be a markdown list (starting with a star character)
                 expression = r"\s*(\*\s*)?" + expression + r"\s*"
@@ -92,7 +99,7 @@ def molecules(num, conditions, random_seed=0):
                 if match:
                     params = match.groupdict()
                     break
             assert match is not None, f"Could not parse condition: {condition}"
             if kind == "filter":
@@ -102,8 +109,11 @@ def molecules(num, conditions, random_seed=0):
             else:
                 raise ValueError(f"Unknown kind: {kind}")
-    filter_func = lambda mol: all(f(mol) for f in filters)
-    map_func = lambda ms: reduce(lambda ms, f: f(ms), maps, ms)
+    def filter_func(mol):
+        return all(f(mol) for f in filters)
+    def map_func(ms):
+        return reduce(lambda ms, f: f(ms), maps, ms)
     result = None

{nerdd_module-0.3.46 → nerdd_module-0.3.48}/nerdd_module.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: nerdd-module
-Version: 0.3.46
+Version: 0.3.48
 Summary: Base package to create NERDD modules
 Author-email: Steffen Hirte <steffen.hirte@univie.ac.at>
 Maintainer-email: Steffen Hirte <steffen.hirte@univie.ac.at>

{nerdd_module-0.3.46 → nerdd_module-0.3.48}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "nerdd-module"
-version = "0.3.46"
+version = "0.3.48"
 description = "Base package to create NERDD modules"
 readme = "README.md"
 license = "BSD-3-Clause"
@@ -105,7 +105,6 @@ patterns = ["*.py", "*.feature", "pyproject.toml"]
 [tool.ruff]
 line-length = 100
-extend-exclude = ["tests", "nerdd_module/tests"]
 [tool.ruff.lint]
 select = [