nerdd-module 0.2.5__tar.gz → 0.2.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. {nerdd-module-0.2.5 → nerdd_module-0.2.6}/PKG-INFO +6 -3
  2. {nerdd-module-0.2.5 → nerdd_module-0.2.6}/nerdd_module/abstract_model.py +16 -11
  3. {nerdd-module-0.2.5 → nerdd_module-0.2.6}/nerdd_module/cli.py +1 -1
  4. nerdd_module-0.2.6/nerdd_module/config/default_configuration.py +41 -0
  5. {nerdd-module-0.2.5 → nerdd_module-0.2.6}/nerdd_module/config/merged_configuration.py +2 -0
  6. {nerdd-module-0.2.5/nerdd_module/io → nerdd_module-0.2.6/nerdd_module/input}/__init__.py +0 -4
  7. {nerdd-module-0.2.5/nerdd_module/io → nerdd_module-0.2.6/nerdd_module/input}/file_reader.py +6 -5
  8. {nerdd-module-0.2.5/nerdd_module/io → nerdd_module-0.2.6/nerdd_module/input}/reader_registry.py +13 -8
  9. nerdd_module-0.2.6/nerdd_module/output/__init__.py +1 -0
  10. {nerdd-module-0.2.5 → nerdd_module-0.2.6}/nerdd_module/polyfills/__init__.py +1 -0
  11. {nerdd-module-0.2.5/nerdd_module → nerdd_module-0.2.6/nerdd_module/polyfills}/version.py +2 -4
  12. {nerdd-module-0.2.5 → nerdd_module-0.2.6}/nerdd_module/preprocessing/__init__.py +2 -1
  13. {nerdd-module-0.2.5 → nerdd_module-0.2.6}/nerdd_module/preprocessing/check_valid_smiles.py +4 -6
  14. {nerdd-module-0.2.5 → nerdd_module-0.2.6}/nerdd_module/preprocessing/chembl_structure_pipeline.py +3 -3
  15. {nerdd-module-0.2.5 → nerdd_module-0.2.6}/nerdd_module/preprocessing/filter_by_element.py +2 -2
  16. {nerdd-module-0.2.5 → nerdd_module-0.2.6}/nerdd_module/preprocessing/filter_by_weight.py +7 -5
  17. nerdd_module-0.2.6/nerdd_module/preprocessing/sanitize.py +18 -0
  18. {nerdd-module-0.2.5 → nerdd_module-0.2.6}/nerdd_module/preprocessing/step.py +3 -3
  19. nerdd_module-0.2.6/nerdd_module/problem.py +13 -0
  20. {nerdd-module-0.2.5 → nerdd_module-0.2.6}/nerdd_module/tests/checks.py +17 -7
  21. nerdd_module-0.2.6/nerdd_module/version.py +5 -0
  22. {nerdd-module-0.2.5 → nerdd_module-0.2.6}/nerdd_module.egg-info/PKG-INFO +6 -3
  23. {nerdd-module-0.2.5 → nerdd_module-0.2.6}/nerdd_module.egg-info/SOURCES.txt +22 -19
  24. {nerdd-module-0.2.5 → nerdd_module-0.2.6}/nerdd_module.egg-info/requires.txt +5 -1
  25. {nerdd-module-0.2.5 → nerdd_module-0.2.6}/setup.py +13 -6
  26. {nerdd-module-0.2.5 → nerdd_module-0.2.6}/tests/models/MolWeightModel.py +3 -6
  27. {nerdd-module-0.2.5 → nerdd_module-0.2.6}/tests/models/MolWeightModelWithExplicitMolIds.py +2 -1
  28. {nerdd-module-0.2.5 → nerdd_module-0.2.6}/tests/models/MolWeightModelWithExplicitMols.py +3 -5
  29. nerdd-module-0.2.5/nerdd_module/config/default_configuration.py +0 -17
  30. nerdd-module-0.2.5/nerdd_module/problem.py +0 -8
  31. {nerdd-module-0.2.5 → nerdd_module-0.2.6}/LICENSE +0 -0
  32. {nerdd-module-0.2.5 → nerdd_module-0.2.6}/README.md +0 -0
  33. {nerdd-module-0.2.5 → nerdd_module-0.2.6}/nerdd_module/__init__.py +0 -0
  34. {nerdd-module-0.2.5 → nerdd_module-0.2.6}/nerdd_module/config/__init__.py +0 -0
  35. {nerdd-module-0.2.5 → nerdd_module-0.2.6}/nerdd_module/config/auto_configuration.py +0 -0
  36. {nerdd-module-0.2.5 → nerdd_module-0.2.6}/nerdd_module/config/configuration.py +0 -0
  37. {nerdd-module-0.2.5 → nerdd_module-0.2.6}/nerdd_module/config/dict_configuration.py +0 -0
  38. {nerdd-module-0.2.5 → nerdd_module-0.2.6}/nerdd_module/config/package_configuration.py +0 -0
  39. {nerdd-module-0.2.5 → nerdd_module-0.2.6}/nerdd_module/config/yaml_configuration.py +0 -0
  40. {nerdd-module-0.2.5/nerdd_module/io → nerdd_module-0.2.6/nerdd_module/input}/depth_first_explorer.py +0 -0
  41. {nerdd-module-0.2.5/nerdd_module/io → nerdd_module-0.2.6/nerdd_module/input}/explorer.py +0 -0
  42. {nerdd-module-0.2.5/nerdd_module/io → nerdd_module-0.2.6/nerdd_module/input}/gzip_reader.py +0 -0
  43. {nerdd-module-0.2.5/nerdd_module/io → nerdd_module-0.2.6/nerdd_module/input}/inchi_reader.py +0 -0
  44. {nerdd-module-0.2.5/nerdd_module/io → nerdd_module-0.2.6/nerdd_module/input}/list_reader.py +0 -0
  45. {nerdd-module-0.2.5/nerdd_module/io → nerdd_module-0.2.6/nerdd_module/input}/mol_reader.py +0 -0
  46. {nerdd-module-0.2.5/nerdd_module/io → nerdd_module-0.2.6/nerdd_module/input}/reader.py +0 -0
  47. {nerdd-module-0.2.5/nerdd_module/io → nerdd_module-0.2.6/nerdd_module/input}/sdf_reader.py +0 -0
  48. {nerdd-module-0.2.5/nerdd_module/io → nerdd_module-0.2.6/nerdd_module/input}/smiles_reader.py +0 -0
  49. {nerdd-module-0.2.5/nerdd_module/io → nerdd_module-0.2.6/nerdd_module/input}/string_reader.py +0 -0
  50. {nerdd-module-0.2.5/nerdd_module/io → nerdd_module-0.2.6/nerdd_module/input}/tar_reader.py +0 -0
  51. {nerdd-module-0.2.5/nerdd_module/io → nerdd_module-0.2.6/nerdd_module/input}/zip_reader.py +0 -0
  52. {nerdd-module-0.2.5/nerdd_module/io → nerdd_module-0.2.6/nerdd_module/output}/csv_writer.py +0 -0
  53. {nerdd-module-0.2.5/nerdd_module/io → nerdd_module-0.2.6/nerdd_module/output}/sdf_writer.py +0 -0
  54. {nerdd-module-0.2.5/nerdd_module/io → nerdd_module-0.2.6/nerdd_module/output}/writer.py +0 -0
  55. {nerdd-module-0.2.5/nerdd_module/io → nerdd_module-0.2.6/nerdd_module/output}/writer_registry.py +0 -0
  56. {nerdd-module-0.2.5 → nerdd_module-0.2.6}/nerdd_module/polyfills/files.py +0 -0
  57. {nerdd-module-0.2.5 → nerdd_module-0.2.6}/nerdd_module/polyfills/get_entry_points.py +0 -0
  58. {nerdd-module-0.2.5 → nerdd_module-0.2.6}/nerdd_module/preprocessing/empty_pipeline.py +0 -0
  59. {nerdd-module-0.2.5 → nerdd_module-0.2.6}/nerdd_module/preprocessing/pipeline.py +0 -0
  60. {nerdd-module-0.2.5 → nerdd_module-0.2.6}/nerdd_module/preprocessing/registry.py +0 -0
  61. {nerdd-module-0.2.5 → nerdd_module-0.2.6}/nerdd_module/preprocessing/remove_stereochemistry.py +0 -0
  62. {nerdd-module-0.2.5 → nerdd_module-0.2.6}/nerdd_module/tests/__init__.py +0 -0
  63. {nerdd-module-0.2.5 → nerdd_module-0.2.6}/nerdd_module/tests/predictions.py +0 -0
  64. {nerdd-module-0.2.5 → nerdd_module-0.2.6}/nerdd_module/tests/representations.py +0 -0
  65. {nerdd-module-0.2.5 → nerdd_module-0.2.6}/nerdd_module.egg-info/dependency_links.txt +0 -0
  66. {nerdd-module-0.2.5 → nerdd_module-0.2.6}/nerdd_module.egg-info/top_level.txt +0 -0
  67. {nerdd-module-0.2.5 → nerdd_module-0.2.6}/setup.cfg +0 -0
  68. {nerdd-module-0.2.5 → nerdd_module-0.2.6}/tests/__init__.py +0 -0
  69. {nerdd-module-0.2.5 → nerdd_module-0.2.6}/tests/conftest.py +0 -0
  70. {nerdd-module-0.2.5 → nerdd_module-0.2.6}/tests/models/AtomicMassModel.py +0 -0
  71. {nerdd-module-0.2.5 → nerdd_module-0.2.6}/tests/models/__init__.py +0 -0
  72. {nerdd-module-0.2.5 → nerdd_module-0.2.6}/tests/steps/__init__.py +0 -0
  73. {nerdd-module-0.2.5 → nerdd_module-0.2.6}/tests/steps/checks.py +0 -0
  74. {nerdd-module-0.2.5 → nerdd_module-0.2.6}/tests/steps/predictors.py +0 -0
  75. {nerdd-module-0.2.5 → nerdd_module-0.2.6}/tests/steps/preprocessing.py +0 -0
  76. {nerdd-module-0.2.5 → nerdd_module-0.2.6}/tests/test_atom_property_prediction.py +0 -0
  77. {nerdd-module-0.2.5 → nerdd_module-0.2.6}/tests/test_molecule_property_prediction.py +0 -0
  78. {nerdd-module-0.2.5 → nerdd_module-0.2.6}/tests/test_preprocessing.py +0 -0
  79. {nerdd-module-0.2.5 → nerdd_module-0.2.6}/tests/test_reading_formats.py +0 -0
@@ -1,8 +1,8 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nerdd-module
3
- Version: 0.2.5
3
+ Version: 0.2.6
4
4
  Summary: Base package to create NERDD modules
5
- Home-page: https://github.com/molinfo-vienna/nerdd-module.git
5
+ Home-page: https://github.com/molinfo-vienna/nerdd-module
6
6
  Maintainer: Steffen Hirte
7
7
  Maintainer-email: steffen.hirte@univie.ac.at
8
8
  License: BSD 3-Clause License
@@ -33,8 +33,11 @@ Requires-Dist: stringcase>=1.2.0
33
33
  Requires-Dist: decorator>=5.1.1
34
34
  Requires-Dist: importlib-resources>=5; python_version < "3.10"
35
35
  Requires-Dist: importlib-metadata>=4.6; python_version < "3.10"
36
- Requires-Dist: chembl_structure_pipeline>=1.0.0
37
36
  Provides-Extra: dev
37
+ Requires-Dist: black; extra == "dev"
38
+ Requires-Dist: isort; extra == "dev"
39
+ Provides-Extra: csp
40
+ Requires-Dist: chembl_structure_pipeline>=1.0.0; extra == "csp"
38
41
  Provides-Extra: test
39
42
  Requires-Dist: pytest; extra == "test"
40
43
  Requires-Dist: pytest-sugar; extra == "test"
@@ -1,13 +1,13 @@
1
1
  from abc import ABC, abstractmethod
2
- from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union
2
+ from typing import Callable, Iterable, List, Tuple, Union
3
3
 
4
4
  import pandas as pd
5
- from rdkit.Chem import Mol, MolToSmiles
5
+ from rdkit.Chem import Mol
6
6
 
7
7
  from .config import AutoConfiguration, Configuration
8
- from .io import DepthFirstExplorer, MoleculeEntry
8
+ from .input import DepthFirstExplorer, MoleculeEntry
9
9
  from .preprocessing import Pipeline, Step, registry
10
- from .problem import Problem
10
+ from .problem import Problem, UnknownProblem
11
11
 
12
12
  __all__ = ["AbstractModel"]
13
13
 
@@ -156,17 +156,20 @@ class AbstractModel(ABC):
156
156
  # (and we assume that the order of the molecules is the same)
157
157
  if "mol_id" in df_predictions.columns:
158
158
  # check that mol_id contains only valid ids
159
- assert set(df_predictions.mol_id).issubset(
160
- set(df_valid_subset.mol_id)
161
- ), f"The mol_id column contains invalid ids: {set(df_predictions.mol_id).difference(set(df_valid_subset.mol_id))}."
159
+ assert set(df_predictions.mol_id).issubset(set(df_valid_subset.mol_id)), (
160
+ f"The mol_id column contains invalid ids: "
161
+ f"{set(df_predictions.mol_id).difference(set(df_valid_subset.mol_id))}."
162
+ )
163
+
162
164
  # use mol_id as index
163
165
  df_predictions.set_index("mol_id", drop=True, inplace=True)
164
166
  elif "mol" in df_predictions.columns:
165
167
  # check that molecule names contain only valid ids
166
168
  names = df_predictions.mol.apply(lambda mol: int(mol.GetProp("_Name")))
167
- assert set(names).issubset(
168
- set(df_preprocess.mol_id)
169
- ), f"The mol_id column contains invalid ids: {set(df_predictions.mol_id).difference(set(df_valid_subset.mol_id))}."
169
+ assert set(names).issubset(set(df_preprocess.mol_id)), (
170
+ f"The mol_id column contains invalid ids: "
171
+ f"{set(df_predictions.mol_id).difference(set(df_valid_subset.mol_id))}."
172
+ )
170
173
 
171
174
  # use mol_id as index
172
175
  df_predictions.set_index(
@@ -184,6 +187,8 @@ class AbstractModel(ABC):
184
187
  df_valid_subset.index.astype("int64"), inplace=True
185
188
  )
186
189
 
190
+ # TODO: check derivative_id or atom_id
191
+
187
192
  # add column that indicates whether a molecule was missing
188
193
  missing_mol_ids = set(df_preprocess.mol_id).difference(df_predictions.index)
189
194
  df_preprocess["missing"] = df_preprocess.mol_id.isin(missing_mol_ids)
@@ -212,7 +217,7 @@ class AbstractModel(ABC):
212
217
  else:
213
218
  df_result["errors"] = df_result.preprocessing_errors
214
219
  df_result["errors"] = df_result.errors + df_result.missing.map(
215
- lambda x: ["!1"] if x else []
220
+ lambda x: [UnknownProblem()] if x else []
216
221
  )
217
222
  df_result.drop(columns=["missing", "preprocessing_errors"], inplace=True)
218
223
 
@@ -4,7 +4,7 @@ import sys
4
4
 
5
5
  import rich_click as click
6
6
  from decorator import decorator
7
- from nerdd_module.io import WriterRegistry
7
+ from nerdd_module.output import WriterRegistry
8
8
  from stringcase import spinalcase
9
9
 
10
10
  __all__ = ["auto_cli"]
@@ -0,0 +1,41 @@
1
+ from stringcase import snakecase
2
+
3
+ from ..polyfills import version
4
+ from .configuration import Configuration
5
+
6
+ __all__ = ["DefaultConfiguration"]
7
+
8
+
9
+ class DefaultConfiguration(Configuration):
10
+ def __init__(self, nerdd_module):
11
+ super().__init__()
12
+
13
+ # generate a name from the module name
14
+ class_name = nerdd_module.__class__.__name__
15
+ if class_name.endswith("Model"):
16
+ # remove the "Model" suffix
17
+ # e.g. SkinDoctorModel -> SkinDoctor
18
+ class_name = class_name[: -len("Model")]
19
+
20
+ # convert the class name to snake case
21
+ # e.g. SkinDoctor -> skin_doctor
22
+ name = snakecase(class_name)
23
+
24
+ # append version to the configuration
25
+ try:
26
+ module = nerdd_module.__module__
27
+ root_module = module.split(".", 1)[0]
28
+ version_ = version(root_module)
29
+ except ModuleNotFoundError:
30
+ pass
31
+
32
+ self.config = dict(
33
+ name=name,
34
+ version=version_,
35
+ task="molecular_property_prediction",
36
+ job_parameters=[],
37
+ result_properties=[],
38
+ )
39
+
40
+ def _get_dict(self):
41
+ return self.config
@@ -9,6 +9,8 @@ class MergedConfiguration(Configuration):
9
9
 
10
10
  self.config = dict()
11
11
 
12
+ # merge all configurations starting from the first one
13
+ # --> last configuration has the highest priority
12
14
  for c in configs:
13
15
  self.config.update(c._get_dict())
14
16
 
@@ -1,4 +1,3 @@
1
- from .csv_writer import *
2
1
  from .depth_first_explorer import *
3
2
  from .file_reader import *
4
3
  from .gzip_reader import *
@@ -8,10 +7,7 @@ from .mol_reader import *
8
7
  from .reader import *
9
8
  from .reader_registry import *
10
9
  from .sdf_reader import *
11
- from .sdf_writer import *
12
10
  from .smiles_reader import *
13
11
  from .string_reader import *
14
12
  from .tar_reader import *
15
- from .writer import *
16
- from .writer_registry import *
17
13
  from .zip_reader import *
@@ -1,6 +1,5 @@
1
- import os
2
1
  from pathlib import Path
3
- from typing import Generator
2
+ from typing import Generator, Tuple
4
3
 
5
4
  from .reader import MoleculeEntry, Reader
6
5
  from .reader_registry import register_reader
@@ -24,7 +23,7 @@ class FileReader(Reader):
24
23
  path = Path(filename)
25
24
  except:
26
25
  raise ValueError("input must be a valid path")
27
-
26
+
28
27
  # convert to absolute path
29
28
  if not path.is_absolute():
30
29
  if self.data_dir is not None:
@@ -33,7 +32,9 @@ class FileReader(Reader):
33
32
  path = Path(".") / path
34
33
 
35
34
  # check that the file is within the data_dir
36
- assert self.data_dir is None or self.data_dir in path.parents, "input must be a relative path"
35
+ assert (
36
+ self.data_dir is None or self.data_dir in path.parents
37
+ ), "input must be a relative path"
37
38
 
38
39
  # check that the file exists
39
40
  assert path.exists(), "input must be a valid file"
@@ -41,7 +42,7 @@ class FileReader(Reader):
41
42
  with open(path, "rb") as f:
42
43
  for entry in explore(f):
43
44
  if len(entry.source) == 1 and entry.source[0] == "raw_input":
44
- source = tuple()
45
+ source: Tuple[str, ...] = tuple()
45
46
  else:
46
47
  source = entry.source
47
48
  yield entry._replace(source=tuple([filename, *source]))
@@ -10,21 +10,25 @@ __all__ = ["ReaderRegistry", "register_reader"]
10
10
  @lru_cache(maxsize=1)
11
11
  class ReaderRegistry:
12
12
  def __init__(self):
13
- self._factories : List[Tuple[Type[Reader], Tuple[str, ...], Dict[str, str]]] = []
13
+ self._factories: List[Tuple[Type[Reader], Tuple[str, ...], Dict[str, str]]] = []
14
14
  self._config = {}
15
15
 
16
16
  def _create_reader(self, ReaderClass: Type[Reader], *args, **kwargs) -> Reader:
17
17
  # translate all args
18
- args = [self._config.get(arg, None) for arg in args]
18
+ args = tuple(self._config.get(arg, None) for arg in args)
19
19
  # translate all kwargs
20
- kwargs = {k: self._config.get(v, None) for k, v in kwargs.items() if v in self._config}
20
+ kwargs = {
21
+ k: self._config.get(v, None) for k, v in kwargs.items() if v in self._config
22
+ }
21
23
 
22
24
  return ReaderClass(*args, **kwargs)
23
25
 
24
- def register(self, ReaderClass: Type[Reader], *args :str , **kwargs:str):
26
+ def register(self, ReaderClass: Type[Reader], *args: str, **kwargs: str):
25
27
  assert issubclass(ReaderClass, Reader)
26
28
  assert all([isinstance(arg, str) for arg in args])
27
- assert all([isinstance(k, str) and isinstance(v, str) for k, v in kwargs.items()])
29
+ assert all(
30
+ [isinstance(k, str) and isinstance(v, str) for k, v in kwargs.items()]
31
+ )
28
32
  self._factories.append((ReaderClass, args, kwargs))
29
33
 
30
34
  def readers(self) -> Generator[Reader, None, None]:
@@ -40,14 +44,14 @@ def register_reader(*args, **kwargs):
40
44
  ReaderRegistry().register(cls, *args, **kwargs)
41
45
  return cls
42
46
 
43
- # Case 1: first argument is a class
44
- # --> decorator is used without arguments
47
+ # Case 1: first argument is a class
48
+ # --> decorator is used without arguments
45
49
  # @register_reader
46
50
  # class F:
47
51
  # ...
48
52
  if len(args) > 0 and isinstance(args[0], type):
49
53
  return wrapper(args[0], *args[1:], **kwargs)
50
-
54
+
51
55
  # Case 2: first argument is a not a class
52
56
  # --> decorator is used with arguments
53
57
  # @register_reader("blah")
@@ -56,4 +60,5 @@ def register_reader(*args, **kwargs):
56
60
  def inner(cls):
57
61
  assert isinstance(cls, type), "Decorator must be used with a class"
58
62
  return wrapper(cls, *args, **kwargs)
63
+
59
64
  return inner
@@ -0,0 +1 @@
1
+ from .writer_registry import *
@@ -1,2 +1,3 @@
1
1
  from .files import *
2
2
  from .get_entry_points import *
3
+ from .version import *
@@ -1,10 +1,8 @@
1
1
  import sys
2
2
 
3
+ __all__ = ["version"]
4
+
3
5
  if sys.version_info < (3, 10):
4
6
  from importlib_metadata import version
5
7
  else:
6
8
  from importlib.metadata import version
7
-
8
- __all__ = ["__version__"]
9
-
10
- __version__ = version(__package__)
@@ -4,6 +4,7 @@ from .empty_pipeline import *
4
4
  from .filter_by_element import *
5
5
  from .filter_by_weight import *
6
6
  from .pipeline import *
7
- from ..problem import *
8
7
  from .registry import *
8
+ from .remove_stereochemistry import *
9
+ from .sanitize import *
9
10
  from .step import *
@@ -1,8 +1,8 @@
1
- from typing import List, Tuple
1
+ from typing import List, Optional, Tuple
2
2
 
3
3
  from rdkit.Chem import Mol, MolFromSmiles, MolToSmiles
4
4
 
5
- from ..problem import Problem
5
+ from ..problem import InvalidSmiles, Problem
6
6
  from .step import Step
7
7
 
8
8
  __all__ = ["CheckValidSmiles"]
@@ -14,15 +14,13 @@ class CheckValidSmiles(Step):
14
14
  def __init__(self):
15
15
  super().__init__()
16
16
 
17
- def _run(self, mol: Mol) -> Tuple[Mol, List[Problem]]:
17
+ def _run(self, mol: Mol) -> Tuple[Optional[Mol], List[Problem]]:
18
18
  errors = []
19
19
 
20
20
  smi = MolToSmiles(mol, True)
21
21
  check_mol = MolFromSmiles(smi)
22
22
  if check_mol is None:
23
- errors.append(
24
- Problem("invalid_smiles", "Cannot convert molecule to SMILES")
25
- )
23
+ errors.append(InvalidSmiles())
26
24
  mol = None
27
25
 
28
26
  return mol, errors
@@ -1,5 +1,5 @@
1
1
  import warnings
2
- from typing import List, Tuple
2
+ from typing import List, Optional, Tuple
3
3
 
4
4
  from rdkit.Chem import Mol
5
5
  from rdkit.rdBase import BlockLogs
@@ -41,7 +41,7 @@ class StandardizeWithCsp(Step):
41
41
  if import_error is not None:
42
42
  raise import_error
43
43
 
44
- def _run(self, mol: Mol) -> Tuple[Mol, List[Problem]]:
44
+ def _run(self, mol: Mol) -> Tuple[Optional[Mol], List[Problem]]:
45
45
  errors = []
46
46
 
47
47
  # chembl structure pipeline cannot handle molecules with 3D coordinates
@@ -65,7 +65,7 @@ class GetParentMol(Step):
65
65
  if import_error is not None:
66
66
  raise import_error
67
67
 
68
- def _run(self, mol: Mol) -> Tuple[Mol, List[Problem]]:
68
+ def _run(self, mol: Mol) -> Tuple[Optional[Mol], List[Problem]]:
69
69
  errors = []
70
70
 
71
71
  # chembl structure pipeline cannot handle molecules with 3D coordinates
@@ -1,4 +1,4 @@
1
- from typing import Iterable, List, Tuple
1
+ from typing import Iterable, List, Optional, Tuple
2
2
 
3
3
  from rdkit.Chem import Mol
4
4
 
@@ -14,7 +14,7 @@ class FilterByElement(Step):
14
14
  self.allowed_elements = set(allowed_elements)
15
15
  self.remove_invalid_molecules = remove_invalid_molecules
16
16
 
17
- def _run(self, mol: Mol) -> Tuple[Mol, List[Problem]]:
17
+ def _run(self, mol: Mol) -> Tuple[Optional[Mol], List[Problem]]:
18
18
  errors = []
19
19
  result_mol = mol
20
20
 
@@ -1,4 +1,4 @@
1
- from typing import List, Tuple
1
+ from typing import List, Optional, Tuple
2
2
 
3
3
  from rdkit.Chem import Mol
4
4
  from rdkit.Chem.Descriptors import MolWt
@@ -14,7 +14,7 @@ class FilterByWeight(Step):
14
14
  self.max_weight = max_weight
15
15
  self.remove_invalid_molecules = remove_invalid_molecules
16
16
 
17
- def _run(self, mol: Mol) -> Tuple[Mol, List[Problem]]:
17
+ def _run(self, mol: Mol) -> Tuple[Optional[Mol], List[Problem]]:
18
18
  errors = []
19
19
 
20
20
  weight = MolWt(mol)
@@ -25,9 +25,11 @@ class FilterByWeight(Step):
25
25
  result_mol = mol
26
26
  errors.append(
27
27
  Problem(
28
- "invalid_weight",
29
- f"Molecular weight {weight:.2f} out of range "
30
- f"[{self.min_weight}, {self.max_weight}]",
28
+ type="invalid_weight",
29
+ message=(
30
+ f"Molecular weight {weight:.2f} out of range "
31
+ f"[{self.min_weight}, {self.max_weight}]"
32
+ ),
31
33
  )
32
34
  )
33
35
  else:
@@ -0,0 +1,18 @@
1
+ from rdkit.Chem import SanitizeMol
2
+
3
+ from .step import Step
4
+
5
+ __all__ = ["Sanitize"]
6
+
7
+
8
+ class Sanitize(Step):
9
+ def __init__(self):
10
+ super().__init__()
11
+
12
+ def _run(self, mol):
13
+ errors = []
14
+
15
+ # sanitize molecule
16
+ SanitizeMol(mol)
17
+
18
+ return mol, errors
@@ -1,5 +1,5 @@
1
1
  from abc import ABC, abstractmethod
2
- from typing import List, Tuple
2
+ from typing import List, Optional, Tuple
3
3
 
4
4
  from rdkit.Chem import Mol
5
5
 
@@ -12,14 +12,14 @@ class Step(ABC):
12
12
  def __init__(self):
13
13
  pass
14
14
 
15
- def run(self, mol: Mol) -> Tuple[Mol, List[Problem]]:
15
+ def run(self, mol: Mol) -> Tuple[Optional[Mol], List[Problem]]:
16
16
  """
17
17
  Runs the step on a molecule.
18
18
  """
19
19
  return self._run(mol)
20
20
 
21
21
  @abstractmethod
22
- def _run(self, mol: Mol) -> Tuple[Mol, List[Problem]]:
22
+ def _run(self, mol: Mol) -> Tuple[Optional[Mol], List[Problem]]:
23
23
  """
24
24
  Runs the step on a molecule.
25
25
  """
@@ -0,0 +1,13 @@
1
+ from typing import NamedTuple
2
+
3
+ __all__ = ["Problem", "InvalidSmiles", "UnknownProblem"]
4
+
5
+
6
+ class Problem(NamedTuple):
7
+ type: str
8
+ message: str
9
+
10
+
11
+ InvalidSmiles = lambda: Problem(type="invalid_smiles", message="Invalid SMILES string")
12
+
13
+ UnknownProblem = lambda: Problem(type="unknown", message="Unknown error occurred")
@@ -138,9 +138,15 @@ def check_column_length(subset, column_name, length):
138
138
  ).all(), f"Column {column_name} has unexpected length"
139
139
 
140
140
 
141
- @then(parsers.parse("when '{condition_column_name}' is '{condition_value}' "
142
- "the value in column '{column_name}' should be '{expected_value}'"))
143
- def check_conditional_column_value(subset, condition_column_name, condition_value, column_name, expected_value):
141
+ @then(
142
+ parsers.parse(
143
+ "when '{condition_column_name}' is '{condition_value}' "
144
+ "the value in column '{column_name}' should be '{expected_value}'"
145
+ )
146
+ )
147
+ def check_conditional_column_value(
148
+ subset, condition_column_name, condition_value, column_name, expected_value
149
+ ):
144
150
  # expected value is always provided as string
145
151
  # try to convert to float if possible
146
152
  try:
@@ -161,14 +167,18 @@ def check_conditional_column_value(subset, condition_column_name, condition_valu
161
167
  subset = subset[subset[condition_column_name] == condition_value]
162
168
 
163
169
  value = subset[column_name]
164
- assert len(value) > 0, f"No rows found for condition {condition_column_name} == {condition_value}"
170
+ assert (
171
+ len(value) > 0
172
+ ), f"No rows found for condition {condition_column_name} == {condition_value}"
165
173
 
166
174
  # expected value can be (none) to indicate None
167
175
  if expected_value == "(none)":
168
176
  # if expected_value is the magic string "(none)", we expect None
169
- assert pd.isnull(value).all(), f"Column {column_name} is assigned to {value} != None"
177
+ assert pd.isnull(
178
+ value
179
+ ).all(), f"Column {column_name} is assigned to {value} != None"
170
180
  else:
171
181
  # otherwise, we expect the value to be equal to the expected value
172
182
  assert (
173
- (value == expected_value).all()
174
- ), f"Column {column_name} is assigned to {value} != {expected_value}"
183
+ value == expected_value
184
+ ).all(), f"Column {column_name} is assigned to {value} != {expected_value}"
@@ -0,0 +1,5 @@
1
+ from .polyfills import version
2
+
3
+ __all__ = ["__version__"]
4
+
5
+ __version__ = version(__package__)
@@ -1,8 +1,8 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nerdd-module
3
- Version: 0.2.5
3
+ Version: 0.2.6
4
4
  Summary: Base package to create NERDD modules
5
- Home-page: https://github.com/molinfo-vienna/nerdd-module.git
5
+ Home-page: https://github.com/molinfo-vienna/nerdd-module
6
6
  Maintainer: Steffen Hirte
7
7
  Maintainer-email: steffen.hirte@univie.ac.at
8
8
  License: BSD 3-Clause License
@@ -33,8 +33,11 @@ Requires-Dist: stringcase>=1.2.0
33
33
  Requires-Dist: decorator>=5.1.1
34
34
  Requires-Dist: importlib-resources>=5; python_version < "3.10"
35
35
  Requires-Dist: importlib-metadata>=4.6; python_version < "3.10"
36
- Requires-Dist: chembl_structure_pipeline>=1.0.0
37
36
  Provides-Extra: dev
37
+ Requires-Dist: black; extra == "dev"
38
+ Requires-Dist: isort; extra == "dev"
39
+ Provides-Extra: csp
40
+ Requires-Dist: chembl_structure_pipeline>=1.0.0; extra == "csp"
38
41
  Provides-Extra: test
39
42
  Requires-Dist: pytest; extra == "test"
40
43
  Requires-Dist: pytest-sugar; extra == "test"
@@ -19,28 +19,30 @@ nerdd_module/config/dict_configuration.py
19
19
  nerdd_module/config/merged_configuration.py
20
20
  nerdd_module/config/package_configuration.py
21
21
  nerdd_module/config/yaml_configuration.py
22
- nerdd_module/io/__init__.py
23
- nerdd_module/io/csv_writer.py
24
- nerdd_module/io/depth_first_explorer.py
25
- nerdd_module/io/explorer.py
26
- nerdd_module/io/file_reader.py
27
- nerdd_module/io/gzip_reader.py
28
- nerdd_module/io/inchi_reader.py
29
- nerdd_module/io/list_reader.py
30
- nerdd_module/io/mol_reader.py
31
- nerdd_module/io/reader.py
32
- nerdd_module/io/reader_registry.py
33
- nerdd_module/io/sdf_reader.py
34
- nerdd_module/io/sdf_writer.py
35
- nerdd_module/io/smiles_reader.py
36
- nerdd_module/io/string_reader.py
37
- nerdd_module/io/tar_reader.py
38
- nerdd_module/io/writer.py
39
- nerdd_module/io/writer_registry.py
40
- nerdd_module/io/zip_reader.py
22
+ nerdd_module/input/__init__.py
23
+ nerdd_module/input/depth_first_explorer.py
24
+ nerdd_module/input/explorer.py
25
+ nerdd_module/input/file_reader.py
26
+ nerdd_module/input/gzip_reader.py
27
+ nerdd_module/input/inchi_reader.py
28
+ nerdd_module/input/list_reader.py
29
+ nerdd_module/input/mol_reader.py
30
+ nerdd_module/input/reader.py
31
+ nerdd_module/input/reader_registry.py
32
+ nerdd_module/input/sdf_reader.py
33
+ nerdd_module/input/smiles_reader.py
34
+ nerdd_module/input/string_reader.py
35
+ nerdd_module/input/tar_reader.py
36
+ nerdd_module/input/zip_reader.py
37
+ nerdd_module/output/__init__.py
38
+ nerdd_module/output/csv_writer.py
39
+ nerdd_module/output/sdf_writer.py
40
+ nerdd_module/output/writer.py
41
+ nerdd_module/output/writer_registry.py
41
42
  nerdd_module/polyfills/__init__.py
42
43
  nerdd_module/polyfills/files.py
43
44
  nerdd_module/polyfills/get_entry_points.py
45
+ nerdd_module/polyfills/version.py
44
46
  nerdd_module/preprocessing/__init__.py
45
47
  nerdd_module/preprocessing/check_valid_smiles.py
46
48
  nerdd_module/preprocessing/chembl_structure_pipeline.py
@@ -50,6 +52,7 @@ nerdd_module/preprocessing/filter_by_weight.py
50
52
  nerdd_module/preprocessing/pipeline.py
51
53
  nerdd_module/preprocessing/registry.py
52
54
  nerdd_module/preprocessing/remove_stereochemistry.py
55
+ nerdd_module/preprocessing/sanitize.py
53
56
  nerdd_module/preprocessing/step.py
54
57
  nerdd_module/tests/__init__.py
55
58
  nerdd_module/tests/checks.py
@@ -5,13 +5,17 @@ filetype~=1.2.0
5
5
  rich-click>=1.7.1
6
6
  stringcase>=1.2.0
7
7
  decorator>=5.1.1
8
- chembl_structure_pipeline>=1.0.0
9
8
 
10
9
  [:python_version < "3.10"]
11
10
  importlib-resources>=5
12
11
  importlib-metadata>=4.6
13
12
 
13
+ [csp]
14
+ chembl_structure_pipeline>=1.0.0
15
+
14
16
  [dev]
17
+ black
18
+ isort
15
19
 
16
20
  [docs]
17
21
  mkdocs
@@ -16,11 +16,11 @@ rdkit_requirement = ["rdkit>=2022.3.3"] if not rdkit_installed else []
16
16
 
17
17
  setup(
18
18
  name="nerdd-module",
19
- version="0.2.5",
19
+ version="0.2.6",
20
20
  maintainer="Steffen Hirte",
21
21
  maintainer_email="steffen.hirte@univie.ac.at",
22
22
  packages=find_packages(),
23
- url="https://github.com/molinfo-vienna/nerdd-module.git",
23
+ url="https://github.com/molinfo-vienna/nerdd-module",
24
24
  description="Base package to create NERDD modules",
25
25
  license="BSD 3-Clause License",
26
26
  long_description=open("README.md").read(),
@@ -36,12 +36,19 @@ setup(
36
36
  # install importlib-resources and importlib-metadata for old Python versions
37
37
  "importlib-resources>=5; python_version<'3.10'",
38
38
  "importlib-metadata>=4.6; python_version<'3.10'",
39
- # note: version 1.0.0 of chembl_structure_pipeline is not available on pypi,
40
- # but it could potentially be installed from github
41
- "chembl_structure_pipeline>=1.0.0",
42
39
  ],
43
40
  extras_require={
44
- "dev": [],
41
+ "dev": [
42
+ "black",
43
+ "isort",
44
+ ],
45
+ "csp": [
46
+ # note: version 1.0.0 of chembl_structure_pipeline is not available on pypi
47
+ # BUT: maybe it was already installed in the current environment manually
48
+ # other note: chembl_structure_pipeline *always* installs a recent version
49
+ # of rdkit
50
+ "chembl_structure_pipeline>=1.0.0"
51
+ ],
45
52
  "test": [
46
53
  "pytest",
47
54
  "pytest-sugar",
@@ -1,20 +1,17 @@
1
1
  import pandas as pd
2
2
  from nerdd_module import AbstractModel
3
+ from nerdd_module.preprocessing import Sanitize
3
4
  from rdkit.Chem.Descriptors import MolWt
4
5
 
5
6
  __all__ = ["MolWeightModel"]
6
7
 
7
8
 
8
9
  class MolWeightModel(AbstractModel):
9
- def __init__(self, preprocessing_pipeline="chembl_structure_pipeline", **kwargs):
10
+ def __init__(self, preprocessing_pipeline=[Sanitize()], **kwargs):
10
11
  super().__init__(preprocessing_pipeline, **kwargs)
11
12
 
12
13
  def _predict_mols(self, mols, multiplier):
13
- return pd.DataFrame(
14
- {
15
- "weight": [MolWt(m) * multiplier for m in mols],
16
- }
17
- )
14
+ return pd.DataFrame({"weight": [MolWt(m) * multiplier for m in mols]})
18
15
 
19
16
  def _get_config(self):
20
17
  return {
@@ -1,12 +1,13 @@
1
1
  import pandas as pd
2
2
  from nerdd_module import AbstractModel
3
+ from nerdd_module.preprocessing import Sanitize
3
4
  from rdkit.Chem.Descriptors import MolWt
4
5
 
5
6
  __all__ = ["MolWeightModelWithExplicitMolIds"]
6
7
 
7
8
 
8
9
  class MolWeightModelWithExplicitMolIds(AbstractModel):
9
- def __init__(self, preprocessing_pipeline="chembl_structure_pipeline", **kwargs):
10
+ def __init__(self, preprocessing_pipeline=[Sanitize()], **kwargs):
10
11
  super().__init__(preprocessing_pipeline, **kwargs)
11
12
 
12
13
  def _predict_mols(self, mols, multiplier):
@@ -1,20 +1,18 @@
1
1
  import pandas as pd
2
2
  from nerdd_module import AbstractModel
3
+ from nerdd_module.preprocessing import Sanitize
3
4
  from rdkit.Chem.Descriptors import MolWt
4
5
 
5
6
  __all__ = ["MolWeightModelWithExplicitMols"]
6
7
 
7
8
 
8
9
  class MolWeightModelWithExplicitMols(AbstractModel):
9
- def __init__(self, preprocessing_pipeline="chembl_structure_pipeline", **kwargs):
10
+ def __init__(self, preprocessing_pipeline=[Sanitize()], **kwargs):
10
11
  super().__init__(preprocessing_pipeline, **kwargs)
11
12
 
12
13
  def _predict_mols(self, mols, multiplier):
13
14
  return pd.DataFrame(
14
- {
15
- "mol": mols,
16
- "weight": [MolWt(m) * multiplier for m in mols],
17
- }
15
+ {"mol": mols, "weight": [MolWt(m) * multiplier for m in mols]}
18
16
  )
19
17
 
20
18
  def _get_config(self):
@@ -1,17 +0,0 @@
1
- from .configuration import Configuration
2
-
3
- __all__ = ["DefaultConfiguration"]
4
-
5
-
6
- class DefaultConfiguration(Configuration):
7
- def __init__(self, nerdd_module):
8
- super().__init__()
9
-
10
- self.config = dict(
11
- task="molecular_property_prediction",
12
- job_parameters=[],
13
- result_properties=[],
14
- )
15
-
16
- def _get_dict(self):
17
- return self.config
@@ -1,8 +0,0 @@
1
- from typing import NamedTuple
2
-
3
- __all__ = ["Problem"]
4
-
5
-
6
- class Problem(NamedTuple):
7
- type: str
8
- message: str
File without changes
File without changes
File without changes