nerdd-module 0.3.12__tar.gz → 0.3.13__tar.gz

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (101)
  1. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/PKG-INFO +4 -4
  2. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/__init__.py +1 -0
  3. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/config/models.py +2 -0
  4. nerdd_module-0.3.13/nerdd_module/converters/__init__.py +7 -0
  5. nerdd_module-0.3.13/nerdd_module/converters/converter.py +100 -0
  6. nerdd_module-0.3.13/nerdd_module/converters/converter_config.py +15 -0
  7. nerdd_module-0.3.13/nerdd_module/converters/identity_converter.py +23 -0
  8. nerdd_module-0.3.13/nerdd_module/converters/mol_converter.py +41 -0
  9. nerdd_module-0.3.13/nerdd_module/converters/mol_to_image_converter.py +62 -0
  10. nerdd_module-0.3.13/nerdd_module/converters/problem_list_converter.py +21 -0
  11. nerdd_module-0.3.13/nerdd_module/converters/void_converter.py +17 -0
  12. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/input/depth_first_explorer.py +3 -4
  13. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/input/file_reader.py +8 -3
  14. nerdd_module-0.3.13/nerdd_module/model/convert_representations_step.py +24 -0
  15. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/model/simple_model.py +13 -9
  16. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/preprocessing/chembl_structure_pipeline.py +10 -4
  17. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/tests/__init__.py +1 -0
  18. nerdd_module-0.3.13/nerdd_module/tests/files.py +74 -0
  19. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/tests/predictions.py +8 -6
  20. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/tests/representations.py +9 -8
  21. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module.egg-info/PKG-INFO +4 -4
  22. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module.egg-info/SOURCES.txt +6 -0
  23. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module.egg-info/requires.txt +3 -3
  24. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/pyproject.toml +4 -4
  25. nerdd_module-0.3.12/nerdd_module/converters/__init__.py +0 -2
  26. nerdd_module-0.3.12/nerdd_module/converters/converter.py +0 -62
  27. nerdd_module-0.3.12/nerdd_module/converters/identity_converter.py +0 -8
  28. nerdd_module-0.3.12/nerdd_module/model/convert_representations_step.py +0 -20
  29. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/LICENSE +0 -0
  30. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/README.md +0 -0
  31. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/cli.py +0 -0
  32. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/config/__init__.py +0 -0
  33. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/config/configuration.py +0 -0
  34. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/config/default_configuration.py +0 -0
  35. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/config/dict_configuration.py +0 -0
  36. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/config/merged_configuration.py +0 -0
  37. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/config/package_configuration.py +0 -0
  38. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/config/search_yaml_configuration.py +0 -0
  39. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/config/yaml_configuration.py +0 -0
  40. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/input/__init__.py +0 -0
  41. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/input/explorer.py +0 -0
  42. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/input/gzip_reader.py +0 -0
  43. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/input/inchi_reader.py +0 -0
  44. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/input/list_reader.py +0 -0
  45. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/input/mol_reader.py +0 -0
  46. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/input/reader.py +0 -0
  47. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/input/reader_config.py +0 -0
  48. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/input/sdf_reader.py +0 -0
  49. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/input/smiles_reader.py +0 -0
  50. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/input/string_reader.py +0 -0
  51. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/input/tar_reader.py +0 -0
  52. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/input/zip_reader.py +0 -0
  53. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/model/__init__.py +0 -0
  54. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/model/assign_mol_id_step.py +0 -0
  55. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/model/assign_name_step.py +0 -0
  56. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/model/enforce_schema_step.py +0 -0
  57. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/model/model.py +0 -0
  58. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/model/read_input_step.py +0 -0
  59. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/model/write_output_step.py +0 -0
  60. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/output/__init__.py +0 -0
  61. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/output/csv_writer.py +0 -0
  62. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/output/file_writer.py +0 -0
  63. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/output/iterator_writer.py +0 -0
  64. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/output/pandas_writer.py +0 -0
  65. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/output/record_list_writer.py +0 -0
  66. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/output/sdf_writer.py +0 -0
  67. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/output/writer.py +0 -0
  68. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/polyfills/__init__.py +0 -0
  69. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/polyfills/files.py +0 -0
  70. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/polyfills/get_entry_points.py +0 -0
  71. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/polyfills/literal.py +0 -0
  72. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/polyfills/typed_dict.py +0 -0
  73. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/polyfills/types.py +0 -0
  74. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/polyfills/version.py +0 -0
  75. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/preprocessing/__init__.py +0 -0
  76. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/preprocessing/check_valid_smiles.py +0 -0
  77. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/preprocessing/filter_by_element.py +0 -0
  78. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/preprocessing/filter_by_weight.py +0 -0
  79. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/preprocessing/preprocessing_step.py +0 -0
  80. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/preprocessing/remove_stereochemistry.py +0 -0
  81. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/preprocessing/sanitize.py +0 -0
  82. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/problem.py +0 -0
  83. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/py.typed +0 -0
  84. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/steps/__init__.py +0 -0
  85. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/steps/map_step.py +0 -0
  86. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/steps/output_step.py +0 -0
  87. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/steps/step.py +0 -0
  88. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/tests/checks.py +0 -0
  89. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/tests/models/AtomicMassModel.py +0 -0
  90. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/tests/models/MolWeightModel.py +0 -0
  91. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/tests/models/__init__.py +0 -0
  92. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/tests/preprocessing/DummyPreprocessingStep.py +0 -0
  93. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/tests/preprocessing/__init__.py +0 -0
  94. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/util/__init__.py +0 -0
  95. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/util/call_with_mappings.py +0 -0
  96. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/util/package.py +0 -0
  97. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module/version.py +0 -0
  98. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module.egg-info/dependency_links.txt +0 -0
  99. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/nerdd_module.egg-info/top_level.txt +0 -0
  100. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/setup.cfg +0 -0
  101. {nerdd_module-0.3.12 → nerdd_module-0.3.13}/tests/test_features.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nerdd-module
3
- Version: 0.3.12
3
+ Version: 0.3.13
4
4
  Summary: Base package to create NERDD modules
5
5
  Author-email: Steffen Hirte <steffen.hirte@univie.ac.at>
6
6
  Maintainer-email: Steffen Hirte <steffen.hirte@univie.ac.at>
@@ -62,14 +62,14 @@ Requires-Dist: importlib-resources>=5; python_version < "3.9"
62
62
  Requires-Dist: importlib-metadata>=4.6; python_version < "3.10"
63
63
  Requires-Dist: typing_extensions>=4.0.1; python_version < "3.8"
64
64
  Provides-Extra: dev
65
- Requires-Dist: mypy==1.13.0; extra == "dev"
65
+ Requires-Dist: mypy>=1; extra == "dev"
66
66
  Requires-Dist: ruff==0.7.1; extra == "dev"
67
67
  Requires-Dist: pandas-stubs; extra == "dev"
68
68
  Requires-Dist: rdkit-stubs; extra == "dev"
69
69
  Requires-Dist: types-PyYAML; extra == "dev"
70
70
  Requires-Dist: types-decorator; extra == "dev"
71
71
  Requires-Dist: types-setuptools; extra == "dev"
72
- Requires-Dist: pre-commit==3.5.0; extra == "dev"
72
+ Requires-Dist: pre-commit>=2; extra == "dev"
73
73
  Provides-Extra: rdkit
74
74
  Requires-Dist: rdkit>=2022.3.3; extra == "rdkit"
75
75
  Provides-Extra: csp
@@ -79,7 +79,7 @@ Requires-Dist: pytest; extra == "test"
79
79
  Requires-Dist: pytest-sugar; extra == "test"
80
80
  Requires-Dist: pytest-cov; extra == "test"
81
81
  Requires-Dist: pytest-asyncio; extra == "test"
82
- Requires-Dist: pytest-bdd==7.3.0; extra == "test"
82
+ Requires-Dist: pytest-bdd<8; extra == "test"
83
83
  Requires-Dist: pytest-mock; extra == "test"
84
84
  Requires-Dist: pytest-watcher; extra == "test"
85
85
  Requires-Dist: hypothesis; extra == "test"
@@ -1,4 +1,5 @@
1
1
  from .cli import *
2
+ from .converters import *
2
3
  from .model import *
3
4
  from .output import *
4
5
  from .polyfills import get_entry_points
@@ -77,6 +77,8 @@ class ResultProperty(BaseModel):
77
77
  level: Level = "molecule"
78
78
  formats: Union[FormatSpec, IncludeExcludeFormatSpec, None] = None
79
79
  representation: Optional[str] = None
80
+ image_width: Optional[int] = None
81
+ image_height: Optional[int] = None
80
82
 
81
83
  def is_visible(self, output_format: str) -> bool:
82
84
  formats = self.formats
@@ -0,0 +1,7 @@
1
+ from .converter import *
2
+ from .converter_config import *
3
+ from .identity_converter import *
4
+ from .mol_converter import *
5
+ from .mol_to_image_converter import *
6
+ from .problem_list_converter import *
7
+ from .void_converter import *
@@ -0,0 +1,100 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ from abc import ABC, abstractmethod
5
+ from typing import Any, Callable, Dict, Tuple, Union
6
+
7
+ from ..config import ResultProperty
8
+ from ..util import call_with_mappings
9
+ from .converter_config import ALL, ALL_TYPE
10
+
11
+ __all__ = ["Converter"]
12
+
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
+ _factories: Dict[
18
+ Tuple[Union[str, ALL_TYPE], Union[str, ALL_TYPE]], Callable[[dict], Converter]
19
+ ] = {}
20
+
21
+
22
+ class Converter(ABC):
23
+ # a special symbol to indicate that a property should be hidden
24
+ HIDE = object()
25
+
26
+ def __init__(self, result_property: ResultProperty, output_format: str, **kwargs: Any) -> None:
27
+ super().__init__()
28
+ self.property = result_property
29
+ self.output_format = output_format
30
+
31
+ @classmethod
32
+ def __init_subclass__(
33
+ cls,
34
+ is_abstract: bool = False,
35
+ **kwargs: Any,
36
+ ) -> None:
37
+ super().__init_subclass__(**kwargs)
38
+
39
+ if hasattr(cls, "config"):
40
+ data_types = cls.config["data_types"]
41
+ output_formats = cls.config["output_formats"]
42
+ else:
43
+ data_types = None
44
+ output_formats = None
45
+
46
+ if not is_abstract:
47
+ if isinstance(data_types, str) or data_types is ALL:
48
+ data_types_list = [data_types]
49
+ else:
50
+ data_types_list = data_types
51
+
52
+ if isinstance(output_formats, str) or output_formats is ALL:
53
+ output_formats_list = [output_formats]
54
+ else:
55
+ output_formats_list = output_formats
56
+
57
+ for output_format in output_formats_list:
58
+ for data_type in data_types_list:
59
+ logger.debug(f"Registering converter {cls} for {data_type} -> {output_format}")
60
+ _factories[(data_type, output_format)] = cls
61
+
62
+ @abstractmethod
63
+ def _convert(self, input: Any, context: dict) -> Any:
64
+ pass
65
+
66
+ def convert(self, input: Any, context: dict) -> Any:
67
+ return self._convert(input, context)
68
+
69
+ @classmethod
70
+ def get_converter(
71
+ cls,
72
+ result_property: ResultProperty,
73
+ output_format: str,
74
+ return_default: bool = True,
75
+ **kwargs: Any,
76
+ ) -> Converter:
77
+ data_type = result_property.type
78
+ if (data_type, output_format) not in _factories:
79
+ ConverterFunc = None
80
+ if return_default:
81
+ if (data_type, ALL) in _factories:
82
+ ConverterFunc = _factories[(data_type, ALL)]
83
+ elif (ALL, output_format) in _factories:
84
+ ConverterFunc = _factories[(ALL, output_format)]
85
+ elif (ALL, ALL) in _factories:
86
+ ConverterFunc = _factories[(ALL, ALL)]
87
+
88
+ if ConverterFunc is None:
89
+ raise ValueError(
90
+ f"Unknown data type '{data_type}' or output format '{output_format}'"
91
+ )
92
+ else:
93
+ ConverterFunc = _factories[(data_type, output_format)]
94
+
95
+ # kwargs will be passed to the constructor of the converter
96
+ # --> add data_type and output_format to the kwargs
97
+ kwargs["result_property"] = result_property
98
+ kwargs["output_format"] = output_format
99
+
100
+ return call_with_mappings(ConverterFunc, kwargs)
@@ -0,0 +1,15 @@
1
+ from typing import List, Optional, Union
2
+
3
+ from ..polyfills import Literal, TypedDict
4
+
5
+ __all__ = ["ConverterConfig", "ALL", "ALL_TYPE"]
6
+
7
+
8
+ # a special symbol to indicate that all data types / output formats are considered
9
+ ALL_TYPE = Literal["ALL"]
10
+ ALL: ALL_TYPE = "ALL"
11
+
12
+
13
+ class ConverterConfig(TypedDict):
14
+ data_types: Optional[Union[str, List[str], ALL_TYPE]]
15
+ output_formats: Optional[Union[str, List[str], ALL_TYPE]]
@@ -0,0 +1,23 @@
1
+ from typing import Any
2
+
3
+ from .converter import Converter
4
+ from .converter_config import ALL, ConverterConfig
5
+
6
+ __all__ = ["IdentityConverter", "primitive_data_types"]
7
+
8
+ primitive_data_types = [
9
+ "int",
10
+ "float",
11
+ "string",
12
+ "bool",
13
+ ]
14
+
15
+
16
+ class IdentityConverter(Converter):
17
+ def _convert(self, input: Any, context: dict) -> Any:
18
+ return input
19
+
20
+ config = ConverterConfig(
21
+ data_types=primitive_data_types,
22
+ output_formats=ALL,
23
+ )
@@ -0,0 +1,41 @@
1
+ from typing import Any
2
+
3
+ from rdkit.Chem import MolToInchi, MolToSmiles
4
+
5
+ from ..config import ResultProperty
6
+ from .converter import Converter
7
+ from .converter_config import ALL, ConverterConfig
8
+
9
+ __all__ = ["MolConverter"]
10
+
11
+
12
+ class MolConverter(Converter):
13
+ def __init__(self, result_property: ResultProperty, output_format: str, **kwargs: Any) -> None:
14
+ super().__init__(result_property, output_format, **kwargs)
15
+
16
+ if output_format == "sdf" and result_property.name == "input_mol":
17
+ # in an SDF, the main molecule (input_mol) can be a Mol object
18
+ self._serialize = lambda x: x
19
+ elif output_format in ["pandas", "record_list", "iterator"]:
20
+ self._serialize = lambda mol: mol
21
+ else:
22
+ representation = result_property.representation or "smiles"
23
+ if representation == "inchi":
24
+ self._serialize = MolToInchi
25
+ elif representation == "smiles":
26
+ self._serialize = MolToSmiles
27
+ else:
28
+ raise ValueError(f"Unsupported representation: {representation}")
29
+
30
+ def _convert(self, input: Any, context: dict) -> Any:
31
+ try:
32
+ representation = self._serialize(input)
33
+ except: # noqa: E722 (allow bare except, because RDKit is unpredictable)
34
+ representation = None
35
+
36
+ return representation
37
+
38
+ config = ConverterConfig(
39
+ data_types="mol",
40
+ output_formats=ALL,
41
+ )
@@ -0,0 +1,62 @@
1
+ from typing import Any
2
+ from xml.dom import minidom
3
+
4
+ from rdkit.Chem import Mol
5
+ from rdkit.Chem.Draw import MolDraw2DSVG
6
+
7
+ from .converter import Converter
8
+ from .converter_config import ConverterConfig
9
+
10
+ __all__ = ["MolToImageConverter"]
11
+
12
+ default_width = 400
13
+ default_height = 300
14
+
15
+
16
+ class MolToImageConverter(Converter):
17
+ def _convert(self, input: Any, context: dict) -> Any:
18
+ width = self.property.image_width
19
+ height = self.property.image_height
20
+
21
+ if width is None:
22
+ width = default_width
23
+ if height is None:
24
+ height = default_height
25
+
26
+ mol = input
27
+ if mol is None:
28
+ return None
29
+
30
+ assert isinstance(mol, Mol), f"Expected RDKit Mol object, but got {type(mol)}"
31
+
32
+ svg = MolDraw2DSVG(width, height)
33
+
34
+ # remove background
35
+ opts = svg.drawOptions()
36
+ opts.clearBackground = False
37
+
38
+ # add highlight circles around atoms during drawing
39
+ # (we will remove them later in post processing)
40
+ atoms = range(mol.GetNumAtoms())
41
+ colors = [[(0.8, 1, 1)]] * mol.GetNumAtoms()
42
+ radii = [0.5] * mol.GetNumAtoms()
43
+ atom_highlight = dict(zip(atoms, colors))
44
+ atom_radii = dict(zip(atoms, radii))
45
+ svg.DrawMoleculeWithHighlights(mol, "", atom_highlight, {}, atom_radii, [])
46
+ svg.FinishDrawing()
47
+
48
+ # post process SVG
49
+ xml = svg.GetDrawingText()
50
+ tree = minidom.parseString(xml)
51
+ root = tree.getElementsByTagName("svg")[0]
52
+
53
+ # make highlight circles invisible
54
+ for ellipse in root.getElementsByTagName("ellipse"):
55
+ ellipse.setAttribute("style", "opacity:0")
56
+
57
+ xml = tree.toxml()
58
+
59
+ return xml
60
+
61
+ # TODO: move to nerdd-link
62
+ config = ConverterConfig(data_types="mol", output_formats="json")
@@ -0,0 +1,21 @@
1
+ from typing import Any, List, cast
2
+
3
+ from ..problem import Problem
4
+ from .converter import Converter
5
+ from .converter_config import ALL, ConverterConfig
6
+
7
+ __all__ = ["ProblemListConverter"]
8
+
9
+
10
+ class ProblemListConverter(Converter):
11
+ def _convert(self, input: Any, context: dict) -> Any:
12
+ if self.output_format in ["pandas", "iterator", "record_list"]:
13
+ return input
14
+ else:
15
+ problem_list: List[Problem] = cast(List[Problem], input)
16
+ return "; ".join([f"{problem.type}: {problem.message}" for problem in problem_list])
17
+
18
+ config = ConverterConfig(
19
+ data_types="problem_list",
20
+ output_formats=ALL,
21
+ )
@@ -0,0 +1,17 @@
1
+ from typing import Any
2
+
3
+ from .converter import Converter
4
+ from .converter_config import ALL, ConverterConfig
5
+
6
+ __all__ = ["VoidConverter"]
7
+
8
+
9
+ class VoidConverter(Converter):
10
+ def _convert(self, input: Any, context: dict) -> Any:
11
+ return Converter.HIDE
12
+
13
+ # by default, all data types will be hidden for all output formats
14
+ config = ConverterConfig(
15
+ data_types=ALL,
16
+ output_formats=ALL,
17
+ )
@@ -107,7 +107,7 @@ class DepthFirstExplorer(Explorer):
107
107
  except Exception:
108
108
  pass
109
109
 
110
- # clean up tree
110
+ # clean up stack
111
111
  while len(self._state_stack) > depth:
112
112
  self._state_stack.pop()
113
113
  generator = None
@@ -115,12 +115,11 @@ class DepthFirstExplorer(Explorer):
115
115
  if generator is None:
116
116
  if best_reader is None:
117
117
  generator = self._read(InvalidInputReader(), input)
118
- sample = []
119
118
  else:
120
119
  generator = self._read(best_reader, input)
121
- sample = list(islice(generator, self._num_test_entries))
120
+ sample = []
122
121
  else:
123
- if best_mode is not None and best_mode != "guess":
122
+ if best_mode == "builtin":
124
123
  parent["first_guess"].append(best_reader)
125
124
 
126
125
  yield from sample
@@ -16,11 +16,16 @@ class FileReader(Reader):
16
16
  self.data_dir = Path(self.data_dir)
17
17
 
18
18
  def read(self, filename: Any, explore: ExploreCallable) -> Iterator[MoleculeEntry]:
19
- assert isinstance(filename, str), "input must be a string"
19
+ assert isinstance(filename, (str, bytes)), "input must be a string or bytes"
20
+
21
+ if isinstance(filename, bytes):
22
+ filename_str = filename.decode("utf-8")
23
+ else:
24
+ filename_str = filename
20
25
 
21
26
  # convert filename to path
22
27
  try:
23
- path = Path(filename)
28
+ path = Path(filename_str)
24
29
  except TypeError as e:
25
30
  raise ValueError("input must be a valid path") from e
26
31
 
@@ -45,7 +50,7 @@ class FileReader(Reader):
45
50
  source: Tuple[str, ...] = tuple()
46
51
  else:
47
52
  source = entry.source
48
- yield entry._replace(source=(filename, *source))
53
+ yield entry._replace(source=(filename_str, *source))
49
54
 
50
55
  def __repr__(self) -> str:
51
56
  return f"FileReader(data_dir={self.data_dir})"
@@ -0,0 +1,24 @@
1
+ from typing import Any, List
2
+
3
+ from ..config import ResultProperty
4
+ from ..converters import Converter
5
+ from ..steps import MapStep
6
+
7
+ __all__ = ["ConvertRepresentationsStep"]
8
+
9
+
10
+ class ConvertRepresentationsStep(MapStep):
11
+ def __init__(
12
+ self, result_properties: List[ResultProperty], output_format: str, **kwargs: Any
13
+ ) -> None:
14
+ super().__init__()
15
+ self._converter_map = {
16
+ p.name: Converter.get_converter(p, output_format, **kwargs) for p in result_properties
17
+ }
18
+
19
+ def _process(self, record: dict) -> dict:
20
+ result = {
21
+ k: self._converter_map[k].convert(input=v, context=record) for k, v in record.items()
22
+ }
23
+
24
+ return {k: v for k, v in result.items() if v is not Converter.HIDE}
@@ -113,22 +113,26 @@ class SimpleModel(Model):
113
113
  task_based_property = []
114
114
  if task == "atom_property_prediction":
115
115
  task_based_property = [
116
- {"name": "atom_id", "type": "integer"},
116
+ {"name": "atom_id", "type": "int"},
117
117
  ]
118
118
  elif task == "derivative_property_prediction":
119
119
  task_based_property = [
120
- {"name": "derivative_id", "type": "integer"},
120
+ {"name": "derivative_id", "type": "int"},
121
121
  ]
122
122
 
123
123
  default_properties_start = [
124
- {"name": "mol_id", "type": "integer"},
124
+ {"name": "mol_id", "type": "int"},
125
125
  *task_based_property,
126
- {"name": "input_text", "type": "string"},
127
- {"name": "input_type", "type": "string"},
128
- {"name": "source", "type": "string"},
129
- {"name": "name", "type": "string"},
130
- {"name": "input_mol", "type": "mol"},
131
- {"name": "preprocessed_mol", "type": "mol"},
126
+ {"name": "input_text", "visible_name": "Input text", "type": "string"},
127
+ {"name": "input_type", "visible_name": "Input type", "type": "string"},
128
+ {"name": "source", "visible_name": "Source", "type": "string"},
129
+ {"name": "name", "visible_name": "Name", "type": "string"},
130
+ {"name": "input_mol", "visible_name": "Input SMILES", "type": "mol"},
131
+ {
132
+ "name": "preprocessed_mol",
133
+ "visible_name": "Preprocessed SMILES",
134
+ "type": "mol",
135
+ },
132
136
  ]
133
137
 
134
138
  default_properties_end = [
@@ -14,17 +14,23 @@ warnings.filterwarnings(
14
14
  module="rdkit.Chem.MolStandardize",
15
15
  )
16
16
 
17
+ # We check if chembl_structure_pipeline is installed. Since importing this library already logs
18
+ # messages, we suppress them using RDKit's BlockLogs. We would like to use
19
+ # with BlockLogs(): ...
20
+ # but this does not work with old versions of RDKit. Therefore, we create an instance of
21
+ # BlockLogs that will suppress log messages as long as it exists. When it is deleted (in the
22
+ # "finally" block), logs are enabled again.
23
+ block_logs = BlockLogs()
17
24
  try:
18
- # importing chembl_structure_pipeline already logs messages
19
- # --> suppress them temporarily
20
- with BlockLogs():
21
- from chembl_structure_pipeline import get_parent_mol, standardize_mol
25
+ from chembl_structure_pipeline import get_parent_mol, standardize_mol
22
26
 
23
27
  import_error = None
24
28
  except ImportError as e:
25
29
  # raise ImportError later when using this class
26
30
  # --> this allows to use the rest of the package without chembl_structure_pipeline
27
31
  import_error = e
32
+ finally:
33
+ del block_logs
28
34
 
29
35
  __all__ = ["GetParentMolWithCsp", "StandardizeWithCsp"]
30
36
 
@@ -1,3 +1,4 @@
1
1
  from .checks import *
2
+ from .files import *
2
3
  from .predictions import *
3
4
  from .representations import *
@@ -0,0 +1,74 @@
1
+ from tempfile import NamedTemporaryFile
2
+
3
+ import numpy as np
4
+ from pytest_bdd import given, parsers
5
+
6
+ from .representations import representations_from_molecules
7
+
8
+
9
+ @given(
10
+ parsers.parse("a file containing the molecules in {input_type} format"),
11
+ target_fixture="files",
12
+ )
13
+ def representation_file(molecules, input_type):
14
+ representations = representations_from_molecules(molecules, input_type)
15
+
16
+ with NamedTemporaryFile("w", delete=False) as f:
17
+ for representation in representations:
18
+ if representation is None:
19
+ f.write("None")
20
+ else:
21
+ f.write(representation)
22
+ if input_type in ["smiles", "inchi"]:
23
+ f.write("\n")
24
+ elif input_type == "mol_block":
25
+ f.write("\n$$$$\n")
26
+ f.flush()
27
+ return [f.name]
28
+
29
+
30
+ @given(
31
+ parsers.parse(
32
+ "a list of {num_files:d} files containing the representations in {input_type} format",
33
+ ),
34
+ target_fixture="files",
35
+ )
36
+ def representation_files(molecules, input_type, num_files):
37
+ representations = representations_from_molecules(molecules, input_type)
38
+
39
+ # choose num_files-1 numbers to split the representations into num_files parts
40
+ # the while loop makes sure that each part contains at least one valid molecule
41
+ while True:
42
+ split_indices = np.random.choice(
43
+ len(representations), size=num_files - 1, replace=False
44
+ )
45
+ split_indices = np.sort(split_indices)
46
+
47
+ # split the representations
48
+ split_representations = np.split(representations, split_indices)
49
+
50
+ # check if each part contains at least one valid molecule
51
+ if all(
52
+ any(representation is not None for representation in split_representation)
53
+ for split_representation in split_representations
54
+ ):
55
+ break
56
+
57
+ # write the representations to files
58
+ representations_files = []
59
+
60
+ for _, split_representation in enumerate(split_representations):
61
+ with NamedTemporaryFile("w", delete=False) as f:
62
+ for representation in split_representation:
63
+ if representation is None:
64
+ f.write("None")
65
+ else:
66
+ f.write(representation)
67
+ if input_type in ["smiles", "inchi"]:
68
+ f.write("\n")
69
+ elif input_type == "mol_block":
70
+ f.write("\n$$$$\n")
71
+ f.flush()
72
+ representations_files.append(f.name)
73
+
74
+ return representations_files
@@ -20,28 +20,30 @@ def mol_weight_model(version):
20
20
 
21
21
 
22
22
  @when(
23
- parsers.parse("the mol weight model (version '{version}') generates predictions for the molecule representations"),
23
+ parsers.parse(
24
+ "the mol weight model (version '{version}') generates predictions for the molecule representations"
25
+ ),
24
26
  target_fixture="predictions",
25
27
  )
26
- def predictions_mol_weight_model(representations, version, input_type, multiplier):
28
+ def predictions_mol_weight_model(representations, version, multiplier):
27
29
  model = MolWeightModel(version=version)
28
30
  return model.predict(
29
31
  representations,
30
- input_type=input_type,
31
32
  multiplier=multiplier,
32
33
  output_format="record_list",
33
34
  )
34
35
 
35
36
 
36
37
  @when(
37
- parsers.parse("the atomic mass model (version '{version}') generates predictions for the molecule representations"),
38
+ parsers.parse(
39
+ "the atomic mass model (version '{version}') generates predictions for the molecule representations"
40
+ ),
38
41
  target_fixture="predictions",
39
42
  )
40
- def predictions_atomic_mass_model(representations, version, input_type, multiplier):
43
+ def predictions_atomic_mass_model(representations, version, multiplier):
41
44
  model = AtomicMassModel(version=version)
42
45
  return model.predict(
43
46
  representations,
44
- input_type=input_type,
45
47
  multiplier=multiplier,
46
48
  output_format="record_list",
47
49
  )
@@ -24,10 +24,12 @@ def representations_from_input(input):
24
24
 
25
25
 
26
26
  @given(
27
- parsers.parse("the representations of the molecules"),
27
+ parsers.parse("the representations of the molecules in {input_type} format"),
28
28
  target_fixture="representations",
29
29
  )
30
30
  def representations_from_molecules(molecules, input_type):
31
+ input_type = input_type.lower()
32
+
31
33
  if input_type == "smiles":
32
34
  converter = MolToSmiles
33
35
  elif input_type == "mol_block":
@@ -46,10 +48,14 @@ def representations_from_molecules(molecules, input_type):
46
48
 
47
49
 
48
50
  @given(
49
- parsers.parse("a list of {num:d} random molecules, where {num_none:d} entries are None"),
51
+ parsers.re(
52
+ r"a list of (?P<num>\d+) random molecules(?:, where (?P<num_none>\d+) entries are None)?"
53
+ ),
50
54
  target_fixture="molecules",
51
55
  )
52
- def molecules(num, num_none, random_seed=0):
56
+ def molecules_with_none(num, num_none=None, random_seed=0):
57
+ num = int(num)
58
+ num_none = int(num_none) if num_none is not None else 0
53
59
  result = None
54
60
 
55
61
  # pytest-bdd and hypothesis don't play well together (yet)
@@ -69,8 +75,3 @@ def molecules(num, num_none, random_seed=0):
69
75
  result[i] = None
70
76
 
71
77
  return result
72
-
73
-
74
- @given(parsers.parse("the input type is '{input_type}'"), target_fixture="input_type")
75
- def input_type(input_type):
76
- return input_type
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nerdd-module
3
- Version: 0.3.12
3
+ Version: 0.3.13
4
4
  Summary: Base package to create NERDD modules
5
5
  Author-email: Steffen Hirte <steffen.hirte@univie.ac.at>
6
6
  Maintainer-email: Steffen Hirte <steffen.hirte@univie.ac.at>
@@ -62,14 +62,14 @@ Requires-Dist: importlib-resources>=5; python_version < "3.9"
62
62
  Requires-Dist: importlib-metadata>=4.6; python_version < "3.10"
63
63
  Requires-Dist: typing_extensions>=4.0.1; python_version < "3.8"
64
64
  Provides-Extra: dev
65
- Requires-Dist: mypy==1.13.0; extra == "dev"
65
+ Requires-Dist: mypy>=1; extra == "dev"
66
66
  Requires-Dist: ruff==0.7.1; extra == "dev"
67
67
  Requires-Dist: pandas-stubs; extra == "dev"
68
68
  Requires-Dist: rdkit-stubs; extra == "dev"
69
69
  Requires-Dist: types-PyYAML; extra == "dev"
70
70
  Requires-Dist: types-decorator; extra == "dev"
71
71
  Requires-Dist: types-setuptools; extra == "dev"
72
- Requires-Dist: pre-commit==3.5.0; extra == "dev"
72
+ Requires-Dist: pre-commit>=2; extra == "dev"
73
73
  Provides-Extra: rdkit
74
74
  Requires-Dist: rdkit>=2022.3.3; extra == "rdkit"
75
75
  Provides-Extra: csp
@@ -79,7 +79,7 @@ Requires-Dist: pytest; extra == "test"
79
79
  Requires-Dist: pytest-sugar; extra == "test"
80
80
  Requires-Dist: pytest-cov; extra == "test"
81
81
  Requires-Dist: pytest-asyncio; extra == "test"
82
- Requires-Dist: pytest-bdd==7.3.0; extra == "test"
82
+ Requires-Dist: pytest-bdd<8; extra == "test"
83
83
  Requires-Dist: pytest-mock; extra == "test"
84
84
  Requires-Dist: pytest-watcher; extra == "test"
85
85
  Requires-Dist: hypothesis; extra == "test"
@@ -22,7 +22,12 @@ nerdd_module/config/search_yaml_configuration.py
22
22
  nerdd_module/config/yaml_configuration.py
23
23
  nerdd_module/converters/__init__.py
24
24
  nerdd_module/converters/converter.py
25
+ nerdd_module/converters/converter_config.py
25
26
  nerdd_module/converters/identity_converter.py
27
+ nerdd_module/converters/mol_converter.py
28
+ nerdd_module/converters/mol_to_image_converter.py
29
+ nerdd_module/converters/problem_list_converter.py
30
+ nerdd_module/converters/void_converter.py
26
31
  nerdd_module/input/__init__.py
27
32
  nerdd_module/input/depth_first_explorer.py
28
33
  nerdd_module/input/explorer.py
@@ -76,6 +81,7 @@ nerdd_module/steps/output_step.py
76
81
  nerdd_module/steps/step.py
77
82
  nerdd_module/tests/__init__.py
78
83
  nerdd_module/tests/checks.py
84
+ nerdd_module/tests/files.py
79
85
  nerdd_module/tests/predictions.py
80
86
  nerdd_module/tests/representations.py
81
87
  nerdd_module/tests/models/AtomicMassModel.py
@@ -19,14 +19,14 @@ importlib-resources>=5
19
19
  chembl_structure_pipeline>=1.0.0
20
20
 
21
21
  [dev]
22
- mypy==1.13.0
22
+ mypy>=1
23
23
  ruff==0.7.1
24
24
  pandas-stubs
25
25
  rdkit-stubs
26
26
  types-PyYAML
27
27
  types-decorator
28
28
  types-setuptools
29
- pre-commit==3.5.0
29
+ pre-commit>=2
30
30
 
31
31
  [docs]
32
32
  mkdocs
@@ -41,7 +41,7 @@ pytest
41
41
  pytest-sugar
42
42
  pytest-cov
43
43
  pytest-asyncio
44
- pytest-bdd==7.3.0
44
+ pytest-bdd<8
45
45
  pytest-mock
46
46
  pytest-watcher
47
47
  hypothesis
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "nerdd-module"
7
- version = "0.3.12"
7
+ version = "0.3.13"
8
8
  description = "Base package to create NERDD modules"
9
9
  readme = "README.md"
10
10
  license = { file = "LICENSE" }
@@ -50,14 +50,14 @@ classifiers = [
50
50
 
51
51
  [project.optional-dependencies]
52
52
  dev = [
53
- "mypy==1.13.0",
53
+ "mypy>=1",
54
54
  "ruff==0.7.1",
55
55
  "pandas-stubs",
56
56
  "rdkit-stubs",
57
57
  "types-PyYAML",
58
58
  "types-decorator",
59
59
  "types-setuptools",
60
- "pre-commit==3.5.0",
60
+ "pre-commit>=2",
61
61
  ]
62
62
  rdkit = [
63
63
  # Some old RDKit versions are not recognized by setuptools. For that reason,
@@ -78,7 +78,7 @@ test = [
78
78
  "pytest-sugar",
79
79
  "pytest-cov",
80
80
  "pytest-asyncio",
81
- "pytest-bdd==7.3.0",
81
+ "pytest-bdd<8",
82
82
  "pytest-mock",
83
83
  "pytest-watcher",
84
84
  "hypothesis",
@@ -1,2 +0,0 @@
1
- from .converter import *
2
- from .identity_converter import *
@@ -1,62 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from abc import ABC, abstractmethod
4
- from functools import partial
5
- from typing import Any, Callable, Dict, Optional, Tuple
6
-
7
- from ..util import call_with_mappings
8
-
9
- __all__ = ["Converter"]
10
-
11
-
12
- _factories: Dict[Tuple[Optional[str], Optional[str]], Callable[[dict], Converter]] = {}
13
-
14
-
15
- class Converter(ABC):
16
- def __init__(self) -> None:
17
- super().__init__()
18
-
19
- @classmethod
20
- def __init_subclass__(
21
- cls,
22
- output_format: Optional[str] = None,
23
- data_type: Optional[str] = None,
24
- is_abstract: bool = False,
25
- **kwargs: Any,
26
- ) -> None:
27
- super().__init_subclass__(**kwargs)
28
- if not is_abstract:
29
- _factories[(data_type, output_format)] = partial(call_with_mappings, cls)
30
-
31
- @abstractmethod
32
- def _convert(self, input: Any, context: dict, **kwargs: Any) -> Any:
33
- pass
34
-
35
- def convert(self, input: Any, context: dict, **kwargs: Any) -> Any:
36
- return self._convert(input, context, **kwargs)
37
-
38
- @classmethod
39
- def get_converter(
40
- cls,
41
- data_type: str,
42
- output_format: str,
43
- return_default: bool = True,
44
- **kwargs: Any,
45
- ) -> Converter:
46
- if (data_type, output_format) not in _factories:
47
- default = None
48
- if return_default:
49
- if (data_type, None) in _factories:
50
- default = _factories[(data_type, None)]
51
- elif (None, output_format) in _factories:
52
- default = _factories[(None, output_format)]
53
- elif (None, None) in _factories:
54
- default = _factories[(None, None)]
55
-
56
- if default is None:
57
- raise ValueError(
58
- f"Unknown data type '{data_type}' or output format '{output_format}'"
59
- )
60
- return default(kwargs)
61
-
62
- return _factories[(data_type, output_format)](kwargs)
@@ -1,8 +0,0 @@
1
- from typing import Any
2
-
3
- from .converter import Converter
4
-
5
-
6
- class IdentityConverter(Converter, data_type=None, output_format=None):
7
- def _convert(self, input: Any, context: dict, **kwargs: Any) -> dict:
8
- return input
@@ -1,20 +0,0 @@
1
- from typing import Any
2
-
3
- from ..converters import Converter
4
- from ..steps import MapStep
5
-
6
- __all__ = ["ConvertRepresentationsStep"]
7
-
8
-
9
- class ConvertRepresentationsStep(MapStep):
10
- def __init__(self, result_properties: list, output_format: str, **kwargs: Any) -> None:
11
- super().__init__()
12
- self._converter_map = {
13
- p.name: Converter.get_converter(p.type, output_format, property=p, **kwargs)
14
- for p in result_properties
15
- }
16
-
17
- def _process(self, record: dict) -> dict:
18
- return {
19
- k: self._converter_map[k].convert(input=v, context=record) for k, v in record.items()
20
- }
File without changes
File without changes
File without changes