nerdd-module 0.3.12__tar.gz → 0.3.14__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102)
  1. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/PKG-INFO +4 -4
  2. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/__init__.py +1 -0
  3. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/config/models.py +4 -0
  4. nerdd_module-0.3.14/nerdd_module/converters/__init__.py +8 -0
  5. nerdd_module-0.3.14/nerdd_module/converters/basic_type_converter.py +23 -0
  6. nerdd_module-0.3.14/nerdd_module/converters/converter.py +100 -0
  7. nerdd_module-0.3.14/nerdd_module/converters/converter_config.py +15 -0
  8. nerdd_module-0.3.14/nerdd_module/converters/mol_converter.py +24 -0
  9. nerdd_module-0.3.14/nerdd_module/converters/mol_to_image_converter.py +62 -0
  10. nerdd_module-0.3.14/nerdd_module/converters/problem_list_converter.py +21 -0
  11. nerdd_module-0.3.14/nerdd_module/converters/representation_converter.py +42 -0
  12. nerdd_module-0.3.14/nerdd_module/converters/void_converter.py +17 -0
  13. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/input/depth_first_explorer.py +3 -4
  14. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/input/file_reader.py +8 -3
  15. nerdd_module-0.3.14/nerdd_module/model/convert_representations_step.py +28 -0
  16. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/model/simple_model.py +47 -9
  17. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/output/sdf_writer.py +13 -4
  18. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/preprocessing/chembl_structure_pipeline.py +10 -4
  19. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/tests/__init__.py +1 -0
  20. nerdd_module-0.3.14/nerdd_module/tests/files.py +74 -0
  21. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/tests/models/AtomicMassModel.py +2 -0
  22. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/tests/models/MolWeightModel.py +3 -2
  23. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/tests/predictions.py +8 -6
  24. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/tests/representations.py +14 -9
  25. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module.egg-info/PKG-INFO +4 -4
  26. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module.egg-info/SOURCES.txt +8 -1
  27. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module.egg-info/requires.txt +3 -3
  28. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/pyproject.toml +4 -4
  29. nerdd_module-0.3.12/nerdd_module/converters/__init__.py +0 -2
  30. nerdd_module-0.3.12/nerdd_module/converters/converter.py +0 -62
  31. nerdd_module-0.3.12/nerdd_module/converters/identity_converter.py +0 -8
  32. nerdd_module-0.3.12/nerdd_module/model/convert_representations_step.py +0 -20
  33. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/LICENSE +0 -0
  34. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/README.md +0 -0
  35. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/cli.py +0 -0
  36. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/config/__init__.py +0 -0
  37. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/config/configuration.py +0 -0
  38. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/config/default_configuration.py +0 -0
  39. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/config/dict_configuration.py +0 -0
  40. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/config/merged_configuration.py +0 -0
  41. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/config/package_configuration.py +0 -0
  42. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/config/search_yaml_configuration.py +0 -0
  43. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/config/yaml_configuration.py +0 -0
  44. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/input/__init__.py +0 -0
  45. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/input/explorer.py +0 -0
  46. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/input/gzip_reader.py +0 -0
  47. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/input/inchi_reader.py +0 -0
  48. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/input/list_reader.py +0 -0
  49. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/input/mol_reader.py +0 -0
  50. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/input/reader.py +0 -0
  51. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/input/reader_config.py +0 -0
  52. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/input/sdf_reader.py +0 -0
  53. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/input/smiles_reader.py +0 -0
  54. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/input/string_reader.py +0 -0
  55. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/input/tar_reader.py +0 -0
  56. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/input/zip_reader.py +0 -0
  57. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/model/__init__.py +0 -0
  58. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/model/assign_mol_id_step.py +0 -0
  59. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/model/assign_name_step.py +0 -0
  60. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/model/enforce_schema_step.py +0 -0
  61. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/model/model.py +0 -0
  62. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/model/read_input_step.py +0 -0
  63. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/model/write_output_step.py +0 -0
  64. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/output/__init__.py +0 -0
  65. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/output/csv_writer.py +0 -0
  66. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/output/file_writer.py +0 -0
  67. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/output/iterator_writer.py +0 -0
  68. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/output/pandas_writer.py +0 -0
  69. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/output/record_list_writer.py +0 -0
  70. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/output/writer.py +0 -0
  71. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/polyfills/__init__.py +0 -0
  72. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/polyfills/files.py +0 -0
  73. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/polyfills/get_entry_points.py +0 -0
  74. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/polyfills/literal.py +0 -0
  75. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/polyfills/typed_dict.py +0 -0
  76. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/polyfills/types.py +0 -0
  77. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/polyfills/version.py +0 -0
  78. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/preprocessing/__init__.py +0 -0
  79. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/preprocessing/check_valid_smiles.py +0 -0
  80. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/preprocessing/filter_by_element.py +0 -0
  81. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/preprocessing/filter_by_weight.py +0 -0
  82. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/preprocessing/preprocessing_step.py +0 -0
  83. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/preprocessing/remove_stereochemistry.py +0 -0
  84. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/preprocessing/sanitize.py +0 -0
  85. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/problem.py +0 -0
  86. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/py.typed +0 -0
  87. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/steps/__init__.py +0 -0
  88. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/steps/map_step.py +0 -0
  89. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/steps/output_step.py +0 -0
  90. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/steps/step.py +0 -0
  91. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/tests/checks.py +0 -0
  92. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/tests/models/__init__.py +0 -0
  93. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/tests/preprocessing/DummyPreprocessingStep.py +0 -0
  94. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/tests/preprocessing/__init__.py +0 -0
  95. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/util/__init__.py +0 -0
  96. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/util/call_with_mappings.py +0 -0
  97. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/util/package.py +0 -0
  98. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module/version.py +0 -0
  99. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module.egg-info/dependency_links.txt +0 -0
  100. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/nerdd_module.egg-info/top_level.txt +0 -0
  101. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/setup.cfg +0 -0
  102. {nerdd_module-0.3.12 → nerdd_module-0.3.14}/tests/test_features.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nerdd-module
3
- Version: 0.3.12
3
+ Version: 0.3.14
4
4
  Summary: Base package to create NERDD modules
5
5
  Author-email: Steffen Hirte <steffen.hirte@univie.ac.at>
6
6
  Maintainer-email: Steffen Hirte <steffen.hirte@univie.ac.at>
@@ -62,14 +62,14 @@ Requires-Dist: importlib-resources>=5; python_version < "3.9"
62
62
  Requires-Dist: importlib-metadata>=4.6; python_version < "3.10"
63
63
  Requires-Dist: typing_extensions>=4.0.1; python_version < "3.8"
64
64
  Provides-Extra: dev
65
- Requires-Dist: mypy==1.13.0; extra == "dev"
65
+ Requires-Dist: mypy>=1; extra == "dev"
66
66
  Requires-Dist: ruff==0.7.1; extra == "dev"
67
67
  Requires-Dist: pandas-stubs; extra == "dev"
68
68
  Requires-Dist: rdkit-stubs; extra == "dev"
69
69
  Requires-Dist: types-PyYAML; extra == "dev"
70
70
  Requires-Dist: types-decorator; extra == "dev"
71
71
  Requires-Dist: types-setuptools; extra == "dev"
72
- Requires-Dist: pre-commit==3.5.0; extra == "dev"
72
+ Requires-Dist: pre-commit>=2; extra == "dev"
73
73
  Provides-Extra: rdkit
74
74
  Requires-Dist: rdkit>=2022.3.3; extra == "rdkit"
75
75
  Provides-Extra: csp
@@ -79,7 +79,7 @@ Requires-Dist: pytest; extra == "test"
79
79
  Requires-Dist: pytest-sugar; extra == "test"
80
80
  Requires-Dist: pytest-cov; extra == "test"
81
81
  Requires-Dist: pytest-asyncio; extra == "test"
82
- Requires-Dist: pytest-bdd==7.3.0; extra == "test"
82
+ Requires-Dist: pytest-bdd<8; extra == "test"
83
83
  Requires-Dist: pytest-mock; extra == "test"
84
84
  Requires-Dist: pytest-watcher; extra == "test"
85
85
  Requires-Dist: hypothesis; extra == "test"
@@ -1,4 +1,5 @@
1
1
  from .cli import *
2
+ from .converters import *
2
3
  from .model import *
3
4
  from .output import *
4
5
  from .polyfills import get_entry_points
@@ -71,12 +71,16 @@ class ResultProperty(BaseModel):
71
71
  name: str
72
72
  type: str
73
73
  visible_name: Optional[str] = None
74
+ visible: bool = True
74
75
  help_text: Optional[str] = None
75
76
  sortable: bool = False
76
77
  group: Optional[str] = None
77
78
  level: Level = "molecule"
78
79
  formats: Union[FormatSpec, IncludeExcludeFormatSpec, None] = None
79
80
  representation: Optional[str] = None
81
+ from_property: Optional[str] = None
82
+ image_width: Optional[int] = None
83
+ image_height: Optional[int] = None
80
84
 
81
85
  def is_visible(self, output_format: str) -> bool:
82
86
  formats = self.formats
@@ -0,0 +1,8 @@
1
+ from .basic_type_converter import *
2
+ from .converter import *
3
+ from .converter_config import *
4
+ from .mol_converter import *
5
+ from .mol_to_image_converter import *
6
+ from .problem_list_converter import *
7
+ from .representation_converter import *
8
+ from .void_converter import *
@@ -0,0 +1,23 @@
1
+ from typing import Any
2
+
3
+ from .converter import Converter
4
+ from .converter_config import ALL, ConverterConfig
5
+
6
+ __all__ = ["BasicTypeConverter", "basic_data_types"]
7
+
8
+ basic_data_types = [
9
+ "int",
10
+ "float",
11
+ "string",
12
+ "bool",
13
+ ]
14
+
15
+
16
+ class BasicTypeConverter(Converter):
17
+ def _convert(self, input: Any, context: dict) -> Any:
18
+ return input
19
+
20
+ config = ConverterConfig(
21
+ data_types=basic_data_types,
22
+ output_formats=ALL,
23
+ )
@@ -0,0 +1,100 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ from abc import ABC, abstractmethod
5
+ from typing import Any, Callable, Dict, Tuple, Union
6
+
7
+ from ..config import ResultProperty
8
+ from ..util import call_with_mappings
9
+ from .converter_config import ALL, ALL_TYPE
10
+
11
+ __all__ = ["Converter"]
12
+
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
+ _factories: Dict[
18
+ Tuple[Union[str, ALL_TYPE], Union[str, ALL_TYPE]], Callable[[dict], Converter]
19
+ ] = {}
20
+
21
+
22
+ class Converter(ABC):
23
+ # a special symbol to indicate that a property should be hidden
24
+ HIDE = object()
25
+
26
+ def __init__(self, result_property: ResultProperty, output_format: str, **kwargs: Any) -> None:
27
+ super().__init__()
28
+ self.result_property = result_property
29
+ self.output_format = output_format
30
+
31
+ @classmethod
32
+ def __init_subclass__(
33
+ cls,
34
+ is_abstract: bool = False,
35
+ **kwargs: Any,
36
+ ) -> None:
37
+ super().__init_subclass__(**kwargs)
38
+
39
+ if hasattr(cls, "config"):
40
+ data_types = cls.config["data_types"]
41
+ output_formats = cls.config["output_formats"]
42
+ else:
43
+ data_types = None
44
+ output_formats = None
45
+
46
+ if not is_abstract:
47
+ if isinstance(data_types, str) or data_types is ALL:
48
+ data_types_list = [data_types]
49
+ else:
50
+ data_types_list = data_types
51
+
52
+ if isinstance(output_formats, str) or output_formats is ALL:
53
+ output_formats_list = [output_formats]
54
+ else:
55
+ output_formats_list = output_formats
56
+
57
+ for output_format in output_formats_list:
58
+ for data_type in data_types_list:
59
+ logger.debug(f"Registering converter {cls} for {data_type} -> {output_format}")
60
+ _factories[(data_type, output_format)] = cls
61
+
62
+ @abstractmethod
63
+ def _convert(self, input: Any, context: dict) -> Any:
64
+ pass
65
+
66
+ def convert(self, input: Any, context: dict) -> Any:
67
+ return self._convert(input, context)
68
+
69
+ @classmethod
70
+ def get_converter(
71
+ cls,
72
+ result_property: ResultProperty,
73
+ output_format: str,
74
+ return_default: bool = True,
75
+ **kwargs: Any,
76
+ ) -> Converter:
77
+ data_type = result_property.type
78
+ if (data_type, output_format) not in _factories:
79
+ ConverterFunc = None
80
+ if return_default:
81
+ if (data_type, ALL) in _factories:
82
+ ConverterFunc = _factories[(data_type, ALL)]
83
+ elif (ALL, output_format) in _factories:
84
+ ConverterFunc = _factories[(ALL, output_format)]
85
+ elif (ALL, ALL) in _factories:
86
+ ConverterFunc = _factories[(ALL, ALL)]
87
+
88
+ if ConverterFunc is None:
89
+ raise ValueError(
90
+ f"Unknown data type '{data_type}' or output format '{output_format}'"
91
+ )
92
+ else:
93
+ ConverterFunc = _factories[(data_type, output_format)]
94
+
95
+ # kwargs will be passed to the constructor of the converter
96
+ # --> add data_type and output_format to the kwargs
97
+ kwargs["result_property"] = result_property
98
+ kwargs["output_format"] = output_format
99
+
100
+ return call_with_mappings(ConverterFunc, kwargs)
@@ -0,0 +1,15 @@
1
+ from typing import List, Optional, Union
2
+
3
+ from ..polyfills import Literal, TypedDict
4
+
5
+ __all__ = ["ConverterConfig", "ALL", "ALL_TYPE"]
6
+
7
+
8
+ # a special symbol to indicate that all data types / output formats are considered
9
+ ALL_TYPE = Literal["ALL"]
10
+ ALL: ALL_TYPE = "ALL"
11
+
12
+
13
+ class ConverterConfig(TypedDict):
14
+ data_types: Optional[Union[str, List[str], ALL_TYPE]]
15
+ output_formats: Optional[Union[str, List[str], ALL_TYPE]]
@@ -0,0 +1,24 @@
1
+ from typing import Any
2
+
3
+ from ..config import ResultProperty
4
+ from .converter import Converter
5
+ from .converter_config import ConverterConfig
6
+
7
+ __all__ = ["MolConverter"]
8
+
9
+
10
+ class MolConverter(Converter):
11
+ def __init__(self, result_property: ResultProperty, output_format: str, **kwargs: Any) -> None:
12
+ super().__init__(result_property, output_format, **kwargs)
13
+
14
+ def _convert(self, input: Any, context: dict) -> Any:
15
+ if self.output_format == "sdf" and self.result_property.name != "input_mol":
16
+ # in an SDF, the main molecule (input_mol) can be a Mol object
17
+ return Converter.HIDE
18
+ elif self.output_format in ["pandas", "record_list", "iterator"]:
19
+ return input
20
+
21
+ config = ConverterConfig(
22
+ data_types="mol",
23
+ output_formats=["sdf", "pandas", "record_list", "iterator"],
24
+ )
@@ -0,0 +1,62 @@
1
+ from typing import Any
2
+ from xml.dom import minidom
3
+
4
+ from rdkit.Chem import Mol
5
+ from rdkit.Chem.Draw import MolDraw2DSVG
6
+
7
+ from .converter import Converter
8
+ from .converter_config import ConverterConfig
9
+
10
+ __all__ = ["MolToImageConverter"]
11
+
12
+ default_width = 300
13
+ default_height = 180
14
+
15
+
16
+ class MolToImageConverter(Converter):
17
+ def _convert(self, input: Any, context: dict) -> Any:
18
+ width = self.result_property.image_width
19
+ height = self.result_property.image_height
20
+
21
+ if width is None:
22
+ width = default_width
23
+ if height is None:
24
+ height = default_height
25
+
26
+ mol = input
27
+ if mol is None:
28
+ return None
29
+
30
+ assert isinstance(mol, Mol), f"Expected RDKit Mol object, but got {type(mol)}"
31
+
32
+ svg = MolDraw2DSVG(width, height)
33
+
34
+ # remove background
35
+ opts = svg.drawOptions()
36
+ opts.clearBackground = False
37
+
38
+ # add highlight circles around atoms during drawing
39
+ # (we will remove them later in post processing)
40
+ atoms = range(mol.GetNumAtoms())
41
+ colors = [[(0.8, 1, 1)]] * mol.GetNumAtoms()
42
+ radii = [0.5] * mol.GetNumAtoms()
43
+ atom_highlight = dict(zip(atoms, colors))
44
+ atom_radii = dict(zip(atoms, radii))
45
+ svg.DrawMoleculeWithHighlights(mol, "", atom_highlight, {}, atom_radii, [])
46
+ svg.FinishDrawing()
47
+
48
+ # post process SVG
49
+ xml = svg.GetDrawingText()
50
+ tree = minidom.parseString(xml)
51
+ root = tree.getElementsByTagName("svg")[0]
52
+
53
+ # make highlight circles invisible
54
+ for ellipse in root.getElementsByTagName("ellipse"):
55
+ ellipse.setAttribute("style", "opacity:0")
56
+
57
+ xml = tree.toxml()
58
+
59
+ return xml
60
+
61
+ # TODO: move to nerdd-link
62
+ config = ConverterConfig(data_types="mol", output_formats="json")
@@ -0,0 +1,21 @@
1
+ from typing import Any, List, cast
2
+
3
+ from ..problem import Problem
4
+ from .converter import Converter
5
+ from .converter_config import ALL, ConverterConfig
6
+
7
+ __all__ = ["ProblemListConverter"]
8
+
9
+
10
+ class ProblemListConverter(Converter):
11
+ def _convert(self, input: Any, context: dict) -> Any:
12
+ if self.output_format in ["pandas", "iterator", "record_list"]:
13
+ return input
14
+ else:
15
+ problem_list: List[Problem] = cast(List[Problem], input)
16
+ return "; ".join([f"{problem.type}: {problem.message}" for problem in problem_list])
17
+
18
+ config = ConverterConfig(
19
+ data_types="problem_list",
20
+ output_formats=ALL,
21
+ )
@@ -0,0 +1,42 @@
1
+ from typing import Any
2
+
3
+ from rdkit.Chem import MolToInchi, MolToSmiles
4
+
5
+ from ..config import ResultProperty
6
+ from .converter import Converter
7
+ from .converter_config import ALL, ConverterConfig
8
+
9
+ __all__ = ["RepresentationConverter"]
10
+
11
+
12
+ class RepresentationConverter(Converter):
13
+ def __init__(self, result_property: ResultProperty, output_format: str, **kwargs: Any) -> None:
14
+ super().__init__(result_property, output_format, **kwargs)
15
+
16
+ representation = result_property.representation or "smiles"
17
+ if representation == "inchi":
18
+ self._serialize = MolToInchi
19
+ elif representation == "smiles":
20
+ self._serialize = MolToSmiles
21
+ else:
22
+ raise ValueError(f"Unsupported representation: {representation}")
23
+
24
+ def _convert(self, input: Any, context: dict) -> Any:
25
+ from_property = self.result_property.from_property
26
+
27
+ if from_property is None:
28
+ actual_input = input
29
+ else:
30
+ actual_input = context[from_property]
31
+
32
+ try:
33
+ representation = self._serialize(actual_input)
34
+ except: # noqa: E722 (allow bare except, because RDKit is unpredictable)
35
+ representation = None
36
+
37
+ return representation
38
+
39
+ config = ConverterConfig(
40
+ data_types="representation",
41
+ output_formats=ALL,
42
+ )
@@ -0,0 +1,17 @@
1
+ from typing import Any
2
+
3
+ from .converter import Converter
4
+ from .converter_config import ALL, ConverterConfig
5
+
6
+ __all__ = ["VoidConverter"]
7
+
8
+
9
+ class VoidConverter(Converter):
10
+ def _convert(self, input: Any, context: dict) -> Any:
11
+ return Converter.HIDE
12
+
13
+ # by default, all data types will be hidden for all output formats
14
+ config = ConverterConfig(
15
+ data_types=ALL,
16
+ output_formats=ALL,
17
+ )
@@ -107,7 +107,7 @@ class DepthFirstExplorer(Explorer):
107
107
  except Exception:
108
108
  pass
109
109
 
110
- # clean up tree
110
+ # clean up stack
111
111
  while len(self._state_stack) > depth:
112
112
  self._state_stack.pop()
113
113
  generator = None
@@ -115,12 +115,11 @@ class DepthFirstExplorer(Explorer):
115
115
  if generator is None:
116
116
  if best_reader is None:
117
117
  generator = self._read(InvalidInputReader(), input)
118
- sample = []
119
118
  else:
120
119
  generator = self._read(best_reader, input)
121
- sample = list(islice(generator, self._num_test_entries))
120
+ sample = []
122
121
  else:
123
- if best_mode is not None and best_mode != "guess":
122
+ if best_mode == "builtin":
124
123
  parent["first_guess"].append(best_reader)
125
124
 
126
125
  yield from sample
@@ -16,11 +16,16 @@ class FileReader(Reader):
16
16
  self.data_dir = Path(self.data_dir)
17
17
 
18
18
  def read(self, filename: Any, explore: ExploreCallable) -> Iterator[MoleculeEntry]:
19
- assert isinstance(filename, str), "input must be a string"
19
+ assert isinstance(filename, (str, bytes)), "input must be a string or bytes"
20
+
21
+ if isinstance(filename, bytes):
22
+ filename_str = filename.decode("utf-8")
23
+ else:
24
+ filename_str = filename
20
25
 
21
26
  # convert filename to path
22
27
  try:
23
- path = Path(filename)
28
+ path = Path(filename_str)
24
29
  except TypeError as e:
25
30
  raise ValueError("input must be a valid path") from e
26
31
 
@@ -45,7 +50,7 @@ class FileReader(Reader):
45
50
  source: Tuple[str, ...] = tuple()
46
51
  else:
47
52
  source = entry.source
48
- yield entry._replace(source=(filename, *source))
53
+ yield entry._replace(source=(filename_str, *source))
49
54
 
50
55
  def __repr__(self) -> str:
51
56
  return f"FileReader(data_dir={self.data_dir})"
@@ -0,0 +1,28 @@
1
+ from typing import Any, List
2
+
3
+ from ..config import ResultProperty
4
+ from ..converters import Converter
5
+ from ..steps import MapStep
6
+
7
+ __all__ = ["ConvertRepresentationsStep"]
8
+
9
+
10
+ class ConvertRepresentationsStep(MapStep):
11
+ def __init__(
12
+ self, result_properties: List[ResultProperty], output_format: str, **kwargs: Any
13
+ ) -> None:
14
+ super().__init__()
15
+ self._result_properties = result_properties
16
+ self._converter_map = {
17
+ p.name: Converter.get_converter(p, output_format, **kwargs) for p in result_properties
18
+ }
19
+
20
+ def _process(self, record: dict) -> dict:
21
+ result = {
22
+ k.name: self._converter_map[k.name].convert(
23
+ input=record.get(k.name, None), context=record
24
+ )
25
+ for k in self._result_properties
26
+ }
27
+
28
+ return {k: v for k, v in result.items() if v is not Converter.HIDE}
@@ -113,22 +113,60 @@ class SimpleModel(Model):
113
113
  task_based_property = []
114
114
  if task == "atom_property_prediction":
115
115
  task_based_property = [
116
- {"name": "atom_id", "type": "integer"},
116
+ {"name": "atom_id", "type": "int", "visible": False},
117
117
  ]
118
118
  elif task == "derivative_property_prediction":
119
119
  task_based_property = [
120
- {"name": "derivative_id", "type": "integer"},
120
+ {"name": "derivative_id", "type": "int", "visible": False},
121
121
  ]
122
122
 
123
123
  default_properties_start = [
124
- {"name": "mol_id", "type": "integer"},
124
+ {"name": "mol_id", "type": "int", "visible": False},
125
125
  *task_based_property,
126
- {"name": "input_text", "type": "string"},
127
- {"name": "input_type", "type": "string"},
128
- {"name": "source", "type": "string"},
129
- {"name": "name", "type": "string"},
130
- {"name": "input_mol", "type": "mol"},
131
- {"name": "preprocessed_mol", "type": "mol"},
126
+ {
127
+ "name": "input_text",
128
+ "visible_name": "Input text",
129
+ "type": "string",
130
+ "visible": False,
131
+ },
132
+ {
133
+ "name": "input_type",
134
+ "visible_name": "Input type",
135
+ "type": "string",
136
+ "visible": False,
137
+ },
138
+ {
139
+ "name": "source",
140
+ "visible_name": "Source",
141
+ "type": "string",
142
+ "visible": False,
143
+ },
144
+ {"name": "name", "visible_name": "Name", "type": "string"},
145
+ {
146
+ "name": "input_mol",
147
+ "visible_name": "Input Structure",
148
+ "type": "mol",
149
+ "visible": False,
150
+ },
151
+ {
152
+ "name": "input_smiles",
153
+ "visible_name": "Input SMILES",
154
+ "type": "representation",
155
+ "from_property": "input_mol",
156
+ "visible": False,
157
+ },
158
+ {
159
+ "name": "preprocessed_mol",
160
+ "visible_name": "Preprocessed Structure",
161
+ "type": "mol",
162
+ },
163
+ {
164
+ "name": "preprocessed_smiles",
165
+ "visible_name": "Preprocessed SMILES",
166
+ "type": "representation",
167
+ "from_property": "preprocessed_mol",
168
+ "visible": False,
169
+ },
132
170
  ]
133
171
 
134
172
  default_properties_end = [
@@ -1,6 +1,6 @@
1
1
  from typing import IO, Any, Dict, Iterable
2
2
 
3
- from rdkit.Chem import SDWriter
3
+ from rdkit.Chem import Mol, SDWriter
4
4
 
5
5
  from .file_writer import FileLike, FileWriter
6
6
 
@@ -18,13 +18,22 @@ class SdfWriter(FileWriter, output_format="sdf"):
18
18
  # assume that there is a mol object
19
19
  mol = entry["input_mol"]
20
20
 
21
+ # if the molecule is erroneous, use an empty molecule
22
+ if mol is None:
23
+ mol = Mol()
24
+
21
25
  # write (almost) all properties to the mol object
22
26
  for key, value in entry.items():
23
- value_as_str = str(value)
24
- if "\n" in value_as_str:
25
- # SDF can't write multi-line strings
27
+ # skip "input_mol" key, because we use it as the main molecule
28
+ if key == "input_mol":
26
29
  continue
27
30
 
31
+ value_as_str = str(value)
32
+
33
+ # SDF can't write multi-line strings
34
+ # -> replace newline with space
35
+ value_as_str = value_as_str.replace("\n", " ")
36
+
28
37
  mol.SetProp(key, value_as_str)
29
38
 
30
39
  # write molecule
@@ -14,17 +14,23 @@ warnings.filterwarnings(
14
14
  module="rdkit.Chem.MolStandardize",
15
15
  )
16
16
 
17
+ # We check if chembl_structure_pipeline is installed. Since importing this library already logs
18
+ # messages, we suppress them using RDKit's BlockLogs. We would like to use
19
+ # with BlockLogs(): ...
20
+ # but this does not work with old versions of RDKit. Therefore, we create an instance of
21
+ # BlockLogs that will suppress log messages as long as it exists. When it is deleted (in the
22
+ # "finally" block), logs are enabled again.
23
+ block_logs = BlockLogs()
17
24
  try:
18
- # importing chembl_structure_pipeline already logs messages
19
- # --> suppress them temporarily
20
- with BlockLogs():
21
- from chembl_structure_pipeline import get_parent_mol, standardize_mol
25
+ from chembl_structure_pipeline import get_parent_mol, standardize_mol
22
26
 
23
27
  import_error = None
24
28
  except ImportError as e:
25
29
  # raise ImportError later when using this class
26
30
  # --> this allows to use the rest of the package without chembl_structure_pipeline
27
31
  import_error = e
32
+ finally:
33
+ del block_logs
28
34
 
29
35
  __all__ = ["GetParentMolWithCsp", "StandardizeWithCsp"]
30
36
 
@@ -1,3 +1,4 @@
1
1
  from .checks import *
2
+ from .files import *
2
3
  from .predictions import *
3
4
  from .representations import *