nerdd-module 0.3.8__tar.gz → 0.3.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/PKG-INFO +11 -7
  2. nerdd_module-0.3.10/README.md +19 -0
  3. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/cli.py +43 -31
  4. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/config/__init__.py +1 -0
  5. nerdd_module-0.3.10/nerdd_module/config/configuration.py +32 -0
  6. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/config/merged_configuration.py +1 -1
  7. nerdd_module-0.3.10/nerdd_module/config/models.py +178 -0
  8. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/config/package_configuration.py +1 -1
  9. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/config/search_yaml_configuration.py +1 -1
  10. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/config/yaml_configuration.py +10 -2
  11. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/input/depth_first_explorer.py +1 -1
  12. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/input/explorer.py +0 -3
  13. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/input/file_reader.py +6 -3
  14. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/input/inchi_reader.py +8 -1
  15. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/input/mol_reader.py +1 -1
  16. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/input/reader.py +17 -23
  17. nerdd_module-0.3.10/nerdd_module/input/reader_config.py +7 -0
  18. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/input/sdf_reader.py +1 -1
  19. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/input/smiles_reader.py +4 -1
  20. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/input/tar_reader.py +1 -1
  21. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/input/zip_reader.py +1 -1
  22. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/model/convert_representations_step.py +5 -3
  23. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/model/enforce_schema_step.py +6 -4
  24. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/model/model.py +2 -1
  25. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/model/read_input_step.py +1 -1
  26. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/model/simple_model.py +12 -10
  27. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/output/writer.py +7 -3
  28. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/preprocessing/chembl_structure_pipeline.py +2 -3
  29. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/preprocessing/filter_by_element.py +2 -2
  30. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/util/call_with_mappings.py +5 -2
  31. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module.egg-info/PKG-INFO +11 -7
  32. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module.egg-info/SOURCES.txt +2 -0
  33. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module.egg-info/requires.txt +6 -3
  34. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/pyproject.toml +26 -10
  35. nerdd_module-0.3.8/README.md +0 -18
  36. nerdd_module-0.3.8/nerdd_module/config/configuration.py +0 -90
  37. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/LICENSE +0 -0
  38. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/__init__.py +1 -1
  39. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/config/default_configuration.py +0 -0
  40. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/config/dict_configuration.py +0 -0
  41. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/converters/__init__.py +0 -0
  42. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/converters/converter.py +0 -0
  43. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/converters/identity_converter.py +0 -0
  44. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/input/__init__.py +0 -0
  45. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/input/gzip_reader.py +0 -0
  46. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/input/list_reader.py +0 -0
  47. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/input/string_reader.py +0 -0
  48. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/model/__init__.py +0 -0
  49. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/model/assign_mol_id_step.py +0 -0
  50. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/model/assign_name_step.py +0 -0
  51. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/model/write_output_step.py +0 -0
  52. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/output/__init__.py +0 -0
  53. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/output/csv_writer.py +0 -0
  54. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/output/file_writer.py +0 -0
  55. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/output/iterator_writer.py +0 -0
  56. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/output/pandas_writer.py +0 -0
  57. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/output/record_list_writer.py +0 -0
  58. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/output/sdf_writer.py +0 -0
  59. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/polyfills/__init__.py +0 -0
  60. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/polyfills/files.py +0 -0
  61. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/polyfills/get_entry_points.py +0 -0
  62. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/polyfills/types.py +0 -0
  63. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/polyfills/version.py +0 -0
  64. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/preprocessing/__init__.py +0 -0
  65. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/preprocessing/check_valid_smiles.py +0 -0
  66. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/preprocessing/filter_by_weight.py +0 -0
  67. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/preprocessing/preprocessing_step.py +0 -0
  68. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/preprocessing/remove_stereochemistry.py +0 -0
  69. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/preprocessing/sanitize.py +0 -0
  70. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/problem.py +0 -0
  71. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/py.typed +0 -0
  72. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/steps/__init__.py +0 -0
  73. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/steps/map_step.py +0 -0
  74. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/steps/output_step.py +0 -0
  75. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/steps/step.py +0 -0
  76. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/tests/__init__.py +0 -0
  77. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/tests/checks.py +0 -0
  78. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/tests/models/AtomicMassModel.py +0 -0
  79. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/tests/models/MolWeightModel.py +0 -0
  80. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/tests/models/__init__.py +0 -0
  81. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/tests/predictions.py +0 -0
  82. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/tests/preprocessing/DummyPreprocessingStep.py +0 -0
  83. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/tests/preprocessing/__init__.py +0 -0
  84. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/tests/representations.py +0 -0
  85. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/util/__init__.py +0 -0
  86. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/util/package.py +0 -0
  87. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/version.py +0 -0
  88. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module.egg-info/dependency_links.txt +0 -0
  89. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module.egg-info/top_level.txt +0 -0
  90. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/setup.cfg +0 -0
  91. {nerdd_module-0.3.8 → nerdd_module-0.3.10}/tests/test_features.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nerdd-module
3
- Version: 0.3.8
3
+ Version: 0.3.10
4
4
  Summary: Base package to create NERDD modules
5
5
  Author-email: Steffen Hirte <steffen.hirte@univie.ac.at>
6
6
  Maintainer-email: Steffen Hirte <steffen.hirte@univie.ac.at>
@@ -57,15 +57,18 @@ Requires-Dist: filetype~=1.2.0
57
57
  Requires-Dist: rich-click>=1.7.1
58
58
  Requires-Dist: stringcase>=1.2.0
59
59
  Requires-Dist: decorator>=5.1.1
60
+ Requires-Dist: pydantic>=2
60
61
  Requires-Dist: importlib-resources>=5; python_version < "3.9"
61
62
  Requires-Dist: importlib-metadata>=4.6; python_version < "3.10"
62
63
  Provides-Extra: dev
63
- Requires-Dist: mypy; extra == "dev"
64
- Requires-Dist: ruff; extra == "dev"
64
+ Requires-Dist: mypy==1.13.0; extra == "dev"
65
+ Requires-Dist: ruff==0.7.1; extra == "dev"
65
66
  Requires-Dist: pandas-stubs; extra == "dev"
67
+ Requires-Dist: rdkit-stubs; extra == "dev"
66
68
  Requires-Dist: types-PyYAML; extra == "dev"
67
69
  Requires-Dist: types-decorator; extra == "dev"
68
70
  Requires-Dist: types-setuptools; extra == "dev"
71
+ Requires-Dist: pre-commit==3.5.0; extra == "dev"
69
72
  Provides-Extra: rdkit
70
73
  Requires-Dist: rdkit>=2022.3.3; extra == "rdkit"
71
74
  Provides-Extra: csp
@@ -75,7 +78,7 @@ Requires-Dist: pytest; extra == "test"
75
78
  Requires-Dist: pytest-sugar; extra == "test"
76
79
  Requires-Dist: pytest-cov; extra == "test"
77
80
  Requires-Dist: pytest-asyncio; extra == "test"
78
- Requires-Dist: pytest-bdd; extra == "test"
81
+ Requires-Dist: pytest-bdd==7.3.0; extra == "test"
79
82
  Requires-Dist: pytest-mock; extra == "test"
80
83
  Requires-Dist: pytest-watcher; extra == "test"
81
84
  Requires-Dist: hypothesis; extra == "test"
@@ -100,6 +103,7 @@ pip install -U nerdd-module
100
103
  ## Contribute
101
104
 
102
105
  1. Fork and clone the code
103
- 2. Install test dependencies with ` pip install -e .[test,dev,csp]`
104
- 3. Run tests via `pytest` or `pytest-watch` (short: `ptw`)
105
- 4. Build docs via ` pip install -e .[docs]` and ` mkdocs serve`
106
+ 2. Install test dependencies with `pip install -e .[test,dev,csp]`
107
+ 3. Install pre-commit hooks `pre-commit install`
108
+ 4. Run tests via `pytest` or `pytest-watch` (short: `ptw`)
109
+ 5. Build docs via `pip install -e .[docs]` and `mkdocs serve`
@@ -0,0 +1,19 @@
1
+ # NERDD Module
2
+
3
+ This package provides the basis to implement molecular prediction modules in the
4
+ NERDD ecosystem.
5
+
6
+ ## Installation
7
+
8
+ ``` bash
9
+ pip install -U nerdd-module
10
+ ```
11
+
12
+
13
+ ## Contribute
14
+
15
+ 1. Fork and clone the code
16
+ 2. Install test dependencies with `pip install -e .[test,dev,csp]`
17
+ 3. Install pre-commit hooks `pre-commit install`
18
+ 4. Run tests via `pytest` or `pytest-watch` (short: `ptw`)
19
+ 5. Build docs via `pip install -e .[docs]` and `mkdocs serve`
@@ -1,18 +1,22 @@
1
1
  import logging
2
+ import os
2
3
  import sys
3
4
  from typing import Any, Callable
4
5
 
5
6
  import rich_click as click
6
7
  from decorator import decorator
7
- from stringcase import spinalcase # type: ignore
8
+ from stringcase import spinalcase
8
9
 
10
+ from .config import JobParameter
11
+ from .input import Reader
9
12
  from .model import Model
13
+ from .output import FileWriter, Writer
10
14
 
11
15
  __all__ = ["auto_cli"]
12
16
 
13
17
  input_description = """{description}
14
18
 
15
- INPUT molecules are provided as file paths or strings. The following formats are
19
+ INPUT molecules are provided as file paths or strings. The following formats are
16
20
  supported:
17
21
 
18
22
  {input_format_list}
@@ -21,25 +25,21 @@ Note that input formats shouldn't be mixed.
21
25
  """
22
26
 
23
27
 
24
- def infer_click_type(param: dict) -> click.ParamType:
25
- if "choices" in param:
26
- choices = [c["value"] for c in param["choices"]]
28
+ def infer_click_type(param: JobParameter) -> click.ParamType:
29
+ if param.choices is not None:
30
+ choices = [c.value for c in param.choices]
27
31
  return click.Choice(choices)
28
32
 
29
33
  type_map = {
30
34
  "float": click.FLOAT,
31
- "int": click.INT,
32
- "str": click.STRING,
35
+ "integer": click.INT,
36
+ "string": click.STRING,
33
37
  "bool": click.BOOL,
34
38
  }
35
39
 
36
- if "type" not in param:
37
- raise ValueError(f"Parameter {param['name']} does not have a type")
38
-
39
- t = param["type"]
40
-
40
+ t = param.type
41
41
  if t not in type_map:
42
- raise ValueError(f"Unknown type {t} for parameter {param['name']}")
42
+ raise ValueError(f"Unknown type {t} for parameter {param.name}")
43
43
 
44
44
  return type_map[t]
45
45
 
@@ -47,7 +47,7 @@ def infer_click_type(param: dict) -> click.ParamType:
47
47
  @decorator
48
48
  def auto_cli(f: Callable[..., Model], *args: Any, **kwargs: Any) -> None:
49
49
  # infer the command name
50
- # command_name = os.path.basename(sys.argv[0])
50
+ command_name = os.path.basename(sys.argv[0])
51
51
 
52
52
  # get the model
53
53
  model = f()
@@ -59,21 +59,33 @@ def auto_cli(f: Callable[..., Model], *args: Any, **kwargs: Any) -> None:
59
59
  description=model.description, input_format_list=input_format_list
60
60
  )
61
61
 
62
- output_format_list = ["sdf", "csv"]
62
+ output_format_list = [
63
+ output_format
64
+ for output_format, writer in Writer.get_writers(output_file=None).items()
65
+ if isinstance(writer, FileWriter)
66
+ ]
63
67
 
64
68
  # compose footer with examples
65
- # TODO: add examples
66
- # examples = []
67
- # if "example_smiles" in config:
68
- # examples.append(config["example_smiles"])
69
-
70
- # if len(examples) > 0:
71
- # footer = "Examples:\n"
72
- # for example in examples:
73
- # footer += f'* {command_name} "{example}"\n'
74
- # else:
75
- # footer = ""
76
- footer = ""
69
+ examples = []
70
+ if hasattr(model, "get_config"):
71
+ example_smiles = model.get_config().example_smiles
72
+ if example_smiles is not None:
73
+ examples.append(example_smiles)
74
+
75
+ for ReaderClass in Reader.get_reader_mapping():
76
+ if hasattr(ReaderClass, "config"):
77
+ reader_examples = ReaderClass.config.get("examples", [])
78
+ for example in reader_examples:
79
+ # check if example fits on one line
80
+ if len(example) < 120 and "\n" not in example:
81
+ examples.append(example)
82
+
83
+ if len(examples) > 0:
84
+ footer = "Examples:\n"
85
+ for example in examples:
86
+ footer += f'* {command_name} "{example}"\n'
87
+ else:
88
+ footer = ""
77
89
 
78
90
  #
79
91
  # Define the CLI entry point
@@ -107,12 +119,12 @@ def auto_cli(f: Callable[..., Model], *args: Any, **kwargs: Any) -> None:
107
119
  #
108
120
  for param in model.job_parameters:
109
121
  # convert parameter name to spinal case (e.g. "max_confs" -> "max-confs")
110
- param_name = spinalcase(param["name"])
122
+ param_name = spinalcase(param.name)
111
123
  main = click.option(
112
124
  f"--{param_name}",
113
- default=param.get("default", None),
125
+ default=param.default,
114
126
  type=infer_click_type(param),
115
- help=param.get("help_text", None),
127
+ help=param.help_text,
116
128
  )(main)
117
129
 
118
130
  #
@@ -157,4 +169,4 @@ def auto_cli(f: Callable[..., Model], *args: Any, **kwargs: Any) -> None:
157
169
  # show_default=True: default values are shown in the help text
158
170
  main = click.command(context_settings={"show_default": True}, help=help_text)(main)
159
171
 
160
- return main()
172
+ main()
@@ -2,6 +2,7 @@ from .configuration import *
2
2
  from .default_configuration import *
3
3
  from .dict_configuration import *
4
4
  from .merged_configuration import *
5
+ from .models import *
5
6
  from .package_configuration import *
6
7
  from .search_yaml_configuration import *
7
8
  from .yaml_configuration import *
@@ -0,0 +1,32 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import Optional
3
+
4
+ from .models import Module
5
+
6
+ __all__ = ["Configuration"]
7
+
8
+
9
+ class Configuration(ABC):
10
+ def __init__(self) -> None:
11
+ self._cached_config: Optional[Module] = None
12
+
13
+ def get_dict(self) -> Module:
14
+ if self._cached_config is None:
15
+ config = self._get_dict()
16
+
17
+ # validate the config
18
+ module = Module(**config)
19
+
20
+ self._cached_config = module
21
+
22
+ return self._cached_config
23
+
24
+ @abstractmethod
25
+ def _get_dict(self) -> dict:
26
+ pass
27
+
28
+ def is_empty(self) -> bool:
29
+ return self.get_dict() == {}
30
+
31
+ def __repr__(self) -> str:
32
+ return f"{self.__class__.__name__}({self._get_dict()})"
@@ -41,4 +41,4 @@ def merge(*args: dict) -> dict:
41
41
 
42
42
  class MergedConfiguration(DictConfiguration):
43
43
  def __init__(self, *configs: Configuration):
44
- super().__init__(merge(*[c.get_dict() for c in configs]))
44
+ super().__init__(merge(*[c._get_dict() for c in configs]))
@@ -0,0 +1,178 @@
1
+ from typing import Any, List, Literal, Optional, Union
2
+
3
+ from pydantic import BaseModel, model_validator
4
+
5
+
6
+ class Partner(BaseModel):
7
+ name: str
8
+ logo: str
9
+ url: Optional[str] = None
10
+
11
+
12
+ class Author(BaseModel):
13
+ """
14
+ Author information
15
+
16
+ Attributes:
17
+ first_name : str
18
+ First name of the author.
19
+ last_name : str
20
+ Last name of the author.
21
+ email : Optional[str]
22
+ Email of the author. If provided, the author is a corresponding author.
23
+ """
24
+
25
+ first_name: str
26
+ last_name: str
27
+ email: Optional[str] = None
28
+
29
+
30
+ class Publication(BaseModel):
31
+ title: str
32
+ authors: List[Author] = []
33
+ journal: str
34
+ year: int
35
+ doi: Optional[str]
36
+
37
+
38
+ class JobParameterChoice(BaseModel):
39
+ value: str
40
+ label: Optional[str] = None
41
+
42
+
43
+ class JobParameter(BaseModel):
44
+ name: str
45
+ type: str
46
+ visible_name: Optional[str] = None
47
+ help_text: Optional[str] = None
48
+ default: Optional[str] = None
49
+ required: bool = False
50
+ choices: Optional[List[JobParameterChoice]] = None
51
+
52
+
53
+ Task = Literal[
54
+ "molecular_property_prediction",
55
+ "atom_property_prediction",
56
+ "derivative_property_prediction",
57
+ ]
58
+ Level = Literal["molecule", "atom", "derivative"]
59
+
60
+ FormatSpec = Union[List[str], str]
61
+
62
+
63
+ class IncludeExcludeFormatSpec(BaseModel):
64
+ include: Optional[FormatSpec]
65
+ exclude: Optional[FormatSpec]
66
+
67
+
68
+ class ResultProperty(BaseModel):
69
+ name: str
70
+ type: str
71
+ visible_name: Optional[str] = None
72
+ help_text: Optional[str] = None
73
+ sortable: bool = False
74
+ group: Optional[str] = None
75
+ level: Level = "molecule"
76
+ formats: Union[FormatSpec, IncludeExcludeFormatSpec, None] = None
77
+ representation: Optional[str] = None
78
+
79
+ def is_visible(self, output_format: str) -> bool:
80
+ formats = self.formats
81
+
82
+ if formats is None:
83
+ return True
84
+ elif isinstance(formats, list):
85
+ return output_format in formats
86
+ elif isinstance(formats, IncludeExcludeFormatSpec):
87
+ include = formats.include
88
+ exclude = formats.exclude or []
89
+ return (include is None or output_format in include) and output_format not in exclude
90
+ else:
91
+ raise ValueError(f"Invalid formats declaration {formats} in result property {self}")
92
+
93
+
94
+ class Module(BaseModel):
95
+ task: Optional[Task] = None
96
+ rank: Optional[int] = None
97
+ name: Optional[str] = None
98
+ batch_size: int = 100
99
+ version: Optional[str] = None
100
+ visible_name: Optional[str] = None
101
+ logo: Optional[str] = None
102
+ logo_title: Optional[str] = None
103
+ logo_caption: Optional[str] = None
104
+ example_smiles: Optional[str] = None
105
+ title: Optional[str] = None
106
+ description: Optional[str] = None
107
+ partners: List[Partner] = []
108
+ publications: List[Publication] = []
109
+ about: Optional[str] = None
110
+ job_parameters: List[JobParameter] = []
111
+ result_properties: List[ResultProperty] = []
112
+
113
+ def get_property_columns_of_type(self, t: Level) -> List[ResultProperty]:
114
+ return [c for c in self.result_properties if c.level == t]
115
+
116
+ def molecular_property_columns(self) -> List[ResultProperty]:
117
+ return self.get_property_columns_of_type("molecule")
118
+
119
+ def atom_property_columns(self) -> List[ResultProperty]:
120
+ return self.get_property_columns_of_type("atom")
121
+
122
+ def derivative_property_columns(self) -> List[ResultProperty]:
123
+ return self.get_property_columns_of_type("derivative")
124
+
125
+ def get_visible_properties(self, output_format: str) -> List[ResultProperty]:
126
+ return [p for p in self.result_properties if p.is_visible(output_format)]
127
+
128
+ @model_validator(mode="after")
129
+ @classmethod
130
+ def validate_model(cls, values: Any) -> Any:
131
+ assert isinstance(values, Module)
132
+
133
+ num_atom_properties = len(values.get_property_columns_of_type("atom"))
134
+ num_derivative_properties = len(values.get_property_columns_of_type("derivative"))
135
+ task = values.task
136
+ if task is None:
137
+ # if task is not specified, try to derive it from the result_properties
138
+ if num_atom_properties > 0:
139
+ task = "atom_property_prediction"
140
+ elif num_derivative_properties > 0:
141
+ task = "derivative_property_prediction"
142
+ else:
143
+ task = "molecular_property_prediction"
144
+
145
+ values.task = task
146
+ else:
147
+ # if task is specified, check if it is consistent with the result_properties
148
+ if num_atom_properties > 0:
149
+ assert (
150
+ task == "atom_property_prediction"
151
+ ), "Task should be atom_property_prediction if atom properties are present."
152
+ elif num_derivative_properties > 0:
153
+ assert task == "derivative_property_prediction", (
154
+ "Task should be derivative_property_prediction if derivative properties "
155
+ "are present."
156
+ )
157
+ else:
158
+ assert task == "molecular_property_prediction", (
159
+ "Task should be molecular_property_prediction if no atom or derivative "
160
+ "properties are present."
161
+ )
162
+
163
+ # check that a module can only predict atom or derivative properties, not both
164
+ assert (
165
+ num_atom_properties == 0 or num_derivative_properties == 0
166
+ ), "A module can only predict atom or derivative properties, not both."
167
+
168
+ # check that two properties with the same group appear next to each other
169
+ groups = [p.group for p in values.result_properties if p.group is not None]
170
+ for group in groups:
171
+ indices = [i for i, p in enumerate(values.result_properties) if p.group == group]
172
+ for i, j in zip(indices[:-1], indices[1:]):
173
+ assert i + 1 == j, (
174
+ f"Properties with the same group should appear next to each other, "
175
+ f"but group {group} appears at incides {i} and {j}."
176
+ )
177
+
178
+ return values
@@ -32,4 +32,4 @@ class PackageConfiguration(Configuration):
32
32
  self.config = DictConfiguration({})
33
33
 
34
34
  def _get_dict(self) -> dict:
35
- return self.config.get_dict()
35
+ return self.config._get_dict()
@@ -34,4 +34,4 @@ class SearchYamlConfiguration(DictConfiguration):
34
34
  logger.info(f"Found configuration file in project directory: {default_config_file}")
35
35
  config = YamlConfiguration(default_config_file, base_path)
36
36
 
37
- super().__init__(config.get_dict())
37
+ super().__init__(config._get_dict())
@@ -1,4 +1,5 @@
1
1
  import base64
2
+ import mimetypes
2
3
  import os
3
4
  from pathlib import Path
4
5
  from typing import IO, Any, Union
@@ -42,9 +43,16 @@ def image_constructor(loader: CustomLoaderLike, node: yaml.Node) -> str:
42
43
 
43
44
  # determine the file type from the file extension
44
45
  kind = filetype.guess(f)
45
- assert kind is not None
46
+ if kind is not None:
47
+ mime = kind.mime
48
+ else:
49
+ # For filetypes without magic headers (e.g. SVG), the filetype library
50
+ # doesn't work. In these cases, we try the mimetypes library.
51
+ mime, _ = mimetypes.guess_type(path)
46
52
 
47
- return f"data:{kind.mime};base64,{encoded}"
53
+ assert mime is not None, f"Could not determine mime type for {path}"
54
+
55
+ return f"data:{mime};base64,{encoded}"
48
56
 
49
57
 
50
58
  class YamlConfiguration(Configuration):
@@ -15,7 +15,7 @@ class InvalidInputReader(Reader):
15
15
  yield MoleculeEntry(
16
16
  raw_input=input,
17
17
  input_type="unknown",
18
- source=tuple(["input"]),
18
+ source=("input",),
19
19
  mol=None,
20
20
  errors=[Problem("invalid_input", "Invalid input")],
21
21
  )
@@ -5,9 +5,6 @@ from .reader import MoleculeEntry, Reader
5
5
 
6
6
 
7
7
  class Explorer(ABC):
8
- def __init__(self) -> None:
9
- pass
10
-
11
8
  @abstractmethod
12
9
  def explore(self, input: Any) -> Iterator[MoleculeEntry]:
13
10
  pass
@@ -3,6 +3,7 @@ from pathlib import Path
3
3
  from typing import Any, Iterator, Tuple, Union
4
4
 
5
5
  from .reader import ExploreCallable, MoleculeEntry, Reader
6
+ from .reader_config import ReaderConfig
6
7
 
7
8
  __all__ = ["FileReader"]
8
9
 
@@ -20,8 +21,8 @@ class FileReader(Reader):
20
21
  # convert filename to path
21
22
  try:
22
23
  path = Path(filename)
23
- except TypeError:
24
- raise ValueError("input must be a valid path")
24
+ except TypeError as e:
25
+ raise ValueError("input must be a valid path") from e
25
26
 
26
27
  # convert to absolute path
27
28
  if not path.is_absolute():
@@ -44,7 +45,9 @@ class FileReader(Reader):
44
45
  source: Tuple[str, ...] = tuple()
45
46
  else:
46
47
  source = entry.source
47
- yield entry._replace(source=tuple([filename, *source]))
48
+ yield entry._replace(source=(filename, *source))
48
49
 
49
50
  def __repr__(self) -> str:
50
51
  return f"FileReader(data_dir={self.data_dir})"
52
+
53
+ config = ReaderConfig(examples=["compounds.smiles"])
@@ -6,6 +6,7 @@ from rdkit.rdBase import BlockLogs
6
6
 
7
7
  from ..problem import Problem
8
8
  from .reader import ExploreCallable, MoleculeEntry, Reader
9
+ from .reader_config import ReaderConfig
9
10
 
10
11
  __all__ = ["InchiReader"]
11
12
 
@@ -48,10 +49,16 @@ class InchiReader(Reader):
48
49
  yield MoleculeEntry(
49
50
  raw_input=line,
50
51
  input_type="inchi",
51
- source=tuple(["raw_input"]),
52
+ source=("raw_input",),
52
53
  mol=mol,
53
54
  errors=errors,
54
55
  )
55
56
 
56
57
  def __repr__(self) -> str:
57
58
  return "InchiReader()"
59
+
60
+ config = ReaderConfig(
61
+ examples=[
62
+ "InChI=1S/C18H16O3/c1-2-13(12-8-4-3-5-9-12)16-17(19)14-10-6-7-11-15(14)21-18(16)20/h3-11,13,19H,2H2,1H3"
63
+ ]
64
+ )
@@ -14,7 +14,7 @@ class MolReader(Reader):
14
14
  yield MoleculeEntry(
15
15
  raw_input=mol,
16
16
  input_type="rdkit_mol",
17
- source=tuple(["raw_input"]),
17
+ source=("raw_input",),
18
18
  mol=mol,
19
19
  errors=[],
20
20
  )
@@ -1,12 +1,10 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import inspect
4
- from abc import ABC, ABCMeta, abstractmethod
5
- from functools import partial
4
+ from abc import ABC, abstractmethod
6
5
  from typing import Any, Callable, Iterator, List, NamedTuple, Optional, Tuple, Type
7
6
 
8
7
  from rdkit.Chem import Mol
9
- from typing_extensions import Protocol
10
8
 
11
9
  from ..problem import Problem
12
10
  from ..util import call_with_mappings
@@ -25,34 +23,30 @@ class MoleculeEntry(NamedTuple):
25
23
  ExploreCallable = Callable[[Any], Iterator[MoleculeEntry]]
26
24
 
27
25
 
28
- class ReaderFactory(Protocol):
29
- def __call__(self, config: dict, *args: Any, **kwargs: Any) -> Reader: ...
26
+ _factories: List[Type["Reader"]] = []
30
27
 
31
28
 
32
- _factories: List[ReaderFactory] = []
33
-
34
-
35
- class ReaderMeta(ABCMeta):
36
- def __init__(cls, name: str, bases: Tuple[type, ...], dct: dict) -> None:
37
- super().__init__(name, bases, dct)
38
-
39
- if not inspect.isabstract(cls):
40
- _factories.append(
41
- partial(
42
- call_with_mappings,
43
- cls,
44
- )
45
- )
46
-
47
-
48
- class Reader(ABC, metaclass=ReaderMeta):
29
+ class Reader(ABC):
49
30
  def __init__(self) -> None:
50
31
  super().__init__()
51
32
 
33
+ @classmethod
34
+ def __init_subclass__(
35
+ cls,
36
+ **kwargs: Any,
37
+ ) -> None:
38
+ super().__init_subclass__(**kwargs)
39
+ if not inspect.isabstract(cls):
40
+ _factories.append(cls)
41
+
52
42
  @abstractmethod
53
43
  def read(self, input: Any, explore: ExploreCallable) -> Iterator[MoleculeEntry]:
54
44
  pass
55
45
 
46
+ @classmethod
47
+ def get_reader_mapping(cls: Type[Reader]) -> List[Type["Reader"]]:
48
+ return _factories
49
+
56
50
  @classmethod
57
51
  def get_readers(cls: Type[Reader], **kwargs: Any) -> List[Reader]:
58
- return [factory(kwargs) for factory in _factories]
52
+ return [call_with_mappings(factory, kwargs) for factory in _factories]
@@ -0,0 +1,7 @@
1
+ from typing import List, TypedDict
2
+
3
+ __all__ = ["ReaderConfig"]
4
+
5
+
6
+ class ReaderConfig(TypedDict):
7
+ examples: List[str]
@@ -61,7 +61,7 @@ class SdfReader(Reader):
61
61
  yield MoleculeEntry(
62
62
  raw_input=mol_block,
63
63
  input_type="mol_block",
64
- source=tuple(["raw_input"]),
64
+ source=("raw_input",),
65
65
  mol=mol,
66
66
  errors=errors,
67
67
  )
@@ -6,6 +6,7 @@ from rdkit.rdBase import BlockLogs
6
6
 
7
7
  from ..problem import Problem
8
8
  from .reader import ExploreCallable, MoleculeEntry, Reader
9
+ from .reader_config import ReaderConfig
9
10
 
10
11
  __all__ = ["SmilesReader"]
11
12
 
@@ -58,10 +59,12 @@ class SmilesReader(Reader):
58
59
  yield MoleculeEntry(
59
60
  raw_input=line,
60
61
  input_type="smiles",
61
- source=tuple(["raw_input"]),
62
+ source=("raw_input",),
62
63
  mol=mol,
63
64
  errors=errors,
64
65
  )
65
66
 
66
67
  def __repr__(self) -> str:
67
68
  return "SmilesReader()"
69
+
70
+ config = ReaderConfig(examples=["C1=NC2=C(N1COCCO)N=C(NC2=O)N"])
@@ -21,7 +21,7 @@ class TarReader(Reader):
21
21
  if not member.isfile():
22
22
  continue
23
23
  for entry in explore(tar.extractfile(member)):
24
- yield entry._replace(source=tuple([member.name, *entry.source]))
24
+ yield entry._replace(source=(member.name, *entry.source))
25
25
 
26
26
  def __repr__(self) -> str:
27
27
  return "TarReader()"