nerdd-module 0.3.8__tar.gz → 0.3.10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/PKG-INFO +11 -7
- nerdd_module-0.3.10/README.md +19 -0
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/cli.py +43 -31
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/config/__init__.py +1 -0
- nerdd_module-0.3.10/nerdd_module/config/configuration.py +32 -0
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/config/merged_configuration.py +1 -1
- nerdd_module-0.3.10/nerdd_module/config/models.py +178 -0
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/config/package_configuration.py +1 -1
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/config/search_yaml_configuration.py +1 -1
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/config/yaml_configuration.py +10 -2
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/input/depth_first_explorer.py +1 -1
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/input/explorer.py +0 -3
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/input/file_reader.py +6 -3
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/input/inchi_reader.py +8 -1
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/input/mol_reader.py +1 -1
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/input/reader.py +17 -23
- nerdd_module-0.3.10/nerdd_module/input/reader_config.py +7 -0
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/input/sdf_reader.py +1 -1
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/input/smiles_reader.py +4 -1
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/input/tar_reader.py +1 -1
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/input/zip_reader.py +1 -1
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/model/convert_representations_step.py +5 -3
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/model/enforce_schema_step.py +6 -4
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/model/model.py +2 -1
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/model/read_input_step.py +1 -1
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/model/simple_model.py +12 -10
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/output/writer.py +7 -3
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/preprocessing/chembl_structure_pipeline.py +2 -3
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/preprocessing/filter_by_element.py +2 -2
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/util/call_with_mappings.py +5 -2
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module.egg-info/PKG-INFO +11 -7
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module.egg-info/SOURCES.txt +2 -0
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module.egg-info/requires.txt +6 -3
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/pyproject.toml +26 -10
- nerdd_module-0.3.8/README.md +0 -18
- nerdd_module-0.3.8/nerdd_module/config/configuration.py +0 -90
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/LICENSE +0 -0
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/__init__.py +1 -1
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/config/default_configuration.py +0 -0
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/config/dict_configuration.py +0 -0
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/converters/__init__.py +0 -0
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/converters/converter.py +0 -0
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/converters/identity_converter.py +0 -0
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/input/__init__.py +0 -0
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/input/gzip_reader.py +0 -0
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/input/list_reader.py +0 -0
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/input/string_reader.py +0 -0
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/model/__init__.py +0 -0
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/model/assign_mol_id_step.py +0 -0
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/model/assign_name_step.py +0 -0
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/model/write_output_step.py +0 -0
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/output/__init__.py +0 -0
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/output/csv_writer.py +0 -0
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/output/file_writer.py +0 -0
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/output/iterator_writer.py +0 -0
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/output/pandas_writer.py +0 -0
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/output/record_list_writer.py +0 -0
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/output/sdf_writer.py +0 -0
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/polyfills/__init__.py +0 -0
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/polyfills/files.py +0 -0
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/polyfills/get_entry_points.py +0 -0
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/polyfills/types.py +0 -0
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/polyfills/version.py +0 -0
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/preprocessing/__init__.py +0 -0
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/preprocessing/check_valid_smiles.py +0 -0
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/preprocessing/filter_by_weight.py +0 -0
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/preprocessing/preprocessing_step.py +0 -0
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/preprocessing/remove_stereochemistry.py +0 -0
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/preprocessing/sanitize.py +0 -0
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/problem.py +0 -0
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/py.typed +0 -0
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/steps/__init__.py +0 -0
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/steps/map_step.py +0 -0
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/steps/output_step.py +0 -0
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/steps/step.py +0 -0
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/tests/__init__.py +0 -0
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/tests/checks.py +0 -0
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/tests/models/AtomicMassModel.py +0 -0
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/tests/models/MolWeightModel.py +0 -0
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/tests/models/__init__.py +0 -0
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/tests/predictions.py +0 -0
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/tests/preprocessing/DummyPreprocessingStep.py +0 -0
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/tests/preprocessing/__init__.py +0 -0
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/tests/representations.py +0 -0
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/util/__init__.py +0 -0
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/util/package.py +0 -0
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module/version.py +0 -0
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module.egg-info/dependency_links.txt +0 -0
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/nerdd_module.egg-info/top_level.txt +0 -0
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/setup.cfg +0 -0
- {nerdd_module-0.3.8 → nerdd_module-0.3.10}/tests/test_features.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: nerdd-module
|
|
3
|
-
Version: 0.3.8
|
|
3
|
+
Version: 0.3.10
|
|
4
4
|
Summary: Base package to create NERDD modules
|
|
5
5
|
Author-email: Steffen Hirte <steffen.hirte@univie.ac.at>
|
|
6
6
|
Maintainer-email: Steffen Hirte <steffen.hirte@univie.ac.at>
|
|
@@ -57,15 +57,18 @@ Requires-Dist: filetype~=1.2.0
|
|
|
57
57
|
Requires-Dist: rich-click>=1.7.1
|
|
58
58
|
Requires-Dist: stringcase>=1.2.0
|
|
59
59
|
Requires-Dist: decorator>=5.1.1
|
|
60
|
+
Requires-Dist: pydantic>=2
|
|
60
61
|
Requires-Dist: importlib-resources>=5; python_version < "3.9"
|
|
61
62
|
Requires-Dist: importlib-metadata>=4.6; python_version < "3.10"
|
|
62
63
|
Provides-Extra: dev
|
|
63
|
-
Requires-Dist: mypy; extra == "dev"
|
|
64
|
-
Requires-Dist: ruff; extra == "dev"
|
|
64
|
+
Requires-Dist: mypy==1.13.0; extra == "dev"
|
|
65
|
+
Requires-Dist: ruff==0.7.1; extra == "dev"
|
|
65
66
|
Requires-Dist: pandas-stubs; extra == "dev"
|
|
67
|
+
Requires-Dist: rdkit-stubs; extra == "dev"
|
|
66
68
|
Requires-Dist: types-PyYAML; extra == "dev"
|
|
67
69
|
Requires-Dist: types-decorator; extra == "dev"
|
|
68
70
|
Requires-Dist: types-setuptools; extra == "dev"
|
|
71
|
+
Requires-Dist: pre-commit==3.5.0; extra == "dev"
|
|
69
72
|
Provides-Extra: rdkit
|
|
70
73
|
Requires-Dist: rdkit>=2022.3.3; extra == "rdkit"
|
|
71
74
|
Provides-Extra: csp
|
|
@@ -75,7 +78,7 @@ Requires-Dist: pytest; extra == "test"
|
|
|
75
78
|
Requires-Dist: pytest-sugar; extra == "test"
|
|
76
79
|
Requires-Dist: pytest-cov; extra == "test"
|
|
77
80
|
Requires-Dist: pytest-asyncio; extra == "test"
|
|
78
|
-
Requires-Dist: pytest-bdd; extra == "test"
|
|
81
|
+
Requires-Dist: pytest-bdd==7.3.0; extra == "test"
|
|
79
82
|
Requires-Dist: pytest-mock; extra == "test"
|
|
80
83
|
Requires-Dist: pytest-watcher; extra == "test"
|
|
81
84
|
Requires-Dist: hypothesis; extra == "test"
|
|
@@ -100,6 +103,7 @@ pip install -U nerdd-module
|
|
|
100
103
|
## Contribute
|
|
101
104
|
|
|
102
105
|
1. Fork and clone the code
|
|
103
|
-
2. Install test dependencies with `
|
|
104
|
-
3.
|
|
105
|
-
4.
|
|
106
|
+
2. Install test dependencies with `pip install -e .[test,dev,csp]`
|
|
107
|
+
3. Install pre-commit hooks `pre-commit install`
|
|
108
|
+
4. Run tests via `pytest` or `pytest-watch` (short: `ptw`)
|
|
109
|
+
5. Build docs via `pip install -e .[docs]` and `mkdocs serve`
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# NERDD Module
|
|
2
|
+
|
|
3
|
+
This package provides the basis to implement molecular prediction modules in the
|
|
4
|
+
NERDD ecosystem.
|
|
5
|
+
|
|
6
|
+
## Installation
|
|
7
|
+
|
|
8
|
+
``` bash
|
|
9
|
+
pip install -U nerdd-module
|
|
10
|
+
```
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
## Contribute
|
|
14
|
+
|
|
15
|
+
1. Fork and clone the code
|
|
16
|
+
2. Install test dependencies with `pip install -e .[test,dev,csp]`
|
|
17
|
+
3. Install pre-commit hooks `pre-commit install`
|
|
18
|
+
4. Run tests via `pytest` or `pytest-watch` (short: `ptw`)
|
|
19
|
+
5. Build docs via `pip install -e .[docs]` and `mkdocs serve`
|
|
@@ -1,18 +1,22 @@
|
|
|
1
1
|
import logging
|
|
2
|
+
import os
|
|
2
3
|
import sys
|
|
3
4
|
from typing import Any, Callable
|
|
4
5
|
|
|
5
6
|
import rich_click as click
|
|
6
7
|
from decorator import decorator
|
|
7
|
-
from stringcase import spinalcase
|
|
8
|
+
from stringcase import spinalcase
|
|
8
9
|
|
|
10
|
+
from .config import JobParameter
|
|
11
|
+
from .input import Reader
|
|
9
12
|
from .model import Model
|
|
13
|
+
from .output import FileWriter, Writer
|
|
10
14
|
|
|
11
15
|
__all__ = ["auto_cli"]
|
|
12
16
|
|
|
13
17
|
input_description = """{description}
|
|
14
18
|
|
|
15
|
-
INPUT molecules are provided as file paths or strings. The following formats are
|
|
19
|
+
INPUT molecules are provided as file paths or strings. The following formats are
|
|
16
20
|
supported:
|
|
17
21
|
|
|
18
22
|
{input_format_list}
|
|
@@ -21,25 +25,21 @@ Note that input formats shouldn't be mixed.
|
|
|
21
25
|
"""
|
|
22
26
|
|
|
23
27
|
|
|
24
|
-
def infer_click_type(param: dict) -> click.ParamType:
|
|
25
|
-
if
|
|
26
|
-
choices = [c
|
|
28
|
+
def infer_click_type(param: JobParameter) -> click.ParamType:
|
|
29
|
+
if param.choices is not None:
|
|
30
|
+
choices = [c.value for c in param.choices]
|
|
27
31
|
return click.Choice(choices)
|
|
28
32
|
|
|
29
33
|
type_map = {
|
|
30
34
|
"float": click.FLOAT,
|
|
31
|
-
"
|
|
32
|
-
"
|
|
35
|
+
"integer": click.INT,
|
|
36
|
+
"string": click.STRING,
|
|
33
37
|
"bool": click.BOOL,
|
|
34
38
|
}
|
|
35
39
|
|
|
36
|
-
|
|
37
|
-
raise ValueError(f"Parameter {param['name']} does not have a type")
|
|
38
|
-
|
|
39
|
-
t = param["type"]
|
|
40
|
-
|
|
40
|
+
t = param.type
|
|
41
41
|
if t not in type_map:
|
|
42
|
-
raise ValueError(f"Unknown type {t} for parameter {param
|
|
42
|
+
raise ValueError(f"Unknown type {t} for parameter {param.name}")
|
|
43
43
|
|
|
44
44
|
return type_map[t]
|
|
45
45
|
|
|
@@ -47,7 +47,7 @@ def infer_click_type(param: dict) -> click.ParamType:
|
|
|
47
47
|
@decorator
|
|
48
48
|
def auto_cli(f: Callable[..., Model], *args: Any, **kwargs: Any) -> None:
|
|
49
49
|
# infer the command name
|
|
50
|
-
|
|
50
|
+
command_name = os.path.basename(sys.argv[0])
|
|
51
51
|
|
|
52
52
|
# get the model
|
|
53
53
|
model = f()
|
|
@@ -59,21 +59,33 @@ def auto_cli(f: Callable[..., Model], *args: Any, **kwargs: Any) -> None:
|
|
|
59
59
|
description=model.description, input_format_list=input_format_list
|
|
60
60
|
)
|
|
61
61
|
|
|
62
|
-
output_format_list = [
|
|
62
|
+
output_format_list = [
|
|
63
|
+
output_format
|
|
64
|
+
for output_format, writer in Writer.get_writers(output_file=None).items()
|
|
65
|
+
if isinstance(writer, FileWriter)
|
|
66
|
+
]
|
|
63
67
|
|
|
64
68
|
# compose footer with examples
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
69
|
+
examples = []
|
|
70
|
+
if hasattr(model, "get_config"):
|
|
71
|
+
example_smiles = model.get_config().example_smiles
|
|
72
|
+
if example_smiles is not None:
|
|
73
|
+
examples.append(example_smiles)
|
|
74
|
+
|
|
75
|
+
for ReaderClass in Reader.get_reader_mapping():
|
|
76
|
+
if hasattr(ReaderClass, "config"):
|
|
77
|
+
reader_examples = ReaderClass.config.get("examples", [])
|
|
78
|
+
for example in reader_examples:
|
|
79
|
+
# check if example fits on one line
|
|
80
|
+
if len(example) < 120 and "\n" not in example:
|
|
81
|
+
examples.append(example)
|
|
82
|
+
|
|
83
|
+
if len(examples) > 0:
|
|
84
|
+
footer = "Examples:\n"
|
|
85
|
+
for example in examples:
|
|
86
|
+
footer += f'* {command_name} "{example}"\n'
|
|
87
|
+
else:
|
|
88
|
+
footer = ""
|
|
77
89
|
|
|
78
90
|
#
|
|
79
91
|
# Define the CLI entry point
|
|
@@ -107,12 +119,12 @@ def auto_cli(f: Callable[..., Model], *args: Any, **kwargs: Any) -> None:
|
|
|
107
119
|
#
|
|
108
120
|
for param in model.job_parameters:
|
|
109
121
|
# convert parameter name to spinal case (e.g. "max_confs" -> "max-confs")
|
|
110
|
-
param_name = spinalcase(param
|
|
122
|
+
param_name = spinalcase(param.name)
|
|
111
123
|
main = click.option(
|
|
112
124
|
f"--{param_name}",
|
|
113
|
-
default=param.
|
|
125
|
+
default=param.default,
|
|
114
126
|
type=infer_click_type(param),
|
|
115
|
-
help=param.
|
|
127
|
+
help=param.help_text,
|
|
116
128
|
)(main)
|
|
117
129
|
|
|
118
130
|
#
|
|
@@ -157,4 +169,4 @@ def auto_cli(f: Callable[..., Model], *args: Any, **kwargs: Any) -> None:
|
|
|
157
169
|
# show_default=True: default values are shown in the help text
|
|
158
170
|
main = click.command(context_settings={"show_default": True}, help=help_text)(main)
|
|
159
171
|
|
|
160
|
-
|
|
172
|
+
main()
|
|
@@ -2,6 +2,7 @@ from .configuration import *
|
|
|
2
2
|
from .default_configuration import *
|
|
3
3
|
from .dict_configuration import *
|
|
4
4
|
from .merged_configuration import *
|
|
5
|
+
from .models import *
|
|
5
6
|
from .package_configuration import *
|
|
6
7
|
from .search_yaml_configuration import *
|
|
7
8
|
from .yaml_configuration import *
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
from typing import Optional
|
|
3
|
+
|
|
4
|
+
from .models import Module
|
|
5
|
+
|
|
6
|
+
__all__ = ["Configuration"]
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class Configuration(ABC):
|
|
10
|
+
def __init__(self) -> None:
|
|
11
|
+
self._cached_config: Optional[Module] = None
|
|
12
|
+
|
|
13
|
+
def get_dict(self) -> Module:
|
|
14
|
+
if self._cached_config is None:
|
|
15
|
+
config = self._get_dict()
|
|
16
|
+
|
|
17
|
+
# validate the config
|
|
18
|
+
module = Module(**config)
|
|
19
|
+
|
|
20
|
+
self._cached_config = module
|
|
21
|
+
|
|
22
|
+
return self._cached_config
|
|
23
|
+
|
|
24
|
+
@abstractmethod
|
|
25
|
+
def _get_dict(self) -> dict:
|
|
26
|
+
pass
|
|
27
|
+
|
|
28
|
+
def is_empty(self) -> bool:
|
|
29
|
+
return self.get_dict() == {}
|
|
30
|
+
|
|
31
|
+
def __repr__(self) -> str:
|
|
32
|
+
return f"{self.__class__.__name__}({self._get_dict()})"
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
from typing import Any, List, Literal, Optional, Union
|
|
2
|
+
|
|
3
|
+
from pydantic import BaseModel, model_validator
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class Partner(BaseModel):
|
|
7
|
+
name: str
|
|
8
|
+
logo: str
|
|
9
|
+
url: Optional[str] = None
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class Author(BaseModel):
|
|
13
|
+
"""
|
|
14
|
+
Author information
|
|
15
|
+
|
|
16
|
+
Attributes:
|
|
17
|
+
first_name : str
|
|
18
|
+
First name of the author.
|
|
19
|
+
last_name : str
|
|
20
|
+
Last name of the author.
|
|
21
|
+
email : Optional[str]
|
|
22
|
+
Email of the author. If provided, the author is a corresponding author.
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
first_name: str
|
|
26
|
+
last_name: str
|
|
27
|
+
email: Optional[str] = None
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class Publication(BaseModel):
|
|
31
|
+
title: str
|
|
32
|
+
authors: List[Author] = []
|
|
33
|
+
journal: str
|
|
34
|
+
year: int
|
|
35
|
+
doi: Optional[str]
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class JobParameterChoice(BaseModel):
|
|
39
|
+
value: str
|
|
40
|
+
label: Optional[str] = None
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class JobParameter(BaseModel):
|
|
44
|
+
name: str
|
|
45
|
+
type: str
|
|
46
|
+
visible_name: Optional[str] = None
|
|
47
|
+
help_text: Optional[str] = None
|
|
48
|
+
default: Optional[str] = None
|
|
49
|
+
required: bool = False
|
|
50
|
+
choices: Optional[List[JobParameterChoice]] = None
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
Task = Literal[
|
|
54
|
+
"molecular_property_prediction",
|
|
55
|
+
"atom_property_prediction",
|
|
56
|
+
"derivative_property_prediction",
|
|
57
|
+
]
|
|
58
|
+
Level = Literal["molecule", "atom", "derivative"]
|
|
59
|
+
|
|
60
|
+
FormatSpec = Union[List[str], str]
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class IncludeExcludeFormatSpec(BaseModel):
|
|
64
|
+
include: Optional[FormatSpec]
|
|
65
|
+
exclude: Optional[FormatSpec]
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class ResultProperty(BaseModel):
|
|
69
|
+
name: str
|
|
70
|
+
type: str
|
|
71
|
+
visible_name: Optional[str] = None
|
|
72
|
+
help_text: Optional[str] = None
|
|
73
|
+
sortable: bool = False
|
|
74
|
+
group: Optional[str] = None
|
|
75
|
+
level: Level = "molecule"
|
|
76
|
+
formats: Union[FormatSpec, IncludeExcludeFormatSpec, None] = None
|
|
77
|
+
representation: Optional[str] = None
|
|
78
|
+
|
|
79
|
+
def is_visible(self, output_format: str) -> bool:
|
|
80
|
+
formats = self.formats
|
|
81
|
+
|
|
82
|
+
if formats is None:
|
|
83
|
+
return True
|
|
84
|
+
elif isinstance(formats, list):
|
|
85
|
+
return output_format in formats
|
|
86
|
+
elif isinstance(formats, IncludeExcludeFormatSpec):
|
|
87
|
+
include = formats.include
|
|
88
|
+
exclude = formats.exclude or []
|
|
89
|
+
return (include is None or output_format in include) and output_format not in exclude
|
|
90
|
+
else:
|
|
91
|
+
raise ValueError(f"Invalid formats declaration {formats} in result property {self}")
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
class Module(BaseModel):
|
|
95
|
+
task: Optional[Task] = None
|
|
96
|
+
rank: Optional[int] = None
|
|
97
|
+
name: Optional[str] = None
|
|
98
|
+
batch_size: int = 100
|
|
99
|
+
version: Optional[str] = None
|
|
100
|
+
visible_name: Optional[str] = None
|
|
101
|
+
logo: Optional[str] = None
|
|
102
|
+
logo_title: Optional[str] = None
|
|
103
|
+
logo_caption: Optional[str] = None
|
|
104
|
+
example_smiles: Optional[str] = None
|
|
105
|
+
title: Optional[str] = None
|
|
106
|
+
description: Optional[str] = None
|
|
107
|
+
partners: List[Partner] = []
|
|
108
|
+
publications: List[Publication] = []
|
|
109
|
+
about: Optional[str] = None
|
|
110
|
+
job_parameters: List[JobParameter] = []
|
|
111
|
+
result_properties: List[ResultProperty] = []
|
|
112
|
+
|
|
113
|
+
def get_property_columns_of_type(self, t: Level) -> List[ResultProperty]:
|
|
114
|
+
return [c for c in self.result_properties if c.level == t]
|
|
115
|
+
|
|
116
|
+
def molecular_property_columns(self) -> List[ResultProperty]:
|
|
117
|
+
return self.get_property_columns_of_type("molecule")
|
|
118
|
+
|
|
119
|
+
def atom_property_columns(self) -> List[ResultProperty]:
|
|
120
|
+
return self.get_property_columns_of_type("atom")
|
|
121
|
+
|
|
122
|
+
def derivative_property_columns(self) -> List[ResultProperty]:
|
|
123
|
+
return self.get_property_columns_of_type("derivative")
|
|
124
|
+
|
|
125
|
+
def get_visible_properties(self, output_format: str) -> List[ResultProperty]:
|
|
126
|
+
return [p for p in self.result_properties if p.is_visible(output_format)]
|
|
127
|
+
|
|
128
|
+
@model_validator(mode="after")
|
|
129
|
+
@classmethod
|
|
130
|
+
def validate_model(cls, values: Any) -> Any:
|
|
131
|
+
assert isinstance(values, Module)
|
|
132
|
+
|
|
133
|
+
num_atom_properties = len(values.get_property_columns_of_type("atom"))
|
|
134
|
+
num_derivative_properties = len(values.get_property_columns_of_type("derivative"))
|
|
135
|
+
task = values.task
|
|
136
|
+
if task is None:
|
|
137
|
+
# if task is not specified, try to derive it from the result_properties
|
|
138
|
+
if num_atom_properties > 0:
|
|
139
|
+
task = "atom_property_prediction"
|
|
140
|
+
elif num_derivative_properties > 0:
|
|
141
|
+
task = "derivative_property_prediction"
|
|
142
|
+
else:
|
|
143
|
+
task = "molecular_property_prediction"
|
|
144
|
+
|
|
145
|
+
values.task = task
|
|
146
|
+
else:
|
|
147
|
+
# if task is specified, check if it is consistent with the result_properties
|
|
148
|
+
if num_atom_properties > 0:
|
|
149
|
+
assert (
|
|
150
|
+
task == "atom_property_prediction"
|
|
151
|
+
), "Task should be atom_property_prediction if atom properties are present."
|
|
152
|
+
elif num_derivative_properties > 0:
|
|
153
|
+
assert task == "derivative_property_prediction", (
|
|
154
|
+
"Task should be derivative_property_prediction if derivative properties "
|
|
155
|
+
"are present."
|
|
156
|
+
)
|
|
157
|
+
else:
|
|
158
|
+
assert task == "molecular_property_prediction", (
|
|
159
|
+
"Task should be molecular_property_prediction if no atom or derivative "
|
|
160
|
+
"properties are present."
|
|
161
|
+
)
|
|
162
|
+
|
|
163
|
+
# check that a module can only predict atom or derivative properties, not both
|
|
164
|
+
assert (
|
|
165
|
+
num_atom_properties == 0 or num_derivative_properties == 0
|
|
166
|
+
), "A module can only predict atom or derivative properties, not both."
|
|
167
|
+
|
|
168
|
+
# check that two properties with the same group appear next to each other
|
|
169
|
+
groups = [p.group for p in values.result_properties if p.group is not None]
|
|
170
|
+
for group in groups:
|
|
171
|
+
indices = [i for i, p in enumerate(values.result_properties) if p.group == group]
|
|
172
|
+
for i, j in zip(indices[:-1], indices[1:]):
|
|
173
|
+
assert i + 1 == j, (
|
|
174
|
+
f"Properties with the same group should appear next to each other, "
|
|
175
|
+
f"but group {group} appears at incides {i} and {j}."
|
|
176
|
+
)
|
|
177
|
+
|
|
178
|
+
return values
|
|
@@ -34,4 +34,4 @@ class SearchYamlConfiguration(DictConfiguration):
|
|
|
34
34
|
logger.info(f"Found configuration file in project directory: {default_config_file}")
|
|
35
35
|
config = YamlConfiguration(default_config_file, base_path)
|
|
36
36
|
|
|
37
|
-
super().__init__(config.
|
|
37
|
+
super().__init__(config._get_dict())
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import base64
|
|
2
|
+
import mimetypes
|
|
2
3
|
import os
|
|
3
4
|
from pathlib import Path
|
|
4
5
|
from typing import IO, Any, Union
|
|
@@ -42,9 +43,16 @@ def image_constructor(loader: CustomLoaderLike, node: yaml.Node) -> str:
|
|
|
42
43
|
|
|
43
44
|
# determine the file type from the file extension
|
|
44
45
|
kind = filetype.guess(f)
|
|
45
|
-
|
|
46
|
+
if kind is not None:
|
|
47
|
+
mime = kind.mime
|
|
48
|
+
else:
|
|
49
|
+
# For filetypes without magic headers (e.g. SVG), the filetype library
|
|
50
|
+
# doesn't work. In these cases, we try the mimetypes library.
|
|
51
|
+
mime, _ = mimetypes.guess_type(path)
|
|
46
52
|
|
|
47
|
-
|
|
53
|
+
assert mime is not None, f"Could not determine mime type for {path}"
|
|
54
|
+
|
|
55
|
+
return f"data:{mime};base64,{encoded}"
|
|
48
56
|
|
|
49
57
|
|
|
50
58
|
class YamlConfiguration(Configuration):
|
|
@@ -3,6 +3,7 @@ from pathlib import Path
|
|
|
3
3
|
from typing import Any, Iterator, Tuple, Union
|
|
4
4
|
|
|
5
5
|
from .reader import ExploreCallable, MoleculeEntry, Reader
|
|
6
|
+
from .reader_config import ReaderConfig
|
|
6
7
|
|
|
7
8
|
__all__ = ["FileReader"]
|
|
8
9
|
|
|
@@ -20,8 +21,8 @@ class FileReader(Reader):
|
|
|
20
21
|
# convert filename to path
|
|
21
22
|
try:
|
|
22
23
|
path = Path(filename)
|
|
23
|
-
except TypeError:
|
|
24
|
-
raise ValueError("input must be a valid path")
|
|
24
|
+
except TypeError as e:
|
|
25
|
+
raise ValueError("input must be a valid path") from e
|
|
25
26
|
|
|
26
27
|
# convert to absolute path
|
|
27
28
|
if not path.is_absolute():
|
|
@@ -44,7 +45,9 @@ class FileReader(Reader):
|
|
|
44
45
|
source: Tuple[str, ...] = tuple()
|
|
45
46
|
else:
|
|
46
47
|
source = entry.source
|
|
47
|
-
yield entry._replace(source=
|
|
48
|
+
yield entry._replace(source=(filename, *source))
|
|
48
49
|
|
|
49
50
|
def __repr__(self) -> str:
|
|
50
51
|
return f"FileReader(data_dir={self.data_dir})"
|
|
52
|
+
|
|
53
|
+
config = ReaderConfig(examples=["compounds.smiles"])
|
|
@@ -6,6 +6,7 @@ from rdkit.rdBase import BlockLogs
|
|
|
6
6
|
|
|
7
7
|
from ..problem import Problem
|
|
8
8
|
from .reader import ExploreCallable, MoleculeEntry, Reader
|
|
9
|
+
from .reader_config import ReaderConfig
|
|
9
10
|
|
|
10
11
|
__all__ = ["InchiReader"]
|
|
11
12
|
|
|
@@ -48,10 +49,16 @@ class InchiReader(Reader):
|
|
|
48
49
|
yield MoleculeEntry(
|
|
49
50
|
raw_input=line,
|
|
50
51
|
input_type="inchi",
|
|
51
|
-
source=
|
|
52
|
+
source=("raw_input",),
|
|
52
53
|
mol=mol,
|
|
53
54
|
errors=errors,
|
|
54
55
|
)
|
|
55
56
|
|
|
56
57
|
def __repr__(self) -> str:
|
|
57
58
|
return "InchiReader()"
|
|
59
|
+
|
|
60
|
+
config = ReaderConfig(
|
|
61
|
+
examples=[
|
|
62
|
+
"InChI=1S/C18H16O3/c1-2-13(12-8-4-3-5-9-12)16-17(19)14-10-6-7-11-15(14)21-18(16)20/h3-11,13,19H,2H2,1H3"
|
|
63
|
+
]
|
|
64
|
+
)
|
|
@@ -1,12 +1,10 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
import inspect
|
|
4
|
-
from abc import ABC, ABCMeta, abstractmethod
|
|
5
|
-
from functools import partial
|
|
4
|
+
from abc import ABC, abstractmethod
|
|
6
5
|
from typing import Any, Callable, Iterator, List, NamedTuple, Optional, Tuple, Type
|
|
7
6
|
|
|
8
7
|
from rdkit.Chem import Mol
|
|
9
|
-
from typing_extensions import Protocol
|
|
10
8
|
|
|
11
9
|
from ..problem import Problem
|
|
12
10
|
from ..util import call_with_mappings
|
|
@@ -25,34 +23,30 @@ class MoleculeEntry(NamedTuple):
|
|
|
25
23
|
ExploreCallable = Callable[[Any], Iterator[MoleculeEntry]]
|
|
26
24
|
|
|
27
25
|
|
|
28
|
-
|
|
29
|
-
def __call__(self, config: dict, *args: Any, **kwargs: Any) -> Reader: ...
|
|
26
|
+
_factories: List[Type["Reader"]] = []
|
|
30
27
|
|
|
31
28
|
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
class ReaderMeta(ABCMeta):
|
|
36
|
-
def __init__(cls, name: str, bases: Tuple[type, ...], dct: dict) -> None:
|
|
37
|
-
super().__init__(name, bases, dct)
|
|
38
|
-
|
|
39
|
-
if not inspect.isabstract(cls):
|
|
40
|
-
_factories.append(
|
|
41
|
-
partial(
|
|
42
|
-
call_with_mappings,
|
|
43
|
-
cls,
|
|
44
|
-
)
|
|
45
|
-
)
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
class Reader(ABC, metaclass=ReaderMeta):
|
|
29
|
+
class Reader(ABC):
|
|
49
30
|
def __init__(self) -> None:
|
|
50
31
|
super().__init__()
|
|
51
32
|
|
|
33
|
+
@classmethod
|
|
34
|
+
def __init_subclass__(
|
|
35
|
+
cls,
|
|
36
|
+
**kwargs: Any,
|
|
37
|
+
) -> None:
|
|
38
|
+
super().__init_subclass__(**kwargs)
|
|
39
|
+
if not inspect.isabstract(cls):
|
|
40
|
+
_factories.append(cls)
|
|
41
|
+
|
|
52
42
|
@abstractmethod
|
|
53
43
|
def read(self, input: Any, explore: ExploreCallable) -> Iterator[MoleculeEntry]:
|
|
54
44
|
pass
|
|
55
45
|
|
|
46
|
+
@classmethod
|
|
47
|
+
def get_reader_mapping(cls: Type[Reader]) -> List[Type["Reader"]]:
|
|
48
|
+
return _factories
|
|
49
|
+
|
|
56
50
|
@classmethod
|
|
57
51
|
def get_readers(cls: Type[Reader], **kwargs: Any) -> List[Reader]:
|
|
58
|
-
return [factory
|
|
52
|
+
return [call_with_mappings(factory, kwargs) for factory in _factories]
|
|
@@ -6,6 +6,7 @@ from rdkit.rdBase import BlockLogs
|
|
|
6
6
|
|
|
7
7
|
from ..problem import Problem
|
|
8
8
|
from .reader import ExploreCallable, MoleculeEntry, Reader
|
|
9
|
+
from .reader_config import ReaderConfig
|
|
9
10
|
|
|
10
11
|
__all__ = ["SmilesReader"]
|
|
11
12
|
|
|
@@ -58,10 +59,12 @@ class SmilesReader(Reader):
|
|
|
58
59
|
yield MoleculeEntry(
|
|
59
60
|
raw_input=line,
|
|
60
61
|
input_type="smiles",
|
|
61
|
-
source=
|
|
62
|
+
source=("raw_input",),
|
|
62
63
|
mol=mol,
|
|
63
64
|
errors=errors,
|
|
64
65
|
)
|
|
65
66
|
|
|
66
67
|
def __repr__(self) -> str:
|
|
67
68
|
return "SmilesReader()"
|
|
69
|
+
|
|
70
|
+
config = ReaderConfig(examples=["C1=NC2=C(N1COCCO)N=C(NC2=O)N"])
|
|
@@ -21,7 +21,7 @@ class TarReader(Reader):
|
|
|
21
21
|
if not member.isfile():
|
|
22
22
|
continue
|
|
23
23
|
for entry in explore(tar.extractfile(member)):
|
|
24
|
-
yield entry._replace(source=
|
|
24
|
+
yield entry._replace(source=(member.name, *entry.source))
|
|
25
25
|
|
|
26
26
|
def __repr__(self) -> str:
|
|
27
27
|
return "TarReader()"
|