nerdd-module 0.3.7__tar.gz → 0.3.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/PKG-INFO +9 -6
- nerdd_module-0.3.9/README.md +19 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/__init__.py +1 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/cli.py +3 -3
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/config/configuration.py +15 -14
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/config/yaml_configuration.py +10 -2
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/input/depth_first_explorer.py +1 -1
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/input/explorer.py +0 -3
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/input/file_reader.py +3 -3
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/input/inchi_reader.py +1 -1
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/input/mol_reader.py +1 -1
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/input/sdf_reader.py +1 -1
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/input/smiles_reader.py +1 -1
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/input/tar_reader.py +1 -1
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/input/zip_reader.py +1 -1
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/model/enforce_schema_step.py +3 -3
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/model/write_output_step.py +3 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/output/__init__.py +2 -1
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/output/writer.py +1 -4
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/preprocessing/chembl_structure_pipeline.py +2 -3
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/preprocessing/filter_by_element.py +5 -15
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/preprocessing/filter_by_weight.py +2 -10
- nerdd_module-0.3.9/nerdd_module/problem.py +42 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/tests/models/AtomicMassModel.py +1 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/tests/models/MolWeightModel.py +1 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/util/call_with_mappings.py +5 -2
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module.egg-info/PKG-INFO +9 -6
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module.egg-info/requires.txt +4 -2
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/pyproject.toml +21 -8
- nerdd_module-0.3.7/README.md +0 -18
- nerdd_module-0.3.7/nerdd_module/problem.py +0 -16
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/LICENSE +0 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/config/__init__.py +0 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/config/default_configuration.py +0 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/config/dict_configuration.py +0 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/config/merged_configuration.py +0 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/config/package_configuration.py +0 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/config/search_yaml_configuration.py +0 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/converters/__init__.py +0 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/converters/converter.py +0 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/converters/identity_converter.py +0 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/input/__init__.py +0 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/input/gzip_reader.py +0 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/input/list_reader.py +0 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/input/reader.py +0 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/input/string_reader.py +0 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/model/__init__.py +0 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/model/assign_mol_id_step.py +0 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/model/assign_name_step.py +0 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/model/convert_representations_step.py +0 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/model/model.py +0 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/model/read_input_step.py +0 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/model/simple_model.py +0 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/output/csv_writer.py +0 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/output/file_writer.py +0 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/output/iterator_writer.py +0 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/output/pandas_writer.py +0 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/output/record_list_writer.py +0 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/output/sdf_writer.py +0 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/polyfills/__init__.py +0 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/polyfills/files.py +0 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/polyfills/get_entry_points.py +0 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/polyfills/types.py +0 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/polyfills/version.py +0 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/preprocessing/__init__.py +0 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/preprocessing/check_valid_smiles.py +0 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/preprocessing/preprocessing_step.py +0 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/preprocessing/remove_stereochemistry.py +0 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/preprocessing/sanitize.py +0 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/py.typed +0 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/steps/__init__.py +0 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/steps/map_step.py +0 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/steps/output_step.py +0 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/steps/step.py +0 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/tests/__init__.py +0 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/tests/checks.py +0 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/tests/models/__init__.py +0 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/tests/predictions.py +0 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/tests/preprocessing/DummyPreprocessingStep.py +0 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/tests/preprocessing/__init__.py +0 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/tests/representations.py +0 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/util/__init__.py +0 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/util/package.py +0 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/version.py +0 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module.egg-info/SOURCES.txt +0 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module.egg-info/dependency_links.txt +0 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module.egg-info/top_level.txt +0 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/setup.cfg +0 -0
- {nerdd_module-0.3.7 → nerdd_module-0.3.9}/tests/test_features.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: nerdd-module
|
|
3
|
-
Version: 0.3.7
|
|
3
|
+
Version: 0.3.9
|
|
4
4
|
Summary: Base package to create NERDD modules
|
|
5
5
|
Author-email: Steffen Hirte <steffen.hirte@univie.ac.at>
|
|
6
6
|
Maintainer-email: Steffen Hirte <steffen.hirte@univie.ac.at>
|
|
@@ -60,12 +60,14 @@ Requires-Dist: decorator>=5.1.1
|
|
|
60
60
|
Requires-Dist: importlib-resources>=5; python_version < "3.9"
|
|
61
61
|
Requires-Dist: importlib-metadata>=4.6; python_version < "3.10"
|
|
62
62
|
Provides-Extra: dev
|
|
63
|
-
Requires-Dist: mypy; extra == "dev"
|
|
64
|
-
Requires-Dist: ruff; extra == "dev"
|
|
63
|
+
Requires-Dist: mypy==1.13.0; extra == "dev"
|
|
64
|
+
Requires-Dist: ruff==0.7.1; extra == "dev"
|
|
65
65
|
Requires-Dist: pandas-stubs; extra == "dev"
|
|
66
|
+
Requires-Dist: rdkit-stubs; extra == "dev"
|
|
66
67
|
Requires-Dist: types-PyYAML; extra == "dev"
|
|
67
68
|
Requires-Dist: types-decorator; extra == "dev"
|
|
68
69
|
Requires-Dist: types-setuptools; extra == "dev"
|
|
70
|
+
Requires-Dist: pre-commit==3.5.0; extra == "dev"
|
|
69
71
|
Provides-Extra: rdkit
|
|
70
72
|
Requires-Dist: rdkit>=2022.3.3; extra == "rdkit"
|
|
71
73
|
Provides-Extra: csp
|
|
@@ -100,6 +102,7 @@ pip install -U nerdd-module
|
|
|
100
102
|
## Contribute
|
|
101
103
|
|
|
102
104
|
1. Fork and clone the code
|
|
103
|
-
2. Install test dependencies with ` pip install -e .[test,dev,csp]`
|
|
104
|
-
3. Run tests via `pytest` or `pytest-watch` (short: `ptw`)
|
|
105
|
-
4. Build docs via ` pip install -e .[docs]` and ` mkdocs serve`
|
|
105
|
+
2. Install test dependencies with `pip install -e .[test,dev,csp]`
|
|
106
|
+
3. Install pre-commit hooks `pre-commit install`
|
|
107
|
+
4. Run tests via `pytest` or `pytest-watch` (short: `ptw`)
|
|
108
|
+
5. Build docs via `pip install -e .[docs]` and `mkdocs serve`
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# NERDD Module
|
|
2
|
+
|
|
3
|
+
This package provides the basis to implement molecular prediction modules in the
|
|
4
|
+
NERDD ecosystem.
|
|
5
|
+
|
|
6
|
+
## Installation
|
|
7
|
+
|
|
8
|
+
``` bash
|
|
9
|
+
pip install -U nerdd-module
|
|
10
|
+
```
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
## Contribute
|
|
14
|
+
|
|
15
|
+
1. Fork and clone the code
|
|
16
|
+
2. Install test dependencies with `pip install -e .[test,dev,csp]`
|
|
17
|
+
3. Install pre-commit hooks `pre-commit install`
|
|
18
|
+
4. Run tests via `pytest` or `pytest-watch` (short: `ptw`)
|
|
19
|
+
5. Build docs via `pip install -e .[docs]` and `mkdocs serve`
|
|
@@ -4,7 +4,7 @@ from typing import Any, Callable
|
|
|
4
4
|
|
|
5
5
|
import rich_click as click
|
|
6
6
|
from decorator import decorator
|
|
7
|
-
from stringcase import spinalcase
|
|
7
|
+
from stringcase import spinalcase
|
|
8
8
|
|
|
9
9
|
from .model import Model
|
|
10
10
|
|
|
@@ -12,7 +12,7 @@ __all__ = ["auto_cli"]
|
|
|
12
12
|
|
|
13
13
|
input_description = """{description}
|
|
14
14
|
|
|
15
|
-
INPUT molecules are provided as file paths or strings. The following formats are
|
|
15
|
+
INPUT molecules are provided as file paths or strings. The following formats are
|
|
16
16
|
supported:
|
|
17
17
|
|
|
18
18
|
{input_format_list}
|
|
@@ -157,4 +157,4 @@ def auto_cli(f: Callable[..., Model], *args: Any, **kwargs: Any) -> None:
|
|
|
157
157
|
# show_default=True: default values are shown in the help text
|
|
158
158
|
main = click.command(context_settings={"show_default": True}, help=help_text)(main)
|
|
159
159
|
|
|
160
|
-
|
|
160
|
+
main()
|
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
from abc import ABC, abstractmethod
|
|
2
|
-
from functools import lru_cache
|
|
3
|
-
from typing import List
|
|
2
|
+
from typing import List, Optional
|
|
4
3
|
|
|
5
4
|
__all__ = ["Configuration"]
|
|
6
5
|
|
|
@@ -30,23 +29,25 @@ def is_visible(result_property: dict, output_format: str) -> bool:
|
|
|
30
29
|
|
|
31
30
|
class Configuration(ABC):
|
|
32
31
|
def __init__(self) -> None:
|
|
33
|
-
|
|
32
|
+
self._cached_config: Optional[dict] = None
|
|
34
33
|
|
|
35
|
-
@lru_cache(1)
|
|
36
34
|
def get_dict(self) -> dict:
|
|
37
|
-
|
|
35
|
+
if self._cached_config is None:
|
|
36
|
+
config = self._get_dict()
|
|
37
|
+
|
|
38
|
+
if "result_properties" not in config:
|
|
39
|
+
config["result_properties"] = []
|
|
38
40
|
|
|
39
|
-
|
|
40
|
-
config
|
|
41
|
+
# check that a module can only predict atom or derivative properties, not both
|
|
42
|
+
num_atom_properties = len(get_property_columns_of_type(config, "atom"))
|
|
43
|
+
num_derivative_properties = len(get_property_columns_of_type(config, "derivative"))
|
|
44
|
+
assert (
|
|
45
|
+
num_atom_properties == 0 or num_derivative_properties == 0
|
|
46
|
+
), "A module can only predict atom or derivative properties, not both."
|
|
41
47
|
|
|
42
|
-
|
|
43
|
-
num_atom_properties = len(get_property_columns_of_type(config, "atom"))
|
|
44
|
-
num_derivative_properties = len(get_property_columns_of_type(config, "derivative"))
|
|
45
|
-
assert (
|
|
46
|
-
num_atom_properties == 0 or num_derivative_properties == 0
|
|
47
|
-
), "A module can only predict atom or derivative properties, not both."
|
|
48
|
+
self._cached_config = config
|
|
48
49
|
|
|
49
|
-
return
|
|
50
|
+
return self._cached_config
|
|
50
51
|
|
|
51
52
|
@abstractmethod
|
|
52
53
|
def _get_dict(self) -> dict:
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import base64
|
|
2
|
+
import mimetypes
|
|
2
3
|
import os
|
|
3
4
|
from pathlib import Path
|
|
4
5
|
from typing import IO, Any, Union
|
|
@@ -42,9 +43,16 @@ def image_constructor(loader: CustomLoaderLike, node: yaml.Node) -> str:
|
|
|
42
43
|
|
|
43
44
|
# determine the file type from the file extension
|
|
44
45
|
kind = filetype.guess(f)
|
|
45
|
-
|
|
46
|
+
if kind is not None:
|
|
47
|
+
mime = kind.mime
|
|
48
|
+
else:
|
|
49
|
+
# For filetypes without magic headers (e.g. SVG), the filetype library
|
|
50
|
+
# doesn't work. In these cases, we try the mimetypes library.
|
|
51
|
+
mime, _ = mimetypes.guess_type(path)
|
|
46
52
|
|
|
47
|
-
|
|
53
|
+
assert mime is not None, f"Could not determine mime type for {path}"
|
|
54
|
+
|
|
55
|
+
return f"data:{mime};base64,{encoded}"
|
|
48
56
|
|
|
49
57
|
|
|
50
58
|
class YamlConfiguration(Configuration):
|
|
@@ -20,8 +20,8 @@ class FileReader(Reader):
|
|
|
20
20
|
# convert filename to path
|
|
21
21
|
try:
|
|
22
22
|
path = Path(filename)
|
|
23
|
-
except TypeError:
|
|
24
|
-
raise ValueError("input must be a valid path")
|
|
23
|
+
except TypeError as e:
|
|
24
|
+
raise ValueError("input must be a valid path") from e
|
|
25
25
|
|
|
26
26
|
# convert to absolute path
|
|
27
27
|
if not path.is_absolute():
|
|
@@ -44,7 +44,7 @@ class FileReader(Reader):
|
|
|
44
44
|
source: Tuple[str, ...] = tuple()
|
|
45
45
|
else:
|
|
46
46
|
source = entry.source
|
|
47
|
-
yield entry._replace(source=
|
|
47
|
+
yield entry._replace(source=(filename, *source))
|
|
48
48
|
|
|
49
49
|
def __repr__(self) -> str:
|
|
50
50
|
return f"FileReader(data_dir={self.data_dir})"
|
|
@@ -21,7 +21,7 @@ class TarReader(Reader):
|
|
|
21
21
|
if not member.isfile():
|
|
22
22
|
continue
|
|
23
23
|
for entry in explore(tar.extractfile(member)):
|
|
24
|
-
yield entry._replace(source=
|
|
24
|
+
yield entry._replace(source=(member.name, *entry.source))
|
|
25
25
|
|
|
26
26
|
def __repr__(self) -> str:
|
|
27
27
|
return "TarReader()"
|
|
@@ -23,7 +23,7 @@ class ZipReader(Reader):
|
|
|
23
23
|
continue
|
|
24
24
|
with zipf.open(member, "r") as f:
|
|
25
25
|
for entry in explore(f):
|
|
26
|
-
yield entry._replace(source=
|
|
26
|
+
yield entry._replace(source=(member, *entry.source))
|
|
27
27
|
|
|
28
28
|
def __repr__(self) -> str:
|
|
29
29
|
return "ZipReader()"
|
|
@@ -16,9 +16,9 @@ class EnforceSchemaStep(Step):
|
|
|
16
16
|
|
|
17
17
|
# check that properties are unique
|
|
18
18
|
if len(self._property_names) != len(set(self._property_names)):
|
|
19
|
-
duplicate_properties =
|
|
20
|
-
|
|
21
|
-
|
|
19
|
+
duplicate_properties = {
|
|
20
|
+
x for x in self._property_names if self._property_names.count(x) > 1
|
|
21
|
+
}
|
|
22
22
|
logger.warning(
|
|
23
23
|
f"Duplicate properties in result_properties: " f"{', '.join(duplicate_properties)}"
|
|
24
24
|
)
|
|
@@ -17,3 +17,6 @@ class WriteOutputStep(OutputStep):
|
|
|
17
17
|
writer = Writer.get_writer(self._output_format, **self._kawrgs)
|
|
18
18
|
result = writer.write(source)
|
|
19
19
|
return result
|
|
20
|
+
|
|
21
|
+
def __repr__(self) -> str:
|
|
22
|
+
return f"WriteOutputStep(output_format={self._output_format}, kwargs={self._kawrgs})"
|
|
@@ -24,9 +24,6 @@ _factories: Dict[str, WriterFactory] = {}
|
|
|
24
24
|
class Writer(ABC):
|
|
25
25
|
"""Abstract class for writers."""
|
|
26
26
|
|
|
27
|
-
def __init__(self) -> None:
|
|
28
|
-
pass
|
|
29
|
-
|
|
30
27
|
@classmethod
|
|
31
28
|
def __init_subclass__(
|
|
32
29
|
cls,
|
|
@@ -50,5 +47,5 @@ class Writer(ABC):
|
|
|
50
47
|
return _factories[output_format](kwargs)
|
|
51
48
|
|
|
52
49
|
@classmethod
|
|
53
|
-
def get_output_formats(
|
|
50
|
+
def get_output_formats(cls) -> List[str]:
|
|
54
51
|
return list(_factories.keys())
|
{nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/preprocessing/chembl_structure_pipeline.py
RENAMED
|
@@ -18,8 +18,7 @@ try:
|
|
|
18
18
|
# importing chembl_structure_pipeline already logs messages
|
|
19
19
|
# --> suppress them temporarily
|
|
20
20
|
with BlockLogs():
|
|
21
|
-
from chembl_structure_pipeline import get_parent_mol
|
|
22
|
-
from chembl_structure_pipeline import standardize_mol # type: ignore
|
|
21
|
+
from chembl_structure_pipeline import get_parent_mol, standardize_mol
|
|
23
22
|
|
|
24
23
|
import_error = None
|
|
25
24
|
except ImportError as e:
|
|
@@ -38,7 +37,7 @@ class StandardizeWithCsp(PreprocessingStep):
|
|
|
38
37
|
raise import_error
|
|
39
38
|
|
|
40
39
|
def _preprocess(self, mol: Mol) -> Tuple[Optional[Mol], List[Problem]]:
|
|
41
|
-
problems = []
|
|
40
|
+
problems: List[Problem] = []
|
|
42
41
|
|
|
43
42
|
# chembl structure pipeline cannot handle molecules with 3D coordinates
|
|
44
43
|
# --> delete conformers
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
from typing import Iterable, List, Optional, Tuple
|
|
1
|
+
from typing import Iterable, List, Optional, Set, Tuple
|
|
2
2
|
|
|
3
3
|
from rdkit.Chem import Mol
|
|
4
4
|
|
|
5
|
-
from ..problem import Problem
|
|
5
|
+
from ..problem import InvalidElementsProblem, Problem
|
|
6
6
|
from .preprocessing_step import PreprocessingStep
|
|
7
7
|
|
|
8
8
|
__all__ = ["FilterByElement", "ORGANIC_SUBSET"]
|
|
@@ -29,7 +29,7 @@ class FilterByElement(PreprocessingStep):
|
|
|
29
29
|
self, allowed_elements: Iterable[str], remove_invalid_molecules: bool = False
|
|
30
30
|
) -> None:
|
|
31
31
|
super().__init__()
|
|
32
|
-
self.allowed_elements =
|
|
32
|
+
self.allowed_elements = {a[0].upper() + a[1:] for a in allowed_elements}
|
|
33
33
|
self.hydrogen_in_allowed_elements = "H" in self.allowed_elements
|
|
34
34
|
self.remove_invalid_molecules = remove_invalid_molecules
|
|
35
35
|
|
|
@@ -37,7 +37,7 @@ class FilterByElement(PreprocessingStep):
|
|
|
37
37
|
problems = []
|
|
38
38
|
result_mol = mol
|
|
39
39
|
|
|
40
|
-
elements =
|
|
40
|
+
elements: Set[str] = {atom.GetSymbol() for atom in mol.GetAtoms()}
|
|
41
41
|
invalid_elements = elements - self.allowed_elements
|
|
42
42
|
|
|
43
43
|
# special case: hydrogens are not recognized by mol.GetAtoms()
|
|
@@ -52,16 +52,6 @@ class FilterByElement(PreprocessingStep):
|
|
|
52
52
|
if self.remove_invalid_molecules:
|
|
53
53
|
result_mol = None
|
|
54
54
|
|
|
55
|
-
|
|
56
|
-
invalid_elements_str = ", ".join(list(invalid_elements)[:3]) + "..."
|
|
57
|
-
else:
|
|
58
|
-
invalid_elements_str = ", ".join(list(invalid_elements))
|
|
59
|
-
|
|
60
|
-
problems.append(
|
|
61
|
-
Problem(
|
|
62
|
-
"invalid_elements",
|
|
63
|
-
f"Molecule contains invalid elements {invalid_elements_str}",
|
|
64
|
-
)
|
|
65
|
-
)
|
|
55
|
+
problems.append(InvalidElementsProblem(invalid_elements))
|
|
66
56
|
|
|
67
57
|
return result_mol, problems
|
|
@@ -3,7 +3,7 @@ from typing import List, Optional, Tuple
|
|
|
3
3
|
from rdkit.Chem import Mol
|
|
4
4
|
from rdkit.Chem.rdMolDescriptors import CalcExactMolWt
|
|
5
5
|
|
|
6
|
-
from ..problem import Problem
|
|
6
|
+
from ..problem import InvalidWeightProblem, Problem
|
|
7
7
|
from .preprocessing_step import PreprocessingStep
|
|
8
8
|
|
|
9
9
|
|
|
@@ -27,14 +27,6 @@ class FilterByWeight(PreprocessingStep):
|
|
|
27
27
|
if weight < self.min_weight or weight > self.max_weight:
|
|
28
28
|
if self.remove_invalid_molecules:
|
|
29
29
|
result_mol = None
|
|
30
|
-
problems.append(
|
|
31
|
-
Problem(
|
|
32
|
-
type="invalid_weight",
|
|
33
|
-
message=(
|
|
34
|
-
f"Molecular weight {weight:.2f} out of range "
|
|
35
|
-
f"[{self.min_weight}, {self.max_weight}]"
|
|
36
|
-
),
|
|
37
|
-
)
|
|
38
|
-
)
|
|
30
|
+
problems.append(InvalidWeightProblem(weight, self.min_weight, self.max_weight))
|
|
39
31
|
|
|
40
32
|
return result_mol, problems
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
from typing import Iterable, NamedTuple
|
|
2
|
+
|
|
3
|
+
__all__ = [
|
|
4
|
+
"Problem",
|
|
5
|
+
"InvalidSmiles",
|
|
6
|
+
"UnknownProblem",
|
|
7
|
+
"InvalidWeightProblem",
|
|
8
|
+
"InvalidElementsProblem",
|
|
9
|
+
]
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class Problem(NamedTuple):
|
|
13
|
+
type: str
|
|
14
|
+
message: str
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def InvalidSmiles() -> Problem:
|
|
18
|
+
return Problem(type="invalid_smiles", message="Invalid SMILES string")
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def UnknownProblem() -> Problem:
|
|
22
|
+
return Problem(type="unknown", message="Unknown error occurred")
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def InvalidWeightProblem(weight: float, min_weight: float, max_weight: float) -> Problem:
|
|
26
|
+
return Problem(
|
|
27
|
+
type="invalid_weight",
|
|
28
|
+
message=(f"Molecular weight {weight:.2f} out of range " f"[{min_weight}, {max_weight}]"),
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def InvalidElementsProblem(invalid_elements: Iterable[str]) -> Problem:
|
|
33
|
+
invalid_element_list = list(invalid_elements)
|
|
34
|
+
if len(invalid_element_list) > 3:
|
|
35
|
+
invalid_elements_str = ", ".join(invalid_element_list[:3]) + "..."
|
|
36
|
+
else:
|
|
37
|
+
invalid_elements_str = ", ".join(invalid_element_list)
|
|
38
|
+
|
|
39
|
+
return Problem(
|
|
40
|
+
"invalid_elements",
|
|
41
|
+
f"Molecule contains invalid elements {invalid_elements_str}",
|
|
42
|
+
)
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import inspect
|
|
2
|
-
from typing import Callable, Dict, Tuple, Type, TypeVar, Union
|
|
2
|
+
from typing import Callable, Dict, Optional, Tuple, Type, TypeVar, Union
|
|
3
3
|
|
|
4
4
|
__all__ = ["call_with_mappings"]
|
|
5
5
|
|
|
@@ -10,8 +10,11 @@ def call_with_mappings(
|
|
|
10
10
|
class_or_function: Union[Type[T], Callable[..., T]],
|
|
11
11
|
config: dict,
|
|
12
12
|
args_mapping: Tuple[str, ...] = (),
|
|
13
|
-
kwargs_mapping: Dict[str, str] =
|
|
13
|
+
kwargs_mapping: Optional[Dict[str, str]] = None,
|
|
14
14
|
) -> T:
|
|
15
|
+
if kwargs_mapping is None:
|
|
16
|
+
kwargs_mapping = {}
|
|
17
|
+
|
|
15
18
|
# translate all args
|
|
16
19
|
translated_args = tuple(config.get(arg) for arg in args_mapping)
|
|
17
20
|
# translate all kwargs
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: nerdd-module
|
|
3
|
-
Version: 0.3.7
|
|
3
|
+
Version: 0.3.9
|
|
4
4
|
Summary: Base package to create NERDD modules
|
|
5
5
|
Author-email: Steffen Hirte <steffen.hirte@univie.ac.at>
|
|
6
6
|
Maintainer-email: Steffen Hirte <steffen.hirte@univie.ac.at>
|
|
@@ -60,12 +60,14 @@ Requires-Dist: decorator>=5.1.1
|
|
|
60
60
|
Requires-Dist: importlib-resources>=5; python_version < "3.9"
|
|
61
61
|
Requires-Dist: importlib-metadata>=4.6; python_version < "3.10"
|
|
62
62
|
Provides-Extra: dev
|
|
63
|
-
Requires-Dist: mypy; extra == "dev"
|
|
64
|
-
Requires-Dist: ruff; extra == "dev"
|
|
63
|
+
Requires-Dist: mypy==1.13.0; extra == "dev"
|
|
64
|
+
Requires-Dist: ruff==0.7.1; extra == "dev"
|
|
65
65
|
Requires-Dist: pandas-stubs; extra == "dev"
|
|
66
|
+
Requires-Dist: rdkit-stubs; extra == "dev"
|
|
66
67
|
Requires-Dist: types-PyYAML; extra == "dev"
|
|
67
68
|
Requires-Dist: types-decorator; extra == "dev"
|
|
68
69
|
Requires-Dist: types-setuptools; extra == "dev"
|
|
70
|
+
Requires-Dist: pre-commit==3.5.0; extra == "dev"
|
|
69
71
|
Provides-Extra: rdkit
|
|
70
72
|
Requires-Dist: rdkit>=2022.3.3; extra == "rdkit"
|
|
71
73
|
Provides-Extra: csp
|
|
@@ -100,6 +102,7 @@ pip install -U nerdd-module
|
|
|
100
102
|
## Contribute
|
|
101
103
|
|
|
102
104
|
1. Fork and clone the code
|
|
103
|
-
2. Install test dependencies with ` pip install -e .[test,dev,csp]`
|
|
104
|
-
3. Run tests via `pytest` or `pytest-watch` (short: `ptw`)
|
|
105
|
-
4. Build docs via ` pip install -e .[docs]` and ` mkdocs serve`
|
|
105
|
+
2. Install test dependencies with `pip install -e .[test,dev,csp]`
|
|
106
|
+
3. Install pre-commit hooks `pre-commit install`
|
|
107
|
+
4. Run tests via `pytest` or `pytest-watch` (short: `ptw`)
|
|
108
|
+
5. Build docs via `pip install -e .[docs]` and `mkdocs serve`
|
|
@@ -15,12 +15,14 @@ importlib-resources>=5
|
|
|
15
15
|
chembl_structure_pipeline>=1.0.0
|
|
16
16
|
|
|
17
17
|
[dev]
|
|
18
|
-
mypy
|
|
19
|
-
ruff
|
|
18
|
+
mypy==1.13.0
|
|
19
|
+
ruff==0.7.1
|
|
20
20
|
pandas-stubs
|
|
21
|
+
rdkit-stubs
|
|
21
22
|
types-PyYAML
|
|
22
23
|
types-decorator
|
|
23
24
|
types-setuptools
|
|
25
|
+
pre-commit==3.5.0
|
|
24
26
|
|
|
25
27
|
[docs]
|
|
26
28
|
mkdocs
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "nerdd-module"
|
|
7
|
-
version = "0.3.7"
|
|
7
|
+
version = "0.3.9"
|
|
8
8
|
description = "Base package to create NERDD modules"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = { file = "LICENSE" }
|
|
@@ -48,12 +48,14 @@ classifiers = [
|
|
|
48
48
|
|
|
49
49
|
[project.optional-dependencies]
|
|
50
50
|
dev = [
|
|
51
|
-
"mypy",
|
|
52
|
-
"ruff",
|
|
51
|
+
"mypy==1.13.0",
|
|
52
|
+
"ruff==0.7.1",
|
|
53
53
|
"pandas-stubs",
|
|
54
|
+
"rdkit-stubs",
|
|
54
55
|
"types-PyYAML",
|
|
55
56
|
"types-decorator",
|
|
56
57
|
"types-setuptools",
|
|
58
|
+
"pre-commit==3.5.0",
|
|
57
59
|
]
|
|
58
60
|
rdkit = [
|
|
59
61
|
# Some old RDKit versions are not recognized by setuptools. For that reason,
|
|
@@ -98,7 +100,7 @@ nerdd_module = ["py.typed"]
|
|
|
98
100
|
[tool.pytest.ini_options]
|
|
99
101
|
log_cli = 1
|
|
100
102
|
log_cli_level = "INFO"
|
|
101
|
-
addopts = "-x
|
|
103
|
+
addopts = "-x --cov-report term --cov=nerdd_module"
|
|
102
104
|
|
|
103
105
|
[tool.pytest-watcher]
|
|
104
106
|
patterns = ["*.py", "*.feature", "pyproject.toml"]
|
|
@@ -108,12 +110,23 @@ line-length = 100
|
|
|
108
110
|
extend-exclude = ["tests", "nerdd_module/tests"]
|
|
109
111
|
|
|
110
112
|
[tool.ruff.lint]
|
|
111
|
-
|
|
112
|
-
#
|
|
113
|
-
|
|
114
|
-
|
|
113
|
+
select = [
|
|
114
|
+
"E", # pycodestyle errors
|
|
115
|
+
"W", # pycodestyle warnings
|
|
116
|
+
"F", # pyflakes
|
|
117
|
+
"I", # isort
|
|
118
|
+
"B", # flake8-bugbear
|
|
119
|
+
"C4", # flake8-comprehensions
|
|
120
|
+
"T20" # no print statements
|
|
121
|
+
]
|
|
122
|
+
ignore = [
|
|
123
|
+
"F403", # I often use 'from .submodule import *' in __init__.py files
|
|
124
|
+
"C408" # I prefer dict(a=5) over {'a': 5}
|
|
125
|
+
]
|
|
115
126
|
|
|
116
127
|
[tool.mypy]
|
|
128
|
+
mypy_path="typings"
|
|
129
|
+
# strict=true
|
|
117
130
|
disallow_untyped_defs = true
|
|
118
131
|
# no_implicit_optional = True
|
|
119
132
|
# check_untyped_defs = True
|
nerdd_module-0.3.7/README.md
DELETED
|
@@ -1,18 +0,0 @@
|
|
|
1
|
-
# NERDD Module
|
|
2
|
-
|
|
3
|
-
This package provides the basis to implement molecular prediction modules in the
|
|
4
|
-
NERDD ecosystem.
|
|
5
|
-
|
|
6
|
-
## Installation
|
|
7
|
-
|
|
8
|
-
``` bash
|
|
9
|
-
pip install -U nerdd-module
|
|
10
|
-
```
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
## Contribute
|
|
14
|
-
|
|
15
|
-
1. Fork and clone the code
|
|
16
|
-
2. Install test dependencies with ` pip install -e .[test,dev,csp]`
|
|
17
|
-
3. Run tests via `pytest` or `pytest-watch` (short: `ptw`)
|
|
18
|
-
4. Build docs via ` pip install -e .[docs]` and ` mkdocs serve`
|
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
from typing import NamedTuple
|
|
2
|
-
|
|
3
|
-
__all__ = ["Problem", "InvalidSmiles", "UnknownProblem"]
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
class Problem(NamedTuple):
|
|
7
|
-
type: str
|
|
8
|
-
message: str
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
def InvalidSmiles() -> Problem:
|
|
12
|
-
return Problem(type="invalid_smiles", message="Invalid SMILES string")
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
def UnknownProblem() -> Problem:
|
|
16
|
-
return Problem(type="unknown", message="Unknown error occurred")
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/model/convert_representations_step.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/preprocessing/remove_stereochemistry.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nerdd_module-0.3.7 → nerdd_module-0.3.9}/nerdd_module/tests/preprocessing/DummyPreprocessingStep.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|