nerdd-module 0.3.36__tar.gz → 0.3.38__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/LICENSE +1 -1
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/PKG-INFO +2 -30
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/cli.py +2 -2
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/config/models.py +1 -1
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/model/__init__.py +1 -1
- nerdd_module-0.3.36/nerdd_module/model/simple_model.py → nerdd_module-0.3.38/nerdd_module/model/model.py +46 -27
- nerdd_module-0.3.36/nerdd_module/model/model.py → nerdd_module-0.3.38/nerdd_module/model/prediction_step.py +11 -103
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/problem.py +14 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/tests/models/AtomicMassModel.py +2 -2
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/tests/models/MolWeightModel.py +2 -2
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module.egg-info/PKG-INFO +2 -30
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module.egg-info/SOURCES.txt +1 -1
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/pyproject.toml +3 -4
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/README.md +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/__init__.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/config/__init__.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/config/configuration.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/config/default_configuration.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/config/dict_configuration.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/config/merged_configuration.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/config/package_configuration.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/config/search_yaml_configuration.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/config/yaml_configuration.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/converters/__init__.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/converters/basic_type_converter.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/converters/converter.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/converters/converter_config.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/converters/mol_converter.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/converters/problem_list_converter.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/converters/representation_converter.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/converters/source_list_converter.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/converters/void_converter.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/input/__init__.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/input/depth_first_explorer.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/input/explorer.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/input/file_reader.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/input/gzip_reader.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/input/inchi_reader.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/input/list_reader.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/input/mol_reader.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/input/reader.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/input/reader_config.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/input/sdf_reader.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/input/smiles_reader.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/input/string_reader.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/input/tar_reader.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/input/zip_reader.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/model/assign_name_step.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/model/convert_representations_step.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/model/enforce_schema_step.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/model/read_input_step.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/model/write_output_step.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/output/__init__.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/output/csv_writer.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/output/file_writer.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/output/iterator_writer.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/output/pandas_writer.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/output/record_list_writer.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/output/sdf_writer.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/output/writer.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/output/writer_config.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/polyfills/__init__.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/polyfills/block_logs.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/polyfills/files.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/polyfills/get_entry_points.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/polyfills/literal.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/polyfills/typed_dict.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/polyfills/types.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/polyfills/version.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/preprocessing/__init__.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/preprocessing/check_valid_smiles.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/preprocessing/chembl_structure_pipeline.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/preprocessing/filter_by_element.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/preprocessing/filter_by_weight.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/preprocessing/preprocessing_step.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/preprocessing/remove_stereochemistry.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/preprocessing/sanitize.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/py.typed +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/steps/__init__.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/steps/map_step.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/steps/output_step.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/steps/step.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/tests/__init__.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/tests/checks.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/tests/files.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/tests/models/__init__.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/tests/predictions.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/tests/preprocessing/DummyPreprocessingStep.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/tests/preprocessing/__init__.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/tests/representations.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/util/__init__.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/util/call_with_mappings.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/util/package.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/version.py +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module.egg-info/dependency_links.txt +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module.egg-info/requires.txt +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module.egg-info/top_level.txt +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/setup.cfg +0 -0
- {nerdd_module-0.3.36 → nerdd_module-0.3.38}/tests/test_features.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
BSD 3-Clause License
|
|
2
2
|
|
|
3
|
-
Copyright (c) 2023 - present, The Computational Drug Discovery and Design Group (COMP3D)
|
|
3
|
+
Copyright (c) 2023 - present, The Computational Drug Discovery and Design Group (COMP3D), Steffen Hirte
|
|
4
4
|
|
|
5
5
|
Redistribution and use in source and binary forms, with or without
|
|
6
6
|
modification, are permitted provided that the following conditions are met:
|
|
@@ -1,42 +1,14 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: nerdd-module
|
|
3
|
-
Version: 0.3.36
|
|
3
|
+
Version: 0.3.38
|
|
4
4
|
Summary: Base package to create NERDD modules
|
|
5
5
|
Author-email: Steffen Hirte <steffen.hirte@univie.ac.at>
|
|
6
6
|
Maintainer-email: Steffen Hirte <steffen.hirte@univie.ac.at>
|
|
7
|
-
License: BSD
|
|
8
|
-
|
|
9
|
-
Copyright (c) 2023 - present, The Computational Drug Discovery and Design Group (COMP3D)
|
|
10
|
-
|
|
11
|
-
Redistribution and use in source and binary forms, with or without
|
|
12
|
-
modification, are permitted provided that the following conditions are met:
|
|
13
|
-
|
|
14
|
-
1. Redistributions of source code must retain the above copyright notice, this
|
|
15
|
-
list of conditions and the following disclaimer.
|
|
16
|
-
|
|
17
|
-
2. Redistributions in binary form must reproduce the above copyright notice,
|
|
18
|
-
this list of conditions and the following disclaimer in the documentation
|
|
19
|
-
and/or other materials provided with the distribution.
|
|
20
|
-
|
|
21
|
-
3. Neither the name of the copyright holder nor the names of its
|
|
22
|
-
contributors may be used to endorse or promote products derived from
|
|
23
|
-
this software without specific prior written permission.
|
|
24
|
-
|
|
25
|
-
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
26
|
-
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
27
|
-
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
28
|
-
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
29
|
-
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
30
|
-
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
31
|
-
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
32
|
-
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
33
|
-
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
34
|
-
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
7
|
+
License-Expression: BSD-3-Clause
|
|
35
8
|
Project-URL: Repository, https://github.com/molinfo-vienna/nerdd-module
|
|
36
9
|
Keywords: science,research,development,nerdd
|
|
37
10
|
Classifier: Intended Audience :: Science/Research
|
|
38
11
|
Classifier: Intended Audience :: Developers
|
|
39
|
-
Classifier: License :: OSI Approved :: BSD License
|
|
40
12
|
Classifier: Programming Language :: Python
|
|
41
13
|
Classifier: Topic :: Software Development
|
|
42
14
|
Classifier: Topic :: Scientific/Engineering
|
|
@@ -56,7 +56,7 @@ def auto_cli(f: Callable[..., Model], *args: Any, **kwargs: Any) -> None:
|
|
|
56
56
|
input_format_list = "\n".join([f"* {fmt}" for fmt in ["smiles", "sdf", "inchi"]])
|
|
57
57
|
|
|
58
58
|
help_text = input_description.format(
|
|
59
|
-
description=model.description, input_format_list=input_format_list
|
|
59
|
+
description=model.config.description, input_format_list=input_format_list
|
|
60
60
|
)
|
|
61
61
|
|
|
62
62
|
output_format_list = [
|
|
@@ -117,7 +117,7 @@ def auto_cli(f: Callable[..., Model], *args: Any, **kwargs: Any) -> None:
|
|
|
117
117
|
#
|
|
118
118
|
# Add job parameters
|
|
119
119
|
#
|
|
120
|
-
for param in model.job_parameters:
|
|
120
|
+
for param in model.config.job_parameters:
|
|
121
121
|
# convert parameter name to spinal case (e.g. "max_confs" -> "max-confs")
|
|
122
122
|
param_name = spinalcase(param.name)
|
|
123
123
|
main = click.option(
|
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
|
|
1
|
+
import logging
|
|
2
|
+
from abc import ABC, abstractmethod
|
|
2
3
|
from functools import cached_property
|
|
3
4
|
from typing import Any, Iterable, List, Optional, Tuple, Union
|
|
4
5
|
|
|
@@ -8,7 +9,6 @@ from ..config import (
|
|
|
8
9
|
Configuration,
|
|
9
10
|
DefaultConfiguration,
|
|
10
11
|
DictConfiguration,
|
|
11
|
-
JobParameter,
|
|
12
12
|
MergedConfiguration,
|
|
13
13
|
Module,
|
|
14
14
|
PackageConfiguration,
|
|
@@ -17,21 +17,22 @@ from ..config import (
|
|
|
17
17
|
from ..input import DepthFirstExplorer
|
|
18
18
|
from ..preprocessing import PreprocessingStep
|
|
19
19
|
from ..problem import Problem
|
|
20
|
-
from ..steps import Step
|
|
20
|
+
from ..steps import OutputStep, Step
|
|
21
21
|
from ..util import get_file_path_to_instance
|
|
22
22
|
from .assign_name_step import AssignNameStep
|
|
23
23
|
from .convert_representations_step import ConvertRepresentationsStep
|
|
24
24
|
from .enforce_schema_step import EnforceSchemaStep
|
|
25
|
-
from .
|
|
25
|
+
from .prediction_step import PredictionStep
|
|
26
26
|
from .read_input_step import ReadInputStep
|
|
27
27
|
from .write_output_step import WriteOutputStep
|
|
28
28
|
|
|
29
|
-
|
|
29
|
+
logger = logging.getLogger(__name__)
|
|
30
30
|
|
|
31
31
|
|
|
32
|
-
class SimpleModel(Model):
|
|
32
|
+
class Model(ABC):
|
|
33
33
|
def __init__(self, preprocessing_steps: Iterable[Step] = []) -> None:
|
|
34
34
|
super().__init__()
|
|
35
|
+
|
|
35
36
|
assert isinstance(
|
|
36
37
|
preprocessing_steps, Iterable
|
|
37
38
|
), f"Expected Iterable for argument preprocessing_steps, got {type(preprocessing_steps)}"
|
|
@@ -39,8 +40,12 @@ class SimpleModel(Model):
|
|
|
39
40
|
f"Expected all elements of preprocessing_steps to be of type Step, "
|
|
40
41
|
f"got {[type(step) for step in preprocessing_steps if not isinstance(step, Step)]}"
|
|
41
42
|
)
|
|
43
|
+
|
|
42
44
|
self._preprocessing_steps = preprocessing_steps
|
|
43
45
|
|
|
46
|
+
def _preprocess(self, mol: Mol) -> Tuple[Optional[Mol], List[Problem]]:
|
|
47
|
+
return mol, []
|
|
48
|
+
|
|
44
49
|
def _get_input_steps(
|
|
45
50
|
self, input: Any, input_format: Optional[str], **kwargs: Any
|
|
46
51
|
) -> List[Step]:
|
|
@@ -59,6 +64,10 @@ class SimpleModel(Model):
|
|
|
59
64
|
CustomPreprocessingStep(self),
|
|
60
65
|
]
|
|
61
66
|
|
|
67
|
+
@abstractmethod
|
|
68
|
+
def _predict_mols(self, mols: List[Mol], **kwargs: Any) -> Iterable[dict]:
|
|
69
|
+
pass
|
|
70
|
+
|
|
62
71
|
def _get_postprocessing_steps(self, output_format: Optional[str], **kwargs: Any) -> List[Step]:
|
|
63
72
|
output_format = output_format or "pandas"
|
|
64
73
|
return [
|
|
@@ -67,13 +76,38 @@ class SimpleModel(Model):
|
|
|
67
76
|
WriteOutputStep(output_format, config=self.config, **kwargs),
|
|
68
77
|
]
|
|
69
78
|
|
|
70
|
-
def
|
|
71
|
-
|
|
79
|
+
def predict(
|
|
80
|
+
self,
|
|
81
|
+
input: Any,
|
|
82
|
+
input_format: Optional[str] = None,
|
|
83
|
+
output_format: Optional[str] = None,
|
|
84
|
+
**kwargs: Any,
|
|
85
|
+
) -> Any:
|
|
86
|
+
input_steps = self._get_input_steps(input, input_format, **kwargs)
|
|
87
|
+
preprocessing_steps = self._get_preprocessing_steps(input, input_format, **kwargs)
|
|
88
|
+
postprocessing_steps = self._get_postprocessing_steps(output_format, **kwargs)
|
|
89
|
+
output_step = postprocessing_steps[-1]
|
|
90
|
+
|
|
91
|
+
assert isinstance(output_step, OutputStep), "The last step must be an OutputStep."
|
|
92
|
+
|
|
93
|
+
steps = [
|
|
94
|
+
*input_steps,
|
|
95
|
+
*preprocessing_steps,
|
|
96
|
+
PredictionStep(self._predict_mols, batch_size=self.config.batch_size, **kwargs),
|
|
97
|
+
*postprocessing_steps,
|
|
98
|
+
]
|
|
72
99
|
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
100
|
+
# build the pipeline from the list of steps
|
|
101
|
+
pipeline = None
|
|
102
|
+
for t in steps:
|
|
103
|
+
pipeline = t(pipeline)
|
|
104
|
+
|
|
105
|
+
# the last pipeline step holds the result
|
|
106
|
+
return output_step.get_result()
|
|
76
107
|
|
|
108
|
+
#
|
|
109
|
+
# Configuration
|
|
110
|
+
#
|
|
77
111
|
def _get_base_config(self) -> Union[Configuration, dict]:
|
|
78
112
|
# get the class of the nerdd module, e.g. <CypstrateModel>
|
|
79
113
|
nerdd_module_class = self.__class__
|
|
@@ -192,24 +226,9 @@ class SimpleModel(Model):
|
|
|
192
226
|
def config(self) -> Module:
|
|
193
227
|
return self._get_config().get_dict()
|
|
194
228
|
|
|
195
|
-
def _get_batch_size(self) -> int:
|
|
196
|
-
default = super()._get_batch_size()
|
|
197
|
-
return self.config.batch_size or default
|
|
198
|
-
|
|
199
|
-
def _get_name(self) -> str:
|
|
200
|
-
default = super()._get_name()
|
|
201
|
-
return self.config.name or default
|
|
202
|
-
|
|
203
|
-
def _get_description(self) -> str:
|
|
204
|
-
default = super()._get_description()
|
|
205
|
-
return self.config.description or default
|
|
206
|
-
|
|
207
|
-
def _get_job_parameters(self) -> List[JobParameter]:
|
|
208
|
-
return super()._get_job_parameters() + self.config.job_parameters
|
|
209
|
-
|
|
210
229
|
|
|
211
230
|
class CustomPreprocessingStep(PreprocessingStep):
|
|
212
|
-
def __init__(self, model: SimpleModel):
|
|
231
|
+
def __init__(self, model: Model):
|
|
213
232
|
super().__init__()
|
|
214
233
|
self.model = model
|
|
215
234
|
|
|
@@ -1,114 +1,22 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from abc import ABC, abstractmethod
|
|
3
2
|
from collections import defaultdict
|
|
4
|
-
from typing import Any,
|
|
3
|
+
from typing import Any, Callable, Iterator, List, Tuple
|
|
5
4
|
|
|
6
|
-
from
|
|
7
|
-
from
|
|
8
|
-
|
|
9
|
-
from ..config import JobParameter
|
|
10
|
-
from ..problem import Problem
|
|
11
|
-
from ..steps import OutputStep, Step
|
|
5
|
+
from ..problem import IncompletePredictionProblem, UnknownPredictionProblem
|
|
6
|
+
from ..steps import Step
|
|
12
7
|
from ..util import call_with_mappings
|
|
13
8
|
|
|
14
9
|
logger = logging.getLogger(__name__)
|
|
15
10
|
|
|
16
|
-
|
|
17
|
-
# an unknown prediction problem indicates that the model raised an exception during
|
|
18
|
-
# prediction
|
|
19
|
-
def UnknownPredictionProblem() -> Problem:
|
|
20
|
-
return Problem("unknown_prediction_error", "An unknown error occured during prediction.")
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
# an incomplete prediction problem indicates that the model successfully returns
|
|
24
|
-
# predictions, but part of the input molecules are missing in the results
|
|
25
|
-
def IncompletePredictionProblem() -> Problem:
|
|
26
|
-
return Problem("incomplete_prediction_error", "The model couldn't process the molecule.")
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
class Model(ABC):
|
|
30
|
-
def __init__(self) -> None:
|
|
31
|
-
super().__init__()
|
|
32
|
-
|
|
33
|
-
@abstractmethod
|
|
34
|
-
def _predict_mols(self, mols: List[Mol], **kwargs: Any) -> Iterable[dict]:
|
|
35
|
-
pass
|
|
36
|
-
|
|
37
|
-
@abstractmethod
|
|
38
|
-
def _get_input_steps(
|
|
39
|
-
self, input: Any, input_format: Optional[str], **kwargs: Any
|
|
40
|
-
) -> List[Step]:
|
|
41
|
-
pass
|
|
42
|
-
|
|
43
|
-
@abstractmethod
|
|
44
|
-
def _get_preprocessing_steps(
|
|
45
|
-
self, input: Any, input_format: Optional[str], **kwargs: Any
|
|
46
|
-
) -> List[Step]:
|
|
47
|
-
pass
|
|
48
|
-
|
|
49
|
-
@abstractmethod
|
|
50
|
-
def _get_postprocessing_steps(self, output_format: Optional[str], **kwargs: Any) -> List[Step]:
|
|
51
|
-
pass
|
|
52
|
-
|
|
53
|
-
def predict(
|
|
54
|
-
self,
|
|
55
|
-
input: Any,
|
|
56
|
-
input_format: Optional[str] = None,
|
|
57
|
-
output_format: Optional[str] = None,
|
|
58
|
-
**kwargs: Any,
|
|
59
|
-
) -> Any:
|
|
60
|
-
input_steps = self._get_input_steps(input, input_format, **kwargs)
|
|
61
|
-
preprocessing_steps = self._get_preprocessing_steps(input, input_format, **kwargs)
|
|
62
|
-
postprocessing_steps = self._get_postprocessing_steps(output_format, **kwargs)
|
|
63
|
-
output_step = postprocessing_steps[-1]
|
|
64
|
-
|
|
65
|
-
assert isinstance(output_step, OutputStep), "The last step must be an OutputStep."
|
|
66
|
-
|
|
67
|
-
steps = [
|
|
68
|
-
*input_steps,
|
|
69
|
-
*preprocessing_steps,
|
|
70
|
-
PredictionStep(self, batch_size=self.batch_size, **kwargs),
|
|
71
|
-
*postprocessing_steps,
|
|
72
|
-
]
|
|
73
|
-
|
|
74
|
-
# build the pipeline from the list of steps
|
|
75
|
-
pipeline = None
|
|
76
|
-
for t in steps:
|
|
77
|
-
pipeline = t(pipeline)
|
|
78
|
-
|
|
79
|
-
# the last pipeline step holds the result
|
|
80
|
-
return output_step.get_result()
|
|
81
|
-
|
|
82
|
-
#
|
|
83
|
-
# Properties
|
|
84
|
-
#
|
|
85
|
-
def _get_batch_size(self) -> int:
|
|
86
|
-
return 1
|
|
87
|
-
|
|
88
|
-
batch_size = property(fget=lambda self: self._get_batch_size())
|
|
89
|
-
|
|
90
|
-
def _get_name(self) -> str:
|
|
91
|
-
return snakecase(self.__class__.__name__)
|
|
92
|
-
|
|
93
|
-
name = property(fget=lambda self: self._get_name())
|
|
94
|
-
|
|
95
|
-
def _get_description(self) -> str:
|
|
96
|
-
return ""
|
|
97
|
-
|
|
98
|
-
description = property(fget=lambda self: self._get_description())
|
|
99
|
-
|
|
100
|
-
def _get_job_parameters(self) -> List[JobParameter]:
|
|
101
|
-
return []
|
|
102
|
-
|
|
103
|
-
job_parameters = property(fget=lambda self: self._get_job_parameters())
|
|
11
|
+
__all__ = ["PredictionStep"]
|
|
104
12
|
|
|
105
13
|
|
|
106
14
|
class PredictionStep(Step):
|
|
107
|
-
def __init__(self,
|
|
15
|
+
def __init__(self, predict_fn: Callable, batch_size: int, **kwargs: Any) -> None:
|
|
108
16
|
super().__init__()
|
|
109
|
-
self.
|
|
110
|
-
self.
|
|
111
|
-
self.
|
|
17
|
+
self._predict_fn = predict_fn
|
|
18
|
+
self._batch_size = batch_size
|
|
19
|
+
self._kwargs = kwargs
|
|
112
20
|
|
|
113
21
|
def _run(self, source: Iterator[dict]) -> Iterator[dict]:
|
|
114
22
|
# We need to process the molecules in batches, because most ML models perform
|
|
@@ -131,7 +39,7 @@ class PredictionStep(Step):
|
|
|
131
39
|
if len(batch) > 0 or len(none_batch) > 0:
|
|
132
40
|
yield batch, none_batch
|
|
133
41
|
|
|
134
|
-
for batch, none_batch in _batch_and_filter(source, self.
|
|
42
|
+
for batch, none_batch in _batch_and_filter(source, self._batch_size):
|
|
135
43
|
# return the records where mols are None
|
|
136
44
|
yield from none_batch
|
|
137
45
|
|
|
@@ -151,8 +59,8 @@ class PredictionStep(Step):
|
|
|
151
59
|
if len(batch) > 0:
|
|
152
60
|
predictions = list(
|
|
153
61
|
call_with_mappings(
|
|
154
|
-
self.
|
|
155
|
-
{**self.
|
|
62
|
+
self._predict_fn,
|
|
63
|
+
{**self._kwargs, "mols": mols},
|
|
156
64
|
)
|
|
157
65
|
)
|
|
158
66
|
else:
|
|
@@ -2,6 +2,8 @@ from typing import Iterable, NamedTuple
|
|
|
2
2
|
|
|
3
3
|
__all__ = [
|
|
4
4
|
"Problem",
|
|
5
|
+
"UnknownPredictionProblem",
|
|
6
|
+
"IncompletePredictionProblem",
|
|
5
7
|
"InvalidSmiles",
|
|
6
8
|
"UnknownProblem",
|
|
7
9
|
"InvalidWeightProblem",
|
|
@@ -14,6 +16,18 @@ class Problem(NamedTuple):
|
|
|
14
16
|
message: str
|
|
15
17
|
|
|
16
18
|
|
|
19
|
+
# an unknown prediction problem indicates that the model raised an exception during
|
|
20
|
+
# prediction
|
|
21
|
+
def UnknownPredictionProblem() -> Problem:
|
|
22
|
+
return Problem("unknown_prediction_error", "An unknown error occured during prediction.")
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
# an incomplete prediction problem indicates that the model successfully returns
|
|
26
|
+
# predictions, but part of the input molecules are missing in the results
|
|
27
|
+
def IncompletePredictionProblem() -> Problem:
|
|
28
|
+
return Problem("incomplete_prediction_error", "The model couldn't process the molecule.")
|
|
29
|
+
|
|
30
|
+
|
|
17
31
|
def InvalidSmiles() -> Problem:
|
|
18
32
|
return Problem(type="invalid_smiles", message="Invalid SMILES string")
|
|
19
33
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from nerdd_module import SimpleModel
|
|
1
|
+
from nerdd_module import Model
|
|
2
2
|
from nerdd_module.preprocessing import Sanitize
|
|
3
3
|
|
|
4
4
|
__all__ = ["AtomicMassModel"]
|
|
@@ -7,7 +7,7 @@ __all__ = ["AtomicMassModel"]
|
|
|
7
7
|
allowed_versions = ["mol_ids", "mols", "iterator", "error"]
|
|
8
8
|
|
|
9
9
|
|
|
10
|
-
class AtomicMassModel(SimpleModel):
|
|
10
|
+
class AtomicMassModel(Model):
|
|
11
11
|
def __init__(self, preprocessing_steps=[Sanitize()], version="mol_ids", **kwargs):
|
|
12
12
|
assert (
|
|
13
13
|
version in allowed_versions
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from rdkit.Chem.rdMolDescriptors import CalcExactMolWt
|
|
2
2
|
|
|
3
|
-
from nerdd_module import SimpleModel
|
|
3
|
+
from nerdd_module import Model
|
|
4
4
|
from nerdd_module.preprocessing import Sanitize
|
|
5
5
|
|
|
6
6
|
__all__ = ["MolWeightModel"]
|
|
@@ -8,7 +8,7 @@ __all__ = ["MolWeightModel"]
|
|
|
8
8
|
allowed_versions = ["order_based", "mol_ids", "mols", "iterator", "error"]
|
|
9
9
|
|
|
10
10
|
|
|
11
|
-
class MolWeightModel(SimpleModel):
|
|
11
|
+
class MolWeightModel(Model):
|
|
12
12
|
def __init__(self, preprocessing_steps=[Sanitize()], version="order_based", **kwargs):
|
|
13
13
|
assert (
|
|
14
14
|
version in allowed_versions
|
|
@@ -1,42 +1,14 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: nerdd-module
|
|
3
|
-
Version: 0.3.36
|
|
3
|
+
Version: 0.3.38
|
|
4
4
|
Summary: Base package to create NERDD modules
|
|
5
5
|
Author-email: Steffen Hirte <steffen.hirte@univie.ac.at>
|
|
6
6
|
Maintainer-email: Steffen Hirte <steffen.hirte@univie.ac.at>
|
|
7
|
-
License: BSD
|
|
8
|
-
|
|
9
|
-
Copyright (c) 2023 - present, The Computational Drug Discovery and Design Group (COMP3D)
|
|
10
|
-
|
|
11
|
-
Redistribution and use in source and binary forms, with or without
|
|
12
|
-
modification, are permitted provided that the following conditions are met:
|
|
13
|
-
|
|
14
|
-
1. Redistributions of source code must retain the above copyright notice, this
|
|
15
|
-
list of conditions and the following disclaimer.
|
|
16
|
-
|
|
17
|
-
2. Redistributions in binary form must reproduce the above copyright notice,
|
|
18
|
-
this list of conditions and the following disclaimer in the documentation
|
|
19
|
-
and/or other materials provided with the distribution.
|
|
20
|
-
|
|
21
|
-
3. Neither the name of the copyright holder nor the names of its
|
|
22
|
-
contributors may be used to endorse or promote products derived from
|
|
23
|
-
this software without specific prior written permission.
|
|
24
|
-
|
|
25
|
-
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
26
|
-
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
27
|
-
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
28
|
-
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
29
|
-
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
30
|
-
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
31
|
-
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
32
|
-
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
33
|
-
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
34
|
-
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
7
|
+
License-Expression: BSD-3-Clause
|
|
35
8
|
Project-URL: Repository, https://github.com/molinfo-vienna/nerdd-module
|
|
36
9
|
Keywords: science,research,development,nerdd
|
|
37
10
|
Classifier: Intended Audience :: Science/Research
|
|
38
11
|
Classifier: Intended Audience :: Developers
|
|
39
|
-
Classifier: License :: OSI Approved :: BSD License
|
|
40
12
|
Classifier: Programming Language :: Python
|
|
41
13
|
Classifier: Topic :: Software Development
|
|
42
14
|
Classifier: Topic :: Scientific/Engineering
|
|
@@ -49,8 +49,8 @@ nerdd_module/model/assign_name_step.py
|
|
|
49
49
|
nerdd_module/model/convert_representations_step.py
|
|
50
50
|
nerdd_module/model/enforce_schema_step.py
|
|
51
51
|
nerdd_module/model/model.py
|
|
52
|
+
nerdd_module/model/prediction_step.py
|
|
52
53
|
nerdd_module/model/read_input_step.py
|
|
53
|
-
nerdd_module/model/simple_model.py
|
|
54
54
|
nerdd_module/model/write_output_step.py
|
|
55
55
|
nerdd_module/output/__init__.py
|
|
56
56
|
nerdd_module/output/csv_writer.py
|
|
@@ -4,10 +4,11 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "nerdd-module"
|
|
7
|
-
version = "0.3.36"
|
|
7
|
+
version = "0.3.38"
|
|
8
8
|
description = "Base package to create NERDD modules"
|
|
9
9
|
readme = "README.md"
|
|
10
|
-
license =
|
|
10
|
+
license = "BSD-3-Clause"
|
|
11
|
+
license-files = ["LICENSE"]
|
|
11
12
|
authors = [{ name = "Steffen Hirte", email = "steffen.hirte@univie.ac.at" }]
|
|
12
13
|
maintainers = [{ name = "Steffen Hirte", email = "steffen.hirte@univie.ac.at" }]
|
|
13
14
|
dependencies = [
|
|
@@ -29,8 +30,6 @@ classifiers = [
|
|
|
29
30
|
# audience
|
|
30
31
|
"Intended Audience :: Science/Research",
|
|
31
32
|
"Intended Audience :: Developers",
|
|
32
|
-
# license
|
|
33
|
-
"License :: OSI Approved :: BSD License",
|
|
34
33
|
# programming language
|
|
35
34
|
"Programming Language :: Python",
|
|
36
35
|
# topics
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/config/search_yaml_configuration.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/converters/problem_list_converter.py
RENAMED
|
File without changes
|
{nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/converters/representation_converter.py
RENAMED
|
File without changes
|
{nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/converters/source_list_converter.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/model/convert_representations_step.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/preprocessing/check_valid_smiles.py
RENAMED
|
File without changes
|
{nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/preprocessing/chembl_structure_pipeline.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/preprocessing/preprocessing_step.py
RENAMED
|
File without changes
|
{nerdd_module-0.3.36 → nerdd_module-0.3.38}/nerdd_module/preprocessing/remove_stereochemistry.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|