nerdd-module 0.2.4__tar.gz → 0.2.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {nerdd-module-0.2.4 → nerdd_module-0.2.6}/PKG-INFO +6 -3
- {nerdd-module-0.2.4 → nerdd_module-0.2.6}/nerdd_module/abstract_model.py +16 -11
- {nerdd-module-0.2.4 → nerdd_module-0.2.6}/nerdd_module/cli.py +1 -1
- nerdd_module-0.2.6/nerdd_module/config/default_configuration.py +41 -0
- {nerdd-module-0.2.4 → nerdd_module-0.2.6}/nerdd_module/config/merged_configuration.py +2 -0
- {nerdd-module-0.2.4/nerdd_module/io → nerdd_module-0.2.6/nerdd_module/input}/__init__.py +0 -4
- {nerdd-module-0.2.4/nerdd_module/io → nerdd_module-0.2.6/nerdd_module/input}/file_reader.py +21 -10
- nerdd_module-0.2.6/nerdd_module/input/reader_registry.py +64 -0
- nerdd_module-0.2.6/nerdd_module/output/__init__.py +1 -0
- {nerdd-module-0.2.4/nerdd_module/io → nerdd_module-0.2.6/nerdd_module/output}/csv_writer.py +1 -1
- {nerdd-module-0.2.4 → nerdd_module-0.2.6}/nerdd_module/polyfills/__init__.py +1 -0
- {nerdd-module-0.2.4/nerdd_module → nerdd_module-0.2.6/nerdd_module/polyfills}/version.py +2 -4
- {nerdd-module-0.2.4 → nerdd_module-0.2.6}/nerdd_module/preprocessing/__init__.py +2 -1
- {nerdd-module-0.2.4 → nerdd_module-0.2.6}/nerdd_module/preprocessing/check_valid_smiles.py +4 -6
- {nerdd-module-0.2.4 → nerdd_module-0.2.6}/nerdd_module/preprocessing/chembl_structure_pipeline.py +3 -3
- {nerdd-module-0.2.4 → nerdd_module-0.2.6}/nerdd_module/preprocessing/filter_by_element.py +2 -2
- {nerdd-module-0.2.4 → nerdd_module-0.2.6}/nerdd_module/preprocessing/filter_by_weight.py +7 -5
- nerdd_module-0.2.6/nerdd_module/preprocessing/sanitize.py +18 -0
- {nerdd-module-0.2.4 → nerdd_module-0.2.6}/nerdd_module/preprocessing/step.py +3 -3
- nerdd_module-0.2.6/nerdd_module/problem.py +13 -0
- {nerdd-module-0.2.4 → nerdd_module-0.2.6}/nerdd_module/tests/checks.py +54 -4
- {nerdd-module-0.2.4 → nerdd_module-0.2.6}/nerdd_module/tests/representations.py +1 -1
- nerdd_module-0.2.6/nerdd_module/version.py +5 -0
- {nerdd-module-0.2.4 → nerdd_module-0.2.6}/nerdd_module.egg-info/PKG-INFO +6 -3
- {nerdd-module-0.2.4 → nerdd_module-0.2.6}/nerdd_module.egg-info/SOURCES.txt +22 -20
- {nerdd-module-0.2.4 → nerdd_module-0.2.6}/nerdd_module.egg-info/requires.txt +5 -1
- {nerdd-module-0.2.4 → nerdd_module-0.2.6}/setup.py +13 -6
- {nerdd-module-0.2.4 → nerdd_module-0.2.6}/tests/conftest.py +1 -1
- {nerdd-module-0.2.4 → nerdd_module-0.2.6}/tests/models/MolWeightModel.py +3 -6
- {nerdd-module-0.2.4 → nerdd_module-0.2.6}/tests/models/MolWeightModelWithExplicitMolIds.py +2 -1
- {nerdd-module-0.2.4 → nerdd_module-0.2.6}/tests/models/MolWeightModelWithExplicitMols.py +3 -5
- {nerdd-module-0.2.4 → nerdd_module-0.2.6}/tests/steps/__init__.py +0 -1
- nerdd-module-0.2.4/nerdd_module/config/default_configuration.py +0 -17
- nerdd-module-0.2.4/nerdd_module/io/reader_registry.py +0 -30
- nerdd-module-0.2.4/nerdd_module/problem.py +0 -8
- nerdd-module-0.2.4/tests/steps/molecules.py +0 -54
- {nerdd-module-0.2.4 → nerdd_module-0.2.6}/LICENSE +0 -0
- {nerdd-module-0.2.4 → nerdd_module-0.2.6}/README.md +0 -0
- {nerdd-module-0.2.4 → nerdd_module-0.2.6}/nerdd_module/__init__.py +0 -0
- {nerdd-module-0.2.4 → nerdd_module-0.2.6}/nerdd_module/config/__init__.py +0 -0
- {nerdd-module-0.2.4 → nerdd_module-0.2.6}/nerdd_module/config/auto_configuration.py +0 -0
- {nerdd-module-0.2.4 → nerdd_module-0.2.6}/nerdd_module/config/configuration.py +0 -0
- {nerdd-module-0.2.4 → nerdd_module-0.2.6}/nerdd_module/config/dict_configuration.py +0 -0
- {nerdd-module-0.2.4 → nerdd_module-0.2.6}/nerdd_module/config/package_configuration.py +0 -0
- {nerdd-module-0.2.4 → nerdd_module-0.2.6}/nerdd_module/config/yaml_configuration.py +0 -0
- {nerdd-module-0.2.4/nerdd_module/io → nerdd_module-0.2.6/nerdd_module/input}/depth_first_explorer.py +0 -0
- {nerdd-module-0.2.4/nerdd_module/io → nerdd_module-0.2.6/nerdd_module/input}/explorer.py +0 -0
- {nerdd-module-0.2.4/nerdd_module/io → nerdd_module-0.2.6/nerdd_module/input}/gzip_reader.py +0 -0
- {nerdd-module-0.2.4/nerdd_module/io → nerdd_module-0.2.6/nerdd_module/input}/inchi_reader.py +0 -0
- {nerdd-module-0.2.4/nerdd_module/io → nerdd_module-0.2.6/nerdd_module/input}/list_reader.py +0 -0
- {nerdd-module-0.2.4/nerdd_module/io → nerdd_module-0.2.6/nerdd_module/input}/mol_reader.py +0 -0
- {nerdd-module-0.2.4/nerdd_module/io → nerdd_module-0.2.6/nerdd_module/input}/reader.py +0 -0
- {nerdd-module-0.2.4/nerdd_module/io → nerdd_module-0.2.6/nerdd_module/input}/sdf_reader.py +0 -0
- {nerdd-module-0.2.4/nerdd_module/io → nerdd_module-0.2.6/nerdd_module/input}/smiles_reader.py +0 -0
- {nerdd-module-0.2.4/nerdd_module/io → nerdd_module-0.2.6/nerdd_module/input}/string_reader.py +0 -0
- {nerdd-module-0.2.4/nerdd_module/io → nerdd_module-0.2.6/nerdd_module/input}/tar_reader.py +0 -0
- {nerdd-module-0.2.4/nerdd_module/io → nerdd_module-0.2.6/nerdd_module/input}/zip_reader.py +0 -0
- {nerdd-module-0.2.4/nerdd_module/io → nerdd_module-0.2.6/nerdd_module/output}/sdf_writer.py +0 -0
- {nerdd-module-0.2.4/nerdd_module/io → nerdd_module-0.2.6/nerdd_module/output}/writer.py +0 -0
- {nerdd-module-0.2.4/nerdd_module/io → nerdd_module-0.2.6/nerdd_module/output}/writer_registry.py +0 -0
- {nerdd-module-0.2.4 → nerdd_module-0.2.6}/nerdd_module/polyfills/files.py +0 -0
- {nerdd-module-0.2.4 → nerdd_module-0.2.6}/nerdd_module/polyfills/get_entry_points.py +0 -0
- {nerdd-module-0.2.4 → nerdd_module-0.2.6}/nerdd_module/preprocessing/empty_pipeline.py +0 -0
- {nerdd-module-0.2.4 → nerdd_module-0.2.6}/nerdd_module/preprocessing/pipeline.py +0 -0
- {nerdd-module-0.2.4 → nerdd_module-0.2.6}/nerdd_module/preprocessing/registry.py +0 -0
- {nerdd-module-0.2.4 → nerdd_module-0.2.6}/nerdd_module/preprocessing/remove_stereochemistry.py +0 -0
- {nerdd-module-0.2.4 → nerdd_module-0.2.6}/nerdd_module/tests/__init__.py +0 -0
- {nerdd-module-0.2.4 → nerdd_module-0.2.6}/nerdd_module/tests/predictions.py +0 -0
- {nerdd-module-0.2.4 → nerdd_module-0.2.6}/nerdd_module.egg-info/dependency_links.txt +0 -0
- {nerdd-module-0.2.4 → nerdd_module-0.2.6}/nerdd_module.egg-info/top_level.txt +0 -0
- {nerdd-module-0.2.4 → nerdd_module-0.2.6}/setup.cfg +0 -0
- {nerdd-module-0.2.4 → nerdd_module-0.2.6}/tests/__init__.py +0 -0
- {nerdd-module-0.2.4 → nerdd_module-0.2.6}/tests/models/AtomicMassModel.py +0 -0
- {nerdd-module-0.2.4 → nerdd_module-0.2.6}/tests/models/__init__.py +0 -0
- {nerdd-module-0.2.4 → nerdd_module-0.2.6}/tests/steps/checks.py +0 -0
- {nerdd-module-0.2.4 → nerdd_module-0.2.6}/tests/steps/predictors.py +0 -0
- {nerdd-module-0.2.4 → nerdd_module-0.2.6}/tests/steps/preprocessing.py +0 -0
- {nerdd-module-0.2.4 → nerdd_module-0.2.6}/tests/test_atom_property_prediction.py +0 -0
- {nerdd-module-0.2.4 → nerdd_module-0.2.6}/tests/test_molecule_property_prediction.py +0 -0
- {nerdd-module-0.2.4 → nerdd_module-0.2.6}/tests/test_preprocessing.py +0 -0
- {nerdd-module-0.2.4 → nerdd_module-0.2.6}/tests/test_reading_formats.py +0 -0
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: nerdd-module
|
|
3
|
-
Version: 0.2.4
|
|
3
|
+
Version: 0.2.6
|
|
4
4
|
Summary: Base package to create NERDD modules
|
|
5
|
-
Home-page: https://github.com/molinfo-vienna/nerdd-module
|
|
5
|
+
Home-page: https://github.com/molinfo-vienna/nerdd-module
|
|
6
6
|
Maintainer: Steffen Hirte
|
|
7
7
|
Maintainer-email: steffen.hirte@univie.ac.at
|
|
8
8
|
License: BSD 3-Clause License
|
|
@@ -33,8 +33,11 @@ Requires-Dist: stringcase>=1.2.0
|
|
|
33
33
|
Requires-Dist: decorator>=5.1.1
|
|
34
34
|
Requires-Dist: importlib-resources>=5; python_version < "3.10"
|
|
35
35
|
Requires-Dist: importlib-metadata>=4.6; python_version < "3.10"
|
|
36
|
-
Requires-Dist: chembl_structure_pipeline>=1.0.0
|
|
37
36
|
Provides-Extra: dev
|
|
37
|
+
Requires-Dist: black; extra == "dev"
|
|
38
|
+
Requires-Dist: isort; extra == "dev"
|
|
39
|
+
Provides-Extra: csp
|
|
40
|
+
Requires-Dist: chembl_structure_pipeline>=1.0.0; extra == "csp"
|
|
38
41
|
Provides-Extra: test
|
|
39
42
|
Requires-Dist: pytest; extra == "test"
|
|
40
43
|
Requires-Dist: pytest-sugar; extra == "test"
|
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
from abc import ABC, abstractmethod
|
|
2
|
-
from typing import
|
|
2
|
+
from typing import Callable, Iterable, List, Tuple, Union
|
|
3
3
|
|
|
4
4
|
import pandas as pd
|
|
5
|
-
from rdkit.Chem import Mol
|
|
5
|
+
from rdkit.Chem import Mol
|
|
6
6
|
|
|
7
7
|
from .config import AutoConfiguration, Configuration
|
|
8
|
-
from .
|
|
8
|
+
from .input import DepthFirstExplorer, MoleculeEntry
|
|
9
9
|
from .preprocessing import Pipeline, Step, registry
|
|
10
|
-
from .problem import Problem
|
|
10
|
+
from .problem import Problem, UnknownProblem
|
|
11
11
|
|
|
12
12
|
__all__ = ["AbstractModel"]
|
|
13
13
|
|
|
@@ -156,17 +156,20 @@ class AbstractModel(ABC):
|
|
|
156
156
|
# (and we assume that the order of the molecules is the same)
|
|
157
157
|
if "mol_id" in df_predictions.columns:
|
|
158
158
|
# check that mol_id contains only valid ids
|
|
159
|
-
assert set(df_predictions.mol_id).issubset(
|
|
160
|
-
|
|
161
|
-
|
|
159
|
+
assert set(df_predictions.mol_id).issubset(set(df_valid_subset.mol_id)), (
|
|
160
|
+
f"The mol_id column contains invalid ids: "
|
|
161
|
+
f"{set(df_predictions.mol_id).difference(set(df_valid_subset.mol_id))}."
|
|
162
|
+
)
|
|
163
|
+
|
|
162
164
|
# use mol_id as index
|
|
163
165
|
df_predictions.set_index("mol_id", drop=True, inplace=True)
|
|
164
166
|
elif "mol" in df_predictions.columns:
|
|
165
167
|
# check that molecule names contain only valid ids
|
|
166
168
|
names = df_predictions.mol.apply(lambda mol: int(mol.GetProp("_Name")))
|
|
167
|
-
assert set(names).issubset(
|
|
168
|
-
|
|
169
|
-
|
|
169
|
+
assert set(names).issubset(set(df_preprocess.mol_id)), (
|
|
170
|
+
f"The mol_id column contains invalid ids: "
|
|
171
|
+
f"{set(df_predictions.mol_id).difference(set(df_valid_subset.mol_id))}."
|
|
172
|
+
)
|
|
170
173
|
|
|
171
174
|
# use mol_id as index
|
|
172
175
|
df_predictions.set_index(
|
|
@@ -184,6 +187,8 @@ class AbstractModel(ABC):
|
|
|
184
187
|
df_valid_subset.index.astype("int64"), inplace=True
|
|
185
188
|
)
|
|
186
189
|
|
|
190
|
+
# TODO: check derivative_id or atom_id
|
|
191
|
+
|
|
187
192
|
# add column that indicates whether a molecule was missing
|
|
188
193
|
missing_mol_ids = set(df_preprocess.mol_id).difference(df_predictions.index)
|
|
189
194
|
df_preprocess["missing"] = df_preprocess.mol_id.isin(missing_mol_ids)
|
|
@@ -212,7 +217,7 @@ class AbstractModel(ABC):
|
|
|
212
217
|
else:
|
|
213
218
|
df_result["errors"] = df_result.preprocessing_errors
|
|
214
219
|
df_result["errors"] = df_result.errors + df_result.missing.map(
|
|
215
|
-
lambda x: [
|
|
220
|
+
lambda x: [UnknownProblem()] if x else []
|
|
216
221
|
)
|
|
217
222
|
df_result.drop(columns=["missing", "preprocessing_errors"], inplace=True)
|
|
218
223
|
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
from stringcase import snakecase
|
|
2
|
+
|
|
3
|
+
from ..polyfills import version
|
|
4
|
+
from .configuration import Configuration
|
|
5
|
+
|
|
6
|
+
__all__ = ["DefaultConfiguration"]
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class DefaultConfiguration(Configuration):
|
|
10
|
+
def __init__(self, nerdd_module):
|
|
11
|
+
super().__init__()
|
|
12
|
+
|
|
13
|
+
# generate a name from the module name
|
|
14
|
+
class_name = nerdd_module.__class__.__name__
|
|
15
|
+
if class_name.endswith("Model"):
|
|
16
|
+
# remove the "Model" suffix
|
|
17
|
+
# e.g. SkinDoctorModel -> SkinDoctor
|
|
18
|
+
class_name = class_name[: -len("Model")]
|
|
19
|
+
|
|
20
|
+
# convert the class name to snake case
|
|
21
|
+
# e.g. SkinDoctor -> skin_doctor
|
|
22
|
+
name = snakecase(class_name)
|
|
23
|
+
|
|
24
|
+
# append version to the configuration
|
|
25
|
+
try:
|
|
26
|
+
module = nerdd_module.__module__
|
|
27
|
+
root_module = module.split(".", 1)[0]
|
|
28
|
+
version_ = version(root_module)
|
|
29
|
+
except ModuleNotFoundError:
|
|
30
|
+
pass
|
|
31
|
+
|
|
32
|
+
self.config = dict(
|
|
33
|
+
name=name,
|
|
34
|
+
version=version_,
|
|
35
|
+
task="molecular_property_prediction",
|
|
36
|
+
job_parameters=[],
|
|
37
|
+
result_properties=[],
|
|
38
|
+
)
|
|
39
|
+
|
|
40
|
+
def _get_dict(self):
|
|
41
|
+
return self.config
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
from .csv_writer import *
|
|
2
1
|
from .depth_first_explorer import *
|
|
3
2
|
from .file_reader import *
|
|
4
3
|
from .gzip_reader import *
|
|
@@ -8,10 +7,7 @@ from .mol_reader import *
|
|
|
8
7
|
from .reader import *
|
|
9
8
|
from .reader_registry import *
|
|
10
9
|
from .sdf_reader import *
|
|
11
|
-
from .sdf_writer import *
|
|
12
10
|
from .smiles_reader import *
|
|
13
11
|
from .string_reader import *
|
|
14
12
|
from .tar_reader import *
|
|
15
|
-
from .writer import *
|
|
16
|
-
from .writer_registry import *
|
|
17
13
|
from .zip_reader import *
|
|
@@ -1,6 +1,5 @@
|
|
|
1
|
-
import os
|
|
2
1
|
from pathlib import Path
|
|
3
|
-
from typing import Generator
|
|
2
|
+
from typing import Generator, Tuple
|
|
4
3
|
|
|
5
4
|
from .reader import MoleculeEntry, Reader
|
|
6
5
|
from .reader_registry import register_reader
|
|
@@ -8,30 +7,42 @@ from .reader_registry import register_reader
|
|
|
8
7
|
__all__ = ["FileReader"]
|
|
9
8
|
|
|
10
9
|
|
|
11
|
-
@register_reader
|
|
10
|
+
@register_reader("data_dir")
|
|
12
11
|
class FileReader(Reader):
|
|
13
|
-
def __init__(self, data_dir=
|
|
12
|
+
def __init__(self, data_dir=None):
|
|
14
13
|
super().__init__()
|
|
15
|
-
self.data_dir =
|
|
14
|
+
self.data_dir = data_dir
|
|
15
|
+
if self.data_dir is not None:
|
|
16
|
+
self.data_dir = Path(self.data_dir)
|
|
16
17
|
|
|
17
18
|
def read(self, filename, explore) -> Generator[MoleculeEntry, None, None]:
|
|
18
19
|
assert isinstance(filename, str), "input must be a string"
|
|
19
20
|
|
|
21
|
+
# convert filename to path
|
|
20
22
|
try:
|
|
21
23
|
path = Path(filename)
|
|
22
|
-
|
|
23
|
-
if not path.is_absolute():
|
|
24
|
-
path = self.data_dir / path
|
|
25
24
|
except:
|
|
26
25
|
raise ValueError("input must be a valid path")
|
|
27
26
|
|
|
28
|
-
|
|
27
|
+
# convert to absolute path
|
|
28
|
+
if not path.is_absolute():
|
|
29
|
+
if self.data_dir is not None:
|
|
30
|
+
path = self.data_dir / path
|
|
31
|
+
else:
|
|
32
|
+
path = Path(".") / path
|
|
33
|
+
|
|
34
|
+
# check that the file is within the data_dir
|
|
35
|
+
assert (
|
|
36
|
+
self.data_dir is None or self.data_dir in path.parents
|
|
37
|
+
), "input must be a relative path"
|
|
38
|
+
|
|
39
|
+
# check that the file exists
|
|
29
40
|
assert path.exists(), "input must be a valid file"
|
|
30
41
|
|
|
31
42
|
with open(path, "rb") as f:
|
|
32
43
|
for entry in explore(f):
|
|
33
44
|
if len(entry.source) == 1 and entry.source[0] == "raw_input":
|
|
34
|
-
source = tuple()
|
|
45
|
+
source: Tuple[str, ...] = tuple()
|
|
35
46
|
else:
|
|
36
47
|
source = entry.source
|
|
37
48
|
yield entry._replace(source=tuple([filename, *source]))
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
from functools import lru_cache
|
|
2
|
+
from typing import Dict, Generator, List, Tuple, Type
|
|
3
|
+
|
|
4
|
+
from .reader import Reader
|
|
5
|
+
|
|
6
|
+
__all__ = ["ReaderRegistry", "register_reader"]
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
# lru_cache makes the registry a singleton
|
|
10
|
+
@lru_cache(maxsize=1)
|
|
11
|
+
class ReaderRegistry:
|
|
12
|
+
def __init__(self):
|
|
13
|
+
self._factories: List[Tuple[Type[Reader], Tuple[str, ...], Dict[str, str]]] = []
|
|
14
|
+
self._config = {}
|
|
15
|
+
|
|
16
|
+
def _create_reader(self, ReaderClass: Type[Reader], *args, **kwargs) -> Reader:
|
|
17
|
+
# translate all args
|
|
18
|
+
args = tuple(self._config.get(arg, None) for arg in args)
|
|
19
|
+
# translate all kwargs
|
|
20
|
+
kwargs = {
|
|
21
|
+
k: self._config.get(v, None) for k, v in kwargs.items() if v in self._config
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
return ReaderClass(*args, **kwargs)
|
|
25
|
+
|
|
26
|
+
def register(self, ReaderClass: Type[Reader], *args: str, **kwargs: str):
|
|
27
|
+
assert issubclass(ReaderClass, Reader)
|
|
28
|
+
assert all([isinstance(arg, str) for arg in args])
|
|
29
|
+
assert all(
|
|
30
|
+
[isinstance(k, str) and isinstance(v, str) for k, v in kwargs.items()]
|
|
31
|
+
)
|
|
32
|
+
self._factories.append((ReaderClass, args, kwargs))
|
|
33
|
+
|
|
34
|
+
def readers(self) -> Generator[Reader, None, None]:
|
|
35
|
+
for reader, args, kwargs in self._factories:
|
|
36
|
+
yield self._create_reader(reader, *args, **kwargs)
|
|
37
|
+
|
|
38
|
+
def __iter__(self):
|
|
39
|
+
return iter(self.readers())
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def register_reader(*args, **kwargs):
|
|
43
|
+
def wrapper(cls, *args, **kwargs):
|
|
44
|
+
ReaderRegistry().register(cls, *args, **kwargs)
|
|
45
|
+
return cls
|
|
46
|
+
|
|
47
|
+
# Case 1: first argument is a class
|
|
48
|
+
# --> decorator is used without arguments
|
|
49
|
+
# @register_reader
|
|
50
|
+
# class F:
|
|
51
|
+
# ...
|
|
52
|
+
if len(args) > 0 and isinstance(args[0], type):
|
|
53
|
+
return wrapper(args[0], *args[1:], **kwargs)
|
|
54
|
+
|
|
55
|
+
# Case 2: first argument is not a class
|
|
56
|
+
# --> decorator is used with arguments
|
|
57
|
+
# @register_reader("blah")
|
|
58
|
+
# class F:
|
|
59
|
+
# ...
|
|
60
|
+
def inner(cls):
|
|
61
|
+
assert isinstance(cls, type), "Decorator must be used with a class"
|
|
62
|
+
return wrapper(cls, *args, **kwargs)
|
|
63
|
+
|
|
64
|
+
return inner
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .writer_registry import *
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
from typing import List, Tuple
|
|
1
|
+
from typing import List, Optional, Tuple
|
|
2
2
|
|
|
3
3
|
from rdkit.Chem import Mol, MolFromSmiles, MolToSmiles
|
|
4
4
|
|
|
5
|
-
from ..problem import Problem
|
|
5
|
+
from ..problem import InvalidSmiles, Problem
|
|
6
6
|
from .step import Step
|
|
7
7
|
|
|
8
8
|
__all__ = ["CheckValidSmiles"]
|
|
@@ -14,15 +14,13 @@ class CheckValidSmiles(Step):
|
|
|
14
14
|
def __init__(self):
|
|
15
15
|
super().__init__()
|
|
16
16
|
|
|
17
|
-
def _run(self, mol: Mol) -> Tuple[Mol, List[Problem]]:
|
|
17
|
+
def _run(self, mol: Mol) -> Tuple[Optional[Mol], List[Problem]]:
|
|
18
18
|
errors = []
|
|
19
19
|
|
|
20
20
|
smi = MolToSmiles(mol, True)
|
|
21
21
|
check_mol = MolFromSmiles(smi)
|
|
22
22
|
if check_mol is None:
|
|
23
|
-
errors.append(
|
|
24
|
-
Problem("invalid_smiles", "Cannot convert molecule to SMILES")
|
|
25
|
-
)
|
|
23
|
+
errors.append(InvalidSmiles())
|
|
26
24
|
mol = None
|
|
27
25
|
|
|
28
26
|
return mol, errors
|
{nerdd-module-0.2.4 → nerdd_module-0.2.6}/nerdd_module/preprocessing/chembl_structure_pipeline.py
RENAMED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import warnings
|
|
2
|
-
from typing import List, Tuple
|
|
2
|
+
from typing import List, Optional, Tuple
|
|
3
3
|
|
|
4
4
|
from rdkit.Chem import Mol
|
|
5
5
|
from rdkit.rdBase import BlockLogs
|
|
@@ -41,7 +41,7 @@ class StandardizeWithCsp(Step):
|
|
|
41
41
|
if import_error is not None:
|
|
42
42
|
raise import_error
|
|
43
43
|
|
|
44
|
-
def _run(self, mol: Mol) -> Tuple[Mol, List[Problem]]:
|
|
44
|
+
def _run(self, mol: Mol) -> Tuple[Optional[Mol], List[Problem]]:
|
|
45
45
|
errors = []
|
|
46
46
|
|
|
47
47
|
# chembl structure pipeline cannot handle molecules with 3D coordinates
|
|
@@ -65,7 +65,7 @@ class GetParentMol(Step):
|
|
|
65
65
|
if import_error is not None:
|
|
66
66
|
raise import_error
|
|
67
67
|
|
|
68
|
-
def _run(self, mol: Mol) -> Tuple[Mol, List[Problem]]:
|
|
68
|
+
def _run(self, mol: Mol) -> Tuple[Optional[Mol], List[Problem]]:
|
|
69
69
|
errors = []
|
|
70
70
|
|
|
71
71
|
# chembl structure pipeline cannot handle molecules with 3D coordinates
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Iterable, List, Tuple
|
|
1
|
+
from typing import Iterable, List, Optional, Tuple
|
|
2
2
|
|
|
3
3
|
from rdkit.Chem import Mol
|
|
4
4
|
|
|
@@ -14,7 +14,7 @@ class FilterByElement(Step):
|
|
|
14
14
|
self.allowed_elements = set(allowed_elements)
|
|
15
15
|
self.remove_invalid_molecules = remove_invalid_molecules
|
|
16
16
|
|
|
17
|
-
def _run(self, mol: Mol) -> Tuple[Mol, List[Problem]]:
|
|
17
|
+
def _run(self, mol: Mol) -> Tuple[Optional[Mol], List[Problem]]:
|
|
18
18
|
errors = []
|
|
19
19
|
result_mol = mol
|
|
20
20
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import List, Tuple
|
|
1
|
+
from typing import List, Optional, Tuple
|
|
2
2
|
|
|
3
3
|
from rdkit.Chem import Mol
|
|
4
4
|
from rdkit.Chem.Descriptors import MolWt
|
|
@@ -14,7 +14,7 @@ class FilterByWeight(Step):
|
|
|
14
14
|
self.max_weight = max_weight
|
|
15
15
|
self.remove_invalid_molecules = remove_invalid_molecules
|
|
16
16
|
|
|
17
|
-
def _run(self, mol: Mol) -> Tuple[Mol, List[Problem]]:
|
|
17
|
+
def _run(self, mol: Mol) -> Tuple[Optional[Mol], List[Problem]]:
|
|
18
18
|
errors = []
|
|
19
19
|
|
|
20
20
|
weight = MolWt(mol)
|
|
@@ -25,9 +25,11 @@ class FilterByWeight(Step):
|
|
|
25
25
|
result_mol = mol
|
|
26
26
|
errors.append(
|
|
27
27
|
Problem(
|
|
28
|
-
"invalid_weight",
|
|
29
|
-
|
|
30
|
-
|
|
28
|
+
type="invalid_weight",
|
|
29
|
+
message=(
|
|
30
|
+
f"Molecular weight {weight:.2f} out of range "
|
|
31
|
+
f"[{self.min_weight}, {self.max_weight}]"
|
|
32
|
+
),
|
|
31
33
|
)
|
|
32
34
|
)
|
|
33
35
|
else:
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
from rdkit.Chem import SanitizeMol
|
|
2
|
+
|
|
3
|
+
from .step import Step
|
|
4
|
+
|
|
5
|
+
__all__ = ["Sanitize"]
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class Sanitize(Step):
|
|
9
|
+
def __init__(self):
|
|
10
|
+
super().__init__()
|
|
11
|
+
|
|
12
|
+
def _run(self, mol):
|
|
13
|
+
errors = []
|
|
14
|
+
|
|
15
|
+
# sanitize molecule
|
|
16
|
+
SanitizeMol(mol)
|
|
17
|
+
|
|
18
|
+
return mol, errors
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from abc import ABC, abstractmethod
|
|
2
|
-
from typing import List, Tuple
|
|
2
|
+
from typing import List, Optional, Tuple
|
|
3
3
|
|
|
4
4
|
from rdkit.Chem import Mol
|
|
5
5
|
|
|
@@ -12,14 +12,14 @@ class Step(ABC):
|
|
|
12
12
|
def __init__(self):
|
|
13
13
|
pass
|
|
14
14
|
|
|
15
|
-
def run(self, mol: Mol) -> Tuple[Mol, List[Problem]]:
|
|
15
|
+
def run(self, mol: Mol) -> Tuple[Optional[Mol], List[Problem]]:
|
|
16
16
|
"""
|
|
17
17
|
Runs the step on a molecule.
|
|
18
18
|
"""
|
|
19
19
|
return self._run(mol)
|
|
20
20
|
|
|
21
21
|
@abstractmethod
|
|
22
|
-
def _run(self, mol: Mol) -> Tuple[Mol, List[Problem]]:
|
|
22
|
+
def _run(self, mol: Mol) -> Tuple[Optional[Mol], List[Problem]]:
|
|
23
23
|
"""
|
|
24
24
|
Runs the step on a molecule.
|
|
25
25
|
"""
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
from typing import NamedTuple
|
|
2
|
+
|
|
3
|
+
__all__ = ["Problem", "InvalidSmiles", "UnknownProblem"]
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class Problem(NamedTuple):
|
|
7
|
+
type: str
|
|
8
|
+
message: str
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
InvalidSmiles = lambda: Problem(type="invalid_smiles", message="Invalid SMILES string")
|
|
12
|
+
|
|
13
|
+
UnknownProblem = lambda: Problem(type="unknown", message="Unknown error occurred")
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import json
|
|
2
|
+
from ast import literal_eval
|
|
2
3
|
|
|
3
4
|
import numpy as np
|
|
4
5
|
import pandas as pd
|
|
@@ -39,14 +40,17 @@ def check_column_range(subset, column_name, low, high):
|
|
|
39
40
|
|
|
40
41
|
|
|
41
42
|
@then(parsers.parse("the value in column '{column_name}' should be '{expected_value}'"))
|
|
42
|
-
def check_column_value(
|
|
43
|
-
|
|
43
|
+
def check_column_value(subset, column_name, expected_value):
|
|
44
|
+
if len(subset) == 0:
|
|
45
|
+
return
|
|
46
|
+
|
|
47
|
+
value = subset[column_name].iloc[0]
|
|
44
48
|
|
|
45
49
|
# expected value is always provided as string
|
|
46
50
|
# try to convert to float if possible
|
|
47
51
|
try:
|
|
48
|
-
expected_value =
|
|
49
|
-
except
|
|
52
|
+
expected_value = literal_eval(expected_value)
|
|
53
|
+
except:
|
|
50
54
|
pass
|
|
51
55
|
|
|
52
56
|
if expected_value == "(none)":
|
|
@@ -132,3 +136,49 @@ def check_column_length(subset, column_name, length):
|
|
|
132
136
|
assert (
|
|
133
137
|
subset[column_name].map(lambda x: len(x) > length)
|
|
134
138
|
).all(), f"Column {column_name} has unexpected length"
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
@then(
|
|
142
|
+
parsers.parse(
|
|
143
|
+
"when '{condition_column_name}' is '{condition_value}' "
|
|
144
|
+
"the value in column '{column_name}' should be '{expected_value}'"
|
|
145
|
+
)
|
|
146
|
+
)
|
|
147
|
+
def check_conditional_column_value(
|
|
148
|
+
subset, condition_column_name, condition_value, column_name, expected_value
|
|
149
|
+
):
|
|
150
|
+
# expected value is always provided as string
|
|
151
|
+
# try to parse it as a Python literal (e.g. a number) if possible
|
|
152
|
+
try:
|
|
153
|
+
expected_value = literal_eval(expected_value)
|
|
154
|
+
except:
|
|
155
|
+
pass
|
|
156
|
+
|
|
157
|
+
# same for condition value
|
|
158
|
+
try:
|
|
159
|
+
condition_value = literal_eval(condition_value)
|
|
160
|
+
except:
|
|
161
|
+
pass
|
|
162
|
+
|
|
163
|
+
# condition value can be (none) to indicate None
|
|
164
|
+
if condition_value == "(none)":
|
|
165
|
+
subset = subset[pd.isnull(subset[condition_column_name])]
|
|
166
|
+
else:
|
|
167
|
+
subset = subset[subset[condition_column_name] == condition_value]
|
|
168
|
+
|
|
169
|
+
value = subset[column_name]
|
|
170
|
+
assert (
|
|
171
|
+
len(value) > 0
|
|
172
|
+
), f"No rows found for condition {condition_column_name} == {condition_value}"
|
|
173
|
+
|
|
174
|
+
# expected value can be (none) to indicate None
|
|
175
|
+
if expected_value == "(none)":
|
|
176
|
+
# if expected_value is the magic string "(none)", we expect None
|
|
177
|
+
assert pd.isnull(
|
|
178
|
+
value
|
|
179
|
+
).all(), f"Column {column_name} is assigned to {value} != None"
|
|
180
|
+
else:
|
|
181
|
+
# otherwise, we expect the value to be equal to the expected value
|
|
182
|
+
assert (
|
|
183
|
+
value == expected_value
|
|
184
|
+
).all(), f"Column {column_name} is assigned to {value} != {expected_value}"
|
|
@@ -45,7 +45,7 @@ def representations_from_molecules(molecules, input_type):
|
|
|
45
45
|
),
|
|
46
46
|
target_fixture="molecules",
|
|
47
47
|
)
|
|
48
|
-
def molecules(num, num_none, random_seed):
|
|
48
|
+
def molecules(num, num_none, random_seed=0):
|
|
49
49
|
result = None
|
|
50
50
|
|
|
51
51
|
# pytest-bdd and hypothesis don't play well together (yet)
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: nerdd-module
|
|
3
|
-
Version: 0.2.4
|
|
3
|
+
Version: 0.2.6
|
|
4
4
|
Summary: Base package to create NERDD modules
|
|
5
|
-
Home-page: https://github.com/molinfo-vienna/nerdd-module
|
|
5
|
+
Home-page: https://github.com/molinfo-vienna/nerdd-module
|
|
6
6
|
Maintainer: Steffen Hirte
|
|
7
7
|
Maintainer-email: steffen.hirte@univie.ac.at
|
|
8
8
|
License: BSD 3-Clause License
|
|
@@ -33,8 +33,11 @@ Requires-Dist: stringcase>=1.2.0
|
|
|
33
33
|
Requires-Dist: decorator>=5.1.1
|
|
34
34
|
Requires-Dist: importlib-resources>=5; python_version < "3.10"
|
|
35
35
|
Requires-Dist: importlib-metadata>=4.6; python_version < "3.10"
|
|
36
|
-
Requires-Dist: chembl_structure_pipeline>=1.0.0
|
|
37
36
|
Provides-Extra: dev
|
|
37
|
+
Requires-Dist: black; extra == "dev"
|
|
38
|
+
Requires-Dist: isort; extra == "dev"
|
|
39
|
+
Provides-Extra: csp
|
|
40
|
+
Requires-Dist: chembl_structure_pipeline>=1.0.0; extra == "csp"
|
|
38
41
|
Provides-Extra: test
|
|
39
42
|
Requires-Dist: pytest; extra == "test"
|
|
40
43
|
Requires-Dist: pytest-sugar; extra == "test"
|
|
@@ -19,28 +19,30 @@ nerdd_module/config/dict_configuration.py
|
|
|
19
19
|
nerdd_module/config/merged_configuration.py
|
|
20
20
|
nerdd_module/config/package_configuration.py
|
|
21
21
|
nerdd_module/config/yaml_configuration.py
|
|
22
|
-
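nerdd_module/io/__init__.py
|
|
23
|
-
nerdd_module/io/csv_writer.py
|
|
24
|
-
nerdd_module/io/depth_first_explorer.py
|
|
25
|
-
nerdd_module/io/explorer.py
|
|
26
|
-
nerdd_module/io/file_reader.py
|
|
27
|
-
nerdd_module/io/gzip_reader.py
|
|
28
|
-
nerdd_module/io/inchi_reader.py
|
|
29
|
-
nerdd_module/io/list_reader.py
|
|
30
|
-
nerdd_module/io/mol_reader.py
|
|
31
|
-
nerdd_module/io/reader.py
|
|
32
|
-
nerdd_module/io/reader_registry.py
|
|
33
|
-
nerdd_module/io/sdf_reader.py
|
|
34
|
-
nerdd_module/io/sdf_writer.py
|
|
35
|
-
nerdd_module/io/smiles_reader.py
|
|
36
|
-
nerdd_module/io/string_reader.py
|
|
37
|
-
nerdd_module/io/tar_reader.py
|
|
38
|
-
nerdd_module/io/writer.py
|
|
39
|
-
nerdd_module/io/writer_registry.py
|
|
40
|
-
nerdd_module/io/zip_reader.py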
|
|
22
|
+
nerdd_module/input/__init__.py
|
|
23
|
+
nerdd_module/input/depth_first_explorer.py
|
|
24
|
+
nerdd_module/input/explorer.py
|
|
25
|
+
nerdd_module/input/file_reader.py
|
|
26
|
+
nerdd_module/input/gzip_reader.py
|
|
27
|
+
nerdd_module/input/inchi_reader.py
|
|
28
|
+
nerdd_module/input/list_reader.py
|
|
29
|
+
nerdd_module/input/mol_reader.py
|
|
30
|
+
nerdd_module/input/reader.py
|
|
31
|
+
nerdd_module/input/reader_registry.py
|
|
32
|
+
nerdd_module/input/sdf_reader.py
|
|
33
|
+
nerdd_module/input/smiles_reader.py
|
|
34
|
+
nerdd_module/input/string_reader.py
|
|
35
|
+
nerdd_module/input/tar_reader.py
|
|
36
|
+
nerdd_module/input/zip_reader.py
|
|
37
|
+
nerdd_module/output/__init__.py
|
|
38
|
+
nerdd_module/output/csv_writer.py
|
|
39
|
+
nerdd_module/output/sdf_writer.py
|
|
40
|
+
nerdd_module/output/writer.py
|
|
41
|
+
nerdd_module/output/writer_registry.py
|
|
41
42
|
nerdd_module/polyfills/__init__.py
|
|
42
43
|
nerdd_module/polyfills/files.py
|
|
43
44
|
nerdd_module/polyfills/get_entry_points.py
|
|
45
|
+
nerdd_module/polyfills/version.py
|
|
44
46
|
nerdd_module/preprocessing/__init__.py
|
|
45
47
|
nerdd_module/preprocessing/check_valid_smiles.py
|
|
46
48
|
nerdd_module/preprocessing/chembl_structure_pipeline.py
|
|
@@ -50,6 +52,7 @@ nerdd_module/preprocessing/filter_by_weight.py
|
|
|
50
52
|
nerdd_module/preprocessing/pipeline.py
|
|
51
53
|
nerdd_module/preprocessing/registry.py
|
|
52
54
|
nerdd_module/preprocessing/remove_stereochemistry.py
|
|
55
|
+
nerdd_module/preprocessing/sanitize.py
|
|
53
56
|
nerdd_module/preprocessing/step.py
|
|
54
57
|
nerdd_module/tests/__init__.py
|
|
55
58
|
nerdd_module/tests/checks.py
|
|
@@ -68,6 +71,5 @@ tests/models/MolWeightModelWithExplicitMols.py
|
|
|
68
71
|
tests/models/__init__.py
|
|
69
72
|
tests/steps/__init__.py
|
|
70
73
|
tests/steps/checks.py
|
|
71
|
-
tests/steps/molecules.py
|
|
72
74
|
tests/steps/predictors.py
|
|
73
75
|
tests/steps/preprocessing.py
|
|
@@ -5,13 +5,17 @@ filetype~=1.2.0
|
|
|
5
5
|
rich-click>=1.7.1
|
|
6
6
|
stringcase>=1.2.0
|
|
7
7
|
decorator>=5.1.1
|
|
8
|
-
chembl_structure_pipeline>=1.0.0
|
|
9
8
|
|
|
10
9
|
[:python_version < "3.10"]
|
|
11
10
|
importlib-resources>=5
|
|
12
11
|
importlib-metadata>=4.6
|
|
13
12
|
|
|
13
|
+
[csp]
|
|
14
|
+
chembl_structure_pipeline>=1.0.0
|
|
15
|
+
|
|
14
16
|
[dev]
|
|
17
|
+
black
|
|
18
|
+
isort
|
|
15
19
|
|
|
16
20
|
[docs]
|
|
17
21
|
mkdocs
|
|
@@ -16,11 +16,11 @@ rdkit_requirement = ["rdkit>=2022.3.3"] if not rdkit_installed else []
|
|
|
16
16
|
|
|
17
17
|
setup(
|
|
18
18
|
name="nerdd-module",
|
|
19
|
-
version="0.2.4",
|
|
19
|
+
version="0.2.6",
|
|
20
20
|
maintainer="Steffen Hirte",
|
|
21
21
|
maintainer_email="steffen.hirte@univie.ac.at",
|
|
22
22
|
packages=find_packages(),
|
|
23
|
-
url="https://github.com/molinfo-vienna/nerdd-module
|
|
23
|
+
url="https://github.com/molinfo-vienna/nerdd-module",
|
|
24
24
|
description="Base package to create NERDD modules",
|
|
25
25
|
license="BSD 3-Clause License",
|
|
26
26
|
long_description=open("README.md").read(),
|
|
@@ -36,12 +36,19 @@ setup(
|
|
|
36
36
|
# install importlib-resources and importlib-metadata for old Python versions
|
|
37
37
|
"importlib-resources>=5; python_version<'3.10'",
|
|
38
38
|
"importlib-metadata>=4.6; python_version<'3.10'",
|
|
39
|
-
# note: version 1.0.0 of chembl_structure_pipeline is not available on pypi,
|
|
40
|
-
# but it could potentially be installed from github
|
|
41
|
-
"chembl_structure_pipeline>=1.0.0",
|
|
42
39
|
],
|
|
43
40
|
extras_require={
|
|
44
|
-
"dev": [
|
|
41
|
+
"dev": [
|
|
42
|
+
"black",
|
|
43
|
+
"isort",
|
|
44
|
+
],
|
|
45
|
+
"csp": [
|
|
46
|
+
# note: version 1.0.0 of chembl_structure_pipeline is not available on pypi
|
|
47
|
+
# BUT: maybe it was already installed in the current environment manually
|
|
48
|
+
# other note: chembl_structure_pipeline *always* installs a recent version
|
|
49
|
+
# of rdkit
|
|
50
|
+
"chembl_structure_pipeline>=1.0.0"
|
|
51
|
+
],
|
|
45
52
|
"test": [
|
|
46
53
|
"pytest",
|
|
47
54
|
"pytest-sugar",
|
|
@@ -1,20 +1,17 @@
|
|
|
1
1
|
import pandas as pd
|
|
2
2
|
from nerdd_module import AbstractModel
|
|
3
|
+
from nerdd_module.preprocessing import Sanitize
|
|
3
4
|
from rdkit.Chem.Descriptors import MolWt
|
|
4
5
|
|
|
5
6
|
__all__ = ["MolWeightModel"]
|
|
6
7
|
|
|
7
8
|
|
|
8
9
|
class MolWeightModel(AbstractModel):
|
|
9
|
-
def __init__(self, preprocessing_pipeline=
|
|
10
|
+
def __init__(self, preprocessing_pipeline=[Sanitize()], **kwargs):
|
|
10
11
|
super().__init__(preprocessing_pipeline, **kwargs)
|
|
11
12
|
|
|
12
13
|
def _predict_mols(self, mols, multiplier):
|
|
13
|
-
return pd.DataFrame(
|
|
14
|
-
{
|
|
15
|
-
"weight": [MolWt(m) * multiplier for m in mols],
|
|
16
|
-
}
|
|
17
|
-
)
|
|
14
|
+
return pd.DataFrame({"weight": [MolWt(m) * multiplier for m in mols]})
|
|
18
15
|
|
|
19
16
|
def _get_config(self):
|
|
20
17
|
return {
|
|
@@ -1,12 +1,13 @@
|
|
|
1
1
|
import pandas as pd
|
|
2
2
|
from nerdd_module import AbstractModel
|
|
3
|
+
from nerdd_module.preprocessing import Sanitize
|
|
3
4
|
from rdkit.Chem.Descriptors import MolWt
|
|
4
5
|
|
|
5
6
|
__all__ = ["MolWeightModelWithExplicitMolIds"]
|
|
6
7
|
|
|
7
8
|
|
|
8
9
|
class MolWeightModelWithExplicitMolIds(AbstractModel):
|
|
9
|
-
def __init__(self, preprocessing_pipeline=
|
|
10
|
+
def __init__(self, preprocessing_pipeline=[Sanitize()], **kwargs):
|
|
10
11
|
super().__init__(preprocessing_pipeline, **kwargs)
|
|
11
12
|
|
|
12
13
|
def _predict_mols(self, mols, multiplier):
|
|
@@ -1,20 +1,18 @@
|
|
|
1
1
|
import pandas as pd
|
|
2
2
|
from nerdd_module import AbstractModel
|
|
3
|
+
from nerdd_module.preprocessing import Sanitize
|
|
3
4
|
from rdkit.Chem.Descriptors import MolWt
|
|
4
5
|
|
|
5
6
|
__all__ = ["MolWeightModelWithExplicitMols"]
|
|
6
7
|
|
|
7
8
|
|
|
8
9
|
class MolWeightModelWithExplicitMols(AbstractModel):
|
|
9
|
-
def __init__(self, preprocessing_pipeline=
|
|
10
|
+
def __init__(self, preprocessing_pipeline=[Sanitize()], **kwargs):
|
|
10
11
|
super().__init__(preprocessing_pipeline, **kwargs)
|
|
11
12
|
|
|
12
13
|
def _predict_mols(self, mols, multiplier):
|
|
13
14
|
return pd.DataFrame(
|
|
14
|
-
{
|
|
15
|
-
"mol": mols,
|
|
16
|
-
"weight": [MolWt(m) * multiplier for m in mols],
|
|
17
|
-
}
|
|
15
|
+
{"mol": mols, "weight": [MolWt(m) * multiplier for m in mols]}
|
|
18
16
|
)
|
|
19
17
|
|
|
20
18
|
def _get_config(self):
|
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
from .configuration import Configuration
|
|
2
|
-
|
|
3
|
-
__all__ = ["DefaultConfiguration"]
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
class DefaultConfiguration(Configuration):
|
|
7
|
-
def __init__(self, nerdd_module):
|
|
8
|
-
super().__init__()
|
|
9
|
-
|
|
10
|
-
self.config = dict(
|
|
11
|
-
task="molecular_property_prediction",
|
|
12
|
-
job_parameters=[],
|
|
13
|
-
result_properties=[],
|
|
14
|
-
)
|
|
15
|
-
|
|
16
|
-
def _get_dict(self):
|
|
17
|
-
return self.config
|
|
@@ -1,30 +0,0 @@
|
|
|
1
|
-
from functools import lru_cache
|
|
2
|
-
from typing import Generator, Type
|
|
3
|
-
|
|
4
|
-
from .reader import Reader
|
|
5
|
-
|
|
6
|
-
__all__ = ["ReaderRegistry", "register_reader"]
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
# lru_cache makes the registry a singleton
|
|
10
|
-
@lru_cache(maxsize=1)
|
|
11
|
-
class ReaderRegistry:
|
|
12
|
-
def __init__(self):
|
|
13
|
-
self._factories = []
|
|
14
|
-
|
|
15
|
-
def register(self, ReaderClass: Type[Reader], *args, **kwargs):
|
|
16
|
-
assert issubclass(ReaderClass, Reader)
|
|
17
|
-
self._factories.append(lambda: ReaderClass(*args, **kwargs))
|
|
18
|
-
|
|
19
|
-
def readers(self) -> Generator[Reader, None, None]:
|
|
20
|
-
for reader in self._factories:
|
|
21
|
-
yield reader()
|
|
22
|
-
|
|
23
|
-
def __iter__(self):
|
|
24
|
-
return iter(map(lambda f: f(), self._factories))
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
def register_reader(clazz, *args, **kwargs):
|
|
28
|
-
# TODO: implement both decorator modes
|
|
29
|
-
ReaderRegistry().register(clazz, *args, **kwargs)
|
|
30
|
-
return clazz
|
|
@@ -1,54 +0,0 @@
|
|
|
1
|
-
import numpy as np
|
|
2
|
-
from hypothesis import given as hgiven
|
|
3
|
-
from hypothesis import settings
|
|
4
|
-
from hypothesis import strategies as st
|
|
5
|
-
from hypothesis_rdkit import mols
|
|
6
|
-
from pytest_bdd import given, parsers
|
|
7
|
-
from rdkit.Chem import MolToInchi, MolToMolBlock, MolToSmiles
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
@given(
|
|
11
|
-
parsers.parse(
|
|
12
|
-
"a list of {num:d} random molecules, where {num_none:d} entries are None"
|
|
13
|
-
),
|
|
14
|
-
target_fixture="molecules",
|
|
15
|
-
)
|
|
16
|
-
def molecules(num, num_none):
|
|
17
|
-
result = None
|
|
18
|
-
|
|
19
|
-
@hgiven(st.lists(mols(), min_size=num, max_size=num, unique_by=MolToSmiles))
|
|
20
|
-
@settings(max_examples=1, deadline=None)
|
|
21
|
-
def generate(mols):
|
|
22
|
-
nonlocal result
|
|
23
|
-
# ensure that all molecules are valid
|
|
24
|
-
result = mols
|
|
25
|
-
|
|
26
|
-
generate()
|
|
27
|
-
|
|
28
|
-
# replace random entries with None
|
|
29
|
-
indices = np.random.choice(num, num_none, replace=False)
|
|
30
|
-
for i in indices:
|
|
31
|
-
result[i] = None
|
|
32
|
-
|
|
33
|
-
return result
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
@given(
|
|
37
|
-
parsers.parse("the representations of the molecules as {input_type}"),
|
|
38
|
-
target_fixture="representations",
|
|
39
|
-
)
|
|
40
|
-
def representations(molecules, input_type):
|
|
41
|
-
if input_type == "smiles":
|
|
42
|
-
converter = MolToSmiles
|
|
43
|
-
elif input_type == "mol_block":
|
|
44
|
-
converter = MolToMolBlock
|
|
45
|
-
elif input_type == "inchi":
|
|
46
|
-
converter = MolToInchi
|
|
47
|
-
elif input_type == "rdkit_mol":
|
|
48
|
-
converter = lambda mol: mol
|
|
49
|
-
else:
|
|
50
|
-
raise ValueError(f"Unknown input_type: {input_type}")
|
|
51
|
-
|
|
52
|
-
result = [converter(mol) if mol is not None else None for mol in molecules]
|
|
53
|
-
|
|
54
|
-
return result
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nerdd-module-0.2.4/nerdd_module/io → nerdd_module-0.2.6/nerdd_module/input}/depth_first_explorer.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nerdd-module-0.2.4/nerdd_module/io → nerdd_module-0.2.6/nerdd_module/input}/inchi_reader.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nerdd-module-0.2.4/nerdd_module/io → nerdd_module-0.2.6/nerdd_module/input}/smiles_reader.py
RENAMED
|
File without changes
|
{nerdd-module-0.2.4/nerdd_module/io → nerdd_module-0.2.6/nerdd_module/input}/string_reader.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nerdd-module-0.2.4/nerdd_module/io → nerdd_module-0.2.6/nerdd_module/output}/writer_registry.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nerdd-module-0.2.4 → nerdd_module-0.2.6}/nerdd_module/preprocessing/remove_stereochemistry.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|