nerdd-module 0.2.3__tar.gz → 0.2.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/PKG-INFO +1 -1
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/nerdd_module/abstract_model.py +2 -2
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/nerdd_module/io/csv_writer.py +1 -1
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/nerdd_module/io/file_reader.py +17 -7
- nerdd-module-0.2.5/nerdd_module/io/reader_registry.py +59 -0
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/nerdd_module/polyfills/get_entry_points.py +4 -1
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/nerdd_module/preprocessing/chembl_structure_pipeline.py +8 -0
- nerdd-module-0.2.5/nerdd_module/tests/__init__.py +3 -0
- nerdd-module-0.2.5/nerdd_module/tests/checks.py +174 -0
- nerdd-module-0.2.5/nerdd_module/tests/predictions.py +30 -0
- nerdd-module-0.2.3/tests/steps/molecules.py → nerdd-module-0.2.5/nerdd_module/tests/representations.py +44 -26
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/nerdd_module.egg-info/PKG-INFO +1 -1
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/nerdd_module.egg-info/SOURCES.txt +4 -1
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/setup.py +1 -1
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/tests/conftest.py +1 -1
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/tests/steps/__init__.py +0 -1
- nerdd-module-0.2.3/nerdd_module/io/reader_registry.py +0 -30
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/LICENSE +0 -0
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/README.md +0 -0
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/nerdd_module/__init__.py +0 -0
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/nerdd_module/cli.py +0 -0
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/nerdd_module/config/__init__.py +0 -0
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/nerdd_module/config/auto_configuration.py +0 -0
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/nerdd_module/config/configuration.py +0 -0
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/nerdd_module/config/default_configuration.py +0 -0
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/nerdd_module/config/dict_configuration.py +0 -0
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/nerdd_module/config/merged_configuration.py +0 -0
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/nerdd_module/config/package_configuration.py +0 -0
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/nerdd_module/config/yaml_configuration.py +0 -0
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/nerdd_module/io/__init__.py +0 -0
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/nerdd_module/io/depth_first_explorer.py +0 -0
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/nerdd_module/io/explorer.py +0 -0
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/nerdd_module/io/gzip_reader.py +0 -0
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/nerdd_module/io/inchi_reader.py +0 -0
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/nerdd_module/io/list_reader.py +0 -0
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/nerdd_module/io/mol_reader.py +0 -0
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/nerdd_module/io/reader.py +0 -0
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/nerdd_module/io/sdf_reader.py +0 -0
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/nerdd_module/io/sdf_writer.py +0 -0
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/nerdd_module/io/smiles_reader.py +0 -0
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/nerdd_module/io/string_reader.py +0 -0
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/nerdd_module/io/tar_reader.py +0 -0
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/nerdd_module/io/writer.py +0 -0
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/nerdd_module/io/writer_registry.py +0 -0
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/nerdd_module/io/zip_reader.py +0 -0
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/nerdd_module/polyfills/__init__.py +0 -0
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/nerdd_module/polyfills/files.py +0 -0
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/nerdd_module/preprocessing/__init__.py +0 -0
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/nerdd_module/preprocessing/check_valid_smiles.py +0 -0
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/nerdd_module/preprocessing/empty_pipeline.py +0 -0
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/nerdd_module/preprocessing/filter_by_element.py +0 -0
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/nerdd_module/preprocessing/filter_by_weight.py +0 -0
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/nerdd_module/preprocessing/pipeline.py +0 -0
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/nerdd_module/preprocessing/registry.py +0 -0
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/nerdd_module/preprocessing/remove_stereochemistry.py +0 -0
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/nerdd_module/preprocessing/step.py +0 -0
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/nerdd_module/problem.py +0 -0
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/nerdd_module/version.py +0 -0
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/nerdd_module.egg-info/dependency_links.txt +0 -0
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/nerdd_module.egg-info/requires.txt +0 -0
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/nerdd_module.egg-info/top_level.txt +0 -0
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/setup.cfg +0 -0
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/tests/__init__.py +0 -0
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/tests/models/AtomicMassModel.py +0 -0
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/tests/models/MolWeightModel.py +0 -0
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/tests/models/MolWeightModelWithExplicitMolIds.py +0 -0
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/tests/models/MolWeightModelWithExplicitMols.py +0 -0
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/tests/models/__init__.py +0 -0
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/tests/steps/checks.py +0 -0
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/tests/steps/predictors.py +0 -0
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/tests/steps/preprocessing.py +0 -0
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/tests/test_atom_property_prediction.py +0 -0
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/tests/test_molecule_property_prediction.py +0 -0
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/tests/test_preprocessing.py +0 -0
- {nerdd-module-0.2.3 → nerdd-module-0.2.5}/tests/test_reading_formats.py +0 -0
|
@@ -158,7 +158,7 @@ class AbstractModel(ABC):
|
|
|
158
158
|
# check that mol_id contains only valid ids
|
|
159
159
|
assert set(df_predictions.mol_id).issubset(
|
|
160
160
|
set(df_valid_subset.mol_id)
|
|
161
|
-
), "The mol_id column
|
|
161
|
+
), f"The mol_id column contains invalid ids: {set(df_predictions.mol_id).difference(set(df_valid_subset.mol_id))}."
|
|
162
162
|
# use mol_id as index
|
|
163
163
|
df_predictions.set_index("mol_id", drop=True, inplace=True)
|
|
164
164
|
elif "mol" in df_predictions.columns:
|
|
@@ -166,7 +166,7 @@ class AbstractModel(ABC):
|
|
|
166
166
|
names = df_predictions.mol.apply(lambda mol: int(mol.GetProp("_Name")))
|
|
167
167
|
assert set(names).issubset(
|
|
168
168
|
set(df_preprocess.mol_id)
|
|
169
|
-
), "The
|
|
169
|
+
), f"The mol_id column contains invalid ids: {set(df_predictions.mol_id).difference(set(df_valid_subset.mol_id))}."
|
|
170
170
|
|
|
171
171
|
# use mol_id as index
|
|
172
172
|
df_predictions.set_index(
|
|
@@ -8,24 +8,34 @@ from .reader_registry import register_reader
|
|
|
8
8
|
__all__ = ["FileReader"]
|
|
9
9
|
|
|
10
10
|
|
|
11
|
-
@register_reader
|
|
11
|
+
@register_reader("data_dir")
|
|
12
12
|
class FileReader(Reader):
|
|
13
|
-
def __init__(self, data_dir=
|
|
13
|
+
def __init__(self, data_dir=None):
|
|
14
14
|
super().__init__()
|
|
15
|
-
self.data_dir =
|
|
15
|
+
self.data_dir = data_dir
|
|
16
|
+
if self.data_dir is not None:
|
|
17
|
+
self.data_dir = Path(self.data_dir)
|
|
16
18
|
|
|
17
19
|
def read(self, filename, explore) -> Generator[MoleculeEntry, None, None]:
|
|
18
20
|
assert isinstance(filename, str), "input must be a string"
|
|
19
21
|
|
|
22
|
+
# convert filename to path
|
|
20
23
|
try:
|
|
21
24
|
path = Path(filename)
|
|
22
|
-
|
|
23
|
-
if not path.is_absolute():
|
|
24
|
-
path = self.data_dir / path
|
|
25
25
|
except:
|
|
26
26
|
raise ValueError("input must be a valid path")
|
|
27
|
+
|
|
28
|
+
# convert to absolute path
|
|
29
|
+
if not path.is_absolute():
|
|
30
|
+
if self.data_dir is not None:
|
|
31
|
+
path = self.data_dir / path
|
|
32
|
+
else:
|
|
33
|
+
path = Path(".") / path
|
|
34
|
+
|
|
35
|
+
# check that the file is within the data_dir
|
|
36
|
+
assert self.data_dir is None or self.data_dir in path.parents, "input must be a relative path"
|
|
27
37
|
|
|
28
|
-
|
|
38
|
+
# check that the file exists
|
|
29
39
|
assert path.exists(), "input must be a valid file"
|
|
30
40
|
|
|
31
41
|
with open(path, "rb") as f:
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
from functools import lru_cache
|
|
2
|
+
from typing import Dict, Generator, List, Tuple, Type
|
|
3
|
+
|
|
4
|
+
from .reader import Reader
|
|
5
|
+
|
|
6
|
+
__all__ = ["ReaderRegistry", "register_reader"]
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
# lru_cache makes the registry a singleton
|
|
10
|
+
@lru_cache(maxsize=1)
|
|
11
|
+
class ReaderRegistry:
|
|
12
|
+
def __init__(self):
|
|
13
|
+
self._factories : List[Tuple[Type[Reader], Tuple[str, ...], Dict[str, str]]] = []
|
|
14
|
+
self._config = {}
|
|
15
|
+
|
|
16
|
+
def _create_reader(self, ReaderClass: Type[Reader], *args, **kwargs) -> Reader:
|
|
17
|
+
# translate all args
|
|
18
|
+
args = [self._config.get(arg, None) for arg in args]
|
|
19
|
+
# translate all kwargs
|
|
20
|
+
kwargs = {k: self._config.get(v, None) for k, v in kwargs.items() if v in self._config}
|
|
21
|
+
|
|
22
|
+
return ReaderClass(*args, **kwargs)
|
|
23
|
+
|
|
24
|
+
def register(self, ReaderClass: Type[Reader], *args :str , **kwargs:str):
|
|
25
|
+
assert issubclass(ReaderClass, Reader)
|
|
26
|
+
assert all([isinstance(arg, str) for arg in args])
|
|
27
|
+
assert all([isinstance(k, str) and isinstance(v, str) for k, v in kwargs.items()])
|
|
28
|
+
self._factories.append((ReaderClass, args, kwargs))
|
|
29
|
+
|
|
30
|
+
def readers(self) -> Generator[Reader, None, None]:
|
|
31
|
+
for reader, args, kwargs in self._factories:
|
|
32
|
+
yield self._create_reader(reader, *args, **kwargs)
|
|
33
|
+
|
|
34
|
+
def __iter__(self):
|
|
35
|
+
return iter(self.readers())
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def register_reader(*args, **kwargs):
|
|
39
|
+
def wrapper(cls, *args, **kwargs):
|
|
40
|
+
ReaderRegistry().register(cls, *args, **kwargs)
|
|
41
|
+
return cls
|
|
42
|
+
|
|
43
|
+
# Case 1: first argument is a class
|
|
44
|
+
# --> decorator is used without arguments
|
|
45
|
+
# @register_reader
|
|
46
|
+
# class F:
|
|
47
|
+
# ...
|
|
48
|
+
if len(args) > 0 and isinstance(args[0], type):
|
|
49
|
+
return wrapper(args[0], *args[1:], **kwargs)
|
|
50
|
+
|
|
51
|
+
# Case 2: first argument is not a class
|
|
52
|
+
# --> decorator is used with arguments
|
|
53
|
+
# @register_reader("blah")
|
|
54
|
+
# class F:
|
|
55
|
+
# ...
|
|
56
|
+
def inner(cls):
|
|
57
|
+
assert isinstance(cls, type), "Decorator must be used with a class"
|
|
58
|
+
return wrapper(cls, *args, **kwargs)
|
|
59
|
+
return inner
|
|
@@ -6,7 +6,10 @@ try:
|
|
|
6
6
|
from importlib.metadata import entry_points
|
|
7
7
|
|
|
8
8
|
def get_entry_points(group):
|
|
9
|
-
|
|
9
|
+
try:
|
|
10
|
+
return entry_points(group=group)
|
|
11
|
+
except TypeError:
|
|
12
|
+
return entry_points().get(group, [])
|
|
10
13
|
|
|
11
14
|
except ImportError:
|
|
12
15
|
import pkg_resources
|
{nerdd-module-0.2.3 → nerdd-module-0.2.5}/nerdd_module/preprocessing/chembl_structure_pipeline.py
RENAMED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import warnings
|
|
1
2
|
from typing import List, Tuple
|
|
2
3
|
|
|
3
4
|
from rdkit.Chem import Mol
|
|
@@ -11,6 +12,13 @@ from .pipeline import Pipeline
|
|
|
11
12
|
from .remove_stereochemistry import RemoveStereochemistry
|
|
12
13
|
from .step import Step
|
|
13
14
|
|
|
15
|
+
# before importing chembl_structure_pipeline, we need to suppress RDKit warnings
|
|
16
|
+
warnings.filterwarnings(
|
|
17
|
+
"ignore",
|
|
18
|
+
category=DeprecationWarning,
|
|
19
|
+
module="rdkit.Chem.MolStandardize",
|
|
20
|
+
)
|
|
21
|
+
|
|
14
22
|
try:
|
|
15
23
|
# importing chembl_structure_pipeline already logs messages
|
|
16
24
|
# --> suppress them temporarily
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from ast import literal_eval
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
import pandas as pd
|
|
6
|
+
from pytest_bdd import parsers, then
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@then(parsers.parse("The result should contain the columns:\n{column_names}"))
|
|
10
|
+
def check_result_columns(predictions, column_names):
|
|
11
|
+
column_names = column_names.strip()
|
|
12
|
+
for c in column_names.split("\n"):
|
|
13
|
+
assert (
|
|
14
|
+
c in predictions.columns
|
|
15
|
+
), f"Column {c} not in predictions {predictions.columns.tolist()}"
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@then(
|
|
19
|
+
parsers.parse(
|
|
20
|
+
"the value in column '{column_name}' should be between {low} and {high}"
|
|
21
|
+
)
|
|
22
|
+
)
|
|
23
|
+
def check_column_range(subset, column_name, low, high):
|
|
24
|
+
if low == "infinity":
|
|
25
|
+
low = np.inf
|
|
26
|
+
elif low == "-infinity":
|
|
27
|
+
low = -np.inf
|
|
28
|
+
else:
|
|
29
|
+
low = float(low)
|
|
30
|
+
|
|
31
|
+
if high == "infinity":
|
|
32
|
+
high = np.inf
|
|
33
|
+
elif high == "-infinity":
|
|
34
|
+
high = -np.inf
|
|
35
|
+
else:
|
|
36
|
+
high = float(high)
|
|
37
|
+
|
|
38
|
+
assert (low <= subset[column_name]).all()
|
|
39
|
+
assert (subset[column_name] <= high).all()
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@then(parsers.parse("the value in column '{column_name}' should be '{expected_value}'"))
|
|
43
|
+
def check_column_value(subset, column_name, expected_value):
|
|
44
|
+
if len(subset) == 0:
|
|
45
|
+
return
|
|
46
|
+
|
|
47
|
+
value = subset[column_name].iloc[0]
|
|
48
|
+
|
|
49
|
+
# expected value is always provided as string
|
|
50
|
+
# try to parse it as a Python literal (e.g. a number) if possible
|
|
51
|
+
try:
|
|
52
|
+
expected_value = literal_eval(expected_value)
|
|
53
|
+
except:
|
|
54
|
+
pass
|
|
55
|
+
|
|
56
|
+
if expected_value == "(none)":
|
|
57
|
+
# if expected_value is the magic string "(none)", we expect None
|
|
58
|
+
assert pd.isnull(value), f"Column {column_name} is assigned to {value} != None"
|
|
59
|
+
else:
|
|
60
|
+
# otherwise, we expect the value to be equal to the expected value
|
|
61
|
+
assert (
|
|
62
|
+
value == expected_value
|
|
63
|
+
), f"Column {column_name} is assigned to {value} != {expected_value}"
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
@then(
|
|
67
|
+
parsers.parse(
|
|
68
|
+
"the value in column '{column_name}' should be a subset of {superset}"
|
|
69
|
+
)
|
|
70
|
+
)
|
|
71
|
+
def check_column_subset(subset, column_name, superset):
|
|
72
|
+
superset = set(json.loads(superset))
|
|
73
|
+
|
|
74
|
+
assert all(
|
|
75
|
+
set(value).issubset(superset) for value in subset[column_name]
|
|
76
|
+
), f"Column {column_name} contains value not in {superset}"
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
@then(parsers.parse("the value in column '{column_name}' should be one of {superset}"))
|
|
80
|
+
def check_column_membership(subset, column_name, superset):
|
|
81
|
+
superset = json.loads(superset)
|
|
82
|
+
|
|
83
|
+
assert isinstance(
|
|
84
|
+
superset, list
|
|
85
|
+
), f"Expected a list for superset, got {type(superset)}"
|
|
86
|
+
|
|
87
|
+
assert (
|
|
88
|
+
subset[column_name].isin(superset).all()
|
|
89
|
+
), f"Column {column_name} contains value not in {superset}"
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
@then(parsers.parse("the value in column '{column_name}' should be a png image"))
|
|
93
|
+
def check_png_image(subset, column_name):
|
|
94
|
+
if len(subset) == 0:
|
|
95
|
+
return
|
|
96
|
+
|
|
97
|
+
assert (
|
|
98
|
+
subset[column_name].str.startswith('<img src="data:image/png;base64,')
|
|
99
|
+
).all(), f"Column {column_name} does not contain a PNG image"
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
@then(
|
|
103
|
+
parsers.parse("the value in column '{column_name}' should contain only '{value}'")
|
|
104
|
+
)
|
|
105
|
+
def check_column_membership_single(predictions, column_name, value):
|
|
106
|
+
if value == "(none)":
|
|
107
|
+
assert all(
|
|
108
|
+
pd.isnull(predictions[column_name])
|
|
109
|
+
), f"Column {column_name} must be none"
|
|
110
|
+
else:
|
|
111
|
+
assert all(
|
|
112
|
+
value in values for values in predictions[column_name]
|
|
113
|
+
), f"Column {column_name} contains value {value}"
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
@then(
|
|
117
|
+
parsers.parse(
|
|
118
|
+
"the value in column '{column_name}' should have type '{expected_type}'"
|
|
119
|
+
)
|
|
120
|
+
)
|
|
121
|
+
def check_column_type(subset, column_name, expected_type):
|
|
122
|
+
expected_type = eval(expected_type)
|
|
123
|
+
|
|
124
|
+
assert (
|
|
125
|
+
subset[column_name].map(lambda x: isinstance(x, expected_type)).all()
|
|
126
|
+
), f"Column {column_name} has unexpected type"
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
@then(
|
|
130
|
+
parsers.parse(
|
|
131
|
+
"the value in column '{column_name}' should have length greater than {length}"
|
|
132
|
+
)
|
|
133
|
+
)
|
|
134
|
+
def check_column_length(subset, column_name, length):
|
|
135
|
+
length = int(length)
|
|
136
|
+
assert (
|
|
137
|
+
subset[column_name].map(lambda x: len(x) > length)
|
|
138
|
+
).all(), f"Column {column_name} has unexpected length"
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
@then(parsers.parse("when '{condition_column_name}' is '{condition_value}' "
|
|
142
|
+
"the value in column '{column_name}' should be '{expected_value}'"))
|
|
143
|
+
def check_conditional_column_value(subset, condition_column_name, condition_value, column_name, expected_value):
|
|
144
|
+
# expected value is always provided as string
|
|
145
|
+
# try to parse it as a Python literal (e.g. a number) if possible
|
|
146
|
+
try:
|
|
147
|
+
expected_value = literal_eval(expected_value)
|
|
148
|
+
except:
|
|
149
|
+
pass
|
|
150
|
+
|
|
151
|
+
# same for condition value
|
|
152
|
+
try:
|
|
153
|
+
condition_value = literal_eval(condition_value)
|
|
154
|
+
except:
|
|
155
|
+
pass
|
|
156
|
+
|
|
157
|
+
# condition value can be (none) to indicate None
|
|
158
|
+
if condition_value == "(none)":
|
|
159
|
+
subset = subset[pd.isnull(subset[condition_column_name])]
|
|
160
|
+
else:
|
|
161
|
+
subset = subset[subset[condition_column_name] == condition_value]
|
|
162
|
+
|
|
163
|
+
value = subset[column_name]
|
|
164
|
+
assert len(value) > 0, f"No rows found for condition {condition_column_name} == {condition_value}"
|
|
165
|
+
|
|
166
|
+
# expected value can be (none) to indicate None
|
|
167
|
+
if expected_value == "(none)":
|
|
168
|
+
# if expected_value is the magic string "(none)", we expect None
|
|
169
|
+
assert pd.isnull(value).all(), f"Column {column_name} is assigned to {value} != None"
|
|
170
|
+
else:
|
|
171
|
+
# otherwise, we expect the value to be equal to the expected value
|
|
172
|
+
assert (
|
|
173
|
+
(value == expected_value).all()
|
|
174
|
+
), f"Column {column_name} is assigned to {value} != {expected_value}"
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
from pytest_bdd import parsers, then, when
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
@when(
|
|
6
|
+
parsers.parse("the model generates predictions for the molecule representations"),
|
|
7
|
+
target_fixture="predictions",
|
|
8
|
+
)
|
|
9
|
+
def predictions(
|
|
10
|
+
representations,
|
|
11
|
+
model,
|
|
12
|
+
input_type,
|
|
13
|
+
):
|
|
14
|
+
return model.predict(
|
|
15
|
+
representations,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@when(
|
|
20
|
+
"The subset of the result where the input was not None is considered",
|
|
21
|
+
target_fixture="subset",
|
|
22
|
+
)
|
|
23
|
+
def subset_without_none(predictions):
|
|
24
|
+
# remove None entries
|
|
25
|
+
return predictions[predictions.preprocessed_mol.notnull()]
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@then("the result should be a pandas DataFrame")
|
|
29
|
+
def check_result(predictions):
|
|
30
|
+
assert isinstance(predictions, pd.DataFrame)
|
|
@@ -1,10 +1,42 @@
|
|
|
1
1
|
import numpy as np
|
|
2
2
|
from hypothesis import given as hgiven
|
|
3
|
-
from hypothesis import settings
|
|
3
|
+
from hypothesis import seed, settings
|
|
4
4
|
from hypothesis import strategies as st
|
|
5
|
-
from hypothesis_rdkit import mols
|
|
5
|
+
from hypothesis_rdkit import mols, smiles
|
|
6
6
|
from pytest_bdd import given, parsers
|
|
7
|
-
from rdkit.Chem import MolToInchi, MolToMolBlock, MolToSmiles
|
|
7
|
+
from rdkit.Chem import MolFromSmiles, MolToMolBlock, MolToSmiles
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@given(parsers.parse("a random seed set to {seed:d}"), target_fixture="random_seed")
|
|
11
|
+
def random_seed(seed):
|
|
12
|
+
return seed
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@given(
|
|
16
|
+
parsers.parse("an input molecule specified by '{input}'"),
|
|
17
|
+
target_fixture="representations",
|
|
18
|
+
)
|
|
19
|
+
def representations_from_input(input):
|
|
20
|
+
return [input]
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@given(
|
|
24
|
+
parsers.parse("the representations of the molecules"),
|
|
25
|
+
target_fixture="representations",
|
|
26
|
+
)
|
|
27
|
+
def representations_from_molecules(molecules, input_type):
|
|
28
|
+
if input_type == "smiles":
|
|
29
|
+
converter = MolToSmiles
|
|
30
|
+
elif input_type == "mol_block":
|
|
31
|
+
converter = MolToMolBlock
|
|
32
|
+
elif input_type == "rdkit_mol":
|
|
33
|
+
converter = lambda mol: mol
|
|
34
|
+
else:
|
|
35
|
+
raise ValueError(f"Unknown input_type: {input_type}")
|
|
36
|
+
|
|
37
|
+
result = [converter(mol) if mol is not None else None for mol in molecules]
|
|
38
|
+
|
|
39
|
+
return result
|
|
8
40
|
|
|
9
41
|
|
|
10
42
|
@given(
|
|
@@ -13,15 +45,17 @@ from rdkit.Chem import MolToInchi, MolToMolBlock, MolToSmiles
|
|
|
13
45
|
),
|
|
14
46
|
target_fixture="molecules",
|
|
15
47
|
)
|
|
16
|
-
def molecules(num, num_none):
|
|
48
|
+
def molecules(num, num_none, random_seed=0):
|
|
17
49
|
result = None
|
|
18
50
|
|
|
51
|
+
# pytest-bdd and hypothesis don't play well together (yet)
|
|
52
|
+
# --> use this workaround to generate random molecules
|
|
19
53
|
@hgiven(st.lists(mols(), min_size=num, max_size=num, unique_by=MolToSmiles))
|
|
20
54
|
@settings(max_examples=1, deadline=None)
|
|
21
|
-
|
|
55
|
+
@seed(random_seed)
|
|
56
|
+
def generate(ms):
|
|
22
57
|
nonlocal result
|
|
23
|
-
|
|
24
|
-
result = mols
|
|
58
|
+
result = ms
|
|
25
59
|
|
|
26
60
|
generate()
|
|
27
61
|
|
|
@@ -33,22 +67,6 @@ def molecules(num, num_none):
|
|
|
33
67
|
return result
|
|
34
68
|
|
|
35
69
|
|
|
36
|
-
@given(
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
)
|
|
40
|
-
def representations(molecules, input_type):
|
|
41
|
-
if input_type == "smiles":
|
|
42
|
-
converter = MolToSmiles
|
|
43
|
-
elif input_type == "mol_block":
|
|
44
|
-
converter = MolToMolBlock
|
|
45
|
-
elif input_type == "inchi":
|
|
46
|
-
converter = MolToInchi
|
|
47
|
-
elif input_type == "rdkit_mol":
|
|
48
|
-
converter = lambda mol: mol
|
|
49
|
-
else:
|
|
50
|
-
raise ValueError(f"Unknown input_type: {input_type}")
|
|
51
|
-
|
|
52
|
-
result = [converter(mol) if mol is not None else None for mol in molecules]
|
|
53
|
-
|
|
54
|
-
return result
|
|
70
|
+
@given(parsers.parse("the input type is '{input_type}'"), target_fixture="input_type")
|
|
71
|
+
def input_type(input_type):
|
|
72
|
+
return input_type
|
|
@@ -51,6 +51,10 @@ nerdd_module/preprocessing/pipeline.py
|
|
|
51
51
|
nerdd_module/preprocessing/registry.py
|
|
52
52
|
nerdd_module/preprocessing/remove_stereochemistry.py
|
|
53
53
|
nerdd_module/preprocessing/step.py
|
|
54
|
+
nerdd_module/tests/__init__.py
|
|
55
|
+
nerdd_module/tests/checks.py
|
|
56
|
+
nerdd_module/tests/predictions.py
|
|
57
|
+
nerdd_module/tests/representations.py
|
|
54
58
|
tests/__init__.py
|
|
55
59
|
tests/conftest.py
|
|
56
60
|
tests/test_atom_property_prediction.py
|
|
@@ -64,6 +68,5 @@ tests/models/MolWeightModelWithExplicitMols.py
|
|
|
64
68
|
tests/models/__init__.py
|
|
65
69
|
tests/steps/__init__.py
|
|
66
70
|
tests/steps/checks.py
|
|
67
|
-
tests/steps/molecules.py
|
|
68
71
|
tests/steps/predictors.py
|
|
69
72
|
tests/steps/preprocessing.py
|
|
@@ -1,30 +0,0 @@
|
|
|
1
|
-
from functools import lru_cache
|
|
2
|
-
from typing import Generator, Type
|
|
3
|
-
|
|
4
|
-
from .reader import Reader
|
|
5
|
-
|
|
6
|
-
__all__ = ["ReaderRegistry", "register_reader"]
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
# lru_cache makes the registry a singleton
|
|
10
|
-
@lru_cache(maxsize=1)
|
|
11
|
-
class ReaderRegistry:
|
|
12
|
-
def __init__(self):
|
|
13
|
-
self._factories = []
|
|
14
|
-
|
|
15
|
-
def register(self, ReaderClass: Type[Reader], *args, **kwargs):
|
|
16
|
-
assert issubclass(ReaderClass, Reader)
|
|
17
|
-
self._factories.append(lambda: ReaderClass(*args, **kwargs))
|
|
18
|
-
|
|
19
|
-
def readers(self) -> Generator[Reader, None, None]:
|
|
20
|
-
for reader in self._factories:
|
|
21
|
-
yield reader()
|
|
22
|
-
|
|
23
|
-
def __iter__(self):
|
|
24
|
-
return iter(map(lambda f: f(), self._factories))
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
def register_reader(clazz, *args, **kwargs):
|
|
28
|
-
# TODO: implement both decorator modes
|
|
29
|
-
ReaderRegistry().register(clazz, *args, **kwargs)
|
|
30
|
-
return clazz
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nerdd-module-0.2.3 → nerdd-module-0.2.5}/nerdd_module/preprocessing/remove_stereochemistry.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|