nerdd-module 0.3.45__tar.gz → 0.3.47__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/PKG-INFO +1 -1
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/input/depth_first_explorer.py +2 -4
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/input/inchi_reader.py +23 -3
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/input/list_reader.py +3 -3
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/input/sdf_reader.py +1 -1
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/input/smiles_reader.py +20 -2
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/input/string_reader.py +5 -2
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/tests/checks.py +4 -4
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/tests/files.py +5 -4
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/tests/models/AtomicMassModel.py +3 -3
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/tests/models/MolWeightModel.py +4 -2
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/tests/predictions.py +8 -3
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/tests/representations.py +23 -13
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module.egg-info/PKG-INFO +1 -1
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/pyproject.toml +1 -2
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/LICENSE +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/README.md +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/__init__.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/cli.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/config/__init__.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/config/configuration.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/config/default_configuration.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/config/dict_configuration.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/config/merged_configuration.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/config/models.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/config/package_configuration.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/config/search_yaml_configuration.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/config/yaml_configuration.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/converters/__init__.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/converters/basic_type_converter.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/converters/converter.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/converters/converter_config.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/converters/mol_converter.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/converters/problem_list_converter.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/converters/representation_converter.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/converters/source_list_converter.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/converters/void_converter.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/input/__init__.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/input/explorer.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/input/file_reader.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/input/gzip_reader.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/input/mol_reader.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/input/reader.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/input/reader_config.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/input/stream_reader.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/input/tar_reader.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/input/zip_reader.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/model/__init__.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/model/assign_name_step.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/model/convert_representations_step.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/model/enforce_schema_step.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/model/model.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/model/prediction_step.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/model/read_input_step.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/model/write_output_step.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/output/__init__.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/output/csv_writer.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/output/file_writer.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/output/iterator_writer.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/output/pandas_writer.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/output/record_list_writer.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/output/sdf_writer.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/output/writer.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/output/writer_config.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/polyfills/__init__.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/polyfills/block_logs.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/polyfills/files.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/polyfills/get_entry_points.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/polyfills/literal.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/polyfills/typed_dict.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/polyfills/types.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/polyfills/version.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/preprocessing/__init__.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/preprocessing/check_valid_smiles.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/preprocessing/chembl_structure_pipeline.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/preprocessing/filter_by_element.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/preprocessing/filter_by_weight.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/preprocessing/preprocessing_step.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/preprocessing/remove_small_fragments.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/preprocessing/remove_stereochemistry.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/preprocessing/sanitize.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/problem.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/py.typed +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/steps/__init__.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/steps/map_step.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/steps/output_step.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/steps/step.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/tests/__init__.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/tests/models/__init__.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/tests/preprocessing/DummyPreprocessingStep.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/tests/preprocessing/__init__.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/util/__init__.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/util/call_with_mappings.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/util/package.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/version.py +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module.egg-info/SOURCES.txt +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module.egg-info/dependency_links.txt +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module.egg-info/requires.txt +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module.egg-info/top_level.txt +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/setup.cfg +0 -0
- {nerdd_module-0.3.45 → nerdd_module-0.3.47}/tests/test_features.py +0 -0
|
@@ -85,11 +85,9 @@ class DepthFirstExplorer(Explorer):
|
|
|
85
85
|
|
|
86
86
|
if (
|
|
87
87
|
score > best_score
|
|
88
|
-
# if the score is the same, prefer the reader with higher ratio
|
|
89
|
-
# of valid entries
|
|
88
|
+
# if the score is the same, prefer the reader with higher ratio of valid entries
|
|
90
89
|
or (score == best_score and ratio > best_ratio)
|
|
91
|
-
# if the ratio is the same, prefer the reader with less invalid
|
|
92
|
-
# results
|
|
90
|
+
# if the ratio is the same, prefer the reader with less invalid results
|
|
93
91
|
or (
|
|
94
92
|
score == best_score
|
|
95
93
|
and ratio == best_ratio
|
|
@@ -12,8 +12,9 @@ __all__ = ["InchiReader"]
|
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
class InchiReader(StreamReader):
|
|
15
|
-
def __init__(self) -> None:
|
|
15
|
+
def __init__(self, max_length_inchi: int = 10_000) -> None:
|
|
16
16
|
super().__init__()
|
|
17
|
+
self._max_length_inchi = max_length_inchi
|
|
17
18
|
|
|
18
19
|
def _read_stream(self, input_stream: Any, explore: ExploreCallable) -> Iterator[MoleculeEntry]:
|
|
19
20
|
# suppress RDKit warnings
|
|
@@ -27,6 +28,23 @@ class InchiReader(StreamReader):
|
|
|
27
28
|
if line.strip().startswith("#"):
|
|
28
29
|
continue
|
|
29
30
|
|
|
31
|
+
# avoid long InChI strings, because they might take veeeeery long to parse
|
|
32
|
+
if len(line) > self._max_length_inchi:
|
|
33
|
+
errors = [
|
|
34
|
+
Problem(
|
|
35
|
+
"line_too_long",
|
|
36
|
+
f"Line exceeds max length of {self._max_length_inchi} characters",
|
|
37
|
+
)
|
|
38
|
+
]
|
|
39
|
+
yield MoleculeEntry(
|
|
40
|
+
raw_input=line.strip("\n")[: self._max_length_inchi - 3] + "...",
|
|
41
|
+
input_type="inchi",
|
|
42
|
+
source=("raw_input",),
|
|
43
|
+
mol=None,
|
|
44
|
+
errors=errors,
|
|
45
|
+
)
|
|
46
|
+
continue
|
|
47
|
+
|
|
30
48
|
try:
|
|
31
49
|
mol = MolFromInchi(line, sanitize=False)
|
|
32
50
|
except: # noqa: E722 (allow bare except, because RDKit is unpredictable)
|
|
@@ -46,10 +64,12 @@ class InchiReader(StreamReader):
|
|
|
46
64
|
)
|
|
47
65
|
|
|
48
66
|
def __repr__(self) -> str:
|
|
49
|
-
return "InchiReader()"
|
|
67
|
+
return f"InchiReader(max_length_inchi={self._max_length_inchi})"
|
|
50
68
|
|
|
51
69
|
config = ReaderConfig(
|
|
52
70
|
examples=[
|
|
53
|
-
|
|
71
|
+
# (this is one InChI string, split into two lines)
|
|
72
|
+
"InChI=1S/C18H16O3/c1-2-13(12-8-4-3-5-9-12)16-17(19)14-10-6-7-11-15(14)21-18(16)20"
|
|
73
|
+
"/h3-11,13,19H,2H2,1H3"
|
|
54
74
|
]
|
|
55
75
|
)
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
from io import
|
|
2
|
-
from typing import Any,
|
|
1
|
+
from io import IOBase
|
|
2
|
+
from typing import Any, Iterable, Iterator
|
|
3
3
|
|
|
4
4
|
from .reader import ExploreCallable, MoleculeEntry, Reader
|
|
5
5
|
|
|
@@ -12,7 +12,7 @@ class ListReader(Reader):
|
|
|
12
12
|
|
|
13
13
|
def read(self, input_iterable: Any, explore: ExploreCallable) -> Iterator[MoleculeEntry]:
|
|
14
14
|
assert isinstance(input_iterable, Iterable) and not isinstance(
|
|
15
|
-
input_iterable, (str, bytes,
|
|
15
|
+
input_iterable, (str, bytes, IOBase)
|
|
16
16
|
), f"input must be an iterable, but is {type(input_iterable)}"
|
|
17
17
|
|
|
18
18
|
for entry in input_iterable:
|
|
@@ -11,7 +11,7 @@ __all__ = ["SdfReader"]
|
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
class SdfReader(StreamReader):
|
|
14
|
-
def __init__(self, max_num_lines_mol_block: int =
|
|
14
|
+
def __init__(self, max_num_lines_mol_block: int = 10_000) -> None:
|
|
15
15
|
super().__init__()
|
|
16
16
|
self.max_num_lines_mol_block = max_num_lines_mol_block
|
|
17
17
|
|
|
@@ -12,8 +12,9 @@ __all__ = ["SmilesReader"]
|
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
class SmilesReader(StreamReader):
|
|
15
|
-
def __init__(self) -> None:
|
|
15
|
+
def __init__(self, max_length_smiles: int = 10_000) -> None:
|
|
16
16
|
super().__init__()
|
|
17
|
+
self._max_length_smiles = max_length_smiles
|
|
17
18
|
|
|
18
19
|
def _read_stream(self, input_stream: Any, explore: ExploreCallable) -> Iterator[MoleculeEntry]:
|
|
19
20
|
# suppress RDKit warnings
|
|
@@ -27,6 +28,23 @@ class SmilesReader(StreamReader):
|
|
|
27
28
|
if line.strip().startswith("#"):
|
|
28
29
|
continue
|
|
29
30
|
|
|
31
|
+
# avoid long smiles strings, because they might take veeeeery long to parse
|
|
32
|
+
if len(line) > self._max_length_smiles:
|
|
33
|
+
errors = [
|
|
34
|
+
Problem(
|
|
35
|
+
"line_too_long",
|
|
36
|
+
f"Line exceeds max length of {self._max_length_smiles} characters",
|
|
37
|
+
)
|
|
38
|
+
]
|
|
39
|
+
yield MoleculeEntry(
|
|
40
|
+
raw_input=line.strip("\n")[: self._max_length_smiles - 3] + "...",
|
|
41
|
+
input_type="smiles",
|
|
42
|
+
source=("raw_input",),
|
|
43
|
+
mol=None,
|
|
44
|
+
errors=errors,
|
|
45
|
+
)
|
|
46
|
+
continue
|
|
47
|
+
|
|
30
48
|
try:
|
|
31
49
|
mol = MolFromSmiles(line, sanitize=False)
|
|
32
50
|
except: # noqa: E722 (allow bare except, because RDKit is unpredictable)
|
|
@@ -56,6 +74,6 @@ class SmilesReader(StreamReader):
|
|
|
56
74
|
)
|
|
57
75
|
|
|
58
76
|
def __repr__(self) -> str:
|
|
59
|
-
return "SmilesReader()"
|
|
77
|
+
return f"SmilesReader(max_length={self._max_length_smiles})"
|
|
60
78
|
|
|
61
79
|
config = ReaderConfig(examples=["C1=NC2=C(N1COCCO)N=C(NC2=O)N"])
|
|
@@ -11,9 +11,12 @@ class StringReader(Reader):
|
|
|
11
11
|
super().__init__()
|
|
12
12
|
|
|
13
13
|
def read(self, input: Any, explore: ExploreCallable) -> Iterator[MoleculeEntry]:
|
|
14
|
-
assert isinstance(input, str)
|
|
14
|
+
assert isinstance(input, (str, bytes))
|
|
15
15
|
|
|
16
|
-
|
|
16
|
+
if isinstance(input, str):
|
|
17
|
+
input = input.encode("utf-8")
|
|
18
|
+
|
|
19
|
+
with BytesIO(input) as f:
|
|
17
20
|
yield from explore(f)
|
|
18
21
|
|
|
19
22
|
def __repr__(self) -> str:
|
|
@@ -46,7 +46,7 @@ def check_column_value_equality(subset, column_name, expected_value):
|
|
|
46
46
|
# try to convert to float if possible
|
|
47
47
|
try:
|
|
48
48
|
expected_value = literal_eval(expected_value)
|
|
49
|
-
except:
|
|
49
|
+
except: # noqa: E722
|
|
50
50
|
pass
|
|
51
51
|
|
|
52
52
|
if expected_value is None:
|
|
@@ -72,7 +72,7 @@ def check_column_value_inequality(subset, column_name, forbidden_value):
|
|
|
72
72
|
# try to convert to float if possible
|
|
73
73
|
try:
|
|
74
74
|
forbidden_value = literal_eval(forbidden_value)
|
|
75
|
-
except:
|
|
75
|
+
except: # noqa: E722
|
|
76
76
|
pass
|
|
77
77
|
|
|
78
78
|
if forbidden_value is None:
|
|
@@ -156,13 +156,13 @@ def check_conditional_column_value(
|
|
|
156
156
|
# try to convert to float if possible
|
|
157
157
|
try:
|
|
158
158
|
expected_value = literal_eval(expected_value)
|
|
159
|
-
except:
|
|
159
|
+
except: # noqa: E722
|
|
160
160
|
pass
|
|
161
161
|
|
|
162
162
|
# same for condition value
|
|
163
163
|
try:
|
|
164
164
|
condition_value = literal_eval(condition_value)
|
|
165
|
-
except:
|
|
165
|
+
except: # noqa: E722
|
|
166
166
|
pass
|
|
167
167
|
|
|
168
168
|
# condition value can be (none) to indicate None
|
|
@@ -39,9 +39,7 @@ def representation_files(molecules, input_type, num_files):
|
|
|
39
39
|
# choose num_files-1 numbers to split the representations into num_files parts
|
|
40
40
|
# the while loop makes sure that each part contains at least one valid molecule
|
|
41
41
|
while True:
|
|
42
|
-
split_indices = np.random.choice(
|
|
43
|
-
len(representations), size=num_files - 1, replace=False
|
|
44
|
-
)
|
|
42
|
+
split_indices = np.random.choice(len(representations), size=num_files - 1, replace=False)
|
|
45
43
|
split_indices = np.sort(split_indices)
|
|
46
44
|
|
|
47
45
|
# split the representations
|
|
@@ -57,13 +55,16 @@ def representation_files(molecules, input_type, num_files):
|
|
|
57
55
|
# write the representations to files
|
|
58
56
|
representations_files = []
|
|
59
57
|
|
|
60
|
-
for
|
|
58
|
+
for split_representation in split_representations:
|
|
61
59
|
with NamedTemporaryFile("w", delete=False) as f:
|
|
62
60
|
for representation in split_representation:
|
|
61
|
+
# write representation
|
|
63
62
|
if representation is None:
|
|
64
63
|
f.write("None")
|
|
65
64
|
else:
|
|
66
65
|
f.write(representation)
|
|
66
|
+
|
|
67
|
+
# write separator
|
|
67
68
|
if input_type in ["smiles", "inchi"]:
|
|
68
69
|
f.write("\n")
|
|
69
70
|
elif input_type == "mol_block":
|
|
@@ -8,7 +8,9 @@ allowed_versions = ["mol_ids", "mols", "iterator", "error"]
|
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
class AtomicMassModel(Model):
|
|
11
|
-
def __init__(self, preprocessing_steps=
|
|
11
|
+
def __init__(self, preprocessing_steps=None, version="mol_ids", **kwargs):
|
|
12
|
+
if preprocessing_steps is None:
|
|
13
|
+
preprocessing_steps = [Sanitize()]
|
|
12
14
|
assert (
|
|
13
15
|
version in allowed_versions
|
|
14
16
|
), f"version must be one of {allowed_versions}, got {version}"
|
|
@@ -64,5 +66,3 @@ class AtomicMassModel(Model):
|
|
|
64
66
|
{"name": "mass", "type": "float", "level": "atom"},
|
|
65
67
|
],
|
|
66
68
|
}
|
|
67
|
-
|
|
68
|
-
|
|
@@ -9,7 +9,9 @@ allowed_versions = ["order_based", "mol_ids", "mols", "iterator", "error"]
|
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
class MolWeightModel(Model):
|
|
12
|
-
def __init__(self, preprocessing_steps=
|
|
12
|
+
def __init__(self, preprocessing_steps=None, version="order_based", **kwargs):
|
|
13
|
+
if preprocessing_steps is None:
|
|
14
|
+
preprocessing_steps = [Sanitize()]
|
|
13
15
|
assert (
|
|
14
16
|
version in allowed_versions
|
|
15
17
|
), f"version must be one of {allowed_versions}, got {version}"
|
|
@@ -48,4 +50,4 @@ class MolWeightModel(Model):
|
|
|
48
50
|
"result_properties": [
|
|
49
51
|
{"name": "weight", "type": "float"},
|
|
50
52
|
],
|
|
51
|
-
}
|
|
53
|
+
}
|
|
@@ -21,7 +21,8 @@ def mol_weight_model(version):
|
|
|
21
21
|
|
|
22
22
|
@when(
|
|
23
23
|
parsers.parse(
|
|
24
|
-
"the mol weight model (version '{version}') generates predictions for the molecule representations"
|
|
24
|
+
"the mol weight model (version '{version}') generates predictions for the molecule "
|
|
25
|
+
"representations"
|
|
25
26
|
),
|
|
26
27
|
target_fixture="predictions",
|
|
27
28
|
)
|
|
@@ -36,7 +37,8 @@ def predictions_mol_weight_model(representations, version, multiplier):
|
|
|
36
37
|
|
|
37
38
|
@when(
|
|
38
39
|
parsers.parse(
|
|
39
|
-
"the atomic mass model (version '{version}') generates predictions for the molecule representations"
|
|
40
|
+
"the atomic mass model (version '{version}') generates predictions for the molecule "
|
|
41
|
+
"representations"
|
|
40
42
|
),
|
|
41
43
|
target_fixture="predictions",
|
|
42
44
|
)
|
|
@@ -48,6 +50,7 @@ def predictions_atomic_mass_model(representations, version, multiplier):
|
|
|
48
50
|
output_format="record_list",
|
|
49
51
|
)
|
|
50
52
|
|
|
53
|
+
|
|
51
54
|
@when(
|
|
52
55
|
"all results are considered",
|
|
53
56
|
target_fixture="subset",
|
|
@@ -55,6 +58,7 @@ def predictions_atomic_mass_model(representations, version, multiplier):
|
|
|
55
58
|
def all_results(predictions):
|
|
56
59
|
return predictions
|
|
57
60
|
|
|
61
|
+
|
|
58
62
|
@when(
|
|
59
63
|
"the subset of the result where the input was not None is considered",
|
|
60
64
|
target_fixture="subset",
|
|
@@ -63,10 +67,11 @@ def subset_without_input_none(predictions):
|
|
|
63
67
|
# remove None entries
|
|
64
68
|
return [p for p in predictions if p["input_mol"] is not None]
|
|
65
69
|
|
|
70
|
+
|
|
66
71
|
@when(
|
|
67
72
|
"the subset of the result where the preprocessed mol was not None is considered",
|
|
68
73
|
target_fixture="subset",
|
|
69
74
|
)
|
|
70
75
|
def subset_without_preprocessed_none(predictions):
|
|
71
76
|
# remove None entries
|
|
72
|
-
return [p for p in predictions if p["preprocessed_mol"] is not None]
|
|
77
|
+
return [p for p in predictions if p["preprocessed_mol"] is not None]
|
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from functools import reduce
|
|
3
|
+
|
|
1
4
|
import numpy as np
|
|
2
5
|
from hypothesis import given as hgiven
|
|
3
6
|
from hypothesis import seed, settings
|
|
@@ -5,10 +8,9 @@ from hypothesis import strategies as st
|
|
|
5
8
|
from hypothesis_rdkit import mols
|
|
6
9
|
from pytest_bdd import given, parsers
|
|
7
10
|
from rdkit.Chem import MolToInchi, MolToMolBlock, MolToSmiles
|
|
8
|
-
from ..polyfills import BlockLogs
|
|
9
11
|
from rdkit.Chem.rdMolDescriptors import CalcExactMolWt
|
|
10
|
-
|
|
11
|
-
from
|
|
12
|
+
|
|
13
|
+
from ..polyfills import BlockLogs
|
|
12
14
|
|
|
13
15
|
|
|
14
16
|
@given(parsers.parse("a random seed set to {seed:d}"), target_fixture="random_seed")
|
|
@@ -38,7 +40,9 @@ def representations_from_molecules(molecules, input_type):
|
|
|
38
40
|
elif input_type == "inchi":
|
|
39
41
|
converter = MolToInchi
|
|
40
42
|
elif input_type == "rdkit_mol":
|
|
41
|
-
|
|
43
|
+
|
|
44
|
+
def converter(mol):
|
|
45
|
+
return mol
|
|
42
46
|
else:
|
|
43
47
|
raise ValueError(f"Unknown input_type: {input_type}")
|
|
44
48
|
|
|
@@ -49,9 +53,7 @@ def representations_from_molecules(molecules, input_type):
|
|
|
49
53
|
|
|
50
54
|
|
|
51
55
|
@given(
|
|
52
|
-
parsers.re(
|
|
53
|
-
r"a list of (?P<num>\d+) random molecules(?:, where(?P<conditions>[\s\S]*))?"
|
|
54
|
-
),
|
|
56
|
+
parsers.re(r"a list of (?P<num>\d+) random molecules(?:, where(?P<conditions>[\s\S]*))?"),
|
|
55
57
|
target_fixture="molecules",
|
|
56
58
|
)
|
|
57
59
|
def molecules(num, conditions, random_seed=0):
|
|
@@ -61,6 +63,7 @@ def molecules(num, conditions, random_seed=0):
|
|
|
61
63
|
maps = []
|
|
62
64
|
|
|
63
65
|
if conditions is not None:
|
|
66
|
+
|
|
64
67
|
def filter_weight(min_weight, max_weight):
|
|
65
68
|
min_weight = float(min_weight)
|
|
66
69
|
max_weight = float(max_weight)
|
|
@@ -76,15 +79,19 @@ def molecules(num, conditions, random_seed=0):
|
|
|
76
79
|
|
|
77
80
|
expressions = [
|
|
78
81
|
# filters are functions that return True if the molecule should be kept
|
|
79
|
-
(
|
|
82
|
+
(
|
|
83
|
+
"filter",
|
|
84
|
+
r"each mol has a weight between (?P<min_weight>\d+) and (?P<max_weight>\d+)",
|
|
85
|
+
filter_weight,
|
|
86
|
+
),
|
|
80
87
|
# maps are functions that modify the molecule
|
|
81
|
-
("map", r"(?P<num_none>\d+) entries are None", map_to_none)
|
|
88
|
+
("map", r"(?P<num_none>\d+) entries are None", map_to_none),
|
|
82
89
|
]
|
|
83
90
|
|
|
84
91
|
conditions_list = [c for c in conditions.split("\n") if c.strip() != ""]
|
|
85
92
|
|
|
86
93
|
for condition in conditions_list:
|
|
87
|
-
for kind, expression, f in expressions:
|
|
94
|
+
for kind, expression, f in expressions: # noqa: B007
|
|
88
95
|
# conditions might be a markdown list (starting with a star character)
|
|
89
96
|
expression = r"\s*(\*\s*)?" + expression + r"\s*"
|
|
90
97
|
|
|
@@ -92,7 +99,7 @@ def molecules(num, conditions, random_seed=0):
|
|
|
92
99
|
if match:
|
|
93
100
|
params = match.groupdict()
|
|
94
101
|
break
|
|
95
|
-
|
|
102
|
+
|
|
96
103
|
assert match is not None, f"Could not parse condition: {condition}"
|
|
97
104
|
|
|
98
105
|
if kind == "filter":
|
|
@@ -102,8 +109,11 @@ def molecules(num, conditions, random_seed=0):
|
|
|
102
109
|
else:
|
|
103
110
|
raise ValueError(f"Unknown kind: {kind}")
|
|
104
111
|
|
|
105
|
-
filter_func
|
|
106
|
-
|
|
112
|
+
def filter_func(mol):
|
|
113
|
+
return all(f(mol) for f in filters)
|
|
114
|
+
|
|
115
|
+
def map_func(ms):
|
|
116
|
+
return reduce(lambda ms, f: f(ms), maps, ms)
|
|
107
117
|
|
|
108
118
|
result = None
|
|
109
119
|
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "nerdd-module"
|
|
7
|
-
version = "0.3.45"
|
|
7
|
+
version = "0.3.47"
|
|
8
8
|
description = "Base package to create NERDD modules"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = "BSD-3-Clause"
|
|
@@ -105,7 +105,6 @@ patterns = ["*.py", "*.feature", "pyproject.toml"]
|
|
|
105
105
|
|
|
106
106
|
[tool.ruff]
|
|
107
107
|
line-length = 100
|
|
108
|
-
extend-exclude = ["tests", "nerdd_module/tests"]
|
|
109
108
|
|
|
110
109
|
[tool.ruff.lint]
|
|
111
110
|
select = [
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/config/search_yaml_configuration.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/converters/problem_list_converter.py
RENAMED
|
File without changes
|
{nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/converters/representation_converter.py
RENAMED
|
File without changes
|
{nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/converters/source_list_converter.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/model/convert_representations_step.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/preprocessing/check_valid_smiles.py
RENAMED
|
File without changes
|
{nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/preprocessing/chembl_structure_pipeline.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/preprocessing/preprocessing_step.py
RENAMED
|
File without changes
|
{nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/preprocessing/remove_small_fragments.py
RENAMED
|
File without changes
|
{nerdd_module-0.3.45 → nerdd_module-0.3.47}/nerdd_module/preprocessing/remove_stereochemistry.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|