nerdd-module 0.3.38__tar.gz → 0.3.40__tar.gz
This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/PKG-INFO +1 -1
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/config/models.py +1 -1
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/model/model.py +12 -2
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/model/prediction_step.py +77 -16
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/preprocessing/__init__.py +1 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/preprocessing/filter_by_weight.py +4 -2
- nerdd_module-0.3.40/nerdd_module/preprocessing/remove_small_fragments.py +26 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/preprocessing/remove_stereochemistry.py +2 -0
- nerdd_module-0.3.40/nerdd_module/preprocessing/sanitize.py +31 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/tests/predictions.py +6 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module.egg-info/PKG-INFO +1 -1
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module.egg-info/SOURCES.txt +1 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/pyproject.toml +1 -1
- nerdd_module-0.3.38/nerdd_module/preprocessing/sanitize.py +0 -21
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/LICENSE +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/README.md +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/__init__.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/cli.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/config/__init__.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/config/configuration.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/config/default_configuration.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/config/dict_configuration.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/config/merged_configuration.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/config/package_configuration.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/config/search_yaml_configuration.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/config/yaml_configuration.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/converters/__init__.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/converters/basic_type_converter.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/converters/converter.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/converters/converter_config.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/converters/mol_converter.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/converters/problem_list_converter.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/converters/representation_converter.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/converters/source_list_converter.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/converters/void_converter.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/input/__init__.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/input/depth_first_explorer.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/input/explorer.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/input/file_reader.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/input/gzip_reader.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/input/inchi_reader.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/input/list_reader.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/input/mol_reader.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/input/reader.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/input/reader_config.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/input/sdf_reader.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/input/smiles_reader.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/input/string_reader.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/input/tar_reader.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/input/zip_reader.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/model/__init__.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/model/assign_name_step.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/model/convert_representations_step.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/model/enforce_schema_step.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/model/read_input_step.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/model/write_output_step.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/output/__init__.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/output/csv_writer.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/output/file_writer.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/output/iterator_writer.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/output/pandas_writer.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/output/record_list_writer.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/output/sdf_writer.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/output/writer.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/output/writer_config.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/polyfills/__init__.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/polyfills/block_logs.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/polyfills/files.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/polyfills/get_entry_points.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/polyfills/literal.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/polyfills/typed_dict.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/polyfills/types.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/polyfills/version.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/preprocessing/check_valid_smiles.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/preprocessing/chembl_structure_pipeline.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/preprocessing/filter_by_element.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/preprocessing/preprocessing_step.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/problem.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/py.typed +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/steps/__init__.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/steps/map_step.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/steps/output_step.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/steps/step.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/tests/__init__.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/tests/checks.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/tests/files.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/tests/models/AtomicMassModel.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/tests/models/MolWeightModel.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/tests/models/__init__.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/tests/preprocessing/DummyPreprocessingStep.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/tests/preprocessing/__init__.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/tests/representations.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/util/__init__.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/util/call_with_mappings.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/util/package.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/version.py +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module.egg-info/dependency_links.txt +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module.egg-info/requires.txt +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module.egg-info/top_level.txt +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/setup.cfg +0 -0
- {nerdd_module-0.3.38 → nerdd_module-0.3.40}/tests/test_features.py +0 -0
|
@@ -203,7 +203,7 @@ class Module(BaseModel):
|
|
|
203
203
|
for i, j in zip(indices[:-1], indices[1:]):
|
|
204
204
|
assert i + 1 == j, (
|
|
205
205
|
f"Properties with the same group should appear next to each other, "
|
|
206
|
-
f"but group {group} appears at
|
|
206
|
+
f"but group {group} appears at indices {i} and {j}."
|
|
207
207
|
)
|
|
208
208
|
|
|
209
209
|
return values
|
|
@@ -90,10 +90,18 @@ class Model(ABC):
|
|
|
90
90
|
|
|
91
91
|
assert isinstance(output_step, OutputStep), "The last step must be an OutputStep."
|
|
92
92
|
|
|
93
|
+
# make mypy happy by restricting the type of self.config.task
|
|
94
|
+
assert self.config.task is not None
|
|
95
|
+
|
|
93
96
|
steps = [
|
|
94
97
|
*input_steps,
|
|
95
98
|
*preprocessing_steps,
|
|
96
|
-
PredictionStep(
|
|
99
|
+
PredictionStep(
|
|
100
|
+
self._predict_mols,
|
|
101
|
+
task=self.config.task,
|
|
102
|
+
batch_size=self.config.batch_size,
|
|
103
|
+
**kwargs,
|
|
104
|
+
),
|
|
97
105
|
*postprocessing_steps,
|
|
98
106
|
]
|
|
99
107
|
|
|
@@ -141,6 +149,9 @@ class Model(ABC):
|
|
|
141
149
|
if isinstance(base_config, dict):
|
|
142
150
|
base_config = DictConfiguration(base_config)
|
|
143
151
|
|
|
152
|
+
# ensure that mandatory properties are present
|
|
153
|
+
base_config = MergedConfiguration(DefaultConfiguration(self), base_config)
|
|
154
|
+
|
|
144
155
|
# add default properties mol_id, raw_input, etc.
|
|
145
156
|
task = base_config.get_dict().task
|
|
146
157
|
|
|
@@ -214,7 +225,6 @@ class Model(ABC):
|
|
|
214
225
|
]
|
|
215
226
|
|
|
216
227
|
configs = [
|
|
217
|
-
DefaultConfiguration(self),
|
|
218
228
|
DictConfiguration({"result_properties": default_properties_start}),
|
|
219
229
|
base_config,
|
|
220
230
|
DictConfiguration({"result_properties": default_properties_end}),
|
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from collections import defaultdict
|
|
3
|
-
from typing import Any, Callable, Iterator, List, Tuple
|
|
3
|
+
from typing import Any, Callable, DefaultDict, Iterator, List, Set, Tuple
|
|
4
4
|
|
|
5
|
+
from ..config import Task
|
|
5
6
|
from ..problem import IncompletePredictionProblem, UnknownPredictionProblem
|
|
6
7
|
from ..steps import Step
|
|
7
8
|
from ..util import call_with_mappings
|
|
@@ -12,9 +13,10 @@ __all__ = ["PredictionStep"]
|
|
|
12
13
|
|
|
13
14
|
|
|
14
15
|
class PredictionStep(Step):
|
|
15
|
-
def __init__(self, predict_fn: Callable, batch_size: int, **kwargs: Any) -> None:
|
|
16
|
+
def __init__(self, predict_fn: Callable, task: Task, batch_size: int, **kwargs: Any) -> None:
|
|
16
17
|
super().__init__()
|
|
17
18
|
self._predict_fn = predict_fn
|
|
19
|
+
self._task = task
|
|
18
20
|
self._batch_size = batch_size
|
|
19
21
|
self._kwargs = kwargs
|
|
20
22
|
|
|
@@ -116,10 +118,25 @@ class PredictionStep(Step):
|
|
|
116
118
|
record["mol_id"] in mol_id_set
|
|
117
119
|
), f"The mol_id {record['mol_id']} is not in the batch."
|
|
118
120
|
|
|
121
|
+
# depending on the task, we need to check atom_id or derivative_id
|
|
122
|
+
if self._task == "atom_property_prediction":
|
|
123
|
+
sub_id_property = "atom_id"
|
|
124
|
+
elif self._task == "derivative_property_prediction":
|
|
125
|
+
sub_id_property = "derivative_id"
|
|
126
|
+
else:
|
|
127
|
+
sub_id_property = None
|
|
128
|
+
|
|
119
129
|
# create a mapping from mol_id to record (for quick access)
|
|
120
|
-
mol_id_to_record = defaultdict(list)
|
|
130
|
+
mol_id_to_record: DefaultDict[int, List[dict]] = defaultdict(list)
|
|
121
131
|
for record in predictions:
|
|
122
|
-
mol_id_to_record[record["mol_id"]]
|
|
132
|
+
current_record_list = mol_id_to_record[record["mol_id"]]
|
|
133
|
+
current_record_list.append(record)
|
|
134
|
+
if len(current_record_list) > 1 and sub_id_property is None:
|
|
135
|
+
raise ValueError(
|
|
136
|
+
f"There are duplicate records for mol_id={record['mol_id']}, but the "
|
|
137
|
+
f"prediction task {self._task} requires unique mol_id values. The duplicates "
|
|
138
|
+
f"are: {current_record_list}."
|
|
139
|
+
)
|
|
123
140
|
|
|
124
141
|
# add all records that are missing in the predictions
|
|
125
142
|
for mol_id in temporary_mol_ids:
|
|
@@ -132,19 +149,63 @@ class PredictionStep(Step):
|
|
|
132
149
|
}
|
|
133
150
|
)
|
|
134
151
|
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
for
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
152
|
+
if sub_id_property is not None:
|
|
153
|
+
# task must be either atom_property_prediction or derivative_property_prediction
|
|
154
|
+
# -> check consistency of sub_id_property
|
|
155
|
+
for mol_id, records in mol_id_to_record.items():
|
|
156
|
+
sub_ids: Set[int] = set()
|
|
157
|
+
|
|
158
|
+
for record in records:
|
|
159
|
+
sub_id = record.get(sub_id_property)
|
|
160
|
+
if sub_id is not None:
|
|
161
|
+
# check that sub_id is an integer
|
|
162
|
+
if not isinstance(sub_id, int):
|
|
163
|
+
raise ValueError(
|
|
164
|
+
f"The {sub_id_property} must be an integer, but got {sub_id}. "
|
|
165
|
+
f"Record: {record}"
|
|
166
|
+
)
|
|
167
|
+
|
|
168
|
+
sub_ids.add(sub_id)
|
|
169
|
+
|
|
170
|
+
if (
|
|
171
|
+
len(records) == 1
|
|
172
|
+
and "problems" in records[0]
|
|
173
|
+
and len(records[0]["problems"]) > 0
|
|
174
|
+
):
|
|
175
|
+
# this record was not predicted, so we skip it
|
|
176
|
+
continue
|
|
177
|
+
elif len(sub_ids) == 0:
|
|
178
|
+
# no record has a sub id, we assign them (sequentially)
|
|
179
|
+
for i, record in enumerate(records):
|
|
180
|
+
record[sub_id_property] = i
|
|
181
|
+
continue
|
|
182
|
+
elif len(sub_ids) < len(records):
|
|
183
|
+
# None is not in sub_ids, but the number of unique sub ids is less than
|
|
184
|
+
# the number of records.
|
|
185
|
+
# -> there must be duplicates
|
|
186
|
+
sub_id_list = [record.get(sub_id_property) for record in records]
|
|
187
|
+
raise ValueError(
|
|
188
|
+
f"The result with mol_id={mol_id} contains multiple entries per "
|
|
189
|
+
f"molecule, but the sequence of {sub_id_property} is not unique. "
|
|
190
|
+
f"Found: {sub_id_list}."
|
|
145
191
|
)
|
|
146
|
-
|
|
147
|
-
|
|
192
|
+
else:
|
|
193
|
+
min_sub_id = min(sub_ids)
|
|
194
|
+
max_sub_id = max(sub_ids)
|
|
195
|
+
|
|
196
|
+
if min_sub_id != 0:
|
|
197
|
+
raise ValueError(
|
|
198
|
+
f"The sequence of {sub_id_property} does not start at 0 for "
|
|
199
|
+
f"mol_id={mol_id}. Instead, the minimum {sub_id_property} was "
|
|
200
|
+
f"{min_sub_id}."
|
|
201
|
+
)
|
|
202
|
+
elif max_sub_id - min_sub_id + 1 != len(sub_ids):
|
|
203
|
+
# there are gaps in the sequence of sub ids
|
|
204
|
+
raise ValueError(
|
|
205
|
+
f"The result with mol_id={mol_id} contains multiple entries per "
|
|
206
|
+
f"molecule, but the sequence of {sub_id_property} has gaps. "
|
|
207
|
+
f"Found: {sub_ids}."
|
|
208
|
+
)
|
|
148
209
|
|
|
149
210
|
for key, records in mol_id_to_record.items():
|
|
150
211
|
for record in records:
|
|
@@ -6,12 +6,14 @@ from rdkit.Chem.rdMolDescriptors import CalcExactMolWt
|
|
|
6
6
|
from ..problem import InvalidWeightProblem, Problem
|
|
7
7
|
from .preprocessing_step import PreprocessingStep
|
|
8
8
|
|
|
9
|
+
__all__ = ["FilterByWeight"]
|
|
10
|
+
|
|
9
11
|
|
|
10
12
|
class FilterByWeight(PreprocessingStep):
|
|
11
13
|
def __init__(
|
|
12
14
|
self,
|
|
13
|
-
min_weight: float,
|
|
14
|
-
max_weight: float,
|
|
15
|
+
min_weight: float = 0,
|
|
16
|
+
max_weight: float = float("inf"),
|
|
15
17
|
remove_invalid_molecules: bool = False,
|
|
16
18
|
) -> None:
|
|
17
19
|
super().__init__()
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
from typing import List, Optional, Tuple
|
|
2
|
+
|
|
3
|
+
from rdkit.Chem import GetMolFrags, Mol
|
|
4
|
+
from rdkit.Chem.rdMolDescriptors import CalcExactMolWt
|
|
5
|
+
|
|
6
|
+
from ..problem import Problem
|
|
7
|
+
from .preprocessing_step import PreprocessingStep
|
|
8
|
+
|
|
9
|
+
__all__ = ["RemoveSmallFragments"]
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class RemoveSmallFragments(PreprocessingStep):
|
|
13
|
+
def __init__(
|
|
14
|
+
self,
|
|
15
|
+
) -> None:
|
|
16
|
+
super().__init__()
|
|
17
|
+
|
|
18
|
+
def _preprocess(self, mol: Mol) -> Tuple[Optional[Mol], List[Problem]]:
|
|
19
|
+
fragments = GetMolFrags(mol, asMols=True)
|
|
20
|
+
if len(fragments) > 1:
|
|
21
|
+
# select the largest fragment
|
|
22
|
+
largest_fragment = max(fragments, key=CalcExactMolWt)
|
|
23
|
+
else:
|
|
24
|
+
largest_fragment = mol
|
|
25
|
+
|
|
26
|
+
return largest_fragment, []
|
{nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/preprocessing/remove_stereochemistry.py
RENAMED
|
@@ -6,6 +6,8 @@ from rdkit.Chem import RemoveStereochemistry as remove_stereochemistry
|
|
|
6
6
|
from ..problem import Problem
|
|
7
7
|
from .preprocessing_step import PreprocessingStep
|
|
8
8
|
|
|
9
|
+
__all__ = ["RemoveStereochemistry"]
|
|
10
|
+
|
|
9
11
|
|
|
10
12
|
class RemoveStereochemistry(PreprocessingStep):
|
|
11
13
|
def __init__(self) -> None:
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from typing import List, Optional, Tuple
|
|
3
|
+
|
|
4
|
+
from rdkit.Chem import AtomKekulizeException, KekulizeException, Mol, SanitizeMol
|
|
5
|
+
|
|
6
|
+
from ..problem import Problem
|
|
7
|
+
from .preprocessing_step import PreprocessingStep
|
|
8
|
+
|
|
9
|
+
__all__ = ["Sanitize"]
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class Sanitize(PreprocessingStep):
|
|
16
|
+
def __init__(self) -> None:
|
|
17
|
+
super().__init__()
|
|
18
|
+
|
|
19
|
+
def _preprocess(self, mol: Mol) -> Tuple[Optional[Mol], List[Problem]]:
|
|
20
|
+
try:
|
|
21
|
+
SanitizeMol(mol)
|
|
22
|
+
return mol, []
|
|
23
|
+
except KekulizeException:
|
|
24
|
+
return None, [Problem("kekulization_error", "Failed kekulizing the molecule.")]
|
|
25
|
+
except AtomKekulizeException:
|
|
26
|
+
return None, [
|
|
27
|
+
Problem("atom_kekulization_error", "Failed kekulizing an atom in the molecule.")
|
|
28
|
+
]
|
|
29
|
+
except Exception as e:
|
|
30
|
+
logger.exception(e)
|
|
31
|
+
return None, [Problem("sanitization_error", "Failed sanitizing the molecule.")]
|
|
@@ -48,6 +48,12 @@ def predictions_atomic_mass_model(representations, version, multiplier):
|
|
|
48
48
|
output_format="record_list",
|
|
49
49
|
)
|
|
50
50
|
|
|
51
|
+
@when(
|
|
52
|
+
"all results are considered",
|
|
53
|
+
target_fixture="subset",
|
|
54
|
+
)
|
|
55
|
+
def subset_without_none(predictions):
|
|
56
|
+
return predictions
|
|
51
57
|
|
|
52
58
|
@when(
|
|
53
59
|
"the subset of the result where the input was not None is considered",
|
|
@@ -75,6 +75,7 @@ nerdd_module/preprocessing/chembl_structure_pipeline.py
|
|
|
75
75
|
nerdd_module/preprocessing/filter_by_element.py
|
|
76
76
|
nerdd_module/preprocessing/filter_by_weight.py
|
|
77
77
|
nerdd_module/preprocessing/preprocessing_step.py
|
|
78
|
+
nerdd_module/preprocessing/remove_small_fragments.py
|
|
78
79
|
nerdd_module/preprocessing/remove_stereochemistry.py
|
|
79
80
|
nerdd_module/preprocessing/sanitize.py
|
|
80
81
|
nerdd_module/steps/__init__.py
|
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
from typing import List, Optional, Tuple
|
|
2
|
-
|
|
3
|
-
from rdkit.Chem import Mol, SanitizeMol
|
|
4
|
-
|
|
5
|
-
from ..problem import Problem
|
|
6
|
-
from .preprocessing_step import PreprocessingStep
|
|
7
|
-
|
|
8
|
-
__all__ = ["Sanitize"]
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
class Sanitize(PreprocessingStep):
|
|
12
|
-
def __init__(self) -> None:
|
|
13
|
-
super().__init__()
|
|
14
|
-
|
|
15
|
-
def _preprocess(self, mol: Mol) -> Tuple[Optional[Mol], List[Problem]]:
|
|
16
|
-
problems: List[Problem] = []
|
|
17
|
-
|
|
18
|
-
# sanitize molecule
|
|
19
|
-
SanitizeMol(mol)
|
|
20
|
-
|
|
21
|
-
return mol, problems
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/config/search_yaml_configuration.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/converters/problem_list_converter.py
RENAMED
|
File without changes
|
{nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/converters/representation_converter.py
RENAMED
|
File without changes
|
{nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/converters/source_list_converter.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/model/convert_representations_step.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/preprocessing/check_valid_smiles.py
RENAMED
|
File without changes
|
{nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/preprocessing/chembl_structure_pipeline.py
RENAMED
|
File without changes
|
|
File without changes
|
{nerdd_module-0.3.38 → nerdd_module-0.3.40}/nerdd_module/preprocessing/preprocessing_step.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|