sapiopycommons 2025.7.14a610__py3-none-any.whl → 2025.7.15a611__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sapiopycommons might be problematic. Click here for more details.

@@ -1,763 +0,0 @@
1
- """
2
- Parallel Synthesis Commons
3
- Author: Yechen Qiao
4
- """
5
- import itertools
6
- import json
7
- from dataclasses import dataclass
8
- from typing import Any
9
-
10
- from indigo import IndigoObject, IndigoException
11
- from sapiopycommons.chem.IndigoMolecules import indigo, get_aromatic_dearomatic_forms, renderer
12
-
13
-
14
class SerializableQueryMolecule:
    """
    A query molecule reduced to plain strings so that it can be placed in JSON-compatible data.
    """
    mol_block: str  # mol file text of the aromatic form
    smarts: str  # SMARTS of the aromatic form
    render_svg: str  # rendering of the dearomatized form, as produced by the shared renderer

    @staticmethod
    def create(query_molecule: IndigoObject):
        """
        Build a SerializableQueryMolecule from an Indigo query molecule.
        :param query_molecule: The query molecule to capture.
        :return: A new SerializableQueryMolecule holding the mol block, SMARTS and rendering.
        """
        aromatic_form, dearomatic_form = get_aromatic_dearomatic_forms(query_molecule)
        result = SerializableQueryMolecule()
        result.mol_block = aromatic_form.molfile()
        result.smarts = aromatic_form.smarts()
        result.render_svg = renderer.renderToString(dearomatic_form)
        return result

    def to_json(self) -> dict[str, Any]:
        """
        Convert the SerializableQueryMolecule to a JSON-compatible dictionary.
        :return: A dictionary with the keys "mol_block", "smarts" and "render_svg".
        """
        field_names = ("mol_block", "smarts", "render_svg")
        return {field_name: getattr(self, field_name) for field_name in field_names}
38
-
39
-
40
class SerializableMoleculeMatch:
    """
    A substructure match between a query molecule and a concrete molecule, kept as index maps plus mol
    file text so that it can be converted to and from JSON-compatible data.
    """
    _query_atom_to_atom: dict[int, int]  # query atom index -> matching molecule atom index
    _query_bond_to_bond: dict[int, int]  # query bond index -> matching molecule bond index
    _query_molecule_file: str
    _matching_molecule_file: str
    _query_molecule: IndigoObject
    _matching_molecule: IndigoObject
    _record_id: int  # Only when received from Sapio.

    @property
    def record_id(self) -> int:
        """
        Get the record ID of the match.
        :return: The record ID.
        """
        return self._record_id

    @property
    def query_atom_indexes(self) -> set[int]:
        """
        :return: The indexes of the query atoms that take part in the match.
        """
        return set(self._query_atom_to_atom)

    @property
    def matching_atom_indexes(self) -> set[int]:
        """
        :return: The indexes of the matching molecule's atoms that take part in the match.
        """
        return set(self._query_atom_to_atom.values())

    @property
    def matching_molecule_copy(self) -> IndigoObject:
        """
        :return: A clone of the matching molecule.
        """
        return self._matching_molecule.clone()

    def __str__(self):
        return json.dumps(self.to_json())

    def __hash__(self):
        # Hash on the query SMARTS only; __eq__ requires equal SMARTS before it consults symmetry.
        return hash(self._query_molecule.smarts())

    def __eq__(self, other):
        if not isinstance(other, SerializableMoleculeMatch):
            return False
        compared_fields = ("_query_atom_to_atom", "_query_bond_to_bond", "_query_molecule_file",
                           "_matching_molecule_file", "_record_id")
        # Fast path: every stored field is identical.
        if all(getattr(self, field) == getattr(other, field) for field in compared_fields):
            return True
        if self._query_molecule.smarts() != other._query_molecule.smarts():
            return False
        # Same query: the two matches still count as equal when they are symmetrical.
        return are_symmetrical_subs(self, other)

    def mapAtom(self, atom: IndigoObject) -> IndigoObject | None:
        """
        Translate a query atom into the corresponding atom of the matching molecule.
        :param atom: The query atom.
        :return: The mapped atom, or None when the query atom is not part of the match.
        """
        atom_map = self._query_atom_to_atom
        if not atom_map:
            return None
        query_index = atom.index()
        if query_index not in atom_map:
            return None
        return self._matching_molecule.getAtom(atom_map[query_index])

    def mapBond(self, bond: IndigoObject) -> IndigoObject | None:
        """
        Translate a query bond into the corresponding bond of the matching molecule.
        :param bond: The query bond.
        :return: The mapped bond, or None when the query bond is not part of the match.
        """
        bond_map = self._query_bond_to_bond
        if not bond_map:
            return None
        query_index = bond.index()
        if query_index not in bond_map:
            return None
        return self._matching_molecule.getBond(bond_map[query_index])

    def to_json(self) -> dict[str, Any]:
        """
        Convert the SerializableMoleculeMatch to a JSON-compatible dictionary.
        :return: A dictionary holding both mol files, both index maps and the record ID.
        """
        json_dct: dict[str, Any] = {}
        json_dct["query_molecule_file"] = self._query_molecule_file
        json_dct["matching_molecule_file"] = self._matching_molecule_file
        json_dct["query_atom_to_atom"] = self._query_atom_to_atom
        json_dct["query_bond_to_bond"] = self._query_bond_to_bond
        json_dct["record_id"] = self._record_id
        return json_dct

    @staticmethod
    def from_json(json_dct: dict[str, Any]) -> 'SerializableMoleculeMatch':
        """
        Load a SerializableMoleculeMatch from a dictionary shaped like the output of to_json.
        :param json_dct: The dictionary representation of the match.
        :return: A new SerializableMoleculeMatch instance.
        """
        loaded = SerializableMoleculeMatch()
        # Keys and values are coerced with int(), so string keys are accepted.
        loaded._query_atom_to_atom = {
            int(query_index): int(target_index)
            for query_index, target_index in json_dct.get("query_atom_to_atom", {}).items()
        }
        loaded._query_bond_to_bond = {
            int(query_index): int(target_index)
            for query_index, target_index in json_dct.get("query_bond_to_bond", {}).items()
        }
        loaded._query_molecule_file = json_dct.get("query_molecule_file")
        loaded._matching_molecule_file = json_dct.get("matching_molecule_file")
        loaded._query_molecule = indigo.loadQueryMolecule(loaded._query_molecule_file)
        loaded._matching_molecule = indigo.loadMolecule(loaded._matching_molecule_file)
        loaded._record_id = json_dct.get("record_id", 0)  # Default to 0 if not present
        return loaded

    @staticmethod
    def create(query_molecule: IndigoObject, matching_molecule: IndigoObject,
               match: IndigoObject, query_mol_atom_index_filter: set[int] | None = None) -> 'SerializableMoleculeMatch':
        """
        Create a SerializableMoleculeMatch from a query molecule, matching molecule, and match.
        :param query_molecule: The query molecule.
        :param matching_molecule: The matching molecule.
        :param match: The match object containing atom mappings.
        :param query_mol_atom_index_filter: Optional set of query atom indexes to keep. A bond is kept only
        when both of its end atoms are in the set. None or an empty set disables filtering.
        :return: A new SerializableMoleculeMatch instance with a record ID of 0.
        """
        created = SerializableMoleculeMatch()
        created._query_atom_to_atom = {}
        created._query_bond_to_bond = {}
        created._query_molecule = query_molecule.clone()
        created._matching_molecule = matching_molecule.clone()
        created._query_molecule_file = query_molecule.molfile()
        created._matching_molecule_file = matching_molecule.molfile()
        created._record_id = 0

        allowed = query_mol_atom_index_filter
        for query_atom in query_molecule.iterateAtoms():
            if allowed and query_atom.index() not in allowed:
                continue
            target_atom = match.mapAtom(query_atom)
            if target_atom is not None:
                created._query_atom_to_atom[query_atom.index()] = target_atom.index()

        for query_bond in query_molecule.iterateBonds():
            if allowed and not (query_bond.source().index() in allowed
                                and query_bond.destination().index() in allowed):
                continue
            target_bond = match.mapBond(query_bond)
            if target_bond is not None:
                created._query_bond_to_bond[query_bond.index()] = target_bond.index()
        return created

    def get_matched_molecule_copy(self):
        """
        :return: A clone of the matching molecule.
        """
        return self._matching_molecule.clone()
179
-
180
-
181
def is_reaction_atom_map_completed(q_reaction: IndigoObject) -> bool:
    """
    Check whether every product atom of the query reaction carries an atom mapping number.
    :param q_reaction: The query reaction to test.
    :return: True if and only if every product atom that is not an R-Site has a non-zero mapping number.
    """
    return all(
        q_reaction.atomMappingNumber(product_atom) != 0
        for product in q_reaction.iterateProducts()
        for product_atom in product.iterateAtoms()
        if not product_atom.isRSite()
    )
195
-
196
-
197
@dataclass
class ReplacementReaction:
    """
    A replacement reaction stores a reaction template with one reactant replaced by a specific user match.
    """
    # The reaction with the replacement applied.
    reaction: IndigoObject
    # The reactant as it appears inside `reaction`.
    reaction_reactant: IndigoObject
    # The concrete molecule that was substituted for the template reactant.
    replacement_reactant: IndigoObject
    # Match between the query reactant and the replacement reactant.
    replacement_query_reaction_match: SerializableMoleculeMatch
206
-
207
-
208
# noinspection PyProtectedMember
def highlight_mol_substructure_serial_match(molecule: IndigoObject, serializable_match: SerializableMoleculeMatch):
    """
    Highlight the substructure in the molecule based on the SerializableMoleculeMatch.

    The match resolves query atoms and bonds against its own stored copy of the matching molecule, so the
    mapped atom or bond is only used for its index. The highlight itself is applied to the atom or bond
    with that index in ``molecule``. Explicit hydrogen neighbors of a highlighted atom are highlighted too,
    together with the bond leading to them.
    :param molecule: The molecule to highlight. It must use the same atom and bond indexes as the molecule
    the match was created from.
    :param serializable_match: The SerializableMoleculeMatch containing atom mappings.
    """
    query_molecule = serializable_match._query_molecule
    for query_atom in query_molecule.iterateAtoms():
        mapped_atom = serializable_match.mapAtom(query_atom)
        if mapped_atom is None:
            continue
        # Re-resolve by index so that the caller's molecule is the one that gets highlighted.
        target_atom = molecule.getAtom(mapped_atom.index())
        target_atom.highlight()

        for neighbor in target_atom.iterateNeighbors():
            if not neighbor.isPseudoatom() and not neighbor.isRSite() and neighbor.atomicNumber() == 1:
                neighbor.highlight()
                neighbor.bond().highlight()

    for query_bond in query_molecule.iterateBonds():
        mapped_bond = serializable_match.mapBond(query_bond)
        if mapped_bond is None:
            continue
        molecule.getBond(mapped_bond.index()).highlight()
231
-
232
-
233
def clear_highlights(molecule: IndigoObject):
    """
    Remove the highlight from every atom and then every bond of the molecule.
    :param molecule: The molecule to clear highlights from.
    """
    for iterate_elements in (molecule.iterateAtoms, molecule.iterateBonds):
        for element in iterate_elements():
            element.unhighlight()
242
-
243
-
244
def clear_reaction_highlights(reaction: IndigoObject):
    """
    Remove the highlights from every reactant and then every product of the reaction.
    :param reaction: The reaction to clear highlights from.
    """
    for iterate_side in (reaction.iterateReactants, reaction.iterateProducts):
        for molecule in iterate_side():
            clear_highlights(molecule)
253
-
254
-
255
def reserve_atom_mapping_number_of_search_result(q_reaction: IndigoObject, q_reactant: IndigoObject,
                                                 new_reaction_reactant: IndigoObject, new_reaction: IndigoObject,
                                                 sub_match: SerializableMoleculeMatch) -> None:
    """
    Copy atom mapping numbers from the query reactant onto the matched atoms of the new reaction's reactant.
    Query atoms without a match, or without a mapping number, are left alone.
    :param new_reaction: The new reaction that contains the new reaction's reactant. This is the target
    reaction the atom mapping numbers are written to.
    :param new_reaction_reactant: The new reaction's reactant whose atoms receive the mapping numbers.
    :param q_reactant: The query reactant from the query reaction that is being matched.
    :param q_reaction: The query reaction that contains the query reactant for the sub_match.
    :param sub_match: The substructure match between the query reactant and the concrete molecule.
    """
    for query_atom in q_reactant.iterateAtoms():
        matched_atom = sub_match.mapAtom(query_atom)
        if matched_atom is None:
            continue
        template_atom = q_reactant.getAtom(query_atom.index())
        mapping_number = q_reaction.atomMappingNumber(template_atom)
        if not mapping_number:
            continue
        # The match points into its own molecule; look the atom up by index in the target reactant.
        target_atom = new_reaction_reactant.getAtom(matched_atom.index())
        new_reaction.setAtomMappingNumber(target_atom, mapping_number)
275
-
276
-
277
def clean_product_aam(reaction: IndigoObject):
    """
    Remove atom mappings from products that are not present in the reactants.
    :param reaction: The reaction whose product atom mapping numbers are cleaned in place.
    """
    reactant_mapping_numbers = {
        number
        for reactant in reaction.iterateReactants()
        for reactant_atom in reactant.iterateAtoms()
        if (number := reaction.atomMappingNumber(reactant_atom))
    }

    for product in reaction.iterateProducts():
        for product_atom in product.iterateAtoms():
            number = reaction.atomMappingNumber(product_atom)
            if not number or number in reactant_mapping_numbers:
                continue
            reaction.setAtomMappingNumber(product_atom, 0)  # YQ: atom number 0 means no mapping number in Indigo
293
-
294
-
295
def make_concrete_reaction(reactants: list[IndigoObject], products: list[IndigoObject], replacement: IndigoObject,
                           replacement_index: int) -> tuple[IndigoObject, IndigoObject]:
    """
    Build a new query reaction from the given reactants and products, with one reactant swapped for the
    replacement molecule. All other molecules are added as clones.
    :param reactants: List of reactant molecules.
    :param products: List of product molecules.
    :param replacement: The molecule to put in place of the replaced reactant. It is reloaded as a query
    molecule from its mol file.
    :param replacement_index: The index of the reactant to replace.
    :return: A tuple of the new reaction and the molecule at replacement_index inside it.
    """
    built_reaction = indigo.createQueryReaction()
    for position, template_reactant in enumerate(reactants):
        if position != replacement_index:
            built_reaction.addReactant(template_reactant.clone())
            continue
        built_reaction.addReactant(indigo.loadQueryMolecule(replacement.molfile()))
    for template_product in products:
        built_reaction.addProduct(template_product.clone())
    replaced_reactant = built_reaction.getMolecule(replacement_index)
    return built_reaction, replaced_reactant
314
-
315
-
316
def is_ambiguous_atom(atom: IndigoObject) -> bool:
    """
    Test whether the atom is a wildcard rather than one specific element.
    :param atom: The atom to test.
    :return: True for pseudoatoms, R sites, the known wildcard symbols, and symbols that contain both
    "[" and "]".
    """
    if atom.isPseudoatom() or atom.isRSite():
        return True
    atom_symbol = atom.symbol()
    is_named_wildcard = atom_symbol in {'A', 'Q', 'X', 'M', 'AH', 'QH', 'XH', 'MH', 'NOT', 'R', '*'}
    return is_named_wildcard or ("[" in atom_symbol and "]" in atom_symbol)
326
-
327
-
328
def get_react_site_highlights(product, ignored_atom_indexes):
    """
    Get the highlights for the reaction site in the product, ignoring the atoms that are not part of the reaction site.
    The product itself is not modified; the highlights are applied to a clone.
    :param product: The product molecule.
    :param ignored_atom_indexes: A set of atom indexes to ignore.
    :return: A clone of the product with the non-ignored atoms, and the bonds between them, highlighted.
    """
    highlight = product.clone()
    for atom in highlight.iterateAtoms():
        if atom.index() not in ignored_atom_indexes:
            atom.highlight()
            # NOTE(review): the nesting of this neighbor loop was reconstructed from a listing without
            # indentation. As written, a bond is highlighted only when both of its atoms are non-ignored.
            # Confirm against the packaged source.
            for nei in atom.iterateNeighbors():
                if nei.index() not in ignored_atom_indexes:
                    nei.highlight()
                    nei.bond().highlight()
    return highlight
344
-
345
-
346
def inherit_auto_map_by_match(target_reaction: IndigoObject, source_reaction: IndigoObject,
                              reaction_match: IndigoObject):
    """
    Copy the atom mapping numbers of the source reaction onto the matched atoms of the target reaction,
    then run the target reaction's automap in "keep" mode.
    :param target_reaction: The target reaction to inherit auto-mapping to.
    :param source_reaction: The source reaction to inherit auto-mapping from.
    :param reaction_match: The match object that maps atoms between the source and target reactions.
    """
    # Collect reactants first and products second before touching any atom.
    source_components = [*source_reaction.iterateReactants(), *source_reaction.iterateProducts()]
    for component in source_components:
        for source_atom in component.iterateAtoms():
            mapping_number = source_reaction.atomMappingNumber(source_atom)
            if mapping_number == 0:
                continue
            matched_atom = reaction_match.mapAtom(source_atom)
            if not matched_atom:
                continue
            target_reaction.setAtomMappingNumber(matched_atom, mapping_number)
    target_reaction.automap("keep")
368
-
369
-
370
# noinspection PyProtectedMember
def _split_molecule_by_match(match: SerializableMoleculeMatch) -> tuple[IndigoObject, IndigoObject]:
    """
    Split the matched molecule of a match into its matched part and its unmatched remainder.
    :param match: The match whose molecule is split.
    :return: A tuple of two independent copies of the matched molecule. The first keeps only the atoms and
    bonds that are part of the mapping, the second keeps only those that are not.
    """
    matched_part = match.get_matched_molecule_copy()
    mapped_atom_indexes = set(match._query_atom_to_atom.values())
    mapped_bond_indexes = set(match._query_bond_to_bond.values())

    mapped_atoms: list[int] = []
    unmapped_atoms: list[int] = []
    for atom in matched_part.iterateAtoms():
        atom_index = atom.index()
        (mapped_atoms if atom_index in mapped_atom_indexes else unmapped_atoms).append(atom_index)

    mapped_bonds: list[int] = []
    unmapped_bonds: list[int] = []
    for bond in matched_part.iterateBonds():
        bond_index = bond.index()
        (mapped_bonds if bond_index in mapped_bond_indexes else unmapped_bonds).append(bond_index)

    # Bonds are removed before atoms, on both copies.
    matched_part.removeBonds(unmapped_bonds)
    matched_part.removeAtoms(unmapped_atoms)
    unmatched_part = match.get_matched_molecule_copy()
    unmatched_part.removeBonds(mapped_bonds)
    unmatched_part.removeAtoms(mapped_atoms)
    return matched_part, unmatched_part


def are_symmetrical_subs(match1: SerializableMoleculeMatch, match2: SerializableMoleculeMatch) -> bool:
    """
    Check if two SerializableMoleculeMatch objects are symmetrical.
    That is, the atoms and bonds inside the mapping form identical molecules for both matches, and so do
    the atoms and bonds left outside of the mapping. Both comparisons use canonical SMILES.
    :param match1: The first SerializableMoleculeMatch object.
    :param match2: The second SerializableMoleculeMatch object.
    :return: True if the matches are symmetrical, False otherwise.
    """
    match1_part, match1_remainder = _split_molecule_by_match(match1)
    match2_part, match2_remainder = _split_molecule_by_match(match2)
    if match1_part.canonicalSmiles() != match2_part.canonicalSmiles():
        return False
    return match1_remainder.canonicalSmiles() == match2_remainder.canonicalSmiles()
426
-
427
-
428
def replace_r_site_with_wildcards(mol: IndigoObject) -> IndigoObject:
    """
    Produce a copy of the molecule in which every R site is reset to the wildcard "*".
    The substructure matcher at molecular level will not touch R sites. Therefore, to preserve the mapping
    of the bonds that lead to them, the R sites are replaced with wildcards.
    :param mol: The molecule to process. It is left unchanged.
    :return: A cloned molecule with R sites replaced by wildcards.
    """
    wildcard_mol = mol.clone()
    for candidate in wildcard_mol.iterateAtoms():
        if not candidate.isRSite():
            continue
        candidate.resetAtom("*")
    return wildcard_mol
440
-
441
-
442
def get_r_substructure(query_mol: IndigoObject, mol: IndigoObject,
                       initial_atom: IndigoObject, match: IndigoObject | SerializableMoleculeMatch,
                       r_site: str) -> IndigoObject:
    """
    Return the connected R substructure that grows from the initial atom without entering the rest of
    the query match.
    :param query_mol: The query molecule that contains the R site.
    :param mol: The molecule that contains the R substructure.
    :param initial_atom: The atom of mol that the R site is mapped to. The walk starts here.
    :param match: The match object that maps atoms between the query and the molecule.
    Note the atoms inside an R group will not be part of the match.
    But the starting position of the R site is replaced with pseudoatom "*" and thus matches.
    :param r_site: The R site symbol to extract the substructure for.
    :return: A clone of mol from which every atom outside of the R substructure has been removed.
    """
    # Atoms matched by the core of the query, or by a different R site, block the walk.
    blocked_indexes = set()
    for query_atom in query_mol.iterateAtoms():
        matched_atom = match.mapAtom(query_atom)
        if not matched_atom:
            continue
        if not query_atom.isRSite() or query_atom.symbol() != r_site:
            blocked_indexes.add(matched_atom.index())

    start_index = initial_atom.index()
    discovered: set[int] = {start_index}
    pending: list[int] = [start_index]
    while pending:
        current_atom: IndigoObject = mol.getAtom(pending.pop())
        for neighbor in current_atom.iterateNeighbors():
            neighbor_index = neighbor.index()
            if neighbor_index in discovered:
                continue
            if neighbor_index in blocked_indexes and neighbor_index != initial_atom.index():
                continue
            discovered.add(neighbor_index)
            pending.append(neighbor_index)

    indexes_to_remove: list[int] = [
        atom.index() for atom in mol.iterateAtoms() if atom.index() not in discovered
    ]
    r_substructure = mol.clone()
    r_substructure.removeAtoms(indexes_to_remove)
    return r_substructure
489
-
490
-
491
def get_rr_substructure_by_symbol(query_reactant, replacement_reaction) -> dict[str, IndigoObject]:
    """
    Extract the R substructure of the replacement reactant for every R site of the query reactant.
    :param query_reactant: The query reactant whose R sites are looked up.
    :param replacement_reaction: The replacement reaction that provides the replacement reactant and its
    match against the query reactant.
    :return: A dictionary from R site symbol to the R substructure found in the replacement reactant.
    :raises ValueError: If an R site of the query reactant has no mapped atom in the match.
    """
    substructures: dict[str, IndigoObject] = {}
    for query_atom in query_reactant.iterateAtoms():
        if query_atom.isRSite():
            symbol = query_atom.symbol()
            anchor_atom = replacement_reaction.replacement_query_reaction_match.mapAtom(query_atom)
            if anchor_atom is None:
                replacement_smiles = replacement_reaction.replacement_reactant.smiles()
                raise ValueError(
                    "The replacement reactant " + replacement_smiles + " do not have R Site: " + symbol + ". This should not happen.")
            substructures[symbol] = get_r_substructure(
                query_reactant, replacement_reaction.replacement_reactant, anchor_atom,
                replacement_reaction.replacement_query_reaction_match, symbol)
    return substructures
505
-
506
-
507
def __test_reactant_match(replacement_reaction: ReplacementReaction,
                          testing_reactant: IndigoObject,
                          query_reactant: IndigoObject) -> SerializableMoleculeMatch | None:
    """
    Test whether the reactant in the replacement reaction matches the reactant in the testing reaction.
    The testing reactant must be an exact match of the replacement reactant as a whole. In addition, for
    some substructure match of the query reactant in the testing reactant, the matched core and every
    R site substructure must each be an exact match of the corresponding part of the replacement reactant.
    :param replacement_reaction: The replacement reaction containing the reactant to test.
    :param testing_reactant: The reactant in the testing reaction to match against.
    :param query_reactant: The reactant in the query reaction to match against.
    :return: The first acceptable match of the query reactant in the testing reactant, or None if there
    is none.
    """
    # Keep the original for R site lookups; the matcher works on a copy with R sites as wildcards.
    orig_query_reactant = query_reactant
    query_reactant = replace_r_site_with_wildcards(query_reactant)
    if not indigo.exactMatch(replacement_reaction.replacement_reactant, testing_reactant):
        return None
    outer_matcher: IndigoObject = indigo.substructureMatcher(testing_reactant)
    used_query_atom_indexes = replacement_reaction.replacement_query_reaction_match.query_atom_indexes
    used_rr_atom_indexes = replacement_reaction.replacement_query_reaction_match.matching_atom_indexes
    rr_substructure_by_symbol: dict[str, IndigoObject] = get_rr_substructure_by_symbol(orig_query_reactant,
                                                                                        replacement_reaction)

    for outer_match in outer_matcher.iterateMatches(query_reactant):
        ret: SerializableMoleculeMatch = SerializableMoleculeMatch.create(
            orig_query_reactant, testing_reactant, outer_match, used_query_atom_indexes)
        # Testing reactant atoms hit by the query atoms that the replacement match also used.
        used_testing_reactant_atoms = []
        for q_atom in query_reactant.iterateAtoms():
            if q_atom.index() not in used_query_atom_indexes:
                continue
            mapped_atom = outer_match.mapAtom(q_atom)
            if mapped_atom is None:
                continue
            used_testing_reactant_atoms.append(mapped_atom.index())
        # Reduce both molecules to their matched atoms and compare the two fragments.
        used_replacement_mol = replacement_reaction.replacement_reactant.clone()
        used_replacement_mol_delete_indexes = []
        for atom in used_replacement_mol.iterateAtoms():
            if atom.index() not in used_rr_atom_indexes:
                used_replacement_mol_delete_indexes.append(atom.index())
        used_replacement_mol.removeAtoms(used_replacement_mol_delete_indexes)
        used_testing_mol = testing_reactant.clone()
        used_testing_mol_delete_indexes = []
        for atom in used_testing_mol.iterateAtoms():
            if atom.index() not in used_testing_reactant_atoms:
                used_testing_mol_delete_indexes.append(atom.index())
        used_testing_mol.removeAtoms(used_testing_mol_delete_indexes)
        try:
            exact_match = indigo.exactMatch(used_replacement_mol, used_testing_mol)
            if not exact_match:
                continue
        except IndigoException:
            # A failing comparison is treated the same as a mismatch: try the next candidate.
            continue
        # Now check each R site substructure and it should be an exact match.
        outer_match_r_substructure_by_symbol: dict[str, IndigoObject] = {}
        missing_r_site = False
        for q_atom in query_reactant.iterateAtoms():
            # The wildcard copy no longer knows which atoms were R sites, so ask the original by index.
            orig_q_atom = orig_query_reactant.getAtom(q_atom.index())
            if not orig_q_atom.isRSite():
                continue
            r_site_symbol = orig_q_atom.symbol()
            mapped_atom = outer_match.mapAtom(q_atom)
            if mapped_atom is None:
                missing_r_site = True
                continue
            r_substructure = get_r_substructure(orig_query_reactant, testing_reactant, mapped_atom, outer_match,
                                                r_site_symbol)
            outer_match_r_substructure_by_symbol[r_site_symbol] = r_substructure
        if missing_r_site:
            # If we are missing an R site, we cannot match.
            continue
        r_site_mismatch = False
        for r_site_symbol in rr_substructure_by_symbol.keys():
            rr_substructure = rr_substructure_by_symbol[r_site_symbol]
            outer_match_r_substructure = outer_match_r_substructure_by_symbol[r_site_symbol]
            if not indigo.exactMatch(rr_substructure, outer_match_r_substructure):
                r_site_mismatch = True
                break
        if r_site_mismatch:
            # If we have a mismatch in R site substructure, we cannot match.
            continue

        # We are done matching. Return the match mapping.
        return ret
    return None
588
-
589
-
590
def __test_product_match(testing_reaction: IndigoObject, q_reaction: IndigoObject,
                         cur_rr_list: list[ReplacementReaction],
                         testing_reactants_match_list: list[SerializableMoleculeMatch]) -> list[
    SerializableMoleculeMatch] | None:
    """
    Find, for every product of the testing reaction, a match of the query product at the same position.
    For each product, we are testing against two criteria:
    1. That every R site from a product would exact match to the R site defined within reactant.
    2. That every mapped query atom that is not an R site has a counterpart, looked up by its atom mapping
    number, among the replacement reactant atoms matched in cur_rr_list.
    The matching result for each atom via their reaction atom mapping numbers should follow atomic conservation law:
    2.1 No mapping number may be used by more than one product atom; a ValueError is raised otherwise.
    2.2 The mapped product atom must have the same symbol as the replacement reactant atom.
    :param testing_reaction: The reaction whose products are tested.
    :param q_reaction: The query reaction that provides the query reactants, the query products and the
    atom mapping numbers.
    :param cur_rr_list: The replacement reactions chosen for the reactants, indexed like the query reactants.
    :param testing_reactants_match_list: The reactant matches found for cur_rr_list.
    NOTE(review): this parameter is not read anywhere in the body.
    :return: One match per testing product, in product order, or None as soon as a product has no
    acceptable match.
    :raises ValueError: If a mapping number is used twice in the products, or if a product R site has no
    R substructure from the reactants.
    """
    # ********* PREPARE DATA *********
    ret: list[SerializableMoleculeMatch] = []
    # NOTE(review): testing_reactants, replacement_reactants and replacement_reactant_match_list are only
    # filled below and never read afterwards.
    testing_reactants = []
    for testing_reactant in testing_reaction.iterateReactants():
        testing_reactants.append(testing_reactant)
    testing_products = []
    for testing_product in testing_reaction.iterateProducts():
        testing_products.append(testing_product)
    query_reactants = []
    for q_reactant in q_reaction.iterateReactants():
        query_reactants.append(q_reactant)
    query_products = []
    for q_product in q_reaction.iterateProducts():
        query_products.append(q_product)
    replacement_reactants = []
    replacement_reactant_match_list = []
    # Query atom mapping number -> atom of the replacement reactant matched by the query atom.
    q_atom_mapping_number_to_rr_reactant_atom: dict[int, IndigoObject] = {}
    for reactant_index, replacement_reaction in enumerate(cur_rr_list):
        replacement_reactants.append(replacement_reaction.replacement_reactant)
        replacement_reactant_match_list.append(replacement_reaction.replacement_query_reaction_match)
        query_reactant = query_reactants[reactant_index]
        for q_atom in query_reactant.iterateAtoms():
            mapped_atom = replacement_reaction.replacement_query_reaction_match.mapAtom(q_atom)
            q_atom_mapping_number = q_reaction.atomMappingNumber(q_atom)
            if q_atom_mapping_number == 0:
                continue
            if mapped_atom is None:
                continue
            q_atom_mapping_number_to_rr_reactant_atom[q_atom_mapping_number] = mapped_atom
    # R substructures of all replacement reactants, merged into one dictionary keyed by R site symbol.
    rr_substructure_by_symbol: dict[str, IndigoObject] = {}
    for reactant_index, replacement_reaction in enumerate(cur_rr_list):
        query_reactant = query_reactants[reactant_index]
        cur_dict = get_rr_substructure_by_symbol(query_reactant, replacement_reaction)
        rr_substructure_by_symbol.update(cur_dict)

    # ********* TESTING PRODUCTS *********
    # Mapping numbers consumed by the matches accepted for earlier products.
    accepted_used_atom_mapping_numbers: set[int] = set()
    for product_index, testing_product in enumerate(testing_products):
        q_product = query_products[product_index]
        # Keep the original for R site lookups; the matcher works on a copy with R sites as wildcards.
        orig_q_product = q_product
        q_product = replace_r_site_with_wildcards(q_product)
        outer_matcher: IndigoObject = indigo.substructureMatcher(testing_product)
        found_match: SerializableMoleculeMatch | None = None
        for outer_match in outer_matcher.iterateMatches(q_product):
            ss_match: SerializableMoleculeMatch = SerializableMoleculeMatch.create(orig_q_product, testing_product,
                                                                                   outer_match)
            valid_q_product_aam = True
            valid_r_group = True
            used_atom_mapping_numbers: set[int] = set()
            for q_atom in q_product.iterateAtoms():
                # NOTE(review): mapped_product_atom is used below without a None check - confirm that the
                # matcher always maps every query atom here.
                mapped_product_atom = outer_match.mapAtom(q_atom)
                orig_q_atom = orig_q_product.getAtom(q_atom.index())
                q_atom_mapping_number = q_reaction.atomMappingNumber(orig_q_atom)
                if q_atom_mapping_number > 0 and not orig_q_atom.isRSite():
                    if q_atom_mapping_number in used_atom_mapping_numbers or q_atom_mapping_number in accepted_used_atom_mapping_numbers:
                        raise ValueError(
                            "Multiple atoms in the product with the same query atom mapping number: " + str(
                                q_atom_mapping_number))
                    rr_atom = q_atom_mapping_number_to_rr_reactant_atom.get(q_atom_mapping_number)
                    if rr_atom is None:
                        valid_q_product_aam = False
                        break
                    if not rr_atom.symbol() == mapped_product_atom.symbol():
                        valid_q_product_aam = False
                        break
                    used_atom_mapping_numbers.add(q_atom_mapping_number)
                elif orig_q_atom.isRSite():
                    r_site_symbol = orig_q_atom.symbol()
                    r_substructure = get_r_substructure(orig_q_product, testing_product, mapped_product_atom,
                                                        outer_match,
                                                        r_site_symbol)
                    rr_substructure = rr_substructure_by_symbol.get(r_site_symbol)
                    if rr_substructure is None:
                        # This only happens if we didn't replace wildcard properly in original highlight or a misalignment between reactant and actual reaction template in Sapio.
                        raise ValueError("Missing RR substructure for R site: " + r_site_symbol + ".")
                    if not indigo.exactMatch(rr_substructure, r_substructure):
                        valid_r_group = False
                        break
            if valid_q_product_aam and valid_r_group:
                # The first acceptable match wins; its mapping numbers are reserved for later products.
                found_match = ss_match
                accepted_used_atom_mapping_numbers.update(used_atom_mapping_numbers)
                break
        if not found_match:
            return None
        ret.append(found_match)
    return ret
693
-
694
-
695
def __get_final_highlighted_reaction(cur_rr_list: list[ReplacementReaction],
                                     product_matches: list[SerializableMoleculeMatch]) -> tuple[IndigoObject, list[
    ReplacementReaction]] | None:
    """
    Translates the final match into a highlighted reaction.
    Matched atoms are highlighted, together with the bonds that connect two matched atoms.
    :param cur_rr_list: The selected reactants that together form an acceptable reaction.
    :param product_matches: The generated products coming from the selected reactants.
    :return: A tuple of the highlighted reaction, taken as the second form returned by
    get_aromatic_dearomatic_forms, and cur_rr_list itself.
    """

    def highlight_matched_part(mol: IndigoObject, matched_indexes: set[int]) -> IndigoObject:
        # Highlight the matched atoms of mol and the bonds between two matched atoms.
        for atom in mol.iterateAtoms():
            if atom.index() not in matched_indexes:
                continue
            atom.highlight()
            for neighbor in atom.iterateNeighbors():
                if neighbor.index() in matched_indexes:
                    neighbor.bond().highlight()
        return mol

    highlighted_reaction: IndigoObject = indigo.createReaction()
    for chosen_rr in cur_rr_list:
        reactant_copy = chosen_rr.replacement_reactant.clone()
        reactant_indexes = chosen_rr.replacement_query_reaction_match.matching_atom_indexes
        highlighted_reaction.addReactant(highlight_matched_part(reactant_copy, reactant_indexes))
    for accepted_match in product_matches:
        product_copy = accepted_match.get_matched_molecule_copy()
        product_indexes = accepted_match.matching_atom_indexes
        highlighted_reaction.addProduct(highlight_matched_part(product_copy, product_indexes))
    _, highlighted_reaction = get_aromatic_dearomatic_forms(highlighted_reaction)
    return highlighted_reaction, cur_rr_list
727
-
728
-
729
def ps_match(testing_reaction: IndigoObject, q_reaction: IndigoObject,
             kept_replacement_reaction_list_list: list[list[ReplacementReaction]]) -> tuple[IndigoObject, list[
    ReplacementReaction]] | None:
    """
    Search for a combination of replacement reactions that explains the testing reaction.
    One replacement reaction is picked per reactant position. A combination is accepted when every
    testing reactant matches its pick and the products match as well.
    :param testing_reaction: The reaction to test.
    :param q_reaction: The query reaction the replacement reactions were derived from.
    :param kept_replacement_reaction_list_list: For each reactant position, the candidate replacement reactions.
    :return: The highlighted reaction and the accepted replacement reactions for the first accepted
    combination, or None if no combination is accepted.
    """
    testing_reactants = list(testing_reaction.iterateReactants())
    query_reactants = list(q_reaction.iterateReactants())

    # The cartesian product yields the candidates in the same order as iterating their indexes would.
    for combination in itertools.product(*kept_replacement_reaction_list_list):
        cur_rr_list: list[ReplacementReaction] = list(combination)
        reactant_matches = []
        for position, candidate in enumerate(cur_rr_list):
            reactant_match = __test_reactant_match(
                candidate, testing_reactants[position], query_reactants[position])
            if not reactant_match:
                break
            reactant_matches.append(reactant_match)
        else:
            # Every reactant matched, so the products decide.
            product_matches = __test_product_match(testing_reaction, q_reaction, cur_rr_list, reactant_matches)
            if product_matches:
                return __get_final_highlighted_reaction(cur_rr_list, product_matches)
    return None