sapiopycommons 2025.7.10a595__py3-none-any.whl → 2025.7.14a610__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of sapiopycommons might be problematic. Click here for more details.
- sapiopycommons/chem/ps_commons.py +363 -123
- {sapiopycommons-2025.7.10a595.dist-info → sapiopycommons-2025.7.14a610.dist-info}/METADATA +1 -1
- {sapiopycommons-2025.7.10a595.dist-info → sapiopycommons-2025.7.14a610.dist-info}/RECORD +5 -5
- {sapiopycommons-2025.7.10a595.dist-info → sapiopycommons-2025.7.14a610.dist-info}/WHEEL +0 -0
- {sapiopycommons-2025.7.10a595.dist-info → sapiopycommons-2025.7.14a610.dist-info}/licenses/LICENSE +0 -0
|
@@ -2,11 +2,12 @@
|
|
|
2
2
|
Parallel Synthesis Commons
|
|
3
3
|
Author: Yechen Qiao
|
|
4
4
|
"""
|
|
5
|
+
import itertools
|
|
5
6
|
import json
|
|
6
7
|
from dataclasses import dataclass
|
|
7
8
|
from typing import Any
|
|
8
9
|
|
|
9
|
-
from indigo import IndigoObject
|
|
10
|
+
from indigo import IndigoObject, IndigoException
|
|
10
11
|
from sapiopycommons.chem.IndigoMolecules import indigo, get_aromatic_dearomatic_forms, renderer
|
|
11
12
|
|
|
12
13
|
|
|
@@ -56,6 +57,18 @@ class SerializableMoleculeMatch:
|
|
|
56
57
|
"""
|
|
57
58
|
return self._record_id
|
|
58
59
|
|
|
60
|
+
@property
def query_atom_indexes(self) -> set[int]:
    """
    The indexes of the query-molecule atoms that take part in this match.

    :return: A new set built from the keys of the query-atom-to-matched-atom index mapping.
    """
    # Iterating a dict yields its keys, so no explicit .keys() call is needed.
    return set(self._query_atom_to_atom)
|
|
63
|
+
|
|
64
|
+
@property
def matching_atom_indexes(self) -> set[int]:
    """
    The indexes of the matched-molecule atoms that take part in this match.

    :return: A new set built from the values of the query-atom-to-matched-atom index mapping.
    """
    matched_indexes = self._query_atom_to_atom.values()
    return {*matched_indexes}
|
|
67
|
+
|
|
68
|
+
@property
def matching_molecule_copy(self) -> IndigoObject:
    """
    A fresh clone of the matching molecule held by this match.

    :return: The result of calling ``clone()`` on the stored matching molecule.
    """
    molecule = self._matching_molecule
    return molecule.clone()
|
|
71
|
+
|
|
59
72
|
def __str__(self):
    """
    Render this match as JSON text.

    :return: ``json.dumps`` applied to the value returned by ``to_json()``.
    """
    payload = self.to_json()
    return json.dumps(payload)
|
|
61
74
|
|
|
@@ -123,12 +136,13 @@ class SerializableMoleculeMatch:
|
|
|
123
136
|
|
|
124
137
|
@staticmethod
|
|
125
138
|
def create(query_molecule: IndigoObject, matching_molecule: IndigoObject,
|
|
126
|
-
match: IndigoObject) -> 'SerializableMoleculeMatch':
|
|
139
|
+
match: IndigoObject, query_mol_atom_index_filter: set[int] | None = None) -> 'SerializableMoleculeMatch':
|
|
127
140
|
"""
|
|
128
141
|
Create a SerializableMoleculeMatch from a query molecule, matching molecule, and match.
|
|
129
142
|
:param query_molecule: The query molecule.
|
|
130
143
|
:param matching_molecule: The matching molecule.
|
|
131
144
|
:param match: The match object containing atom mappings.
|
|
145
|
+
:param query_mol_atom_index_filter: Optional list of atom indexes to filter the query molecule atoms.
|
|
132
146
|
:return: A new SerializableMoleculeMatch instance.
|
|
133
147
|
"""
|
|
134
148
|
smm = SerializableMoleculeMatch()
|
|
@@ -141,12 +155,19 @@ class SerializableMoleculeMatch:
|
|
|
141
155
|
smm._record_id = 0
|
|
142
156
|
|
|
143
157
|
for qatom in query_molecule.iterateAtoms():
|
|
158
|
+
if query_mol_atom_index_filter and qatom.index() not in query_mol_atom_index_filter:
|
|
159
|
+
continue
|
|
144
160
|
concrete_atom = match.mapAtom(qatom)
|
|
145
161
|
if concrete_atom is None:
|
|
146
162
|
continue
|
|
147
163
|
smm._query_atom_to_atom[qatom.index()] = concrete_atom.index()
|
|
148
164
|
|
|
165
|
+
qbond: IndigoObject
|
|
149
166
|
for qbond in query_molecule.iterateBonds():
|
|
167
|
+
if query_mol_atom_index_filter:
|
|
168
|
+
if (qbond.source().index() not in query_mol_atom_index_filter or
|
|
169
|
+
qbond.destination().index() not in query_mol_atom_index_filter):
|
|
170
|
+
continue
|
|
150
171
|
concrete_bond = match.mapBond(qbond)
|
|
151
172
|
if concrete_bond is None:
|
|
152
173
|
continue
|
|
@@ -157,6 +178,22 @@ class SerializableMoleculeMatch:
|
|
|
157
178
|
return self._matching_molecule.clone()
|
|
158
179
|
|
|
159
180
|
|
|
181
|
+
def is_reaction_atom_map_completed(q_reaction: IndigoObject) -> bool:
    """
    Check that the product side of a query reaction is completely atom-mapped.

    :param q_reaction: The query reaction to test.
    :return: True if and only if every product atom that is not an R-Site has a non-zero atom mapping number.
        A reaction without any product atoms yields True.
    """
    # all() stops at the first unmapped atom, just like an early "return False" would.
    return all(
        q_reaction.atomMappingNumber(atom) != 0
        for product in q_reaction.iterateProducts()
        for atom in product.iterateAtoms()
        if not atom.isRSite()
    )
|
|
195
|
+
|
|
196
|
+
|
|
160
197
|
@dataclass
|
|
161
198
|
class ReplacementReaction:
|
|
162
199
|
"""
|
|
@@ -330,127 +367,6 @@ def inherit_auto_map_by_match(target_reaction: IndigoObject, source_reaction: In
|
|
|
330
367
|
target_reaction.automap("keep")
|
|
331
368
|
|
|
332
369
|
|
|
333
|
-
def _replacement_agrees_with_products(rr: ReplacementReaction, exact_match: IndigoObject,
                                      q_reactant: IndigoObject, reaction: IndigoObject,
                                      q_reaction: IndigoObject, reaction_match: IndigoObject,
                                      products: list[IndigoObject], q_products: list[IndigoObject]) -> bool:
    """
    Tell whether one replacement reaction is consistent with every product of the enumerated reaction.

    :param rr: The replacement reaction being tested.
    :param exact_match: The exact match between rr.replacement_reactant and the enumerated reactant.
    :param q_reactant: The query reactant at the same position as the enumerated reactant.
    :param reaction: The enumerated reaction.
    :param q_reaction: The query reaction.
    :param reaction_match: The match of the query reaction onto the enumerated reaction.
    :param products: The products of the enumerated reaction.
    :param q_products: The products of the query reaction, in the same order as products.
    :return: False as soon as a product atom contradicts the replacement reactant, True otherwise.
    """
    query_reactant_atom_by_index: dict[int, IndigoObject] = {}
    rr_reactant_index_to_query_reactant_index: dict[int, int] = {}
    q_r_site_to_rr_atom: dict[str, IndigoObject] = {}
    for q_atom in q_reactant.iterateAtoms():
        query_reactant_atom_by_index[q_atom.index()] = q_atom
        rr_atom = rr.replacement_query_reaction_match.mapAtom(q_atom)
        if rr_atom:
            rr_reactant_index_to_query_reactant_index[rr_atom.index()] = q_atom.index()
            # NOTE(review): the nesting of this R-site registration under "if rr_atom" is inferred;
            # confirm against the original indentation.
            if q_atom.isRSite():
                q_r_site_to_rr_atom[q_atom.symbol()] = rr_atom

    # Enumerated-reaction mapping number -> replacement reactant atom, routed through the exact match.
    enum_r_atom_mapping_number_to_rr_atom: dict[int, IndigoObject] = {}
    for rr_atom in rr.replacement_reactant.iterateAtoms():
        enum_r_atom = exact_match.mapAtom(rr_atom)
        if enum_r_atom:
            enum_r_atom_mapping_number = reaction.atomMappingNumber(enum_r_atom)
            if enum_r_atom_mapping_number > 0:
                enum_r_atom_mapping_number_to_rr_atom[enum_r_atom_mapping_number] = rr_atom

    for product_index, enum_product in enumerate(products):
        query_product = q_products[product_index]
        # Enumerated-reaction mapping number -> query product atom, routed through the reaction match.
        enum_mapping_number_to_q_product_atom: dict[int, IndigoObject] = {}
        for q_atom in query_product.iterateAtoms():
            enum_atom = reaction_match.mapAtom(q_atom)
            if enum_atom:
                enum_mapping_number = reaction.atomMappingNumber(enum_atom)
                if enum_mapping_number > 0:
                    enum_mapping_number_to_q_product_atom[enum_mapping_number] = q_atom

        for enum_atom in enum_product.iterateAtoms():
            enum_mapping_number = reaction.atomMappingNumber(enum_atom)
            if enum_mapping_number == 0:
                continue
            rr_atom = enum_r_atom_mapping_number_to_rr_atom.get(enum_mapping_number)
            if not rr_atom:
                continue
            q_product_atom = enum_mapping_number_to_q_product_atom.get(enum_mapping_number)
            if not q_product_atom:
                continue
            if q_product_atom.isRSite():
                # The product R site must originate from the very same replacement atom as the reactant R site.
                rr_atom_r_site = q_r_site_to_rr_atom.get(q_product_atom.symbol())
                if not rr_atom_r_site or rr_atom.index() != rr_atom_r_site.index():
                    return False
            else:
                q_product_atom_mapping_number = q_reaction.atomMappingNumber(q_product_atom)
                if q_product_atom_mapping_number == 0:
                    continue
                query_reactant_atom_index = rr_reactant_index_to_query_reactant_index.get(rr_atom.index())
                if query_reactant_atom_index is None:
                    return False
                query_reactant_atom = query_reactant_atom_by_index.get(query_reactant_atom_index)
                # Both sides of the query reaction must carry the same mapping number for this atom.
                if q_product_atom_mapping_number != q_reaction.atomMappingNumber(query_reactant_atom):
                    return False
    return True


def get_used_reactants_for_match(
        reaction: IndigoObject, q_reaction: IndigoObject, reaction_match: IndigoObject,
        kept_replacement_reaction_list_list: list[list[ReplacementReaction]]) -> list[ReplacementReaction]:
    """
    Find the replacement reactions that correspond to the reactants in reaction that also match the query reaction.

    For every query reactant, the replacement reactions kept for that position are tried in order. The first one
    whose replacement reactant exactly matches the enumerated reactant and that is consistent with all products
    is selected.

    :param reaction: The enumerated reaction to explain.
    :param q_reaction: The query reaction.
    :param reaction_match: The match of the query reaction onto the enumerated reaction.
    :param kept_replacement_reaction_list_list: For each query reactant position, the candidate replacement
        reactions.
    :return: One ReplacementReaction per query reactant, ordered by the reactants in the query reaction.
        An empty list (not None) is returned if any reactant has no acceptable replacement reaction, even though
        reaction may have matches directly to the query reaction.
    """
    q_reactants = list(q_reaction.iterateReactants())
    q_products = list(q_reaction.iterateProducts())
    reactants = list(reaction.iterateReactants())
    products = list(reaction.iterateProducts())

    ret: list[ReplacementReaction] = []
    for reactant_index, q_reactant in enumerate(q_reactants):
        replacement_list = kept_replacement_reaction_list_list[reactant_index]
        enum_r = reactants[reactant_index]
        found: ReplacementReaction | None = None
        for rr in replacement_list:
            exact_match = indigo.exactMatch(rr.replacement_reactant, enum_r)
            if not exact_match:
                # YQ Skip if this enumeration is not meant to be the same reactant as replacement we are iterating.
                continue
            if _replacement_agrees_with_products(rr, exact_match, q_reactant, reaction, q_reaction,
                                                 reaction_match, products, q_products):
                found = rr
                break
        if not found:
            return []
        ret.append(found)
    return ret
|
|
452
|
-
|
|
453
|
-
|
|
454
370
|
def are_symmetrical_subs(match1: SerializableMoleculeMatch, match2: SerializableMoleculeMatch) -> bool:
|
|
455
371
|
"""
|
|
456
372
|
Check if two SerializableMoleculeMatch objects are symmetrical.
|
|
@@ -521,3 +437,327 @@ def replace_r_site_with_wildcards(mol: IndigoObject) -> IndigoObject:
|
|
|
521
437
|
if atom.isRSite():
|
|
522
438
|
atom.resetAtom("*")
|
|
523
439
|
return ret
|
|
440
|
+
|
|
441
|
+
|
|
442
|
+
def get_r_substructure(query_mol: IndigoObject, mol: IndigoObject,
                       initial_atom: IndigoObject, match: IndigoObject | SerializableMoleculeMatch,
                       r_site: str) -> IndigoObject:
    """
    Return the connected R substructure that grows out of an R site and lies outside the original query match.

    Starting from initial_atom, neighbors in mol are followed transitively. Atoms that the match maps from a
    query atom are never entered, unless that query atom is an R site whose symbol equals r_site.

    :param query_mol: The query molecule that contains the R site.
    :param mol: The molecule that contains the R site.
    :param initial_atom: The atom of mol at which the R site starts.
    :param match: The match object that maps atoms between the query and the molecule.
        Note the within-R site atoms will not be part of the match.
        But the starting position of R site is replaced with pseudoatom "*" and thus matches.
    :param r_site: The R site symbol to match against.
    :return: A clone of mol from which every atom outside the collected substructure has been removed.
    """
    start_index = initial_atom.index()

    # Everything the query match already claims is off limits, except atoms matched by the requested R site.
    blocked: set[int] = set()
    for q_atom in query_mol.iterateAtoms():
        target = match.mapAtom(q_atom)
        if not target:
            continue
        if q_atom.isRSite() and q_atom.symbol() == r_site:
            continue
        blocked.add(target.index())

    # Flood fill over the neighbor relation; "kept" doubles as the visited set.
    kept: set[int] = set()
    frontier: set[int] = {start_index}
    while frontier:
        current = mol.getAtom(frontier.pop())
        kept.add(current.index())
        for neighbor in current.iterateNeighbors():
            neighbor_index = neighbor.index()
            if neighbor_index in kept or neighbor_index in frontier:
                continue
            if neighbor_index in blocked and neighbor_index != start_index:
                continue
            frontier.add(neighbor_index)

    to_remove: list[int] = [atom.index() for atom in mol.iterateAtoms() if atom.index() not in kept]
    fragment = mol.clone()
    fragment.removeAtoms(to_remove)
    return fragment
|
|
489
|
+
|
|
490
|
+
|
|
491
|
+
def get_rr_substructure_by_symbol(query_reactant, replacement_reaction) -> dict[str, IndigoObject]:
    """
    Collect the R substructure of a replacement reactant for every R site of a query reactant.

    :param query_reactant: The query reactant whose R-site atoms are looked up.
    :param replacement_reaction: The replacement reaction providing the replacement reactant and its match
        against the query reactant.
    :return: A dictionary from R site symbol to the R substructure cut out of the replacement reactant.
    :raises ValueError: If an R site of the query reactant is not mapped by the replacement reaction's match.
    """
    rr_match = replacement_reaction.replacement_query_reaction_match
    rr_reactant = replacement_reaction.replacement_reactant
    fragments: dict[str, IndigoObject] = {}
    for q_atom in query_reactant.iterateAtoms():
        if q_atom.isRSite():
            symbol = q_atom.symbol()
            anchor = rr_match.mapAtom(q_atom)
            if anchor is None:
                raise ValueError(
                    f"The replacement reactant {rr_reactant.smiles()} do not have R Site: {symbol}. "
                    f"This should not happen.")
            fragments[symbol] = get_r_substructure(query_reactant, rr_reactant, anchor, rr_match, symbol)
    return fragments
|
|
505
|
+
|
|
506
|
+
|
|
507
|
+
def _clone_keeping_atoms(mol: IndigoObject, keep_indexes: set[int]) -> IndigoObject:
    """Return a clone of mol from which every atom whose index is not in keep_indexes has been removed."""
    trimmed = mol.clone()
    trimmed.removeAtoms([atom.index() for atom in trimmed.iterateAtoms() if atom.index() not in keep_indexes])
    return trimmed


def __test_reactant_match(replacement_reaction: ReplacementReaction,
                          testing_reactant: IndigoObject,
                          query_reactant: IndigoObject) -> SerializableMoleculeMatch | None:
    """
    Test whether the reactant in the replacement reaction matches the reactant in the testing reaction.

    The testing reactant must exactly match the replacement reactant as a whole. Then every substructure match
    of the wildcard form of the query reactant in the testing reactant is tried, and a match is accepted when:
    1. the part of the testing reactant covered by the used query atoms exactly matches the part of the
       replacement reactant covered by the replacement reaction's own match, and
    2. for every R site, the R substructure of the testing reactant exactly matches the R substructure of the
       replacement reactant.

    :param replacement_reaction: The replacement reaction containing the reactant to test.
    :param testing_reactant: The reactant in the testing reaction to match against.
    :param query_reactant: The reactant in the query reaction to match against.
    :return: The serializable match of the first accepted substructure match, or None if there is none.
    """
    if not indigo.exactMatch(replacement_reaction.replacement_reactant, testing_reactant):
        return None

    wildcard_query = replace_r_site_with_wildcards(query_reactant)
    outer_matcher: IndigoObject = indigo.substructureMatcher(testing_reactant)
    rr_match = replacement_reaction.replacement_query_reaction_match
    used_query_atom_indexes = rr_match.query_atom_indexes
    used_rr_atom_indexes = rr_match.matching_atom_indexes
    rr_substructure_by_symbol: dict[str, IndigoObject] = get_rr_substructure_by_symbol(query_reactant,
                                                                                      replacement_reaction)
    # The trimmed replacement molecule does not depend on the candidate match, so it is built only once.
    used_replacement_mol = _clone_keeping_atoms(replacement_reaction.replacement_reactant, used_rr_atom_indexes)

    for outer_match in outer_matcher.iterateMatches(wildcard_query):
        # NOTE(review): create() treats an empty filter like "no filter" while the loop below treats it
        # as "keep nothing"; confirm an empty used_query_atom_indexes cannot occur here.
        ret: SerializableMoleculeMatch = SerializableMoleculeMatch.create(
            query_reactant, testing_reactant, outer_match, used_query_atom_indexes)

        used_testing_atom_indexes: set[int] = set()
        for q_atom in wildcard_query.iterateAtoms():
            if q_atom.index() not in used_query_atom_indexes:
                continue
            mapped_atom = outer_match.mapAtom(q_atom)
            if mapped_atom is not None:
                used_testing_atom_indexes.add(mapped_atom.index())
        used_testing_mol = _clone_keeping_atoms(testing_reactant, used_testing_atom_indexes)
        try:
            if not indigo.exactMatch(used_replacement_mol, used_testing_mol):
                continue
        except IndigoException:
            # A comparison that Indigo cannot carry out is treated as "this candidate does not match".
            continue

        # Now check each R site substructure and it should be an exact match.
        outer_match_r_substructure_by_symbol: dict[str, IndigoObject] = {}
        missing_r_site = False
        for q_atom in wildcard_query.iterateAtoms():
            orig_q_atom = query_reactant.getAtom(q_atom.index())
            if not orig_q_atom.isRSite():
                continue
            mapped_atom = outer_match.mapAtom(q_atom)
            if mapped_atom is None:
                # If we are missing an R site, we cannot match; no need to look at the remaining atoms.
                missing_r_site = True
                break
            r_site_symbol = orig_q_atom.symbol()
            outer_match_r_substructure_by_symbol[r_site_symbol] = get_r_substructure(
                query_reactant, testing_reactant, mapped_atom, outer_match, r_site_symbol)
        if missing_r_site:
            continue

        # If we have a mismatch in any R site substructure, we cannot match.
        if any(not indigo.exactMatch(rr_substructure, outer_match_r_substructure_by_symbol[r_site_symbol])
               for r_site_symbol, rr_substructure in rr_substructure_by_symbol.items()):
            continue

        # We are done matching. Return the match mapping.
        return ret
    return None
|
|
588
|
+
|
|
589
|
+
|
|
590
|
+
def __test_product_match(testing_reaction: IndigoObject, q_reaction: IndigoObject,
                         cur_rr_list: list[ReplacementReaction],
                         testing_reactants_match_list: list[SerializableMoleculeMatch]) -> list[
    SerializableMoleculeMatch] | None:
    """
    Test every product of the testing reaction against the query product at the same position.

    A substructure match of the wildcard form of the query product is accepted when:
    1. every R site of the query product maps to an R substructure that exactly matches the R substructure
       defined by the replacement reactants for the same R site symbol, and
    2. every mapped non-R-site query atom (atom mapping number > 0) has a replacement reactant atom with the
       same query atom mapping number, and that atom has the same symbol as the matched product atom.

    :param testing_reaction: The reaction whose products are tested.
    :param q_reaction: The query reaction providing the query reactants, query products and mapping numbers.
    :param cur_rr_list: The selected replacement reactions, one per query reactant, in query reactant order.
    :param testing_reactants_match_list: The reactant matches found for cur_rr_list. This function currently
        does not read it.
    :return: One match per testing product in product order, or None if any product has no acceptable match.
    :raises ValueError: If a query atom mapping number occurs more than once across the accepted product atoms,
        or if an R site of a query product has no R substructure among the replacement reactants.
    """
    # ********* PREPARE DATA *********
    testing_products = list(testing_reaction.iterateProducts())
    query_reactants = list(q_reaction.iterateReactants())
    query_products = list(q_reaction.iterateProducts())

    q_atom_mapping_number_to_rr_reactant_atom: dict[int, IndigoObject] = {}
    rr_substructure_by_symbol: dict[str, IndigoObject] = {}
    for reactant_index, replacement_reaction in enumerate(cur_rr_list):
        query_reactant = query_reactants[reactant_index]
        rr_match = replacement_reaction.replacement_query_reaction_match
        for q_atom in query_reactant.iterateAtoms():
            q_atom_mapping_number = q_reaction.atomMappingNumber(q_atom)
            if q_atom_mapping_number == 0:
                continue
            mapped_atom = rr_match.mapAtom(q_atom)
            if mapped_atom is None:
                continue
            q_atom_mapping_number_to_rr_reactant_atom[q_atom_mapping_number] = mapped_atom
        rr_substructure_by_symbol.update(get_rr_substructure_by_symbol(query_reactant, replacement_reaction))

    # ********* TESTING PRODUCTS *********
    ret: list[SerializableMoleculeMatch] = []
    accepted_used_atom_mapping_numbers: set[int] = set()
    for product_index, testing_product in enumerate(testing_products):
        orig_q_product = query_products[product_index]
        q_product = replace_r_site_with_wildcards(orig_q_product)
        outer_matcher: IndigoObject = indigo.substructureMatcher(testing_product)
        found_match: SerializableMoleculeMatch | None = None
        for outer_match in outer_matcher.iterateMatches(q_product):
            ss_match: SerializableMoleculeMatch = SerializableMoleculeMatch.create(orig_q_product, testing_product,
                                                                                   outer_match)
            is_valid_match = True
            used_atom_mapping_numbers: set[int] = set()
            for q_atom in q_product.iterateAtoms():
                mapped_product_atom = outer_match.mapAtom(q_atom)
                orig_q_atom = orig_q_product.getAtom(q_atom.index())

                if orig_q_atom.isRSite():
                    r_site_symbol = orig_q_atom.symbol()
                    rr_substructure = rr_substructure_by_symbol.get(r_site_symbol)
                    if rr_substructure is None:
                        # This only happens if we didn't replace wildcard properly in original highlight or a
                        # misalignment between reactant and actual reaction template in Sapio.
                        raise ValueError("Missing RR substructure for R site: " + r_site_symbol + ".")
                    if mapped_product_atom is None:
                        # An unmapped R site has no starting atom, so this candidate cannot be accepted.
                        is_valid_match = False
                        break
                    r_substructure = get_r_substructure(orig_q_product, testing_product, mapped_product_atom,
                                                        outer_match, r_site_symbol)
                    if not indigo.exactMatch(rr_substructure, r_substructure):
                        is_valid_match = False
                        break
                    continue

                q_atom_mapping_number = q_reaction.atomMappingNumber(orig_q_atom)
                if q_atom_mapping_number <= 0:
                    continue
                if (q_atom_mapping_number in used_atom_mapping_numbers
                        or q_atom_mapping_number in accepted_used_atom_mapping_numbers):
                    raise ValueError(
                        "Multiple atoms in the product with the same query atom mapping number: " + str(
                            q_atom_mapping_number))
                rr_atom = q_atom_mapping_number_to_rr_reactant_atom.get(q_atom_mapping_number)
                # An unmapped product atom is rejected here instead of failing on None.symbol().
                if (rr_atom is None or mapped_product_atom is None
                        or rr_atom.symbol() != mapped_product_atom.symbol()):
                    is_valid_match = False
                    break
                used_atom_mapping_numbers.add(q_atom_mapping_number)

            if is_valid_match:
                found_match = ss_match
                accepted_used_atom_mapping_numbers.update(used_atom_mapping_numbers)
                break
        if found_match is None:
            return None
        ret.append(found_match)
    return ret
|
|
693
|
+
|
|
694
|
+
|
|
695
|
+
def __get_final_highlighted_reaction(cur_rr_list: list[ReplacementReaction],
                                     product_matches: list[SerializableMoleculeMatch]) -> tuple[IndigoObject, list[
    ReplacementReaction]] | None:
    """
    Translates the final match into a highlighted reaction.

    Each reactant is a clone of a replacement reactant and each product is a copy of a matched product molecule.
    In both, the matched atoms and the bonds running between two matched atoms are highlighted.

    :param cur_rr_list: The selected reactants that together forms an acceptable reaction.
    :param product_matches: The generated products coming from the selected reactants.
    :return: A tuple of the highlighted reaction, taken from the second element returned by
        get_aromatic_dearomatic_forms, and cur_rr_list itself.
    """

    def highlight_matched(molecule: IndigoObject, matched_indexes: set[int]) -> None:
        # Highlight matched atoms, plus every bond whose two end atoms are both matched.
        for atom in molecule.iterateAtoms():
            if atom.index() not in matched_indexes:
                continue
            atom.highlight()
            for neighbor in atom.iterateNeighbors():
                if neighbor.index() in matched_indexes:
                    neighbor.bond().highlight()

    highlighted: IndigoObject = indigo.createReaction()
    for replacement_reaction in cur_rr_list:
        reactant_copy = replacement_reaction.replacement_reactant.clone()
        highlight_matched(reactant_copy, replacement_reaction.replacement_query_reaction_match.matching_atom_indexes)
        highlighted.addReactant(reactant_copy)
    for product_match in product_matches:
        product_copy = product_match.get_matched_molecule_copy()
        highlight_matched(product_copy, product_match.matching_atom_indexes)
        highlighted.addProduct(product_copy)
    _, highlighted = get_aromatic_dearomatic_forms(highlighted)
    return highlighted, cur_rr_list
|
|
727
|
+
|
|
728
|
+
|
|
729
|
+
def ps_match(testing_reaction: IndigoObject, q_reaction: IndigoObject,
             kept_replacement_reaction_list_list: list[list[ReplacementReaction]]) -> tuple[IndigoObject, list[
    ReplacementReaction]] | None:
    """
    Search for a combination of replacement reactions that explains the testing reaction.

    One replacement reaction is picked per reactant position, and every combination is tried in cartesian
    product order. A combination is accepted when every testing reactant matches its replacement reaction and
    the product test then yields a non-empty list of product matches.

    :param testing_reaction: The reaction to explain.
    :param q_reaction: The query reaction.
    :param kept_replacement_reaction_list_list: For each reactant position, the candidate replacement reactions.
    :return: The highlighted reaction and the chosen replacement reactions for the first accepted combination,
        or None if no combination is accepted.
    """
    testing_reactants = [reactant for reactant in testing_reaction.iterateReactants()]
    query_reactants = [reactant for reactant in q_reaction.iterateReactants()]

    for combination in itertools.product(*kept_replacement_reaction_list_list):
        cur_rr_list: list[ReplacementReaction] = list(combination)
        testing_reactants_match_list = []
        for reactant_index, replacement_reaction in enumerate(cur_rr_list):
            match = __test_reactant_match(
                replacement_reaction, testing_reactants[reactant_index], query_reactants[reactant_index])
            if not match:
                break
            testing_reactants_match_list.append(match)
        else:
            # Only reached when every reactant of this combination produced a match.
            product_matches = __test_product_match(testing_reaction, q_reaction, cur_rr_list,
                                                   testing_reactants_match_list)
            if product_matches:
                return __get_final_highlighted_reaction(cur_rr_list, product_matches)
    return None
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: sapiopycommons
|
|
3
|
-
Version: 2025.7.10a595
|
|
3
|
+
Version: 2025.7.14a610
|
|
4
4
|
Summary: Official Sapio Python API Utilities Package
|
|
5
5
|
Project-URL: Homepage, https://github.com/sapiosciences
|
|
6
6
|
Author-email: Jonathan Steck <jsteck@sapiosciences.com>, Yechen Qiao <yqiao@sapiosciences.com>
|
|
@@ -5,7 +5,7 @@ sapiopycommons/callbacks/field_builder.py,sha256=rnIP-RJafk3mZlAx1eJ8a0eSW9Ps_L6
|
|
|
5
5
|
sapiopycommons/chem/IndigoMolecules.py,sha256=30bsnZ2o4fJXUV6kUTI-I6fDa7bQj7zfE3rOQQ7WD5M,5287
|
|
6
6
|
sapiopycommons/chem/Molecules.py,sha256=mVqPn32MPMjF0iZas-5MFkS-upIdoW5OB72KKZmJRJA,12523
|
|
7
7
|
sapiopycommons/chem/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
|
-
sapiopycommons/chem/ps_commons.py,sha256=
|
|
8
|
+
sapiopycommons/chem/ps_commons.py,sha256=aUxytBeFkkNUzmwgK4KUKUUg5yIyfw6VUrHXnXCykNA,36032
|
|
9
9
|
sapiopycommons/customreport/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
10
10
|
sapiopycommons/customreport/auto_pagers.py,sha256=89p-tik0MhsOplYje6LbAW4WClldpAmb8YXFDoXhIlY,17144
|
|
11
11
|
sapiopycommons/customreport/column_builder.py,sha256=0RO53e9rKPZ07C--KcepN6_tpRw_FxF3O9vdG0ilKG8,3014
|
|
@@ -63,7 +63,7 @@ sapiopycommons/webhook/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3
|
|
|
63
63
|
sapiopycommons/webhook/webhook_context.py,sha256=D793uLsb1691SalaPnBUk3rOSxn_hYLhdvkaIxjNXss,1909
|
|
64
64
|
sapiopycommons/webhook/webhook_handlers.py,sha256=tUVNCw05CDGu1gFDm2g558hX_O203WVm_n__ojjoRRM,39841
|
|
65
65
|
sapiopycommons/webhook/webservice_handlers.py,sha256=tyaYGG1-v_JJrJHZ6cy5mGCxX9z1foLw7pM4MDJlFxs,14297
|
|
66
|
-
sapiopycommons-2025.7.
|
|
67
|
-
sapiopycommons-2025.7.
|
|
68
|
-
sapiopycommons-2025.7.
|
|
69
|
-
sapiopycommons-2025.7.
|
|
66
|
+
sapiopycommons-2025.7.14a610.dist-info/METADATA,sha256=9bpnGjMnsjoJME1IsB-lTJovX4HB3ZHKf6ffDNdjIPA,3143
|
|
67
|
+
sapiopycommons-2025.7.14a610.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
|
|
68
|
+
sapiopycommons-2025.7.14a610.dist-info/licenses/LICENSE,sha256=HyVuytGSiAUQ6ErWBHTqt1iSGHhLmlC8fO7jTCuR8dU,16725
|
|
69
|
+
sapiopycommons-2025.7.14a610.dist-info/RECORD,,
|
|
File without changes
|
{sapiopycommons-2025.7.10a595.dist-info → sapiopycommons-2025.7.14a610.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|