@datagrok/bio 2.25.2 → 2.25.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/package-test.js +2 -2
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +2 -2
- package/dist/package.js.map +1 -1
- package/package.json +3 -3
- package/scripts/mol-to-helm.py +259 -67
- package/src/utils/monomer-cell-renderer.ts +8 -6
- package/src/utils/monomer-lib/monomer-manager/const.ts +40 -0
- package/src/utils/monomer-lib/monomer-manager/monomer-manager.ts +13 -2
- package/test-console-output-1.log +366 -3222
- package/test-record-1.mp4 +0 -0
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Davit Rizhinashvili",
|
|
6
6
|
"email": "drizhinashvili@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.25.2",
|
|
8
|
+
"version": "2.25.4",
|
|
9
9
|
"description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -44,10 +44,10 @@
|
|
|
44
44
|
],
|
|
45
45
|
"dependencies": {
|
|
46
46
|
"@biowasm/aioli": "^3.1.0",
|
|
47
|
-
"@datagrok-libraries/bio": "^5.61.
|
|
47
|
+
"@datagrok-libraries/bio": "^5.61.3",
|
|
48
48
|
"@datagrok-libraries/chem-meta": "^1.2.9",
|
|
49
49
|
"@datagrok-libraries/math": "^1.2.6",
|
|
50
|
-
"@datagrok-libraries/ml": "^6.10.
|
|
50
|
+
"@datagrok-libraries/ml": "^6.10.7",
|
|
51
51
|
"@datagrok-libraries/tutorials": "^1.7.4",
|
|
52
52
|
"@datagrok-libraries/utils": "^4.6.9",
|
|
53
53
|
"@webgpu/types": "^0.1.40",
|
package/scripts/mol-to-helm.py
CHANGED
|
@@ -179,6 +179,37 @@ class FragmentGraph:
|
|
|
179
179
|
for node in ordered_nodes
|
|
180
180
|
]
|
|
181
181
|
|
|
182
|
+
def is_cyclic(self) -> bool:
|
|
183
|
+
"""
|
|
184
|
+
Detect if the peptide is cyclic.
|
|
185
|
+
A cyclic peptide has a peptide bond connecting the last residue back to near the beginning.
|
|
186
|
+
Handles cases where N-terminal caps (like 'ac' from Lys_Ac) create an extra fragment at position 0.
|
|
187
|
+
"""
|
|
188
|
+
if len(self.nodes) < 3:
|
|
189
|
+
return False
|
|
190
|
+
|
|
191
|
+
# Get ordered nodes
|
|
192
|
+
ordered = self.get_ordered_nodes()
|
|
193
|
+
if len(ordered) < 3:
|
|
194
|
+
return False
|
|
195
|
+
|
|
196
|
+
# Get the last node ID
|
|
197
|
+
last_id = ordered[-1].id
|
|
198
|
+
|
|
199
|
+
# For a cyclic peptide, the last residue should connect back to one of the first few residues
|
|
200
|
+
# (usually first, but could be second if there's an N-terminal cap like 'ac')
|
|
201
|
+
# Check if last node has a peptide bond to any of the first 3 nodes
|
|
202
|
+
first_few_ids = [ordered[i].id for i in range(min(3, len(ordered)))]
|
|
203
|
+
|
|
204
|
+
for link in self.links:
|
|
205
|
+
if link.linkage_type == LinkageType.PEPTIDE:
|
|
206
|
+
# Check if link connects last node to one of the first few nodes
|
|
207
|
+
if (link.from_node_id == last_id and link.to_node_id in first_few_ids) or \
|
|
208
|
+
(link.to_node_id == last_id and link.from_node_id in first_few_ids):
|
|
209
|
+
return True
|
|
210
|
+
|
|
211
|
+
return False
|
|
212
|
+
|
|
182
213
|
def __len__(self):
|
|
183
214
|
return len(self.nodes)
|
|
184
215
|
|
|
@@ -221,8 +252,15 @@ class BondDetector:
|
|
|
221
252
|
#GENERALIZATION ITEM: BOND PATTERNS SHOULD BE DERIVED FROM LIBRARY
|
|
222
253
|
def __init__(self):
|
|
223
254
|
# True peptide bond: C and N both in backbone (each bonded to carbons)
|
|
224
|
-
#
|
|
225
|
-
|
|
255
|
+
# First carbon can be aliphatic or aromatic (for amino acids like NMe2Abz)
|
|
256
|
+
# Carbonyl carbon is sp2 (X3)
|
|
257
|
+
# Exclude if carbonyl is in a small ring (r5 or r6) to avoid cleaving lactams like Pyr
|
|
258
|
+
# !r5 = not in 5-membered ring, !r6 = not in 6-membered ring
|
|
259
|
+
# This preserves lactams but allows large macrocycles and proline (C=O outside ring)
|
|
260
|
+
# Nitrogen can be X2 (proline, imino) or X3 (standard amino, N-methyl)
|
|
261
|
+
# N-C bond can be single (-) or double (=) for imine bonds in dehydro amino acids
|
|
262
|
+
# Alpha carbon after N can be sp3 (X4) or sp2 (X3) for dehydroamino acids
|
|
263
|
+
self.peptide_bond = Chem.MolFromSmarts('[#6]-[C;X3;!r5;!r6](=[O;X1])-[N;X2,X3]~[C;X3,X4]')
|
|
226
264
|
# True disulfide bond: S-S where each S is bonded to carbon (cysteine residues)
|
|
227
265
|
self.disulfide_bond = Chem.MolFromSmarts('[C;X4]-[S;X2]-[S;X2]-[C;X4]')
|
|
228
266
|
# Primary amine at N-terminus (can be NH2 or NH3+), alpha-C can be sp3 or sp2
|
|
@@ -265,7 +303,7 @@ class BondDetector:
|
|
|
265
303
|
matches = mol.GetSubstructMatches(self.peptide_bond)
|
|
266
304
|
for match in matches:
|
|
267
305
|
if len(match) >= 5:
|
|
268
|
-
# Pattern: [C;X3,X4]-[C;X3](=[O;X1])-[N;X3]
|
|
306
|
+
# Pattern: [C;X3,X4]-[C;X3](=[O;X1])-[N;X2,X3]~[C;X3,X4]
|
|
269
307
|
# match[0]=alpha-C (sp2 or sp3), match[1]=carbonyl-C, match[2]=O, match[3]=N, match[4]=next-alpha-C (sp2 or sp3)
|
|
270
308
|
c_atom = match[1] # Carbonyl carbon
|
|
271
309
|
n_atom = match[3] # Nitrogen
|
|
@@ -399,17 +437,16 @@ class FragmentProcessor:
|
|
|
399
437
|
# Fragment the molecule
|
|
400
438
|
fragmented_mol = Chem.FragmentOnBonds(mol, bond_indices, addDummies=True)
|
|
401
439
|
|
|
402
|
-
# Get fragments
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
)
|
|
408
|
-
fragments = list(fragments_tuple)
|
|
440
|
+
# Get fragments as molecules
|
|
441
|
+
fragments = Chem.GetMolFrags(fragmented_mol, asMols=True, sanitizeFrags=True)
|
|
442
|
+
|
|
443
|
+
# Get atom mappings separately (which original atoms are in which fragment)
|
|
444
|
+
atom_mappings = Chem.GetMolFrags(fragmented_mol, asMols=False, fragsMolAtomMapping=True)
|
|
409
445
|
|
|
410
446
|
# Store bond cleavage info for recovery - we'll use this to selectively re-fragment
|
|
411
447
|
graph.cleaved_bond_indices = bond_indices
|
|
412
448
|
graph.bond_info = bond_info
|
|
449
|
+
graph.atom_mappings = atom_mappings
|
|
413
450
|
print(f"DEBUG: Created {len(fragments)} fragments, cleaved {len(bond_indices)} bonds")
|
|
414
451
|
|
|
415
452
|
# Create nodes for each fragment
|
|
@@ -426,20 +463,45 @@ class FragmentProcessor:
|
|
|
426
463
|
graph.add_node(node)
|
|
427
464
|
fragment_nodes.append((i, node))
|
|
428
465
|
|
|
429
|
-
# Create links between fragments based on cleaved bonds
|
|
430
|
-
#
|
|
431
|
-
|
|
432
|
-
for
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
466
|
+
# Create links between fragments based on the actual cleaved bonds
|
|
467
|
+
# Build mapping: original atom index → (fragment_idx, new_atom_idx_in_fragment)
|
|
468
|
+
atom_to_fragment_and_idx = {}
|
|
469
|
+
for frag_idx, original_atom_indices in enumerate(atom_mappings):
|
|
470
|
+
for new_idx_in_frag, original_atom_idx in enumerate(original_atom_indices):
|
|
471
|
+
atom_to_fragment_and_idx[original_atom_idx] = (frag_idx, new_idx_in_frag)
|
|
472
|
+
|
|
473
|
+
print(f"DEBUG: Processing {len(bond_info)} cleaved bonds to create links")
|
|
474
|
+
print(f"DEBUG: atom_to_fragment_and_idx has {len(atom_to_fragment_and_idx)} entries")
|
|
475
|
+
|
|
476
|
+
# For each cleaved bond, determine which fragments it connects
|
|
477
|
+
link_count = 0
|
|
478
|
+
for bond_idx, atom1_orig, atom2_orig, linkage_type in bond_info:
|
|
479
|
+
# Find which fragments contain these atoms and their new indices
|
|
480
|
+
frag1_info = atom_to_fragment_and_idx.get(atom1_orig)
|
|
481
|
+
frag2_info = atom_to_fragment_and_idx.get(atom2_orig)
|
|
482
|
+
|
|
483
|
+
if frag1_info is None or frag2_info is None:
|
|
484
|
+
print(f"DEBUG: Skipping bond atoms {atom1_orig}-{atom2_orig}: not found in fragments")
|
|
485
|
+
continue
|
|
486
|
+
|
|
487
|
+
frag1, atom1_new = frag1_info
|
|
488
|
+
frag2, atom2_new = frag2_info
|
|
489
|
+
|
|
490
|
+
# Create link even if both atoms are in same fragment (internal bond like in Phe_4Sdihydroorotamido)
|
|
491
|
+
# This creates a "self-link" that will be used during recovery to reconstruct the monomer
|
|
492
|
+
link = FragmentLink(frag1, frag2, linkage_type,
|
|
493
|
+
from_atom_idx=atom1_new, to_atom_idx=atom2_new)
|
|
436
494
|
graph.add_link(link)
|
|
495
|
+
link_count += 1
|
|
496
|
+
|
|
497
|
+
if frag1 == frag2:
|
|
498
|
+
print(f"DEBUG: Link {link_count}: {linkage_type.value.upper()} SELF-LINK frag{frag1} "
|
|
499
|
+
f"orig_atoms({atom1_orig}<->{atom2_orig}) frag_atoms({atom1_new}<->{atom2_new})")
|
|
500
|
+
else:
|
|
501
|
+
print(f"DEBUG: Link {link_count}: {linkage_type.value.upper()} frag{frag1}<->frag{frag2} "
|
|
502
|
+
f"orig_atoms({atom1_orig}<->{atom2_orig}) frag_atoms({atom1_new}<->{atom2_new})")
|
|
437
503
|
|
|
438
|
-
|
|
439
|
-
# TODO: Track which fragments contain the S atoms for proper linking
|
|
440
|
-
disulfide_links = [b for b in bond_info if b[3] == LinkageType.DISULFIDE]
|
|
441
|
-
# For now, disulfide bonds require more complex atom tracking
|
|
442
|
-
# This is a placeholder for future enhancement
|
|
504
|
+
print(f"DEBUG: Created {link_count} links total")
|
|
443
505
|
|
|
444
506
|
return graph
|
|
445
507
|
|
|
@@ -472,6 +534,94 @@ class FragmentProcessor:
|
|
|
472
534
|
except Exception:
|
|
473
535
|
return None
|
|
474
536
|
|
|
537
|
+
def _reconstruct_fragment_with_links(self, node_ids: list, graph: FragmentGraph,
|
|
538
|
+
links_to_exclude: list) -> Chem.Mol:
|
|
539
|
+
"""
|
|
540
|
+
Reconstruct a molecule by combining multiple fragment nodes, using link information.
|
|
541
|
+
|
|
542
|
+
Args:
|
|
543
|
+
node_ids: List of node IDs to merge
|
|
544
|
+
graph: The fragment graph
|
|
545
|
+
links_to_exclude: List of FragmentLink objects connecting the nodes to merge
|
|
546
|
+
|
|
547
|
+
Returns:
|
|
548
|
+
Combined RDKit molecule, or None if reconstruction fails
|
|
549
|
+
"""
|
|
550
|
+
if not node_ids or not hasattr(graph, 'original_mol'):
|
|
551
|
+
return None
|
|
552
|
+
|
|
553
|
+
if not hasattr(graph, 'cleaved_bond_indices') or not hasattr(graph, 'bond_info'):
|
|
554
|
+
return None
|
|
555
|
+
|
|
556
|
+
try:
|
|
557
|
+
# Find which bond indices correspond to the links we want to exclude
|
|
558
|
+
bonds_to_exclude_indices = []
|
|
559
|
+
|
|
560
|
+
for link in links_to_exclude:
|
|
561
|
+
# Find the bond_info entry that matches this link's original atoms
|
|
562
|
+
# We need to find which bond connected these fragments
|
|
563
|
+
for bond_list_idx, (bond_idx, atom1, atom2, linkage_type) in enumerate(graph.bond_info):
|
|
564
|
+
# Check if this bond connects the fragments in this link
|
|
565
|
+
if hasattr(graph, 'atom_mappings'):
|
|
566
|
+
# Find which fragments contain these atoms
|
|
567
|
+
frag1 = None
|
|
568
|
+
frag2 = None
|
|
569
|
+
for frag_idx, atom_indices in enumerate(graph.atom_mappings):
|
|
570
|
+
if atom1 in atom_indices:
|
|
571
|
+
frag1 = frag_idx
|
|
572
|
+
if atom2 in atom_indices:
|
|
573
|
+
frag2 = frag_idx
|
|
574
|
+
|
|
575
|
+
# If this bond connects the two fragments in the link, exclude it
|
|
576
|
+
if (frag1 == link.from_node_id and frag2 == link.to_node_id) or \
|
|
577
|
+
(frag1 == link.to_node_id and frag2 == link.from_node_id):
|
|
578
|
+
bonds_to_exclude_indices.append(bond_list_idx)
|
|
579
|
+
print(f"DEBUG: Excluding {linkage_type.value} bond at index {bond_list_idx} (atoms {atom1}<->{atom2})")
|
|
580
|
+
break
|
|
581
|
+
|
|
582
|
+
# Create new bond list excluding the bonds we want to keep
|
|
583
|
+
new_bond_indices = [
|
|
584
|
+
bond_idx for i, bond_idx in enumerate(graph.cleaved_bond_indices)
|
|
585
|
+
if i not in bonds_to_exclude_indices
|
|
586
|
+
]
|
|
587
|
+
|
|
588
|
+
print(f"DEBUG reconstruct: Original had {len(graph.cleaved_bond_indices)} cleaved bonds, "
|
|
589
|
+
f"excluding {len(bonds_to_exclude_indices)} bonds, new list has {len(new_bond_indices)} bonds")
|
|
590
|
+
|
|
591
|
+
# Re-fragment with the modified bond list
|
|
592
|
+
if not new_bond_indices:
|
|
593
|
+
# No bonds to cleave - return whole molecule
|
|
594
|
+
return graph.original_mol
|
|
595
|
+
|
|
596
|
+
fragmented_mol = Chem.FragmentOnBonds(graph.original_mol, new_bond_indices, addDummies=True)
|
|
597
|
+
fragments = Chem.GetMolFrags(fragmented_mol, asMols=True, sanitizeFrags=True)
|
|
598
|
+
new_atom_mappings = Chem.GetMolFrags(fragmented_mol, asMols=False, fragsMolAtomMapping=True)
|
|
599
|
+
|
|
600
|
+
# Find which new fragment contains atoms from our target nodes
|
|
601
|
+
# Look for the fragment that contains atoms from the first node we want to merge
|
|
602
|
+
sorted_nodes = sorted(node_ids)
|
|
603
|
+
first_node_atoms = set(graph.atom_mappings[sorted_nodes[0]])
|
|
604
|
+
|
|
605
|
+
target_fragment_idx = None
|
|
606
|
+
for new_frag_idx, new_atoms in enumerate(new_atom_mappings):
|
|
607
|
+
# Check if this new fragment contains any atoms from our first target node
|
|
608
|
+
if first_node_atoms & set(new_atoms):
|
|
609
|
+
target_fragment_idx = new_frag_idx
|
|
610
|
+
break
|
|
611
|
+
|
|
612
|
+
print(f"DEBUG reconstruct: Got {len(fragments)} fragments after re-fragmentation, "
|
|
613
|
+
f"target_fragment_idx={target_fragment_idx}")
|
|
614
|
+
|
|
615
|
+
if target_fragment_idx is not None and target_fragment_idx < len(fragments):
|
|
616
|
+
clean_frag = self._clean_fragment(fragments[target_fragment_idx])
|
|
617
|
+
return clean_frag if clean_frag else fragments[target_fragment_idx]
|
|
618
|
+
|
|
619
|
+
return None
|
|
620
|
+
|
|
621
|
+
except Exception as e:
|
|
622
|
+
print(f"DEBUG reconstruct: Exception: {e}")
|
|
623
|
+
return None
|
|
624
|
+
|
|
475
625
|
def _reconstruct_fragment(self, node_ids: list, graph: FragmentGraph) -> Chem.Mol:
|
|
476
626
|
"""
|
|
477
627
|
Reconstruct a molecule by combining multiple fragment nodes.
|
|
@@ -581,7 +731,7 @@ class FragmentProcessor:
|
|
|
581
731
|
|
|
582
732
|
def recover_unmatched_fragments(self, graph: FragmentGraph, matcher) -> bool:
|
|
583
733
|
"""
|
|
584
|
-
Try to recover unmatched fragments by merging with neighbors.
|
|
734
|
+
Try to recover unmatched fragments by merging with neighbors based on graph links.
|
|
585
735
|
Returns True if any merges were successful.
|
|
586
736
|
"""
|
|
587
737
|
# Identify unmatched nodes
|
|
@@ -603,33 +753,33 @@ class FragmentProcessor:
|
|
|
603
753
|
if node_id not in graph.nodes:
|
|
604
754
|
continue
|
|
605
755
|
|
|
606
|
-
# Get neighbors
|
|
756
|
+
# Get neighbors from graph links (returns list of (neighbor_id, linkage_type))
|
|
607
757
|
neighbors = graph.get_neighbors(node_id)
|
|
608
|
-
neighbor_ids = [n[0] for n in neighbors]
|
|
609
758
|
|
|
610
|
-
if not
|
|
759
|
+
if not neighbors:
|
|
760
|
+
print(f"DEBUG: Node {node_id} has no neighbors")
|
|
611
761
|
continue
|
|
612
762
|
|
|
613
|
-
|
|
614
|
-
left_neighbors = [n for n in neighbor_ids if n < node_id]
|
|
615
|
-
right_neighbors = [n for n in neighbor_ids if n > node_id]
|
|
616
|
-
|
|
617
|
-
# Try merge combinations: left only, right only, both
|
|
618
|
-
merge_attempts = []
|
|
763
|
+
print(f"DEBUG: Node {node_id} neighbors: {[(n[0], n[1].value) for n in neighbors]}")
|
|
619
764
|
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
765
|
+
# Try merging with each individual neighbor first
|
|
766
|
+
for neighbor_id, linkage_type in neighbors:
|
|
767
|
+
if neighbor_id not in graph.nodes:
|
|
768
|
+
continue
|
|
769
|
+
|
|
770
|
+
nodes_to_merge = sorted([node_id, neighbor_id])
|
|
771
|
+
print(f"DEBUG: Trying to merge nodes {nodes_to_merge} (via {linkage_type.value} bond)")
|
|
772
|
+
|
|
773
|
+
# Find the links between nodes we're merging
|
|
774
|
+
links_to_exclude = []
|
|
775
|
+
for link in graph.links:
|
|
776
|
+
from_in = link.from_node_id in nodes_to_merge
|
|
777
|
+
to_in = link.to_node_id in nodes_to_merge
|
|
778
|
+
if from_in and to_in:
|
|
779
|
+
links_to_exclude.append(link)
|
|
630
780
|
|
|
631
781
|
# Reconstruct combined molecule
|
|
632
|
-
combined_mol = self.
|
|
782
|
+
combined_mol = self._reconstruct_fragment_with_links(nodes_to_merge, graph, links_to_exclude)
|
|
633
783
|
if not combined_mol:
|
|
634
784
|
print(f"DEBUG: Failed to reconstruct molecule for {nodes_to_merge}")
|
|
635
785
|
continue
|
|
@@ -637,7 +787,7 @@ class FragmentProcessor:
|
|
|
637
787
|
print(f"DEBUG: Reconstructed mol with {combined_mol.GetNumAtoms()} atoms")
|
|
638
788
|
|
|
639
789
|
# Count expected connections for this merged fragment
|
|
640
|
-
# Get all unique neighbors of the merged set
|
|
790
|
+
# Get all unique neighbors of the merged set (excluding internal connections)
|
|
641
791
|
all_neighbors = set()
|
|
642
792
|
for nid in nodes_to_merge:
|
|
643
793
|
if nid in graph.nodes:
|
|
@@ -655,7 +805,7 @@ class FragmentProcessor:
|
|
|
655
805
|
if monomer:
|
|
656
806
|
print(f"DEBUG: SUCCESS! Matched to {monomer.symbol}")
|
|
657
807
|
# Success! Create new merged node
|
|
658
|
-
new_node_id = min(nodes_to_merge)
|
|
808
|
+
new_node_id = min(nodes_to_merge)
|
|
659
809
|
new_node = FragmentNode(new_node_id, combined_mol)
|
|
660
810
|
new_node.monomer = monomer
|
|
661
811
|
|
|
@@ -663,9 +813,12 @@ class FragmentProcessor:
|
|
|
663
813
|
self._merge_nodes_in_graph(graph, nodes_to_merge, new_node)
|
|
664
814
|
|
|
665
815
|
had_changes = True
|
|
666
|
-
break # Stop trying other
|
|
816
|
+
break # Stop trying other neighbors for this node
|
|
667
817
|
else:
|
|
668
818
|
print(f"DEBUG: No match found for merge {nodes_to_merge}")
|
|
819
|
+
|
|
820
|
+
if had_changes:
|
|
821
|
+
break # Restart from beginning after a successful merge
|
|
669
822
|
|
|
670
823
|
return had_changes
|
|
671
824
|
|
|
@@ -710,30 +863,69 @@ class HELMGenerator:
|
|
|
710
863
|
ordered_nodes = graph.get_ordered_nodes()
|
|
711
864
|
sequence_symbols = [node.monomer.symbol if node.monomer else "X" for node in ordered_nodes]
|
|
712
865
|
|
|
713
|
-
#
|
|
714
|
-
|
|
866
|
+
# Check if cyclic
|
|
867
|
+
is_cyclic = graph.is_cyclic()
|
|
715
868
|
|
|
716
|
-
#
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
for
|
|
720
|
-
|
|
869
|
+
# Generate sequence notation
|
|
870
|
+
if is_cyclic:
|
|
871
|
+
# Cyclic: wrap multi-letter monomers in brackets, single-letter ones stay as-is
|
|
872
|
+
formatted_symbols = [f"[{symbol}]" if len(symbol) > 1 else symbol for symbol in sequence_symbols]
|
|
873
|
+
sequence = ".".join(formatted_symbols)
|
|
874
|
+
else:
|
|
875
|
+
# Linear: no brackets
|
|
876
|
+
sequence = ".".join(sequence_symbols)
|
|
721
877
|
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
)
|
|
878
|
+
# Collect non-sequential connections (disulfide bridges, cyclic bonds, etc.)
|
|
879
|
+
connections = []
|
|
880
|
+
|
|
881
|
+
if is_cyclic:
|
|
882
|
+
# Find the actual cyclic peptide bond (last residue connects back to beginning)
|
|
883
|
+
# This handles cases where N-terminal caps (like 'ac') are at position 1
|
|
884
|
+
last_id = ordered_nodes[-1].id
|
|
885
|
+
first_few_ids = [ordered_nodes[i].id for i in range(min(3, len(ordered_nodes)))]
|
|
731
886
|
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
887
|
+
for link in graph.links:
|
|
888
|
+
if link.linkage_type == LinkageType.PEPTIDE:
|
|
889
|
+
# Check if this is the cyclic bond (last to one of first few)
|
|
890
|
+
is_cyclic_bond = False
|
|
891
|
+
from_id, to_id = None, None
|
|
892
|
+
|
|
893
|
+
if link.from_node_id == last_id and link.to_node_id in first_few_ids:
|
|
894
|
+
from_id, to_id = link.from_node_id, link.to_node_id
|
|
895
|
+
is_cyclic_bond = True
|
|
896
|
+
elif link.to_node_id == last_id and link.from_node_id in first_few_ids:
|
|
897
|
+
from_id, to_id = link.to_node_id, link.from_node_id
|
|
898
|
+
is_cyclic_bond = True
|
|
899
|
+
|
|
900
|
+
if is_cyclic_bond:
|
|
901
|
+
# Find positions (1-indexed)
|
|
902
|
+
from_pos = next((i + 1 for i, n in enumerate(ordered_nodes) if n.id == from_id), None)
|
|
903
|
+
to_pos = next((i + 1 for i, n in enumerate(ordered_nodes) if n.id == to_id), None)
|
|
904
|
+
|
|
905
|
+
if from_pos and to_pos:
|
|
906
|
+
connections.append(f"PEPTIDE1,PEPTIDE1,{from_pos}:R2-{to_pos}:R1")
|
|
907
|
+
break
|
|
908
|
+
|
|
909
|
+
# Add disulfide bridges
|
|
910
|
+
for link in graph.links:
|
|
911
|
+
if link.linkage_type == LinkageType.DISULFIDE:
|
|
912
|
+
# Get positions in ordered sequence (1-indexed)
|
|
913
|
+
from_pos = None
|
|
914
|
+
to_pos = None
|
|
915
|
+
for i, node in enumerate(ordered_nodes):
|
|
916
|
+
if node.id == link.from_node_id:
|
|
917
|
+
from_pos = i + 1
|
|
918
|
+
if node.id == link.to_node_id:
|
|
919
|
+
to_pos = i + 1
|
|
920
|
+
|
|
921
|
+
if from_pos and to_pos:
|
|
922
|
+
# Format: PEPTIDE1,PEPTIDE1,from_pos:R3-to_pos:R3
|
|
923
|
+
connections.append(f"PEPTIDE1,PEPTIDE1,{from_pos}:R3-{to_pos}:R3")
|
|
924
|
+
|
|
925
|
+
# Generate final HELM notation
|
|
926
|
+
if connections:
|
|
927
|
+
connection_str = "|".join(connections)
|
|
928
|
+
helm = f"PEPTIDE1{{{sequence}}}${connection_str}$$$V2.0"
|
|
737
929
|
else:
|
|
738
930
|
helm = f"PEPTIDE1{{{sequence}}}$$$$"
|
|
739
931
|
|
|
package/src/utils/monomer-cell-renderer.ts
CHANGED
|
@@ -81,12 +81,13 @@ export class MonomerCellRendererBack extends CellRendererWithMonomerLibBackBase
|
|
|
81
81
|
}
|
|
82
82
|
|
|
83
83
|
override onMouseMove(gridCell: GridCell, e: MouseEvent) {
|
|
84
|
+
const [gridCol, tableCol, temp] = getGridCellColTemp(gridCell);
|
|
84
85
|
if (
|
|
85
|
-
gridCell.grid.dart != this.gridCol?.grid.dart ||
|
|
86
|
-
!
|
|
86
|
+
gridCell.grid.dart != this.gridCol?.grid.dart || gridCol?.dart != this.gridCol?.dart ||
|
|
87
|
+
!tableCol || !gridCell.isTableCell
|
|
87
88
|
) return false;
|
|
88
89
|
|
|
89
|
-
const alphabet =
|
|
90
|
+
const alphabet = tableCol.getTag(bioTAGS.alphabet) as ALPHABET;
|
|
90
91
|
const monomerName: string = gridCell.cell.value;
|
|
91
92
|
const canvasClientRect = gridCell.grid.canvas.getBoundingClientRect();
|
|
92
93
|
const x1 = gridCell.bounds.right + canvasClientRect.left - 4;
|
|
@@ -127,9 +128,10 @@ export class MonomerCellRendererBack extends CellRendererWithMonomerLibBackBase
|
|
|
127
128
|
|
|
128
129
|
private getHelmType(gridCell: GridCell, defaultType: HelmType): HelmType {
|
|
129
130
|
let biotype = defaultType;
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
const
|
|
131
|
+
const [gridCol, tableCol, temp] = getGridCellColTemp(gridCell);
|
|
132
|
+
if ((gridCell.tableRowIndex ?? -1) > -1 && tableCol?.getTag(BioTags.polymerTypeColumnName)) {
|
|
133
|
+
const ptColName = tableCol.getTag(BioTags.polymerTypeColumnName);
|
|
134
|
+
const ptCol = tableCol.dataFrame?.col(ptColName);
|
|
133
135
|
if (ptCol) {
|
|
134
136
|
const ptrString = ptCol.get(gridCell.tableRowIndex!);
|
|
135
137
|
if (ptrString && [PolymerTypes.BLOB, PolymerTypes.CHEM, PolymerTypes.G, PolymerTypes.PEPTIDE, PolymerTypes.RNA].includes(ptrString))
|
|
package/src/utils/monomer-lib/monomer-manager/const.ts
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import {RGroup} from '@datagrok-libraries/bio/src/types/monomer-library';
|
|
2
|
+
|
|
3
|
+
export const STANDRARD_R_GROUPS: RGroup[] = [
|
|
4
|
+
{
|
|
5
|
+
alternateId: 'R1-H',
|
|
6
|
+
capGroupName: 'H',
|
|
7
|
+
capGroupSmiles: '[*:1][H]',
|
|
8
|
+
label: 'R1'
|
|
9
|
+
},
|
|
10
|
+
{
|
|
11
|
+
alternateId: 'R2-OH',
|
|
12
|
+
capGroupName: 'OH',
|
|
13
|
+
capGroupSmiles: 'O[*:2]',
|
|
14
|
+
label: 'R2'
|
|
15
|
+
},
|
|
16
|
+
{
|
|
17
|
+
alternateId: 'R3-H',
|
|
18
|
+
capGroupName: 'H',
|
|
19
|
+
capGroupSmiles: '[*:3][H]',
|
|
20
|
+
label: 'R3'
|
|
21
|
+
},
|
|
22
|
+
{
|
|
23
|
+
alternateId: 'R4-H',
|
|
24
|
+
capGroupName: 'H',
|
|
25
|
+
capGroupSmiles: '[*:4][H]',
|
|
26
|
+
label: 'R4'
|
|
27
|
+
},
|
|
28
|
+
{
|
|
29
|
+
alternateId: 'R5-H',
|
|
30
|
+
capGroupName: 'H',
|
|
31
|
+
capGroupSmiles: '[*:5][H]',
|
|
32
|
+
label: 'R5'
|
|
33
|
+
},
|
|
34
|
+
{
|
|
35
|
+
alternateId: 'R6-H',
|
|
36
|
+
capGroupName: 'H',
|
|
37
|
+
capGroupSmiles: '[*:6][H]',
|
|
38
|
+
label: 'R6'
|
|
39
|
+
},
|
|
40
|
+
];
|
|
package/src/utils/monomer-lib/monomer-manager/monomer-manager.ts
CHANGED
|
@@ -19,7 +19,7 @@ import {BioTags} from '@datagrok-libraries/bio/src/utils/macromolecule/consts';
|
|
|
19
19
|
//@ts-ignore
|
|
20
20
|
import '../../../../css/monomer-manager.css';
|
|
21
21
|
import {Subscription} from 'rxjs';
|
|
22
|
-
|
|
22
|
+
import {STANDRARD_R_GROUPS} from './const';
|
|
23
23
|
|
|
24
24
|
// columns of monomers dataframe, note that rgroups is hidden and will be displayed as separate columns
|
|
25
25
|
export enum MONOMER_DF_COLUMN_NAMES {
|
|
@@ -69,6 +69,9 @@ export async function matchMoleculesWithMonomers(molDf: DG.DataFrame, molColName
|
|
|
69
69
|
// first: stamdardize monomers
|
|
70
70
|
const monomers = monomerLib.getMonomerSymbolsByType(polymerType).map((s) => monomerLib.getMonomer(polymerType, s)!).filter((m) => m && (m.smiles || m.molfile));
|
|
71
71
|
const fixedMonomers = await standardiseMonomers(monomers);
|
|
72
|
+
fixedMonomers.forEach((m, i) => {
|
|
73
|
+
m.lib = monomers[i].lib;
|
|
74
|
+
});
|
|
72
75
|
const unCappedMonomerSmilesMap = fixedMonomers.filter((m) => !!m.smiles).reduce((acc, m) => {
|
|
73
76
|
acc[m.smiles] = {symbol: m.symbol, smiles: m.smiles, original: m.smiles, source: m.lib?.source}; return acc;
|
|
74
77
|
}, {} as {[smiles: string]: {symbol: string, smiles: string, original: string | undefined, source: string | undefined}});
|
|
@@ -115,7 +118,15 @@ export async function matchMoleculesWithMonomers(molDf: DG.DataFrame, molColName
|
|
|
115
118
|
for (let i = 0; i < canonicalizedMolecules.length; i++) {
|
|
116
119
|
const mol = canonicalizedMolecules[i];
|
|
117
120
|
if (!mol) continue;
|
|
118
|
-
|
|
121
|
+
let match = cappedMonomerSmilesMap[mol] ?? unCappedMonomerSmilesMap[mol];
|
|
122
|
+
if (!match) {
|
|
123
|
+
// try capping the molecule and matching again
|
|
124
|
+
const cappedMol = capSmiles(mol, STANDRARD_R_GROUPS);
|
|
125
|
+
if (cappedMol !== mol) {
|
|
126
|
+
const correctedMol = grok.chem.convert(cappedMol, DG.chem.Notation.Unknown, DG.chem.Notation.Smiles);
|
|
127
|
+
match = cappedMonomerSmilesMap[correctedMol] ?? unCappedMonomerSmilesMap[correctedMol];
|
|
128
|
+
}
|
|
129
|
+
}
|
|
119
130
|
if (match) {
|
|
120
131
|
const matchSymbol = match.symbol;
|
|
121
132
|
const sources = (duplicates[matchSymbol]?.length ?? 0) > 0 ? duplicates[matchSymbol].map((m) => m?.lib?.source).filter((s) => !!s).join(', ') : (match.source ?? '');
|