@datagrok/bio 2.25.2 → 2.25.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "name": "Davit Rizhinashvili",
6
6
  "email": "drizhinashvili@datagrok.ai"
7
7
  },
8
- "version": "2.25.2",
8
+ "version": "2.25.4",
9
9
  "description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
10
10
  "repository": {
11
11
  "type": "git",
@@ -44,10 +44,10 @@
44
44
  ],
45
45
  "dependencies": {
46
46
  "@biowasm/aioli": "^3.1.0",
47
- "@datagrok-libraries/bio": "^5.61.2",
47
+ "@datagrok-libraries/bio": "^5.61.3",
48
48
  "@datagrok-libraries/chem-meta": "^1.2.9",
49
49
  "@datagrok-libraries/math": "^1.2.6",
50
- "@datagrok-libraries/ml": "^6.10.6",
50
+ "@datagrok-libraries/ml": "^6.10.7",
51
51
  "@datagrok-libraries/tutorials": "^1.7.4",
52
52
  "@datagrok-libraries/utils": "^4.6.9",
53
53
  "@webgpu/types": "^0.1.40",
@@ -179,6 +179,37 @@ class FragmentGraph:
179
179
  for node in ordered_nodes
180
180
  ]
181
181
 
182
+ def is_cyclic(self) -> bool:
183
+ """
184
+ Detect if the peptide is cyclic.
185
+ A cyclic peptide has a peptide bond connecting the last residue back to near the beginning.
186
+ Handles cases where N-terminal caps (like 'ac' from Lys_Ac) create an extra fragment at position 0.
187
+ """
188
+ if len(self.nodes) < 3:
189
+ return False
190
+
191
+ # Get ordered nodes
192
+ ordered = self.get_ordered_nodes()
193
+ if len(ordered) < 3:
194
+ return False
195
+
196
+ # Get the last node ID
197
+ last_id = ordered[-1].id
198
+
199
+ # For a cyclic peptide, the last residue should connect back to one of the first few residues
200
+ # (usually first, but could be second if there's an N-terminal cap like 'ac')
201
+ # Check if last node has a peptide bond to any of the first 3 nodes
202
+ first_few_ids = [ordered[i].id for i in range(min(3, len(ordered)))]
203
+
204
+ for link in self.links:
205
+ if link.linkage_type == LinkageType.PEPTIDE:
206
+ # Check if link connects last node to one of the first few nodes
207
+ if (link.from_node_id == last_id and link.to_node_id in first_few_ids) or \
208
+ (link.to_node_id == last_id and link.from_node_id in first_few_ids):
209
+ return True
210
+
211
+ return False
212
+
182
213
  def __len__(self):
183
214
  return len(self.nodes)
184
215
 
@@ -221,8 +252,15 @@ class BondDetector:
221
252
  #GENERALIZATION ITEM: BOND PATTERNS SHOULD BE DERIVED FROM LIBRARY
222
253
  def __init__(self):
223
254
  # True peptide bond: C and N both in backbone (each bonded to carbons)
224
- # Alpha carbons can be sp3 (X4) or sp2 (X3) for dehydroamino acids
225
- self.peptide_bond = Chem.MolFromSmarts('[C;X3,X4]-[C;X3](=[O;X1])-[N;X3]-[C;X3,X4]')
255
+ # First carbon can be aliphatic or aromatic (for amino acids like NMe2Abz)
256
+ # Carbonyl carbon is sp2 (X3)
257
+ # Exclude if carbonyl is in a small ring (r5 or r6) to avoid cleaving lactams like Pyr
258
+ # !r5 = not in 5-membered ring, !r6 = not in 6-membered ring
259
+ # This preserves lactams but allows large macrocycles and proline (C=O outside ring)
260
+ # Nitrogen can be X2 (proline, imino) or X3 (standard amino, N-methyl)
261
+ # N-C bond can be single (-) or double (=) for imine bonds in dehydro amino acids
262
+ # Alpha carbon after N can be sp3 (X4) or sp2 (X3) for dehydroamino acids
263
+ self.peptide_bond = Chem.MolFromSmarts('[#6]-[C;X3;!r5;!r6](=[O;X1])-[N;X2,X3]~[C;X3,X4]')
226
264
  # True disulfide bond: S-S where each S is bonded to carbon (cysteine residues)
227
265
  self.disulfide_bond = Chem.MolFromSmarts('[C;X4]-[S;X2]-[S;X2]-[C;X4]')
228
266
  # Primary amine at N-terminus (can be NH2 or NH3+), alpha-C can be sp3 or sp2
@@ -265,7 +303,7 @@ class BondDetector:
265
303
  matches = mol.GetSubstructMatches(self.peptide_bond)
266
304
  for match in matches:
267
305
  if len(match) >= 5:
268
- # Pattern: [C;X3,X4]-[C;X3](=[O;X1])-[N;X3]-[C;X3,X4]
306
+ # Pattern: [#6]-[C;X3;!r5;!r6](=[O;X1])-[N;X2,X3]~[C;X3,X4]
269
307
  # match[0]=carbon before the carbonyl (aliphatic or aromatic), match[1]=carbonyl-C, match[2]=O, match[3]=N, match[4]=next-alpha-C (sp2 or sp3)
270
308
  c_atom = match[1] # Carbonyl carbon
271
309
  n_atom = match[3] # Nitrogen
@@ -399,17 +437,16 @@ class FragmentProcessor:
399
437
  # Fragment the molecule
400
438
  fragmented_mol = Chem.FragmentOnBonds(mol, bond_indices, addDummies=True)
401
439
 
402
- # Get fragments AND their atom mappings separately
403
- fragments_tuple = Chem.GetMolFrags(
404
- fragmented_mol,
405
- asMols=True,
406
- sanitizeFrags=True
407
- )
408
- fragments = list(fragments_tuple)
440
+ # Get fragments as molecules
441
+ fragments = Chem.GetMolFrags(fragmented_mol, asMols=True, sanitizeFrags=True)
442
+
443
+ # Get atom mappings separately (which original atoms are in which fragment)
444
+ atom_mappings = Chem.GetMolFrags(fragmented_mol, asMols=False, fragsMolAtomMapping=True)
409
445
 
410
446
  # Store bond cleavage info for recovery - we'll use this to selectively re-fragment
411
447
  graph.cleaved_bond_indices = bond_indices
412
448
  graph.bond_info = bond_info
449
+ graph.atom_mappings = atom_mappings
413
450
  print(f"DEBUG: Created {len(fragments)} fragments, cleaved {len(bond_indices)} bonds")
414
451
 
415
452
  # Create nodes for each fragment
@@ -426,20 +463,45 @@ class FragmentProcessor:
426
463
  graph.add_node(node)
427
464
  fragment_nodes.append((i, node))
428
465
 
429
- # Create links between fragments based on cleaved bonds
430
- # For sequential peptide bonds
431
- peptide_links = [b for b in bond_info if b[3] == LinkageType.PEPTIDE]
432
- for i in range(len(fragment_nodes) - 1):
433
- from_id, _ = fragment_nodes[i]
434
- to_id, _ = fragment_nodes[i + 1]
435
- link = FragmentLink(from_id, to_id, LinkageType.PEPTIDE)
466
+ # Create links between fragments based on the actual cleaved bonds
467
+ # Build mapping: original atom index → (fragment_idx, new_atom_idx_in_fragment)
468
+ atom_to_fragment_and_idx = {}
469
+ for frag_idx, original_atom_indices in enumerate(atom_mappings):
470
+ for new_idx_in_frag, original_atom_idx in enumerate(original_atom_indices):
471
+ atom_to_fragment_and_idx[original_atom_idx] = (frag_idx, new_idx_in_frag)
472
+
473
+ print(f"DEBUG: Processing {len(bond_info)} cleaved bonds to create links")
474
+ print(f"DEBUG: atom_to_fragment_and_idx has {len(atom_to_fragment_and_idx)} entries")
475
+
476
+ # For each cleaved bond, determine which fragments it connects
477
+ link_count = 0
478
+ for bond_idx, atom1_orig, atom2_orig, linkage_type in bond_info:
479
+ # Find which fragments contain these atoms and their new indices
480
+ frag1_info = atom_to_fragment_and_idx.get(atom1_orig)
481
+ frag2_info = atom_to_fragment_and_idx.get(atom2_orig)
482
+
483
+ if frag1_info is None or frag2_info is None:
484
+ print(f"DEBUG: Skipping bond atoms {atom1_orig}-{atom2_orig}: not found in fragments")
485
+ continue
486
+
487
+ frag1, atom1_new = frag1_info
488
+ frag2, atom2_new = frag2_info
489
+
490
+ # Create link even if both atoms are in same fragment (internal bond like in Phe_4Sdihydroorotamido)
491
+ # This creates a "self-link" that will be used during recovery to reconstruct the monomer
492
+ link = FragmentLink(frag1, frag2, linkage_type,
493
+ from_atom_idx=atom1_new, to_atom_idx=atom2_new)
436
494
  graph.add_link(link)
495
+ link_count += 1
496
+
497
+ if frag1 == frag2:
498
+ print(f"DEBUG: Link {link_count}: {linkage_type.value.upper()} SELF-LINK frag{frag1} "
499
+ f"orig_atoms({atom1_orig}<->{atom2_orig}) frag_atoms({atom1_new}<->{atom2_new})")
500
+ else:
501
+ print(f"DEBUG: Link {link_count}: {linkage_type.value.upper()} frag{frag1}<->frag{frag2} "
502
+ f"orig_atoms({atom1_orig}<->{atom2_orig}) frag_atoms({atom1_new}<->{atom2_new})")
437
503
 
438
- # Add disulfide bridges (if any)
439
- # TODO: Track which fragments contain the S atoms for proper linking
440
- disulfide_links = [b for b in bond_info if b[3] == LinkageType.DISULFIDE]
441
- # For now, disulfide bonds require more complex atom tracking
442
- # This is a placeholder for future enhancement
504
+ print(f"DEBUG: Created {link_count} links total")
443
505
 
444
506
  return graph
445
507
 
@@ -472,6 +534,94 @@ class FragmentProcessor:
472
534
  except Exception:
473
535
  return None
474
536
 
537
+ def _reconstruct_fragment_with_links(self, node_ids: list, graph: FragmentGraph,
538
+ links_to_exclude: list) -> Chem.Mol:
539
+ """
540
+ Reconstruct a molecule by combining multiple fragment nodes, using link information.
541
+
542
+ Args:
543
+ node_ids: List of node IDs to merge
544
+ graph: The fragment graph
545
+ links_to_exclude: List of FragmentLink objects connecting the nodes to merge
546
+
547
+ Returns:
548
+ Combined RDKit molecule, or None if reconstruction fails
549
+ """
550
+ if not node_ids or not hasattr(graph, 'original_mol'):
551
+ return None
552
+
553
+ if not hasattr(graph, 'cleaved_bond_indices') or not hasattr(graph, 'bond_info'):
554
+ return None
555
+
556
+ try:
557
+ # Find which bond indices correspond to the links we want to exclude
558
+ bonds_to_exclude_indices = []
559
+
560
+ for link in links_to_exclude:
561
+ # Find the bond_info entry that matches this link's original atoms
562
+ # We need to find which bond connected these fragments
563
+ for bond_list_idx, (bond_idx, atom1, atom2, linkage_type) in enumerate(graph.bond_info):
564
+ # Check if this bond connects the fragments in this link
565
+ if hasattr(graph, 'atom_mappings'):
566
+ # Find which fragments contain these atoms
567
+ frag1 = None
568
+ frag2 = None
569
+ for frag_idx, atom_indices in enumerate(graph.atom_mappings):
570
+ if atom1 in atom_indices:
571
+ frag1 = frag_idx
572
+ if atom2 in atom_indices:
573
+ frag2 = frag_idx
574
+
575
+ # If this bond connects the two fragments in the link, exclude it
576
+ if (frag1 == link.from_node_id and frag2 == link.to_node_id) or \
577
+ (frag1 == link.to_node_id and frag2 == link.from_node_id):
578
+ bonds_to_exclude_indices.append(bond_list_idx)
579
+ print(f"DEBUG: Excluding {linkage_type.value} bond at index {bond_list_idx} (atoms {atom1}<->{atom2})")
580
+ break
581
+
582
+ # Build the new cleavage list, leaving out the bonds that must stay intact (un-cleaved)
583
+ new_bond_indices = [
584
+ bond_idx for i, bond_idx in enumerate(graph.cleaved_bond_indices)
585
+ if i not in bonds_to_exclude_indices
586
+ ]
587
+
588
+ print(f"DEBUG reconstruct: Original had {len(graph.cleaved_bond_indices)} cleaved bonds, "
589
+ f"excluding {len(bonds_to_exclude_indices)} bonds, new list has {len(new_bond_indices)} bonds")
590
+
591
+ # Re-fragment with the modified bond list
592
+ if not new_bond_indices:
593
+ # No bonds to cleave - return whole molecule
594
+ return graph.original_mol
595
+
596
+ fragmented_mol = Chem.FragmentOnBonds(graph.original_mol, new_bond_indices, addDummies=True)
597
+ fragments = Chem.GetMolFrags(fragmented_mol, asMols=True, sanitizeFrags=True)
598
+ new_atom_mappings = Chem.GetMolFrags(fragmented_mol, asMols=False, fragsMolAtomMapping=True)
599
+
600
+ # Find which new fragment contains atoms from our target nodes
601
+ # Look for the fragment that contains atoms from the first node we want to merge
602
+ sorted_nodes = sorted(node_ids)
603
+ first_node_atoms = set(graph.atom_mappings[sorted_nodes[0]])
604
+
605
+ target_fragment_idx = None
606
+ for new_frag_idx, new_atoms in enumerate(new_atom_mappings):
607
+ # Check if this new fragment contains any atoms from our first target node
608
+ if first_node_atoms & set(new_atoms):
609
+ target_fragment_idx = new_frag_idx
610
+ break
611
+
612
+ print(f"DEBUG reconstruct: Got {len(fragments)} fragments after re-fragmentation, "
613
+ f"target_fragment_idx={target_fragment_idx}")
614
+
615
+ if target_fragment_idx is not None and target_fragment_idx < len(fragments):
616
+ clean_frag = self._clean_fragment(fragments[target_fragment_idx])
617
+ return clean_frag if clean_frag else fragments[target_fragment_idx]
618
+
619
+ return None
620
+
621
+ except Exception as e:
622
+ print(f"DEBUG reconstruct: Exception: {e}")
623
+ return None
624
+
475
625
  def _reconstruct_fragment(self, node_ids: list, graph: FragmentGraph) -> Chem.Mol:
476
626
  """
477
627
  Reconstruct a molecule by combining multiple fragment nodes.
@@ -581,7 +731,7 @@ class FragmentProcessor:
581
731
 
582
732
  def recover_unmatched_fragments(self, graph: FragmentGraph, matcher) -> bool:
583
733
  """
584
- Try to recover unmatched fragments by merging with neighbors.
734
+ Try to recover unmatched fragments by merging with neighbors based on graph links.
585
735
  Returns True if any merges were successful.
586
736
  """
587
737
  # Identify unmatched nodes
@@ -603,33 +753,33 @@ class FragmentProcessor:
603
753
  if node_id not in graph.nodes:
604
754
  continue
605
755
 
606
- # Get neighbors
756
+ # Get neighbors from graph links (returns list of (neighbor_id, linkage_type))
607
757
  neighbors = graph.get_neighbors(node_id)
608
- neighbor_ids = [n[0] for n in neighbors]
609
758
 
610
- if not neighbor_ids:
759
+ if not neighbors:
760
+ print(f"DEBUG: Node {node_id} has no neighbors")
611
761
  continue
612
762
 
613
- # Separate left and right neighbors (assuming sequential order)
614
- left_neighbors = [n for n in neighbor_ids if n < node_id]
615
- right_neighbors = [n for n in neighbor_ids if n > node_id]
616
-
617
- # Try merge combinations: left only, right only, both
618
- merge_attempts = []
763
+ print(f"DEBUG: Node {node_id} neighbors: {[(n[0], n[1].value) for n in neighbors]}")
619
764
 
620
- if left_neighbors:
621
- merge_attempts.append([left_neighbors[0], node_id])
622
- if right_neighbors:
623
- merge_attempts.append([node_id, right_neighbors[0]])
624
- if left_neighbors and right_neighbors:
625
- merge_attempts.append([left_neighbors[0], node_id, right_neighbors[0]])
626
-
627
- # Try each merge combination
628
- for nodes_to_merge in merge_attempts:
629
- print(f"DEBUG: Trying to merge nodes {nodes_to_merge}")
765
+ # Try merging with each individual neighbor first
766
+ for neighbor_id, linkage_type in neighbors:
767
+ if neighbor_id not in graph.nodes:
768
+ continue
769
+
770
+ nodes_to_merge = sorted([node_id, neighbor_id])
771
+ print(f"DEBUG: Trying to merge nodes {nodes_to_merge} (via {linkage_type.value} bond)")
772
+
773
+ # Find the links between nodes we're merging
774
+ links_to_exclude = []
775
+ for link in graph.links:
776
+ from_in = link.from_node_id in nodes_to_merge
777
+ to_in = link.to_node_id in nodes_to_merge
778
+ if from_in and to_in:
779
+ links_to_exclude.append(link)
630
780
 
631
781
  # Reconstruct combined molecule
632
- combined_mol = self._reconstruct_fragment(nodes_to_merge, graph)
782
+ combined_mol = self._reconstruct_fragment_with_links(nodes_to_merge, graph, links_to_exclude)
633
783
  if not combined_mol:
634
784
  print(f"DEBUG: Failed to reconstruct molecule for {nodes_to_merge}")
635
785
  continue
@@ -637,7 +787,7 @@ class FragmentProcessor:
637
787
  print(f"DEBUG: Reconstructed mol with {combined_mol.GetNumAtoms()} atoms")
638
788
 
639
789
  # Count expected connections for this merged fragment
640
- # Get all unique neighbors of the merged set
790
+ # Get all unique neighbors of the merged set (excluding internal connections)
641
791
  all_neighbors = set()
642
792
  for nid in nodes_to_merge:
643
793
  if nid in graph.nodes:
@@ -655,7 +805,7 @@ class FragmentProcessor:
655
805
  if monomer:
656
806
  print(f"DEBUG: SUCCESS! Matched to {monomer.symbol}")
657
807
  # Success! Create new merged node
658
- new_node_id = min(nodes_to_merge) # Use lowest ID
808
+ new_node_id = min(nodes_to_merge)
659
809
  new_node = FragmentNode(new_node_id, combined_mol)
660
810
  new_node.monomer = monomer
661
811
 
@@ -663,9 +813,12 @@ class FragmentProcessor:
663
813
  self._merge_nodes_in_graph(graph, nodes_to_merge, new_node)
664
814
 
665
815
  had_changes = True
666
- break # Stop trying other combinations for this node
816
+ break # Stop trying other neighbors for this node
667
817
  else:
668
818
  print(f"DEBUG: No match found for merge {nodes_to_merge}")
819
+
820
+ if had_changes:
821
+ break # Restart from beginning after a successful merge
669
822
 
670
823
  return had_changes
671
824
 
@@ -710,30 +863,69 @@ class HELMGenerator:
710
863
  ordered_nodes = graph.get_ordered_nodes()
711
864
  sequence_symbols = [node.monomer.symbol if node.monomer else "X" for node in ordered_nodes]
712
865
 
713
- # Generate linear peptide notation
714
- sequence = ".".join(sequence_symbols)
866
+ # Check if cyclic
867
+ is_cyclic = graph.is_cyclic()
715
868
 
716
- # Check for disulfide bridges or other non-peptide bonds
717
- has_special_bonds = any(
718
- link.linkage_type != LinkageType.PEPTIDE
719
- for link in graph.links
720
- )
869
+ # Generate sequence notation
870
+ if is_cyclic:
871
+ # Cyclic: wrap multi-letter monomers in brackets, single-letter ones stay as-is
872
+ formatted_symbols = [f"[{symbol}]" if len(symbol) > 1 else symbol for symbol in sequence_symbols]
873
+ sequence = ".".join(formatted_symbols)
874
+ else:
875
+ # Linear: no brackets
876
+ sequence = ".".join(sequence_symbols)
721
877
 
722
- if has_special_bonds:
723
- # Add connection notation for disulfide bridges
724
- connections = []
725
- for link in graph.links:
726
- if link.linkage_type == LinkageType.DISULFIDE:
727
- # Format: PEPTIDE1,PEPTIDE1,from_idx:R3-to_idx:R3
728
- connections.append(
729
- f"PEPTIDE1,PEPTIDE1,{link.from_node_id + 1}:R3-{link.to_node_id + 1}:R3"
730
- )
878
+ # Collect non-sequential connections (disulfide bridges, cyclic bonds, etc.)
879
+ connections = []
880
+
881
+ if is_cyclic:
882
+ # Find the actual cyclic peptide bond (last residue connects back to beginning)
883
+ # This handles cases where N-terminal caps (like 'ac') are at position 1
884
+ last_id = ordered_nodes[-1].id
885
+ first_few_ids = [ordered_nodes[i].id for i in range(min(3, len(ordered_nodes)))]
731
886
 
732
- if connections:
733
- connection_str = "|".join(connections)
734
- helm = f"PEPTIDE1{{{sequence}}}${connection_str}$$$V2.0"
735
- else:
736
- helm = f"PEPTIDE1{{{sequence}}}$$$$"
887
+ for link in graph.links:
888
+ if link.linkage_type == LinkageType.PEPTIDE:
889
+ # Check if this is the cyclic bond (last to one of first few)
890
+ is_cyclic_bond = False
891
+ from_id, to_id = None, None
892
+
893
+ if link.from_node_id == last_id and link.to_node_id in first_few_ids:
894
+ from_id, to_id = link.from_node_id, link.to_node_id
895
+ is_cyclic_bond = True
896
+ elif link.to_node_id == last_id and link.from_node_id in first_few_ids:
897
+ from_id, to_id = link.to_node_id, link.from_node_id
898
+ is_cyclic_bond = True
899
+
900
+ if is_cyclic_bond:
901
+ # Find positions (1-indexed)
902
+ from_pos = next((i + 1 for i, n in enumerate(ordered_nodes) if n.id == from_id), None)
903
+ to_pos = next((i + 1 for i, n in enumerate(ordered_nodes) if n.id == to_id), None)
904
+
905
+ if from_pos and to_pos:
906
+ connections.append(f"PEPTIDE1,PEPTIDE1,{from_pos}:R2-{to_pos}:R1")
907
+ break
908
+
909
+ # Add disulfide bridges
910
+ for link in graph.links:
911
+ if link.linkage_type == LinkageType.DISULFIDE:
912
+ # Get positions in ordered sequence (1-indexed)
913
+ from_pos = None
914
+ to_pos = None
915
+ for i, node in enumerate(ordered_nodes):
916
+ if node.id == link.from_node_id:
917
+ from_pos = i + 1
918
+ if node.id == link.to_node_id:
919
+ to_pos = i + 1
920
+
921
+ if from_pos and to_pos:
922
+ # Format: PEPTIDE1,PEPTIDE1,from_pos:R3-to_pos:R3
923
+ connections.append(f"PEPTIDE1,PEPTIDE1,{from_pos}:R3-{to_pos}:R3")
924
+
925
+ # Generate final HELM notation
926
+ if connections:
927
+ connection_str = "|".join(connections)
928
+ helm = f"PEPTIDE1{{{sequence}}}${connection_str}$$$V2.0"
737
929
  else:
738
930
  helm = f"PEPTIDE1{{{sequence}}}$$$$"
739
931
 
@@ -81,12 +81,13 @@ export class MonomerCellRendererBack extends CellRendererWithMonomerLibBackBase
81
81
  }
82
82
 
83
83
  override onMouseMove(gridCell: GridCell, e: MouseEvent) {
84
+ const [gridCol, tableCol, temp] = getGridCellColTemp(gridCell);
84
85
  if (
85
- gridCell.grid.dart != this.gridCol?.grid.dart || gridCell.gridColumn.dart != this.gridCol?.dart ||
86
- !gridCell.tableColumn || !gridCell.isTableCell
86
+ gridCell.grid.dart != this.gridCol?.grid.dart || gridCol?.dart != this.gridCol?.dart ||
87
+ !tableCol || !gridCell.isTableCell
87
88
  ) return false;
88
89
 
89
- const alphabet = gridCell.tableColumn.getTag(bioTAGS.alphabet) as ALPHABET;
90
+ const alphabet = tableCol.getTag(bioTAGS.alphabet) as ALPHABET;
90
91
  const monomerName: string = gridCell.cell.value;
91
92
  const canvasClientRect = gridCell.grid.canvas.getBoundingClientRect();
92
93
  const x1 = gridCell.bounds.right + canvasClientRect.left - 4;
@@ -127,9 +128,10 @@ export class MonomerCellRendererBack extends CellRendererWithMonomerLibBackBase
127
128
 
128
129
  private getHelmType(gridCell: GridCell, defaultType: HelmType): HelmType {
129
130
  let biotype = defaultType;
130
- if ((gridCell.tableRowIndex ?? -1) > -1 && gridCell.tableColumn?.getTag(BioTags.polymerTypeColumnName)) {
131
- const ptColName = gridCell.tableColumn.getTag(BioTags.polymerTypeColumnName);
132
- const ptCol = gridCell.tableColumn.dataFrame?.col(ptColName);
131
+ const [gridCol, tableCol, temp] = getGridCellColTemp(gridCell);
132
+ if ((gridCell.tableRowIndex ?? -1) > -1 && tableCol?.getTag(BioTags.polymerTypeColumnName)) {
133
+ const ptColName = tableCol.getTag(BioTags.polymerTypeColumnName);
134
+ const ptCol = tableCol.dataFrame?.col(ptColName);
133
135
  if (ptCol) {
134
136
  const ptrString = ptCol.get(gridCell.tableRowIndex!);
135
137
  if (ptrString && [PolymerTypes.BLOB, PolymerTypes.CHEM, PolymerTypes.G, PolymerTypes.PEPTIDE, PolymerTypes.RNA].includes(ptrString))
@@ -0,0 +1,40 @@
1
+ import {RGroup} from '@datagrok-libraries/bio/src/types/monomer-library';
2
+
3
+ export const STANDRARD_R_GROUPS: RGroup[] = [
4
+ {
5
+ alternateId: 'R1-H',
6
+ capGroupName: 'H',
7
+ capGroupSmiles: '[*:1][H]',
8
+ label: 'R1'
9
+ },
10
+ {
11
+ alternateId: 'R2-OH',
12
+ capGroupName: 'OH',
13
+ capGroupSmiles: 'O[*:2]',
14
+ label: 'R2'
15
+ },
16
+ {
17
+ alternateId: 'R3-H',
18
+ capGroupName: 'H',
19
+ capGroupSmiles: '[*:3][H]',
20
+ label: 'R3'
21
+ },
22
+ {
23
+ alternateId: 'R4-H',
24
+ capGroupName: 'H',
25
+ capGroupSmiles: '[*:4][H]',
26
+ label: 'R4'
27
+ },
28
+ {
29
+ alternateId: 'R5-H',
30
+ capGroupName: 'H',
31
+ capGroupSmiles: '[*:5][H]',
32
+ label: 'R5'
33
+ },
34
+ {
35
+ alternateId: 'R6-H',
36
+ capGroupName: 'H',
37
+ capGroupSmiles: '[*:6][H]',
38
+ label: 'R6'
39
+ },
40
+ ];
@@ -19,7 +19,7 @@ import {BioTags} from '@datagrok-libraries/bio/src/utils/macromolecule/consts';
19
19
  //@ts-ignore
20
20
  import '../../../../css/monomer-manager.css';
21
21
  import {Subscription} from 'rxjs';
22
-
22
+ import {STANDRARD_R_GROUPS} from './const';
23
23
 
24
24
  // columns of monomers dataframe, note that rgroups is hidden and will be displayed as separate columns
25
25
  export enum MONOMER_DF_COLUMN_NAMES {
@@ -69,6 +69,9 @@ export async function matchMoleculesWithMonomers(molDf: DG.DataFrame, molColName
69
69
  // first: standardize monomers
70
70
  const monomers = monomerLib.getMonomerSymbolsByType(polymerType).map((s) => monomerLib.getMonomer(polymerType, s)!).filter((m) => m && (m.smiles || m.molfile));
71
71
  const fixedMonomers = await standardiseMonomers(monomers);
72
+ fixedMonomers.forEach((m, i) => {
73
+ m.lib = monomers[i].lib;
74
+ });
72
75
  const unCappedMonomerSmilesMap = fixedMonomers.filter((m) => !!m.smiles).reduce((acc, m) => {
73
76
  acc[m.smiles] = {symbol: m.symbol, smiles: m.smiles, original: m.smiles, source: m.lib?.source}; return acc;
74
77
  }, {} as {[smiles: string]: {symbol: string, smiles: string, original: string | undefined, source: string | undefined}});
@@ -115,7 +118,15 @@ export async function matchMoleculesWithMonomers(molDf: DG.DataFrame, molColName
115
118
  for (let i = 0; i < canonicalizedMolecules.length; i++) {
116
119
  const mol = canonicalizedMolecules[i];
117
120
  if (!mol) continue;
118
- const match = cappedMonomerSmilesMap[mol] ?? unCappedMonomerSmilesMap[mol];
121
+ let match = cappedMonomerSmilesMap[mol] ?? unCappedMonomerSmilesMap[mol];
122
+ if (!match) {
123
+ // try capping the molecule and matching again
124
+ const cappedMol = capSmiles(mol, STANDRARD_R_GROUPS);
125
+ if (cappedMol !== mol) {
126
+ const correctedMol = grok.chem.convert(cappedMol, DG.chem.Notation.Unknown, DG.chem.Notation.Smiles);
127
+ match = cappedMonomerSmilesMap[correctedMol] ?? unCappedMonomerSmilesMap[correctedMol];
128
+ }
129
+ }
119
130
  if (match) {
120
131
  const matchSymbol = match.symbol;
121
132
  const sources = (duplicates[matchSymbol]?.length ?? 0) > 0 ? duplicates[matchSymbol].map((m) => m?.lib?.source).filter((s) => !!s).join(', ') : (match.source ?? '');