@datagrok/bio 2.25.4 → 2.25.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "name": "Davit Rizhinashvili",
6
6
  "email": "drizhinashvili@datagrok.ai"
7
7
  },
8
- "version": "2.25.4",
8
+ "version": "2.25.5",
9
9
  "description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
10
10
  "repository": {
11
11
  "type": "git",
@@ -25,6 +25,7 @@ from typing import Optional
25
25
  from typing import Tuple
26
26
  import json
27
27
  import os
28
+ import re
28
29
 
29
30
  # ============================================================================
30
31
  # Content from: fragment_graph.py
@@ -259,8 +260,8 @@ class BondDetector:
259
260
  # This preserves lactams but allows large macrocycles and proline (C=O outside ring)
260
261
  # Nitrogen can be X2 (proline, imino) or X3 (standard amino, N-methyl)
261
262
  # N-C bond can be single (-) or double (=) for imine bonds in dehydro amino acids
262
- # Alpha carbon after N can be sp3 (X4) or sp2 (X3) for dehydroamino acids
263
- self.peptide_bond = Chem.MolFromSmarts('[#6]-[C;X3;!r5;!r6](=[O;X1])-[N;X2,X3]~[C;X3,X4]')
263
+ # Alpha carbon after N can be sp3 (X4) or sp2 (X3, e.g. dehydroamino acids); [#6] is used instead of [C] so that aromatic carbons match as well as aliphatic ones
264
+ self.peptide_bond = Chem.MolFromSmarts('[#6]-[C;X3;!r5;!r6](=[O;X1])-[N;X2,X3]~[#6;X3,X4]')
264
265
  # True disulfide bond: S-S where each S is bonded to carbon (cysteine residues)
265
266
  self.disulfide_bond = Chem.MolFromSmarts('[C;X4]-[S;X2]-[S;X2]-[C;X4]')
266
267
  # Primary amine at N-terminus (can be NH2 or NH3+), alpha-C can be sp3 or sp2
@@ -447,7 +448,6 @@ class FragmentProcessor:
447
448
  graph.cleaved_bond_indices = bond_indices
448
449
  graph.bond_info = bond_info
449
450
  graph.atom_mappings = atom_mappings
450
- print(f"DEBUG: Created {len(fragments)} fragments, cleaved {len(bond_indices)} bonds")
451
451
 
452
452
  # Create nodes for each fragment
453
453
  fragment_nodes = []
@@ -470,8 +470,6 @@ class FragmentProcessor:
470
470
  for new_idx_in_frag, original_atom_idx in enumerate(original_atom_indices):
471
471
  atom_to_fragment_and_idx[original_atom_idx] = (frag_idx, new_idx_in_frag)
472
472
 
473
- print(f"DEBUG: Processing {len(bond_info)} cleaved bonds to create links")
474
- print(f"DEBUG: atom_to_fragment_and_idx has {len(atom_to_fragment_and_idx)} entries")
475
473
 
476
474
  # For each cleaved bond, determine which fragments it connects
477
475
  link_count = 0
@@ -743,8 +741,6 @@ class FragmentProcessor:
743
741
  if not unmatched_nodes:
744
742
  return False
745
743
 
746
- print(f"DEBUG: Found {len(unmatched_nodes)} unmatched nodes: {unmatched_nodes}")
747
-
748
744
  had_changes = False
749
745
 
750
746
  # Try to recover each unmatched node
@@ -757,18 +753,14 @@ class FragmentProcessor:
757
753
  neighbors = graph.get_neighbors(node_id)
758
754
 
759
755
  if not neighbors:
760
- print(f"DEBUG: Node {node_id} has no neighbors")
761
756
  continue
762
757
 
763
- print(f"DEBUG: Node {node_id} neighbors: {[(n[0], n[1].value) for n in neighbors]}")
764
-
765
758
  # Try merging with each individual neighbor first
766
759
  for neighbor_id, linkage_type in neighbors:
767
760
  if neighbor_id not in graph.nodes:
768
761
  continue
769
762
 
770
763
  nodes_to_merge = sorted([node_id, neighbor_id])
771
- print(f"DEBUG: Trying to merge nodes {nodes_to_merge} (via {linkage_type.value} bond)")
772
764
 
773
765
  # Find the links between nodes we're merging
774
766
  links_to_exclude = []
@@ -797,13 +789,11 @@ class FragmentProcessor:
797
789
  all_neighbors.add(neighbor_id)
798
790
 
799
791
  num_connections = len(all_neighbors)
800
- print(f"DEBUG: Expecting {num_connections} connections")
801
792
 
802
- # Try to match the combined fragment
793
+ # Try to match the combined fragment (exact match only)
803
794
  monomer = matcher.find_exact_match(combined_mol, num_connections)
804
795
 
805
796
  if monomer:
806
- print(f"DEBUG: SUCCESS! Matched to {monomer.symbol}")
807
797
  # Success! Create new merged node
808
798
  new_node_id = min(nodes_to_merge)
809
799
  new_node = FragmentNode(new_node_id, combined_mol)
@@ -814,13 +804,69 @@ class FragmentProcessor:
814
804
 
815
805
  had_changes = True
816
806
  break # Stop trying other neighbors for this node
817
- else:
818
- print(f"DEBUG: No match found for merge {nodes_to_merge}")
819
807
 
820
808
  if had_changes:
821
809
  break # Restart from beginning after a successful merge
822
810
 
823
811
  return had_changes
812
+
813
def recover_unmatched_with_stereo_agnostic(self, graph: FragmentGraph, matcher) -> int:
    """
    Last-resort recovery pass that ignores stereochemistry.

    Nodes whose assigned monomer looks like a placeholder (symbol starting
    with 'X' or name starting with 'Unknown') are re-queried against the
    monomer library with a stereo-agnostic lookup. Nodes that have no
    monomer object at all are not considered.

    Args:
        graph: FragmentGraph that may still contain placeholder nodes.
        matcher: Object exposing ``monomer_library`` with a
            ``find_monomer_by_fragment_smiles_no_stereo`` method.

    Returns:
        How many nodes received a library monomer during this pass.
    """
    from rdkit import Chem

    def _is_placeholder(candidate) -> bool:
        # A node counts as unmatched only when it carries a mock monomer.
        assigned = candidate.monomer
        if not assigned:
            return False
        return assigned.symbol.startswith('X') or assigned.name.startswith('Unknown')

    pending_ids = [nid for nid, candidate in graph.nodes.items() if _is_placeholder(candidate)]
    if not pending_ids:
        return 0

    print(f"DEBUG: Attempting stereo-agnostic recovery for {len(pending_ids)} unmatched nodes")

    recovered = 0
    for nid in pending_ids:
        if nid in graph.nodes:
            target = graph.nodes[nid]

            # Canonical SMILES of the fragment is the lookup key.
            smiles_key = Chem.MolToSmiles(target.mol, canonical=True)

            # The number of graph neighbours is the number of attachment
            # points the library monomer must provide.
            connection_total = len(graph.get_neighbors(nid))

            hit = matcher.monomer_library.find_monomer_by_fragment_smiles_no_stereo(
                smiles_key, connection_total
            )

            if not hit:
                print(f"DEBUG: No stereo-agnostic match for node {nid}")
            else:
                print(f"DEBUG: Stereo-agnostic match for node {nid}: {hit.symbol}")
                target.monomer = hit
                recovered += 1

    return recovered
824
870
 
825
871
  # ============================================================================
826
872
  # Content from: helm_generator.py
@@ -859,13 +905,49 @@ class HELMGenerator:
859
905
  if len(graph) == 0:
860
906
  return ""
861
907
 
862
- # Get ordered sequence of monomers
863
- ordered_nodes = graph.get_ordered_nodes()
864
- sequence_symbols = [node.monomer.symbol if node.monomer else "X" for node in ordered_nodes]
908
+ # Get ordered sequence of monomers (backbone)
909
+ ordered_nodes_raw = graph.get_ordered_nodes()
865
910
 
866
911
  # Check if cyclic
867
912
  is_cyclic = graph.is_cyclic()
868
913
 
914
+ # Filter backbone: nodes that are part of R1-R2 chain are backbone
915
+ # Nodes connected only via R3 (side chain) are branches
916
+ #
917
+ # Logic: A node at position 1 is a branch if:
918
+ # - It has no R1 (N-terminus) - meaning it's a cap like 'ac' that only has R2
919
+ # - It only has 1 peptide connection (to the real backbone)
920
+ #
921
+ # Example: [ac].K in cyclic peptide
922
+ # - 'ac' has only R2, no R1 → it's a cap
923
+ # - 'ac' connects to K's R3 (side chain), not K's R1 (backbone)
924
+ # - So 'ac' should be PEPTIDE2, not part of PEPTIDE1
925
+
926
+ backbone_nodes = []
927
+ for i, node in enumerate(ordered_nodes_raw):
928
+ is_branch = False
929
+
930
+ if i == 0 and len(ordered_nodes_raw) > 1 and node.monomer:
931
+ # Check if this first node lacks R1 (N-terminus)
932
+ # If it has no R1, it's a cap that should be a branch
933
+ has_r1 = 'R1' in node.monomer.r_groups
934
+
935
+ if not has_r1:
936
+ # This is an N-terminal cap (like 'ac') at position 1
937
+ # It should be a branch, not part of the main backbone
938
+ is_branch = True
939
+
940
+ if not is_branch:
941
+ backbone_nodes.append(node)
942
+
943
+ ordered_nodes = backbone_nodes
944
+ sequence_symbols = [node.monomer.symbol if node.monomer else "X" for node in ordered_nodes]
945
+
946
+ # Detect branch nodes (nodes not in backbone)
947
+ ordered_node_ids = {node.id for node in ordered_nodes}
948
+ branch_nodes = [(node_id, node) for node_id, node in graph.nodes.items()
949
+ if node_id not in ordered_node_ids]
950
+
869
951
  # Generate sequence notation
870
952
  if is_cyclic:
871
953
  # Cyclic: wrap multi-letter monomers in brackets, single-letter ones stay as-is
@@ -922,12 +1004,55 @@ class HELMGenerator:
922
1004
  # Format: PEPTIDE1,PEPTIDE1,from_pos:R3-to_pos:R3
923
1005
  connections.append(f"PEPTIDE1,PEPTIDE1,{from_pos}:R3-{to_pos}:R3")
924
1006
 
1007
+ # Handle branch nodes (side chain modifications)
1008
+ # Create separate PEPTIDE chains for each branch
1009
+ branch_chains = []
1010
+ if branch_nodes:
1011
+ for branch_idx, (branch_node_id, branch_node) in enumerate(branch_nodes, start=2):
1012
+ branch_chain_name = f"PEPTIDE{branch_idx}"
1013
+ branch_symbol = branch_node.monomer.symbol if branch_node.monomer else f"X{branch_node_id}"
1014
+
1015
+ # Format branch chain (single monomer, so no dots needed)
1016
+ if is_cyclic and len(branch_symbol) > 1:
1017
+ branch_chains.append(f"{branch_chain_name}{{[{branch_symbol}]}}")
1018
+ else:
1019
+ branch_chains.append(f"{branch_chain_name}{{{branch_symbol}}}")
1020
+
1021
+ # Find which backbone node this branch connects to
1022
+ # Look for links connecting this branch to the main backbone
1023
+ for link in graph.links:
1024
+ backbone_node_id = None
1025
+ if link.from_node_id == branch_node_id and link.to_node_id in ordered_node_ids:
1026
+ backbone_node_id = link.to_node_id
1027
+ elif link.to_node_id == branch_node_id and link.from_node_id in ordered_node_ids:
1028
+ backbone_node_id = link.from_node_id
1029
+
1030
+ if backbone_node_id is not None:
1031
+ # Find position of backbone node (1-indexed)
1032
+ backbone_pos = next((i + 1 for i, n in enumerate(ordered_nodes) if n.id == backbone_node_id), None)
1033
+ if backbone_pos:
1034
+ # Determine which R-group the branch uses
1035
+ # If branch has R1, connect to R1; if only R2, connect to R2
1036
+ branch_r_group = "R1"
1037
+ if branch_node.monomer:
1038
+ if 'R1' in branch_node.monomer.r_groups:
1039
+ branch_r_group = "R1"
1040
+ elif 'R2' in branch_node.monomer.r_groups:
1041
+ branch_r_group = "R2"
1042
+
1043
+ # Connection: backbone position R3 (side chain) to branch position 1 R-group
1044
+ connections.append(f"PEPTIDE1,{branch_chain_name},{backbone_pos}:R3-1:{branch_r_group}")
1045
+ break
1046
+
925
1047
  # Generate final HELM notation
1048
+ all_chains = [f"PEPTIDE1{{{sequence}}}"] + branch_chains
1049
+ helm_chains = "|".join(all_chains)
1050
+
926
1051
  if connections:
927
1052
  connection_str = "|".join(connections)
928
- helm = f"PEPTIDE1{{{sequence}}}${connection_str}$$$V2.0"
1053
+ helm = f"{helm_chains}${connection_str}$$$V2.0"
929
1054
  else:
930
- helm = f"PEPTIDE1{{{sequence}}}$$$$"
1055
+ helm = f"{helm_chains}$$$$V2.0"
931
1056
 
932
1057
  return helm
933
1058
 
@@ -960,10 +1085,34 @@ from collections import defaultdict
960
1085
  from itertools import combinations
961
1086
  import json
962
1087
  import os
1088
+ import re
963
1089
 
964
1090
  # Suppress RDKit warnings
965
1091
  RDLogger.DisableLog('rdApp.warning')
966
1092
 
1093
def remove_stereochemistry_from_smiles(smiles: str) -> str:
    """
    Remove tetrahedral stereochemistry markers from a SMILES string.

    ``@`` / ``@@`` markers are dropped, and bracket atoms that are left with
    nothing but an element symbol (optionally followed by a single ``H``)
    are rewritten as plain atoms, e.g. ``[C@@H]`` -> ``[CH]`` -> ``C``.

    Only atoms of the SMILES "organic subset" (B, C, N, O, P, S, F, Cl, Br, I)
    are unbracketed. Every other element must stay in brackets to remain
    valid SMILES, so ``[H]``, ``[Se]``, ``[SeH]``, ``[Na]`` etc. are left
    untouched.

    This is used for matching when input molecules don't have
    stereochemistry defined.

    Args:
        smiles: SMILES string; falsy values (``None``, ``""``) are returned
            unchanged.

    Returns:
        The SMILES string without ``@`` chirality markers.
    """
    if not smiles:
        return smiles

    # Remove @ symbols (tetrahedral chirality markers inside bracket atoms).
    smiles_no_stereo = re.sub(r'@+', '', smiles)

    # Unbracket [X] and [XH] for organic-subset elements only. Two-letter
    # symbols are listed first so that 'Cl'/'Br' are not read as 'C'/'B'.
    # Brackets holding charges, isotopes, atom maps or several hydrogens do
    # not match and are preserved as-is.
    smiles_no_stereo = re.sub(r'\[(Cl|Br|[BCNOPSFI])H?\]', r'\1', smiles_no_stereo)

    return smiles_no_stereo
1115
+
967
1116
  class MonomerData:
968
1117
  def __init__(self):
969
1118
  self.symbol = ""
@@ -1201,11 +1350,64 @@ class MonomerLibrary:
1201
1350
  # Generate SMILES with these R-groups removed (lazy, cached)
1202
1351
  candidate_smiles = monomer.get_capped_smiles_for_removed_rgroups(removed_set)
1203
1352
 
1204
- # Check if it matches the fragment
1353
+ # Check if it matches the fragment (exact match only)
1205
1354
  if candidate_smiles == fragment_smiles:
1206
1355
  return monomer
1207
1356
 
1208
1357
  return None
1358
+
1359
def find_monomer_by_fragment_smiles_no_stereo(self, fragment_smiles: str, num_connections: int):
    """
    Find a monomer by matching fragment SMILES WITHOUT stereochemistry.
    Used only in recovery for handling poor quality input data.

    For every monomer with at least ``num_connections`` R-groups, each
    combination of ``num_connections`` removed R-groups is turned into a
    capped SMILES and compared with the fragment: first as stereo-stripped
    strings, then by mutual substructure matching of the parsed molecules
    (covers differently written but identical structures).

    Args:
        fragment_smiles: Canonical SMILES of the fragment
        num_connections: Number of connections this fragment has in the graph

    Returns:
        MonomerData if match found, None otherwise
    """
    # An empty or missing fragment cannot correspond to any monomer.
    if not fragment_smiles:
        return None

    # Parse the fragment once (without stereochemistry).
    fragment_no_stereo_smiles = remove_stereochemistry_from_smiles(fragment_smiles)
    fragment_mol = Chem.MolFromSmiles(fragment_no_stereo_smiles)
    if not fragment_mol:
        return None
    fragment_atom_count = fragment_mol.GetNumAtoms()

    for monomer in self.monomers.values():
        # Skip monomers that cannot provide enough attachment points.
        if monomer.r_group_count < num_connections:
            continue

        r_group_labels = list(monomer.r_groups.keys())

        # Every combination of R-groups that could have been removed.
        for removed_combo in combinations(r_group_labels, num_connections):
            # SMILES with these R-groups removed (lazy, cached by the monomer).
            candidate_smiles = monomer.get_capped_smiles_for_removed_rgroups(frozenset(removed_combo))
            if not candidate_smiles:
                # NOTE(review): presumably the helper can yield None/"" when
                # capping fails - confirm. Such a value must not reach
                # Chem.MolFromSmiles below.
                continue

            # Fast path: plain string comparison of stereo-stripped SMILES.
            candidate_no_stereo = remove_stereochemistry_from_smiles(candidate_smiles)
            if candidate_no_stereo == fragment_no_stereo_smiles:
                return monomer

            # Slow path: graph comparison. The atom-count test is cheap and
            # rejects most candidates before the substructure searches run.
            candidate_mol = Chem.MolFromSmiles(candidate_no_stereo)
            if not candidate_mol or candidate_mol.GetNumAtoms() != fragment_atom_count:
                continue

            # Equal size and each a substructure of the other => same molecule.
            if fragment_mol.HasSubstructMatch(candidate_mol) and candidate_mol.HasSubstructMatch(fragment_mol):
                return monomer

    return None
1209
1411
 
1210
1412
  def find_monomer_by_symbol(self, symbol: str):
1211
1413
  return self.symbol_to_monomer.get(symbol)
@@ -1355,14 +1557,16 @@ def preload_library():
1355
1557
  return processor is not None
1356
1558
 
1357
1559
 
1358
- def convert_molecules_batch(molfiles: list, library_json: str = None) -> list:
1560
+ def convert_molecules_batch(molecules: list, library_json: str = None, input_type: str = "auto") -> list:
1359
1561
  """
1360
- Convert a batch of molecules from molfile format to HELM notation.
1562
+ Convert a batch of molecules to HELM notation.
1361
1563
 
1362
1564
  Args:
1363
- molfiles: List of molfile strings
1565
+ molecules: List of molecule strings (molfiles or SMILES)
1364
1566
  library_json: Optional monomer library as JSON string.
1365
1567
  If None, uses default cached library from HELMCoreLibrary.json
1568
+ input_type: Type of input molecules - "molfile", "smiles", or "auto" (default).
1569
+ "auto" will attempt to detect the format automatically.
1366
1570
 
1367
1571
  Returns:
1368
1572
  List of tuples: (success: bool, helm_notation: str)
@@ -1376,13 +1580,13 @@ def convert_molecules_batch(molfiles: list, library_json: str = None) -> list:
1376
1580
  print("Initializing monomer library and processors...")
1377
1581
  if not preload_library():
1378
1582
  print("ERROR: Failed to load monomer library")
1379
- return [(False, "Library initialization failed") for _ in molfiles]
1583
+ return [(False, "Library initialization failed") for _ in molecules]
1380
1584
  print()
1381
1585
 
1382
1586
  # Use shared processor instances
1383
1587
  processor, matcher, helm_generator = _get_processors()
1384
1588
  if not processor:
1385
- return [(False, "") for _ in molfiles]
1589
+ return [(False, "") for _ in molecules]
1386
1590
  else:
1387
1591
  # Load custom library from provided JSON string (no caching)
1388
1592
  try:
@@ -1410,7 +1614,7 @@ def convert_molecules_batch(molfiles: list, library_json: str = None) -> list:
1410
1614
 
1411
1615
  if not library.monomers:
1412
1616
  print("ERROR: No monomers loaded from custom library")
1413
- return [(False, "Library loading failed") for _ in molfiles]
1617
+ return [(False, "Library loading failed") for _ in molecules]
1414
1618
 
1415
1619
  print(f"Custom library loaded: {len(library.monomers)} monomers")
1416
1620
 
@@ -1419,11 +1623,46 @@ def convert_molecules_batch(molfiles: list, library_json: str = None) -> list:
1419
1623
  matcher = MonomerMatcher(library)
1420
1624
  helm_generator = HELMGenerator()
1421
1625
 
1626
+ # Helper function to detect molecule format
1627
def _is_molfile(mol_string: str) -> bool:
    """
    Heuristically decide whether ``mol_string`` is an MDL molfile.

    A string is treated as a molfile when it has more than three lines and
    either contains a ``V2000``/``V3000`` tag or its fourth line starts with
    the fixed-width atom/bond counts (first six characters are digits and
    spaces only).

    The three header lines of a molfile may be blank, so the text is examined
    with its leading lines intact first; the fully stripped layout is checked
    as well so that input with stray leading whitespace is still recognised.

    Args:
        mol_string: Candidate molecule text.

    Returns:
        True if the text looks like a molfile, False otherwise (including
        for empty input).
    """
    if not mol_string:
        return False

    has_version_tag = 'V2000' in mol_string or 'V3000' in mol_string

    layouts = (
        mol_string.rstrip().split('\n'),  # header lines preserved
        mol_string.strip().split('\n'),   # leading blank lines dropped
    )
    for lines in layouts:
        # Three header lines plus the counts line are the bare minimum.
        if len(lines) <= 3:
            continue
        if has_version_tag:
            return True
        counts_line = lines[3]
        # Counts line starts with two 3-character integer fields.
        if len(counts_line) >= 6 and counts_line[:6].replace(' ', '').isdigit():
            return True
    return False
1644
+
1422
1645
  results = []
1423
1646
 
1424
- for i in range(len(molfiles)):
1425
- molfile = molfiles[i]
1426
- mol = Chem.MolFromMolBlock(molfile)
1647
+ for i in range(len(molecules)):
1648
+ mol_string = molecules[i]
1649
+
1650
+ # Determine input type and parse molecule
1651
+ if input_type == "auto":
1652
+ # Auto-detect format
1653
+ if _is_molfile(mol_string):
1654
+ mol = Chem.MolFromMolBlock(mol_string)
1655
+ else:
1656
+ # Assume SMILES if not molfile
1657
+ mol = Chem.MolFromSmiles(mol_string)
1658
+ elif input_type == "molfile":
1659
+ mol = Chem.MolFromMolBlock(mol_string)
1660
+ elif input_type == "smiles":
1661
+ mol = Chem.MolFromSmiles(mol_string)
1662
+ else:
1663
+ results.append((False, f"Invalid input_type: {input_type}"))
1664
+ continue
1665
+
1427
1666
  if not mol:
1428
1667
  results.append((False, ""))
1429
1668
  continue
@@ -1457,6 +1696,12 @@ def convert_molecules_batch(molfiles: list, library_json: str = None) -> list:
1457
1696
  if not had_changes:
1458
1697
  break
1459
1698
 
1699
+ # After regular recovery, try stereo-agnostic matching for remaining unmatched fragments
1700
+ # This handles poor quality data with missing stereochemistry
1701
+ stereo_matched = processor.recover_unmatched_with_stereo_agnostic(graph, matcher)
1702
+ if stereo_matched > 0:
1703
+ print(f"DEBUG: Stereo-agnostic recovery matched {stereo_matched} additional fragments")
1704
+
1460
1705
  if len(graph.nodes) > 0:
1461
1706
  helm_notation = helm_generator.generate_helm_from_graph(graph)
1462
1707
  results.append((True, helm_notation))
@@ -1467,5 +1712,35 @@ def convert_molecules_batch(molfiles: list, library_json: str = None) -> list:
1467
1712
 
1468
1713
  return results
1469
1714
 
1715
+
1716
def convert_molfiles_to_helm(molfiles: list, library_json: str = None) -> list:
    """
    Molfile-only entry point for HELM conversion.

    Delegates to convert_molecules_batch with the input format pinned to
    "molfile", so no format auto-detection takes place.

    Args:
        molfiles: List of molfile strings
        library_json: Optional monomer library as JSON string

    Returns:
        List of tuples: (success: bool, helm_notation: str)
    """
    helm_results = convert_molecules_batch(
        molfiles,
        input_type="molfile",
        library_json=library_json,
    )
    return helm_results
1729
+
1730
+
1731
def convert_smiles_to_helm(smiles_list: list, library_json: str = None) -> list:
    """
    SMILES-only entry point for HELM conversion.

    Delegates to convert_molecules_batch with the input format pinned to
    "smiles", so no format auto-detection takes place.

    Args:
        smiles_list: List of SMILES strings
        library_json: Optional monomer library as JSON string

    Returns:
        List of tuples: (success: bool, helm_notation: str)
    """
    helm_results = convert_molecules_batch(
        smiles_list,
        input_type="smiles",
        library_json=library_json,
    )
    return helm_results
1744
+
1470
1745
  res_helm_list = convert_molecules_batch(molListToProcess, library_json=libraryJSON)
1471
1746
  result_helm = pd.DataFrame(map(lambda x: x[1], res_helm_list), columns=["regenerated sequences"])
package/src/package.g.ts CHANGED
@@ -277,7 +277,7 @@ export async function moleculesToHelmTopMenu(table: DG.DataFrame, molecules: DG.
277
277
  //description: Converts sequences to molblocks
278
278
  //input: dataframe table { description: Input data table }
279
279
  //input: column seqCol { semType: Macromolecule; caption: Sequence }
280
- //input: bool nonlinear = false { caption: Non-linear; description: Slower mode for cycling/branching HELM structures }
280
+ //input: bool nonlinear = true { caption: Non-linear; description: Slower mode for cycling/branching HELM structures }
281
281
  //input: bool highlight = false { caption: Highlight monomers; description: Highlight monomers' substructures of the molecule }
282
282
  //top-menu: Bio | Transform | To Atomic Level...
283
283
  export async function toAtomicLevel(table: DG.DataFrame, seqCol: DG.Column, nonlinear: boolean, highlight: boolean) : Promise<void> {
package/src/package.ts CHANGED
@@ -651,7 +651,7 @@ export class PackageFunctions {
651
651
  static async toAtomicLevel(
652
652
  @grok.decorators.param({options: {description: 'Input data table'}})table: DG.DataFrame,
653
653
  @grok.decorators.param({options: {semType: 'Macromolecule', caption: 'Sequence'}})seqCol: DG.Column,
654
- @grok.decorators.param({options: {initialValue: 'false', caption: 'Non-linear', description: 'Slower mode for cycling/branching HELM structures'}}) nonlinear: boolean,
654
+ @grok.decorators.param({options: {initialValue: 'true', caption: 'Non-linear', description: 'Slower mode for cycling/branching HELM structures'}}) nonlinear: boolean = true,
655
655
  @grok.decorators.param({options: {initialValue: 'false', caption: 'Highlight monomers', description: 'Highlight monomers\' substructures of the molecule'}}) highlight: boolean = false
656
656
  ): Promise<void> {
657
657
  const pi = DG.TaskBarProgressIndicator.create('Converting to atomic level ...');