npm - @datagrok/bio - Versions diffs - 2.25.10 → 2.25.12 - Mend

@datagrok/bio 2.25.10 → 2.25.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

package/CHANGELOG.md +10 -0
package/detectors.js +1 -0
package/dist/242.js +1 -1
package/dist/242.js.map +1 -1
package/dist/284.js.map +1 -1
package/dist/455.js +1 -1
package/dist/455.js.map +1 -1
package/dist/705.js +1 -1
package/dist/705.js.map +1 -1
package/dist/980.js.map +1 -1
package/dist/package-test.js +5 -5
package/dist/package-test.js.map +1 -1
package/dist/package.js +3 -3
package/dist/package.js.map +1 -1
package/package.json +3 -4
package/src/analysis/sequence-similarity-viewer.ts +4 -4
package/src/tests/to-atomic-level-tests.ts +121 -1
package/src/utils/biln.ts +10 -2
package/src/utils/convert.ts +62 -16
package/src/utils/helm-to-molfile/converter/mol-atoms.ts +5 -0
package/src/utils/helm-to-molfile/converter/mol-bonds.ts +5 -0
package/src/utils/helm-to-molfile/converter/mol-wrapper.ts +4 -2
package/src/utils/helm-to-molfile/converter/monomer-wrapper.ts +24 -10
package/src/utils/helm-to-molfile/converter/r-group-handler.ts +100 -7
package/src/utils/helm-to-molfile/converter/types.ts +12 -0
package/src/utils/monomer-lib/monomer-manager/monomer-manager.ts +108 -11
package/test-console-output-1.log +456 -417
package/test-record-1.mp4 +0 -0
package/webpack.config.js +22 -1

package/package.json CHANGED Viewed

@@ -5,7 +5,7 @@
     "name": "Davit Rizhinashvili",
     "email": "drizhinashvili@datagrok.ai"
   },
-  "version": "2.25.10",
+  "version": "2.25.12",
   "description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
   "repository": {
     "type": "git",
@@ -44,10 +44,10 @@
   ],
   "dependencies": {
     "@biowasm/aioli": "^3.1.0",
-    "@datagrok-libraries/bio": "^5.61.4",
+    "@datagrok-libraries/bio": "^5.61.6",
     "@datagrok-libraries/chem-meta": "^1.2.9",
     "@datagrok-libraries/math": "^1.2.6",
-    "@datagrok-libraries/ml": "^6.10.7",
+    "@datagrok-libraries/ml": "^6.10.9",
     "@datagrok-libraries/test": "^1.1.0",
     "@datagrok-libraries/tutorials": "^1.7.4",
     "@datagrok-libraries/utils": "^4.6.9",
@@ -77,7 +77,6 @@
     "@types/wu": "^2.1.44",
     "@typescript-eslint/eslint-plugin": "^8.8.1",
     "@typescript-eslint/parser": "^8.8.1",
-    "datagrok-tools": "^5.1.5",
     "eslint": "^8.57.1",
     "eslint-config-google": "^0.14.0",
     "eslint-plugin-rxjs": "^5.0.3",

package/src/analysis/sequence-similarity-viewer.ts CHANGED Viewed

@@ -62,7 +62,7 @@ export class SequenceSimilarityViewer extends SequenceSearchBaseViewer {
     if (!this.beforeRender())
       return;
     if (this.targetColumn) {
-      this.curIdx = this.dataFrame!.currentRowIdx == -1 ? 0 : this.dataFrame!.currentRowIdx;
+      this.curIdx = (this.dataFrame?.currentRowIdx ?? -1) == -1 ? 0 : this.dataFrame!.currentRowIdx;
       // Force recomputation if parameters changed
       const parametersChanged =
@@ -72,7 +72,7 @@ export class SequenceSimilarityViewer extends SequenceSearchBaseViewer {
         this.lastGapExtend !== this.gapExtend;
       if ((computeData && !this.gridSelect) || parametersChanged) {
-        this.targetMoleculeIdx = (this.dataFrame!.currentRowIdx ?? -1) < 0 ? 0 : this.dataFrame!.currentRowIdx; await this.computeByMM();
+        this.targetMoleculeIdx = (this.dataFrame?.currentRowIdx ?? -1) < 0 ? 0 : this.dataFrame!.currentRowIdx; await this.computeByMM();
         const similarColumnName: string = this.similarColumnLabel != null ? this.similarColumnLabel :
           `similar (${this.targetColumn})`;
         this.molCol = DG.Column.string(similarColumnName,
@@ -87,9 +87,9 @@ export class SequenceSimilarityViewer extends SequenceSearchBaseViewer {
         let prevTimer: any = null;
         const _ = resDf.onCurrentRowChanged.subscribe((_: any) => {
           prevTimer && clearTimeout(prevTimer);
-          if ((resDf.currentRowIdx ?? -1) < 0)
+          if ((resDf?.currentRowIdx ?? -1) < 0)
             return;
-          this.dataFrame.currentRowIdx = resDf.col('indexes')!.get(resDf.currentRowIdx);
+          this.dataFrame && (this.dataFrame.currentRowIdx = resDf.col('indexes')!.get(resDf.currentRowIdx));
           prevTimer = setTimeout(() => { this.createPropertyPanel(resDf); }, 300);
           this.gridSelect = true;
         });

package/src/tests/to-atomic-level-tests.ts CHANGED Viewed

@@ -1,3 +1,4 @@
+/* eslint-disable max-len */
 /* Do not change these import lines to match external modules in webpack configuration */
 import * as grok from 'datagrok-api/grok';
 import * as ui from 'datagrok-api/ui';
@@ -8,7 +9,7 @@ import wu from 'wu';
 import {before, after, category, test, expectArray, expect} from '@datagrok-libraries/test/src/test';
 import {RDModule} from '@datagrok-libraries/chem-meta/src/rdkit-api';
 import {_toAtomicLevel} from '@datagrok-libraries/bio/src/monomer-works/to-atomic-level';
-import {IMonomerLib} from '@datagrok-libraries/bio/src/types/monomer-library';
+import {IMonomerLib, Monomer} from '@datagrok-libraries/bio/src/types/monomer-library';
 import {ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
 import {getMonomerLibHelper, IMonomerLibHelper} from '@datagrok-libraries/bio/src/types/monomer-library';
 import {
@@ -23,6 +24,80 @@ import {_package} from '../package-test';
 const appPath = 'System:AppData/Bio';
 const fileSource = new DG.FileSource(appPath);
+/**
+ * Test fixture: a PEPTIDE backbone monomer (symbol 'allyl_mon') with three R-groups.
+ * R1 and R2 carry single-atom caps ('[H][*:1]', 'O[*:2]'); R3 is capped with a
+ * multi-atom group ('C=C[*:3]').
+ */
+const complexMonomerAllylRgroup: Monomer = {
+  'symbol': 'allyl_mon',
+  'name': 'monomer with Allyl R group',
+  // V2000 molblock; its 'M  RGP' line assigns R-group numbers to the three R# atoms
+  'molfile': '\n     RDKit          2D\n\n  9  8  0  0  0  0  0  0  0  0999 V2000\n    1.4434   -2.1667    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0\n    1.4434   -0.6667    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n    0.1443    0.0833    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n   -1.1547   -0.6667    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n   -2.4537    0.0833    0.0000 S   0  0  0  0  0  0  0  0  0  0  0  0\n   -3.7528   -0.6667    0.0000 R#  0  0  0  0  0  0  0  0  0  0  0  0\n    0.1443    1.5833    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0\n    1.4434    2.3333    0.0000 R#  0  0  0  0  0  0  0  0  0  0  0  0\n    2.7424    0.0833    0.0000 R#  0  0  0  0  0  0  0  0  0  0  0  0\n  1  2  2  0\n  2  3  1  0\n  3  4  1  6\n  4  5  1  0\n  5  6  1  0\n  3  7  1  0\n  7  8  1  0\n  2  9  1  0\nM  RGP  3   6   3   8   1   9   2\nM  END\n',
+  'smiles': 'O=C([C@H](CS[*:3])N[*:1])[*:2]',
+  'polymerType': 'PEPTIDE',
+  'monomerType': 'Backbone',
+  'naturalAnalog': 'C',
+  'id': 16,
+  'rgroups': [
+    {
+      'alternateId': 'R1-H',
+      'capGroupName': 'H',
+      'capGroupSmiles': '[H][*:1]',
+      'label': 'R1'
+    },
+    {
+      'alternateId': 'R2-OH',
+      'capGroupName': 'OH',
+      'capGroupSmiles': 'O[*:2]',
+      'label': 'R2'
+    },
+    {
+      // the only multi-atom cap of this monomer
+      'alternateId': 'R3-Allyl',
+      'capGroupName': 'Allyl',
+      'capGroupSmiles': 'C=C[*:3]',
+      'label': 'R3'
+    }
+  ],
+  'author': 'Admin',
+  'createDate': '2026-02-18T14:48:41.723Z',
+  'meta': {}
+};
+/**
+ * Test fixture: a PEPTIDE backbone monomer (symbol 'SomeComplex') with four R-groups.
+ * R1 and R2 carry single-atom caps; R3 ('C=CC([*:3])=C') and R4 ('ClCCCC=CC([*:4])=CCC')
+ * are multi-atom caps whose attachment point sits in the middle of the cap SMILES.
+ */
+const complexMonomerWithComplexRgroup: Monomer = {
+  'symbol': 'SomeComplex',
+  'name': 'Some complex monomer with complex R group',
+  // V2000 molblock; its 'M  RGP' line lists the R# atoms out of numeric order (3, 1, 4, 2)
+  'molfile': '\n     RDKit          2D\n\n 10  9  0  0  0  0  0  0  0  0999 V2000\n   -1.4289   -0.3750    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n   -2.7280    0.3750    0.0000 S   0  0  0  0  0  0  0  0  0  0  0  0\n   -4.0270   -0.3750    0.0000 R#  0  0  0  0  0  0  0  0  0  0  0  0\n   -0.1299    0.3750    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n   -0.1299    1.8750    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0\n    1.1691    2.6250    0.0000 R#  0  0  0  0  0  0  0  0  0  0  0  0\n    1.1691   -0.3750    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0\n    1.1691   -1.8750    0.0000 O   0  0  0  0  0  0  0  0  0  0  0  0\n    2.4682   -2.6250    0.0000 R#  0  0  0  0  0  0  0  0  0  0  0  0\n    2.4682    0.3750    0.0000 R#  0  0  0  0  0  0  0  0  0  0  0  0\n  1  2  1  0\n  2  3  1  0\n  4  1  1  6\n  4  5  1  0\n  5  6  1  0\n  4  7  1  0\n  7  8  1  0\n  8  9  1  0\n  7 10  1  0\nM  RGP  4   3   3   6   1   9   4  10   2\nM  END\n',
+  'smiles': '[*:4]OC([C@H](CS[*:3])N[*:1])[*:2]',
+  'polymerType': 'PEPTIDE',
+  'monomerType': 'Backbone',
+  'naturalAnalog': 'C',
+  // NOTE(review): same id as the 'allyl_mon' fixture — presumably irrelevant for these tests; confirm
+  'id': 16,
+  'rgroups': [
+    {
+      'alternateId': 'R1-H',
+      'capGroupName': 'H',
+      'capGroupSmiles': '[H][*:1]',
+      'label': 'R1'
+    },
+    {
+      'alternateId': 'R2-OH',
+      'capGroupName': 'OH',
+      'capGroupSmiles': 'O[*:2]',
+      'label': 'R2'
+    },
+    {
+      'alternateId': 'R3-Something',
+      'capGroupName': 'Something',
+      'capGroupSmiles': 'C=CC([*:3])=C',
+      'label': 'R3'
+    },
+    {
+      'alternateId': 'R4-SomethingElse',
+      'capGroupName': 'SomethingElse',
+      'capGroupSmiles': 'ClCCCC=CC([*:4])=CCC',
+      'label': 'R4'
+    }
+  ],
+  'author': 'Admin',
+  'createDate': '2026-02-18T14:48:41.723Z',
+  'meta': {}
+};
 const enum Tests {
   PT = 'peptides-fasta',
   DNA = 'dna-fasta',
@@ -225,6 +300,51 @@ PEPTIDE1{Lys_Boc.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2.Thr_PO3H2.Aca.Tyr
     expect(polishMolfile(resCol.get(0)), polishMolfile(tgtMol));
   });
+  /**
+   * Converts a HELM string to a molfile with the two custom fixture monomers temporarily
+   * registered in the PEPTIDE section of the monomer library, and compares the SMILES of
+   * the result with the expected one.
+   * @param srcHelm HELM string to convert
+   * @param expectedSmiles SMILES the converted structure must be equal to
+   * @throws whatever the conversion or the `expect` assertion throws; the library is
+   * restored to its previous state in either case
+   */
+  async function _testToAtomicLevelWithCustomMonomer(srcHelm: string, expectedSmiles: string): Promise<void> {
+    const customMonomers: Monomer[] = [complexMonomerAllylRgroup, complexMonomerWithComplexRgroup];
+    const monomerLib = monomerLibHelper.getMonomerLib();
+    // `_monomers` is presumably not part of the public library typing, hence the suppression
+    // @ts-ignore
+    const peptideMonomers: {[symbol: string]: Monomer | undefined} = monomerLib._monomers['PEPTIDE'];
+    // remember what (if anything) was registered under the same symbols so it can be put back
+    const previousMonomers = customMonomers.map((m) => peptideMonomers[m.symbol]);
+    for (const m of customMonomers)
+      peptideMonomers[m.symbol] = m;
+    try {
+      const converter = await seqHelper.getHelmToMolfileConverter(monomerLib);
+      const resMolFile = seqHelper.helmToAtomicLevelSingle(srcHelm, converter, true, true);
+      const resSmiles = grok.chem.convert(resMolFile.molfile, grok.chem.Notation.Unknown, grok.chem.Notation.Smiles);
+      expect(resSmiles, expectedSmiles);
+    } finally {
+      // restore the monomer library to avoid affecting other tests
+      customMonomers.forEach((m, i) => {
+        const previous = previousMonomers[i];
+        if (previous === undefined)
+          delete peptideMonomers[m.symbol];
+        else
+          peptideMonomers[m.symbol] = previous;
+      });
+    }
+  }
+  // Conversion cases for the custom fixture monomers: [test name, source HELM, expected SMILES]
+  const customMonomerCases: [string, string, string][] = [
+    [
+      'SingleHelmMonomerWithAllylGroups',
+      `PEPTIDE1{[${complexMonomerAllylRgroup.symbol}]}$$$$V2.0`,
+      'C=CSC[C@H](N)C(=O)O',
+    ],
+    [
+      'SingleHelmMonomerWithComplexRGroups',
+      `PEPTIDE1{[${complexMonomerWithComplexRgroup.symbol}]}$$$$V2.0`,
+      'C=CC(=C)SC[C@H](N)C(O)OC(C=CCCCCl)=CCC',
+    ],
+    [
+      'HelmPolymerWithComplexRGroups',
+      `PEPTIDE1{[dI].[Trp_Ome].[Asp_OMe].[D-Cit].[meG].[Phe_4NH2].[Phe_34diCl].[meY].[Pro_4Me3OH].[Met_O].[NMe2Abz].[Tyr_Ph4OH].[3Pal].[xiIle].[Tyr_35diI].[Ala_tBu]}|PEPTIDE2{[${complexMonomerAllylRgroup.symbol}].[${complexMonomerWithComplexRgroup.symbol}]}$PEPTIDE1,PEPTIDE1,16:R2-1:R1|PEPTIDE1,PEPTIDE2,1:R3-1:R1$$$V2.0`,
+      'C=CSC[C@H](NCC[C@@H](C)[C@H]1NC(=O)[C@H](C(C)(C)C)NC(=O)[C@H](Cc2cc(I)c(O)c(I)c2)NC(=O)[C@H](C(C)CC)NC(=O)[C@H](Cc2cccnc2)NC(=O)[C@H](Cc2ccc(Oc3ccc(O)cc3)cc2)NC(=O)c2ccccc2N(C)C(=O)[C@H](CCS(C)=O)NC(=O)[C@@H]2C(O)C(C)CN2C(=O)[C@H](Cc2ccc(O)cc2)N(C)C(=O)[C@H](Cc2ccc(Cl)c(Cl)c2)NC(=O)[C@H](Cc2ccc(N)cc2)NC(=O)CN(C)C(=O)[C@@H](CCCNC(N)=O)NC(=O)[C@H](CC(=O)OC)NC(=O)[C@H](Cc2cn(OC)c3ccccc23)NC1=O)C(=O)N[C@@H](CSC(=C)C=C)C(O)OC(C=CCCCCl)=CCC',
+    ],
+  ];
+  // one test per case, registered in the order listed above
+  for (const [testName, srcHelm, expectedSmiles] of customMonomerCases) {
+    test(testName, async () => {
+      await _testToAtomicLevelWithCustomMonomer(srcHelm, expectedSmiles);
+    });
+  }
   async function _testToAtomicLevel(
     df: DG.DataFrame, seqColName: string = 'seq', monomerLibHelper: IMonomerLibHelper
   ): Promise<DG.Column | null> {

package/src/utils/biln.ts CHANGED Viewed

@@ -6,7 +6,7 @@ import * as DG from 'datagrok-api/dg';
 /* eslint-disable max-len */
 import {ISeqHelper} from '@datagrok-libraries/bio/src/utils/seq-helper';
-import {INotationProvider, SplitterFunc} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
+import {INotationProvider, NotationProviderBase, SplitterFunc} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
 import {NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule/consts';
 import {CellRendererBackBase} from '@datagrok-libraries/bio/src/utils/cell-renderer-back-base';
 import {MonomerPlacer} from '@datagrok-libraries/bio/src/utils/cell-renderer-monomer-placer';
@@ -14,16 +14,24 @@ import {monomerToShort, splitterAsBiln} from '@datagrok-libraries/bio/src/utils/
 import {_package} from '../package';
 /* eslint-enable max-len */
-export class BilnNotationProvider implements INotationProvider {
+export class BilnNotationProvider extends NotationProviderBase implements INotationProvider {
   public readonly splitter: SplitterFunc;
   get defaultGapOriginal(): string { return ''; }
+  static override get notationName(): string { return NOTATION.BILN; }
+  static override get implementsFromHelm(): boolean { return false; }
+  static override convertFromHelm(helm: string, options: any): string {
+    throw new Error('Canonical way of converting from helm to biln must be used');
+  }
   constructor(
     public readonly separator: string,
     public readonly seqHelper: ISeqHelper,
     public readonly seqCol: DG.Column
   ) {
+    super();
     this.splitter = splitterAsBiln.bind(this);
   }

package/src/utils/convert.ts CHANGED Viewed

@@ -1,3 +1,4 @@
+/* eslint-disable max-len */
 import * as DG from 'datagrok-api/dg';
 import * as ui from 'datagrok-api/ui';
 import * as grok from 'datagrok-api/grok';
@@ -7,6 +8,7 @@ import {Subscription} from 'rxjs';
 import {NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
 import {ISeqHelper} from '@datagrok-libraries/bio/src/utils/seq-helper';
+import {NotationProviderBase} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
 let convertDialog: DG.Dialog | null = null;
@@ -18,7 +20,7 @@ let convertDialogSubs: Subscription[] = [];
  * @param {DG.Column<string>} col Column with 'Macromolecule' semantic type
  * @param {ISeqHelper} seqHelper
  */
-export function convert(col: DG.Column<string> | undefined, seqHelper: ISeqHelper): void {
+export async function convert(col: DG.Column<string> | undefined, seqHelper: ISeqHelper): Promise<void> {
   let srcCol = col ?? grok.shell.t.columns.bySemType('Macromolecule')!;
   if (!srcCol)
     throw new Error('No column with Macromolecule semantic type found');
@@ -41,6 +43,9 @@ export function convert(col: DG.Column<string> | undefined, seqHelper: ISeqHelpe
     NOTATION.HELM,
     NOTATION.BILN
   ];
+  const notationProviderConstructors = await NotationProviderBase.getProviderConstructors();
   const toggleColumn = (newCol: DG.Column) => {
     srcCol = newCol;
     converterSh = seqHelper.getSeqHandler(srcCol);
@@ -49,6 +54,15 @@ export function convert(col: DG.Column<string> | undefined, seqHelper: ISeqHelpe
       separatorInput.value = '/'; // helm monomers can have - in the name like D-aThr;
     dialogHeader.textContent = 'Current notation: ' + currentNotation;
     filteredNotations = notations.filter((e) => e !== currentNotation);
+    if (currentNotation === NOTATION.CUSTOM)
+      filteredNotations = [NOTATION.HELM];
+    if (currentNotation === NOTATION.HELM) {
+      // add custom notations that
+      notationProviderConstructors.forEach((c) => {
+        if (c.implementsFromHelm)
+          filteredNotations.unshift(c.notationName as NOTATION); // hack :)
+      });
+    }
     targetNotationInput = ui.input.choice('Convert to', {
       value: filteredNotations[0], items: filteredNotations,
       onValueChanged: toggleSeparator
@@ -72,6 +86,15 @@ export function convert(col: DG.Column<string> | undefined, seqHelper: ISeqHelpe
   const separatorArray = ['-', '.', '/'];
   let filteredNotations = notations.filter((e) => e !== currentNotation);
+  if (currentNotation === NOTATION.CUSTOM)
+    filteredNotations = [NOTATION.HELM];
+  if (currentNotation === NOTATION.HELM) {
+    // add custom notations that
+    notationProviderConstructors.forEach((c) => {
+      if (c.implementsFromHelm)
+        filteredNotations.unshift(c.notationName as NOTATION); // hack :)
+    });
+  }
   const separatorInput = ui.input.choice('Separator', {value: separatorArray[0], items: separatorArray});
@@ -105,8 +128,11 @@ export function convert(col: DG.Column<string> | undefined, seqHelper: ISeqHelpe
       .onOK(async () => {
         const targetNotation = targetNotationInput.value as NOTATION;
         const separator: string | undefined = targetNotation === NOTATION.SEPARATOR ? separatorInput.value! : undefined;
+        let notationProviderConstructor: typeof NotationProviderBase | undefined = undefined;
+        if (!notations.includes(targetNotation) && notationProviderConstructors.find((c) => c.notationName === targetNotation))
+          notationProviderConstructor = notationProviderConstructors.find((c) => c.notationName === targetNotation)!;
-        await convertDo(srcCol, seqHelper, targetNotation, separator);
+        await convertDo(srcCol, seqHelper, targetNotation, separator, notationProviderConstructor);
       })
       .show({x: 350, y: 100});
@@ -123,18 +149,38 @@ export function convert(col: DG.Column<string> | undefined, seqHelper: ISeqHelpe
  * @param {NOTATION} targetNotation Target notation
  * @param {string | null} separator Separator for SEPARATOR notation
  */
-export async function convertDo(srcCol: DG.Column, seqHelper: ISeqHelper, targetNotation: NOTATION, separator?: string): Promise<DG.Column> {
-  const converterSh = seqHelper.getSeqHandler(srcCol);
-  const newColumn = converterSh.convert(targetNotation, separator);
-  srcCol.dataFrame.columns.add(newColumn);
-  // Call detector directly to escape some error on detectSemanticTypes
-  const semType = await grok.functions.call('Bio:detectMacromolecule', {col: newColumn});
-  if (semType)
-    newColumn.semType = semType;
-  // call to calculate 'cell.renderer' tag
-  await grok.data.detectSemanticTypes(srcCol.dataFrame);
-  return newColumn;
+export async function convertDo(srcCol: DG.Column, seqHelper: ISeqHelper, targetNotation: NOTATION, separator?: string, notationProviderConstructor?: typeof NotationProviderBase): Promise<DG.Column> {
+  // When a notation provider constructor is passed it performs the conversion (from HELM);
+  // otherwise the sequence handler of the source column converts to `targetNotation`.
+  // Registration of the new column and semantic type detection are shared by both paths.
+  const newColumn: DG.Column = notationProviderConstructor ?
+    convertFromHelmWithProvider(srcCol, notationProviderConstructor) :
+    seqHelper.getSeqHandler(srcCol).convert(targetNotation, separator);
+  srcCol.dataFrame.columns.add(newColumn);
+  // Call detector directly to escape some error on detectSemanticTypes
+  const semType = await grok.functions.call('Bio:detectMacromolecule', {col: newColumn});
+  if (semType)
+    newColumn.semType = semType;
+  // call to calculate 'cell.renderer' tag
+  await grok.data.detectSemanticTypes(srcCol.dataFrame);
+  return newColumn;
+}
+/**
+ * Builds a new string column by passing every value of `srcCol` through the provider's
+ * static `convertFromHelm`. The column is named `<notationName>(<source name>)`, made
+ * unique within the source data frame, and is NOT added to the data frame here.
+ * A row whose conversion throws is logged to the console and stored as an empty string.
+ * @param srcCol source column; expected to hold HELM, since the dialog offers provider
+ * notations only for HELM columns
+ * @param provider notation provider constructor exposing `notationName` and `convertFromHelm`
+ * @returns the converted column
+ */
+function convertFromHelmWithProvider(srcCol: DG.Column, provider: typeof NotationProviderBase): DG.Column {
+  const newColName = srcCol.dataFrame.columns.getUnusedName(`${provider.notationName}(${srcCol.name})`);
+  const newCol = DG.Column.string(newColName, srcCol.length);
+  newCol.init((i) => {
+    const seq = srcCol.get(i);
+    try {
+      return provider.convertFromHelm(seq, {});
+    } catch (e) {
+      console.error(`Error converting sequence at row ${i}: ${e instanceof Error ? e.message : e}`);
+      return '';
+    }
+  });
+  return newCol;
+}

package/src/utils/helm-to-molfile/converter/mol-atoms.ts CHANGED Viewed

@@ -16,6 +16,11 @@ export abstract class MolfileAtoms {
     this.rawAtomLines[atomIdx] = this.rawAtomLines[atomIdx].replace(R_GROUP_ELEMENT_SYMBOL, newElementSymbol);
   }
+  /**
+   * Adds one atom at the end of the atom block.
+   * @param rawLine raw atom line to store
+   * @param x x coordinate recorded for the new atom
+   * @param y y coordinate recorded for the new atom
+   */
+  appendAtomLine(rawLine: string, x: number, y: number): void {
+    // both arrays are extended together so that they stay index-aligned
+    const position = {x, y};
+    this.coordinates.push(position);
+    this.rawAtomLines.push(rawLine);
+  }
   deleteAtoms(indices: number[]): void {
     this.coordinates = this.coordinates.filter((_, idx) => !indices.includes(idx));
     this.rawAtomLines = this.rawAtomLines.filter((_, idx) => !indices.includes(idx));

package/src/utils/helm-to-molfile/converter/mol-bonds.ts CHANGED Viewed

@@ -13,6 +13,11 @@ export abstract class MolfileBonds {
     return this.bondedAtomPairs;
   }
+  /**
+   * Adds one bond at the end of the bond block.
+   * @param rawLine raw bond line to store
+   * @param bondedPair the two atom indices joined by the bond
+   */
+  appendBondLine(rawLine: string, bondedPair: number[]): void {
+    // both arrays are extended together so that they stay index-aligned
+    this.bondedAtomPairs.push(bondedPair);
+    this.rawBondLines.push(rawLine);
+  }
   deleteBondLines(indices: number[]): void {
     this.rawBondLines = this.rawBondLines.filter((_, idx) => !indices.includes(idx));
     this.bondedAtomPairs = this.bondedAtomPairs.filter((_, idx) => !indices.includes(idx));

package/src/utils/helm-to-molfile/converter/mol-wrapper.ts CHANGED Viewed

@@ -1,6 +1,8 @@
+import {RDModule} from '@datagrok-libraries/chem-meta/src/rdkit-api';
 import {MolfileAtoms} from './mol-atoms';
 import {MolfileBonds} from './mol-bonds';
 import {RGroupHandler} from './r-group-handler';
+import {CapGroupInfo} from './types';
 export abstract class MolfileWrapper {
   constructor(protected monomerSymbol: string) { }
@@ -76,8 +78,8 @@ export abstract class MolfileWrapper {
     this.bonds.shift(shift);
   }
+  /**
+   * Caps the R-groups of this molfile by delegating to its R-group handler.
+   * @param capGroupInfo cap group descriptions passed through to the handler
+   * @param rdKitModule RDKit module passed through to the handler
+   */
-  capRGroups(capGroupElements: string[]): void {
-    this.rGroups.capRGroups(capGroupElements);
+  capRGroups(capGroupInfo: CapGroupInfo[], rdKitModule: RDModule): void {
+    this.rGroups.capRGroups(capGroupInfo, rdKitModule);
   }
 }

package/src/utils/helm-to-molfile/converter/monomer-wrapper.ts CHANGED Viewed

@@ -6,17 +6,23 @@ import {MolfileHandler} from '@datagrok-libraries/chem-meta/src/parsing-utils/mo
 import {Helm} from './helm';
 import {MolfileWrapper} from './mol-wrapper';
 import {MolfileWrapperFactory} from './mol-wrapper-factory';
+import {CapGroupInfo} from './types';
+/**
+ * Tells whether `s` has the shape of a single element symbol: one uppercase letter,
+ * optionally followed by one lowercase letter (e.g. 'H', 'O', 'C', 'Cl').
+ * Only the shape is checked; the symbol is not looked up in the periodic table.
+ */
+function isSimpleElement(s: string): boolean {
+  const singleElementPattern = /^[A-Z][a-z]?$/;
+  return singleElementPattern.test(s);
+}
 export class MonomerWrapper {
   private readonly molfileWrapper: MolfileWrapper;
-  private capGroupElements: string[] = [];
+  private capGroupInfo: CapGroupInfo[] = [];
   private static molfileV2KToV3KCache: Map<string, string> = new Map();
   constructor(
     public readonly monomerSymbol: string,
     public readonly monomerIdx: number,
     private helm: Helm,
     shift: { x: number, y: number },
-    rdKitModule: RDModule,
+    private readonly rdKitModule: RDModule,
     private readonly monomerLib: IMonomerLibBase
   ) {
     const libraryMonomerObject = this.getLibraryMonomerObject();
@@ -26,7 +32,7 @@ export class MonomerWrapper {
       molfile = this.convertMolfileToV3KFormat(molfile, monomerSymbol, rdKitModule);
     this.molfileWrapper = MolfileWrapperFactory.getInstance(molfile, monomerSymbol);
-    this.capGroupElements = this.getCapGroupElements(libraryMonomerObject);
+    this.capGroupInfo = this.getCapGroupInfo(libraryMonomerObject);
     this.removeRGroups(helm.bondedRGroupsMap[monomerIdx]!);
     this.capRemainingRGroups();
@@ -63,20 +69,28 @@ export class MonomerWrapper {
     return monomer;
   }
+  /**
+   * Collects cap group information for every R-group of the library monomer.
+   * The R-group number is resolved, in this order, from the digits of the label
+   * (e.g. 'R1', 'R12'), from the attachment point of the cap SMILES (e.g. '[*:3]'),
+   * and finally from the 1-based position of the R-group in `rgroups`.
+   * @param libraryMonomerObject monomer as stored in the monomer library
+   * @returns one entry per R-group, in the order of `rgroups`
+   */
-  private getCapGroupElements(
+  private getCapGroupInfo(
     libraryMonomerObject: Monomer
-  ): string[] {
+  ): CapGroupInfo[] {
     const rgroups = libraryMonomerObject.rgroups;
+    // parses a positive integer; yields undefined for missing, non-numeric or zero input
+    const toPositiveInt = (s: string | undefined): number | undefined => {
+      const n = parseInt(s ?? '', 10);
+      return n > 0 ? n : undefined;
+    };
-    const result = rgroups.map((rgroup) => {
+    return rgroups.map((rgroup, ind) => {
       const smiles = rgroup[HELM_RGROUP_FIELDS.CAP_GROUP_SMILES] ||
         // WARNING: ignore because both key variants coexist in HELM Core Library!
         // @ts-ignore
         rgroup[HELM_RGROUP_FIELDS.CAP_GROUP_SMILES_UPPERCASE];
+      // every digit following the first label character is used, so that 'R10' gives 10
+      // (not 1); a missing label no longer throws and falls through to the next source
+      const label = rgroup[HELM_RGROUP_FIELDS.LABEL] ?? '';
+      const rGroupId = toPositiveInt(label.match(/^.(\d+)/)?.[1]) ??
+        // try to parse it from smiles, which can look like '[H][*:1]', 'O[*:2]', 'C=C[*:3]'
+        toPositiveInt(smiles?.match(/\[\*:(\d+)\]/)?.[1]) ??
+        ind + 1; // fallback to index-based id, starting from 1
       // extract the element symbol
-      return smiles.replace(/(\[|\]|\*|:|\d)/g, '');
+      const element = smiles.replace(/(\[|\]|\*|:|\d)/g, '');
+      return {element, smiles, isSimple: isSimpleElement(element), rGroupId};
     });
-    return result;
   }
   private shiftCoordinates(shift: { x: number, y: number }): void {
@@ -96,7 +110,7 @@ export class MonomerWrapper {
   }
+  /** Caps the R-groups of the monomer molfile using the cap group info collected for this
+   * monomer; the constructor calls it right after the bonded R-groups have been removed */
   private capRemainingRGroups(): void {
-    this.molfileWrapper.capRGroups(this.capGroupElements);
+    this.molfileWrapper.capRGroups(this.capGroupInfo, this.rdKitModule);
   }
   replaceRGroupWithAttachmentAtom(rGroupId: number, attachmentAtomIdx: number): void {

package/src/utils/helm-to-molfile/converter/r-group-handler.ts CHANGED Viewed

@@ -1,8 +1,10 @@
 import {MolfileHandlerBase} from '@datagrok-libraries/chem-meta/src/parsing-utils/molfile-handler-base';
+import {MolfileHandler} from '@datagrok-libraries/chem-meta/src/parsing-utils/molfile-handler';
+import {RDModule} from '@datagrok-libraries/chem-meta/src/rdkit-api';
 import {HYDROGEN_SYMBOL} from './const';
 import {MolfileAtoms} from './mol-atoms';
 import {MolfileBonds} from './mol-bonds';
-import {PositionInBonds} from './types';
+import {CapGroupInfo, PositionInBonds} from './types';
 export class RGroupHandler {
@@ -108,15 +110,106 @@ export class RGroupHandler {
   }
   /** WARNING: capping RGroups and deletion of the bonded ones don't commute */
-  capRGroups(capGroupElements: string[]): void {
+  capRGroups(capGroupInfo: CapGroupInfo[], rdKitModule: RDModule): void {
     this.rGroupIdToAtomicIndexMap.forEach((atomicIdx, rGroupId) => {
-      const element = capGroupElements[rGroupId - 1];
-      if (element === HYDROGEN_SYMBOL) {
-        this.removeRGroups([rGroupId]);
-        this.deleteBondLineWithSpecifiedRGroup(rGroupId);
+      // look the cap up by R-group number first; the positional lookup is kept as a
+      // fallback for lists whose entries carry no matching number
+      const info = capGroupInfo.find((cap) => cap.rGroupId === rGroupId) ?? capGroupInfo[rGroupId - 1];
+      // fail with a clear message instead of a TypeError on `info.isSimple`
+      if (!info)
+        throw new Error(`No cap group is defined for R-group ${rGroupId}`);
+      if (info.isSimple) {
+        if (info.element === HYDROGEN_SYMBOL) {
+          // a hydrogen cap is not drawn: the R# atom and its bond are dropped
+          this.removeRGroups([rGroupId]);
+          this.deleteBondLineWithSpecifiedRGroup(rGroupId);
+        } else
+          this.atoms.replaceRGroupSymbolByElement(atomicIdx, info.element);
       } else
-        this.atoms.replaceRGroupSymbolByElement(atomicIdx, element);
+        this.capWithComplexGroup(atomicIdx, info.smiles, rdKitModule);
     });
   }
+  /**
+   * Caps an R-group with a multi-atom cap group: the cap SMILES is parsed with RDKit,
+   * the cap's attachment atom takes the place of the R# atom, and the remaining cap atoms
+   * and bonds are appended to the atoms and bonds of this monomer.
+   * @param rGroupAtomicIdx 0-based index of the R# atom among the atoms of this monomer
+   * @param capSmiles cap group SMILES with an attachment point, e.g. 'C=C[*:3]'
+   * @param rdKitModule RDKit module used to turn the cap SMILES into a V3K molblock
+   * @throws Error when the SMILES cannot be parsed, or when the placeholder atom or the
+   * atom bonded to it cannot be found in the parsed cap
+   */
+  private capWithComplexGroup(
+    rGroupAtomicIdx: number, capSmiles: string, rdKitModule: RDModule
+  ): void {
+    // Replace [*:N] with placeholder element Xe so RDKit can parse the SMILES
+    const PLACEHOLDER = 'Xe';
+    const parsableSmiles = capSmiles.replace(/\[\*:\d+\]/g, `[${PLACEHOLDER}]`);
+    const capMol = rdKitModule.get_mol(parsableSmiles);
+    if (!capMol)
+      throw new Error(`Cannot parse cap group SMILES: ${capSmiles}`);
+    let capMolfile: string;
+    try {
+      capMolfile = capMol.get_v3Kmolblock();
+    } finally {
+      // the molecule is released even when molblock generation throws
+      capMol.delete();
+    }
+    const capHandler = MolfileHandler.getInstance(capMolfile);
+    const capAtomLines = capHandler.getAtomLines();
+    const capBondPairs = capHandler.pairsOfBondedAtoms;
+    const capBondLines = capHandler.getBondLines();
+    const capX = capHandler.x;
+    const capY = capHandler.y;
+    const capAtomTypes = capHandler.atomTypes;
+    // Find the placeholder atom (was the [*:N] attachment point)
+    // NOTE(review): only the first placeholder is used; caps with several attachment
+    // points are presumably not expected here — confirm
+    let dummyCapIdx = -1; // 0-based
+    for (let i = 0; i < capAtomTypes.length; i++) {
+      if (capAtomTypes[i] === PLACEHOLDER) {
+        dummyCapIdx = i;
+        break;
+      }
+    }
+    if (dummyCapIdx === -1)
+      throw new Error(`Cannot find placeholder atom in cap group SMILES: ${capSmiles}`);
+    // Find the attachment atom (bonded to placeholder) and the bond connecting them
+    let attachmentCapIdx = -1; // 0-based
+    for (let i = 0; i < capBondPairs.length; i++) {
+      const [a1, a2] = capBondPairs[i]; // 1-based
+      if (a1 === dummyCapIdx + 1) {
+        attachmentCapIdx = a2 - 1;
+        break;
+      }
+      if (a2 === dummyCapIdx + 1) {
+        attachmentCapIdx = a1 - 1;
+        break;
+      }
+    }
+    if (attachmentCapIdx === -1)
+      throw new Error(`Cannot find attachment atom in cap group SMILES: ${capSmiles}`);
+    // Compute coordinate translation: place cap attachment at R-group position
+    // (translation only: the cap atoms keep their relative layout from the cap molblock)
+    const rGroupCoords = this.atoms.atomCoordinates[rGroupAtomicIdx];
+    const tx = rGroupCoords.x - capX[attachmentCapIdx];
+    const ty = rGroupCoords.y - capY[attachmentCapIdx];
+    // Replace the R# atom symbol with the attachment atom's element
+    const attachmentSymbol = capAtomTypes[attachmentCapIdx];
+    this.atoms.replaceRGroupSymbolByElement(rGroupAtomicIdx, attachmentSymbol);
+    // Build index mapping: cap 1-based → monomer 1-based
+    const capToMonomer = new Map<number, number>();
+    capToMonomer.set(attachmentCapIdx + 1, rGroupAtomicIdx + 1);
+    // Append remaining cap atoms (excluding placeholder and attachment)
+    let nextMonomerIdx = this.atoms.count + 1; // 1-based
+    for (let i = 0; i < capAtomLines.length; i++) {
+      if (i === dummyCapIdx || i === attachmentCapIdx) continue;
+      const newX = capX[i] + tx;
+      const newY = capY[i] + ty;
+      // NOTE(review): the raw cap atom line is appended unchanged, only the separately
+      // stored coordinates are translated; presumably the line is rewritten when the
+      // final molfile is assembled — confirm
+      this.atoms.appendAtomLine(capAtomLines[i], newX, newY);
+      capToMonomer.set(i + 1, nextMonomerIdx);
+      nextMonomerIdx++;
+    }
+    // Append cap bonds (excluding any bond involving the placeholder)
+    for (let i = 0; i < capBondPairs.length; i++) {
+      const [a1, a2] = capBondPairs[i]; // 1-based in cap
+      if (a1 === dummyCapIdx + 1 || a2 === dummyCapIdx + 1) continue;
+      // every remaining cap atom was mapped above, hence the non-null assertions
+      const newA1 = capToMonomer.get(a1)!;
+      const newA2 = capToMonomer.get(a2)!;
+      this.bonds.appendBondLine(capBondLines[i], [newA1, newA2]);
+    }
+  }
 }

package/src/utils/helm-to-molfile/converter/types.ts CHANGED Viewed

@@ -11,3 +11,15 @@ export type PositionInBonds = {
   nodeIdx: number,
 }
+/** Cap group information for a single R-group of a monomer */
+export type CapGroupInfo = {
+  /** Cap SMILES with brackets, '*', ':' and digits stripped: a bare element symbol for
+   * single-atom caps (e.g. 'H', 'O'), a SMILES-like fragment otherwise (e.g. 'C=C') */
+  element: string,
+  /** Raw cap group SMILES (e.g. '[H][*:1]', 'O[*:2]', 'C=C[*:3]') */
+  smiles: string,
+  /** Whether `element` has the shape of a single element symbol, i.e. the cap is one atom */
+  isSimple: boolean,
+  /** Number of the R-group this cap belongs to; stored explicitly to handle cases where
+   * the cap list is not sorted by R-group number */
+  rGroupId: number,
+}