@datagrok/bio 2.25.10 → 2.25.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "name": "Davit Rizhinashvili",
6
6
  "email": "drizhinashvili@datagrok.ai"
7
7
  },
8
- "version": "2.25.10",
8
+ "version": "2.25.12",
9
9
  "description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
10
10
  "repository": {
11
11
  "type": "git",
@@ -44,10 +44,10 @@
44
44
  ],
45
45
  "dependencies": {
46
46
  "@biowasm/aioli": "^3.1.0",
47
- "@datagrok-libraries/bio": "^5.61.4",
47
+ "@datagrok-libraries/bio": "^5.61.6",
48
48
  "@datagrok-libraries/chem-meta": "^1.2.9",
49
49
  "@datagrok-libraries/math": "^1.2.6",
50
- "@datagrok-libraries/ml": "^6.10.7",
50
+ "@datagrok-libraries/ml": "^6.10.9",
51
51
  "@datagrok-libraries/test": "^1.1.0",
52
52
  "@datagrok-libraries/tutorials": "^1.7.4",
53
53
  "@datagrok-libraries/utils": "^4.6.9",
@@ -77,7 +77,6 @@
77
77
  "@types/wu": "^2.1.44",
78
78
  "@typescript-eslint/eslint-plugin": "^8.8.1",
79
79
  "@typescript-eslint/parser": "^8.8.1",
80
- "datagrok-tools": "^5.1.5",
81
80
  "eslint": "^8.57.1",
82
81
  "eslint-config-google": "^0.14.0",
83
82
  "eslint-plugin-rxjs": "^5.0.3",
@@ -62,7 +62,7 @@ export class SequenceSimilarityViewer extends SequenceSearchBaseViewer {
62
62
  if (!this.beforeRender())
63
63
  return;
64
64
  if (this.targetColumn) {
65
- this.curIdx = this.dataFrame!.currentRowIdx == -1 ? 0 : this.dataFrame!.currentRowIdx;
65
+ this.curIdx = (this.dataFrame?.currentRowIdx ?? -1) == -1 ? 0 : this.dataFrame!.currentRowIdx;
66
66
 
67
67
  // Force recomputation if parameters changed
68
68
  const parametersChanged =
@@ -72,7 +72,7 @@ export class SequenceSimilarityViewer extends SequenceSearchBaseViewer {
72
72
  this.lastGapExtend !== this.gapExtend;
73
73
 
74
74
  if ((computeData && !this.gridSelect) || parametersChanged) {
75
- this.targetMoleculeIdx = (this.dataFrame!.currentRowIdx ?? -1) < 0 ? 0 : this.dataFrame!.currentRowIdx; await this.computeByMM();
75
+ this.targetMoleculeIdx = (this.dataFrame?.currentRowIdx ?? -1) < 0 ? 0 : this.dataFrame!.currentRowIdx; await this.computeByMM();
76
76
  const similarColumnName: string = this.similarColumnLabel != null ? this.similarColumnLabel :
77
77
  `similar (${this.targetColumn})`;
78
78
  this.molCol = DG.Column.string(similarColumnName,
@@ -87,9 +87,9 @@ export class SequenceSimilarityViewer extends SequenceSearchBaseViewer {
87
87
  let prevTimer: any = null;
88
88
  const _ = resDf.onCurrentRowChanged.subscribe((_: any) => {
89
89
  prevTimer && clearTimeout(prevTimer);
90
- if ((resDf.currentRowIdx ?? -1) < 0)
90
+ if ((resDf?.currentRowIdx ?? -1) < 0)
91
91
  return;
92
- this.dataFrame.currentRowIdx = resDf.col('indexes')!.get(resDf.currentRowIdx);
92
+ this.dataFrame && (this.dataFrame.currentRowIdx = resDf.col('indexes')!.get(resDf.currentRowIdx));
93
93
  prevTimer = setTimeout(() => { this.createPropertyPanel(resDf); }, 300);
94
94
  this.gridSelect = true;
95
95
  });
@@ -1,3 +1,4 @@
1
+ /* eslint-disable max-len */
1
2
  /* Do not change these import lines to match external modules in webpack configuration */
2
3
  import * as grok from 'datagrok-api/grok';
3
4
  import * as ui from 'datagrok-api/ui';
@@ -8,7 +9,7 @@ import wu from 'wu';
8
9
  import {before, after, category, test, expectArray, expect} from '@datagrok-libraries/test/src/test';
9
10
  import {RDModule} from '@datagrok-libraries/chem-meta/src/rdkit-api';
10
11
  import {_toAtomicLevel} from '@datagrok-libraries/bio/src/monomer-works/to-atomic-level';
11
- import {IMonomerLib} from '@datagrok-libraries/bio/src/types/monomer-library';
12
+ import {IMonomerLib, Monomer} from '@datagrok-libraries/bio/src/types/monomer-library';
12
13
  import {ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
13
14
  import {getMonomerLibHelper, IMonomerLibHelper} from '@datagrok-libraries/bio/src/types/monomer-library';
14
15
  import {
@@ -23,6 +24,80 @@ import {_package} from '../package-test';
23
24
  const appPath = 'System:AppData/Bio';
24
25
  const fileSource = new DG.FileSource(appPath);
25
26
 
27
+ const complexMonomerAllylRgroup: Monomer = {
28
+ 'symbol': 'allyl_mon',
29
+ 'name': 'monomer with Allyl R group',
30
+ 'molfile': '\n RDKit 2D\n\n 9 8 0 0 0 0 0 0 0 0999 V2000\n 1.4434 -2.1667 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n 1.4434 -0.6667 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 0.1443 0.0833 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -1.1547 -0.6667 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -2.4537 0.0833 0.0000 S 0 0 0 0 0 0 0 0 0 0 0 0\n -3.7528 -0.6667 0.0000 R# 0 0 0 0 0 0 0 0 0 0 0 0\n 0.1443 1.5833 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n 1.4434 2.3333 0.0000 R# 0 0 0 0 0 0 0 0 0 0 0 0\n 2.7424 0.0833 0.0000 R# 0 0 0 0 0 0 0 0 0 0 0 0\n 1 2 2 0\n 2 3 1 0\n 3 4 1 6\n 4 5 1 0\n 5 6 1 0\n 3 7 1 0\n 7 8 1 0\n 2 9 1 0\nM RGP 3 6 3 8 1 9 2\nM END\n',
31
+ 'smiles': 'O=C([C@H](CS[*:3])N[*:1])[*:2]',
32
+ 'polymerType': 'PEPTIDE',
33
+ 'monomerType': 'Backbone',
34
+ 'naturalAnalog': 'C',
35
+ 'id': 16,
36
+ 'rgroups': [
37
+ {
38
+ 'alternateId': 'R1-H',
39
+ 'capGroupName': 'H',
40
+ 'capGroupSmiles': '[H][*:1]',
41
+ 'label': 'R1'
42
+ },
43
+ {
44
+ 'alternateId': 'R2-OH',
45
+ 'capGroupName': 'OH',
46
+ 'capGroupSmiles': 'O[*:2]',
47
+ 'label': 'R2'
48
+ },
49
+ {
50
+ 'alternateId': 'R3-Allyl',
51
+ 'capGroupName': 'Allyl',
52
+ 'capGroupSmiles': 'C=C[*:3]',
53
+ 'label': 'R3'
54
+ }
55
+ ],
56
+ 'author': 'Admin',
57
+ 'createDate': '2026-02-18T14:48:41.723Z',
58
+ 'meta': {}
59
+ };
60
+
61
+ const complexMonomerWithComplexRgroup: Monomer = {
62
+ 'symbol': 'SomeComplex',
63
+ 'name': 'Some complex monomer with complex R group',
64
+ 'molfile': '\n RDKit 2D\n\n 10 9 0 0 0 0 0 0 0 0999 V2000\n -1.4289 -0.3750 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -2.7280 0.3750 0.0000 S 0 0 0 0 0 0 0 0 0 0 0 0\n -4.0270 -0.3750 0.0000 R# 0 0 0 0 0 0 0 0 0 0 0 0\n -0.1299 0.3750 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n -0.1299 1.8750 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0\n 1.1691 2.6250 0.0000 R# 0 0 0 0 0 0 0 0 0 0 0 0\n 1.1691 -0.3750 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0\n 1.1691 -1.8750 0.0000 O 0 0 0 0 0 0 0 0 0 0 0 0\n 2.4682 -2.6250 0.0000 R# 0 0 0 0 0 0 0 0 0 0 0 0\n 2.4682 0.3750 0.0000 R# 0 0 0 0 0 0 0 0 0 0 0 0\n 1 2 1 0\n 2 3 1 0\n 4 1 1 6\n 4 5 1 0\n 5 6 1 0\n 4 7 1 0\n 7 8 1 0\n 8 9 1 0\n 7 10 1 0\nM RGP 4 3 3 6 1 9 4 10 2\nM END\n',
65
+ 'smiles': '[*:4]OC([C@H](CS[*:3])N[*:1])[*:2]',
66
+ 'polymerType': 'PEPTIDE',
67
+ 'monomerType': 'Backbone',
68
+ 'naturalAnalog': 'C',
69
+ 'id': 16,
70
+ 'rgroups': [
71
+ {
72
+ 'alternateId': 'R1-H',
73
+ 'capGroupName': 'H',
74
+ 'capGroupSmiles': '[H][*:1]',
75
+ 'label': 'R1'
76
+ },
77
+ {
78
+ 'alternateId': 'R2-OH',
79
+ 'capGroupName': 'OH',
80
+ 'capGroupSmiles': 'O[*:2]',
81
+ 'label': 'R2'
82
+ },
83
+ {
84
+ 'alternateId': 'R3-Something',
85
+ 'capGroupName': 'Something',
86
+ 'capGroupSmiles': 'C=CC([*:3])=C',
87
+ 'label': 'R3'
88
+ },
89
+ {
90
+ 'alternateId': 'R4-SomethingElse',
91
+ 'capGroupName': 'SomethingElse',
92
+ 'capGroupSmiles': 'ClCCCC=CC([*:4])=CCC',
93
+ 'label': 'R4'
94
+ }
95
+ ],
96
+ 'author': 'Admin',
97
+ 'createDate': '2026-02-18T14:48:41.723Z',
98
+ 'meta': {}
99
+ };
100
+
26
101
  const enum Tests {
27
102
  PT = 'peptides-fasta',
28
103
  DNA = 'dna-fasta',
@@ -225,6 +300,51 @@ PEPTIDE1{Lys_Boc.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2.Thr_PO3H2.Aca.Tyr
225
300
  expect(polishMolfile(resCol.get(0)), polishMolfile(tgtMol));
226
301
  });
227
302
 
303
+ async function _testToAtomicLevelWithCustomMonomer(srcHelm: string, expectedSmiles: string): Promise<void> {
304
+ let error: any = null;
305
+ // first, patch the monomer library with a custom monomers
306
+ const monomerLib = monomerLibHelper.getMonomerLib();
307
+ // @ts-ignore
308
+ monomerLib._monomers['PEPTIDE'][complexMonomerAllylRgroup.symbol] = complexMonomerAllylRgroup;
309
+ // @ts-ignore
310
+ monomerLib._monomers['PEPTIDE'][complexMonomerWithComplexRgroup.symbol] = complexMonomerWithComplexRgroup;
311
+
312
+ try {
313
+ const converter = await seqHelper.getHelmToMolfileConverter(monomerLib);
314
+ const resMolFile = seqHelper.helmToAtomicLevelSingle(srcHelm, converter, true, true);
315
+ const resSmiles = grok.chem.convert(resMolFile.molfile, grok.chem.Notation.Unknown, grok.chem.Notation.Smiles);
316
+ expect(resSmiles, expectedSmiles);
317
+ } catch (err) {
318
+ error = err;
319
+ }
320
+ // restore the monomer library to avoid affecting other tests
321
+ // @ts-ignore
322
+ delete monomerLib._monomers['PEPTIDE'][complexMonomerAllylRgroup.symbol];
323
+ // @ts-ignore
324
+ delete monomerLib._monomers['PEPTIDE'][complexMonomerWithComplexRgroup.symbol];
325
+
326
+ if (error)
327
+ throw error;
328
+ }
329
+
330
+ test('SingleHelmMonomerWithAllylGroups', async () => {
331
+ const srcHelm = `PEPTIDE1{[${complexMonomerAllylRgroup.symbol}]}$$$$V2.0`;
332
+ const expectedSmiles = 'C=CSC[C@H](N)C(=O)O';
333
+ await _testToAtomicLevelWithCustomMonomer(srcHelm, expectedSmiles);
334
+ });
335
+
336
+ test('SingleHelmMonomerWithComplexRGroups', async () => {
337
+ const srcHelm = `PEPTIDE1{[${complexMonomerWithComplexRgroup.symbol}]}$$$$V2.0`;
338
+ const expectedSmiles = 'C=CC(=C)SC[C@H](N)C(O)OC(C=CCCCCl)=CCC';
339
+ await _testToAtomicLevelWithCustomMonomer(srcHelm, expectedSmiles);
340
+ });
341
+
342
+ test('HelmPolymerWithComplexRGroups', async () => {
343
+ const srcHelm = `PEPTIDE1{[dI].[Trp_Ome].[Asp_OMe].[D-Cit].[meG].[Phe_4NH2].[Phe_34diCl].[meY].[Pro_4Me3OH].[Met_O].[NMe2Abz].[Tyr_Ph4OH].[3Pal].[xiIle].[Tyr_35diI].[Ala_tBu]}|PEPTIDE2{[${complexMonomerAllylRgroup.symbol}].[${complexMonomerWithComplexRgroup.symbol}]}$PEPTIDE1,PEPTIDE1,16:R2-1:R1|PEPTIDE1,PEPTIDE2,1:R3-1:R1$$$V2.0`;
344
+ const expectedSmiles = 'C=CSC[C@H](NCC[C@@H](C)[C@H]1NC(=O)[C@H](C(C)(C)C)NC(=O)[C@H](Cc2cc(I)c(O)c(I)c2)NC(=O)[C@H](C(C)CC)NC(=O)[C@H](Cc2cccnc2)NC(=O)[C@H](Cc2ccc(Oc3ccc(O)cc3)cc2)NC(=O)c2ccccc2N(C)C(=O)[C@H](CCS(C)=O)NC(=O)[C@@H]2C(O)C(C)CN2C(=O)[C@H](Cc2ccc(O)cc2)N(C)C(=O)[C@H](Cc2ccc(Cl)c(Cl)c2)NC(=O)[C@H](Cc2ccc(N)cc2)NC(=O)CN(C)C(=O)[C@@H](CCCNC(N)=O)NC(=O)[C@H](CC(=O)OC)NC(=O)[C@H](Cc2cn(OC)c3ccccc23)NC1=O)C(=O)N[C@@H](CSC(=C)C=C)C(O)OC(C=CCCCCl)=CCC';
345
+ await _testToAtomicLevelWithCustomMonomer(srcHelm, expectedSmiles);
346
+ });
347
+
228
348
  async function _testToAtomicLevel(
229
349
  df: DG.DataFrame, seqColName: string = 'seq', monomerLibHelper: IMonomerLibHelper
230
350
  ): Promise<DG.Column | null> {
package/src/utils/biln.ts CHANGED
@@ -6,7 +6,7 @@ import * as DG from 'datagrok-api/dg';
6
6
 
7
7
  /* eslint-disable max-len */
8
8
  import {ISeqHelper} from '@datagrok-libraries/bio/src/utils/seq-helper';
9
- import {INotationProvider, SplitterFunc} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
9
+ import {INotationProvider, NotationProviderBase, SplitterFunc} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
10
10
  import {NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule/consts';
11
11
  import {CellRendererBackBase} from '@datagrok-libraries/bio/src/utils/cell-renderer-back-base';
12
12
  import {MonomerPlacer} from '@datagrok-libraries/bio/src/utils/cell-renderer-monomer-placer';
@@ -14,16 +14,24 @@ import {monomerToShort, splitterAsBiln} from '@datagrok-libraries/bio/src/utils/
14
14
  import {_package} from '../package';
15
15
  /* eslint-enable max-len */
16
16
 
17
- export class BilnNotationProvider implements INotationProvider {
17
+ export class BilnNotationProvider extends NotationProviderBase implements INotationProvider {
18
18
  public readonly splitter: SplitterFunc;
19
19
 
20
20
  get defaultGapOriginal(): string { return ''; }
21
21
 
22
+ static override get notationName(): string { return NOTATION.BILN; }
23
+
24
+ static override get implementsFromHelm(): boolean { return false; }
25
+
26
+ static override convertFromHelm(helm: string, options: any): string {
27
+ throw new Error('Canonical way of converting from helm to biln must be used');
28
+ }
22
29
  constructor(
23
30
  public readonly separator: string,
24
31
  public readonly seqHelper: ISeqHelper,
25
32
  public readonly seqCol: DG.Column
26
33
  ) {
34
+ super();
27
35
  this.splitter = splitterAsBiln.bind(this);
28
36
  }
29
37
 
@@ -1,3 +1,4 @@
1
+ /* eslint-disable max-len */
1
2
  import * as DG from 'datagrok-api/dg';
2
3
  import * as ui from 'datagrok-api/ui';
3
4
  import * as grok from 'datagrok-api/grok';
@@ -7,6 +8,7 @@ import {Subscription} from 'rxjs';
7
8
 
8
9
  import {NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
9
10
  import {ISeqHelper} from '@datagrok-libraries/bio/src/utils/seq-helper';
11
+ import {NotationProviderBase} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
10
12
 
11
13
 
12
14
  let convertDialog: DG.Dialog | null = null;
@@ -18,7 +20,7 @@ let convertDialogSubs: Subscription[] = [];
18
20
  * @param {DG.Column<string>} col Column with 'Macromolecule' semantic type
19
21
  * @param {ISeqHelper} seqHelper
20
22
  */
21
- export function convert(col: DG.Column<string> | undefined, seqHelper: ISeqHelper): void {
23
+ export async function convert(col: DG.Column<string> | undefined, seqHelper: ISeqHelper): Promise<void> {
22
24
  let srcCol = col ?? grok.shell.t.columns.bySemType('Macromolecule')!;
23
25
  if (!srcCol)
24
26
  throw new Error('No column with Macromolecule semantic type found');
@@ -41,6 +43,9 @@ export function convert(col: DG.Column<string> | undefined, seqHelper: ISeqHelpe
41
43
  NOTATION.HELM,
42
44
  NOTATION.BILN
43
45
  ];
46
+
47
+ const notationProviderConstructors = await NotationProviderBase.getProviderConstructors();
48
+
44
49
  const toggleColumn = (newCol: DG.Column) => {
45
50
  srcCol = newCol;
46
51
  converterSh = seqHelper.getSeqHandler(srcCol);
@@ -49,6 +54,15 @@ export function convert(col: DG.Column<string> | undefined, seqHelper: ISeqHelpe
49
54
  separatorInput.value = '/'; // helm monomers can have - in the name like D-aThr;
50
55
  dialogHeader.textContent = 'Current notation: ' + currentNotation;
51
56
  filteredNotations = notations.filter((e) => e !== currentNotation);
57
+ if (currentNotation === NOTATION.CUSTOM)
58
+ filteredNotations = [NOTATION.HELM];
59
+ if (currentNotation === NOTATION.HELM) {
60
+ // add custom notations that
61
+ notationProviderConstructors.forEach((c) => {
62
+ if (c.implementsFromHelm)
63
+ filteredNotations.unshift(c.notationName as NOTATION); // hack :)
64
+ });
65
+ }
52
66
  targetNotationInput = ui.input.choice('Convert to', {
53
67
  value: filteredNotations[0], items: filteredNotations,
54
68
  onValueChanged: toggleSeparator
@@ -72,6 +86,15 @@ export function convert(col: DG.Column<string> | undefined, seqHelper: ISeqHelpe
72
86
 
73
87
  const separatorArray = ['-', '.', '/'];
74
88
  let filteredNotations = notations.filter((e) => e !== currentNotation);
89
+ if (currentNotation === NOTATION.CUSTOM)
90
+ filteredNotations = [NOTATION.HELM];
91
+ if (currentNotation === NOTATION.HELM) {
92
+ // add custom notations that
93
+ notationProviderConstructors.forEach((c) => {
94
+ if (c.implementsFromHelm)
95
+ filteredNotations.unshift(c.notationName as NOTATION); // hack :)
96
+ });
97
+ }
75
98
 
76
99
  const separatorInput = ui.input.choice('Separator', {value: separatorArray[0], items: separatorArray});
77
100
 
@@ -105,8 +128,11 @@ export function convert(col: DG.Column<string> | undefined, seqHelper: ISeqHelpe
105
128
  .onOK(async () => {
106
129
  const targetNotation = targetNotationInput.value as NOTATION;
107
130
  const separator: string | undefined = targetNotation === NOTATION.SEPARATOR ? separatorInput.value! : undefined;
131
+ let notationProviderConstructor: typeof NotationProviderBase | undefined = undefined;
132
+ if (!notations.includes(targetNotation) && notationProviderConstructors.find((c) => c.notationName === targetNotation))
133
+ notationProviderConstructor = notationProviderConstructors.find((c) => c.notationName === targetNotation)!;
108
134
 
109
- await convertDo(srcCol, seqHelper, targetNotation, separator);
135
+ await convertDo(srcCol, seqHelper, targetNotation, separator, notationProviderConstructor);
110
136
  })
111
137
  .show({x: 350, y: 100});
112
138
 
@@ -123,18 +149,38 @@ export function convert(col: DG.Column<string> | undefined, seqHelper: ISeqHelpe
123
149
  * @param {NOTATION} targetNotation Target notation
124
150
  * @param {string | null} separator Separator for SEPARATOR notation
125
151
  */
126
- export async function convertDo(srcCol: DG.Column, seqHelper: ISeqHelper, targetNotation: NOTATION, separator?: string): Promise<DG.Column> {
127
- const converterSh = seqHelper.getSeqHandler(srcCol);
128
- const newColumn = converterSh.convert(targetNotation, separator);
129
- srcCol.dataFrame.columns.add(newColumn);
130
-
131
- // Call detector directly to escape some error on detectSemanticTypes
132
- const semType = await grok.functions.call('Bio:detectMacromolecule', {col: newColumn});
133
- if (semType)
134
- newColumn.semType = semType;
135
-
136
- // call to calculate 'cell.renderer' tag
137
- await grok.data.detectSemanticTypes(srcCol.dataFrame);
138
-
139
- return newColumn;
152
+ export async function convertDo(srcCol: DG.Column, seqHelper: ISeqHelper, targetNotation: NOTATION, separator?: string, notationProviderConstructor?: typeof NotationProviderBase): Promise<DG.Column> {
153
+ if (notationProviderConstructor) {
154
+ const newColName = srcCol.dataFrame.columns.getUnusedName(`${notationProviderConstructor.notationName}(${srcCol.name})`);
155
+ const newCol = DG.Column.string(newColName, srcCol.length);
156
+ newCol.init((i) => {
157
+ const seq = srcCol.get(i);// we know for sure (in Macron accent) that it is helm
158
+ try {
159
+ return notationProviderConstructor.convertFromHelm(seq, {});
160
+ } catch (e) {
161
+ console.error(`Error converting sequence at row ${i}: ${e instanceof Error ? e.message : e}`);
162
+ return '';
163
+ }
164
+ });
165
+ srcCol.dataFrame.columns.add(newCol);
166
+ const semType = await grok.functions.call('Bio:detectMacromolecule', {col: newCol});
167
+ if (semType)
168
+ newCol.semType = semType;
169
+ await grok.data.detectSemanticTypes(srcCol.dataFrame);
170
+ return newCol;
171
+ } else {
172
+ const converterSh = seqHelper.getSeqHandler(srcCol);
173
+ const newColumn = converterSh.convert(targetNotation, separator);
174
+ srcCol.dataFrame.columns.add(newColumn);
175
+
176
+ // Call detector directly to escape some error on detectSemanticTypes
177
+ const semType = await grok.functions.call('Bio:detectMacromolecule', {col: newColumn});
178
+ if (semType)
179
+ newColumn.semType = semType;
180
+
181
+ // call to calculate 'cell.renderer' tag
182
+ await grok.data.detectSemanticTypes(srcCol.dataFrame);
183
+
184
+ return newColumn;
185
+ }
140
186
  }
@@ -16,6 +16,11 @@ export abstract class MolfileAtoms {
16
16
  this.rawAtomLines[atomIdx] = this.rawAtomLines[atomIdx].replace(R_GROUP_ELEMENT_SYMBOL, newElementSymbol);
17
17
  }
18
18
 
19
+ appendAtomLine(rawLine: string, x: number, y: number): void {
20
+ this.rawAtomLines.push(rawLine);
21
+ this.coordinates.push({x, y});
22
+ }
23
+
19
24
  deleteAtoms(indices: number[]): void {
20
25
  this.coordinates = this.coordinates.filter((_, idx) => !indices.includes(idx));
21
26
  this.rawAtomLines = this.rawAtomLines.filter((_, idx) => !indices.includes(idx));
@@ -13,6 +13,11 @@ export abstract class MolfileBonds {
13
13
  return this.bondedAtomPairs;
14
14
  }
15
15
 
16
+ appendBondLine(rawLine: string, bondedPair: number[]): void {
17
+ this.rawBondLines.push(rawLine);
18
+ this.bondedAtomPairs.push(bondedPair);
19
+ }
20
+
16
21
  deleteBondLines(indices: number[]): void {
17
22
  this.rawBondLines = this.rawBondLines.filter((_, idx) => !indices.includes(idx));
18
23
  this.bondedAtomPairs = this.bondedAtomPairs.filter((_, idx) => !indices.includes(idx));
@@ -1,6 +1,8 @@
1
+ import {RDModule} from '@datagrok-libraries/chem-meta/src/rdkit-api';
1
2
  import {MolfileAtoms} from './mol-atoms';
2
3
  import {MolfileBonds} from './mol-bonds';
3
4
  import {RGroupHandler} from './r-group-handler';
5
+ import {CapGroupInfo} from './types';
4
6
 
5
7
  export abstract class MolfileWrapper {
6
8
  constructor(protected monomerSymbol: string) { }
@@ -76,8 +78,8 @@ export abstract class MolfileWrapper {
76
78
  this.bonds.shift(shift);
77
79
  }
78
80
 
79
- capRGroups(capGroupElements: string[]): void {
80
- this.rGroups.capRGroups(capGroupElements);
81
+ capRGroups(capGroupInfo: CapGroupInfo[], rdKitModule: RDModule): void {
82
+ this.rGroups.capRGroups(capGroupInfo, rdKitModule);
81
83
  }
82
84
  }
83
85
 
@@ -6,17 +6,23 @@ import {MolfileHandler} from '@datagrok-libraries/chem-meta/src/parsing-utils/mo
6
6
  import {Helm} from './helm';
7
7
  import {MolfileWrapper} from './mol-wrapper';
8
8
  import {MolfileWrapperFactory} from './mol-wrapper-factory';
9
+ import {CapGroupInfo} from './types';
10
+
11
+ /** Returns true if the string is a valid single element symbol (e.g. 'H', 'O', 'C', 'Cl') */
12
+ function isSimpleElement(s: string): boolean {
13
+ return /^[A-Z][a-z]?$/.test(s);
14
+ }
9
15
 
10
16
  export class MonomerWrapper {
11
17
  private readonly molfileWrapper: MolfileWrapper;
12
- private capGroupElements: string[] = [];
18
+ private capGroupInfo: CapGroupInfo[] = [];
13
19
  private static molfileV2KToV3KCache: Map<string, string> = new Map();
14
20
  constructor(
15
21
  public readonly monomerSymbol: string,
16
22
  public readonly monomerIdx: number,
17
23
  private helm: Helm,
18
24
  shift: { x: number, y: number },
19
- rdKitModule: RDModule,
25
+ private readonly rdKitModule: RDModule,
20
26
  private readonly monomerLib: IMonomerLibBase
21
27
  ) {
22
28
  const libraryMonomerObject = this.getLibraryMonomerObject();
@@ -26,7 +32,7 @@ export class MonomerWrapper {
26
32
  molfile = this.convertMolfileToV3KFormat(molfile, monomerSymbol, rdKitModule);
27
33
 
28
34
  this.molfileWrapper = MolfileWrapperFactory.getInstance(molfile, monomerSymbol);
29
- this.capGroupElements = this.getCapGroupElements(libraryMonomerObject);
35
+ this.capGroupInfo = this.getCapGroupInfo(libraryMonomerObject);
30
36
 
31
37
  this.removeRGroups(helm.bondedRGroupsMap[monomerIdx]!);
32
38
  this.capRemainingRGroups();
@@ -63,20 +69,28 @@ export class MonomerWrapper {
63
69
  return monomer;
64
70
  }
65
71
 
66
- private getCapGroupElements(
72
+ private getCapGroupInfo(
67
73
  libraryMonomerObject: Monomer
68
- ): string[] {
74
+ ): CapGroupInfo[] {
69
75
  const rgroups = libraryMonomerObject.rgroups;
70
- const result = rgroups.map((rgroup) => {
76
+ return rgroups.map((rgroup, ind) => {
71
77
  const smiles = rgroup[HELM_RGROUP_FIELDS.CAP_GROUP_SMILES] ||
72
78
  // WARNING: ignore because both key variants coexist in HELM Core Library!
73
79
  // @ts-ignore
74
80
  rgroup[HELM_RGROUP_FIELDS.CAP_GROUP_SMILES_UPPERCASE];
81
+ let rgroupId = rgroup[HELM_RGROUP_FIELDS.LABEL][1];
82
+ if (!rgroupId || !parseInt(rgroupId) || isNaN(parseInt(rgroupId))) {
83
+ // try to parse it from smiles, which can look like '[H][*:1]', 'O[*:2]', 'C=C[*:3]'
84
+ const match = smiles?.match(/\[\*:(\d)\]/);
85
+ if (match && match[1])
86
+ rgroupId = match[1];
87
+ }
88
+ if (!rgroupId || !parseInt(rgroupId) || isNaN(parseInt(rgroupId)))
89
+ rgroupId = `${ind + 1}`; // fallback to index-based id, starting from 1
75
90
  // extract the element symbol
76
- return smiles.replace(/(\[|\]|\*|:|\d)/g, '');
91
+ const element = smiles.replace(/(\[|\]|\*|:|\d)/g, '');
92
+ return {element, smiles, isSimple: isSimpleElement(element), rGroupId: parseInt(rgroupId)};
77
93
  });
78
-
79
- return result;
80
94
  }
81
95
 
82
96
  private shiftCoordinates(shift: { x: number, y: number }): void {
@@ -96,7 +110,7 @@ export class MonomerWrapper {
96
110
  }
97
111
 
98
112
  private capRemainingRGroups(): void {
99
- this.molfileWrapper.capRGroups(this.capGroupElements);
113
+ this.molfileWrapper.capRGroups(this.capGroupInfo, this.rdKitModule);
100
114
  }
101
115
 
102
116
  replaceRGroupWithAttachmentAtom(rGroupId: number, attachmentAtomIdx: number): void {
@@ -1,8 +1,10 @@
1
1
  import {MolfileHandlerBase} from '@datagrok-libraries/chem-meta/src/parsing-utils/molfile-handler-base';
2
+ import {MolfileHandler} from '@datagrok-libraries/chem-meta/src/parsing-utils/molfile-handler';
3
+ import {RDModule} from '@datagrok-libraries/chem-meta/src/rdkit-api';
2
4
  import {HYDROGEN_SYMBOL} from './const';
3
5
  import {MolfileAtoms} from './mol-atoms';
4
6
  import {MolfileBonds} from './mol-bonds';
5
- import {PositionInBonds} from './types';
7
+ import {CapGroupInfo, PositionInBonds} from './types';
6
8
 
7
9
 
8
10
  export class RGroupHandler {
@@ -108,15 +110,106 @@ export class RGroupHandler {
108
110
  }
109
111
 
110
112
  /** WARNING: capping RGroups and deletion of the bonded ones don't commute */
111
- capRGroups(capGroupElements: string[]): void {
113
+ capRGroups(capGroupInfo: CapGroupInfo[], rdKitModule: RDModule): void {
112
114
  this.rGroupIdToAtomicIndexMap.forEach((atomicIdx, rGroupId) => {
113
- const element = capGroupElements[rGroupId - 1];
114
- if (element === HYDROGEN_SYMBOL) {
115
- this.removeRGroups([rGroupId]);
116
- this.deleteBondLineWithSpecifiedRGroup(rGroupId);
115
+ const info = capGroupInfo.find((info) => info.rGroupId === rGroupId) ?? capGroupInfo[rGroupId - 1];
116
+ if (info.isSimple) {
117
+ if (info.element === HYDROGEN_SYMBOL) {
118
+ this.removeRGroups([rGroupId]);
119
+ this.deleteBondLineWithSpecifiedRGroup(rGroupId);
120
+ } else
121
+ this.atoms.replaceRGroupSymbolByElement(atomicIdx, info.element);
117
122
  } else
118
- this.atoms.replaceRGroupSymbolByElement(atomicIdx, element);
123
+ this.capWithComplexGroup(atomicIdx, info.smiles, rdKitModule);
119
124
  });
120
125
  }
126
+
127
+ /** Cap an R-group with a multi-atom cap group by parsing the cap SMILES,
128
+ * then inserting its atoms and bonds into the monomer molfile */
129
+ private capWithComplexGroup(
130
+ rGroupAtomicIdx: number, capSmiles: string, rdKitModule: RDModule
131
+ ): void {
132
+ // Replace [*:N] with placeholder element Xe so RDKit can parse the SMILES
133
+ const PLACEHOLDER = 'Xe';
134
+ const parsableSmiles = capSmiles.replace(/\[\*:\d+\]/g, `[${PLACEHOLDER}]`);
135
+ const capMol = rdKitModule.get_mol(parsableSmiles);
136
+ if (!capMol)
137
+ throw new Error(`Cannot parse cap group SMILES: ${capSmiles}`);
138
+
139
+ let capMolfile: string;
140
+ try {
141
+ capMolfile = capMol.get_v3Kmolblock();
142
+ } finally {
143
+ capMol.delete();
144
+ }
145
+
146
+ const capHandler = MolfileHandler.getInstance(capMolfile);
147
+ const capAtomLines = capHandler.getAtomLines();
148
+ const capBondPairs = capHandler.pairsOfBondedAtoms;
149
+ const capBondLines = capHandler.getBondLines();
150
+ const capX = capHandler.x;
151
+ const capY = capHandler.y;
152
+ const capAtomTypes = capHandler.atomTypes;
153
+
154
+ // Find the placeholder atom (was the [*:N] attachment point)
155
+ let dummyCapIdx = -1; // 0-based
156
+ for (let i = 0; i < capAtomTypes.length; i++) {
157
+ if (capAtomTypes[i] === PLACEHOLDER) {
158
+ dummyCapIdx = i;
159
+ break;
160
+ }
161
+ }
162
+ if (dummyCapIdx === -1)
163
+ throw new Error(`Cannot find placeholder atom in cap group SMILES: ${capSmiles}`);
164
+
165
+ // Find the attachment atom (bonded to placeholder) and the bond connecting them
166
+ let attachmentCapIdx = -1; // 0-based
167
+ for (let i = 0; i < capBondPairs.length; i++) {
168
+ const [a1, a2] = capBondPairs[i]; // 1-based
169
+ if (a1 === dummyCapIdx + 1) {
170
+ attachmentCapIdx = a2 - 1;
171
+ break;
172
+ }
173
+ if (a2 === dummyCapIdx + 1) {
174
+ attachmentCapIdx = a1 - 1;
175
+ break;
176
+ }
177
+ }
178
+ if (attachmentCapIdx === -1)
179
+ throw new Error(`Cannot find attachment atom in cap group SMILES: ${capSmiles}`);
180
+
181
+ // Compute coordinate translation: place cap attachment at R-group position
182
+ const rGroupCoords = this.atoms.atomCoordinates[rGroupAtomicIdx];
183
+ const tx = rGroupCoords.x - capX[attachmentCapIdx];
184
+ const ty = rGroupCoords.y - capY[attachmentCapIdx];
185
+
186
+ // Replace the R# atom symbol with the attachment atom's element
187
+ const attachmentSymbol = capAtomTypes[attachmentCapIdx];
188
+ this.atoms.replaceRGroupSymbolByElement(rGroupAtomicIdx, attachmentSymbol);
189
+
190
+ // Build index mapping: cap 1-based → monomer 1-based
191
+ const capToMonomer = new Map<number, number>();
192
+ capToMonomer.set(attachmentCapIdx + 1, rGroupAtomicIdx + 1);
193
+
194
+ // Append remaining cap atoms (excluding placeholder and attachment)
195
+ let nextMonomerIdx = this.atoms.count + 1; // 1-based
196
+ for (let i = 0; i < capAtomLines.length; i++) {
197
+ if (i === dummyCapIdx || i === attachmentCapIdx) continue;
198
+ const newX = capX[i] + tx;
199
+ const newY = capY[i] + ty;
200
+ this.atoms.appendAtomLine(capAtomLines[i], newX, newY);
201
+ capToMonomer.set(i + 1, nextMonomerIdx);
202
+ nextMonomerIdx++;
203
+ }
204
+
205
+ // Append cap bonds (excluding any bond involving the placeholder)
206
+ for (let i = 0; i < capBondPairs.length; i++) {
207
+ const [a1, a2] = capBondPairs[i]; // 1-based in cap
208
+ if (a1 === dummyCapIdx + 1 || a2 === dummyCapIdx + 1) continue;
209
+ const newA1 = capToMonomer.get(a1)!;
210
+ const newA2 = capToMonomer.get(a2)!;
211
+ this.bonds.appendBondLine(capBondLines[i], [newA1, newA2]);
212
+ }
213
+ }
121
214
  }
122
215
 
@@ -11,3 +11,15 @@ export type PositionInBonds = {
11
11
  nodeIdx: number,
12
12
  }
13
13
 
14
+ /** Cap group information for an R-group */
15
+ export type CapGroupInfo = {
16
+ /** Extracted element string (e.g. 'H', 'O', 'C=C') */
17
+ element: string,
18
+ /** Raw cap group SMILES (e.g. '[H][*:1]', 'O[*:2]', 'C=C[*:3]') */
19
+ smiles: string,
20
+ /** Whether the cap is a single atom (valid element symbol) */
21
+ isSimple: boolean,
22
+ /** Number of R group, to handle cases where its not sorted */
23
+ rGroupId: number,
24
+ }
25
+