@datagrok/sequence-translator 1.6.4 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,7 +10,7 @@ import {errInfo} from '@datagrok-libraries/bio/src/utils/err-info';
10
10
  import {ALPHABET, NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
11
11
  import {getSeqHelper, ISeqHelper, ToAtomicLevelRes} from '@datagrok-libraries/bio/src/utils/seq-helper';
12
12
  import {MmcrTemps} from '@datagrok-libraries/bio/src/utils/cell-renderer-consts';
13
- import {addMonomerHoverLink, buildMonomerHoverLink} from '@datagrok-libraries/bio/src/monomer-works/monomer-hover';
13
+ import {addMonomerHoverLink} from '@datagrok-libraries/bio/src/monomer-works/monomer-hover';
14
14
  import {getRdKitModule} from '@datagrok-libraries/bio/src/chem/rdkit-module';
15
15
  import {RDModule} from '@datagrok-libraries/chem-meta/src/rdkit-api';
16
16
 
@@ -37,10 +37,13 @@ import {PolymerTypes} from '@datagrok-libraries/js-draw-lite/src/types/org';
37
37
  import {_toAtomicLevel, getMonomersDictFromLib} from '@datagrok-libraries/bio/src/monomer-works/to-atomic-level';
38
38
  import {monomerSeqToMolfile} from '@datagrok-libraries/bio/src/monomer-works/to-atomic-level-utils';
39
39
  import {LRUCache} from 'lru-cache';
40
- import {getMonomerHover, ISubstruct, setMonomerHover} from '@datagrok-libraries/chem-meta/src/types';
40
+ import {addSubstructProvider, getMonomerHover, ISubstruct, setMonomerHover}
41
+ from '@datagrok-libraries/chem-meta/src/types';
41
42
  import {getMolHighlight} from '@datagrok-libraries/bio/src/monomer-works/seq-to-molfile';
42
43
  import {ChemTags} from '@datagrok-libraries/chem-meta/src/consts';
44
+ import {mergeSubstructs} from '@datagrok-libraries/chem-meta/src/types';
43
45
  import {getMonomerLibHelper} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
46
+ import {dealGroups, helmToMol} from './conversion/pt-atomic';
44
47
 
45
48
  type PolyToolConvertSerialized = {
46
49
  generateHelm: boolean;
@@ -65,10 +68,10 @@ export async function polyToolEnumerateChemUI(cell?: DG.Cell): Promise<void> {
65
68
 
66
69
  export async function polyToolConvertUI(): Promise<void> {
67
70
  await _package.initPromise;
68
- let dialog: DG.Dialog;
71
+ let dialog: DG.Dialog | null = null;
69
72
  try {
70
73
  dialog = await getPolyToolConvertDialog();
71
- dialog.show();
74
+ dialog?.show();
72
75
  } catch (err: any) {
73
76
  const [errMsg, errStack] = errInfo(err);
74
77
  grok.shell.warning('To run PolyTool Conversion, open a dataframe with macromolecules');
@@ -76,25 +79,38 @@ export async function polyToolConvertUI(): Promise<void> {
76
79
  }
77
80
  }
78
81
 
79
- export async function getPolyToolConvertDialog(srcCol?: DG.Column): Promise<DG.Dialog> {
82
+ export async function getPolyToolConvertDialog(srcCol?: DG.Column): Promise<DG.Dialog | null> {
80
83
  const subs: Unsubscribable[] = [];
81
84
  const destroy = () => {
82
85
  for (const sub of subs) sub.unsubscribe();
83
86
  };
84
87
  try {
85
88
  let srcColVal: DG.Column<string> | undefined = srcCol;
89
+ const srcColList = grok.shell.t.columns.bySemTypeAll(DG.SEMTYPE.MACROMOLECULE);
90
+ const customSrcCols = srcColList.filter((col) => {
91
+ const sh = _package.seqHelper.getSeqHandler(col);
92
+ return sh.notation === NOTATION.CUSTOM;
93
+ });
86
94
  if (!srcColVal) {
87
- const srcColList = grok.shell.t.columns.bySemTypeAll(DG.SEMTYPE.MACROMOLECULE);
88
95
  if (srcColList.length < 1)
89
96
  throw new Error(PT_ERROR_DATAFRAME);
97
+
98
+ if (customSrcCols.length < 1) {
99
+ const toAtomicLevelFunc = DG.Func.find({package: 'Bio', name: 'toAtomicLevel'})[0];
100
+ if (toAtomicLevelFunc) {
101
+ toAtomicLevelFunc.prepare().edit();
102
+ return null;
103
+ }
104
+ grok.shell.warning('Polytool requires a macromolecule column with custom notation. \n\nUse Top menu | Bio | Transform | To Atomic Level.');
105
+ return null;
106
+ }
107
+
90
108
  srcColVal = srcColList[0];
91
109
  }
92
110
  const srcColInput = ui.input.column('Column', {
93
111
  table: srcColVal.dataFrame, value: srcColVal,
94
112
  filter: (col: DG.Column) => {
95
- if (col.semType !== DG.SEMTYPE.MACROMOLECULE) return false;
96
- const sh = _package.seqHelper.getSeqHandler(col);
97
- return sh.notation === NOTATION.CUSTOM;
113
+ return customSrcCols.includes(col);
98
114
  }
99
115
  });
100
116
 
@@ -259,16 +275,6 @@ async function getPolyToolEnumerationChemDialog(cell?: DG.Cell): Promise<DG.Dial
259
275
  }
260
276
  }
261
277
 
262
- function dealGroups(col: DG.Column<string>): void {
263
- for (let i = 0; i < col.length; i++) {
264
- col.set(i, col.get(i)!.replaceAll('undefined', 'H'));
265
- col.set(i, col.get(i)!.replaceAll('Oh', 'O'));
266
- col.set(i, col.get(i)!.replaceAll('0.000000 3', '0.000000 0'));
267
- col.set(i, col.get(i)!.replaceAll('?', 'O'));
268
- col.set(i, col.get(i)!.replaceAll('0 3\n', '0 0\n'));
269
- }
270
- }
271
-
272
278
  /** Returns Helm and molfile columns. */
273
279
  export async function polyToolConvert(seqCol: DG.Column<string>,
274
280
  generateHelm: boolean, linearize: boolean, chiralityEngine: boolean, highlight: boolean, ruleFiles: string[]
@@ -283,7 +289,7 @@ export async function polyToolConvert(seqCol: DG.Column<string>,
283
289
 
284
290
  const table = seqCol.dataFrame;
285
291
  const rules = await getRules(ruleFiles);
286
- const [resList, isLinear] = doPolyToolConvert(seqCol.toList(), rules, helmHelper);
292
+ const [resList, isLinear, positionMaps] = doPolyToolConvert(seqCol.toList(), rules, helmHelper);
287
293
 
288
294
  const resHelmColName = getUnusedName(table, `transformed(${seqCol.name})`);
289
295
  const resHelmCol = DG.Column.fromType(DG.COLUMN_TYPE.STRING, resHelmColName, resList.length)
@@ -295,61 +301,25 @@ export async function polyToolConvert(seqCol: DG.Column<string>,
295
301
 
296
302
 
297
303
  const rdKitModule: RDModule = await getRdKitModule();
304
+ const seqHelper: ISeqHelper = await getSeqHelper();
305
+
298
306
  const lib = await getOverriddenLibrary(rules);
299
307
  const resHelmColTemp = resHelmCol.temp;
300
308
  resHelmColTemp[MmcrTemps.overriddenLibrary] = lib;
301
309
  resHelmCol.temp = resHelmColTemp;
302
310
 
303
- const seqHelper: ISeqHelper = await getSeqHelper();
304
- const toAtomicLevelRes =
305
- await seqHelper.helmToAtomicLevel(resHelmCol, chiralityEngine, highlight, lib);
306
-
307
- const resMolCol = toAtomicLevelRes.molCol!;
308
-
309
-
310
- const allLinear = isLinear.filter((l) => l).length;
311
- if (linearize && allLinear > 0) {
312
- const lin = new Array<string>(allLinear);
313
- let counter = 0;
314
- for (let i = 0; i < isLinear.length; i++) {
315
- if (isLinear[i]) {
316
- lin[counter] = resList[i];
317
- counter++;
318
- }
319
- }
320
-
321
- const linCol = DG.Column.fromStrings('helm', lin);
322
- linCol.semType = DG.SEMTYPE.MACROMOLECULE;
323
- linCol.meta.units = NOTATION.HELM;
324
- linCol.setTag(DG.TAGS.CELL_RENDERER, 'helm');
325
-
326
- const monomerLibHelper = await getMonomerLibHelper();
327
- const systemMonomerLib = monomerLibHelper.getMonomerLib();
328
- let linear: ToAtomicLevelRes | null = null;
329
- try {
330
- linear = await _toAtomicLevel(DG.DataFrame.create(0), linCol, systemMonomerLib, seqHelper, rdKitModule);
331
- counter = 0;
332
- for (let i = 0; i < isLinear.length; i++) {
333
- if (isLinear[i]) {
334
- resMolCol.set(i, linear!.molCol!.get(counter));
335
- counter++;
336
- }
337
- }
338
- } catch (e: any) {
339
- grok.shell.warning('PolyTool was not able to linearize sequences');
340
- }
341
- }
342
-
343
- dealGroups(resMolCol);
311
+ const resMolCol = await helmToMol(resHelmCol, resList,
312
+ isLinear, chiralityEngine, highlight, linearize, lib, rdKitModule, seqHelper);
344
313
  resMolCol.name = getUnusedName(table, `molfile(${seqCol.name})`);
345
314
  resMolCol.semType = DG.SEMTYPE.MOLECULE;
315
+
346
316
  if (table) {
347
317
  table.columns.add(resMolCol, true);
348
318
  await grok.data.detectSemanticTypes(table);
349
319
  }
350
320
 
351
- buildMonomerHoverLink(resHelmCol, resMolCol, lib, seqHelper, rdKitModule);
352
- buildCyclizedMonomerHoverLink(seqCol, resHelmCol, resMolCol, lib, seqHelper, rdKitModule);
321
+ //buildMonomerHoverLink(resHelmCol, resMolCol, lib, seqHelper, rdKitModule);
322
+ buildCyclizedMonomerHoverLink(seqCol, resHelmCol, resMolCol, lib, seqHelper, rdKitModule, positionMaps);
353
323
 
354
324
  return [resHelmCol, resMolCol];
355
325
  } finally {
@@ -359,7 +329,8 @@ export async function polyToolConvert(seqCol: DG.Column<string>,
359
329
 
360
330
  function buildCyclizedMonomerHoverLink(
361
331
  cyclizedCol: DG.Column<string>, seqCol: DG.Column<string>, molCol: DG.Column<string>,
362
- monomerLib: IMonomerLibBase, seqHelper: ISeqHelper, rdKitModule: RDModule
332
+ monomerLib: IMonomerLibBase, seqHelper: ISeqHelper, rdKitModule: RDModule,
333
+ positionMaps: number[][][]
363
334
  ): MonomerHoverLink {
364
335
  function buildMonomerMap(seqCol: DG.Column<string>, tableRowIdx: number): MonomerMap {
365
336
  const seqSH = seqHelper.getSeqHandler(seqCol);
@@ -395,19 +366,22 @@ function buildCyclizedMonomerHoverLink(
395
366
  const resLink: MonomerHoverLink = {
396
367
  targetCol: molCol,
397
368
  handler: (seqGridCell: DG.GridCell, cyclizedMonomer: ISeqMonomer | null, targetGridCol: DG.GridColumn): boolean => {
369
+ if (!seqGridCell || !targetGridCol.grid || !seqCol.dataFrame)
370
+ return true;
398
371
  const grid = targetGridCol.grid;
399
372
  const tableRowIdx = seqGridCell.tableRowIndex!;
400
373
  const gridRowIdx = seqGridCell.gridRow;
401
374
  const targetGridCell = grid.cell(targetGridCol.name, gridRowIdx);
375
+ const positionMap = positionMaps[gridRowIdx];
402
376
 
403
377
  const prev = getMonomerHover();
404
- if (!prev || (prev && (prev.dataFrameId != seqCol.dataFrame.id || prev.gridRowIdx != gridRowIdx ||
378
+ if (!prev || (prev && (prev.dataFrameId != seqCol.dataFrame?.id || prev.gridRowIdx != gridRowIdx ||
405
379
  prev.seqColName != seqCol.name || prev.seqPosition != cyclizedMonomer?.position))
406
380
  ) {
407
381
  if (prev) {
408
382
  setMonomerHover(null);
409
- prev.gridCell.grid?.invalidate();
410
- // prev.gridCell.render();
383
+ //prev.gridCell.grid?.invalidate();
384
+ prev.gridCell.render();
411
385
  }
412
386
  if (!cyclizedMonomer) {
413
387
  setMonomerHover(null);
@@ -429,21 +403,21 @@ function buildCyclizedMonomerHoverLink(
429
403
  return undefined;
430
404
 
431
405
  const resSubstructList: ISubstruct[] = [];
432
- const seqMonomerList: number[] = [cyclizedMonomer.position]; // TODO: Map position of harmonized sequence
406
+ const seqMonomerList: number[] = positionMap[cyclizedMonomer.position];
433
407
  for (const seqMonomer of seqMonomerList) {
434
- const monomerMap = molMonomerMap.get(cyclizedMonomer!.position); // single monomer
408
+ const monomerMap = molMonomerMap.get(seqMonomer); // single monomer
435
409
  if (!monomerMap) return {atoms: [], bonds: [], highlightAtomColors: [], highlightBondColors: []};
436
410
  resSubstructList.push(getMolHighlight([monomerMap], monomerLib));
437
411
  }
438
412
  //TODO: refine merge substract
439
- //const res: ISubstruct = mergeSubstructs(resSubstructList);
440
- return undefined;
413
+ const res: ISubstruct = mergeSubstructs(resSubstructList);
414
+ return res;
441
415
  }
442
416
  });
443
417
 
444
418
  // TODO: Invalidate targetGridCell
445
- grid.invalidate();
446
- // targetGridCell.render();
419
+ //grid.invalidate();
420
+ targetGridCell.render();
447
421
  }
448
422
 
449
423
  return true;
@@ -463,7 +437,7 @@ function buildCyclizedMonomerHoverLink(
463
437
  };
464
438
 
465
439
  addMonomerHoverLink(cyclizedCol.temp, resLink);
466
- // addSubstructProvider(molCol.temp, resLink); //
440
+ addSubstructProvider(molCol.temp, resLink);
467
441
 
468
442
  return resLink;
469
443
  }
@@ -167,6 +167,9 @@ async function getPolyToolEnumerateDialog(
167
167
  resDataRole = (resSeqValue.tags[PolyToolTags.dataRole] as PolyToolDataRole.template) ?? PolyToolDataRole.macromolecule;
168
168
  } else {
169
169
  const seqCol = DG.Column.fromList(DG.COLUMN_TYPE.STRING, 'seq', [PT_HELM_EXAMPLE]);
170
+ seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
171
+ const _tempDf = DG.DataFrame.fromColumns([seqCol]);
172
+
170
173
  seqCol.meta.units = NOTATION.HELM;
171
174
  const sh = seqHelper.getSeqHandler(seqCol);
172
175
  resSeqValue = sh.getValue(0);
@@ -184,7 +187,7 @@ async function getPolyToolEnumerateDialog(
184
187
  const warningsTextDiv = ui.divText('', {style: {color: 'red'}});
185
188
  // #### Inputs
186
189
  inputs = {
187
- macromolecule: helmHelper.createHelmInput(
190
+ macromolecule: helmHelper.createHelmInput(
188
191
  'Macromolecule', {
189
192
  editable: false,
190
193
  editorOptions: {
@@ -192,9 +195,20 @@ async function getPolyToolEnumerateDialog(
192
195
  monomerNumbering: MonomerNumberingTypes.continuous,
193
196
  getMonomer: (a: HelmAtom | HelmType, name?: string): GetMonomerResType => {
194
197
  const aa: HelmAtom = a as HelmAtom;
198
+
199
+
195
200
  if (aa.T === 'ATOM') {
196
- const canonicalSymbol = seqValue.getSplitted().getCanonical(aa.bio!.continuousId - 1);
197
- return monomerLibFuncs.getMonomer(aa.bio!.type, canonicalSymbol);
201
+ try {
202
+ if (!seqValue.isDna() && !seqValue.isRna()) {
203
+ const canonicalSymbol = seqValue.getSplitted().getCanonical(aa.bio!.continuousId - 1);
204
+ return monomerLibFuncs.getMonomer(aa.bio!.type, canonicalSymbol);
205
+ } else {
206
+ const canonicalSymbol = seqValue.getSplittedWithSugarsAndPhosphates().getCanonical(aa.bio!.continuousId - 1);
207
+ return monomerLibFuncs.getMonomer(aa.bio!.type, canonicalSymbol);
208
+ }
209
+ } catch (_) {
210
+ return monomerLibFuncs.getMonomer(a, name);
211
+ }
198
212
  } else { return monomerLibFuncs.getMonomer(a, name); }
199
213
  },
200
214
  },
@@ -656,8 +670,8 @@ async function getPolyToolEnumerateDialog(
656
670
  },
657
671
  /* applyInput */ (x: PolyToolEnumerateHelmSerialized): void => {
658
672
  //inputs.macromolecule.stringValue = x.macromolecule;
659
- inputs.placeholders.stringValue = x.placeholders;
660
673
  inputs.enumeratorType.value = x.enumeratorType ?? PolyToolEnumeratorTypes.Single;
674
+ inputs.placeholders.stringValue = x.placeholders;
661
675
  inputs.placeholdersBreadth.stringValue = x.placeholdersBreadth;
662
676
  inputs.trivialNameCol.stringValue = x.trivialNameCol;
663
677
  inputs.keepOriginal.value = x.keepOriginal ?? false;
@@ -727,7 +741,7 @@ async function polyToolEnumerateSeq(
727
741
  toAtomicLevel.chiralityEngine, toAtomicLevel.highlightMonomers);
728
742
  enumeratorResDf.columns.add(talRes.molCol!, false);
729
743
  const resMolCol = talRes.molCol!;
730
- buildMonomerHoverLink(resHelmCol, resMolCol, monomerLib, helmHelper.seqHelper, rdKitModule);
744
+ await buildMonomerHoverLink(resHelmCol, resMolCol, monomerLib, helmHelper.seqHelper, rdKitModule, true);
731
745
  } else if (dataRole === PolyToolDataRole.template) {
732
746
  const talRes = await polyToolConvert(enumCol,
733
747
  toAtomicLevel.generateHelm, false, toAtomicLevel.chiralityEngine, false, toAtomicLevel.rules);
@@ -107,7 +107,7 @@ category('PolyTool: Convert', () => {
107
107
  for (const [testName, testData] of Object.entries(tests)) {
108
108
  test(`toAtomicLevel-${testName}`, async () => {
109
109
  const rules = await getRules(['rules_example.json']);
110
- const [helmList, isLinear] = doPolyToolConvert([testData.src.seq], rules, helmHelper);
110
+ const [helmList, isLinear, positionMaps] = doPolyToolConvert([testData.src.seq], rules, helmHelper);
111
111
 
112
112
  const lib = await getOverriddenLibrary(rules);
113
113