@datagrok/sequence-translator 1.6.4 → 1.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +2 -1
- package/CHANGELOG.md +17 -0
- package/dist/455.js +1 -1
- package/dist/455.js.map +1 -1
- package/dist/package-test.js +1 -1
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +1 -1
- package/dist/package.js.map +1 -1
- package/package.json +8 -8
- package/src/polytool/conversion/pt-atomic.ts +70 -0
- package/src/polytool/conversion/pt-chain.ts +22 -2
- package/src/polytool/conversion/pt-conversion.ts +5 -2
- package/src/polytool/conversion/pt-rule-cards.ts +160 -0
- package/src/polytool/conversion/pt-rules.ts +24 -0
- package/src/polytool/conversion/pt-tools-parse.ts +11 -21
- package/src/polytool/conversion/rule-manager.ts +52 -6
- package/src/polytool/conversion/style.css +32 -0
- package/src/polytool/pt-dialog.ts +48 -74
- package/src/polytool/pt-enumerate-seq-dialog.ts +19 -5
- package/src/tests/polytool-convert-tests.ts +1 -1
- package/test-console-output-1.log +1685 -0
- package/test-record-1.mp4 +0 -0
|
@@ -10,7 +10,7 @@ import {errInfo} from '@datagrok-libraries/bio/src/utils/err-info';
|
|
|
10
10
|
import {ALPHABET, NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
11
11
|
import {getSeqHelper, ISeqHelper, ToAtomicLevelRes} from '@datagrok-libraries/bio/src/utils/seq-helper';
|
|
12
12
|
import {MmcrTemps} from '@datagrok-libraries/bio/src/utils/cell-renderer-consts';
|
|
13
|
-
import {addMonomerHoverLink
|
|
13
|
+
import {addMonomerHoverLink} from '@datagrok-libraries/bio/src/monomer-works/monomer-hover';
|
|
14
14
|
import {getRdKitModule} from '@datagrok-libraries/bio/src/chem/rdkit-module';
|
|
15
15
|
import {RDModule} from '@datagrok-libraries/chem-meta/src/rdkit-api';
|
|
16
16
|
|
|
@@ -37,10 +37,13 @@ import {PolymerTypes} from '@datagrok-libraries/js-draw-lite/src/types/org';
|
|
|
37
37
|
import {_toAtomicLevel, getMonomersDictFromLib} from '@datagrok-libraries/bio/src/monomer-works/to-atomic-level';
|
|
38
38
|
import {monomerSeqToMolfile} from '@datagrok-libraries/bio/src/monomer-works/to-atomic-level-utils';
|
|
39
39
|
import {LRUCache} from 'lru-cache';
|
|
40
|
-
import {getMonomerHover, ISubstruct, setMonomerHover}
|
|
40
|
+
import {addSubstructProvider, getMonomerHover, ISubstruct, setMonomerHover}
|
|
41
|
+
from '@datagrok-libraries/chem-meta/src/types';
|
|
41
42
|
import {getMolHighlight} from '@datagrok-libraries/bio/src/monomer-works/seq-to-molfile';
|
|
42
43
|
import {ChemTags} from '@datagrok-libraries/chem-meta/src/consts';
|
|
44
|
+
import {mergeSubstructs} from '@datagrok-libraries/chem-meta/src/types';
|
|
43
45
|
import {getMonomerLibHelper} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
|
|
46
|
+
import {dealGroups, helmToMol} from './conversion/pt-atomic';
|
|
44
47
|
|
|
45
48
|
type PolyToolConvertSerialized = {
|
|
46
49
|
generateHelm: boolean;
|
|
@@ -65,10 +68,10 @@ export async function polyToolEnumerateChemUI(cell?: DG.Cell): Promise<void> {
|
|
|
65
68
|
|
|
66
69
|
export async function polyToolConvertUI(): Promise<void> {
|
|
67
70
|
await _package.initPromise;
|
|
68
|
-
let dialog: DG.Dialog;
|
|
71
|
+
let dialog: DG.Dialog | null = null;
|
|
69
72
|
try {
|
|
70
73
|
dialog = await getPolyToolConvertDialog();
|
|
71
|
-
dialog
|
|
74
|
+
dialog?.show();
|
|
72
75
|
} catch (err: any) {
|
|
73
76
|
const [errMsg, errStack] = errInfo(err);
|
|
74
77
|
grok.shell.warning('To run PolyTool Conversion, open a dataframe with macromolecules');
|
|
@@ -76,25 +79,38 @@ export async function polyToolConvertUI(): Promise<void> {
|
|
|
76
79
|
}
|
|
77
80
|
}
|
|
78
81
|
|
|
79
|
-
export async function getPolyToolConvertDialog(srcCol?: DG.Column): Promise<DG.Dialog> {
|
|
82
|
+
export async function getPolyToolConvertDialog(srcCol?: DG.Column): Promise<DG.Dialog | null> {
|
|
80
83
|
const subs: Unsubscribable[] = [];
|
|
81
84
|
const destroy = () => {
|
|
82
85
|
for (const sub of subs) sub.unsubscribe();
|
|
83
86
|
};
|
|
84
87
|
try {
|
|
85
88
|
let srcColVal: DG.Column<string> | undefined = srcCol;
|
|
89
|
+
const srcColList = grok.shell.t.columns.bySemTypeAll(DG.SEMTYPE.MACROMOLECULE);
|
|
90
|
+
const customSrcCols = srcColList.filter((col) => {
|
|
91
|
+
const sh = _package.seqHelper.getSeqHandler(col);
|
|
92
|
+
return sh.notation === NOTATION.CUSTOM;
|
|
93
|
+
});
|
|
86
94
|
if (!srcColVal) {
|
|
87
|
-
const srcColList = grok.shell.t.columns.bySemTypeAll(DG.SEMTYPE.MACROMOLECULE);
|
|
88
95
|
if (srcColList.length < 1)
|
|
89
96
|
throw new Error(PT_ERROR_DATAFRAME);
|
|
97
|
+
|
|
98
|
+
if (customSrcCols.length < 1) {
|
|
99
|
+
const toAtomicLevelFunc = DG.Func.find({package: 'Bio', name: 'toAtomicLevel'})[0];
|
|
100
|
+
if (toAtomicLevelFunc) {
|
|
101
|
+
toAtomicLevelFunc.prepare().edit();
|
|
102
|
+
return null;
|
|
103
|
+
}
|
|
104
|
+
grok.shell.warning('Polytool requires a macromolecule column with custom notation. \n\nUse Top menu | Bio | Transform | To Atomic Level.');
|
|
105
|
+
return null;
|
|
106
|
+
}
|
|
107
|
+
|
|
90
108
|
srcColVal = srcColList[0];
|
|
91
109
|
}
|
|
92
110
|
const srcColInput = ui.input.column('Column', {
|
|
93
111
|
table: srcColVal.dataFrame, value: srcColVal,
|
|
94
112
|
filter: (col: DG.Column) => {
|
|
95
|
-
|
|
96
|
-
const sh = _package.seqHelper.getSeqHandler(col);
|
|
97
|
-
return sh.notation === NOTATION.CUSTOM;
|
|
113
|
+
return customSrcCols.includes(col);
|
|
98
114
|
}
|
|
99
115
|
});
|
|
100
116
|
|
|
@@ -259,16 +275,6 @@ async function getPolyToolEnumerationChemDialog(cell?: DG.Cell): Promise<DG.Dial
|
|
|
259
275
|
}
|
|
260
276
|
}
|
|
261
277
|
|
|
262
|
-
function dealGroups(col: DG.Column<string>): void {
|
|
263
|
-
for (let i = 0; i < col.length; i++) {
|
|
264
|
-
col.set(i, col.get(i)!.replaceAll('undefined', 'H'));
|
|
265
|
-
col.set(i, col.get(i)!.replaceAll('Oh', 'O'));
|
|
266
|
-
col.set(i, col.get(i)!.replaceAll('0.000000 3', '0.000000 0'));
|
|
267
|
-
col.set(i, col.get(i)!.replaceAll('?', 'O'));
|
|
268
|
-
col.set(i, col.get(i)!.replaceAll('0 3\n', '0 0\n'));
|
|
269
|
-
}
|
|
270
|
-
}
|
|
271
|
-
|
|
272
278
|
/** Returns Helm and molfile columns. */
|
|
273
279
|
export async function polyToolConvert(seqCol: DG.Column<string>,
|
|
274
280
|
generateHelm: boolean, linearize: boolean, chiralityEngine: boolean, highlight: boolean, ruleFiles: string[]
|
|
@@ -283,7 +289,7 @@ export async function polyToolConvert(seqCol: DG.Column<string>,
|
|
|
283
289
|
|
|
284
290
|
const table = seqCol.dataFrame;
|
|
285
291
|
const rules = await getRules(ruleFiles);
|
|
286
|
-
const [resList, isLinear] = doPolyToolConvert(seqCol.toList(), rules, helmHelper);
|
|
292
|
+
const [resList, isLinear, positionMaps] = doPolyToolConvert(seqCol.toList(), rules, helmHelper);
|
|
287
293
|
|
|
288
294
|
const resHelmColName = getUnusedName(table, `transformed(${seqCol.name})`);
|
|
289
295
|
const resHelmCol = DG.Column.fromType(DG.COLUMN_TYPE.STRING, resHelmColName, resList.length)
|
|
@@ -295,61 +301,25 @@ export async function polyToolConvert(seqCol: DG.Column<string>,
|
|
|
295
301
|
|
|
296
302
|
|
|
297
303
|
const rdKitModule: RDModule = await getRdKitModule();
|
|
304
|
+
const seqHelper: ISeqHelper = await getSeqHelper();
|
|
305
|
+
|
|
298
306
|
const lib = await getOverriddenLibrary(rules);
|
|
299
307
|
const resHelmColTemp = resHelmCol.temp;
|
|
300
308
|
resHelmColTemp[MmcrTemps.overriddenLibrary] = lib;
|
|
301
309
|
resHelmCol.temp = resHelmColTemp;
|
|
302
310
|
|
|
303
|
-
const
|
|
304
|
-
|
|
305
|
-
await seqHelper.helmToAtomicLevel(resHelmCol, chiralityEngine, highlight, lib);
|
|
306
|
-
|
|
307
|
-
const resMolCol = toAtomicLevelRes.molCol!;
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
const allLinear = isLinear.filter((l) => l).length;
|
|
311
|
-
if (linearize && allLinear > 0) {
|
|
312
|
-
const lin = new Array<string>(allLinear);
|
|
313
|
-
let counter = 0;
|
|
314
|
-
for (let i = 0; i < isLinear.length; i++) {
|
|
315
|
-
if (isLinear[i]) {
|
|
316
|
-
lin[counter] = resList[i];
|
|
317
|
-
counter++;
|
|
318
|
-
}
|
|
319
|
-
}
|
|
320
|
-
|
|
321
|
-
const linCol = DG.Column.fromStrings('helm', lin);
|
|
322
|
-
linCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
323
|
-
linCol.meta.units = NOTATION.HELM;
|
|
324
|
-
linCol.setTag(DG.TAGS.CELL_RENDERER, 'helm');
|
|
325
|
-
|
|
326
|
-
const monomerLibHelper = await getMonomerLibHelper();
|
|
327
|
-
const systemMonomerLib = monomerLibHelper.getMonomerLib();
|
|
328
|
-
let linear: ToAtomicLevelRes | null = null;
|
|
329
|
-
try {
|
|
330
|
-
linear = await _toAtomicLevel(DG.DataFrame.create(0), linCol, systemMonomerLib, seqHelper, rdKitModule);
|
|
331
|
-
counter = 0;
|
|
332
|
-
for (let i = 0; i < isLinear.length; i++) {
|
|
333
|
-
if (isLinear[i]) {
|
|
334
|
-
resMolCol.set(i, linear!.molCol!.get(counter));
|
|
335
|
-
counter++;
|
|
336
|
-
}
|
|
337
|
-
}
|
|
338
|
-
} catch (e: any) {
|
|
339
|
-
grok.shell.warning('PolyTool was not able to linearize sequences');
|
|
340
|
-
}
|
|
341
|
-
}
|
|
342
|
-
|
|
343
|
-
dealGroups(resMolCol);
|
|
311
|
+
const resMolCol = await helmToMol(resHelmCol, resList,
|
|
312
|
+
isLinear, chiralityEngine, highlight, linearize, lib, rdKitModule, seqHelper);
|
|
344
313
|
resMolCol.name = getUnusedName(table, `molfile(${seqCol.name})`);
|
|
345
314
|
resMolCol.semType = DG.SEMTYPE.MOLECULE;
|
|
315
|
+
|
|
346
316
|
if (table) {
|
|
347
317
|
table.columns.add(resMolCol, true);
|
|
348
318
|
await grok.data.detectSemanticTypes(table);
|
|
349
319
|
}
|
|
350
320
|
|
|
351
|
-
buildMonomerHoverLink(resHelmCol, resMolCol, lib, seqHelper, rdKitModule);
|
|
352
|
-
buildCyclizedMonomerHoverLink(seqCol, resHelmCol, resMolCol, lib, seqHelper, rdKitModule);
|
|
321
|
+
//buildMonomerHoverLink(resHelmCol, resMolCol, lib, seqHelper, rdKitModule);
|
|
322
|
+
buildCyclizedMonomerHoverLink(seqCol, resHelmCol, resMolCol, lib, seqHelper, rdKitModule, positionMaps);
|
|
353
323
|
|
|
354
324
|
return [resHelmCol, resMolCol];
|
|
355
325
|
} finally {
|
|
@@ -359,7 +329,8 @@ export async function polyToolConvert(seqCol: DG.Column<string>,
|
|
|
359
329
|
|
|
360
330
|
function buildCyclizedMonomerHoverLink(
|
|
361
331
|
cyclizedCol: DG.Column<string>, seqCol: DG.Column<string>, molCol: DG.Column<string>,
|
|
362
|
-
monomerLib: IMonomerLibBase, seqHelper: ISeqHelper, rdKitModule: RDModule
|
|
332
|
+
monomerLib: IMonomerLibBase, seqHelper: ISeqHelper, rdKitModule: RDModule,
|
|
333
|
+
positionMaps: number[][][]
|
|
363
334
|
): MonomerHoverLink {
|
|
364
335
|
function buildMonomerMap(seqCol: DG.Column<string>, tableRowIdx: number): MonomerMap {
|
|
365
336
|
const seqSH = seqHelper.getSeqHandler(seqCol);
|
|
@@ -395,19 +366,22 @@ function buildCyclizedMonomerHoverLink(
|
|
|
395
366
|
const resLink: MonomerHoverLink = {
|
|
396
367
|
targetCol: molCol,
|
|
397
368
|
handler: (seqGridCell: DG.GridCell, cyclizedMonomer: ISeqMonomer | null, targetGridCol: DG.GridColumn): boolean => {
|
|
369
|
+
if (!seqGridCell || !targetGridCol.grid || !seqCol.dataFrame)
|
|
370
|
+
return true;
|
|
398
371
|
const grid = targetGridCol.grid;
|
|
399
372
|
const tableRowIdx = seqGridCell.tableRowIndex!;
|
|
400
373
|
const gridRowIdx = seqGridCell.gridRow;
|
|
401
374
|
const targetGridCell = grid.cell(targetGridCol.name, gridRowIdx);
|
|
375
|
+
const positionMap = positionMaps[gridRowIdx];
|
|
402
376
|
|
|
403
377
|
const prev = getMonomerHover();
|
|
404
|
-
if (!prev || (prev && (prev.dataFrameId != seqCol.dataFrame
|
|
378
|
+
if (!prev || (prev && (prev.dataFrameId != seqCol.dataFrame?.id || prev.gridRowIdx != gridRowIdx ||
|
|
405
379
|
prev.seqColName != seqCol.name || prev.seqPosition != cyclizedMonomer?.position))
|
|
406
380
|
) {
|
|
407
381
|
if (prev) {
|
|
408
382
|
setMonomerHover(null);
|
|
409
|
-
prev.gridCell.grid?.invalidate();
|
|
410
|
-
|
|
383
|
+
//prev.gridCell.grid?.invalidate();
|
|
384
|
+
prev.gridCell.render();
|
|
411
385
|
}
|
|
412
386
|
if (!cyclizedMonomer) {
|
|
413
387
|
setMonomerHover(null);
|
|
@@ -429,21 +403,21 @@ function buildCyclizedMonomerHoverLink(
|
|
|
429
403
|
return undefined;
|
|
430
404
|
|
|
431
405
|
const resSubstructList: ISubstruct[] = [];
|
|
432
|
-
const seqMonomerList: number[] = [cyclizedMonomer.position];
|
|
406
|
+
const seqMonomerList: number[] = positionMap[cyclizedMonomer.position];
|
|
433
407
|
for (const seqMonomer of seqMonomerList) {
|
|
434
|
-
const monomerMap = molMonomerMap.get(
|
|
408
|
+
const monomerMap = molMonomerMap.get(seqMonomer); // single monomer
|
|
435
409
|
if (!monomerMap) return {atoms: [], bonds: [], highlightAtomColors: [], highlightBondColors: []};
|
|
436
410
|
resSubstructList.push(getMolHighlight([monomerMap], monomerLib));
|
|
437
411
|
}
|
|
438
412
|
//TODO: refine merge substract
|
|
439
|
-
|
|
440
|
-
return
|
|
413
|
+
const res: ISubstruct = mergeSubstructs(resSubstructList);
|
|
414
|
+
return res;
|
|
441
415
|
}
|
|
442
416
|
});
|
|
443
417
|
|
|
444
418
|
// TODO: Invalidate targetGridCell
|
|
445
|
-
grid.invalidate();
|
|
446
|
-
|
|
419
|
+
//grid.invalidate();
|
|
420
|
+
targetGridCell.render();
|
|
447
421
|
}
|
|
448
422
|
|
|
449
423
|
return true;
|
|
@@ -463,7 +437,7 @@ function buildCyclizedMonomerHoverLink(
|
|
|
463
437
|
};
|
|
464
438
|
|
|
465
439
|
addMonomerHoverLink(cyclizedCol.temp, resLink);
|
|
466
|
-
|
|
440
|
+
addSubstructProvider(molCol.temp, resLink);
|
|
467
441
|
|
|
468
442
|
return resLink;
|
|
469
443
|
}
|
|
@@ -167,6 +167,9 @@ async function getPolyToolEnumerateDialog(
|
|
|
167
167
|
resDataRole = (resSeqValue.tags[PolyToolTags.dataRole] as PolyToolDataRole.template) ?? PolyToolDataRole.macromolecule;
|
|
168
168
|
} else {
|
|
169
169
|
const seqCol = DG.Column.fromList(DG.COLUMN_TYPE.STRING, 'seq', [PT_HELM_EXAMPLE]);
|
|
170
|
+
seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
171
|
+
const _tempDf = DG.DataFrame.fromColumns([seqCol]);
|
|
172
|
+
|
|
170
173
|
seqCol.meta.units = NOTATION.HELM;
|
|
171
174
|
const sh = seqHelper.getSeqHandler(seqCol);
|
|
172
175
|
resSeqValue = sh.getValue(0);
|
|
@@ -184,7 +187,7 @@ async function getPolyToolEnumerateDialog(
|
|
|
184
187
|
const warningsTextDiv = ui.divText('', {style: {color: 'red'}});
|
|
185
188
|
// #### Inputs
|
|
186
189
|
inputs = {
|
|
187
|
-
macromolecule: helmHelper.createHelmInput(
|
|
190
|
+
macromolecule: helmHelper.createHelmInput(
|
|
188
191
|
'Macromolecule', {
|
|
189
192
|
editable: false,
|
|
190
193
|
editorOptions: {
|
|
@@ -192,9 +195,20 @@ async function getPolyToolEnumerateDialog(
|
|
|
192
195
|
monomerNumbering: MonomerNumberingTypes.continuous,
|
|
193
196
|
getMonomer: (a: HelmAtom | HelmType, name?: string): GetMonomerResType => {
|
|
194
197
|
const aa: HelmAtom = a as HelmAtom;
|
|
198
|
+
|
|
199
|
+
|
|
195
200
|
if (aa.T === 'ATOM') {
|
|
196
|
-
|
|
197
|
-
|
|
201
|
+
try {
|
|
202
|
+
if (!seqValue.isDna() && !seqValue.isRna()) {
|
|
203
|
+
const canonicalSymbol = seqValue.getSplitted().getCanonical(aa.bio!.continuousId - 1);
|
|
204
|
+
return monomerLibFuncs.getMonomer(aa.bio!.type, canonicalSymbol);
|
|
205
|
+
} else {
|
|
206
|
+
const canonicalSymbol = seqValue.getSplittedWithSugarsAndPhosphates().getCanonical(aa.bio!.continuousId - 1);
|
|
207
|
+
return monomerLibFuncs.getMonomer(aa.bio!.type, canonicalSymbol);
|
|
208
|
+
}
|
|
209
|
+
} catch (_) {
|
|
210
|
+
return monomerLibFuncs.getMonomer(a, name);
|
|
211
|
+
}
|
|
198
212
|
} else { return monomerLibFuncs.getMonomer(a, name); }
|
|
199
213
|
},
|
|
200
214
|
},
|
|
@@ -656,8 +670,8 @@ async function getPolyToolEnumerateDialog(
|
|
|
656
670
|
},
|
|
657
671
|
/* applyInput */ (x: PolyToolEnumerateHelmSerialized): void => {
|
|
658
672
|
//inputs.macromolecule.stringValue = x.macromolecule;
|
|
659
|
-
inputs.placeholders.stringValue = x.placeholders;
|
|
660
673
|
inputs.enumeratorType.value = x.enumeratorType ?? PolyToolEnumeratorTypes.Single;
|
|
674
|
+
inputs.placeholders.stringValue = x.placeholders;
|
|
661
675
|
inputs.placeholdersBreadth.stringValue = x.placeholdersBreadth;
|
|
662
676
|
inputs.trivialNameCol.stringValue = x.trivialNameCol;
|
|
663
677
|
inputs.keepOriginal.value = x.keepOriginal ?? false;
|
|
@@ -727,7 +741,7 @@ async function polyToolEnumerateSeq(
|
|
|
727
741
|
toAtomicLevel.chiralityEngine, toAtomicLevel.highlightMonomers);
|
|
728
742
|
enumeratorResDf.columns.add(talRes.molCol!, false);
|
|
729
743
|
const resMolCol = talRes.molCol!;
|
|
730
|
-
buildMonomerHoverLink(resHelmCol, resMolCol, monomerLib, helmHelper.seqHelper, rdKitModule);
|
|
744
|
+
await buildMonomerHoverLink(resHelmCol, resMolCol, monomerLib, helmHelper.seqHelper, rdKitModule, true);
|
|
731
745
|
} else if (dataRole === PolyToolDataRole.template) {
|
|
732
746
|
const talRes = await polyToolConvert(enumCol,
|
|
733
747
|
toAtomicLevel.generateHelm, false, toAtomicLevel.chiralityEngine, false, toAtomicLevel.rules);
|
|
@@ -107,7 +107,7 @@ category('PolyTool: Convert', () => {
|
|
|
107
107
|
for (const [testName, testData] of Object.entries(tests)) {
|
|
108
108
|
test(`toAtomicLevel-${testName}`, async () => {
|
|
109
109
|
const rules = await getRules(['rules_example.json']);
|
|
110
|
-
const [helmList, isLinear] = doPolyToolConvert([testData.src.seq], rules, helmHelper);
|
|
110
|
+
const [helmList, isLinear, positionMaps] = doPolyToolConvert([testData.src.seq], rules, helmHelper);
|
|
111
111
|
|
|
112
112
|
const lib = await getOverriddenLibrary(rules);
|
|
113
113
|
|