@datagrok/bio 2.10.17 → 2.10.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +2 -0
- package/dist/1.js +1 -1
- package/dist/1.js.map +1 -1
- package/dist/172.js +1 -1
- package/dist/172.js.map +1 -1
- package/dist/190.js +1 -1
- package/dist/190.js.map +1 -1
- package/dist/196.js +3 -0
- package/dist/196.js.map +1 -0
- package/dist/361.js +1 -1
- package/dist/361.js.map +1 -1
- package/dist/package-test.js +1 -1
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +1 -1
- package/dist/package.js.map +1 -1
- package/package.json +5 -4
- package/src/analysis/sequence-space.ts +44 -24
- package/src/package.ts +114 -51
- package/src/tests/WebLogo-positions-test.ts +13 -13
- package/src/tests/activity-cliffs-tests.ts +2 -2
- package/src/tests/activity-cliffs-utils.ts +2 -2
- package/src/tests/detectors-benchmark-tests.ts +2 -2
- package/src/tests/detectors-tests.ts +4 -1
- package/src/tests/sequence-space-test.ts +4 -4
- package/src/tests/sequence-space-utils.ts +3 -2
- package/src/utils/cell-renderer.ts +9 -1
- package/src/utils/err-info.ts +28 -0
- package/src/viewers/vd-regions-viewer.ts +58 -32
- package/src/viewers/web-logo-viewer.ts +108 -102
- package/dist/175.js +0 -3
- package/dist/175.js.map +0 -1
- /package/dist/{175.js.LICENSE.txt → 196.js.LICENSE.txt} +0 -0
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Leonid Stolbov",
|
|
6
6
|
"email": "lstolbov@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.10.
|
|
8
|
+
"version": "2.10.23",
|
|
9
9
|
"description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -34,9 +34,9 @@
|
|
|
34
34
|
],
|
|
35
35
|
"dependencies": {
|
|
36
36
|
"@biowasm/aioli": "^3.1.0",
|
|
37
|
-
"@datagrok-libraries/bio": "^5.38.
|
|
37
|
+
"@datagrok-libraries/bio": "^5.38.12",
|
|
38
38
|
"@datagrok-libraries/chem-meta": "^1.0.1",
|
|
39
|
-
"@datagrok-libraries/ml": "^6.3.
|
|
39
|
+
"@datagrok-libraries/ml": "^6.3.49",
|
|
40
40
|
"@datagrok-libraries/tutorials": "^1.3.6",
|
|
41
41
|
"@datagrok-libraries/utils": "^4.0.17",
|
|
42
42
|
"cash-dom": "^8.0.0",
|
|
@@ -64,7 +64,8 @@
|
|
|
64
64
|
"webpack-bundle-analyzer": "latest",
|
|
65
65
|
"webpack-cli": "^4.9.1",
|
|
66
66
|
"@datagrok/chem": "1.7.2",
|
|
67
|
-
"@datagrok/helm": "2.1.17"
|
|
67
|
+
"@datagrok/helm": "2.1.17",
|
|
68
|
+
"@datagrok/dendrogram": "^1.2.20"
|
|
68
69
|
},
|
|
69
70
|
"scripts": {
|
|
70
71
|
"link-api": "npm link datagrok-api",
|
|
@@ -3,7 +3,9 @@ import {reduceDimensinalityWithNormalization} from '@datagrok-libraries/ml/src/s
|
|
|
3
3
|
import {BitArrayMetrics, StringMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
4
4
|
import {ISequenceSpaceParams} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
5
5
|
import {invalidateMols, MONOMERIC_COL_TAGS} from '../substructure-search/substructure-search';
|
|
6
|
+
import {mmDistanceFunctionArgs} from '@datagrok-libraries/ml/src/macromolecule-distance-functions/types';
|
|
6
7
|
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
8
|
+
import {calculateMonomerSimilarity} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
|
|
7
9
|
import * as grok from 'datagrok-api/grok';
|
|
8
10
|
import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
9
11
|
|
|
@@ -53,41 +55,59 @@ export async function sequenceSpaceByFingerprints(spaceParams: ISequenceSpacePar
|
|
|
53
55
|
return result;
|
|
54
56
|
}
|
|
55
57
|
|
|
56
|
-
export async function getSequenceSpace(spaceParams: ISequenceSpaceParams
|
|
58
|
+
export async function getSequenceSpace(spaceParams: ISequenceSpaceParams,
|
|
59
|
+
progressFunc?: (epochNum: number, epochsLength: number, embedding: number[][]) => void
|
|
60
|
+
): Promise<ISequenceSpaceResult> {
|
|
57
61
|
const ncUH = UnitsHandler.getOrCreate(spaceParams.seqCol);
|
|
58
62
|
|
|
59
|
-
const distanceFName = ncUH.isMsa() ? MmDistanceFunctionsNames.HAMMING : MmDistanceFunctionsNames.LEVENSHTEIN;
|
|
63
|
+
//const distanceFName = ncUH.isMsa() ? MmDistanceFunctionsNames.HAMMING : MmDistanceFunctionsNames.LEVENSHTEIN;
|
|
60
64
|
const seqList = spaceParams.seqCol.toList();
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
}
|
|
80
|
-
seqList[i] += charCodeMap.get(char)!;
|
|
65
|
+
|
|
66
|
+
const splitter = ncUH.getSplitter();
|
|
67
|
+
const seqColLength = seqList.length;
|
|
68
|
+
let charCodeCounter = 36;
|
|
69
|
+
const charCodeMap = new Map<string, string>();
|
|
70
|
+
for (let i = 0; i < seqColLength; i++) {
|
|
71
|
+
const seq = seqList[i];
|
|
72
|
+
if (seqList[i] === null || spaceParams.seqCol.isNone(i)) {
|
|
73
|
+
seqList[i] = null;
|
|
74
|
+
continue;
|
|
75
|
+
}
|
|
76
|
+
seqList[i] = '';
|
|
77
|
+
const splittedSeq = splitter(seq);
|
|
78
|
+
for (let j = 0; j < splittedSeq.length; j++) {
|
|
79
|
+
const char = splittedSeq[j];
|
|
80
|
+
if (!charCodeMap.has(char)) {
|
|
81
|
+
charCodeMap.set(char, String.fromCharCode(charCodeCounter));
|
|
82
|
+
charCodeCounter++;
|
|
81
83
|
}
|
|
84
|
+
seqList[i] += charCodeMap.get(char)!;
|
|
82
85
|
}
|
|
83
86
|
}
|
|
84
87
|
|
|
88
|
+
if (spaceParams.similarityMetric === MmDistanceFunctionsNames.MONOMER_CHEMICAL_DISTANCE) {
|
|
89
|
+
const monomers = Array.from(charCodeMap.keys());
|
|
90
|
+
const monomerRes = await calculateMonomerSimilarity(monomers);
|
|
91
|
+
// the susbstitution matrix contains similarity, but we need distances
|
|
92
|
+
monomerRes.scoringMatrix.forEach((row, i) => {
|
|
93
|
+
row.forEach((val, j) => {
|
|
94
|
+
monomerRes.scoringMatrix[i][j] = 1 - val;
|
|
95
|
+
});
|
|
96
|
+
});
|
|
97
|
+
const monomerHashToMatrixMap: {[_: string]: number} = {};
|
|
98
|
+
Object.entries(monomerRes.alphabetIndexes).forEach(([key, value]) => {
|
|
99
|
+
monomerHashToMatrixMap[charCodeMap.get(key)!] = value;
|
|
100
|
+
});
|
|
101
|
+
spaceParams.options.distanceFnArgs = {scoringMatrix: monomerRes.scoringMatrix,
|
|
102
|
+
alphabetIndexes: monomerHashToMatrixMap} satisfies mmDistanceFunctionArgs;
|
|
103
|
+
}
|
|
104
|
+
|
|
85
105
|
const sequenceSpaceResult = await reduceDimensinalityWithNormalization(
|
|
86
106
|
seqList,
|
|
87
107
|
spaceParams.methodName,
|
|
88
|
-
|
|
108
|
+
spaceParams.similarityMetric,
|
|
89
109
|
spaceParams.options,
|
|
90
|
-
true);
|
|
110
|
+
true, progressFunc);
|
|
91
111
|
const cols: DG.Column[] = spaceParams.embedAxesNames.map(
|
|
92
112
|
(name: string, index: number) => DG.Column.fromFloat32Array(name, sequenceSpaceResult.embedding[index]));
|
|
93
113
|
return {distance: sequenceSpaceResult.distance, coordinates: new DG.ColumnList(cols)};
|
package/src/package.ts
CHANGED
|
@@ -69,9 +69,13 @@ import {_getEnumeratorWidget, _setPeptideColumn} from './utils/enumerator-tools'
|
|
|
69
69
|
import {getRegionDo} from './utils/get-region';
|
|
70
70
|
import {GetRegionApp} from './apps/get-region-app';
|
|
71
71
|
import {GetRegionFuncEditor} from './utils/get-region-func-editor';
|
|
72
|
+
import {DIMENSIONALITY_REDUCER_TERMINATE_EVENT}
|
|
73
|
+
from '@datagrok-libraries/ml/src/workers/dimensionality-reducing-worker-creator';
|
|
74
|
+
import {Options} from '@datagrok-libraries/utils/src/type-declarations';
|
|
72
75
|
|
|
73
76
|
export const _package = new BioPackage();
|
|
74
77
|
|
|
78
|
+
export const BYPASS_LARGE_DATA_WARNING = 'bypassLargeDataWarning';
|
|
75
79
|
// /** Avoid reassigning {@link monomerLib} because consumers subscribe to {@link IMonomerLib.onChanged} event */
|
|
76
80
|
// let monomerLib: MonomerLib | null = null;
|
|
77
81
|
|
|
@@ -221,7 +225,7 @@ export function SequenceSpaceEditor(call: DG.FuncCall) {
|
|
|
221
225
|
ui.dialog({title: 'Sequence Space'})
|
|
222
226
|
.add(funcEditor.paramsUI)
|
|
223
227
|
.onOK(async () => {
|
|
224
|
-
return call.func.prepare(funcEditor.funcParams).call(
|
|
228
|
+
return call.func.prepare(funcEditor.funcParams).call();
|
|
225
229
|
})
|
|
226
230
|
.show();
|
|
227
231
|
}
|
|
@@ -381,8 +385,8 @@ export async function getRegionTopMenu(
|
|
|
381
385
|
//input: object options {optional: true}
|
|
382
386
|
//output: viewer result
|
|
383
387
|
//editor: Bio:SeqActivityCliffsEditor
|
|
384
|
-
export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column
|
|
385
|
-
similarity: number, methodName: DimReductionMethods, options?: IUMAPOptions | ITSNEOptions,
|
|
388
|
+
export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column<string>, activities: DG.Column,
|
|
389
|
+
similarity: number, methodName: DimReductionMethods, options?: (IUMAPOptions | ITSNEOptions) & Options,
|
|
386
390
|
): Promise<DG.Viewer | undefined> {
|
|
387
391
|
if (!checkInputColumnUI(macroMolecule, 'Activity Cliffs'))
|
|
388
392
|
return;
|
|
@@ -435,7 +439,7 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
|
|
|
435
439
|
return;
|
|
436
440
|
}
|
|
437
441
|
|
|
438
|
-
if (df.rowCount > fastRowCount) {
|
|
442
|
+
if (df.rowCount > fastRowCount && !options?.[BYPASS_LARGE_DATA_WARNING]) {
|
|
439
443
|
ui.dialog().add(ui.divText(`Activity cliffs analysis might take several minutes.
|
|
440
444
|
Do you want to continue?`))
|
|
441
445
|
.onOK(async () => {
|
|
@@ -457,69 +461,128 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
|
|
|
457
461
|
//input: dataframe table
|
|
458
462
|
//input: column molecules { semType: Macromolecule }
|
|
459
463
|
//input: string methodName { choices:["UMAP", "t-SNE"] }
|
|
460
|
-
//input: string similarityMetric { choices:["
|
|
464
|
+
//input: string similarityMetric { choices:["Hamming", "Levenshtein", "Monomer chemical distance"] }
|
|
461
465
|
//input: bool plotEmbeddings = true
|
|
462
466
|
//input: double sparseMatrixThreshold = 0.8 [Similarity Threshold for sparse matrix calculation]
|
|
463
467
|
//input: object options {optional: true}
|
|
464
468
|
//editor: Bio:SequenceSpaceEditor
|
|
465
469
|
export async function sequenceSpaceTopMenu(
|
|
466
470
|
table: DG.DataFrame, macroMolecule: DG.Column, methodName: DimReductionMethods,
|
|
467
|
-
similarityMetric: BitArrayMetrics | MmDistanceFunctionsNames =
|
|
468
|
-
plotEmbeddings: boolean, sparseMatrixThreshold?: number, options?: IUMAPOptions | ITSNEOptions,
|
|
471
|
+
similarityMetric: BitArrayMetrics | MmDistanceFunctionsNames = MmDistanceFunctionsNames.LEVENSHTEIN,
|
|
472
|
+
plotEmbeddings: boolean, sparseMatrixThreshold?: number, options?: (IUMAPOptions | ITSNEOptions) & Options,
|
|
469
473
|
): Promise<DG.Viewer | undefined> {
|
|
470
474
|
// Delay is required for initial function dialog to close before starting invalidating of molfiles.
|
|
471
475
|
// Otherwise, dialog is freezing
|
|
472
476
|
await delay(10);
|
|
473
477
|
if (!checkInputColumnUI(macroMolecule, 'Sequence space')) return;
|
|
478
|
+
let scatterPlot: DG.ScatterPlotViewer | undefined = undefined;
|
|
479
|
+
const pg = DG.TaskBarProgressIndicator.create('Initializing sequence space ...');
|
|
480
|
+
// function for progress of umap
|
|
481
|
+
try {
|
|
482
|
+
function progressFunc(_nEpoch: number, epochsLength: number, embeddings: number[][]) {
|
|
483
|
+
let embedXCol: DG.Column | null = null;
|
|
484
|
+
let embedYCol: DG.Column | null = null;
|
|
485
|
+
if (!table.columns.names().includes(embedColsNames[0])) {
|
|
486
|
+
embedXCol = table.columns.add(DG.Column.float(embedColsNames[0], table.rowCount));
|
|
487
|
+
embedYCol = table.columns.add(DG.Column.float(embedColsNames[1], table.rowCount));
|
|
488
|
+
if (plotEmbeddings) {
|
|
489
|
+
scatterPlot = grok.shell
|
|
490
|
+
.tableView(table.name)
|
|
491
|
+
.scatterPlot({x: embedColsNames[0], y: embedColsNames[1], title: 'Sequence space'});
|
|
492
|
+
}
|
|
493
|
+
} else {
|
|
494
|
+
embedXCol = table.columns.byName(embedColsNames[0]);
|
|
495
|
+
embedYCol = table.columns.byName(embedColsNames[1]);
|
|
496
|
+
}
|
|
474
497
|
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
498
|
+
embedXCol.init((i) => embeddings[i] ? embeddings[i][0] : undefined);
|
|
499
|
+
embedYCol.init((i) => embeddings[i] ? embeddings[i][1] : undefined);
|
|
500
|
+
const progress = (_nEpoch / epochsLength * 100);
|
|
501
|
+
pg.update(progress, `Running sequence space ... ${progress.toFixed(0)}%`);
|
|
502
|
+
}
|
|
503
|
+
const embedColsNames = getEmbeddingColsNames(table);
|
|
504
|
+
const withoutEmptyValues = DG.DataFrame.fromColumns([macroMolecule]).clone();
|
|
505
|
+
const emptyValsIdxs = removeEmptyStringRows(withoutEmptyValues, macroMolecule);
|
|
506
|
+
|
|
507
|
+
const chemSpaceParams: ISequenceSpaceParams = {
|
|
508
|
+
seqCol: withoutEmptyValues.col(macroMolecule.name)!,
|
|
509
|
+
methodName: methodName,
|
|
510
|
+
similarityMetric: similarityMetric,
|
|
511
|
+
embedAxesNames: embedColsNames,
|
|
512
|
+
options: {...options, sparseMatrixThreshold: sparseMatrixThreshold ?? 0.8,
|
|
513
|
+
usingSparseMatrix: table.rowCount > 20000},
|
|
514
|
+
};
|
|
515
|
+
|
|
516
|
+
const allowedRowCount = methodName === DimReductionMethods.UMAP ? 100000 : 15000;
|
|
517
|
+
// number of rows which will be processed relatively fast
|
|
518
|
+
const fastRowCount = methodName === DimReductionMethods.UMAP ? 5000 : 2000;
|
|
519
|
+
if (table.rowCount > allowedRowCount) {
|
|
520
|
+
grok.shell.warning(`Too many rows, maximum for sequence space is ${allowedRowCount}`);
|
|
521
|
+
return;
|
|
522
|
+
}
|
|
487
523
|
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
524
|
+
async function getSeqSpace() {
|
|
525
|
+
let resolveF: Function | null = null;
|
|
526
|
+
|
|
527
|
+
const sub = grok.events.onViewerClosed.subscribe((args) => {
|
|
528
|
+
const v = args.args.viewer as unknown as DG.Viewer<any>;
|
|
529
|
+
if (v?.getOptions()?.look?.title && scatterPlot?.getOptions()?.look?.title &&
|
|
530
|
+
v?.getOptions()?.look?.title === scatterPlot?.getOptions()?.look?.title) {
|
|
531
|
+
grok.events.fireCustomEvent(DIMENSIONALITY_REDUCER_TERMINATE_EVENT, {});
|
|
532
|
+
sub.unsubscribe();
|
|
533
|
+
resolveF?.();
|
|
534
|
+
pg.close();
|
|
535
|
+
}
|
|
536
|
+
});
|
|
537
|
+
const sequenceSpaceResPromise = new Promise<ISequenceSpaceResult | undefined>(async (resolve) => {
|
|
538
|
+
resolveF = resolve;
|
|
539
|
+
const res = await getSequenceSpace(chemSpaceParams,
|
|
540
|
+
options?.[BYPASS_LARGE_DATA_WARNING] ? undefined : progressFunc);
|
|
541
|
+
resolve(res);
|
|
542
|
+
});
|
|
543
|
+
const sequenceSpaceRes = await sequenceSpaceResPromise;
|
|
544
|
+
pg.close();
|
|
545
|
+
sub.unsubscribe();
|
|
546
|
+
return sequenceSpaceRes ? processResult(sequenceSpaceRes) : sequenceSpaceRes;
|
|
547
|
+
}
|
|
495
548
|
|
|
496
|
-
|
|
497
|
-
|
|
549
|
+
if (table.rowCount > fastRowCount && !options?.[BYPASS_LARGE_DATA_WARNING]) {
|
|
550
|
+
ui.dialog().add(ui.divText(`Sequence space analysis might take several minutes.
|
|
498
551
|
Do you want to continue?`))
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
506
|
-
} else {
|
|
507
|
-
const sequenceSpaceRes = await getSequenceSpace(chemSpaceParams);
|
|
508
|
-
return processResult(sequenceSpaceRes);
|
|
509
|
-
}
|
|
510
|
-
|
|
511
|
-
function processResult(sequenceSpaceRes: ISequenceSpaceResult): DG.ScatterPlotViewer | undefined {
|
|
512
|
-
const embeddings = sequenceSpaceRes.coordinates;
|
|
513
|
-
for (const col of embeddings) {
|
|
514
|
-
const listValues = col.toList();
|
|
515
|
-
emptyValsIdxs.forEach((ind: number) => listValues.splice(ind, 0, null));
|
|
516
|
-
table.columns.add(DG.Column.float(col.name, table.rowCount).init((i) => listValues[i]));
|
|
552
|
+
.onOK(async () => {
|
|
553
|
+
await getSeqSpace();
|
|
554
|
+
})
|
|
555
|
+
.onCancel(() => { pg.close(); })
|
|
556
|
+
.show();
|
|
557
|
+
} else {
|
|
558
|
+
return await getSeqSpace();
|
|
517
559
|
}
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
560
|
+
|
|
561
|
+
function processResult(sequenceSpaceRes: ISequenceSpaceResult): DG.ScatterPlotViewer | undefined {
|
|
562
|
+
const embeddings = sequenceSpaceRes.coordinates;
|
|
563
|
+
for (const col of embeddings) {
|
|
564
|
+
const listValues = col.toList();
|
|
565
|
+
emptyValsIdxs.forEach((ind: number) => listValues.splice(ind, 0, null));
|
|
566
|
+
let embedCol = table.columns.byName(col.name);
|
|
567
|
+
if (!embedCol) {
|
|
568
|
+
embedCol = DG.Column.float(col.name, listValues.length);
|
|
569
|
+
table.columns.add(embedCol);
|
|
570
|
+
}
|
|
571
|
+
embedCol.init((i) => listValues[i]);
|
|
572
|
+
//table.columns.add(DG.Column.float(col.name, table.rowCount).init((i) => listValues[i]));
|
|
573
|
+
}
|
|
574
|
+
if (plotEmbeddings) {
|
|
575
|
+
if (!scatterPlot) {
|
|
576
|
+
scatterPlot = grok.shell
|
|
577
|
+
.tableView(table.name)
|
|
578
|
+
.scatterPlot({x: embedColsNames[0], y: embedColsNames[1], title: 'Sequence space'});
|
|
579
|
+
}
|
|
580
|
+
return scatterPlot;
|
|
581
|
+
}
|
|
522
582
|
}
|
|
583
|
+
} catch (e) {
|
|
584
|
+
console.error(e);
|
|
585
|
+
pg.close();
|
|
523
586
|
}
|
|
524
587
|
|
|
525
588
|
|
|
@@ -550,7 +613,7 @@ export async function sequenceSpaceTopMenu(
|
|
|
550
613
|
sp = (v as DG.TableView).scatterPlot({x: embedColsNames[0], y: embedColsNames[1], title: 'Sequence space'});
|
|
551
614
|
}
|
|
552
615
|
} */
|
|
553
|
-
}
|
|
616
|
+
}
|
|
554
617
|
|
|
555
618
|
//top-menu: Bio | Convert | To Atomic Level...
|
|
556
619
|
//name: To Atomic Level
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import * as grok from 'datagrok-api/grok';
|
|
2
2
|
import * as DG from 'datagrok-api/dg';
|
|
3
3
|
|
|
4
|
-
import {category, expect, expectArray, test,
|
|
4
|
+
import {category, expect, expectArray, test, testEvent} from '@datagrok-libraries/utils/src/test';
|
|
5
5
|
import {ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
6
6
|
import {
|
|
7
7
|
countForMonomerAtPosition,
|
|
@@ -30,9 +30,9 @@ ATC-G-TTGC--
|
|
|
30
30
|
seqCol.setTag(bioTAGS.aligned, 'SEQ.MSA');
|
|
31
31
|
|
|
32
32
|
const wlViewer: WebLogoViewer = (await df.plot.fromType('WebLogo')) as WebLogoViewer;
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
33
|
+
await testEvent(wlViewer.onLayoutCalculated, () => {}, () => {
|
|
34
|
+
tv.dockManager.dock(wlViewer.root, DG.DOCK_TYPE.DOWN);
|
|
35
|
+
}, 200);
|
|
36
36
|
const positions: PI[] = wlViewer['positions'];
|
|
37
37
|
|
|
38
38
|
const resAllDf1: PI[] = [
|
|
@@ -82,9 +82,9 @@ ATC-G-TTGC--
|
|
|
82
82
|
df.filter.fireChanged();
|
|
83
83
|
const wlViewer: WebLogoViewer = (await df.plot.fromType('WebLogo',
|
|
84
84
|
{'shrinkEmptyTail': true})) as WebLogoViewer;
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
85
|
+
await testEvent(wlViewer.onLayoutCalculated, () => {}, () => {
|
|
86
|
+
tv.dockManager.dock(wlViewer.root, DG.DOCK_TYPE.DOWN);
|
|
87
|
+
}, 200);
|
|
88
88
|
const positions: PI[] = wlViewer['positions'];
|
|
89
89
|
|
|
90
90
|
const resAllDf1: PI[] = [
|
|
@@ -120,9 +120,9 @@ ATC-G-TTGC--
|
|
|
120
120
|
|
|
121
121
|
const wlViewer: WebLogoViewer = (await df.plot.fromType('WebLogo',
|
|
122
122
|
{'skipEmptyPositions': true})) as WebLogoViewer;
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
123
|
+
await testEvent(wlViewer.onLayoutCalculated, () => {}, () => {
|
|
124
|
+
tv.dockManager.dock(wlViewer.root, DG.DOCK_TYPE.DOWN);
|
|
125
|
+
}, 200);
|
|
126
126
|
const resPosList: PI[] = wlViewer['positions'];
|
|
127
127
|
|
|
128
128
|
const tgtPosList: PI[] = [
|
|
@@ -156,9 +156,9 @@ ATC-G-TTGC--
|
|
|
156
156
|
endPositionName: '7',
|
|
157
157
|
skipEmptyPositions: true,
|
|
158
158
|
})) as WebLogoViewer;
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
159
|
+
await testEvent(wlViewer.onLayoutCalculated, () => {}, () => {
|
|
160
|
+
tv.dockManager.dock(wlViewer.root, DG.DOCK_TYPE.DOWN);
|
|
161
|
+
}, 200);
|
|
162
162
|
const resPosList: PI[] = wlViewer['positions'];
|
|
163
163
|
const tgtPosList: PI[] = [
|
|
164
164
|
new PI(2, '3', {'C': new PMI(5)}),
|
|
@@ -38,7 +38,7 @@ category('activityCliffs', async () => {
|
|
|
38
38
|
const cliffsNum = DG.Test.isInBenchmark ? 6 : 3;
|
|
39
39
|
|
|
40
40
|
await _testActivityCliffsOpen(actCliffsDf, cliffsNum, DimReductionMethods.UMAP, 'sequence');
|
|
41
|
-
}
|
|
41
|
+
});
|
|
42
42
|
|
|
43
43
|
test('activityCliffsWithEmptyRows', async () => {
|
|
44
44
|
actCliffsDfWithEmptyRows = await readDataframe('tests/100_3_clustests_empty_vals.csv');
|
|
@@ -47,5 +47,5 @@ category('activityCliffs', async () => {
|
|
|
47
47
|
viewList.push(actCliffsTableViewWithEmptyRows);
|
|
48
48
|
|
|
49
49
|
await _testActivityCliffsOpen(actCliffsDfWithEmptyRows, 3, DimReductionMethods.UMAP, 'sequence');
|
|
50
|
-
}
|
|
50
|
+
});
|
|
51
51
|
});
|
|
@@ -2,7 +2,7 @@ import * as DG from 'datagrok-api/dg';
|
|
|
2
2
|
import * as grok from 'datagrok-api/grok';
|
|
3
3
|
|
|
4
4
|
import {expect} from '@datagrok-libraries/utils/src/test';
|
|
5
|
-
import {activityCliffs} from '../package';
|
|
5
|
+
import {activityCliffs, BYPASS_LARGE_DATA_WARNING} from '../package';
|
|
6
6
|
import {DimReductionMethods} from '@datagrok-libraries/ml/src/reduce-dimensionality';
|
|
7
7
|
|
|
8
8
|
export async function _testActivityCliffsOpen(df: DG.DataFrame, numberCliffs: number, method: DimReductionMethods,
|
|
@@ -10,7 +10,7 @@ export async function _testActivityCliffsOpen(df: DG.DataFrame, numberCliffs: nu
|
|
|
10
10
|
await grok.data.detectSemanticTypes(df);
|
|
11
11
|
const scatterPlot = await activityCliffs(
|
|
12
12
|
df, df.getCol(colName), df.getCol('activity'),
|
|
13
|
-
90, method);
|
|
13
|
+
90, method, {[`${BYPASS_LARGE_DATA_WARNING}`]: true});
|
|
14
14
|
// const scatterPlot = (await grok.functions.call('Bio:activityCliffs', {
|
|
15
15
|
// table: df, molecules: df.getCol(colName), activities: df.getCol('Activity'),
|
|
16
16
|
// similarity: 50, methodName: method
|
|
@@ -29,7 +29,7 @@ category('detectorsBenchmark', () => {
|
|
|
29
29
|
});
|
|
30
30
|
|
|
31
31
|
test('fastaDnaLong1e6Few50', async () => {
|
|
32
|
-
await detectMacromoleculeBenchmark(
|
|
32
|
+
await detectMacromoleculeBenchmark(20, NOTATION.FASTA, ALPHABET.DNA, 1E6, 50);
|
|
33
33
|
});
|
|
34
34
|
|
|
35
35
|
// -- separator --
|
|
@@ -49,7 +49,7 @@ category('detectorsBenchmark', () => {
|
|
|
49
49
|
async function detectMacromoleculeBenchmark(
|
|
50
50
|
maxET: number, notation: NOTATION, alphabet: ALPHABET, length: number, count: number, separator?: string,
|
|
51
51
|
): Promise<number> {
|
|
52
|
-
return await benchmark<DG.FuncCall, DG.Column>(
|
|
52
|
+
return await benchmark<DG.FuncCall, DG.Column>(maxET,
|
|
53
53
|
(): DG.FuncCall => {
|
|
54
54
|
const col: DG.Column = generate(notation, [...getAlphabet(alphabet)], length, count, separator);
|
|
55
55
|
const funcCall: DG.FuncCall = detectFunc.prepare({col: col});
|
|
@@ -182,7 +182,10 @@ MWRSWY-CKHP`;
|
|
|
182
182
|
const df: DG.DataFrame = await readFile(samples[key]);
|
|
183
183
|
// await grok.data.detectSemanticTypes(df);
|
|
184
184
|
return df;
|
|
185
|
-
})()
|
|
185
|
+
})().catch((err: any) => {
|
|
186
|
+
delete _samplesDfs[key];
|
|
187
|
+
throw err;
|
|
188
|
+
});
|
|
186
189
|
}
|
|
187
190
|
return _samplesDfs[key];
|
|
188
191
|
};
|
|
@@ -19,15 +19,15 @@ category('sequenceSpace', async () => {
|
|
|
19
19
|
);
|
|
20
20
|
testFastaTableView = grok.shell.addTableView(testFastaDf);
|
|
21
21
|
await _testSequenceSpaceReturnsResult(testFastaDf, DimReductionMethods.UMAP, 'sequence');
|
|
22
|
-
grok.shell.closeTable(testFastaDf);
|
|
23
|
-
testFastaTableView.close();
|
|
22
|
+
//grok.shell.closeTable(testFastaDf);
|
|
23
|
+
//testFastaTableView.close();
|
|
24
24
|
});
|
|
25
25
|
|
|
26
26
|
test('sequenceSpaceWithEmptyRows', async () => {
|
|
27
27
|
testHelmWithEmptyRows = await readDataframe('tests/100_3_clustests_empty_vals.csv');
|
|
28
28
|
testHelmWithEmptyRowsTableView = grok.shell.addTableView(testHelmWithEmptyRows);
|
|
29
29
|
await _testSequenceSpaceReturnsResult(testHelmWithEmptyRows, DimReductionMethods.UMAP, 'sequence');
|
|
30
|
-
grok.shell.closeTable(testHelmWithEmptyRows);
|
|
31
|
-
testHelmWithEmptyRowsTableView.close();
|
|
30
|
+
//grok.shell.closeTable(testHelmWithEmptyRows);
|
|
31
|
+
//testHelmWithEmptyRowsTableView.close();
|
|
32
32
|
});
|
|
33
33
|
});
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import * as DG from 'datagrok-api/dg';
|
|
2
2
|
import * as grok from 'datagrok-api/grok';
|
|
3
3
|
import {expect} from '@datagrok-libraries/utils/src/test';
|
|
4
|
-
import {sequenceSpaceTopMenu} from '../package';
|
|
4
|
+
import {BYPASS_LARGE_DATA_WARNING, sequenceSpaceTopMenu} from '../package';
|
|
5
5
|
import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
6
6
|
import {DimReductionMethods} from '@datagrok-libraries/ml/src/reduce-dimensionality';
|
|
7
7
|
|
|
@@ -14,6 +14,7 @@ export async function _testSequenceSpaceReturnsResult(
|
|
|
14
14
|
if (semType)
|
|
15
15
|
col.semType = semType;
|
|
16
16
|
|
|
17
|
-
const sp = await sequenceSpaceTopMenu(df, df.col(colName)!, algorithm, MmDistanceFunctionsNames.LEVENSHTEIN, true
|
|
17
|
+
const sp = await sequenceSpaceTopMenu(df, df.col(colName)!, algorithm, MmDistanceFunctionsNames.LEVENSHTEIN, true,
|
|
18
|
+
0.6, {[`${BYPASS_LARGE_DATA_WARNING}`]: true});
|
|
18
19
|
expect(sp != null, true);
|
|
19
20
|
}
|
|
@@ -27,6 +27,7 @@ import * as C from './constants';
|
|
|
27
27
|
import {_package, getBioLib} from '../package';
|
|
28
28
|
import {ISeqSplitted} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
|
|
29
29
|
import {getSplitter} from '@datagrok-libraries/bio/src/utils/macromolecule/utils';
|
|
30
|
+
import {errInfo} from './err-info';
|
|
30
31
|
|
|
31
32
|
|
|
32
33
|
type TempType = { [tagName: string]: any };
|
|
@@ -136,8 +137,15 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
|
|
|
136
137
|
|
|
137
138
|
// TODO: Store temp data to GridColumn
|
|
138
139
|
// Now the renderer requires data frame table Column underlying GridColumn
|
|
139
|
-
|
|
140
|
+
let grid: DG.Grid | undefined = undefined;
|
|
141
|
+
try { grid = gridCell.grid; } catch (err: any) {
|
|
142
|
+
grid = undefined;
|
|
143
|
+
const [errMsg, errStack] = errInfo(err);
|
|
144
|
+
_package.logger.error(errMsg, undefined, errStack);
|
|
145
|
+
}
|
|
140
146
|
const tableCol: DG.Column = gridCell.cell.column;
|
|
147
|
+
if (!grid || !tableCol) return;
|
|
148
|
+
|
|
141
149
|
const tableColTemp: TempType = tableCol.temp;
|
|
142
150
|
|
|
143
151
|
// Cell renderer settings
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import * as grok from 'datagrok-api/grok';
|
|
2
|
+
import * as ui from 'datagrok-api/ui';
|
|
3
|
+
import * as DG from 'datagrok-api/dg';
|
|
4
|
+
|
|
5
|
+
export function errMsg(err: any): string {
|
|
6
|
+
if (typeof err === 'string' || err instanceof String)
|
|
7
|
+
return err as string;
|
|
8
|
+
else if (err.constructor.name === 'StateError')
|
|
9
|
+
return err['message'];
|
|
10
|
+
else if (err.constructor.name === 'StateError' && '$thrownJsError' in err)
|
|
11
|
+
return errMsg(err['$thrownJsError']);
|
|
12
|
+
else if (err instanceof Error)
|
|
13
|
+
return (err as Error).message;
|
|
14
|
+
else
|
|
15
|
+
return err.toString();
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
export function errStack(err: any): string | undefined {
|
|
19
|
+
if (err instanceof Error)
|
|
20
|
+
return err.stack;
|
|
21
|
+
else if (err.constructor.name === 'StateError' && '$thrownJsError' in err)
|
|
22
|
+
return errStack(err['$thrownJsError']);
|
|
23
|
+
return undefined;
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
export function errInfo(err: any): [string, string | undefined] {
|
|
27
|
+
return [errMsg(err), errStack(err)];
|
|
28
|
+
}
|