@datagrok/bio 2.26.2 → 2.26.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +16 -0
- package/dist/284.js.map +1 -1
- package/dist/455.js.map +1 -1
- package/dist/705.js +1 -1
- package/dist/705.js.map +1 -1
- package/dist/980.js.map +1 -1
- package/dist/package-test.js +2 -2
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +2 -2
- package/dist/package.js.map +1 -1
- package/package.json +5 -5
- package/src/analysis/sequence-activity-cliffs.ts +12 -0
- package/src/package-api.ts +13 -1
- package/src/package-test.ts +1 -0
- package/src/package.g.ts +35 -0
- package/src/package.ts +163 -56
- package/src/tests/projects-tests.ts +202 -0
- package/src/utils/cell-renderer.ts +4 -2
- package/src/utils/convert.ts +9 -3
- package/src/utils/monomer-cell-renderer.ts +40 -3
- package/src/utils/seq-helper/seq-handler.ts +18 -8
- package/test-console-output-1.log +748 -583
- package/test-record-1.mp4 +0 -0
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Davit Rizhinashvili",
|
|
6
6
|
"email": "drizhinashvili@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.26.
|
|
8
|
+
"version": "2.26.5",
|
|
9
9
|
"description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -44,10 +44,10 @@
|
|
|
44
44
|
],
|
|
45
45
|
"dependencies": {
|
|
46
46
|
"@biowasm/aioli": "^3.1.0",
|
|
47
|
-
"@datagrok-libraries/bio": "^5.63.
|
|
47
|
+
"@datagrok-libraries/bio": "^5.63.5",
|
|
48
48
|
"@datagrok-libraries/chem-meta": "^1.2.9",
|
|
49
49
|
"@datagrok-libraries/math": "^1.2.6",
|
|
50
|
-
"@datagrok-libraries/ml": "^6.10.
|
|
50
|
+
"@datagrok-libraries/ml": "^6.10.11",
|
|
51
51
|
"@datagrok-libraries/test": "^1.1.0",
|
|
52
52
|
"@datagrok-libraries/tutorials": "^1.7.4",
|
|
53
53
|
"@datagrok-libraries/utils": "^4.6.9",
|
|
@@ -68,7 +68,7 @@
|
|
|
68
68
|
"devDependencies": {
|
|
69
69
|
"@datagrok-libraries/helm-web-editor": "^1.1.16",
|
|
70
70
|
"@datagrok-libraries/js-draw-lite": "^0.0.10",
|
|
71
|
-
"@datagrok/chem": "^1.
|
|
71
|
+
"@datagrok/chem": "^1.17.1",
|
|
72
72
|
"@datagrok/dendrogram": "^1.2.33",
|
|
73
73
|
"@datagrok/eda": "^1.4.13",
|
|
74
74
|
"@datagrok/helm": "^2.13.1",
|
|
@@ -77,7 +77,7 @@
|
|
|
77
77
|
"@types/wu": "^2.1.44",
|
|
78
78
|
"@typescript-eslint/eslint-plugin": "^8.8.1",
|
|
79
79
|
"@typescript-eslint/parser": "^8.8.1",
|
|
80
|
-
"datagrok-tools": "^5.1.
|
|
80
|
+
"datagrok-tools": "^5.1.9",
|
|
81
81
|
"eslint": "^8.57.1",
|
|
82
82
|
"eslint-config-google": "^0.14.0",
|
|
83
83
|
"eslint-plugin-rxjs": "^5.0.3",
|
|
@@ -15,8 +15,20 @@ import {HelmType} from '@datagrok-libraries/bio/src/helm/types';
|
|
|
15
15
|
import {drawMoleculeDifferenceOnCanvas} from '../utils/cell-renderer';
|
|
16
16
|
import {invalidateMols, MONOMERIC_COL_TAGS} from '../substructure-search/substructure-search';
|
|
17
17
|
|
|
18
|
+
import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
19
|
+
import {BitArrayMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
20
|
+
|
|
18
21
|
import {_package} from '../package';
|
|
19
22
|
|
|
23
|
+
export type SeqActivityCliffsParams = {
|
|
24
|
+
seqColName: string,
|
|
25
|
+
activityColName: string,
|
|
26
|
+
similarityMetric: MmDistanceFunctionsNames | BitArrayMetrics,
|
|
27
|
+
similarity: number,
|
|
28
|
+
options: any,
|
|
29
|
+
isDemo?: boolean,
|
|
30
|
+
}
|
|
31
|
+
|
|
20
32
|
export async function getDistances(col: DG.Column, seq: string): Promise<Array<number>> {
|
|
21
33
|
const stringArray = col.toList();
|
|
22
34
|
const distances = new Array(stringArray.length).fill(0);
|
package/src/package-api.ts
CHANGED
|
@@ -192,6 +192,14 @@ export namespace funcs {
|
|
|
192
192
|
return await grok.functions.call('Bio:ActivityCliffs', { table, molecules, activities, similarity, methodName, similarityMetric, preprocessingFunction, options, demo });
|
|
193
193
|
}
|
|
194
194
|
|
|
195
|
+
export async function seqActivityCliffsInitFunction(sp: any ): Promise<void> {
|
|
196
|
+
return await grok.functions.call('Bio:SeqActivityCliffsInitFunction', { sp });
|
|
197
|
+
}
|
|
198
|
+
|
|
199
|
+
export async function seqActivityCliffsTransform(table: DG.DataFrame , molecules: DG.Column , activities: DG.Column , similarity: number , methodName: string , similarityMetric: string , options?: string , isDemo?: boolean , axesNames?: any ): Promise<void> {
|
|
200
|
+
return await grok.functions.call('Bio:SeqActivityCliffsTransform', { table, molecules, activities, similarity, methodName, similarityMetric, options, isDemo, axesNames });
|
|
201
|
+
}
|
|
202
|
+
|
|
195
203
|
export async function macromoleculePreprocessingFunction(col: DG.Column , metric: string , gapOpen?: number , gapExtend?: number , fingerprintType?: string ): Promise<any> {
|
|
196
204
|
return await grok.functions.call('Bio:MacromoleculePreprocessingFunction', { col, metric, gapOpen, gapExtend, fingerprintType });
|
|
197
205
|
}
|
|
@@ -203,10 +211,14 @@ export namespace funcs {
|
|
|
203
211
|
/**
|
|
204
212
|
Creates 2D sequence space with projected sequences by pairwise distance
|
|
205
213
|
*/
|
|
206
|
-
export async function sequenceSpaceTopMenu(table: DG.DataFrame , molecules: DG.Column , methodName: string , similarityMetric: string , plotEmbeddings: boolean , preprocessingFunction?: any , options?: any , clusterEmbeddings?: boolean , isDemo?: boolean ): Promise<
|
|
214
|
+
export async function sequenceSpaceTopMenu(table: DG.DataFrame , molecules: DG.Column , methodName: string , similarityMetric: string , plotEmbeddings: boolean , preprocessingFunction?: any , options?: any , clusterEmbeddings?: boolean , isDemo?: boolean ): Promise<any> {
|
|
207
215
|
return await grok.functions.call('Bio:SequenceSpaceTopMenu', { table, molecules, methodName, similarityMetric, plotEmbeddings, preprocessingFunction, options, clusterEmbeddings, isDemo });
|
|
208
216
|
}
|
|
209
217
|
|
|
218
|
+
export async function sequenceSpaceTransform(table: DG.DataFrame , molecules: DG.Column , methodName: string , similarityMetric: string , plotEmbeddings: boolean , options?: string , clusterEmbeddings?: boolean , embedColsNames?: any , clusterColName?: string ): Promise<any> {
|
|
219
|
+
return await grok.functions.call('Bio:SequenceSpaceTransform', { table, molecules, methodName, similarityMetric, plotEmbeddings, options, clusterEmbeddings, embedColsNames, clusterColName });
|
|
220
|
+
}
|
|
221
|
+
|
|
210
222
|
/**
|
|
211
223
|
Converts Peptide molecules to HELM notation by matching with monomer library
|
|
212
224
|
*/
|
package/src/package-test.ts
CHANGED
package/src/package.g.ts
CHANGED
|
@@ -270,6 +270,25 @@ export async function activityCliffs(table: DG.DataFrame, molecules: DG.Column<a
|
|
|
270
270
|
return await PackageFunctions.activityCliffs(table, molecules, activities, similarity, methodName, similarityMetric, preprocessingFunction, options, demo);
|
|
271
271
|
}
|
|
272
272
|
|
|
273
|
+
//input: viewer sp
|
|
274
|
+
export async function seqActivityCliffsInitFunction(sp: any) : Promise<void> {
|
|
275
|
+
await PackageFunctions.seqActivityCliffsInitFunction(sp);
|
|
276
|
+
}
|
|
277
|
+
|
|
278
|
+
//input: dataframe table { description: Input data table }
|
|
279
|
+
//input: column molecules { semType: Macromolecule }
|
|
280
|
+
//input: column activities { type: numerical }
|
|
281
|
+
//input: double similarity = 80 { description: Similarity cutoff }
|
|
282
|
+
//input: string methodName
|
|
283
|
+
//input: string similarityMetric
|
|
284
|
+
//input: string options { optional: true }
|
|
285
|
+
//input: bool isDemo { optional: true }
|
|
286
|
+
//input: list<string> axesNames { optional: true }
|
|
287
|
+
//meta.role: transform
|
|
288
|
+
export async function seqActivityCliffsTransform(table: DG.DataFrame, molecules: DG.Column, activities: DG.Column, similarity: number, methodName: any, similarityMetric: any, options?: string, isDemo?: boolean, axesNames?: string[]) : Promise<void> {
|
|
289
|
+
await PackageFunctions.seqActivityCliffsTransform(table, molecules, activities, similarity, methodName, similarityMetric, options, isDemo, axesNames);
|
|
290
|
+
}
|
|
291
|
+
|
|
273
292
|
//name: Encode Sequences
|
|
274
293
|
//tags: dim-red-preprocessing-function
|
|
275
294
|
//input: column col { semType: Macromolecule }
|
|
@@ -309,12 +328,28 @@ export async function helmPreprocessingFunction(col: DG.Column<any>, _metric: an
|
|
|
309
328
|
//input: object options { optional: true }
|
|
310
329
|
//input: bool clusterEmbeddings = true { optional: true }
|
|
311
330
|
//input: bool isDemo { optional: true }
|
|
331
|
+
//output: viewer result
|
|
312
332
|
//top-menu: Bio | Analyze | Sequence Space...
|
|
313
333
|
//editor: Bio:SequenceSpaceEditor
|
|
314
334
|
export async function sequenceSpaceTopMenu(table: DG.DataFrame, molecules: DG.Column, methodName: any, similarityMetric: any, plotEmbeddings: boolean, preprocessingFunction?: any, options?: any, clusterEmbeddings?: boolean, isDemo?: boolean) : Promise<any> {
|
|
315
335
|
return await PackageFunctions.sequenceSpaceTopMenu(table, molecules, methodName, similarityMetric, plotEmbeddings, preprocessingFunction, options, clusterEmbeddings, isDemo);
|
|
316
336
|
}
|
|
317
337
|
|
|
338
|
+
//input: dataframe table
|
|
339
|
+
//input: column molecules { semType: Macromolecule }
|
|
340
|
+
//input: string methodName
|
|
341
|
+
//input: string similarityMetric
|
|
342
|
+
//input: bool plotEmbeddings = true
|
|
343
|
+
//input: string options { optional: true }
|
|
344
|
+
//input: bool clusterEmbeddings { optional: true }
|
|
345
|
+
//input: list<string> embedColsNames { optional: true }
|
|
346
|
+
//input: string clusterColName { optional: true }
|
|
347
|
+
//output: viewer result
|
|
348
|
+
//meta.role: transform
|
|
349
|
+
export async function sequenceSpaceTransform(table: DG.DataFrame, molecules: DG.Column, methodName: any, similarityMetric: any, plotEmbeddings: boolean, options?: string, clusterEmbeddings?: boolean, embedColsNames?: string[], clusterColName?: string) : Promise<any> {
|
|
350
|
+
return await PackageFunctions.sequenceSpaceTransform(table, molecules, methodName, similarityMetric, plotEmbeddings, options, clusterEmbeddings, embedColsNames, clusterColName);
|
|
351
|
+
}
|
|
352
|
+
|
|
318
353
|
//name: Molecules to HELM
|
|
319
354
|
//description: Converts Peptide molecules to HELM notation by matching with monomer library
|
|
320
355
|
//input: dataframe table { description: Input data table }
|
package/src/package.ts
CHANGED
|
@@ -9,7 +9,7 @@ import * as DG from 'datagrok-api/dg';
|
|
|
9
9
|
|
|
10
10
|
import {Options} from '@datagrok-libraries/utils/src/type-declarations';
|
|
11
11
|
import {DimReductionBaseEditor, PreprocessFunctionReturnType} from '@datagrok-libraries/ml/src/functionEditors/dimensionality-reduction-editor';
|
|
12
|
-
import {
|
|
12
|
+
import {getActivityCliffsEmbeddings, runActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
13
13
|
import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
14
14
|
import {BitArrayMetrics, KnownMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
15
15
|
import {ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
@@ -38,7 +38,7 @@ import {MacromoleculeDifferenceCellRenderer, MacromoleculeSequenceCellRenderer,}
|
|
|
38
38
|
import {VdRegionsViewer} from './viewers/vd-regions-viewer';
|
|
39
39
|
import {SequenceAlignment} from './seq_align';
|
|
40
40
|
import {getEncodedSeqSpaceCol} from './analysis/sequence-space';
|
|
41
|
-
import {createLinesGrid, createPropPanelElement, createTooltipElement,} from './analysis/sequence-activity-cliffs';
|
|
41
|
+
import {createLinesGrid, createPropPanelElement, createTooltipElement, SeqActivityCliffsParams} from './analysis/sequence-activity-cliffs';
|
|
42
42
|
import {SequenceSimilarityViewer} from './analysis/sequence-similarity-viewer';
|
|
43
43
|
import {SequenceDiversityViewer} from './analysis/sequence-diversity-viewer';
|
|
44
44
|
import {invalidateMols, MONOMERIC_COL_TAGS, SubstructureSearchDialog} from './substructure-search/substructure-search';
|
|
@@ -522,38 +522,6 @@ export class PackageFunctions {
|
|
|
522
522
|
}
|
|
523
523
|
if (!checkInputColumnUI(molecules, 'Activity Cliffs'))
|
|
524
524
|
return;
|
|
525
|
-
const axesNames = getEmbeddingColsNames(table);
|
|
526
|
-
const tags = {
|
|
527
|
-
'units': molecules.meta.units!,
|
|
528
|
-
'aligned': molecules.getTag(bioTAGS.aligned),
|
|
529
|
-
'separator': molecules.getTag(bioTAGS.separator),
|
|
530
|
-
'alphabet': molecules.getTag(bioTAGS.alphabet),
|
|
531
|
-
};
|
|
532
|
-
const columnDistanceMetric: MmDistanceFunctionsNames | BitArrayMetrics = similarityMetric;
|
|
533
|
-
const seqCol = molecules;
|
|
534
|
-
|
|
535
|
-
const runCliffs = async () => {
|
|
536
|
-
const sp = await getActivityCliffs(
|
|
537
|
-
table,
|
|
538
|
-
seqCol,
|
|
539
|
-
axesNames,
|
|
540
|
-
'Activity cliffs', //scatterTitle
|
|
541
|
-
activities,
|
|
542
|
-
similarity,
|
|
543
|
-
columnDistanceMetric, //similarityMetric
|
|
544
|
-
methodName,
|
|
545
|
-
{...(options ?? {})},
|
|
546
|
-
DG.SEMTYPE.MACROMOLECULE,
|
|
547
|
-
tags,
|
|
548
|
-
preprocessingFunction,
|
|
549
|
-
createTooltipElement,
|
|
550
|
-
createPropPanelElement,
|
|
551
|
-
createLinesGrid,
|
|
552
|
-
undefined,
|
|
553
|
-
demo
|
|
554
|
-
);
|
|
555
|
-
return sp;
|
|
556
|
-
};
|
|
557
525
|
|
|
558
526
|
const allowedRowCount = methodName === DimReductionMethods.UMAP ? 200_000 : 20_000;
|
|
559
527
|
const fastRowCount = methodName === DimReductionMethods.UMAP ? 5_000 : 2_000;
|
|
@@ -562,29 +530,125 @@ export class PackageFunctions {
|
|
|
562
530
|
return;
|
|
563
531
|
}
|
|
564
532
|
|
|
533
|
+
const axesNames = getEmbeddingColsNames(table);
|
|
534
|
+
|
|
535
|
+
const runCliffs = async (): Promise<void> => {
|
|
536
|
+
await DG.Func.find({name: 'seqActivityCliffsTransform'})[0].prepare({
|
|
537
|
+
table: table,
|
|
538
|
+
molecules: molecules,
|
|
539
|
+
activities: activities,
|
|
540
|
+
similarity: similarity,
|
|
541
|
+
methodName: methodName,
|
|
542
|
+
similarityMetric: similarityMetric,
|
|
543
|
+
options: JSON.stringify(options),
|
|
544
|
+
isDemo: demo,
|
|
545
|
+
axesNames: axesNames,
|
|
546
|
+
}).call(undefined, undefined, {processed: false});
|
|
547
|
+
|
|
548
|
+
const view = grok.shell.tv;
|
|
549
|
+
|
|
550
|
+
const description = `Molecules: ${molecules.name}, activities: ${activities.name}, method: ${methodName}, ${options ? `options: ${JSON.stringify(options)},` : ``} similarity: ${similarityMetric}, similarity cutoff: ${similarity}`;
|
|
551
|
+
view.addViewer(DG.VIEWER.SCATTER_PLOT, {
|
|
552
|
+
xColumnName: axesNames[0],
|
|
553
|
+
yColumnName: axesNames[1],
|
|
554
|
+
color: activities.name,
|
|
555
|
+
showXSelector: false,
|
|
556
|
+
showYSelector: false,
|
|
557
|
+
showSizeSelector: false,
|
|
558
|
+
showColorSelector: false,
|
|
559
|
+
markerMinSize: 5,
|
|
560
|
+
markerMaxSize: 25,
|
|
561
|
+
title: 'Activity cliffs',
|
|
562
|
+
initializationFunction: 'seqActivityCliffsInitFunction',
|
|
563
|
+
description: description,
|
|
564
|
+
descriptionVisibilityMode: 'Never',
|
|
565
|
+
}) as DG.ScatterPlotViewer;
|
|
566
|
+
};
|
|
567
|
+
|
|
565
568
|
const pi = DG.TaskBarProgressIndicator.create(`Running sequence activity cliffs ...`);
|
|
566
|
-
|
|
569
|
+
try {
|
|
567
570
|
if (table.rowCount > fastRowCount && !options?.[BYPASS_LARGE_DATA_WARNING]) {
|
|
568
|
-
|
|
571
|
+
await new Promise<void>((resolve, reject) => {
|
|
572
|
+
ui.dialog().add(ui.divText(`Activity cliffs analysis might take several minutes.
|
|
569
573
|
Do you want to continue?`))
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
574
|
+
.onOK(async () => {
|
|
575
|
+
runCliffs().then(() => resolve()).catch((err) => reject(err));
|
|
576
|
+
})
|
|
577
|
+
.onCancel(() => { resolve(); })
|
|
578
|
+
.show();
|
|
579
|
+
});
|
|
575
580
|
} else
|
|
576
|
-
runCliffs()
|
|
577
|
-
}
|
|
581
|
+
await runCliffs();
|
|
582
|
+
} catch (err: any) {
|
|
578
583
|
const [errMsg, errStack] = errInfo(err);
|
|
579
584
|
_package.logger.error(errMsg, undefined, errStack);
|
|
580
585
|
throw err;
|
|
581
|
-
}
|
|
582
|
-
|
|
583
|
-
table.col(scRes.props.xColumnName)!.set(0, table.col(scRes.props.xColumnName)!.get(0)); // to trigger rendering
|
|
584
|
-
table.col(scRes.props.yColumnName)!.set(0, table.col(scRes.props.yColumnName)!.get(0)); // to trigger rendering
|
|
586
|
+
} finally {
|
|
587
|
+
pi.close();
|
|
585
588
|
}
|
|
589
|
+
}
|
|
586
590
|
|
|
587
|
-
|
|
591
|
+
@grok.decorators.func({
|
|
592
|
+
name: 'seqActivityCliffsInitFunction',
|
|
593
|
+
})
|
|
594
|
+
static async seqActivityCliffsInitFunction(
|
|
595
|
+
@grok.decorators.param({type: 'viewer'}) sp: DG.ScatterPlotViewer): Promise<void> {
|
|
596
|
+
const tag = sp.dataFrame.getTag('seqActivityCliffsParams');
|
|
597
|
+
if (!tag) {
|
|
598
|
+
grok.shell.error(`Sequence activity cliffs parameters not found in table tags`);
|
|
599
|
+
return;
|
|
600
|
+
}
|
|
601
|
+
const actCliffsParams: SeqActivityCliffsParams = JSON.parse(tag);
|
|
602
|
+
const molCol = sp.dataFrame.col(actCliffsParams.seqColName)!
|
|
603
|
+
const actCol = sp.dataFrame.col(actCliffsParams.activityColName)!;
|
|
604
|
+
|
|
605
|
+
const preprocessingFunction = DG.Func.find({name: 'macromoleculePreprocessingFunction', package: 'Bio'})[0];
|
|
606
|
+
const encodedColWithOptions = await preprocessingFunction.apply({
|
|
607
|
+
col: molCol, metric: actCliffsParams.similarityMetric,
|
|
608
|
+
});
|
|
609
|
+
|
|
610
|
+
const axesNames = [sp.getOptions().look['xColumnName'], sp.getOptions().look['yColumnName']];
|
|
611
|
+
const tags = {
|
|
612
|
+
'units': molCol.meta.units!,
|
|
613
|
+
'aligned': molCol.getTag(bioTAGS.aligned),
|
|
614
|
+
'separator': molCol.getTag(bioTAGS.separator),
|
|
615
|
+
'alphabet': molCol.getTag(bioTAGS.alphabet),
|
|
616
|
+
};
|
|
617
|
+
|
|
618
|
+
await runActivityCliffs(sp, sp.dataFrame, molCol, encodedColWithOptions, actCol, axesNames,
|
|
619
|
+
actCliffsParams.similarity, actCliffsParams.similarityMetric, actCliffsParams.options ?? {},
|
|
620
|
+
DG.SEMTYPE.MACROMOLECULE, tags,
|
|
621
|
+
createTooltipElement, createPropPanelElement, createLinesGrid, undefined, actCliffsParams.isDemo);
|
|
622
|
+
}
|
|
623
|
+
|
|
624
|
+
@grok.decorators.func({
|
|
625
|
+
meta: {role: 'transform'},
|
|
626
|
+
})
|
|
627
|
+
static async seqActivityCliffsTransform(
|
|
628
|
+
@grok.decorators.param({options: {description: 'Input data table'}}) table: DG.DataFrame,
|
|
629
|
+
@grok.decorators.param({type: 'column', options: {semType: 'Macromolecule'}}) molecules: DG.Column,
|
|
630
|
+
@grok.decorators.param({type: 'column', options: {type: 'numerical'}}) activities: DG.Column,
|
|
631
|
+
@grok.decorators.param({options: {description: 'Similarity cutoff', initialValue: '80'}}) similarity: number,
|
|
632
|
+
@grok.decorators.param({type: 'string'}) methodName: DimReductionMethods,
|
|
633
|
+
@grok.decorators.param({type: 'string'}) similarityMetric: MmDistanceFunctionsNames | BitArrayMetrics,
|
|
634
|
+
@grok.decorators.param({options: {optional: true}}) options?: string,
|
|
635
|
+
@grok.decorators.param({options: {optional: true}}) isDemo?: boolean,
|
|
636
|
+
@grok.decorators.param({options: {optional: true}}) axesNames?: string[]): Promise<void> {
|
|
637
|
+
await table.meta.detectSemanticTypes();
|
|
638
|
+
const preprocessingFunction = DG.Func.find({name: 'macromoleculePreprocessingFunction', package: 'Bio'})[0];
|
|
639
|
+
if (!axesNames)
|
|
640
|
+
axesNames = getEmbeddingColsNames(table);
|
|
641
|
+
await getActivityCliffsEmbeddings(table, molecules, axesNames, similarity,
|
|
642
|
+
similarityMetric, methodName, JSON.parse(options ?? '{}'), preprocessingFunction);
|
|
643
|
+
const tagContent: SeqActivityCliffsParams = {
|
|
644
|
+
seqColName: molecules.name,
|
|
645
|
+
activityColName: activities.name,
|
|
646
|
+
similarityMetric: similarityMetric,
|
|
647
|
+
similarity: similarity,
|
|
648
|
+
options: options ?? {},
|
|
649
|
+
isDemo: isDemo,
|
|
650
|
+
};
|
|
651
|
+
table.setTag('seqActivityCliffsParams', JSON.stringify(tagContent));
|
|
588
652
|
}
|
|
589
653
|
|
|
590
654
|
@grok.decorators.func({
|
|
@@ -644,7 +708,7 @@ export class PackageFunctions {
|
|
|
644
708
|
description: 'Creates 2D sequence space with projected sequences by pairwise distance',
|
|
645
709
|
'top-menu': 'Bio | Analyze | Sequence Space...',
|
|
646
710
|
editor: 'Bio:SequenceSpaceEditor',
|
|
647
|
-
outputs: [],
|
|
711
|
+
outputs: [{type: 'viewer', name: 'result'}],
|
|
648
712
|
})
|
|
649
713
|
static async sequenceSpaceTopMenu(
|
|
650
714
|
table: DG.DataFrame,
|
|
@@ -662,22 +726,65 @@ export class PackageFunctions {
|
|
|
662
726
|
grok.shell.error(`Table ${table.name} is not a current table view`);
|
|
663
727
|
return;
|
|
664
728
|
}
|
|
665
|
-
const tableView =
|
|
666
|
-
grok.shell.tv.dataFrame == table ? grok.shell.tv : undefined;
|
|
667
729
|
if (!checkInputColumnUI(molecules, 'Sequence Space'))
|
|
668
730
|
return;
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
731
|
+
const clusterColName = table.columns.getUnusedName('Cluster (DBSCAN)');
|
|
732
|
+
const embedColsNames: string[] = getEmbeddingColsNames(table);
|
|
733
|
+
await DG.Func.find({name: 'sequenceSpaceTransform'})[0].prepare({
|
|
734
|
+
table: table,
|
|
735
|
+
molecules: molecules,
|
|
736
|
+
methodName: methodName,
|
|
737
|
+
similarityMetric: similarityMetric,
|
|
738
|
+
plotEmbeddings: false,
|
|
739
|
+
options: JSON.stringify(options),
|
|
740
|
+
clusterEmbeddings: clusterEmbeddings,
|
|
741
|
+
embedColsNames: embedColsNames,
|
|
742
|
+
clusterColName: clusterColName,
|
|
743
|
+
}).call(undefined, undefined, {processed: false});
|
|
744
|
+
|
|
745
|
+
let res: DG.ScatterPlotViewer | undefined;
|
|
746
|
+
if (plotEmbeddings) {
|
|
747
|
+
const tv = grok.shell.tv;
|
|
748
|
+
res = tv.scatterPlot({x: embedColsNames[0], y: embedColsNames[1], title: 'Sequence space'});
|
|
749
|
+
const description = `Molecules column: ${molecules.name}, method: ${methodName}, ${options ? `options: ${JSON.stringify(options)},` : ``} similarity: ${similarityMetric}`;
|
|
750
|
+
res.setOptions({description: description, descriptionVisibilityMode: 'Never'});
|
|
751
|
+
if (clusterEmbeddings)
|
|
752
|
+
res.props.colorColumnName = clusterColName;
|
|
753
|
+
}
|
|
754
|
+
return res;
|
|
755
|
+
}
|
|
756
|
+
|
|
757
|
+
@grok.decorators.func({
|
|
758
|
+
outputs: [{type: 'viewer', name: 'result'}],
|
|
759
|
+
meta: {role: 'transform'},
|
|
760
|
+
})
|
|
761
|
+
static async sequenceSpaceTransform(
|
|
762
|
+
table: DG.DataFrame,
|
|
763
|
+
@grok.decorators.param({options: {semType: 'Macromolecule'}}) molecules: DG.Column,
|
|
764
|
+
@grok.decorators.param({type: 'string'}) methodName: DimReductionMethods,
|
|
765
|
+
@grok.decorators.param({type: 'string'}) similarityMetric: BitArrayMetrics | MmDistanceFunctionsNames,
|
|
766
|
+
@grok.decorators.param({options: {initialValue: 'true'}}) plotEmbeddings: boolean,
|
|
767
|
+
@grok.decorators.param({options: {optional: true}}) options?: string,
|
|
768
|
+
@grok.decorators.param({options: {optional: true}}) clusterEmbeddings?: boolean,
|
|
769
|
+
@grok.decorators.param({options: {optional: true}}) embedColsNames?: string[],
|
|
770
|
+
@grok.decorators.param({options: {optional: true}}) clusterColName?: string,
|
|
771
|
+
): Promise<DG.ScatterPlotViewer | undefined> {
|
|
772
|
+
await table.meta.detectSemanticTypes();
|
|
773
|
+
const preprocessingFunction = DG.Func.find({name: 'macromoleculePreprocessingFunction', package: 'Bio'})[0];
|
|
774
|
+
const parsedOptions: any = JSON.parse(options ?? '{}');
|
|
775
|
+
const tableView =
|
|
776
|
+
grok.shell.tv?.dataFrame == table ? grok.shell.tv : undefined;
|
|
672
777
|
const res = await multiColReduceDimensionality(table, [molecules], methodName,
|
|
673
778
|
[similarityMetric as KnownMetrics], [1], [preprocessingFunction], 'MANHATTAN',
|
|
674
779
|
plotEmbeddings, clusterEmbeddings ?? false,
|
|
675
|
-
/* dimRedOptions */ {...
|
|
780
|
+
/* dimRedOptions */ {...parsedOptions, preprocessingFuncArgs: [parsedOptions.preprocessingFuncArgs ?? {}]},
|
|
676
781
|
/* uiOptions */{
|
|
677
782
|
fastRowCount: 10000,
|
|
678
783
|
scatterPlotName: 'Sequence space',
|
|
679
|
-
bypassLargeDataWarning:
|
|
784
|
+
bypassLargeDataWarning: parsedOptions?.[BYPASS_LARGE_DATA_WARNING],
|
|
680
785
|
tableView: tableView,
|
|
786
|
+
embedColsNames: embedColsNames,
|
|
787
|
+
clusterColName: clusterColName,
|
|
681
788
|
});
|
|
682
789
|
return res;
|
|
683
790
|
}
|
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
import * as DG from 'datagrok-api/dg';
|
|
2
|
+
import * as grok from 'datagrok-api/grok';
|
|
3
|
+
|
|
4
|
+
import {after, awaitCheck, category, delay, expect, test} from '@datagrok-libraries/test/src/test';
|
|
5
|
+
import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
6
|
+
import {BYPASS_LARGE_DATA_WARNING} from '@datagrok-libraries/ml/src/functionEditors/consts';
|
|
7
|
+
import {getMonomerLibHelper, IMonomerLibHelper} from '@datagrok-libraries/bio/src/types/monomer-library';
|
|
8
|
+
import {getUserLibSettings, setUserLibSettings} from '@datagrok-libraries/bio/src/monomer-works/lib-settings';
|
|
9
|
+
import {UserLibSettings} from '@datagrok-libraries/bio/src/monomer-works/types';
|
|
10
|
+
|
|
11
|
+
import {readDataframe} from './utils';
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
category('projects', () => {
|
|
15
|
+
let monomerLibHelper: IMonomerLibHelper;
|
|
16
|
+
let userLibSettings: UserLibSettings;
|
|
17
|
+
|
|
18
|
+
async function createTableView(tableName: string): Promise<DG.TableView> {
|
|
19
|
+
const df = await readDataframe(tableName);
|
|
20
|
+
df.name = tableName.replace('.csv', '');
|
|
21
|
+
await grok.data.detectSemanticTypes(df);
|
|
22
|
+
const view = grok.shell.addTableView(df);
|
|
23
|
+
return view;
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
async function saveAndOpenProject(tv: DG.TableView, dataSync?: boolean): Promise<void> {
|
|
27
|
+
const project = DG.Project.create();
|
|
28
|
+
const tableInfo = tv.dataFrame.getTableInfo();
|
|
29
|
+
if (dataSync) {
|
|
30
|
+
//@ts-ignore
|
|
31
|
+
tableInfo.tags[DG.Tags.DataSync] = 'sync';
|
|
32
|
+
//@ts-ignore
|
|
33
|
+
tableInfo.tags[DG.Tags.CreationScript] = grok.shell.tv.dataFrame.getTag(DG.Tags.CreationScript);
|
|
34
|
+
}
|
|
35
|
+
const layoutInfo = tv.getInfo();
|
|
36
|
+
project.addChild(tableInfo);
|
|
37
|
+
project.addChild(layoutInfo);
|
|
38
|
+
await grok.dapi.tables.uploadDataFrame(tv.dataFrame);
|
|
39
|
+
await grok.dapi.tables.save(tableInfo);
|
|
40
|
+
await grok.dapi.views.save(layoutInfo);
|
|
41
|
+
await grok.dapi.projects.save(project);
|
|
42
|
+
const projId = project.id;
|
|
43
|
+
grok.shell.closeAll();
|
|
44
|
+
const p = await grok.dapi.projects.find(projId);
|
|
45
|
+
await p.open();
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
async function dataFrameContainsColumns(colArr: string[]): Promise<void> {
|
|
49
|
+
let col = '';
|
|
50
|
+
const getError = () => `${col} hasn't been added to dataframe`;
|
|
51
|
+
await awaitCheck(() => {
|
|
52
|
+
if (!grok.shell.tv.dataFrame)
|
|
53
|
+
return false;
|
|
54
|
+
for (const colName of colArr) {
|
|
55
|
+
if (!grok.shell.tv.dataFrame.col(colName)) {
|
|
56
|
+
col = colName;
|
|
57
|
+
return false;
|
|
58
|
+
}
|
|
59
|
+
}
|
|
60
|
+
return true;
|
|
61
|
+
}, getError(), 5000);
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
async function checkViewerAdded(viewerType: string): Promise<void> {
|
|
65
|
+
await awaitCheck(() => {
|
|
66
|
+
for (const v of grok.shell.tv.viewers) {
|
|
67
|
+
if (v.type === viewerType)
|
|
68
|
+
return true;
|
|
69
|
+
}
|
|
70
|
+
return false;
|
|
71
|
+
}, `${viewerType} hasn\'t been added`, 5000);
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
async function runSaveAndOpenProjectTest(tableName: string, analysisFunc: (tv: DG.TableView) => Promise<void>,
|
|
75
|
+
colList: string[], viewerType: string, dataSync?: boolean,
|
|
76
|
+
additionalChecks?: (tv: DG.TableView) => Promise<void>) {
|
|
77
|
+
let tv;
|
|
78
|
+
if (dataSync) {
|
|
79
|
+
await DG.Func.find({name: 'OpenFile'})[0].prepare({
|
|
80
|
+
fullPath: `System:AppData/Bio/${tableName}`,
|
|
81
|
+
}).call(undefined, undefined, {processed: false});
|
|
82
|
+
tv = grok.shell.tv;
|
|
83
|
+
await grok.data.detectSemanticTypes(tv.dataFrame);
|
|
84
|
+
} else
|
|
85
|
+
tv = await createTableView(tableName);
|
|
86
|
+
await delay(100);
|
|
87
|
+
await analysisFunc(tv);
|
|
88
|
+
await delay(10);
|
|
89
|
+
await saveAndOpenProject(tv, dataSync);
|
|
90
|
+
await delay(10);
|
|
91
|
+
await dataFrameContainsColumns(colList);
|
|
92
|
+
if (viewerType)
|
|
93
|
+
await checkViewerAdded(viewerType);
|
|
94
|
+
if (additionalChecks)
|
|
95
|
+
await additionalChecks(tv);
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
async function runSequenceSpace(tv: DG.TableView): Promise<void> {
|
|
99
|
+
const seqCol = tv.dataFrame.col('sequence')!;
|
|
100
|
+
const semType: string = await grok.functions.call('Bio:detectMacromolecule', {col: seqCol});
|
|
101
|
+
if (semType)
|
|
102
|
+
seqCol.semType = semType;
|
|
103
|
+
await DG.Func.find({package: 'Bio', name: 'sequenceSpaceTopMenu'})[0].prepare({
|
|
104
|
+
table: tv.dataFrame,
|
|
105
|
+
molecules: seqCol,
|
|
106
|
+
methodName: 'UMAP',
|
|
107
|
+
similarityMetric: MmDistanceFunctionsNames.LEVENSHTEIN,
|
|
108
|
+
plotEmbeddings: true,
|
|
109
|
+
options: {[BYPASS_LARGE_DATA_WARNING]: true},
|
|
110
|
+
clusterEmbeddings: true,
|
|
111
|
+
}).call(undefined, undefined, {processed: false});
|
|
112
|
+
await delay(10);
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
async function runActivityCliffs(tv: DG.TableView): Promise<void> {
|
|
116
|
+
const seqCol = tv.dataFrame.col('sequence')!;
|
|
117
|
+
const semType: string = await grok.functions.call('Bio:detectMacromolecule', {col: seqCol});
|
|
118
|
+
if (semType)
|
|
119
|
+
seqCol.semType = semType;
|
|
120
|
+
await DG.Func.find({package: 'Bio', name: 'activityCliffs'})[0].prepare({
|
|
121
|
+
table: tv.dataFrame,
|
|
122
|
+
molecules: seqCol,
|
|
123
|
+
activities: tv.dataFrame.col('Activity'),
|
|
124
|
+
similarity: 90,
|
|
125
|
+
methodName: 'UMAP',
|
|
126
|
+
similarityMetric: MmDistanceFunctionsNames.LEVENSHTEIN,
|
|
127
|
+
preprocessingFunction: DG.Func.find({name: 'macromoleculePreprocessingFunction', package: 'Bio'})[0],
|
|
128
|
+
options: {[BYPASS_LARGE_DATA_WARNING]: true},
|
|
129
|
+
}).call(undefined, undefined, {processed: false});
|
|
130
|
+
await delay(10);
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
async function checkActivityCliffsInit(tv: DG.TableView): Promise<void> {
|
|
134
|
+
let sp: DG.Viewer | null = null;
|
|
135
|
+
for (const v of grok.shell.tv.viewers) {
|
|
136
|
+
if (v.type === DG.VIEWER.SCATTER_PLOT)
|
|
137
|
+
sp = v;
|
|
138
|
+
}
|
|
139
|
+
await awaitCheck(() => {
|
|
140
|
+
const link = sp?.root.getElementsByClassName('scatter_plot_link');
|
|
141
|
+
return !link || !link.length ? false : (link[0] as HTMLElement).innerText.toLowerCase().includes('cliffs');
|
|
142
|
+
}, 'Initialization function hasn\'t been applied on scatter plot', 5000);
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
test('sequence_space', async () => {
|
|
146
|
+
monomerLibHelper = await getMonomerLibHelper();
|
|
147
|
+
userLibSettings = await getUserLibSettings();
|
|
148
|
+
await monomerLibHelper.loadMonomerLibForTests();
|
|
149
|
+
|
|
150
|
+
await runSaveAndOpenProjectTest('tests/100_3_clustests.csv', runSequenceSpace,
|
|
151
|
+
['sequence', 'Embed_X_1', 'Embed_Y_1', 'Cluster (DBSCAN)'], DG.VIEWER.SCATTER_PLOT);
|
|
152
|
+
await delay(100);
|
|
153
|
+
|
|
154
|
+
await setUserLibSettings(userLibSettings);
|
|
155
|
+
await monomerLibHelper.loadMonomerLib(true);
|
|
156
|
+
}, {timeout: 60000});
|
|
157
|
+
|
|
158
|
+
test('sequence_space_sync', async () => {
|
|
159
|
+
monomerLibHelper = await getMonomerLibHelper();
|
|
160
|
+
userLibSettings = await getUserLibSettings();
|
|
161
|
+
await monomerLibHelper.loadMonomerLibForTests();
|
|
162
|
+
|
|
163
|
+
await runSaveAndOpenProjectTest('tests/100_3_clustests.csv', runSequenceSpace,
|
|
164
|
+
['sequence', 'Embed_X_1', 'Embed_Y_1', 'Cluster (DBSCAN)'], DG.VIEWER.SCATTER_PLOT, true);
|
|
165
|
+
await delay(100);
|
|
166
|
+
|
|
167
|
+
await setUserLibSettings(userLibSettings);
|
|
168
|
+
await monomerLibHelper.loadMonomerLib(true);
|
|
169
|
+
}, {timeout: 60000});
|
|
170
|
+
|
|
171
|
+
test('activity_cliffs', async () => {
|
|
172
|
+
monomerLibHelper = await getMonomerLibHelper();
|
|
173
|
+
userLibSettings = await getUserLibSettings();
|
|
174
|
+
await monomerLibHelper.loadMonomerLibForTests();
|
|
175
|
+
|
|
176
|
+
await runSaveAndOpenProjectTest('tests/100_3_clustests.csv', runActivityCliffs,
|
|
177
|
+
['sequence', 'Activity', 'Embed_X_1', 'Embed_Y_1'],
|
|
178
|
+
DG.VIEWER.SCATTER_PLOT, false, checkActivityCliffsInit);
|
|
179
|
+
await delay(100);
|
|
180
|
+
|
|
181
|
+
await setUserLibSettings(userLibSettings);
|
|
182
|
+
await monomerLibHelper.loadMonomerLib(true);
|
|
183
|
+
}, {timeout: 60000});
|
|
184
|
+
|
|
185
|
+
test('activity_cliffs_sync', async () => {
|
|
186
|
+
monomerLibHelper = await getMonomerLibHelper();
|
|
187
|
+
userLibSettings = await getUserLibSettings();
|
|
188
|
+
await monomerLibHelper.loadMonomerLibForTests();
|
|
189
|
+
|
|
190
|
+
await runSaveAndOpenProjectTest('tests/100_3_clustests.csv', runActivityCliffs,
|
|
191
|
+
['sequence', 'Activity', 'Embed_X_1', 'Embed_Y_1'],
|
|
192
|
+
DG.VIEWER.SCATTER_PLOT, true, checkActivityCliffsInit);
|
|
193
|
+
await delay(100);
|
|
194
|
+
|
|
195
|
+
await setUserLibSettings(userLibSettings);
|
|
196
|
+
await monomerLibHelper.loadMonomerLib(true);
|
|
197
|
+
}, {timeout: 60000});
|
|
198
|
+
|
|
199
|
+
after(async () => {
|
|
200
|
+
grok.shell.closeAll();
|
|
201
|
+
});
|
|
202
|
+
});
|
|
@@ -216,11 +216,13 @@ export class MacromoleculeDifferenceCellRendererBack extends CellRendererWithMon
|
|
|
216
216
|
const cell = gridCell.cell;
|
|
217
217
|
const s: string = cell.value ?? '';
|
|
218
218
|
const separator = this.tableCol.tags[bioTAGS.separator];
|
|
219
|
-
|
|
219
|
+
let units: string = this.tableCol.meta.units!;
|
|
220
220
|
w = getUpdatedWidth(grid, g, x, w, dpr);
|
|
221
221
|
//TODO: can this be replaced/merged with splitSequence?
|
|
222
222
|
const [s1, s2] = s.split('#');
|
|
223
|
-
|
|
223
|
+
if (units === NOTATION.CUSTOM && !this.tableCol.temp[SeqTemps.notationProvider])
|
|
224
|
+
units = NOTATION.SEPARATOR;
|
|
225
|
+
const splitter = this.tableCol.temp[SeqTemps.notationProvider]?.separatorSplitter ?? this.tableCol.temp[SeqTemps.notationProvider]?.splitter ?? getSplitter(units, separator);
|
|
224
226
|
const s1SS = splitter(s1);
|
|
225
227
|
const s2SS = splitter(s2);
|
|
226
228
|
const subParts1 = wu.count(0).take(s1SS.length).map((posIdx) => s1SS.getCanonical(posIdx)).toArray();
|