@datagrok/peptides 1.14.1 → 1.15.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +29 -0
- package/dist/196.js +3 -0
- package/dist/196.js.LICENSE.txt +51 -0
- package/dist/209.js +2 -2
- package/dist/361.js +2 -0
- package/dist/381.js +2 -0
- package/dist/436.js +2 -0
- package/dist/694.js +2 -0
- package/dist/831.js +2 -0
- package/dist/868.js +2 -0
- package/dist/package-test.js +3 -2
- package/dist/package-test.js.LICENSE.txt +51 -0
- package/dist/package.js +3 -2
- package/dist/package.js.LICENSE.txt +51 -0
- package/files/help/logo-summary-table.md +23 -0
- package/files/help/monomer-position.md +31 -0
- package/files/help/most-potent-residues.md +17 -0
- package/files/icons/ribbon/logo-summary-viewer.svg +8 -0
- package/files/icons/ribbon/peptide-sar-vertical-viewer.svg +10 -0
- package/files/icons/ribbon/peptide-sar-viewer.svg +11 -0
- package/files/tests/100k.d42 +0 -0
- package/files/tests/200k.d42 +0 -0
- package/files/tests/50k.d42 +0 -0
- package/files/tests/{aligned_5k.d42 → 5k.d42} +0 -0
- package/package.json +5 -5
- package/src/model.ts +85 -175
- package/src/package-test.ts +7 -6
- package/src/tests/benchmarks.ts +95 -0
- package/src/tests/core.ts +4 -75
- package/src/tests/{algorithms.ts → misc.ts} +3 -16
- package/src/tests/model.ts +3 -14
- package/src/tests/table-view.ts +4 -14
- package/src/tests/viewers.ts +7 -1
- package/src/tests/widgets.ts +9 -1
- package/src/utils/algorithms.ts +166 -16
- package/src/utils/cell-renderer.ts +12 -7
- package/src/utils/constants.ts +2 -0
- package/src/utils/misc.ts +2 -1
- package/src/utils/parallel-mutation-cliffs.ts +106 -0
- package/src/utils/types.ts +1 -1
- package/src/viewers/logo-summary.ts +9 -6
- package/src/viewers/sar-viewer.ts +28 -14
- package/src/widgets/mutation-cliffs.ts +2 -2
- package/src/widgets/peptides.ts +15 -5
- package/src/widgets/selection.ts +9 -0
- package/src/widgets/settings.ts +3 -8
- package/src/workers/mutation-cliffs-worker.ts +77 -0
- package/dist/521.js +0 -2
- package/dist/677.js +0 -2
- package/dist/729.js +0 -2
- package/dist/959.js +0 -2
package/src/model.ts
CHANGED
|
@@ -6,7 +6,7 @@ import {splitAlignedSequences} from '@datagrok-libraries/bio/src/utils/splitter'
|
|
|
6
6
|
import {SeqPalette} from '@datagrok-libraries/bio/src/seq-palettes';
|
|
7
7
|
import {pickUpPalette, TAGS as bioTAGS, monomerToShort} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
8
8
|
import {calculateScores, SCORE} from '@datagrok-libraries/bio/src/utils/macromolecule/scoring';
|
|
9
|
-
import {StringDictionary} from '@datagrok-libraries/utils/src/type-declarations';
|
|
9
|
+
import {Options, StringDictionary} from '@datagrok-libraries/utils/src/type-declarations';
|
|
10
10
|
import {DistanceMatrix} from '@datagrok-libraries/ml/src/distance-matrix';
|
|
11
11
|
import {StringMetricsNames} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
12
12
|
import {ITreeHelper} from '@datagrok-libraries/bio/src/trees/tree-helper';
|
|
@@ -29,10 +29,14 @@ import {getAggregatedValue, getStats, Stats} from './utils/statistics';
|
|
|
29
29
|
import {LogoSummaryTable} from './viewers/logo-summary';
|
|
30
30
|
import {getSettingsDialog} from './widgets/settings';
|
|
31
31
|
import {_package, getMonomerWorksInstance, getTreeHelperInstance} from './package';
|
|
32
|
-
import {findMutations} from './utils/algorithms';
|
|
32
|
+
import {calculateClusterStatistics, calculateMonomerPositionStatistics, findMutations} from './utils/algorithms';
|
|
33
33
|
import {createDistanceMatrixWorker} from './utils/worker-creator';
|
|
34
34
|
import {getSelectionWidget} from './widgets/selection';
|
|
35
35
|
|
|
36
|
+
import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
|
|
37
|
+
import {BitArrayMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
|
|
38
|
+
import {DimReductionMethods, ITSNEOptions, IUMAPOptions} from '@datagrok-libraries/ml/src/reduce-dimensionality';
|
|
39
|
+
|
|
36
40
|
export type SummaryStats = {
|
|
37
41
|
minCount: number, maxCount: number,
|
|
38
42
|
minMeanDifference: number, maxMeanDifference: number,
|
|
@@ -73,6 +77,7 @@ export class PeptidesModel {
|
|
|
73
77
|
isInitialized = false;
|
|
74
78
|
_analysisView?: DG.TableView;
|
|
75
79
|
|
|
80
|
+
|
|
76
81
|
_settings!: type.PeptidesSettings;
|
|
77
82
|
isRibbonSet = false;
|
|
78
83
|
|
|
@@ -81,13 +86,13 @@ export class PeptidesModel {
|
|
|
81
86
|
webLogoBounds: {[positon: string]: {[monomer: string]: DG.Rect}} = {};
|
|
82
87
|
cachedWebLogoTooltip: {bar: string, tooltip: HTMLDivElement | null} = {bar: '', tooltip: null};
|
|
83
88
|
_alphabet?: string;
|
|
84
|
-
_splitSeqDf?: DG.DataFrame;
|
|
85
89
|
_dm!: DistanceMatrix;
|
|
86
90
|
_layoutEventInitialized = false;
|
|
87
91
|
|
|
88
92
|
subs: rxjs.Subscription[] = [];
|
|
89
93
|
isHighlighting: boolean = false;
|
|
90
94
|
latestSelectionItem: (type.SelectionItem & {kind: SELECTION_MODE | 'Cluster'}) | null = null;
|
|
95
|
+
controlFire: boolean = false;
|
|
91
96
|
|
|
92
97
|
private constructor(dataFrame: DG.DataFrame) {
|
|
93
98
|
this.df = dataFrame;
|
|
@@ -108,7 +113,7 @@ export class PeptidesModel {
|
|
|
108
113
|
}
|
|
109
114
|
|
|
110
115
|
get monomerPositionStats(): MonomerPositionStats {
|
|
111
|
-
this._monomerPositionStats ??= this.
|
|
116
|
+
this._monomerPositionStats ??= calculateMonomerPositionStatistics(this.df, this.positionColumns.toArray());
|
|
112
117
|
return this._monomerPositionStats;
|
|
113
118
|
}
|
|
114
119
|
|
|
@@ -116,13 +121,8 @@ export class PeptidesModel {
|
|
|
116
121
|
this._monomerPositionStats = mps;
|
|
117
122
|
}
|
|
118
123
|
|
|
119
|
-
get
|
|
120
|
-
|
|
121
|
-
return this._splitSeqDf;
|
|
122
|
-
}
|
|
123
|
-
|
|
124
|
-
set splitSeqDf(df: DG.DataFrame) {
|
|
125
|
-
this._splitSeqDf = df;
|
|
124
|
+
get positionColumns(): wu.WuIterable<DG.Column> {
|
|
125
|
+
return wu(this.df.columns.byTags({[C.TAGS.POSITION_COL]: `${true}`}));
|
|
126
126
|
}
|
|
127
127
|
|
|
128
128
|
get alphabet(): string {
|
|
@@ -139,7 +139,8 @@ export class PeptidesModel {
|
|
|
139
139
|
}
|
|
140
140
|
|
|
141
141
|
get clusterStats(): ClusterTypeStats {
|
|
142
|
-
this._clusterStats ??= this.
|
|
142
|
+
this._clusterStats ??= calculateClusterStatistics(this.df, this.settings.clustersColumnName!,
|
|
143
|
+
this.customClusters.toArray());
|
|
143
144
|
return this._clusterStats;
|
|
144
145
|
}
|
|
145
146
|
|
|
@@ -161,7 +162,7 @@ export class PeptidesModel {
|
|
|
161
162
|
this._analysisView = wu(grok.shell.tableViews).find(({dataFrame}) => dataFrame?.getTag(C.TAGS.UUID) === this.id);
|
|
162
163
|
if (this._analysisView === undefined) {
|
|
163
164
|
this._analysisView = grok.shell.addTableView(this.df);
|
|
164
|
-
const posCols = this.
|
|
165
|
+
const posCols = this.positionColumns.toArray().map((col) => col.name);
|
|
165
166
|
|
|
166
167
|
for (let colIdx = 1; colIdx < this._analysisView.grid.columns.length; ++colIdx) {
|
|
167
168
|
const gridCol = this._analysisView.grid.columns.byIndex(colIdx)!;
|
|
@@ -286,10 +287,10 @@ export class PeptidesModel {
|
|
|
286
287
|
return (this.clusterSelection[CLUSTER_TYPE.ORIGINAL].length + this.clusterSelection[CLUSTER_TYPE.CUSTOM].length) === 0;
|
|
287
288
|
}
|
|
288
289
|
|
|
289
|
-
get customClusters():
|
|
290
|
+
get customClusters(): wu.WuIterable<DG.Column<boolean>> {
|
|
290
291
|
const query: { [key: string]: string } = {};
|
|
291
292
|
query[C.TAGS.CUSTOM_CLUSTER] = '1';
|
|
292
|
-
return this.df.columns.byTags(query);
|
|
293
|
+
return wu(this.df.columns.byTags(query));
|
|
293
294
|
}
|
|
294
295
|
|
|
295
296
|
get settings(): type.PeptidesSettings {
|
|
@@ -331,26 +332,27 @@ export class PeptidesModel {
|
|
|
331
332
|
}
|
|
332
333
|
}
|
|
333
334
|
this.df.setTag('settings', JSON.stringify(this._settings));
|
|
335
|
+
let updateViewersData = false;
|
|
334
336
|
for (const variable of updateVars) {
|
|
335
337
|
switch (variable) {
|
|
336
338
|
case 'activity':
|
|
337
339
|
this.createScaledCol();
|
|
340
|
+
updateViewersData = true;
|
|
338
341
|
break;
|
|
339
342
|
case 'mutationCliffs':
|
|
340
343
|
this.updateMutationCliffs();
|
|
341
344
|
break;
|
|
342
345
|
case 'stats':
|
|
343
|
-
this.monomerPositionStats = this.
|
|
344
|
-
this.clusterStats = this.
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
mpViewer.render();
|
|
348
|
-
const mprViewer = this.findViewer(VIEWER_TYPE.MOST_POTENT_RESIDUES) as MostPotentResidues;
|
|
349
|
-
mprViewer.createMostPotentResiduesGrid();
|
|
350
|
-
mprViewer.render();
|
|
346
|
+
this.monomerPositionStats = calculateMonomerPositionStatistics(this.df, this.positionColumns.toArray());
|
|
347
|
+
this.clusterStats = calculateClusterStatistics(this.df, this.settings.clustersColumnName!,
|
|
348
|
+
this.customClusters.toArray());
|
|
349
|
+
updateViewersData = true;
|
|
351
350
|
break;
|
|
352
351
|
case 'grid':
|
|
353
352
|
this.setGridProperties();
|
|
353
|
+
const lstViewer = this.findViewer(VIEWER_TYPE.LOGO_SUMMARY_TABLE) as LogoSummaryTable | null;
|
|
354
|
+
lstViewer?.createLogoSummaryTableGrid();
|
|
355
|
+
lstViewer?.render();
|
|
354
356
|
break;
|
|
355
357
|
case 'dendrogram':
|
|
356
358
|
this.settings.showDendrogram ? this.addDendrogram() : this.closeViewer(VIEWER_TYPE.DENDROGRAM);
|
|
@@ -372,13 +374,16 @@ export class PeptidesModel {
|
|
|
372
374
|
|
|
373
375
|
//TODO: handle settings change
|
|
374
376
|
const mpViewer = this.findViewer(VIEWER_TYPE.MONOMER_POSITION) as MonomerPosition | null;
|
|
375
|
-
|
|
377
|
+
if (updateViewersData)
|
|
378
|
+
mpViewer?.createMonomerPositionGrid();
|
|
376
379
|
mpViewer?.render();
|
|
377
380
|
const mprViewer = this.findViewer(VIEWER_TYPE.MOST_POTENT_RESIDUES) as MostPotentResidues | null;
|
|
378
|
-
|
|
381
|
+
if (updateViewersData)
|
|
382
|
+
mprViewer?.createMostPotentResiduesGrid();
|
|
379
383
|
mprViewer?.render();
|
|
380
384
|
const lstViewer = this.findViewer(VIEWER_TYPE.LOGO_SUMMARY_TABLE) as LogoSummaryTable | null;
|
|
381
|
-
|
|
385
|
+
if (updateViewersData)
|
|
386
|
+
lstViewer?.createLogoSummaryTableGrid();
|
|
382
387
|
lstViewer?.render();
|
|
383
388
|
}
|
|
384
389
|
|
|
@@ -390,20 +395,25 @@ export class PeptidesModel {
|
|
|
390
395
|
this.df.setTag(C.TAGS.IDENTITY_TEMPLATE, template);
|
|
391
396
|
}
|
|
392
397
|
|
|
393
|
-
updateMutationCliffs(notify: boolean = true): void {
|
|
398
|
+
async updateMutationCliffs(notify: boolean = true): Promise<void> {
|
|
394
399
|
const scaledActivityCol: DG.Column<number> = this.df.getCol(C.COLUMNS_NAMES.ACTIVITY_SCALED);
|
|
395
400
|
//TODO: set categories ordering the same to share compare indexes instead of strings
|
|
396
401
|
const monomerCols: type.RawColumn[] = this.df.columns.bySemTypeAll(C.SEM_TYPES.MONOMER).map(extractColInfo);
|
|
397
402
|
const targetCol = typeof this.settings.targetColumnName !== 'undefined' ?
|
|
398
403
|
extractColInfo(this.df.getCol(this.settings.targetColumnName)) : null;
|
|
399
|
-
|
|
400
|
-
const currentTarget = mpViewer?.getProperty(MONOMER_POSITION_PROPERTIES.TARGET)?.get(mpViewer);
|
|
404
|
+
let mpViewer = this.findViewer(VIEWER_TYPE.MONOMER_POSITION) as MonomerPosition | null;
|
|
405
|
+
const currentTarget = mpViewer?.getProperty(MONOMER_POSITION_PROPERTIES.TARGET)?.get(mpViewer) as string | undefined;
|
|
401
406
|
const targetOptions = {targetCol: targetCol, currentTarget: currentTarget};
|
|
402
|
-
const mutationCliffs = findMutations(scaledActivityCol.getRawData(), monomerCols, this.settings, targetOptions);
|
|
407
|
+
const mutationCliffs = await findMutations(scaledActivityCol.getRawData(), monomerCols, this.settings, targetOptions);
|
|
403
408
|
if (notify)
|
|
404
409
|
this.mutationCliffs = mutationCliffs;
|
|
405
410
|
else
|
|
406
411
|
this._mutationCliffs = mutationCliffs;
|
|
412
|
+
|
|
413
|
+
mpViewer ??= this.findViewer(VIEWER_TYPE.MONOMER_POSITION) as MonomerPosition | null;
|
|
414
|
+
mpViewer?.render(true);
|
|
415
|
+
const mostPotentViewer = this.findViewer(VIEWER_TYPE.MOST_POTENT_RESIDUES) as MostPotentResidues | null;
|
|
416
|
+
mostPotentViewer?.render(true);
|
|
407
417
|
}
|
|
408
418
|
|
|
409
419
|
buildSplitSeqDf(): DG.DataFrame {
|
|
@@ -418,7 +428,7 @@ export class PeptidesModel {
|
|
|
418
428
|
}
|
|
419
429
|
|
|
420
430
|
createAccordion(): DG.Accordion | null {
|
|
421
|
-
const trueModel: PeptidesModel | undefined = grok.shell.t
|
|
431
|
+
const trueModel: PeptidesModel | undefined = grok.shell.t?.temp[PeptidesModel.modelName];
|
|
422
432
|
if (!trueModel)
|
|
423
433
|
return null;
|
|
424
434
|
|
|
@@ -481,7 +491,7 @@ export class PeptidesModel {
|
|
|
481
491
|
options.notify ??= true;
|
|
482
492
|
|
|
483
493
|
const tempSelection: type.Selection = {};
|
|
484
|
-
const positionColumns = this.
|
|
494
|
+
const positionColumns = this.positionColumns.toArray().map((col) => col.name);
|
|
485
495
|
for (const pos of positionColumns)
|
|
486
496
|
tempSelection[pos] = [];
|
|
487
497
|
|
|
@@ -495,7 +505,7 @@ export class PeptidesModel {
|
|
|
495
505
|
options.notify ??= true;
|
|
496
506
|
|
|
497
507
|
const tempSelection: type.Selection = {};
|
|
498
|
-
const positionColumns = this.
|
|
508
|
+
const positionColumns = this.positionColumns.toArray().map((col) => col.name);
|
|
499
509
|
for (const pos of positionColumns)
|
|
500
510
|
tempSelection[pos] = [];
|
|
501
511
|
|
|
@@ -509,16 +519,19 @@ export class PeptidesModel {
|
|
|
509
519
|
// append splitSeqDf columns to source table and make sure columns are not added more than once
|
|
510
520
|
const name = this.df.name;
|
|
511
521
|
const cols = this.df.columns;
|
|
512
|
-
const
|
|
522
|
+
const splitSeqDf = this.buildSplitSeqDf();
|
|
523
|
+
const positionColumns = splitSeqDf.columns.names();
|
|
513
524
|
for (const colName of positionColumns) {
|
|
514
525
|
let col = this.df.col(colName);
|
|
515
|
-
const newCol =
|
|
526
|
+
const newCol = splitSeqDf.getCol(colName);
|
|
516
527
|
if (col !== null)
|
|
517
528
|
cols.remove(colName);
|
|
518
529
|
|
|
519
530
|
const newColCat = newCol.categories;
|
|
520
531
|
const newColData = newCol.getRawData();
|
|
521
532
|
col = cols.addNew(newCol.name, newCol.type).init((i) => newColCat[newColData[i]]);
|
|
533
|
+
col.setTag(C.TAGS.ANALYSIS_COL, `${true}`);
|
|
534
|
+
col.setTag(C.TAGS.POSITION_COL, `${true}`);
|
|
522
535
|
CR.setMonomerRenderer(col, this.alphabet);
|
|
523
536
|
}
|
|
524
537
|
this.df.name = name;
|
|
@@ -533,140 +546,6 @@ export class PeptidesModel {
|
|
|
533
546
|
sourceGrid.columns.setOrder([scaledCol.name]);
|
|
534
547
|
}
|
|
535
548
|
|
|
536
|
-
calculateMonomerPositionStatistics(options: {isFiltered?: boolean, columns?: string[]} = {}): MonomerPositionStats {
|
|
537
|
-
options.isFiltered ??= false;
|
|
538
|
-
const monomerPositionObject = {general: {}} as MonomerPositionStats & { general: SummaryStats };
|
|
539
|
-
let activityColData: Float64Array = this.df.getCol(C.COLUMNS_NAMES.ACTIVITY_SCALED).getRawData() as Float64Array;
|
|
540
|
-
let positionColumns = this.splitSeqDf.columns.toList();
|
|
541
|
-
let sourceDfLen = this.df.rowCount;
|
|
542
|
-
|
|
543
|
-
if (options.isFiltered) {
|
|
544
|
-
sourceDfLen = this.df.filter.trueCount;
|
|
545
|
-
const tempActivityData = new Float64Array(sourceDfLen);
|
|
546
|
-
const selectedIndexes = this.df.filter.getSelectedIndexes();
|
|
547
|
-
for (let i = 0; i < sourceDfLen; ++i)
|
|
548
|
-
tempActivityData[i] = activityColData[selectedIndexes[i]];
|
|
549
|
-
activityColData = tempActivityData;
|
|
550
|
-
positionColumns = this.splitSeqDf.clone(this.df.filter).columns.toList();
|
|
551
|
-
}
|
|
552
|
-
options.columns ??= positionColumns.map((col) => col.name);
|
|
553
|
-
|
|
554
|
-
for (const posCol of positionColumns) {
|
|
555
|
-
if (!options.columns.includes(posCol.name))
|
|
556
|
-
continue;
|
|
557
|
-
const posColData = posCol.getRawData();
|
|
558
|
-
const posColCateogries = posCol.categories;
|
|
559
|
-
const currentPositionObject = {general: {}} as PositionStats & {general: SummaryStats};
|
|
560
|
-
|
|
561
|
-
for (let categoryIndex = 0; categoryIndex < posColCateogries.length; ++categoryIndex) {
|
|
562
|
-
const monomer = posColCateogries[categoryIndex];
|
|
563
|
-
if (monomer === '')
|
|
564
|
-
continue;
|
|
565
|
-
|
|
566
|
-
const boolArray: boolean[] = new Array(sourceDfLen).fill(false);
|
|
567
|
-
for (let i = 0; i < sourceDfLen; ++i) {
|
|
568
|
-
if (posColData[i] === categoryIndex)
|
|
569
|
-
boolArray[i] = true;
|
|
570
|
-
}
|
|
571
|
-
const bitArray = BitArray.fromValues(boolArray);
|
|
572
|
-
const stats = bitArray.allFalse || bitArray.allTrue ?
|
|
573
|
-
{count: sourceDfLen, meanDifference: 0, ratio: 1.0, pValue: null, mask: bitArray} :
|
|
574
|
-
getStats(activityColData, bitArray);
|
|
575
|
-
currentPositionObject[monomer] = stats;
|
|
576
|
-
this.getSummaryStats(currentPositionObject.general, stats);
|
|
577
|
-
}
|
|
578
|
-
monomerPositionObject[posCol.name] = currentPositionObject;
|
|
579
|
-
this.getSummaryStats(monomerPositionObject.general, null, currentPositionObject.general);
|
|
580
|
-
}
|
|
581
|
-
return monomerPositionObject;
|
|
582
|
-
}
|
|
583
|
-
|
|
584
|
-
getSummaryStats(genObj: SummaryStats, stats: Stats | null = null, summaryStats: SummaryStats | null = null): void {
|
|
585
|
-
if (stats === null && summaryStats === null)
|
|
586
|
-
throw new Error(`MonomerPositionStatsError: either stats or summaryStats must be present`);
|
|
587
|
-
|
|
588
|
-
const possibleMaxCount = stats?.count ?? summaryStats!.maxCount;
|
|
589
|
-
genObj.maxCount ??= possibleMaxCount;
|
|
590
|
-
if (genObj.maxCount < possibleMaxCount)
|
|
591
|
-
genObj.maxCount = possibleMaxCount;
|
|
592
|
-
|
|
593
|
-
const possibleMinCount = stats?.count ?? summaryStats!.minCount;
|
|
594
|
-
genObj.minCount ??= possibleMinCount;
|
|
595
|
-
if (genObj.minCount > possibleMinCount)
|
|
596
|
-
genObj.minCount = possibleMinCount;
|
|
597
|
-
|
|
598
|
-
const possibleMaxMeanDifference = stats?.meanDifference ?? summaryStats!.maxMeanDifference;
|
|
599
|
-
genObj.maxMeanDifference ??= possibleMaxMeanDifference;
|
|
600
|
-
if (genObj.maxMeanDifference < possibleMaxMeanDifference)
|
|
601
|
-
genObj.maxMeanDifference = possibleMaxMeanDifference;
|
|
602
|
-
|
|
603
|
-
const possibleMinMeanDifference = stats?.meanDifference ?? summaryStats!.minMeanDifference;
|
|
604
|
-
genObj.minMeanDifference ??= possibleMinMeanDifference;
|
|
605
|
-
if (genObj.minMeanDifference > possibleMinMeanDifference)
|
|
606
|
-
genObj.minMeanDifference = possibleMinMeanDifference;
|
|
607
|
-
|
|
608
|
-
if (!isNaN(stats?.pValue ?? NaN)) {
|
|
609
|
-
const possibleMaxPValue = stats?.pValue ?? summaryStats!.maxPValue;
|
|
610
|
-
genObj.maxPValue ??= possibleMaxPValue;
|
|
611
|
-
if (genObj.maxPValue < possibleMaxPValue)
|
|
612
|
-
genObj.maxPValue = possibleMaxPValue;
|
|
613
|
-
|
|
614
|
-
const possibleMinPValue = stats?.pValue ?? summaryStats!.minPValue;
|
|
615
|
-
genObj.minPValue ??= possibleMinPValue;
|
|
616
|
-
if (genObj.minPValue > possibleMinPValue)
|
|
617
|
-
genObj.minPValue = possibleMinPValue;
|
|
618
|
-
}
|
|
619
|
-
|
|
620
|
-
const possibleMaxRatio = stats?.ratio ?? summaryStats!.maxRatio;
|
|
621
|
-
genObj.maxRatio ??= possibleMaxRatio;
|
|
622
|
-
if (genObj.maxRatio < possibleMaxRatio)
|
|
623
|
-
genObj.maxRatio = possibleMaxRatio;
|
|
624
|
-
|
|
625
|
-
const possibleMinRatio = stats?.ratio ?? summaryStats!.minRatio;
|
|
626
|
-
genObj.minRatio ??= possibleMinRatio;
|
|
627
|
-
if (genObj.minRatio > possibleMinRatio)
|
|
628
|
-
genObj.minRatio = possibleMinRatio;
|
|
629
|
-
}
|
|
630
|
-
|
|
631
|
-
calculateClusterStatistics(): ClusterTypeStats {
|
|
632
|
-
const rowCount = this.df.rowCount;
|
|
633
|
-
const origClustCol = this.df.getCol(this.settings.clustersColumnName!);
|
|
634
|
-
const origClustColData = origClustCol.getRawData();
|
|
635
|
-
const origClustColCat = origClustCol.categories;
|
|
636
|
-
const origClustMasks: BitArray[] = Array.from({length: origClustColCat.length},
|
|
637
|
-
() => new BitArray(rowCount, false));
|
|
638
|
-
for (let rowIdx = 0; rowIdx < rowCount; ++rowIdx)
|
|
639
|
-
origClustMasks[origClustColData[rowIdx]].setTrue(rowIdx);
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
const customClustColList = wu(this.customClusters).toArray();
|
|
643
|
-
const customClustMasks = customClustColList.map(
|
|
644
|
-
(v) => BitArray.fromUint32Array(rowCount, v.getRawData() as Uint32Array));
|
|
645
|
-
const customClustColNamesList = customClustColList.map((v) => v.name);
|
|
646
|
-
|
|
647
|
-
const activityColData: type.RawData = this.df.getCol(C.COLUMNS_NAMES.ACTIVITY_SCALED).getRawData();
|
|
648
|
-
|
|
649
|
-
const origClustStats: ClusterStats = {};
|
|
650
|
-
const customClustStats: ClusterStats = {};
|
|
651
|
-
|
|
652
|
-
for (const clustType of Object.values(CLUSTER_TYPE)) {
|
|
653
|
-
const masks = clustType === CLUSTER_TYPE.ORIGINAL ? origClustMasks : customClustMasks;
|
|
654
|
-
const clustNames = clustType === CLUSTER_TYPE.ORIGINAL ? origClustColCat : customClustColNamesList;
|
|
655
|
-
const resultStats = clustType === CLUSTER_TYPE.ORIGINAL ? origClustStats : customClustStats;
|
|
656
|
-
for (let maskIdx = 0; maskIdx < masks.length; ++maskIdx) {
|
|
657
|
-
const mask = masks[maskIdx];
|
|
658
|
-
const stats = mask.allTrue || mask.allFalse ? {count: mask.length, meanDifference: 0, ratio: 1.0, pValue: null, mask: mask} :
|
|
659
|
-
getStats(activityColData, mask);
|
|
660
|
-
resultStats[clustNames[maskIdx]] = stats;
|
|
661
|
-
}
|
|
662
|
-
}
|
|
663
|
-
|
|
664
|
-
const resultStats = {} as ClusterTypeStats;
|
|
665
|
-
resultStats[CLUSTER_TYPE.ORIGINAL] = origClustStats;
|
|
666
|
-
resultStats[CLUSTER_TYPE.CUSTOM] = customClustStats;
|
|
667
|
-
return resultStats;
|
|
668
|
-
}
|
|
669
|
-
|
|
670
549
|
initClusterSelection(options: {notify?: boolean} = {}): void {
|
|
671
550
|
options.notify ??= true;
|
|
672
551
|
|
|
@@ -774,8 +653,8 @@ export class PeptidesModel {
|
|
|
774
653
|
//TODO: optimize
|
|
775
654
|
if (gcArgs.cell.isColHeader && col?.semType === C.SEM_TYPES.MONOMER) {
|
|
776
655
|
const isDfFiltered = this.df.filter.anyFalse;
|
|
777
|
-
const stats = (isDfFiltered ? this.
|
|
778
|
-
this.monomerPositionStats)[col.name];
|
|
656
|
+
const stats = (isDfFiltered ? calculateMonomerPositionStatistics(this.df, this.positionColumns.toArray(),
|
|
657
|
+
{isFiltered: true, columns: [col.name]}) : this.monomerPositionStats)[col.name];
|
|
779
658
|
if (!stats)
|
|
780
659
|
return;
|
|
781
660
|
//TODO: precalc on stats creation
|
|
@@ -970,14 +849,22 @@ export class PeptidesModel {
|
|
|
970
849
|
pane.expanded = true;
|
|
971
850
|
};
|
|
972
851
|
|
|
973
|
-
|
|
852
|
+
selection.onChanged.subscribe(() => {
|
|
853
|
+
if (this.controlFire) {
|
|
854
|
+
this.controlFire = false;
|
|
855
|
+
return;
|
|
856
|
+
}
|
|
974
857
|
if (!this.isUserChangedSelection)
|
|
975
858
|
selection.copyFrom(getLatestSelection(), false);
|
|
976
859
|
showAccordion();
|
|
977
860
|
this.isUserChangedSelection = true;
|
|
978
861
|
});
|
|
979
862
|
|
|
980
|
-
|
|
863
|
+
filter.onChanged.subscribe(() => {
|
|
864
|
+
if (this.controlFire) {
|
|
865
|
+
this.controlFire = false;
|
|
866
|
+
return;
|
|
867
|
+
}
|
|
981
868
|
const lstViewer = this.findViewer(VIEWER_TYPE.LOGO_SUMMARY_TABLE) as LogoSummaryTable | null;
|
|
982
869
|
if (lstViewer !== null && typeof lstViewer.model !== 'undefined') {
|
|
983
870
|
lstViewer.createLogoSummaryTableGrid();
|
|
@@ -994,6 +881,13 @@ export class PeptidesModel {
|
|
|
994
881
|
this.df.selection.fireChanged();
|
|
995
882
|
if (fireFilterChanged)
|
|
996
883
|
this.df.filter.fireChanged();
|
|
884
|
+
|
|
885
|
+
// Fire bitset changed event again to update UI
|
|
886
|
+
this.controlFire = true;
|
|
887
|
+
this.df.selection.fireChanged();
|
|
888
|
+
if (fireFilterChanged)
|
|
889
|
+
this.df.filter.fireChanged();
|
|
890
|
+
|
|
997
891
|
this.headerSelectedMonomers = calculateSelected(this.df);
|
|
998
892
|
}
|
|
999
893
|
|
|
@@ -1004,7 +898,7 @@ export class PeptidesModel {
|
|
|
1004
898
|
sourceGridProps.allowEdit = props?.allowEdit ?? false;
|
|
1005
899
|
sourceGridProps.showCurrentRowIndicator = props?.showCurrentRowIndicator ?? false;
|
|
1006
900
|
this.df.temp[C.EMBEDDING_STATUS] = false;
|
|
1007
|
-
const positionCols = this.
|
|
901
|
+
const positionCols = this.positionColumns.toArray();
|
|
1008
902
|
let maxWidth = 10;
|
|
1009
903
|
const canvasContext = sourceGrid.canvas.getContext('2d');
|
|
1010
904
|
for (const positionCol of positionCols) {
|
|
@@ -1017,7 +911,7 @@ export class PeptidesModel {
|
|
|
1017
911
|
setTimeout(() => {
|
|
1018
912
|
for (const positionCol of positionCols)
|
|
1019
913
|
sourceGrid.col(positionCol.name)!.width = maxWidth + 15;
|
|
1020
|
-
},
|
|
914
|
+
}, 100);
|
|
1021
915
|
}
|
|
1022
916
|
|
|
1023
917
|
closeViewer(viewerType: VIEWER_TYPE): void {
|
|
@@ -1115,6 +1009,7 @@ export class PeptidesModel {
|
|
|
1115
1009
|
this.fireBitsetChanged(true);
|
|
1116
1010
|
if (typeof this.settings.targetColumnName === 'undefined')
|
|
1117
1011
|
this.updateMutationCliffs();
|
|
1012
|
+
|
|
1118
1013
|
this.analysisView.grid.invalidate();
|
|
1119
1014
|
}
|
|
1120
1015
|
|
|
@@ -1162,6 +1057,7 @@ export class PeptidesModel {
|
|
|
1162
1057
|
addNewCluster(clusterName: string): void {
|
|
1163
1058
|
const newClusterCol = DG.Column.fromBitSet(clusterName, this.getCompoundBitset());
|
|
1164
1059
|
newClusterCol.setTag(C.TAGS.CUSTOM_CLUSTER, '1');
|
|
1060
|
+
newClusterCol.setTag(C.TAGS.ANALYSIS_COL, `${true}`);
|
|
1165
1061
|
this.df.columns.add(newClusterCol);
|
|
1166
1062
|
this.analysisView.grid.col(newClusterCol.name)!.visible = false;
|
|
1167
1063
|
}
|
|
@@ -1230,4 +1126,18 @@ export class PeptidesModel {
|
|
|
1230
1126
|
}
|
|
1231
1127
|
return selection;
|
|
1232
1128
|
}
|
|
1129
|
+
|
|
1130
|
+
async addSequenceSpace(): Promise<void> {
|
|
1131
|
+
const seqSpaceParams: {table: DG.DataFrame, molecules: DG.Column, methodName: DimReductionMethods,
|
|
1132
|
+
similarityMetric: BitArrayMetrics | MmDistanceFunctionsNames, plotEmbeddings: boolean,
|
|
1133
|
+
sparseMatrixThreshold?: number, options?: (IUMAPOptions | ITSNEOptions) & Options} =
|
|
1134
|
+
{table: this.df, molecules: this.df.getCol(this.settings.sequenceColumnName!),
|
|
1135
|
+
methodName: DimReductionMethods.UMAP, similarityMetric: MmDistanceFunctionsNames.MONOMER_CHEMICAL_DISTANCE,
|
|
1136
|
+
plotEmbeddings: true, sparseMatrixThreshold: 0.8, options: {'bypassLargeDataWarning': true}};
|
|
1137
|
+
const seqSpaceViewer: DG.ScatterPlotViewer | undefined = await grok.functions.call('Bio:sequenceSpaceTopMenu', seqSpaceParams);
|
|
1138
|
+
if (!(seqSpaceViewer instanceof DG.ScatterPlotViewer))
|
|
1139
|
+
return;
|
|
1140
|
+
seqSpaceViewer.props.colorColumnName = C.COLUMNS_NAMES.ACTIVITY_SCALED;
|
|
1141
|
+
this.analysisView.dockManager.dock(seqSpaceViewer, DG.DOCK_TYPE.RIGHT, this.findViewerNode(VIEWER_TYPE.LOGO_SUMMARY_TABLE), 'Sequence space');
|
|
1142
|
+
}
|
|
1233
1143
|
}
|
package/src/package-test.ts
CHANGED
|
@@ -1,14 +1,15 @@
|
|
|
1
1
|
import * as DG from 'datagrok-api/dg';
|
|
2
2
|
import {runTests, tests, TestContext} from '@datagrok-libraries/utils/src/test';
|
|
3
3
|
|
|
4
|
-
|
|
4
|
+
import './tests/core';
|
|
5
5
|
//FIXME: fails on CI; crashes browser
|
|
6
6
|
// import './tests/peptide-space-test';
|
|
7
|
-
import './tests/
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
7
|
+
import './tests/benchmarks';
|
|
8
|
+
import './tests/viewers';
|
|
9
|
+
import './tests/widgets';
|
|
10
|
+
import './tests/table-view';
|
|
11
|
+
import './tests/model';
|
|
12
|
+
import './tests/misc';
|
|
12
13
|
|
|
13
14
|
export const _package = new DG.Package();
|
|
14
15
|
export {tests};
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
import * as grok from 'datagrok-api/grok';
|
|
2
|
+
import * as DG from 'datagrok-api/dg';
|
|
3
|
+
|
|
4
|
+
import {category, test} from '@datagrok-libraries/utils/src/test';
|
|
5
|
+
import {NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
6
|
+
|
|
7
|
+
import {_package} from '../package-test';
|
|
8
|
+
import {calculateClusterStatistics, calculateMonomerPositionStatistics, findMutations} from '../utils/algorithms';
|
|
9
|
+
import * as type from '../utils/types';
|
|
10
|
+
import {scaleActivity} from '../utils/misc';
|
|
11
|
+
import {startAnalysis} from '../widgets/peptides';
|
|
12
|
+
import * as C from '../utils/constants';
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
const benchmarkDatasetSizes = [5, 50, 100, 200];
|
|
16
|
+
|
|
17
|
+
category('Benchmarks: Mutation Cliffs', () => {
|
|
18
|
+
for (const size of benchmarkDatasetSizes)
|
|
19
|
+
test(`${size}k sequences`, async () => await mutationCliffsBenchmark(size), {timeout: 100000});
|
|
20
|
+
});
|
|
21
|
+
|
|
22
|
+
category('Benchmarks: Cluster stats', () => {
|
|
23
|
+
for (const size of benchmarkDatasetSizes) {
|
|
24
|
+
test(`${size}k sequences`, async () => {
|
|
25
|
+
if (!DG.Test.isInBenchmark)
|
|
26
|
+
return null;
|
|
27
|
+
|
|
28
|
+
const df = (await _package.files.readBinaryDataFrames(`tests/${size}k.d42`))[0];
|
|
29
|
+
const clustersColumnName = 'cluster';
|
|
30
|
+
const scaledActivity = scaleActivity(df.getCol('activity'), C.SCALING_METHODS.NONE);
|
|
31
|
+
df.columns.add(scaledActivity);
|
|
32
|
+
DG.time(`Cluster stats benchmark - ${size}k`, () => calculateClusterStatistics(df, clustersColumnName, []));
|
|
33
|
+
}, {timeout: 100000});
|
|
34
|
+
}
|
|
35
|
+
});
|
|
36
|
+
|
|
37
|
+
category('Benchmarks: Monomer-Position stats', () => {
|
|
38
|
+
for (const size of benchmarkDatasetSizes) {
|
|
39
|
+
test(`${size}k sequences`, async () => {
|
|
40
|
+
if (!DG.Test.isInBenchmark)
|
|
41
|
+
return null;
|
|
42
|
+
|
|
43
|
+
const df = (await _package.files.readBinaryDataFrames(`tests/${size}k.d42`))[0];
|
|
44
|
+
const positionCols: DG.Column<string>[] = [];
|
|
45
|
+
let i = 1;
|
|
46
|
+
while (df.col(i.toString()) !== null) {
|
|
47
|
+
positionCols.push(df.getCol(i.toString()));
|
|
48
|
+
++i;
|
|
49
|
+
}
|
|
50
|
+
const scaledActivity = scaleActivity(df.getCol('activity'), C.SCALING_METHODS.NONE);
|
|
51
|
+
df.columns.add(scaledActivity);
|
|
52
|
+
DG.time(`Monomer-Position stats benchmark - ${size}k`, () => calculateMonomerPositionStatistics(df, positionCols));
|
|
53
|
+
}, {timeout: 100000});
|
|
54
|
+
}
|
|
55
|
+
});
|
|
56
|
+
|
|
57
|
+
category('Benchmarks: Analysis start', () => {
|
|
58
|
+
for (const size of benchmarkDatasetSizes) {
|
|
59
|
+
test(`${size}k sequences`, async () => {
|
|
60
|
+
if (!DG.Test.isInBenchmark)
|
|
61
|
+
return;
|
|
62
|
+
|
|
63
|
+
const df = (await _package.files.readBinaryDataFrames(`tests/${size}k.d42`))[0];
|
|
64
|
+
const activityCol = df.getCol('activity');
|
|
65
|
+
const scaledActivityCol = scaleActivity(activityCol, C.SCALING_METHODS.NONE);
|
|
66
|
+
const clustersCol = df.getCol('cluster');
|
|
67
|
+
const sequenceCol = df.getCol('sequence');
|
|
68
|
+
sequenceCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
69
|
+
sequenceCol.setTag(DG.TAGS.UNITS, size === benchmarkDatasetSizes[0] ? NOTATION.HELM : NOTATION.FASTA);
|
|
70
|
+
|
|
71
|
+
await DG.timeAsync('Analysis start', async () => {
|
|
72
|
+
const model = await startAnalysis(activityCol, sequenceCol, clustersCol, df, scaledActivityCol, C.SCALING_METHODS.NONE);
|
|
73
|
+
|
|
74
|
+
if (model)
|
|
75
|
+
grok.shell.closeTable(model.df);
|
|
76
|
+
});
|
|
77
|
+
}, {timeout: 100000});
|
|
78
|
+
}
|
|
79
|
+
});
|
|
80
|
+
|
|
81
|
+
async function mutationCliffsBenchmark(size: number): Promise<void> {
|
|
82
|
+
if (!DG.Test.isInBenchmark)
|
|
83
|
+
return;
|
|
84
|
+
|
|
85
|
+
const df = (await _package.files.readBinaryDataFrames(`tests/${size}k.d42`))[0];
|
|
86
|
+
const activityCol: type.RawData = df.getCol('activity').getRawData();
|
|
87
|
+
const monomerCols: type.RawColumn[] = [];
|
|
88
|
+
let i = 1;
|
|
89
|
+
while (df.col(i.toString()) !== null) {
|
|
90
|
+
const col = df.getCol(i.toString());
|
|
91
|
+
monomerCols.push({name: col.name, rawData: col.getRawData(), cat: col.categories});
|
|
92
|
+
++i;
|
|
93
|
+
}
|
|
94
|
+
await DG.timeAsync('Mutation Cliffs', async () => await findMutations(activityCol, monomerCols));
|
|
95
|
+
}
|