@datagrok/peptides 1.27.6 → 1.27.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@datagrok/peptides",
3
3
  "friendlyName": "Peptides",
4
- "version": "1.27.6",
4
+ "version": "1.27.8",
5
5
  "author": {
6
6
  "name": "Davit Rizhinashvili",
7
7
  "email": "drizhinashvili@datagrok.ai"
@@ -13,7 +13,7 @@
13
13
  "directory": "packages/Peptides"
14
14
  },
15
15
  "dependencies": {
16
- "@datagrok-libraries/bio": "^5.63.2",
16
+ "@datagrok-libraries/bio": "^5.63.6",
17
17
  "@datagrok-libraries/math": "^1.2.6",
18
18
  "@datagrok-libraries/ml": "^6.10.7",
19
19
  "@datagrok-libraries/statistics": "^1.2.12",
@@ -31,11 +31,11 @@
31
31
  "devDependencies": {
32
32
  "@datagrok-libraries/helm-web-editor": "^1.1.16",
33
33
  "@datagrok-libraries/js-draw-lite": "^0.0.5",
34
- "@datagrok/bio": "^2.24.0",
35
- "@datagrok/chem": "^1.13.0",
34
+ "@datagrok/bio": "^2.26.4",
35
+ "@datagrok/chem": "^1.17.1",
36
36
  "@datagrok/dendrogram": "^1.2.33",
37
37
  "@datagrok/eda": "^1.3.1",
38
- "@datagrok/helm": "^2.7.0",
38
+ "@datagrok/helm": "^2.13.0",
39
39
  "@types/uuid": "^10.0.0",
40
40
  "@types/wu": "^2.1.44",
41
41
  "@typescript-eslint/eslint-plugin": "^8.8.1",
package/src/model.ts CHANGED
@@ -50,6 +50,7 @@ import {ITSNEOptions, IUMAPOptions}
50
50
  import {DimReductionMethods} from '@datagrok-libraries/ml/src/multi-column-dimensionality-reduction/types';
51
51
  import {AggregationColumns, MonomerPositionStats} from './utils/statistics';
52
52
  import {splitAlignedSequences} from '@datagrok-libraries/bio/src/utils/splitter';
53
+ import {MONOMER_CANONICALIZER_FUNC_TAG} from '@datagrok-libraries/bio/src/utils/macromolecule/consts';
53
54
  import {getDbscanWorker} from '@datagrok-libraries/math';
54
55
  import {DistanceAggregationMethods} from '@datagrok-libraries/ml/src/distance-matrix/types';
55
56
  import {ClusterMaxActivityViewer, IClusterMaxActivity} from './viewers/cluster-max-activity-viewer';
@@ -748,9 +749,14 @@ export class PeptidesModel {
748
749
  const positionColumns = splitSeqDf.columns.names();
749
750
  if (positionColumns.every((colName) => cols.contains(colName))) {
750
751
  positionColumns.forEach((colName) => {
751
- this.df.col(colName)!.setTag(C.TAGS.ANALYSIS_COL, `${true}`);
752
- this.df.col(colName)!.setTag(C.TAGS.POSITION_COL, `${true}`);
753
- CR.setMonomerRenderer(this.df.col(colName)!, this.alphabet);
752
+ const col = this.df.col(colName)!;
753
+ col.setTag(C.TAGS.ANALYSIS_COL, `${true}`);
754
+ col.setTag(C.TAGS.POSITION_COL, `${true}`);
755
+ // Propagate canonicalizer tag from split columns if present
756
+ const canonTag = splitSeqDf.getCol(colName).getTag(MONOMER_CANONICALIZER_FUNC_TAG);
757
+ if (canonTag)
758
+ col.setTag(MONOMER_CANONICALIZER_FUNC_TAG, canonTag);
759
+ CR.setMonomerRenderer(col, this.alphabet);
754
760
  });
755
761
  } else {
756
762
  for (const colName of positionColumns) {
@@ -763,6 +769,10 @@ export class PeptidesModel {
763
769
  col = cols.addNew(newCol.name, newCol.type).init((i) => newColCat[newColData[i]]);
764
770
  col.setTag(C.TAGS.ANALYSIS_COL, `${true}`);
765
771
  col.setTag(C.TAGS.POSITION_COL, `${true}`);
772
+ // Propagate canonicalizer tag from split columns if present
773
+ const canonTag = newCol.getTag(MONOMER_CANONICALIZER_FUNC_TAG);
774
+ if (canonTag)
775
+ col.setTag(MONOMER_CANONICALIZER_FUNC_TAG, canonTag);
766
776
  CR.setMonomerRenderer(col, this.alphabet);
767
777
  }
768
778
  }
@@ -10,6 +10,8 @@ import $ from 'cash-dom';
10
10
  import {PeptidesModel} from '../model';
11
11
  import {extractColInfo} from '../utils/misc';
12
12
  import {Subscription} from 'rxjs';
13
+ import {SeqTemps} from '@datagrok-libraries/bio/src/utils/macromolecule/seq-handler';
14
+ import {MONOMER_CANONICALIZER_FUNC_TAG} from '@datagrok-libraries/bio/src/utils/macromolecule/consts';
13
15
  export type MutationCliffsWithMonomers = {
14
16
  cliffs: MutationCliffs,
15
17
  monomers: string[]
@@ -267,15 +269,20 @@ export class MutationCliffsViewer extends DG.JsViewer {
267
269
  this._positionColumns = posCols;
268
270
  return this._positionColumns;
269
271
  }
270
- // fallback: generate columns
272
+ // fallback: generate columns using original (non-canonical) monomers
271
273
  const seqCol = this.dataFrame.col(this.sequenceColumnName)!;
272
274
  const seqHelper = PeptideUtils.getSeqHelper();
273
275
  const seqHandler = seqHelper.getSeqHandler(seqCol);
274
276
  const length = seqHandler.maxLength;
275
277
  const cols: DG.Column[] = [];
278
+ // Propagate canonicalizer tag if the source column has a notation provider with one
279
+ const notationProvider: any = seqCol.temp[SeqTemps.notationProvider] ?? null;
280
+ const canonFuncName: string | null = notationProvider?.monomerCanonicalizerFuncName ?? null;
276
281
  for (let i = 0; i < length; i++) {
277
- const monomersAtPosition = seqHandler.getMonomersAtPosition(i, true);
282
+ const monomersAtPosition = seqHandler.getMonomersAtPosition(i, false);
278
283
  const monomerCol = DG.Column.fromList('string', `Position ${i + 1}`, monomersAtPosition);
284
+ if (canonFuncName)
285
+ monomerCol.setTag(MONOMER_CANONICALIZER_FUNC_TAG, canonFuncName);
279
286
  cols.push(monomerCol);
280
287
  }
281
288
  this._positionColumns = cols;
@@ -93,8 +93,8 @@ export class SequencePositionStatsViewer extends DG.JsViewer {
93
93
  const seqHandler = seqHelper.getSeqHandler(sequenceColumn);
94
94
  const maxPos = seqHandler.maxLength;
95
95
 
96
- const canonicals = positions.map((p) => seqHandler.getMonomersAtPosition(p - 1, true));
97
- this._positionColumn.init((i) => canonicals.map((c) => c[i]).join(MONOMER_MOTIF_SPLITTER));
96
+ const originals = positions.map((p) => seqHandler.getMonomersAtPosition(p - 1, false));
97
+ this._positionColumn.init((i) => originals.map((c) => c[i]).join(MONOMER_MOTIF_SPLITTER));
98
98
 
99
99
  this._boxPlotViewer = this.dataFrame.plot.box({categoryColumnNames: [this._positionColumn.name], plotStyle: 'violin',
100
100
  valueColumnName: this.valueColumnName, colorColumnName: this._positionColumn.name, showColorSelector: false, showSizeSelector: false, showCategorySelector: false,
@@ -36,12 +36,14 @@ import {
36
36
  } from '../utils/misc';
37
37
  import {splitAlignedSequences} from '@datagrok-libraries/bio/src/utils/splitter';
38
38
  import {LogoSummaryTable} from './logo-summary';
39
- import {TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule/consts';
39
+ import {TAGS as bioTAGS, MONOMER_CANONICALIZER_FUNC_TAG} from '@datagrok-libraries/bio/src/utils/macromolecule/consts';
40
40
  import {ALPHABET} from '@datagrok-libraries/bio/src/utils/macromolecule';
41
41
  import {getMonomerLibHelper} from '@datagrok-libraries/bio/src/types/monomer-library';
42
42
  import {PolymerTypes} from '@datagrok-libraries/bio/src/helm/consts';
43
43
  import {PeptideUtils} from '../peptideUtils';
44
44
  import {StringDictionary} from '@datagrok-libraries/utils/src/type-declarations';
45
+ import {SeqTemps} from '@datagrok-libraries/bio/src/utils/macromolecule/seq-handler';
46
+ import {getSeparator} from '../utils/misc';
45
47
 
46
48
  export enum SELECTION_MODE {
47
49
  MUTATION_CLIFFS = 'Mutation Cliffs',
@@ -244,6 +246,16 @@ export abstract class SARViewer extends DG.JsViewer implements ISARViewer {
244
246
  return this._positionColumns!;
245
247
  }
246
248
 
249
+ /** Copies the monomer canonicalizer tag from the sequence column to a monomer column. */
250
+ protected propagateCanonicalizerTag(monomerCol: DG.Column): void {
251
+ const seqCol = this.dataFrame?.col(this.sequenceColumnName);
252
+ if (!seqCol) return;
253
+ const notationProvider: any = seqCol.temp[SeqTemps.notationProvider] ?? null;
254
+ const canonFuncName: string | null = notationProvider?.monomerCanonicalizerFuncName ?? null;
255
+ if (canonFuncName)
256
+ monomerCol.setTag(MONOMER_CANONICALIZER_FUNC_TAG, canonFuncName);
257
+ }
258
+
247
259
  _monomerPositionStats: MonomerPositionStats | null = null;
248
260
 
249
261
  /**
@@ -467,7 +479,8 @@ export abstract class SARViewer extends DG.JsViewer implements ISARViewer {
467
479
  */
468
480
  onPropertyChanged(property: DG.Property): void {
469
481
  super.onPropertyChanged(property);
470
-
482
+ if (!this.dataFrame)
483
+ return;
471
484
 
472
485
  this.doRender = true;
473
486
  switch (property.name) {
@@ -543,6 +556,152 @@ export abstract class SARViewer extends DG.JsViewer implements ISARViewer {
543
556
  throw new Error('Not implemented');
544
557
  }
545
558
 
559
+ /** Shows a dialog to choose extra columns, then exports all unique mutation cliffs as a new table view. */
560
+ exportMutationCliffs(): void {
561
+ if (!this.mutationCliffs) {
562
+ grok.shell.warning('Mutation cliffs have not been calculated yet.');
563
+ return;
564
+ }
565
+
566
+ const available = this.dataFrame.columns.toList()
567
+ .filter((col) => col.name !== this.activityColumnName && col.name !== this.sequenceColumnName &&
568
+ !this.positionColumns.some((pc) => pc.name === col.name))
569
+ .map((col) => col.name);
570
+
571
+ const columnsInput = ui.input.columns('Extra columns', {table: this.dataFrame, value: [], available, nullable: true});
572
+
573
+ ui.dialog('Export Mutation Cliffs')
574
+ .add(columnsInput.root)
575
+ .onOK(() => this._doExportMutationCliffs(columnsInput.value ?? []))
576
+ .show();
577
+ }
578
+
579
+ private _doExportMutationCliffs(extraColumns: DG.Column[]): void {
580
+ const mc = this.mutationCliffs!;
581
+ const alignedSeqCol = this.dataFrame.getCol(this.sequenceColumnName);
582
+ const alignedSeqColCategories = alignedSeqCol.categories;
583
+ const alignedSeqColData = alignedSeqCol.getRawData();
584
+ const activityCol = this.dataFrame.getCol(this.activityColumnName);
585
+ const activityColData = activityCol.getRawData();
586
+
587
+ const seq1Array: string[] = [];
588
+ const seq2Array: string[] = [];
589
+ const diffArray: string[] = [];
590
+ const act1Array: (number | null)[] = [];
591
+ const act2Array: (number | null)[] = [];
592
+ const deltaArray: (number | null)[] = [];
593
+ const extraData1: any[][] = extraColumns.map(() => []);
594
+ const extraData2: any[][] = extraColumns.map(() => []);
595
+
596
+ const seenPairs = new Set<string>();
597
+
598
+ for (const [_monomer, posMap] of mc.entries()) {
599
+ for (const [_position, indexMap] of posMap.entries()) {
600
+ for (const [refIdx, subIndexes] of indexMap.entries()) {
601
+ for (const subIdx of subIndexes) {
602
+ const pairKey = refIdx < subIdx ? `${refIdx}-${subIdx}` : `${subIdx}-${refIdx}`;
603
+ if (seenPairs.has(pairKey))
604
+ continue;
605
+ seenPairs.add(pairKey);
606
+
607
+ const seq1 = alignedSeqColCategories[alignedSeqColData[refIdx]];
608
+ const seq2 = alignedSeqColCategories[alignedSeqColData[subIdx]];
609
+ seq1Array.push(seq1);
610
+ seq2Array.push(seq2);
611
+ diffArray.push(`${seq1}#${seq2}`);
612
+
613
+ const a1 = activityCol.isNone(refIdx) ? null : activityColData[refIdx];
614
+ const a2 = activityCol.isNone(subIdx) ? null : activityColData[subIdx];
615
+ act1Array.push(a1);
616
+ act2Array.push(a2);
617
+ deltaArray.push(a1 == null || a2 == null ? null : a1 - a2);
618
+
619
+ for (let e = 0; e < extraColumns.length; e++) {
620
+ const eCol = extraColumns[e];
621
+ extraData1[e].push(eCol.isNone(refIdx) ? null : eCol.get(refIdx));
622
+ extraData2[e].push(eCol.isNone(subIdx) ? null : eCol.get(subIdx));
623
+ }
624
+ }
625
+ }
626
+ }
627
+ }
628
+
629
+ if (seq1Array.length === 0) {
630
+ grok.shell.warning('No mutation cliffs found to export.');
631
+ return;
632
+ }
633
+
634
+ const seq1Col = DG.Column.fromStrings('Seq 1', seq1Array);
635
+ const seq2Col = DG.Column.fromStrings('Seq 2', seq2Array);
636
+ const diffCol = DG.Column.fromStrings('Mutation', diffArray);
637
+
638
+ // Copy sequence tags (without notation provider) so the platform detects the same semtype
639
+ for (const col of [seq1Col, seq2Col]) {
640
+ for (const tag of alignedSeqCol.tags.keys()) {
641
+ if (tag !== '.notationProvider')
642
+ col.setTag(tag, alignedSeqCol.getTag(tag)!);
643
+ }
644
+ col.semType = alignedSeqCol.semType;
645
+ }
646
+
647
+ // Set up macromolecule difference column
648
+ diffCol.semType = C.SEM_TYPES.MACROMOLECULE_DIFFERENCE;
649
+ diffCol.setTag(C.TAGS.SEPARATOR, getSeparator(alignedSeqCol));
650
+ diffCol.setTag(DG.TAGS.UNITS, alignedSeqCol.getTag(DG.TAGS.UNITS) ?? '');
651
+ diffCol.setTag(DG.TAGS.CELL_RENDERER, 'MacromoleculeDifference');
652
+ diffCol.temp[SeqTemps.notationProvider] = alignedSeqCol.temp[SeqTemps.notationProvider];
653
+
654
+ const act1Col = DG.Column.fromList(DG.COLUMN_TYPE.FLOAT, `Seq 1 ${this.activityColumnName}`, act1Array);
655
+ const act2Col = DG.Column.fromList(DG.COLUMN_TYPE.FLOAT, `Seq 2 ${this.activityColumnName}`, act2Array);
656
+ const deltaCol = DG.Column.fromList(DG.COLUMN_TYPE.FLOAT, 'Delta', deltaArray);
657
+
658
+ const columns: DG.Column[] = [seq1Col, seq2Col, diffCol, act1Col, act2Col, deltaCol];
659
+
660
+ // Add extra columns (two per extra column: one for seq1, one for seq2)
661
+ for (let e = 0; e < extraColumns.length; e++) {
662
+ const eName = extraColumns[e].name;
663
+ const eType = extraColumns[e].type;
664
+ columns.push(DG.Column.fromList(eType as DG.COLUMN_TYPE, `Seq 1 ${eName}`, extraData1[e]));
665
+ columns.push(DG.Column.fromList(eType as DG.COLUMN_TYPE, `Seq 2 ${eName}`, extraData2[e]));
666
+ }
667
+
668
+ const df = DG.DataFrame.fromColumns(columns);
669
+ df.name = 'Mutation Cliffs';
670
+ grok.shell.addTableView(df);
671
+ }
672
+
673
+ /** Exports the invariant map as a new table view: monomer × position counts. */
674
+ exportInvariantMap(): void {
675
+ const stats = this.monomerPositionStats;
676
+ const uniqueMonomers = new Set<string>();
677
+ const positionNames: string[] = [];
678
+ for (const pos of Object.keys(stats)) {
679
+ if (pos === 'general')
680
+ continue;
681
+ positionNames.push(pos);
682
+ const posStats = stats[pos]!;
683
+ for (const monomer of Object.keys(posStats)) {
684
+ if (monomer === 'general')
685
+ continue;
686
+ uniqueMonomers.add(monomer);
687
+ }
688
+ }
689
+
690
+ const monomersArray = Array.from(uniqueMonomers).sort();
691
+ const monomerCol = DG.Column.fromStrings(C.COLUMNS_NAMES.MONOMER, monomersArray);
692
+ const columns: DG.Column[] = [monomerCol];
693
+
694
+ for (const pos of positionNames) {
695
+ const posStats = stats[pos]!;
696
+ const counts = monomersArray.map((m) => posStats[m]?.count ?? 0);
697
+ columns.push(DG.Column.fromList(DG.COLUMN_TYPE.INT, pos, counts));
698
+ }
699
+
700
+ const df = DG.DataFrame.fromColumns(columns);
701
+ df.name = 'Invariant Map';
702
+ grok.shell.addTableView(df);
703
+ }
704
+
546
705
  /** Removes all the active subscriptions. */
547
706
  detach(): void {
548
707
  this.subs.forEach((sub) => sub.unsubscribe());
@@ -567,6 +726,9 @@ export abstract class SARViewer extends DG.JsViewer implements ISARViewer {
567
726
  if (!a || !a.causedBy || !a.args || !a.args.menu || !a.causedBy.target || !(a.causedBy.target instanceof HTMLElement) || !this.root.contains(a.causedBy.target))
568
727
  return;
569
728
  const menu = a.args.menu as DG.Menu;
729
+ const exportGroup = menu.group('Export');
730
+ exportGroup.item('Export Mutation Cliffs...', () => this.exportMutationCliffs());
731
+ exportGroup.item('Export Invariant Map', () => this.exportInvariantMap());
570
732
  getMonomerLibHelper().then((lh) => {
571
733
  const lib = lh.getMonomerLib();
572
734
  const mSymbols = lib.getMonomerSymbolsByType(PolymerTypes.PEPTIDE);
@@ -748,6 +910,8 @@ export class MonomerPosition extends SARViewer {
748
910
  */
749
911
  onPropertyChanged(property: DG.Property): void {
750
912
  super.onPropertyChanged(property);
913
+ if (!this.dataFrame)
914
+ return;
751
915
  switch (property.name) {
752
916
  case SAR_PROPERTIES.SEQUENCE:
753
917
  this._invariantMapSelection = null;
@@ -915,6 +1079,7 @@ export class MonomerPosition extends SARViewer {
915
1079
  sumGridCol.visible = false;
916
1080
  const monomerCol = monomerPositionDf.getCol(C.COLUMNS_NAMES.MONOMER);
917
1081
  CR.setMonomerRenderer(monomerCol, this.alphabet, true);
1082
+ this.propagateCanonicalizerTag(monomerCol);
918
1083
  this.cacheInvariantMapColors();
919
1084
 
920
1085
  grid.onCellRender.subscribe((args: DG.GridCellRenderArgs) => renderCell(args, this,
@@ -1115,7 +1280,7 @@ export class MonomerPosition extends SARViewer {
1115
1280
  /** Renders the MonomerPosition viewer body. */
1116
1281
  render(): void {
1117
1282
  $(this.root).empty();
1118
- if (!this.activityColumnName || !this.sequenceColumnName) {
1283
+ if (!this.dataFrame || !this.activityColumnName || !this.sequenceColumnName) {
1119
1284
  this.root.appendChild(ui.divText('Please, select a sequence and activity columns in the viewer properties'));
1120
1285
  return;
1121
1286
  }
@@ -1342,6 +1507,7 @@ export class MostPotentResidues extends SARViewer {
1342
1507
 
1343
1508
  // Setting Monomer column renderer
1344
1509
  CR.setMonomerRenderer(monomerCol, this.alphabet, true);
1510
+ this.propagateCanonicalizerTag(monomerCol);
1345
1511
  grid.onCellRender.subscribe(
1346
1512
  (args: DG.GridCellRenderArgs) => renderCell(args, this, false, undefined, undefined));
1347
1513
 
@@ -1474,7 +1640,7 @@ export class MostPotentResidues extends SARViewer {
1474
1640
  /** Renders the MostPotentResidues viewer body. */
1475
1641
  render(): void {
1476
1642
  $(this.root).empty();
1477
- if (!this.activityColumnName || !this.sequenceColumnName) {
1643
+ if (!this.dataFrame || !this.activityColumnName || !this.sequenceColumnName) {
1478
1644
  this.root.appendChild(ui.divText('Please, select a sequence and activity columns in the viewer properties'));
1479
1645
  return;
1480
1646
  }
@@ -24,7 +24,7 @@ export function manualAlignmentWidget(alignedSequenceCol: DG.Column<string>, cur
24
24
  const affectedRowIndex = currentDf.currentRowIdx;
25
25
  alignedSequenceCol.set(affectedRowIndex, newSequence);
26
26
  for (let i = 0; i < splitSequence.length; i++) {
27
- const part = splitSequence.getCanonical(i);
27
+ const part = splitSequence.getOriginal(i);
28
28
  if (currentDf.col(i.toString()) !== null)
29
29
  currentDf.set(i.toString(), affectedRowIndex, part);
30
30
  }