@datagrok/peptides 1.4.0 → 1.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,6 +2,7 @@ import * as DG from 'datagrok-api/dg';
2
2
 
3
3
  export type DataFrameDict = {[key: string]: DG.DataFrame};
4
4
 
5
+ export type RawData = Int32Array | Uint32Array | Float32Array | Float64Array;
5
6
  export type UTypedArray = Uint8Array | Uint16Array | Uint32Array;
6
7
  //AAR: (Position: (index: indexList))
7
8
  export type SubstitutionsInfo = Map<string, Map<string, Map<number, number[] | UTypedArray>>>;
@@ -11,6 +12,9 @@ export type MonomerSelectionStats = {[position: string]: {[monomer: string]: num
11
12
 
12
13
  export type ScalingMethods = 'none' | 'lg' | '-lg';
13
14
  export type PeptidesSettings = {
15
+ sequenceColumnName?: string,
16
+ activityColumnName?: string,
17
+ clustersColumnName?: string,
14
18
  scaling?: ScalingMethods,
15
19
  isBidirectional?: boolean,
16
20
  maxMutations?: number,
@@ -34,3 +38,5 @@ export type StatsInfo = {
34
38
  countCol: DG.Column<number>,
35
39
  orderedIndexes: Int32Array,
36
40
  }
41
+
42
+ export type RawColumn = {name: string, rawData: RawData, cat?: string[]};
@@ -6,6 +6,7 @@ import {PeptidesModel} from '../model';
6
6
  import * as C from '../utils/constants';
7
7
  import * as CR from '../utils/cell-renderer';
8
8
  import * as bio from '@datagrok-libraries/bio';
9
+ import { Stats } from '../utils/statistics';
9
10
 
10
11
  export class LogoSummary extends DG.JsViewer {
11
12
  _titleHost = ui.divText('Logo Summary Table', {id: 'pep-viewer-title'});
@@ -13,29 +14,25 @@ export class LogoSummary extends DG.JsViewer {
13
14
  viewerGrid!: DG.Grid;
14
15
  initialized: boolean = false;
15
16
  webLogoMode: string;
16
- importanceThreshold: number;
17
+ membersRatioThreshold: number;
17
18
 
18
19
  constructor() {
19
20
  super();
20
21
 
21
- this.webLogoMode = this.string('webLogoMode', bio.PositionHeight.full, {choices: [bio.PositionHeight.full, bio.PositionHeight.Entropy]});
22
- this.importanceThreshold = this.float('importanceThreshold', 0.7);
22
+ this.webLogoMode = this.string('webLogoMode', bio.PositionHeight.full,
23
+ {choices: [bio.PositionHeight.full, bio.PositionHeight.Entropy]});
24
+ this.membersRatioThreshold = this.float('membersRatioThreshold', 0.7, {min: 0.01, max: 1.0});
23
25
  }
24
26
 
25
- async onTableAttached(): Promise<void> {
27
+ onTableAttached(): void {
26
28
  super.onTableAttached();
27
29
 
28
- this.model = await PeptidesModel.getInstance(this.dataFrame);
30
+ this.model = PeptidesModel.getInstance(this.dataFrame);
29
31
  this.subs.push(this.model.onSettingsChanged.subscribe(() => {
30
32
  this.createLogoSummaryGrid();
31
33
  this.render();
32
34
  }));
33
- // this.subs.push(this.model.onLogoSummaryGridChanged.subscribe((grid) => {
34
- // this.viewerGrid = grid;
35
- // this.render();
36
- // }));
37
- // this.model.updateDefault();
38
- // this.viewerGrid = this.model.logoSummaryGrid;
35
+
39
36
  this.createLogoSummaryGrid();
40
37
  this.initialized = true;
41
38
  this.render();
@@ -52,33 +49,52 @@ export class LogoSummary extends DG.JsViewer {
52
49
  }
53
50
  }
54
51
 
55
- onPropertyChanged(): void {
56
- this.createLogoSummaryGrid();
52
+ onPropertyChanged(property: DG.Property): void {
53
+ super.onPropertyChanged(property);
54
+ if (property.name == 'membersRatioThreshold')
55
+ this.updateFilter();
57
56
  this.render();
58
57
  }
59
58
 
60
59
  createLogoSummaryGrid(): DG.Grid {
61
- let summaryTableBuilder = this.dataFrame.groupBy([C.COLUMNS_NAMES.CLUSTERS]);
60
+ const clustersColName = this.model.settings.clustersColumnName!;
61
+ let summaryTableBuilder = this.dataFrame.groupBy([clustersColName]);
62
62
  for (const [colName, aggregationFunc] of Object.entries(this.model.settings.columns ?? {}))
63
63
  summaryTableBuilder = summaryTableBuilder.add(aggregationFunc as any, colName, `${aggregationFunc}(${colName})`);
64
64
 
65
65
  const summaryTable = summaryTableBuilder.aggregate();
66
- const summaryTableLength = summaryTable.rowCount;
67
- const clustersCol: DG.Column<number> = summaryTable.getCol(C.COLUMNS_NAMES.CLUSTERS);
68
- const membersCol: DG.Column<number> = summaryTable.columns.addNewInt('Members');
69
- const webLogoCol: DG.Column<string> = summaryTable.columns.addNew('WebLogo', DG.COLUMN_TYPE.STRING);
70
- const tempDfList: DG.DataFrame[] = new Array(summaryTableLength);
71
- const originalClustersCol = this.dataFrame.getCol(C.COLUMNS_NAMES.CLUSTERS);
72
- const peptideCol: DG.Column<string> = this.dataFrame.getCol(C.COLUMNS_NAMES.MACROMOLECULE);
66
+ const summaryTableCols = summaryTable.columns;
67
+ const webLogoCol: DG.Column<string> = summaryTableCols.addNewString('WebLogo');
68
+ // const webLogoColData = webLogoCol.getRawData();
69
+ const clustersCol: DG.Column<string> = summaryTable.getCol(clustersColName);
70
+ const clustersColData = clustersCol.getRawData();
71
+ const clustersColCategories = clustersCol.categories;
72
+ const summaryTableLength = clustersColData.length;
73
+ const membersColData = summaryTableCols.addNewInt(C.COLUMNS_NAMES.MEMBERS).getRawData();
74
+ const tempWebLogoDfPlotList: DG.DataFramePlotHelper[] = new Array(summaryTableLength);
75
+ const tempDistributionDfPlotList: DG.DataFramePlotHelper[] = new Array(summaryTableLength);
76
+ const originalClustersCol = this.dataFrame.getCol(clustersColName);
77
+ const originalClustersColData = originalClustersCol.getRawData();
78
+ const originalClustersColCategories = originalClustersCol.categories;
79
+ const originalClustersColLength = originalClustersColData.length;
80
+ const peptideCol: DG.Column<string> = this.dataFrame.getCol(this.model.settings.sequenceColumnName!);
81
+ const peptideColData = peptideCol.getRawData();
82
+ const peptideColCategories = peptideCol.categories;
83
+ const meanDifferenceColData = summaryTableCols.addNewFloat('Mean difference').getRawData();
84
+ const pValColData = summaryTableCols.addNewFloat('P-Value').getRawData();
85
+ const ratioColData = summaryTableCols.addNewFloat('Ratio').getRawData();
86
+ const distributionCol = summaryTableCols.addNewString('Distribution');
87
+ // const distributionColData = distributionCol.getRawData();
73
88
 
74
89
  for (let index = 0; index < summaryTableLength; ++index) {
75
90
  const indexes: number[] = [];
76
- for (let j = 0; j < originalClustersCol.length; ++j) {
77
- if (originalClustersCol.get(j) === clustersCol.get(index))
91
+ const currentCluster = clustersColCategories[clustersColData[index]];
92
+ for (let j = 0; j < originalClustersColLength; ++j) {
93
+ if (originalClustersColCategories[originalClustersColData[j]] == currentCluster)
78
94
  indexes.push(j);
79
95
  }
80
96
  const tCol = DG.Column.string('peptides', indexes.length);
81
- tCol.init((i) => peptideCol.get(indexes[i]));
97
+ tCol.init((i) => peptideColCategories[peptideColData[indexes[i]]]);
82
98
 
83
99
  for (const tag of peptideCol.tags)
84
100
  tCol.setTag(tag[0], tag[1]);
@@ -86,26 +102,69 @@ export class LogoSummary extends DG.JsViewer {
86
102
  const uh = new bio.UnitsHandler(tCol);
87
103
  tCol.setTag(bio.TAGS.alphabetSize, uh.getAlphabetSize().toString());
88
104
 
105
+ // Get Stats
106
+ const matcher: {[key: string]: number | string} = {};
107
+ matcher[this.model.settings.clustersColumnName!] = currentCluster;
108
+ const currentStatsDf = this.model.clusterStatsDf.rows.match(matcher).toDataFrame();
109
+ const activityCol = this.dataFrame.getCol(C.COLUMNS_NAMES.ACTIVITY_SCALED);
110
+ //TODO: use bitset instead of splitCol
111
+ const splitCol = DG.Column.bool(C.COLUMNS_NAMES.SPLIT_COL, activityCol.length);
112
+ const currentClusterCol = this.dataFrame.getCol(this.model.settings.clustersColumnName!);
113
+ splitCol.init((i) => currentClusterCol.get(i) == currentCluster);
114
+ const distributionTable = DG.DataFrame.fromColumns([activityCol, splitCol]);
115
+ const stats: Stats = {
116
+ count: currentStatsDf.get(C.COLUMNS_NAMES.COUNT, 0),
117
+ ratio: currentStatsDf.get(C.COLUMNS_NAMES.RATIO, 0),
118
+ pValue: currentStatsDf.get(C.COLUMNS_NAMES.P_VALUE, 0),
119
+ meanDifference: currentStatsDf.get(C.COLUMNS_NAMES.MEAN_DIFFERENCE, 0),
120
+ };
121
+
89
122
  const dfSlice = DG.DataFrame.fromColumns([tCol]);
90
- tempDfList[index] = dfSlice;
91
- webLogoCol.set(index, index.toString());
92
- membersCol.set(index, dfSlice.rowCount);
93
- //TODO: user should be able to choose threshold
94
- if (dfSlice.rowCount <= Math.ceil(this.model.clusterStatsDf.getCol(C.COLUMNS_NAMES.COUNT).stats.max * this.importanceThreshold))
95
- summaryTable.filter.set(index, false, false);
123
+ tempWebLogoDfPlotList[index] = dfSlice.plot;
124
+ tempDistributionDfPlotList[index] = distributionTable.plot;
125
+ // webLogoColData[index] = index;
126
+ // distributionColData[index] = index;
127
+ membersColData[index] = indexes.length;
128
+ meanDifferenceColData[index] = stats.meanDifference;
129
+ pValColData[index] = stats.pValue;
130
+ ratioColData[index] = stats.ratio;
96
131
  }
97
132
  webLogoCol.setTag(DG.TAGS.CELL_RENDERER, 'html');
133
+ distributionCol.setTag(DG.TAGS.CELL_RENDERER, 'html');
98
134
 
99
135
  this.viewerGrid = summaryTable.plot.grid();
100
- const gridClustersCol = this.viewerGrid.col(C.COLUMNS_NAMES.CLUSTERS)!;
136
+ this.updateFilter();
137
+ const gridClustersCol = this.viewerGrid.col(clustersColName)!;
101
138
  gridClustersCol.name = 'Clusters';
102
139
  gridClustersCol.visible = true;
103
140
  this.viewerGrid.columns.rowHeader!.visible = false;
104
141
  this.viewerGrid.props.rowHeight = 55;
105
142
  this.viewerGrid.onCellPrepare((cell) => {
106
- if (cell.isTableCell && cell.tableColumn?.name === 'WebLogo') {
107
- tempDfList[parseInt(cell.cell.value)].plot.fromType('WebLogo', {maxHeight: 50, positionHeight: this.webLogoMode})
143
+ const currentRowIdx = cell.tableRowIndex;
144
+ if (!cell.isTableCell || currentRowIdx == null || currentRowIdx == -1)
145
+ return;
146
+
147
+ if (cell.tableColumn?.name == 'WebLogo') {
148
+ tempWebLogoDfPlotList[currentRowIdx]
149
+ .fromType('WebLogo', {maxHeight: cell.grid.props.rowHeight - 5, positionHeight: this.webLogoMode})
108
150
  .then((viewer) => cell.element = viewer.root);
151
+ } else if (cell.tableColumn?.name == 'Distribution') {
152
+ const viewerRoot = tempDistributionDfPlotList[currentRowIdx].histogram({
153
+ filteringEnabled: false,
154
+ valueColumnName: C.COLUMNS_NAMES.ACTIVITY_SCALED,
155
+ splitColumnName: C.COLUMNS_NAMES.SPLIT_COL,
156
+ legendVisibility: 'Never',
157
+ showXAxis: true,
158
+ showColumnSelector: false,
159
+ showRangeSlider: false,
160
+ showBinSelector: false,
161
+ backColor: '#fffff',
162
+ }).root;
163
+
164
+ viewerRoot.style.width = 'auto';
165
+ const height = (cell.grid.props.rowHeight - 5) / 2 * 3;
166
+ viewerRoot.style.height = `${height}px`;
167
+ cell.element = viewerRoot;
109
168
  }
110
169
  });
111
170
  this.viewerGrid.root.addEventListener('click', (ev) => {
@@ -113,16 +172,17 @@ export class LogoSummary extends DG.JsViewer {
113
172
  if (!cell || !cell.isTableCell)
114
173
  return;
115
174
 
116
- const cluster = clustersCol.get(cell.tableRowIndex!)!;
175
+ const clusterIdx = clustersColData[cell.tableRowIndex!];
117
176
  summaryTable.currentRowIdx = -1;
118
177
  if (ev.shiftKey)
119
- this.model.modifyClusterSelection(cluster);
178
+ this.model.modifyClusterSelection(clusterIdx);
120
179
  else
121
- this.model.initClusterSelection(cluster);
180
+ this.model.initClusterSelection(clusterIdx);
181
+ this.viewerGrid.invalidate();
122
182
  });
123
183
  this.viewerGrid.onCellRender.subscribe((gridCellArgs) => {
124
184
  const gc = gridCellArgs.cell;
125
- if (gc.tableColumn?.name !== C.COLUMNS_NAMES.CLUSTERS || gc.isColHeader)
185
+ if (gc.tableColumn?.name !== clustersColName || gc.isColHeader)
126
186
  return;
127
187
  const canvasContext = gridCellArgs.g;
128
188
  const bound = gridCellArgs.bounds;
@@ -130,12 +190,13 @@ export class LogoSummary extends DG.JsViewer {
130
190
  canvasContext.beginPath();
131
191
  canvasContext.rect(bound.x, bound.y, bound.width, bound.height);
132
192
  canvasContext.clip();
133
- CR.renderLogoSummaryCell(canvasContext, gc.cell.value, this.model.logoSummarySelection, bound);
193
+ const cellRawData = clustersColData[gc.cell.rowIndex];
194
+ CR.renderLogoSummaryCell(canvasContext, gc.cell.value, cellRawData, this.model.logoSummarySelection, bound);
134
195
  gridCellArgs.preventDefault();
135
196
  canvasContext.restore();
136
197
  });
137
198
  this.viewerGrid.onCellTooltip((cell, x, y) => {
138
- if (!cell.isColHeader && cell.tableColumn?.name === C.COLUMNS_NAMES.CLUSTERS)
199
+ if (!cell.isColHeader && cell.tableColumn?.name === clustersColName)
139
200
  this.model.showTooltipCluster(cell.cell.value, x, y);
140
201
  return true;
141
202
  });
@@ -151,4 +212,12 @@ export class LogoSummary extends DG.JsViewer {
151
212
 
152
213
  return this.viewerGrid;
153
214
  }
215
+
216
+ updateFilter(): void {
217
+ const table = this.viewerGrid.table;
218
+ const memberstCol = table.getCol(C.COLUMNS_NAMES.MEMBERS);
219
+ const membersColData = memberstCol.getRawData();
220
+ const maxCount = memberstCol.stats.max;
221
+ table.filter.init((i) => membersColData[i] > Math.ceil(maxCount * this.membersRatioThreshold));
222
+ }
154
223
  }
@@ -40,7 +40,7 @@ export class PeptideSpaceViewer extends DG.JsViewer {
40
40
  async onTableAttached(): Promise<void> {
41
41
  super.onTableAttached();
42
42
 
43
- this.model = await PeptidesModel.getInstance(this.dataFrame);
43
+ this.model = PeptidesModel.getInstance(this.dataFrame);
44
44
 
45
45
  await this.render(!this.dataFrame.temp[C.EMBEDDING_STATUS]);
46
46
  }
@@ -62,7 +62,7 @@ export class PeptideSpaceViewer extends DG.JsViewer {
62
62
  $(this.root).empty();
63
63
  const viewerHost = ui.waitBox(async () => {
64
64
  // const aligendSeqCol = this.dataFrame.columns.bySemType(C.SEM_TYPES.MACROMOLECULE)!;
65
- const alignedSeqCol = this.dataFrame.getCol(C.COLUMNS_NAMES.MACROMOLECULE);
65
+ const alignedSeqCol = this.dataFrame.getCol(this.model.settings.sequenceColumnName!);
66
66
  const edf = await computeWeights(this.dataFrame, this.method, this.measure, this.cyclesCount, alignedSeqCol);
67
67
  this.dataFrame.temp[C.EMBEDDING_STATUS] = true;
68
68
  this.model.edf = edf;
@@ -77,7 +77,7 @@ export class PeptideSpaceViewer extends DG.JsViewer {
77
77
  this.model.fireBitsetChanged(true);
78
78
  });
79
79
 
80
- const colorCol = this.dataFrame.columns.bySemType(C.SEM_TYPES.ACTIVITY_SCALED)!;
80
+ const colorCol = this.dataFrame.getCol(C.COLUMNS_NAMES.ACTIVITY_SCALED);
81
81
  edf.columns.add(colorCol);
82
82
 
83
83
  const viewerOptions = {
@@ -112,14 +112,14 @@ export class PeptideSpaceViewer extends DG.JsViewer {
112
112
 
113
113
  //Do not accept table, only column
114
114
  export async function computeWeights(
115
- table: DG.DataFrame, method: string, measure: string, cyclesCount: number, col?: DG.Column,
115
+ table: DG.DataFrame, method: string, measure: string, cyclesCount: number, col: DG.Column,
116
116
  ): Promise<DG.DataFrame | null> {
117
117
  const pi = DG.TaskBarProgressIndicator.create('Creating embedding...');
118
118
  let edf: DG.DataFrame | null = null;
119
119
  try {
120
120
  const axesNames = ['~X', '~Y', '~MW'];
121
121
  // col ??= table.columns.bySemType(C.SEM_TYPES.MACROMOLECULE)!;
122
- col ??= table.getCol(C.COLUMNS_NAMES.MACROMOLECULE);
122
+ // col ??= table.getCol(this.model.settings.sequenceColumnName!);
123
123
  const columnData = col.toList().map((v) => AlignedSequenceEncoder.clean(v));
124
124
 
125
125
  const reduceDimRes: IReduceDimensionalityResult = await createDimensinalityReducingWorker(