@datagrok/peptides 1.7.2 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,23 +3,27 @@ import * as grok from 'datagrok-api/grok';
3
3
  import * as DG from 'datagrok-api/dg';
4
4
 
5
5
  import $ from 'cash-dom';
6
- import {PeptidesModel} from '../model';
6
+ import {ClusterType, CLUSTER_TYPE, PeptidesModel, VIEWER_TYPE} from '../model';
7
7
  import * as C from '../utils/constants';
8
8
  import * as CR from '../utils/cell-renderer';
9
9
  import {TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
10
10
  import {PositionHeight} from '@datagrok-libraries/bio/src/viewers/web-logo';
11
- import {getStats, MaskInfo, Stats} from '../utils/statistics';
11
+ import {getAggregatedValue, getStats, MaskInfo, Stats} from '../utils/statistics';
12
12
  import wu from 'wu';
13
13
  import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
14
+ import {getDistributionAndStats} from '../widgets/distribution';
15
+ import {wrapDistroAndStatsDefault} from '../utils/misc';
14
16
 
15
- export class LogoSummary extends DG.JsViewer {
16
- _titleHost = ui.divText('Logo Summary Table', {id: 'pep-viewer-title'});
17
+ const getAggregatedColName = (aggF: string, colName: string): string => `${aggF}(${colName})`;
18
+
19
+
20
+ export class LogoSummaryTable extends DG.JsViewer {
21
+ _titleHost = ui.divText(VIEWER_TYPE.LOGO_SUMMARY_TABLE, {id: 'pep-viewer-title'});
17
22
  model!: PeptidesModel;
18
23
  viewerGrid!: DG.Grid;
19
24
  initialized: boolean = false;
20
25
  webLogoMode: string;
21
26
  membersRatioThreshold: number;
22
- newClusterName: string;
23
27
  webLogoDfPlot: DG.DataFramePlotHelper[] = [];
24
28
  distributionDfPlot: DG.DataFramePlotHelper[] = [];
25
29
 
@@ -28,8 +32,7 @@ export class LogoSummary extends DG.JsViewer {
28
32
 
29
33
  this.webLogoMode = this.string('webLogoMode', PositionHeight.full,
30
34
  {choices: [PositionHeight.full, PositionHeight.Entropy]});
31
- this.membersRatioThreshold = this.float('membersRatioThreshold', 0.7, {min: 0, max: 1.0});
32
- this.newClusterName = this.string('newClusterName', 'New cluster');
35
+ this.membersRatioThreshold = this.float('membersRatioThreshold', 0.3, {min: 0, max: 1.0});
33
36
  }
34
37
 
35
38
  onTableAttached(): void {
@@ -82,136 +85,147 @@ export class LogoSummary extends DG.JsViewer {
82
85
  const isDfFiltered = this.dataFrame.filter.anyFalse;
83
86
  const filteredDf = isDfFiltered ? this.dataFrame.clone(this.dataFrame.filter) : this.dataFrame;
84
87
  const filteredDfCols = filteredDf.columns;
88
+ const filteredDfRowCount = filteredDf.rowCount;
85
89
  const activityCol = filteredDf.getCol(C.COLUMNS_NAMES.ACTIVITY_SCALED);
86
90
  const activityColData = activityCol.getRawData();
87
91
 
88
- const filteredDfClustersCol = filteredDf.getCol(clustersColName);
89
- const filteredDfClustersColData = filteredDfClustersCol.getRawData();
90
- const filteredDfClustersColCategories = filteredDfClustersCol.categories;
91
- const filteredDfClustersColLength = filteredDfClustersColData.length;
92
+ const filteredDfClustCol = filteredDf.getCol(clustersColName);
93
+ const filteredDfClustColData = filteredDfClustCol.getRawData();
94
+ const filteredDfClustColCat = filteredDfClustCol.categories;
95
+
96
+ const pepCol: DG.Column<string> = filteredDf.getCol(this.model.settings.sequenceColumnName!);
92
97
 
93
- // const customClustersColumnsList = wu(this.model.customClusters).toArray();
94
98
  const query: { [key: string]: string } = {};
95
99
  query[C.TAGS.CUSTOM_CLUSTER] = '1';
96
- const customClustersColumnsList = wu(filteredDfCols.byTags(query)).filter(c => c.max > 0).toArray();
97
- const getAggregatedColName = (aggF: string, colName: string) => `${aggF}(${colName})`;
98
- const isCustomCluster = (cluster: string) => filteredDfCols.contains(cluster);
99
-
100
- let summaryTableBuilder = filteredDf.groupBy([clustersColName]);
101
- const aggregateColumnsEntries = Object.entries(this.model.settings.columns ?? {});
102
- for (const [colName, aggregationFunc] of aggregateColumnsEntries) {
103
- summaryTableBuilder = summaryTableBuilder.add(
104
- aggregationFunc as any, colName, getAggregatedColName(aggregationFunc, colName));
100
+ const customClustColList: DG.Column<boolean>[] =
101
+ wu(filteredDfCols.byTags(query)).filter((c) => c.max > 0).toArray();
102
+
103
+ const customLST = DG.DataFrame.create(customClustColList.length);
104
+ const customLSTCols = customLST.columns;
105
+ const customLSTClustCol = customLSTCols.addNewString(clustersColName);
106
+
107
+ const customMembersColData = customLSTCols.addNewInt(C.LST_COLUMN_NAMES.MEMBERS).getRawData();
108
+ const customWebLogoCol = customLSTCols.addNewString(C.LST_COLUMN_NAMES.WEB_LOGO);
109
+ const customDistCol = customLSTCols.addNewString(C.LST_COLUMN_NAMES.DISTRIBUTION);
110
+ const customMDColData = customLSTCols.addNewFloat(C.LST_COLUMN_NAMES.MEAN_DIFFERENCE).getRawData();
111
+ const customPValColData = customLSTCols.addNewFloat(C.LST_COLUMN_NAMES.P_VALUE).getRawData();
112
+ const customRatioColData = customLSTCols.addNewFloat(C.LST_COLUMN_NAMES.RATIO).getRawData();
113
+
114
+ let origLSTBuilder = filteredDf.groupBy([clustersColName]);
115
+ const aggColsEntries = Object.entries(this.model.settings.columns ?? {});
116
+ const aggColNames = aggColsEntries.map(([colName, aggFn]) => getAggregatedColName(aggFn, colName));
117
+ const customAggRawCols = new Array(aggColNames.length);
118
+ const colAggEntries = aggColsEntries.map(
119
+ ([colName, aggFn]) => [filteredDf.getCol(colName), aggFn] as [DG.Column<number>, DG.AggregationType]);
120
+
121
+ for (let aggIdx = 0; aggIdx < aggColsEntries.length; ++aggIdx) {
122
+ const [colName, aggFn] = aggColsEntries[aggIdx];
123
+ origLSTBuilder = origLSTBuilder.add(aggFn, colName, aggColNames[aggIdx]);
124
+ const customLSTAggCol = customLSTCols.addNewFloat(aggColNames[aggIdx]);
125
+ customAggRawCols[aggIdx] = customLSTAggCol.getRawData();
105
126
  }
106
127
 
107
- const tempSummaryTable = summaryTableBuilder.aggregate();
108
- const tempSummaryTableLength = tempSummaryTable.rowCount;
109
- const tempClustersCol: DG.Column<string> = tempSummaryTable.getCol(clustersColName);
110
- const summaryTableLength = tempSummaryTableLength + customClustersColumnsList.length;
111
- const summaryTable = DG.DataFrame.create(summaryTableLength);
112
- const summaryTableCols = summaryTable.columns;
113
-
114
- const clustersCol = summaryTableCols.addNewString(clustersColName);
115
- for (let i = 0; i < summaryTableLength; ++i) {
116
- clustersCol.set(i, i < tempSummaryTableLength ? tempClustersCol.get(i) :
117
- customClustersColumnsList[i - tempSummaryTableLength].name);
118
- }
119
- const clustersColData = clustersCol.getRawData();
120
- const clustersColCategories = clustersCol.categories;
128
+ // BEGIN: fill LST part with custom clusters
129
+ const customWebLogoPlots = new Array(customClustColList.length);
130
+ const customDistPlots = new Array(customClustColList.length);
121
131
 
122
- const peptideCol: DG.Column<string> = filteredDf.getCol(this.model.settings.sequenceColumnName!);
123
- const peptideColData = peptideCol.getRawData();
124
- const peptideColCategories = peptideCol.categories;
125
- const peptideColTags = peptideCol.tags;
132
+ for (let rowIdx = 0; rowIdx < customClustColList.length; ++rowIdx) {
133
+ const customClustCol = customClustColList[rowIdx];
134
+ customLSTClustCol.set(rowIdx, customClustCol.name);
135
+ const mask = customClustCol.toList() as boolean[];
136
+ const bsMask = DG.BitSet.create(filteredDfRowCount, (i) => mask[i]);
126
137
 
127
- const membersColData = summaryTableCols.addNewInt(C.LST_COLUMN_NAMES.MEMBERS).getRawData();
128
- const webLogoCol = summaryTableCols.addNewString(C.LST_COLUMN_NAMES.WEB_LOGO);
129
- const distributionCol = summaryTableCols.addNewString(C.LST_COLUMN_NAMES.DISTRIBUTION);
130
- const meanDifferenceColData = summaryTableCols.addNewFloat(C.LST_COLUMN_NAMES.MEAN_DIFFERENCE).getRawData();
131
- const pValColData = summaryTableCols.addNewFloat(C.LST_COLUMN_NAMES.P_VALUE).getRawData();
132
- const ratioColData = summaryTableCols.addNewFloat(C.LST_COLUMN_NAMES.RATIO).getRawData();
133
-
134
- for (const [colName, aggregationFunc] of aggregateColumnsEntries) {
135
- const tempSummaryTableCol = tempSummaryTable.getCol(getAggregatedColName(aggregationFunc, colName));
136
- const summaryTableCol = summaryTableCols.addNew(tempSummaryTableCol.name, tempSummaryTableCol.type);
137
- summaryTableCol.init((i) => i < tempSummaryTableLength ? tempSummaryTableCol.get(i) : null);
138
+ let stats: Stats;
139
+ if (isDfFiltered) {
140
+ const maskInfo: MaskInfo = {
141
+ mask: mask,
142
+ trueCount: bsMask.trueCount,
143
+ falseCount: bsMask.falseCount,
144
+ };
145
+
146
+ stats = getStats(activityColData, maskInfo);
147
+ } else
148
+ stats = this.model.clusterStats[CLUSTER_TYPE.CUSTOM][customClustCol.name];
149
+
150
+ customMembersColData[rowIdx] = stats.count;
151
+ customWebLogoPlots[rowIdx] = this.createWebLogoPlot(pepCol, bsMask);
152
+ customDistPlots[rowIdx] = this.createDistributionPlot(activityCol, customClustColList[rowIdx]);
153
+ customMDColData[rowIdx] = stats.meanDifference;
154
+ customPValColData[rowIdx] = stats.pValue;
155
+ customRatioColData[rowIdx] = stats.ratio;
156
+
157
+ for (let aggColIdx = 0; aggColIdx < aggColNames.length; ++aggColIdx) {
158
+ const [col, aggFn] = colAggEntries[aggColIdx];
159
+ customAggRawCols[aggColIdx][rowIdx] = getAggregatedValue(col, aggFn, bsMask);
160
+ }
138
161
  }
139
162
 
140
- this.webLogoDfPlot = new Array(summaryTableLength);
141
- this.distributionDfPlot = new Array(summaryTableLength);
163
+ customWebLogoCol.setTag(DG.TAGS.CELL_RENDERER, 'html');
164
+ customDistCol.setTag(DG.TAGS.CELL_RENDERER, 'html');
142
165
 
143
- for (let summaryTableRowIndex = 0; summaryTableRowIndex < summaryTableLength; ++summaryTableRowIndex) {
144
- const isOriginalCluster = summaryTableRowIndex < tempSummaryTableLength;
145
- const currentClusterCategoryIndex = clustersColData[summaryTableRowIndex];
146
- const currentCluster = clustersColCategories[currentClusterCategoryIndex]; // Cluster name
147
- const customClusterColData = customClustersColumnsList.find((col) => col.name == currentCluster)?.toList();
166
+ // END
148
167
 
149
- const isValidIndex = isOriginalCluster ?
150
- (j: number) => filteredDfClustersColCategories[filteredDfClustersColData[j]] == currentCluster :
151
- (j: number) => customClusterColData![j];
168
+ // BEGIN: fill LST part with original clusters
169
+ const origLST = origLSTBuilder.aggregate();
170
+ const origLSTLen = origLST.rowCount;
171
+ const origLSTCols = origLST.columns;
172
+ const origLSTClustCol: DG.Column<string> = origLST.getCol(clustersColName);
152
173
 
153
-
154
- //TODO: use bitset instead of splitCol
155
- const splitCol = DG.Column.bool(C.COLUMNS_NAMES.SPLIT_COL, activityCol.length);
156
- const getSplitColValueAt = isOriginalCluster ?
157
- (splitColIndex: number) => filteredDfClustersColData[splitColIndex] == currentClusterCategoryIndex :
158
- (splitColIndex: number) => customClusterColData![splitColIndex];
159
- splitCol.init((i) => getSplitColValueAt(i));
174
+ const origLSTClustColCat = origLSTClustCol.categories;
175
+
176
+ const origMembersColData = origLSTCols.addNewInt(C.LST_COLUMN_NAMES.MEMBERS).getRawData();
177
+ const origWebLogoCol = origLSTCols.addNewString(C.LST_COLUMN_NAMES.WEB_LOGO);
178
+ const origDistCol = origLSTCols.addNewString(C.LST_COLUMN_NAMES.DISTRIBUTION);
179
+ const origMDColData = origLSTCols.addNewFloat(C.LST_COLUMN_NAMES.MEAN_DIFFERENCE).getRawData();
180
+ const origPValColData = origLSTCols.addNewFloat(C.LST_COLUMN_NAMES.P_VALUE).getRawData();
181
+ const origRatioColData = origLSTCols.addNewFloat(C.LST_COLUMN_NAMES.RATIO).getRawData();
182
+
183
+ const origWebLogoPlots = new Array(origLSTLen);
184
+ const origDistPlots = new Array(origLSTLen);
185
+
186
+ const origClustMasks = Array.from({length: origLSTLen},
187
+ () => Array.from({length: filteredDfRowCount}, () => false));
188
+
189
+ for (let rowIdx = 0; rowIdx < filteredDfRowCount; ++rowIdx) {
190
+ const filteredClustName = filteredDfClustColCat[filteredDfClustColData[rowIdx]];
191
+ const origClustIdx = origLSTClustColCat.indexOf(filteredClustName);
192
+ origClustMasks[origClustIdx][rowIdx] = true;
193
+ }
194
+
195
+ for (let rowIdx = 0; rowIdx < origLSTLen; ++rowIdx) {
196
+ const mask = origClustMasks[rowIdx];
197
+ const bsMask = DG.BitSet.create(filteredDfRowCount, (i) => mask[i]);
160
198
 
161
199
  let stats: Stats;
162
200
  if (isDfFiltered) {
163
- const trueCount = splitCol.stats.sum;
201
+ const trueCount = bsMask.trueCount;
164
202
  const maskInfo = {
165
203
  trueCount: trueCount,
166
- falseCount: activityColData.length - trueCount,
167
- mask: splitCol.toList() as boolean[],
204
+ falseCount: filteredDfRowCount - trueCount,
205
+ mask: mask,
168
206
  };
169
207
  stats = getStats(activityColData, maskInfo);
170
208
  } else
171
- stats = this.model.clusterStats[currentCluster];
209
+ stats = this.model.clusterStats[CLUSTER_TYPE.ORIGINAL][origLSTClustColCat[rowIdx]];
210
+
211
+ origMembersColData[rowIdx] = stats.count;
212
+ origWebLogoPlots[rowIdx] = this.createWebLogoPlot(pepCol, bsMask);
213
+ origDistPlots[rowIdx] = this.createDistributionPlot(activityCol,
214
+ DG.Column.fromBitSet(C.COLUMNS_NAMES.SPLIT_COL, bsMask));
215
+ origMDColData[rowIdx] = stats.meanDifference;
216
+ origPValColData[rowIdx] = stats.pValue;
217
+ origRatioColData[rowIdx] = stats.ratio;
218
+ }
172
219
 
173
- const tCol = DG.Column.string('peptides', stats.count);
174
- let tColIdx = 0;
175
- for (let j = 0; j < filteredDfClustersColLength; ++j) {
176
- if (isValidIndex(j))
177
- tCol.set(tColIdx++, peptideColCategories[peptideColData[j]]);
178
- }
220
+ origWebLogoCol.setTag(DG.TAGS.CELL_RENDERER, 'html');
221
+ origDistCol.setTag(DG.TAGS.CELL_RENDERER, 'html');
179
222
 
180
- for (const tag of peptideColTags)
181
- tCol.setTag(tag[0], tag[1]);
182
-
183
- const uh = new UnitsHandler(tCol);
184
- tCol.setTag(bioTAGS.alphabetSize, uh.getAlphabetSize().toString());
185
-
186
-
187
- const distributionTable = DG.DataFrame.fromColumns([activityCol, splitCol]);
188
- const dfSlice = DG.DataFrame.fromColumns([tCol]);
189
-
190
- this.webLogoDfPlot[summaryTableRowIndex] = dfSlice.plot;
191
- this.distributionDfPlot[summaryTableRowIndex] = distributionTable.plot;
192
-
193
- membersColData[summaryTableRowIndex] = stats.count;
194
- meanDifferenceColData[summaryTableRowIndex] = stats.meanDifference;
195
- pValColData[summaryTableRowIndex] = stats.pValue;
196
- ratioColData[summaryTableRowIndex] = stats.ratio;
197
-
198
- //Setting aggregated col values
199
- if (!isOriginalCluster) {
200
- for (const [colName, aggregationFunc] of aggregateColumnsEntries) {
201
- const arrayBuffer = filteredDf.getCol(colName).getRawData();
202
- const clusterMask = DG.BitSet.fromBytes(arrayBuffer.buffer, arrayBuffer.byteLength / 4);
203
- const subDf = filteredDf.clone(clusterMask, [colName]);
204
- const newColName = getAggregatedColName(aggregationFunc, colName);
205
- const aggregatedDf = subDf.groupBy()
206
- .add(aggregationFunc as any, colName, newColName)
207
- .aggregate();
208
- const value = aggregatedDf.get(newColName, 0);
209
- summaryTable.set(newColName, summaryTableRowIndex, value);
210
- }
211
- }
212
- }
213
- webLogoCol.setTag(DG.TAGS.CELL_RENDERER, 'html');
214
- distributionCol.setTag(DG.TAGS.CELL_RENDERER, 'html');
223
+ // END
224
+
225
+ // combine LSTs and create a grid
226
+ const summaryTable = origLST.append(customLST);
227
+ this.webLogoDfPlot = origWebLogoPlots.concat(customWebLogoPlots);
228
+ this.distributionDfPlot = origDistPlots.concat(customDistPlots);
215
229
 
216
230
  this.viewerGrid = summaryTable.plot.grid();
217
231
  this.updateFilter();
@@ -225,12 +239,12 @@ export class LogoSummary extends DG.JsViewer {
225
239
  if (!cell.isTableCell || currentRowIdx == null || currentRowIdx == -1)
226
240
  return;
227
241
 
228
- if (cell.tableColumn?.name == 'WebLogo') {
242
+ if (cell.tableColumn?.name == C.LST_COLUMN_NAMES.WEB_LOGO) {
229
243
  this.webLogoDfPlot[currentRowIdx]
230
244
  .fromType('WebLogo', {maxHeight: cell.grid.props.rowHeight - 5, positionHeight: this.webLogoMode,
231
245
  horizontalAlignment: 'left'})
232
246
  .then((viewer) => cell.element = viewer.root);
233
- } else if (cell.tableColumn?.name == 'Distribution') {
247
+ } else if (cell.tableColumn?.name == C.LST_COLUMN_NAMES.DISTRIBUTION) {
234
248
  const viewerRoot = this.distributionDfPlot[currentRowIdx].histogram({
235
249
  filteringEnabled: false,
236
250
  valueColumnName: C.COLUMNS_NAMES.ACTIVITY_SCALED,
@@ -276,8 +290,12 @@ export class LogoSummary extends DG.JsViewer {
276
290
  canvasContext.restore();
277
291
  });
278
292
  this.viewerGrid.onCellTooltip((cell, x, y) => {
279
- if (!cell.isColHeader && cell.tableColumn?.name === clustersColName)
280
- this.model.showTooltipCluster(cell.cell.rowIndex, x, y, cell.cell.value);
293
+ if (!cell.isColHeader && cell.tableColumn?.name === clustersColName) {
294
+ const clustName = cell.cell.value;
295
+ const clustColCat = this.dataFrame.getCol(this.model.settings.clustersColumnName!).categories;
296
+ const clustType = clustColCat.includes(clustName) ? CLUSTER_TYPE.ORIGINAL : CLUSTER_TYPE.CUSTOM;
297
+ this.showTooltip(clustName, x, y, clustType);
298
+ }
281
299
  return true;
282
300
  });
283
301
  const webLogoGridCol = this.viewerGrid.columns.byName('WebLogo')!;
@@ -318,7 +336,8 @@ export class LogoSummary extends DG.JsViewer {
318
336
  falseCount: selection.falseCount,
319
337
  };
320
338
  const stats = getStats(activityScaledCol.getRawData(), maskInfo);
321
- const distributionTable = DG.DataFrame.fromColumns([activityScaledCol, filteredDf.getCol(this.model.splitCol.name)]);
339
+ const distributionTable =
340
+ DG.DataFrame.fromColumns([activityScaledCol, filteredDf.getCol(this.model.splitCol.name)]);
322
341
 
323
342
  const peptideCol: DG.Column<string> = filteredDf.getCol(this.model.settings.sequenceColumnName!);
324
343
  const peptideColData = peptideCol.getRawData();
@@ -339,16 +358,15 @@ export class LogoSummary extends DG.JsViewer {
339
358
  this.webLogoDfPlot.push(webLogoTable.plot);
340
359
  this.distributionDfPlot.push(distributionTable.plot);
341
360
 
342
- const colCategories = viewerDfCols.byName(this.model.settings.clustersColumnName!).categories;
343
- let newClusterName = this.newClusterName;
344
- let clusterNum = 1;
345
- const getString = !isNaN(parseInt(newClusterName)) ? () => `${parseInt(newClusterName) + 1}` :
346
- newClusterName == '' ? () => `${clusterNum++}` :
347
- () => `${this.newClusterName} ${clusterNum++}`;
348
- while (colCategories.includes(newClusterName))
349
- newClusterName = getString();
361
+ const newClusterName = viewerDfCols.getUnusedName('New Cluster');
350
362
 
351
- this.getProperty('newClusterName')?.set(this, getString());
363
+ const aggregatedValues: {[colName: string]: number} = {};
364
+ const aggColsEntries = Object.entries(this.model.settings.columns ?? {});
365
+ for (const [colName, aggFn] of aggColsEntries) {
366
+ const newColName = getAggregatedColName(aggFn, colName);
367
+ const col = filteredDf.getCol(colName);
368
+ aggregatedValues[newColName] = getAggregatedValue(col, aggFn, selection);
369
+ }
352
370
 
353
371
  for (let i = 0; i < viewerDfColsLength; ++i) {
354
372
  const col = viewerDfCols.byIndex(i);
@@ -359,11 +377,12 @@ export class LogoSummary extends DG.JsViewer {
359
377
  col.name == C.LST_COLUMN_NAMES.MEAN_DIFFERENCE ? stats.meanDifference:
360
378
  col.name == C.LST_COLUMN_NAMES.P_VALUE ? stats.pValue:
361
379
  col.name == C.LST_COLUMN_NAMES.RATIO ? stats.ratio:
380
+ col.name in aggregatedValues ? aggregatedValues[col.name] :
362
381
  console.warn(`PeptidesLSTWarn: value for column ${col.name} is undefined`)! || null;
363
382
  }
364
383
  viewerDf.rows.addNew(newClusterVals);
365
384
 
366
- this.model.clusterStats[newClusterName] = stats;
385
+ this.model.clusterStats[CLUSTER_TYPE.CUSTOM][newClusterName] = stats;
367
386
  this.model.addNewCluster(newClusterName);
368
387
  }
369
388
 
@@ -371,18 +390,98 @@ export class LogoSummary extends DG.JsViewer {
371
390
  const lss = this.model.logoSummarySelection;
372
391
  const dfCols = this.dataFrame.columns;
373
392
 
374
- const removeClusterIndexesList = lss.filter((cluster) => dfCols.contains(cluster));
375
- if (removeClusterIndexesList.length == 0)
376
- return grok.shell.info('Nothing removed. Please select a created cluster to remove');
393
+ // Names of the clusters to remove
394
+ const clustNames = lss.filter((cluster) => dfCols.contains(cluster));
395
+ if (clustNames.length == 0)
396
+ return grok.shell.warning('Only custom clusters can be removed');
377
397
 
378
- for (const cluster of removeClusterIndexesList) {
398
+ const viewerDf = this.viewerGrid.dataFrame;
399
+ const viewerDfRows = viewerDf.rows;
400
+ const clustColCat = viewerDf.getCol(this.model.settings.clustersColumnName!).categories;
401
+
402
+ for (const cluster of clustNames) {
379
403
  lss.splice(lss.indexOf(cluster), 1);
380
404
  dfCols.remove(cluster);
405
+ delete this.model.clusterStats[CLUSTER_TYPE.CUSTOM][cluster];
406
+ const clustIdx = clustColCat.indexOf(cluster);
407
+ viewerDfRows.removeAt(clustIdx);
408
+ this.webLogoDfPlot.splice(clustIdx, 1);
409
+ this.distributionDfPlot.splice(clustIdx, 1);
381
410
  }
382
411
 
383
412
  this.model.logoSummarySelection = lss;
384
- this.model.clusterStats = this.model.calculateClusterStatistics();
385
- this.createLogoSummaryGrid();
386
413
  this.render();
387
414
  }
415
+
416
+ showTooltip(clustName: string, x: number, y: number, clustType: ClusterType = 'original'): HTMLDivElement | null {
417
+ const bs = this.dataFrame.filter;
418
+ const filteredDf = bs.anyFalse ? this.dataFrame.clone(bs) : this.dataFrame;
419
+ const rowCount = filteredDf.rowCount;
420
+
421
+ const maskInfo: MaskInfo = {mask: new Array(rowCount), trueCount: 0, falseCount: 0};
422
+ const activityCol = filteredDf.getCol(C.COLUMNS_NAMES.ACTIVITY_SCALED);
423
+ const activityColData = activityCol.getRawData();
424
+
425
+ if (clustType == CLUSTER_TYPE.ORIGINAL) {
426
+ //TODO: use bitset instead of splitCol
427
+ const origClustCol = filteredDf.getCol(this.model.settings.clustersColumnName!);
428
+ const origClustColData = origClustCol.getRawData();
429
+ const origClustColCategories = origClustCol.categories;
430
+
431
+ const seekValue = origClustColCategories.indexOf(clustName);
432
+
433
+ for (let i = 0; i < rowCount; ++i) {
434
+ maskInfo.mask[i] = origClustColData[i] == seekValue;
435
+
436
+ if (maskInfo.mask[i])
437
+ maskInfo.trueCount++;
438
+ else
439
+ maskInfo.falseCount++;
440
+ }
441
+ } else {
442
+ const clustCol: DG.Column<boolean> = filteredDf.getCol(clustName);
443
+ maskInfo.mask = clustCol.toList() as boolean[];
444
+
445
+ for (let i = 0; i < rowCount; ++i) {
446
+ if (maskInfo.mask[i])
447
+ maskInfo.trueCount++;
448
+ else
449
+ maskInfo.falseCount++;
450
+ }
451
+ }
452
+
453
+ const stats = bs.anyFalse ? getStats(activityColData, maskInfo) : this.model.clusterStats[clustType][clustName];
454
+
455
+ if (!stats.count)
456
+ return null;
457
+
458
+ const colResults: {[colName: string]: number} = {};
459
+ const mask = DG.BitSet.create(rowCount, (i) => maskInfo.mask[i] as boolean);
460
+ const splitCol = DG.Column.fromBitSet(C.COLUMNS_NAMES.SPLIT_COL, mask);
461
+ const distDf = DG.DataFrame.fromColumns([activityCol, splitCol]);
462
+
463
+ for (const [colName, aggFn] of Object.entries(this.model.settings.columns || {})) {
464
+ const newColName = getAggregatedColName(colName, aggFn);
465
+ const value = getAggregatedValue(filteredDf.getCol(colName), aggFn, mask);
466
+ colResults[newColName] = value;
467
+ }
468
+
469
+ const das = getDistributionAndStats(distDf, stats, `Cluster: ${clustName}`, 'Other', true, splitCol.name);
470
+ const resultMap: {[key: string]: any} = {...das.tableMap, ...colResults};
471
+ const tooltip = wrapDistroAndStatsDefault(das.labels, das.histRoot, resultMap, true);
472
+
473
+ ui.tooltip.show(tooltip, x, y);
474
+
475
+ return tooltip;
476
+ }
477
+
478
+ createWebLogoPlot(pepCol: DG.Column<string>, mask: DG.BitSet): DG.DataFramePlotHelper {
479
+ const df = DG.DataFrame.fromColumns([pepCol]).clone(mask);
480
+ return df.plot;
481
+ }
482
+
483
+ createDistributionPlot(activityCol: DG.Column<number>, splitCol: DG.Column<boolean>): DG.DataFramePlotHelper {
484
+ const df = DG.DataFrame.fromColumns([activityCol, splitCol]);
485
+ return df.plot;
486
+ }
388
487
  }
@@ -57,7 +57,7 @@ export class PeptideSpaceViewer extends DG.JsViewer {
57
57
  }
58
58
 
59
59
  async render(computeData=false): Promise<void> {
60
- if (computeData && !this.isEmbeddingCreating && !this.model.isChangingEdfBitset) {
60
+ if (computeData && !this.isEmbeddingCreating /*&& !this.model.isChangingEdfBitset*/) {
61
61
  this.isEmbeddingCreating = true;
62
62
  $(this.root).empty();
63
63
  const viewerHost = ui.waitBox(async () => {
@@ -65,17 +65,17 @@ export class PeptideSpaceViewer extends DG.JsViewer {
65
65
  const alignedSeqCol = this.dataFrame.getCol(this.model.settings.sequenceColumnName!);
66
66
  const edf = await computeWeights(this.dataFrame, this.method, this.measure, this.cyclesCount, alignedSeqCol);
67
67
  this.dataFrame.temp[C.EMBEDDING_STATUS] = true;
68
- this.model.edf = edf;
68
+ // this.model.edf = edf;
69
69
 
70
70
  if (edf === null)
71
71
  return ui.label('Could not compute embeddings');
72
72
 
73
73
  const edfSelection = edf.selection;
74
74
  edfSelection.copyFrom(this.dataFrame.selection);
75
- edfSelection.onChanged.subscribe(() => {
76
- if (!this.model.isChangingEdfBitset)
77
- this.model.fireBitsetChanged(true);
78
- });
75
+ // edfSelection.onChanged.subscribe(() => {
76
+ // if (!this.model.isChangingEdfBitset)
77
+ // this.model.fireBitsetChanged(true);
78
+ // });
79
79
 
80
80
  const colorCol = this.dataFrame.getCol(C.COLUMNS_NAMES.ACTIVITY_SCALED);
81
81
  edf.columns.add(colorCol);
@@ -5,7 +5,7 @@ import * as DG from 'datagrok-api/dg';
5
5
  import $ from 'cash-dom';
6
6
  import * as C from '../utils/constants';
7
7
  import * as CR from '../utils/cell-renderer';
8
- import {PeptidesModel} from '../model';
8
+ import {PeptidesModel, VIEWER_TYPE} from '../model';
9
9
 
10
10
  export class SARViewerBase extends DG.JsViewer {
11
11
  tempName!: string;
@@ -53,7 +53,7 @@ export class SARViewerBase extends DG.JsViewer {
53
53
  render(refreshOnly = false): void {
54
54
  if (!refreshOnly) {
55
55
  $(this.root).empty();
56
- let switchHost = ui.divText('Most Potent Residues', {id: 'pep-viewer-title'});
56
+ let switchHost = ui.divText(VIEWER_TYPE.MOST_POTENT_RESIDUES, {id: 'pep-viewer-title'});
57
57
  if (this.name == 'MC') {
58
58
  const mutationCliffsMode = ui.boolInput('', this.isMutationCliffsMode === '1');
59
59
  mutationCliffsMode.root.addEventListener('click', () => {
@@ -172,7 +172,7 @@ export class MonomerPosition extends SARViewerBase {
172
172
  /** Vertical structure activity relationship viewer */
173
173
  export class MostPotentResiduesViewer extends SARViewerBase {
174
174
  _name = 'MPR';
175
- _titleHost = ui.divText('Most Potent Residues', {id: 'pep-viewer-title'});
175
+ _titleHost = ui.divText(VIEWER_TYPE.MOST_POTENT_RESIDUES, {id: 'pep-viewer-title'});
176
176
  _isVertical = true;
177
177
 
178
178
  constructor() {
@@ -5,10 +5,9 @@ import {StringDictionary} from '@datagrok-libraries/utils/src/type-declarations'
5
5
  import $ from 'cash-dom';
6
6
 
7
7
  import * as C from '../utils/constants';
8
- import {getStats, MaskInfo, Stats} from '../utils/statistics';
8
+ import {getAggregatedValue, getStats, MaskInfo, Stats} from '../utils/statistics';
9
9
  import {PeptidesModel} from '../model';
10
10
  import {wrapDistroAndStatsDefault} from '../utils/misc';
11
- import wu from 'wu';
12
11
 
13
12
  const allConst = 'All';
14
13
  const otherConst = 'Other';
@@ -26,7 +25,7 @@ export function getDistributionWidget(table: DG.DataFrame, model: PeptidesModel)
26
25
  const positionsLen = positions.length;
27
26
  let aarStr = allConst;
28
27
  let otherStr = '';
29
- const useSelectedStr = model.isPeptideSpaceChangingBitset;
28
+ // const useSelectedStr = model.isPeptideSpaceChangingBitset;
30
29
 
31
30
  const updateDistributionHost = (): void => {
32
31
  model.splitByPos = splitByPosition.value!;
@@ -81,13 +80,14 @@ export function getDistributionWidget(table: DG.DataFrame, model: PeptidesModel)
81
80
 
82
81
  //OPTIMIZE: don't create Bitset, use bool[]
83
82
  const mask = DG.BitSet.create(rowCount, (i) => aarList.includes(posCol.get(i)));
83
+ const splitCol = DG.Column.fromBitSet(C.COLUMNS_NAMES.SPLIT_COL, mask);
84
+
84
85
  const maskInfo: MaskInfo = {
85
- mask: mask.getBuffer(),
86
+ mask: splitCol.toList() as boolean[],
86
87
  trueCount: mask.trueCount,
87
88
  falseCount: mask.falseCount,
88
89
  };
89
90
  const stats = getStats(activityScaledData, maskInfo);
90
- const splitCol = DG.Column.fromBitSet(C.COLUMNS_NAMES.SPLIT_COL, mask);
91
91
  const distributionTable = DG.DataFrame.fromColumns([activityScaledCol, splitCol]);
92
92
 
93
93
  const indexes = mask.getSelectedIndexes();
@@ -137,13 +137,14 @@ export function getDistributionWidget(table: DG.DataFrame, model: PeptidesModel)
137
137
  }
138
138
  return false;
139
139
  });
140
+ const splitCol = DG.Column.fromBitSet(C.COLUMNS_NAMES.SPLIT_COL, mask);
141
+
140
142
  const maskInfo: MaskInfo = {
141
- mask: mask.getBuffer(),
143
+ mask: splitCol.toList() as boolean[],
142
144
  trueCount: mask.trueCount,
143
145
  falseCount: mask.falseCount,
144
146
  };
145
147
  const stats = getStats(activityScaledData, maskInfo);
146
- const splitCol = DG.Column.fromBitSet(C.COLUMNS_NAMES.SPLIT_COL, mask);
147
148
  const distributionTable = DG.DataFrame.fromColumns([activityScaledCol, splitCol]);
148
149
 
149
150
  const indexes = mask.getSelectedIndexes();
@@ -169,7 +170,7 @@ export function getDistributionWidget(table: DG.DataFrame, model: PeptidesModel)
169
170
  res.push(ui.divText('No distribution'));
170
171
  else {
171
172
  otherStr = '';
172
- if (useSelectedStr) {
173
+ if (false /*useSelectedStr*/) {
173
174
  aarStr = 'Selected';
174
175
  otherStr = otherConst;
175
176
  } else if (positionsLen) {
@@ -185,22 +186,18 @@ export function getDistributionWidget(table: DG.DataFrame, model: PeptidesModel)
185
186
  }
186
187
 
187
188
  const distributionTable = DG.DataFrame.fromColumns([activityScaledCol, splitCol]);
188
- // distributionTable.filter.copyFrom(table.filter);
189
189
 
190
- const indexes = model.getCompoundBitest().getSelectedIndexes();
190
+ const compoundBs = model.getCompoundBitest();
191
191
  const colResults: {[colName: string]: number} = {};
192
192
  for (const [col, agg] of Object.entries(model.settings.columns || {})) {
193
193
  const currentCol = table.getCol(col);
194
- const currentColData = currentCol.getRawData();
195
- const tempCol = DG.Column.float('', indexes.length);
196
- tempCol.init((i) => currentColData[indexes[i]]);
197
- colResults[`${agg}(${col})`] = tempCol.stats[agg as keyof DG.Stats] as number;
194
+ colResults[`${agg}(${col})`] = getAggregatedValue(currentCol, agg, compoundBs);
198
195
  }
199
196
 
200
197
  const maskInfo: MaskInfo = {
201
- mask: table.selection.getBuffer(),
202
- trueCount: table.selection.trueCount,
203
- falseCount: table.selection.falseCount,
198
+ mask: splitCol.toList() as boolean[],
199
+ trueCount: compoundBs.trueCount,
200
+ falseCount: compoundBs.falseCount,
204
201
  };
205
202
  const stats = getStats(activityScaledCol.getRawData(), maskInfo);
206
203
  const das = getDistributionAndStats(distributionTable, stats, aarStr, otherStr);
@@ -188,7 +188,10 @@ export async function startAnalysis(activityColumn: DG.Column<number>, peptidesC
188
188
  newDf.setTag(C.TAGS.UUID, dfUuid);
189
189
  newDf.setTag('monomerType', monomerType);
190
190
  model = PeptidesModel.getInstance(newDf);
191
- await model.addViewers();
191
+ // await model.addViewers();
192
+ if (clustersColumn) await model.addLogoSummaryTable();
193
+ await model.addMonomerPosition();
194
+ await model.addMostPotentResidues();
192
195
  } else
193
196
  grok.shell.error('The activity column must be of numeric type!');
194
197
  progress.close();