@datagrok/peptides 1.7.2 → 1.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. package/.eslintrc.json +2 -2
  2. package/README.md +3 -3
  3. package/dist/563.js +2 -0
  4. package/dist/611.js +2 -0
  5. package/dist/802.js +2 -0
  6. package/dist/96.js +2 -0
  7. package/dist/package-test.js +2 -29778
  8. package/dist/package.js +2 -28285
  9. package/files/icons/logo-summary-viewer.svg +13 -0
  10. package/files/icons/peptide-sar-vertical-viewer.svg +13 -0
  11. package/files/icons/peptide-sar-viewer.svg +19 -0
  12. package/files/icons/peptide-space-viewer.svg +40 -0
  13. package/files/tests/HELM_small.csv +12 -0
  14. package/package.json +7 -8
  15. package/src/demo/fasta.ts +24 -0
  16. package/src/model.ts +381 -325
  17. package/src/package-test.ts +3 -0
  18. package/src/package.ts +54 -30
  19. package/src/tests/algorithms.ts +1 -1
  20. package/src/tests/core.ts +13 -8
  21. package/src/tests/model.ts +152 -0
  22. package/src/tests/peptide-space-test.ts +1 -2
  23. package/src/tests/table-view.ts +158 -0
  24. package/src/tests/viewers.ts +142 -4
  25. package/src/tests/widgets.ts +135 -0
  26. package/src/utils/algorithms.ts +2 -2
  27. package/src/utils/cell-renderer.ts +2 -2
  28. package/src/utils/constants.ts +8 -0
  29. package/src/utils/distance-matrix.worker.ts +16 -0
  30. package/src/utils/misc.ts +4 -4
  31. package/src/utils/peptide-similarity-space.ts +0 -1
  32. package/src/utils/statistics.ts +14 -10
  33. package/src/utils/types.ts +8 -4
  34. package/src/utils/worker-creator.ts +11 -0
  35. package/src/viewers/logo-summary.ts +246 -168
  36. package/src/viewers/peptide-space-viewer.ts +6 -6
  37. package/src/viewers/sar-viewer.ts +108 -110
  38. package/src/widgets/distribution.ts +95 -128
  39. package/src/widgets/mutation-cliffs.ts +2 -2
  40. package/src/widgets/peptides.ts +11 -3
  41. package/src/widgets/settings.ts +94 -24
  42. package/tsconfig.json +1 -1
  43. package/dist/vendors-node_modules_datagrok-libraries_ml_src_workers_dimensionality-reducer_js.js +0 -9077
@@ -3,33 +3,41 @@ import * as grok from 'datagrok-api/grok';
3
3
  import * as DG from 'datagrok-api/dg';
4
4
 
5
5
  import $ from 'cash-dom';
6
- import {PeptidesModel} from '../model';
6
+ import {ClusterType, CLUSTER_TYPE, PeptidesModel, VIEWER_TYPE} from '../model';
7
7
  import * as C from '../utils/constants';
8
8
  import * as CR from '../utils/cell-renderer';
9
- import {TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
10
- import {PositionHeight} from '@datagrok-libraries/bio/src/viewers/web-logo';
11
- import {getStats, MaskInfo, Stats} from '../utils/statistics';
9
+ import {TAGS as bioTAGS, getSplitterForColumn} from '@datagrok-libraries/bio/src/utils/macromolecule';
10
+ import {HorizontalAlignments, PositionHeight} from '@datagrok-libraries/bio/src/viewers/web-logo';
11
+ import {getAggregatedValue, getStats, Stats} from '../utils/statistics';
12
12
  import wu from 'wu';
13
13
  import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
14
+ import {getActivityDistribution, getDistributionLegend, getStatsTableMap} from '../widgets/distribution';
15
+ import {getStatsSummary} from '../utils/misc';
16
+ import BitArray from '@datagrok-libraries/utils/src/bit-array';
14
17
 
15
- export class LogoSummary extends DG.JsViewer {
16
- _titleHost = ui.divText('Logo Summary Table', {id: 'pep-viewer-title'});
18
+ const getAggregatedColName = (aggF: string, colName: string): string => `${aggF}(${colName})`;
19
+
20
+ export enum LST_PROPERTIES {
21
+ WEB_LOGO_MODE = 'webLogoMode',
22
+ MEMBERS_RATIO_THRESHOLD = 'membersRatioThreshold',
23
+ };
24
+
25
+ export class LogoSummaryTable extends DG.JsViewer {
26
+ _titleHost = ui.divText(VIEWER_TYPE.LOGO_SUMMARY_TABLE, {id: 'pep-viewer-title'});
17
27
  model!: PeptidesModel;
18
28
  viewerGrid!: DG.Grid;
19
29
  initialized: boolean = false;
20
30
  webLogoMode: string;
21
31
  membersRatioThreshold: number;
22
- newClusterName: string;
23
- webLogoDfPlot: DG.DataFramePlotHelper[] = [];
24
- distributionDfPlot: DG.DataFramePlotHelper[] = [];
32
+ webLogoDfPlot: DG.DataFrame[] = [];
33
+ distributionDfPlot: DG.DataFrame[] = [];
25
34
 
26
35
  constructor() {
27
36
  super();
28
37
 
29
- this.webLogoMode = this.string('webLogoMode', PositionHeight.full,
38
+ this.webLogoMode = this.string(LST_PROPERTIES.WEB_LOGO_MODE, PositionHeight.Entropy,
30
39
  {choices: [PositionHeight.full, PositionHeight.Entropy]});
31
- this.membersRatioThreshold = this.float('membersRatioThreshold', 0.7, {min: 0, max: 1.0});
32
- this.newClusterName = this.string('newClusterName', 'New cluster');
40
+ this.membersRatioThreshold = this.float(LST_PROPERTIES.MEMBERS_RATIO_THRESHOLD, 0.3, {min: 0, max: 1.0});
33
41
  }
34
42
 
35
43
  onTableAttached(): void {
@@ -82,142 +90,138 @@ export class LogoSummary extends DG.JsViewer {
82
90
  const isDfFiltered = this.dataFrame.filter.anyFalse;
83
91
  const filteredDf = isDfFiltered ? this.dataFrame.clone(this.dataFrame.filter) : this.dataFrame;
84
92
  const filteredDfCols = filteredDf.columns;
93
+ const filteredDfRowCount = filteredDf.rowCount;
85
94
  const activityCol = filteredDf.getCol(C.COLUMNS_NAMES.ACTIVITY_SCALED);
86
95
  const activityColData = activityCol.getRawData();
87
96
 
88
- const filteredDfClustersCol = filteredDf.getCol(clustersColName);
89
- const filteredDfClustersColData = filteredDfClustersCol.getRawData();
90
- const filteredDfClustersColCategories = filteredDfClustersCol.categories;
91
- const filteredDfClustersColLength = filteredDfClustersColData.length;
97
+ const filteredDfClustCol = filteredDf.getCol(clustersColName);
98
+ const filteredDfClustColData = filteredDfClustCol.getRawData();
99
+ const filteredDfClustColCat = filteredDfClustCol.categories;
100
+
101
+ const pepCol: DG.Column<string> = filteredDf.getCol(this.model.settings.sequenceColumnName!);
92
102
 
93
- // const customClustersColumnsList = wu(this.model.customClusters).toArray();
94
103
  const query: { [key: string]: string } = {};
95
104
  query[C.TAGS.CUSTOM_CLUSTER] = '1';
96
- const customClustersColumnsList = wu(filteredDfCols.byTags(query)).filter(c => c.max > 0).toArray();
97
- const getAggregatedColName = (aggF: string, colName: string) => `${aggF}(${colName})`;
98
- const isCustomCluster = (cluster: string) => filteredDfCols.contains(cluster);
99
-
100
- let summaryTableBuilder = filteredDf.groupBy([clustersColName]);
101
- const aggregateColumnsEntries = Object.entries(this.model.settings.columns ?? {});
102
- for (const [colName, aggregationFunc] of aggregateColumnsEntries) {
103
- summaryTableBuilder = summaryTableBuilder.add(
104
- aggregationFunc as any, colName, getAggregatedColName(aggregationFunc, colName));
105
+ const customClustColList: DG.Column<boolean>[] =
106
+ wu(filteredDfCols.byTags(query)).filter((c) => c.max > 0).toArray();
107
+
108
+ const customLST = DG.DataFrame.create(customClustColList.length);
109
+ const customLSTCols = customLST.columns;
110
+ const customLSTClustCol = customLSTCols.addNewString(clustersColName);
111
+
112
+ const customMembersColData = customLSTCols.addNewInt(C.LST_COLUMN_NAMES.MEMBERS).getRawData();
113
+ const customWebLogoCol = customLSTCols.addNewString(C.LST_COLUMN_NAMES.WEB_LOGO);
114
+ const customDistCol = customLSTCols.addNewString(C.LST_COLUMN_NAMES.DISTRIBUTION);
115
+ const customMDColData = customLSTCols.addNewFloat(C.LST_COLUMN_NAMES.MEAN_DIFFERENCE).getRawData();
116
+ const customPValColData = customLSTCols.addNewFloat(C.LST_COLUMN_NAMES.P_VALUE).getRawData();
117
+ const customRatioColData = customLSTCols.addNewFloat(C.LST_COLUMN_NAMES.RATIO).getRawData();
118
+
119
+ let origLSTBuilder = filteredDf.groupBy([clustersColName]);
120
+ const aggColsEntries = Object.entries(this.model.settings.columns ?? {});
121
+ const aggColNames = aggColsEntries.map(([colName, aggFn]) => getAggregatedColName(aggFn, colName));
122
+ const customAggRawCols = new Array(aggColNames.length);
123
+ const colAggEntries = aggColsEntries.map(
124
+ ([colName, aggFn]) => [filteredDf.getCol(colName), aggFn] as [DG.Column<number>, DG.AggregationType]);
125
+
126
+ for (let aggIdx = 0; aggIdx < aggColsEntries.length; ++aggIdx) {
127
+ const [colName, aggFn] = aggColsEntries[aggIdx];
128
+ origLSTBuilder = origLSTBuilder.add(aggFn, colName, aggColNames[aggIdx]);
129
+ const customLSTAggCol = customLSTCols.addNewFloat(aggColNames[aggIdx]);
130
+ customAggRawCols[aggIdx] = customLSTAggCol.getRawData();
105
131
  }
106
132
 
107
- const tempSummaryTable = summaryTableBuilder.aggregate();
108
- const tempSummaryTableLength = tempSummaryTable.rowCount;
109
- const tempClustersCol: DG.Column<string> = tempSummaryTable.getCol(clustersColName);
110
- const summaryTableLength = tempSummaryTableLength + customClustersColumnsList.length;
111
- const summaryTable = DG.DataFrame.create(summaryTableLength);
112
- const summaryTableCols = summaryTable.columns;
113
-
114
- const clustersCol = summaryTableCols.addNewString(clustersColName);
115
- for (let i = 0; i < summaryTableLength; ++i) {
116
- clustersCol.set(i, i < tempSummaryTableLength ? tempClustersCol.get(i) :
117
- customClustersColumnsList[i - tempSummaryTableLength].name);
133
+ // BEGIN: fill LST part with custom clusters
134
+ const customWebLogoTables: DG.DataFrame[] = new Array(customClustColList.length);
135
+ const customDistTables: DG.DataFrame[] = new Array(customClustColList.length);
136
+
137
+ for (let rowIdx = 0; rowIdx < customClustColList.length; ++rowIdx) {
138
+ const customClustCol = customClustColList[rowIdx];
139
+ customLSTClustCol.set(rowIdx, customClustCol.name);
140
+ const bitArray = BitArray.fromUint32Array(filteredDfRowCount, customClustCol.getRawData() as Uint32Array);
141
+ const bsMask = DG.BitSet.create(filteredDfRowCount, (i) => bitArray.getBit(i));
142
+
143
+ const stats: Stats = isDfFiltered ? getStats(activityColData, bitArray) :
144
+ this.model.clusterStats[CLUSTER_TYPE.CUSTOM][customClustCol.name];
145
+
146
+ customMembersColData[rowIdx] = stats.count;
147
+ customWebLogoTables[rowIdx] = this.createWebLogoPlot(pepCol, bsMask);
148
+ customDistTables[rowIdx] = this.createDistributionPlot(activityCol, customClustColList[rowIdx]);
149
+ customMDColData[rowIdx] = stats.meanDifference;
150
+ customPValColData[rowIdx] = stats.pValue;
151
+ customRatioColData[rowIdx] = stats.ratio;
152
+
153
+ for (let aggColIdx = 0; aggColIdx < aggColNames.length; ++aggColIdx) {
154
+ const [col, aggFn] = colAggEntries[aggColIdx];
155
+ customAggRawCols[aggColIdx][rowIdx] = getAggregatedValue(col, aggFn, bsMask);
156
+ }
118
157
  }
119
- const clustersColData = clustersCol.getRawData();
120
- const clustersColCategories = clustersCol.categories;
121
158
 
122
- const peptideCol: DG.Column<string> = filteredDf.getCol(this.model.settings.sequenceColumnName!);
123
- const peptideColData = peptideCol.getRawData();
124
- const peptideColCategories = peptideCol.categories;
125
- const peptideColTags = peptideCol.tags;
159
+ customWebLogoCol.setTag(DG.TAGS.CELL_RENDERER, 'html');
160
+ customDistCol.setTag(DG.TAGS.CELL_RENDERER, 'html');
126
161
 
127
- const membersColData = summaryTableCols.addNewInt(C.LST_COLUMN_NAMES.MEMBERS).getRawData();
128
- const webLogoCol = summaryTableCols.addNewString(C.LST_COLUMN_NAMES.WEB_LOGO);
129
- const distributionCol = summaryTableCols.addNewString(C.LST_COLUMN_NAMES.DISTRIBUTION);
130
- const meanDifferenceColData = summaryTableCols.addNewFloat(C.LST_COLUMN_NAMES.MEAN_DIFFERENCE).getRawData();
131
- const pValColData = summaryTableCols.addNewFloat(C.LST_COLUMN_NAMES.P_VALUE).getRawData();
132
- const ratioColData = summaryTableCols.addNewFloat(C.LST_COLUMN_NAMES.RATIO).getRawData();
133
-
134
- for (const [colName, aggregationFunc] of aggregateColumnsEntries) {
135
- const tempSummaryTableCol = tempSummaryTable.getCol(getAggregatedColName(aggregationFunc, colName));
136
- const summaryTableCol = summaryTableCols.addNew(tempSummaryTableCol.name, tempSummaryTableCol.type);
137
- summaryTableCol.init((i) => i < tempSummaryTableLength ? tempSummaryTableCol.get(i) : null);
138
- }
162
+ // END
139
163
 
140
- this.webLogoDfPlot = new Array(summaryTableLength);
141
- this.distributionDfPlot = new Array(summaryTableLength);
142
-
143
- for (let summaryTableRowIndex = 0; summaryTableRowIndex < summaryTableLength; ++summaryTableRowIndex) {
144
- const isOriginalCluster = summaryTableRowIndex < tempSummaryTableLength;
145
- const currentClusterCategoryIndex = clustersColData[summaryTableRowIndex];
146
- const currentCluster = clustersColCategories[currentClusterCategoryIndex]; // Cluster name
147
- const customClusterColData = customClustersColumnsList.find((col) => col.name == currentCluster)?.toList();
148
-
149
- const isValidIndex = isOriginalCluster ?
150
- (j: number) => filteredDfClustersColCategories[filteredDfClustersColData[j]] == currentCluster :
151
- (j: number) => customClusterColData![j];
152
-
153
-
154
- //TODO: use bitset instead of splitCol
155
- const splitCol = DG.Column.bool(C.COLUMNS_NAMES.SPLIT_COL, activityCol.length);
156
- const getSplitColValueAt = isOriginalCluster ?
157
- (splitColIndex: number) => filteredDfClustersColData[splitColIndex] == currentClusterCategoryIndex :
158
- (splitColIndex: number) => customClusterColData![splitColIndex];
159
- splitCol.init((i) => getSplitColValueAt(i));
160
-
161
- let stats: Stats;
162
- if (isDfFiltered) {
163
- const trueCount = splitCol.stats.sum;
164
- const maskInfo = {
165
- trueCount: trueCount,
166
- falseCount: activityColData.length - trueCount,
167
- mask: splitCol.toList() as boolean[],
168
- };
169
- stats = getStats(activityColData, maskInfo);
170
- } else
171
- stats = this.model.clusterStats[currentCluster];
172
-
173
- const tCol = DG.Column.string('peptides', stats.count);
174
- let tColIdx = 0;
175
- for (let j = 0; j < filteredDfClustersColLength; ++j) {
176
- if (isValidIndex(j))
177
- tCol.set(tColIdx++, peptideColCategories[peptideColData[j]]);
178
- }
164
+ // BEGIN: fill LST part with original clusters
165
+ const origLST = origLSTBuilder.aggregate();
166
+ const origLSTLen = origLST.rowCount;
167
+ const origLSTCols = origLST.columns;
168
+ const origLSTClustCol: DG.Column<string> = origLST.getCol(clustersColName);
179
169
 
180
- for (const tag of peptideColTags)
181
- tCol.setTag(tag[0], tag[1]);
170
+ const origLSTClustColCat = origLSTClustCol.categories;
182
171
 
183
- const uh = new UnitsHandler(tCol);
184
- tCol.setTag(bioTAGS.alphabetSize, uh.getAlphabetSize().toString());
172
+ const origMembersColData = origLSTCols.addNewInt(C.LST_COLUMN_NAMES.MEMBERS).getRawData();
173
+ const origWebLogoCol = origLSTCols.addNewString(C.LST_COLUMN_NAMES.WEB_LOGO);
174
+ const origDistCol = origLSTCols.addNewString(C.LST_COLUMN_NAMES.DISTRIBUTION);
175
+ const origMDColData = origLSTCols.addNewFloat(C.LST_COLUMN_NAMES.MEAN_DIFFERENCE).getRawData();
176
+ const origPValColData = origLSTCols.addNewFloat(C.LST_COLUMN_NAMES.P_VALUE).getRawData();
177
+ const origRatioColData = origLSTCols.addNewFloat(C.LST_COLUMN_NAMES.RATIO).getRawData();
185
178
 
179
+ const origWebLogoTables: DG.DataFrame[] = new Array(origLSTLen);
180
+ const origDistTables: DG.DataFrame[] = new Array(origLSTLen);
186
181
 
187
- const distributionTable = DG.DataFrame.fromColumns([activityCol, splitCol]);
188
- const dfSlice = DG.DataFrame.fromColumns([tCol]);
182
+ const origClustMasks = Array.from({length: origLSTLen},
183
+ () => BitArray.fromSeq(filteredDfRowCount, () => false));
189
184
 
190
- this.webLogoDfPlot[summaryTableRowIndex] = dfSlice.plot;
191
- this.distributionDfPlot[summaryTableRowIndex] = distributionTable.plot;
185
+ for (let rowIdx = 0; rowIdx < filteredDfRowCount; ++rowIdx) {
186
+ const filteredClustName = filteredDfClustColCat[filteredDfClustColData[rowIdx]];
187
+ const origClustIdx = origLSTClustColCat.indexOf(filteredClustName);
188
+ origClustMasks[origClustIdx].setTrue(rowIdx);
189
+ }
192
190
 
193
- membersColData[summaryTableRowIndex] = stats.count;
194
- meanDifferenceColData[summaryTableRowIndex] = stats.meanDifference;
195
- pValColData[summaryTableRowIndex] = stats.pValue;
196
- ratioColData[summaryTableRowIndex] = stats.ratio;
191
+ for (let rowIdx = 0; rowIdx < origLSTLen; ++rowIdx) {
192
+ const mask = origClustMasks[rowIdx];
193
+ const bsMask = DG.BitSet.create(filteredDfRowCount, (i) => mask.getBit(i));
197
194
 
198
- //Setting aggregated col values
199
- if (!isOriginalCluster) {
200
- for (const [colName, aggregationFunc] of aggregateColumnsEntries) {
201
- const arrayBuffer = filteredDf.getCol(colName).getRawData();
202
- const clusterMask = DG.BitSet.fromBytes(arrayBuffer.buffer, arrayBuffer.byteLength / 4);
203
- const subDf = filteredDf.clone(clusterMask, [colName]);
204
- const newColName = getAggregatedColName(aggregationFunc, colName);
205
- const aggregatedDf = subDf.groupBy()
206
- .add(aggregationFunc as any, colName, newColName)
207
- .aggregate();
208
- const value = aggregatedDf.get(newColName, 0);
209
- summaryTable.set(newColName, summaryTableRowIndex, value);
210
- }
211
- }
195
+ const stats = isDfFiltered ? getStats(activityColData, mask) :
196
+ this.model.clusterStats[CLUSTER_TYPE.ORIGINAL][origLSTClustColCat[rowIdx]];
197
+
198
+ origMembersColData[rowIdx] = stats.count;
199
+ origWebLogoTables[rowIdx] = this.createWebLogoPlot(pepCol, bsMask);
200
+ origDistTables[rowIdx] = this.createDistributionPlot(activityCol,
201
+ DG.Column.fromBitSet(C.COLUMNS_NAMES.SPLIT_COL, bsMask));
202
+ origMDColData[rowIdx] = stats.meanDifference;
203
+ origPValColData[rowIdx] = stats.pValue;
204
+ origRatioColData[rowIdx] = stats.ratio;
212
205
  }
213
- webLogoCol.setTag(DG.TAGS.CELL_RENDERER, 'html');
214
- distributionCol.setTag(DG.TAGS.CELL_RENDERER, 'html');
206
+
207
+ origWebLogoCol.setTag(DG.TAGS.CELL_RENDERER, 'html');
208
+ origDistCol.setTag(DG.TAGS.CELL_RENDERER, 'html');
209
+ // END
210
+
211
+ // combine LSTs and create a grid
212
+ const summaryTable = origLST.append(customLST);
213
+ this.webLogoDfPlot = origWebLogoTables.concat(customWebLogoTables);
214
+ this.distributionDfPlot = origDistTables.concat(customDistTables);
215
215
 
216
216
  this.viewerGrid = summaryTable.plot.grid();
217
+ this.viewerGrid.sort([C.LST_COLUMN_NAMES.MEMBERS], [false]);
217
218
  this.updateFilter();
218
219
  const gridClustersCol = this.viewerGrid.col(clustersColName)!;
219
- gridClustersCol.name = 'Clusters';
220
+ gridClustersCol.name = C.LST_COLUMN_NAMES.CLUSTER;
220
221
  gridClustersCol.visible = true;
222
+ this.viewerGrid.columns.setOrder([C.LST_COLUMN_NAMES.CLUSTER, C.LST_COLUMN_NAMES.MEMBERS,
223
+ C.LST_COLUMN_NAMES.WEB_LOGO, C.LST_COLUMN_NAMES.DISTRIBUTION, C.LST_COLUMN_NAMES.MEAN_DIFFERENCE,
224
+ C.LST_COLUMN_NAMES.P_VALUE, C.LST_COLUMN_NAMES.RATIO, ...aggColNames]);
221
225
  this.viewerGrid.columns.rowHeader!.visible = false;
222
226
  this.viewerGrid.props.rowHeight = 55;
223
227
  this.viewerGrid.onCellPrepare((cell) => {
@@ -225,27 +229,40 @@ export class LogoSummary extends DG.JsViewer {
225
229
  if (!cell.isTableCell || currentRowIdx == null || currentRowIdx == -1)
226
230
  return;
227
231
 
228
- if (cell.tableColumn?.name == 'WebLogo') {
229
- this.webLogoDfPlot[currentRowIdx]
230
- .fromType('WebLogo', {maxHeight: cell.grid.props.rowHeight - 5, positionHeight: this.webLogoMode,
231
- horizontalAlignment: 'left'})
232
+ const height = cell.bounds.height;
233
+ if (cell.tableColumn?.name == C.LST_COLUMN_NAMES.WEB_LOGO) {
234
+ const webLogoTable = this.webLogoDfPlot[currentRowIdx];
235
+ const webLogoTableRowCount = webLogoTable.rowCount;
236
+ const webLogoTablePepCol = webLogoTable.getCol(pepCol.name);
237
+ const webLogoTablePepColData = webLogoTablePepCol.getRawData();
238
+ const webLogoTablePepColCat = webLogoTablePepCol.categories;
239
+ const splitter = getSplitterForColumn(webLogoTablePepCol);
240
+ let maxSequenceLength = 0;
241
+ for (let i = 0; i < webLogoTableRowCount; ++i) {
242
+ maxSequenceLength = Math.max(maxSequenceLength,
243
+ splitter(webLogoTablePepColCat[webLogoTablePepColData[i]]).length);
244
+ }
245
+ const positionWidth = Math.floor((cell.bounds.width - 2 - (4 * (maxSequenceLength - 1))) / maxSequenceLength);
246
+ webLogoTable.plot
247
+ .fromType('WebLogo', {positionHeight: this.webLogoMode, horizontalAlignment: HorizontalAlignments.LEFT,
248
+ maxHeight: 1000, minHeight: height - 2, positionWidth: positionWidth})
232
249
  .then((viewer) => cell.element = viewer.root);
233
- } else if (cell.tableColumn?.name == 'Distribution') {
234
- const viewerRoot = this.distributionDfPlot[currentRowIdx].histogram({
250
+ } else if (cell.tableColumn?.name == C.LST_COLUMN_NAMES.DISTRIBUTION) {
251
+ const viewerRoot = this.distributionDfPlot[currentRowIdx].plot.histogram({
235
252
  filteringEnabled: false,
236
253
  valueColumnName: C.COLUMNS_NAMES.ACTIVITY_SCALED,
237
254
  splitColumnName: C.COLUMNS_NAMES.SPLIT_COL,
238
255
  legendVisibility: 'Never',
239
- showXAxis: true,
256
+ showXAxis: false,
240
257
  showColumnSelector: false,
241
258
  showRangeSlider: false,
242
259
  showBinSelector: false,
243
- backColor: '#fffff',
260
+ backColor: DG.Color.toHtml(DG.Color.white),
261
+ xAxisHeight: 1,
244
262
  }).root;
245
263
 
246
264
  viewerRoot.style.width = 'auto';
247
- const height = (cell.grid.props.rowHeight - 5) / 2 * 3;
248
- viewerRoot.style.height = `${height}px`;
265
+ viewerRoot.style.height = `${height-2}px`;
249
266
  cell.element = viewerRoot;
250
267
  }
251
268
  });
@@ -255,10 +272,10 @@ export class LogoSummary extends DG.JsViewer {
255
272
  return;
256
273
 
257
274
  summaryTable.currentRowIdx = -1;
258
- if (ev.shiftKey)
259
- this.model.modifyClusterSelection(cell.cell.value);
260
- else
261
- this.model.initClusterSelection(cell.cell.value);
275
+ if (!ev.shiftKey)
276
+ this.model.initClusterSelection({notify: false});
277
+
278
+ this.model.modifyClusterSelection(cell.cell.value);
262
279
  this.viewerGrid.invalidate();
263
280
  });
264
281
  this.viewerGrid.onCellRender.subscribe((gridCellArgs) => {
@@ -271,13 +288,17 @@ export class LogoSummary extends DG.JsViewer {
271
288
  canvasContext.beginPath();
272
289
  canvasContext.rect(bound.x, bound.y, bound.width, bound.height);
273
290
  canvasContext.clip();
274
- CR.renderLogoSummaryCell(canvasContext, gc.cell.value, this.model.logoSummarySelection, bound);
291
+ CR.renderLogoSummaryCell(canvasContext, gc.cell.value, this.model.clusterSelection, bound);
275
292
  gridCellArgs.preventDefault();
276
293
  canvasContext.restore();
277
294
  });
278
295
  this.viewerGrid.onCellTooltip((cell, x, y) => {
279
- if (!cell.isColHeader && cell.tableColumn?.name === clustersColName)
280
- this.model.showTooltipCluster(cell.cell.rowIndex, x, y, cell.cell.value);
296
+ if (!cell.isColHeader && cell.tableColumn?.name === clustersColName) {
297
+ const clustName = cell.cell.value;
298
+ const clustColCat = this.dataFrame.getCol(this.model.settings.clustersColumnName!).categories;
299
+ const clustType = clustColCat.includes(clustName) ? CLUSTER_TYPE.ORIGINAL : CLUSTER_TYPE.CUSTOM;
300
+ this.showTooltip(clustName, x, y, clustType);
301
+ }
281
302
  return true;
282
303
  });
283
304
  const webLogoGridCol = this.viewerGrid.columns.byName('WebLogo')!;
@@ -312,13 +333,10 @@ export class LogoSummary extends DG.JsViewer {
312
333
  const newClusterVals = new Array(viewerDfCols.length);
313
334
 
314
335
  const activityScaledCol = filteredDf.getCol(C.COLUMNS_NAMES.ACTIVITY_SCALED);
315
- const maskInfo: MaskInfo = {
316
- mask: selection.getBuffer(),
317
- trueCount: selection.trueCount,
318
- falseCount: selection.falseCount,
319
- };
320
- const stats = getStats(activityScaledCol.getRawData(), maskInfo);
321
- const distributionTable = DG.DataFrame.fromColumns([activityScaledCol, filteredDf.getCol(this.model.splitCol.name)]);
336
+ const bitArray = BitArray.fromString(selection.toBinaryString());
337
+ const stats = getStats(activityScaledCol.getRawData(), bitArray);
338
+ const distributionTable =
339
+ DG.DataFrame.fromColumns([activityScaledCol, filteredDf.getCol(this.model.splitCol.name)]);
322
340
 
323
341
  const peptideCol: DG.Column<string> = filteredDf.getCol(this.model.settings.sequenceColumnName!);
324
342
  const peptideColData = peptideCol.getRawData();
@@ -336,53 +354,113 @@ export class LogoSummary extends DG.JsViewer {
336
354
  tCol.setTag(bioTAGS.alphabetSize, uh.getAlphabetSize().toString());
337
355
 
338
356
  const webLogoTable = DG.DataFrame.fromColumns([tCol]);
339
- this.webLogoDfPlot.push(webLogoTable.plot);
340
- this.distributionDfPlot.push(distributionTable.plot);
357
+ this.webLogoDfPlot.push(webLogoTable);
358
+ this.distributionDfPlot.push(distributionTable);
341
359
 
342
- const colCategories = viewerDfCols.byName(this.model.settings.clustersColumnName!).categories;
343
- let newClusterName = this.newClusterName;
344
- let clusterNum = 1;
345
- const getString = !isNaN(parseInt(newClusterName)) ? () => `${parseInt(newClusterName) + 1}` :
346
- newClusterName == '' ? () => `${clusterNum++}` :
347
- () => `${this.newClusterName} ${clusterNum++}`;
348
- while (colCategories.includes(newClusterName))
349
- newClusterName = getString();
360
+ const newClusterName = viewerDfCols.getUnusedName('New Cluster');
350
361
 
351
- this.getProperty('newClusterName')?.set(this, getString());
362
+ const aggregatedValues: {[colName: string]: number} = {};
363
+ const aggColsEntries = Object.entries(this.model.settings.columns ?? {});
364
+ for (const [colName, aggFn] of aggColsEntries) {
365
+ const newColName = getAggregatedColName(aggFn, colName);
366
+ const col = filteredDf.getCol(colName);
367
+ aggregatedValues[newColName] = getAggregatedValue(col, aggFn, selection);
368
+ }
352
369
 
353
370
  for (let i = 0; i < viewerDfColsLength; ++i) {
354
371
  const col = viewerDfCols.byIndex(i);
355
372
  newClusterVals[i] = col.name == this.model.settings.clustersColumnName! ? newClusterName :
356
- col.name == C.LST_COLUMN_NAMES.MEMBERS ? maskInfo.trueCount :
373
+ col.name == C.LST_COLUMN_NAMES.MEMBERS ? selection.trueCount :
357
374
  col.name == C.LST_COLUMN_NAMES.WEB_LOGO ? null :
358
375
  col.name == C.LST_COLUMN_NAMES.DISTRIBUTION ? null :
359
376
  col.name == C.LST_COLUMN_NAMES.MEAN_DIFFERENCE ? stats.meanDifference:
360
377
  col.name == C.LST_COLUMN_NAMES.P_VALUE ? stats.pValue:
361
378
  col.name == C.LST_COLUMN_NAMES.RATIO ? stats.ratio:
379
+ col.name in aggregatedValues ? aggregatedValues[col.name] :
362
380
  console.warn(`PeptidesLSTWarn: value for column ${col.name} is undefined`)! || null;
363
381
  }
364
382
  viewerDf.rows.addNew(newClusterVals);
365
383
 
366
- this.model.clusterStats[newClusterName] = stats;
384
+ this.model.clusterStats[CLUSTER_TYPE.CUSTOM][newClusterName] = stats;
367
385
  this.model.addNewCluster(newClusterName);
368
386
  }
369
387
 
370
388
  removeCluster(): void {
371
- const lss = this.model.logoSummarySelection;
389
+ const lss = this.model.clusterSelection;
372
390
  const dfCols = this.dataFrame.columns;
373
391
 
374
- const removeClusterIndexesList = lss.filter((cluster) => dfCols.contains(cluster));
375
- if (removeClusterIndexesList.length == 0)
376
- return grok.shell.info('Nothing removed. Please select a created cluster to remove');
392
+ // Names of the clusters to remove
393
+ const clustNames = lss.filter((cluster) => dfCols.contains(cluster));
394
+ if (clustNames.length == 0)
395
+ return grok.shell.warning('Only custom clusters can be removed');
396
+
397
+ const viewerDf = this.viewerGrid.dataFrame;
398
+ const viewerDfRows = viewerDf.rows;
399
+ const clustColCat = viewerDf.getCol(this.model.settings.clustersColumnName!).categories;
377
400
 
378
- for (const cluster of removeClusterIndexesList) {
401
+ for (const cluster of clustNames) {
379
402
  lss.splice(lss.indexOf(cluster), 1);
380
403
  dfCols.remove(cluster);
404
+ delete this.model.clusterStats[CLUSTER_TYPE.CUSTOM][cluster];
405
+ const clustIdx = clustColCat.indexOf(cluster);
406
+ viewerDfRows.removeAt(clustIdx);
407
+ this.webLogoDfPlot.splice(clustIdx, 1);
408
+ this.distributionDfPlot.splice(clustIdx, 1);
381
409
  }
382
410
 
383
- this.model.logoSummarySelection = lss;
384
- this.model.clusterStats = this.model.calculateClusterStatistics();
385
- this.createLogoSummaryGrid();
411
+ this.model.clusterSelection = lss;
386
412
  this.render();
387
413
  }
414
+
415
+ showTooltip(clustName: string, x: number, y: number, clustType: ClusterType = 'original'): HTMLDivElement | null {
416
+ const bs = this.dataFrame.filter;
417
+ const filteredDf = bs.anyFalse ? this.dataFrame.clone(bs) : this.dataFrame;
418
+ const rowCount = filteredDf.rowCount;
419
+
420
+ const bitArray = new BitArray(rowCount);
421
+ const activityCol = filteredDf.getCol(C.COLUMNS_NAMES.ACTIVITY_SCALED);
422
+ const activityColData = activityCol.getRawData();
423
+
424
+
425
+ if (clustType == CLUSTER_TYPE.ORIGINAL) {
426
+ const origClustCol = filteredDf.getCol(this.model.settings.clustersColumnName!);
427
+ const origClustColData = origClustCol.getRawData();
428
+ const origClustColCategories = origClustCol.categories;
429
+ const seekValue = origClustColCategories.indexOf(clustName);
430
+
431
+ for (let i = 0; i < rowCount; ++i)
432
+ bitArray.setBit(i, origClustColData[i] === seekValue);
433
+ } else {
434
+ const clustCol: DG.Column<boolean> = filteredDf.getCol(clustName);
435
+ bitArray.buffer = clustCol.getRawData() as Uint32Array;
436
+ }
437
+
438
+ const stats = bs.anyFalse ? getStats(activityColData, bitArray) : this.model.clusterStats[clustType][clustName];
439
+
440
+ if (!stats.count)
441
+ return null;
442
+
443
+ const mask = DG.BitSet.create(rowCount, (i) => bitArray.getBit(i));
444
+ const distributionTable = DG.DataFrame.fromColumns(
445
+ [activityCol, DG.Column.fromBitSet(C.COLUMNS_NAMES.SPLIT_COL, mask)]);
446
+ const labels = getDistributionLegend(`Cluster: ${clustName}`, 'Other');
447
+ const hist = getActivityDistribution(distributionTable, true);
448
+ const tableMap = getStatsTableMap(stats, {fractionDigits: 2});
449
+ const aggregatedColMap = this.model.getAggregatedColumnValues({filterDf: true, mask: mask, fractionDigits: 2});
450
+
451
+ const resultMap: {[key: string]: any} = {...tableMap, ...aggregatedColMap};
452
+ const tooltip = getStatsSummary(labels, hist, resultMap, true);
453
+
454
+ ui.tooltip.show(tooltip, x, y);
455
+
456
+ return tooltip;
457
+ }
458
+
459
+ createWebLogoPlot(pepCol: DG.Column<string>, mask: DG.BitSet): DG.DataFrame {
460
+ return DG.DataFrame.fromColumns([pepCol]).clone(mask);
461
+ }
462
+
463
+ createDistributionPlot(activityCol: DG.Column<number>, splitCol: DG.Column<boolean>): DG.DataFrame {
464
+ return DG.DataFrame.fromColumns([activityCol, splitCol]);
465
+ }
388
466
  }
@@ -57,7 +57,7 @@ export class PeptideSpaceViewer extends DG.JsViewer {
57
57
  }
58
58
 
59
59
  async render(computeData=false): Promise<void> {
60
- if (computeData && !this.isEmbeddingCreating && !this.model.isChangingEdfBitset) {
60
+ if (computeData && !this.isEmbeddingCreating /*&& !this.model.isChangingEdfBitset*/) {
61
61
  this.isEmbeddingCreating = true;
62
62
  $(this.root).empty();
63
63
  const viewerHost = ui.waitBox(async () => {
@@ -65,17 +65,17 @@ export class PeptideSpaceViewer extends DG.JsViewer {
65
65
  const alignedSeqCol = this.dataFrame.getCol(this.model.settings.sequenceColumnName!);
66
66
  const edf = await computeWeights(this.dataFrame, this.method, this.measure, this.cyclesCount, alignedSeqCol);
67
67
  this.dataFrame.temp[C.EMBEDDING_STATUS] = true;
68
- this.model.edf = edf;
68
+ // this.model.edf = edf;
69
69
 
70
70
  if (edf === null)
71
71
  return ui.label('Could not compute embeddings');
72
72
 
73
73
  const edfSelection = edf.selection;
74
74
  edfSelection.copyFrom(this.dataFrame.selection);
75
- edfSelection.onChanged.subscribe(() => {
76
- if (!this.model.isChangingEdfBitset)
77
- this.model.fireBitsetChanged(true);
78
- });
75
+ // edfSelection.onChanged.subscribe(() => {
76
+ // if (!this.model.isChangingEdfBitset)
77
+ // this.model.fireBitsetChanged(true);
78
+ // });
79
79
 
80
80
  const colorCol = this.dataFrame.getCol(C.COLUMNS_NAMES.ACTIVITY_SCALED);
81
81
  edf.columns.add(colorCol);