npm - @datagrok/peptides - Versions diffs - 1.7.2 → 1.8.0 - Mend

@datagrok/peptides 1.7.2 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29)

package/README.md +3 -3
package/dist/563.js +2 -0
package/dist/611.js +2 -0
package/dist/802.js +2 -0
package/dist/96.js +2 -0
package/dist/package-test.js +2 -29778
package/dist/package.js +2 -28285
package/files/icons/logo-summary-viewer.svg +13 -0
package/files/icons/peptide-sar-vertical-viewer.svg +13 -0
package/files/icons/peptide-sar-viewer.svg +19 -0
package/files/icons/peptide-space-viewer.svg +40 -0
package/package.json +5 -6
package/src/model.ts +262 -196
package/src/package.ts +44 -29
package/src/tests/peptide-space-test.ts +1 -2
package/src/tests/viewers.ts +0 -2
package/src/utils/cell-renderer.ts +1 -1
package/src/utils/distance-matrix.worker.ts +16 -0
package/src/utils/peptide-similarity-space.ts +0 -1
package/src/utils/statistics.ts +9 -0
package/src/utils/types.ts +5 -1
package/src/utils/worker-creator.ts +11 -0
package/src/viewers/logo-summary.ts +232 -133
package/src/viewers/peptide-space-viewer.ts +6 -6
package/src/viewers/sar-viewer.ts +3 -3
package/src/widgets/distribution.ts +14 -17
package/src/widgets/peptides.ts +4 -1
package/src/widgets/settings.ts +30 -4
package/dist/vendors-node_modules_datagrok-libraries_ml_src_workers_dimensionality-reducer_js.js +0 -9077

package/src/viewers/logo-summary.ts CHANGED

@@ -3,23 +3,27 @@ import * as grok from 'datagrok-api/grok';
 import * as DG from 'datagrok-api/dg';
 import $ from 'cash-dom';
-import {PeptidesModel} from '../model';
+import {ClusterType, CLUSTER_TYPE, PeptidesModel, VIEWER_TYPE} from '../model';
 import * as C from '../utils/constants';
 import * as CR from '../utils/cell-renderer';
 import {TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
 import {PositionHeight} from '@datagrok-libraries/bio/src/viewers/web-logo';
-import {getStats, MaskInfo, Stats} from '../utils/statistics';
+import {getAggregatedValue, getStats, MaskInfo, Stats} from '../utils/statistics';
 import wu from 'wu';
 import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
+import {getDistributionAndStats} from '../widgets/distribution';
+import {wrapDistroAndStatsDefault} from '../utils/misc';
-export class LogoSummary extends DG.JsViewer {
-  _titleHost = ui.divText('Logo Summary Table', {id: 'pep-viewer-title'});
+const getAggregatedColName = (aggF: string, colName: string): string => `${aggF}(${colName})`;
+export class LogoSummaryTable extends DG.JsViewer {
+  _titleHost = ui.divText(VIEWER_TYPE.LOGO_SUMMARY_TABLE, {id: 'pep-viewer-title'});
   model!: PeptidesModel;
   viewerGrid!: DG.Grid;
   initialized: boolean = false;
   webLogoMode: string;
   membersRatioThreshold: number;
-  newClusterName: string;
   webLogoDfPlot: DG.DataFramePlotHelper[] = [];
   distributionDfPlot: DG.DataFramePlotHelper[] = [];
@@ -28,8 +32,7 @@ export class LogoSummary extends DG.JsViewer {
     this.webLogoMode = this.string('webLogoMode', PositionHeight.full,
       {choices: [PositionHeight.full, PositionHeight.Entropy]});
-    this.membersRatioThreshold = this.float('membersRatioThreshold', 0.7, {min: 0, max: 1.0});
-    this.newClusterName = this.string('newClusterName', 'New cluster');
+    this.membersRatioThreshold = this.float('membersRatioThreshold', 0.3, {min: 0, max: 1.0});
   }
   onTableAttached(): void {
@@ -82,136 +85,147 @@ export class LogoSummary extends DG.JsViewer {
     const isDfFiltered = this.dataFrame.filter.anyFalse;
     const filteredDf = isDfFiltered ? this.dataFrame.clone(this.dataFrame.filter) : this.dataFrame;
     const filteredDfCols = filteredDf.columns;
+    const filteredDfRowCount = filteredDf.rowCount;
     const activityCol = filteredDf.getCol(C.COLUMNS_NAMES.ACTIVITY_SCALED);
     const activityColData = activityCol.getRawData();
-    const filteredDfClustersCol = filteredDf.getCol(clustersColName);
-    const filteredDfClustersColData = filteredDfClustersCol.getRawData();
-    const filteredDfClustersColCategories = filteredDfClustersCol.categories;
-    const filteredDfClustersColLength = filteredDfClustersColData.length;
+    const filteredDfClustCol = filteredDf.getCol(clustersColName);
+    const filteredDfClustColData = filteredDfClustCol.getRawData();
+    const filteredDfClustColCat = filteredDfClustCol.categories;
+    const pepCol: DG.Column<string> = filteredDf.getCol(this.model.settings.sequenceColumnName!);
-    // const customClustersColumnsList = wu(this.model.customClusters).toArray();
     const query: { [key: string]: string } = {};
     query[C.TAGS.CUSTOM_CLUSTER] = '1';
-    const customClustersColumnsList = wu(filteredDfCols.byTags(query)).filter(c => c.max > 0).toArray();
-    const getAggregatedColName = (aggF: string, colName: string) => `${aggF}(${colName})`;
-    const isCustomCluster = (cluster: string) => filteredDfCols.contains(cluster);
-    let summaryTableBuilder = filteredDf.groupBy([clustersColName]);
-    const aggregateColumnsEntries = Object.entries(this.model.settings.columns ?? {});
-    for (const [colName, aggregationFunc] of aggregateColumnsEntries) {
-      summaryTableBuilder = summaryTableBuilder.add(
-        aggregationFunc as any, colName, getAggregatedColName(aggregationFunc, colName));
+    const customClustColList: DG.Column<boolean>[] =
+      wu(filteredDfCols.byTags(query)).filter((c) => c.max > 0).toArray();
+    const customLST = DG.DataFrame.create(customClustColList.length);
+    const customLSTCols = customLST.columns;
+    const customLSTClustCol = customLSTCols.addNewString(clustersColName);
+    const customMembersColData = customLSTCols.addNewInt(C.LST_COLUMN_NAMES.MEMBERS).getRawData();
+    const customWebLogoCol = customLSTCols.addNewString(C.LST_COLUMN_NAMES.WEB_LOGO);
+    const customDistCol = customLSTCols.addNewString(C.LST_COLUMN_NAMES.DISTRIBUTION);
+    const customMDColData = customLSTCols.addNewFloat(C.LST_COLUMN_NAMES.MEAN_DIFFERENCE).getRawData();
+    const customPValColData = customLSTCols.addNewFloat(C.LST_COLUMN_NAMES.P_VALUE).getRawData();
+    const customRatioColData = customLSTCols.addNewFloat(C.LST_COLUMN_NAMES.RATIO).getRawData();
+    let origLSTBuilder = filteredDf.groupBy([clustersColName]);
+    const aggColsEntries = Object.entries(this.model.settings.columns ?? {});
+    const aggColNames = aggColsEntries.map(([colName, aggFn]) => getAggregatedColName(aggFn, colName));
+    const customAggRawCols = new Array(aggColNames.length);
+    const colAggEntries = aggColsEntries.map(
+      ([colName, aggFn]) => [filteredDf.getCol(colName), aggFn] as [DG.Column<number>, DG.AggregationType]);
+    for (let aggIdx = 0; aggIdx < aggColsEntries.length; ++aggIdx) {
+      const [colName, aggFn] = aggColsEntries[aggIdx];
+      origLSTBuilder = origLSTBuilder.add(aggFn, colName, aggColNames[aggIdx]);
+      const customLSTAggCol = customLSTCols.addNewFloat(aggColNames[aggIdx]);
+      customAggRawCols[aggIdx] = customLSTAggCol.getRawData();
     }
-    const tempSummaryTable = summaryTableBuilder.aggregate();
-    const tempSummaryTableLength = tempSummaryTable.rowCount;
-    const tempClustersCol: DG.Column<string> = tempSummaryTable.getCol(clustersColName);
-    const summaryTableLength = tempSummaryTableLength + customClustersColumnsList.length;
-    const summaryTable = DG.DataFrame.create(summaryTableLength);
-    const summaryTableCols = summaryTable.columns;
-    const clustersCol = summaryTableCols.addNewString(clustersColName);
-    for (let i = 0; i < summaryTableLength; ++i) {
-      clustersCol.set(i, i < tempSummaryTableLength ? tempClustersCol.get(i) :
-        customClustersColumnsList[i - tempSummaryTableLength].name);
-    }
-    const clustersColData = clustersCol.getRawData();
-    const clustersColCategories = clustersCol.categories;
+    // BEGIN: fill LST part with custom clusters
+    const customWebLogoPlots = new Array(customClustColList.length);
+    const customDistPlots = new Array(customClustColList.length);
-    const peptideCol: DG.Column<string> = filteredDf.getCol(this.model.settings.sequenceColumnName!);
-    const peptideColData = peptideCol.getRawData();
-    const peptideColCategories = peptideCol.categories;
-    const peptideColTags = peptideCol.tags;
+    for (let rowIdx = 0; rowIdx < customClustColList.length; ++rowIdx) {
+      const customClustCol = customClustColList[rowIdx];
+      customLSTClustCol.set(rowIdx, customClustCol.name);
+      const mask = customClustCol.toList() as boolean[];
+      const bsMask = DG.BitSet.create(filteredDfRowCount, (i) => mask[i]);
-    const membersColData = summaryTableCols.addNewInt(C.LST_COLUMN_NAMES.MEMBERS).getRawData();
-    const webLogoCol = summaryTableCols.addNewString(C.LST_COLUMN_NAMES.WEB_LOGO);
-    const distributionCol = summaryTableCols.addNewString(C.LST_COLUMN_NAMES.DISTRIBUTION);
-    const meanDifferenceColData = summaryTableCols.addNewFloat(C.LST_COLUMN_NAMES.MEAN_DIFFERENCE).getRawData();
-    const pValColData = summaryTableCols.addNewFloat(C.LST_COLUMN_NAMES.P_VALUE).getRawData();
-    const ratioColData = summaryTableCols.addNewFloat(C.LST_COLUMN_NAMES.RATIO).getRawData();
-    for (const [colName, aggregationFunc] of aggregateColumnsEntries) {
-      const tempSummaryTableCol = tempSummaryTable.getCol(getAggregatedColName(aggregationFunc, colName));
-      const summaryTableCol = summaryTableCols.addNew(tempSummaryTableCol.name, tempSummaryTableCol.type);
-      summaryTableCol.init((i) => i < tempSummaryTableLength ? tempSummaryTableCol.get(i) : null);
+      let stats: Stats;
+      if (isDfFiltered) {
+        const maskInfo: MaskInfo = {
+          mask: mask,
+          trueCount: bsMask.trueCount,
+          falseCount: bsMask.falseCount,
+        };
+        stats = getStats(activityColData, maskInfo);
+      } else
+        stats = this.model.clusterStats[CLUSTER_TYPE.CUSTOM][customClustCol.name];
+      customMembersColData[rowIdx] = stats.count;
+      customWebLogoPlots[rowIdx] = this.createWebLogoPlot(pepCol, bsMask);
+      customDistPlots[rowIdx] = this.createDistributionPlot(activityCol, customClustColList[rowIdx]);
+      customMDColData[rowIdx] = stats.meanDifference;
+      customPValColData[rowIdx] = stats.pValue;
+      customRatioColData[rowIdx] = stats.ratio;
+      for (let aggColIdx = 0; aggColIdx < aggColNames.length; ++aggColIdx) {
+        const [col, aggFn] = colAggEntries[aggColIdx];
+        customAggRawCols[aggColIdx][rowIdx] = getAggregatedValue(col, aggFn, bsMask);
+      }
     }
-    this.webLogoDfPlot = new Array(summaryTableLength);
-    this.distributionDfPlot = new Array(summaryTableLength);
+    customWebLogoCol.setTag(DG.TAGS.CELL_RENDERER, 'html');
+    customDistCol.setTag(DG.TAGS.CELL_RENDERER, 'html');
-    for (let summaryTableRowIndex = 0; summaryTableRowIndex < summaryTableLength; ++summaryTableRowIndex) {
-      const isOriginalCluster = summaryTableRowIndex < tempSummaryTableLength;
-      const currentClusterCategoryIndex = clustersColData[summaryTableRowIndex];
-      const currentCluster = clustersColCategories[currentClusterCategoryIndex]; // Cluster name
-      const customClusterColData = customClustersColumnsList.find((col) => col.name == currentCluster)?.toList();
+    // END
-      const isValidIndex = isOriginalCluster ?
-        (j: number) => filteredDfClustersColCategories[filteredDfClustersColData[j]] == currentCluster :
-        (j: number) => customClusterColData![j];
+    // BEGIN: fill LST part with original clusters
+    const origLST = origLSTBuilder.aggregate();
+    const origLSTLen = origLST.rowCount;
+    const origLSTCols = origLST.columns;
+    const origLSTClustCol: DG.Column<string> = origLST.getCol(clustersColName);
-      //TODO: use bitset instead of splitCol
-      const splitCol = DG.Column.bool(C.COLUMNS_NAMES.SPLIT_COL, activityCol.length);
-      const getSplitColValueAt = isOriginalCluster ?
-        (splitColIndex: number) => filteredDfClustersColData[splitColIndex] == currentClusterCategoryIndex :
-        (splitColIndex: number) => customClusterColData![splitColIndex];
-      splitCol.init((i) => getSplitColValueAt(i));
+    const origLSTClustColCat = origLSTClustCol.categories;
+    const origMembersColData = origLSTCols.addNewInt(C.LST_COLUMN_NAMES.MEMBERS).getRawData();
+    const origWebLogoCol = origLSTCols.addNewString(C.LST_COLUMN_NAMES.WEB_LOGO);
+    const origDistCol = origLSTCols.addNewString(C.LST_COLUMN_NAMES.DISTRIBUTION);
+    const origMDColData = origLSTCols.addNewFloat(C.LST_COLUMN_NAMES.MEAN_DIFFERENCE).getRawData();
+    const origPValColData = origLSTCols.addNewFloat(C.LST_COLUMN_NAMES.P_VALUE).getRawData();
+    const origRatioColData = origLSTCols.addNewFloat(C.LST_COLUMN_NAMES.RATIO).getRawData();
+    const origWebLogoPlots = new Array(origLSTLen);
+    const origDistPlots = new Array(origLSTLen);
+    const origClustMasks = Array.from({length: origLSTLen},
+      () => Array.from({length: filteredDfRowCount}, () => false));
+    for (let rowIdx = 0; rowIdx < filteredDfRowCount; ++rowIdx) {
+      const filteredClustName = filteredDfClustColCat[filteredDfClustColData[rowIdx]];
+      const origClustIdx = origLSTClustColCat.indexOf(filteredClustName);
+      origClustMasks[origClustIdx][rowIdx] = true;
+    }
+    for (let rowIdx = 0; rowIdx < origLSTLen; ++rowIdx) {
+      const mask = origClustMasks[rowIdx];
+      const bsMask = DG.BitSet.create(filteredDfRowCount, (i) => mask[i]);
       let stats: Stats;
       if (isDfFiltered) {
-        const trueCount = splitCol.stats.sum;
+        const trueCount = bsMask.trueCount;
         const maskInfo = {
           trueCount: trueCount,
-          falseCount: activityColData.length - trueCount,
-          mask: splitCol.toList() as boolean[],
+          falseCount: filteredDfRowCount - trueCount,
+          mask: mask,
         };
         stats = getStats(activityColData, maskInfo);
       } else
-        stats = this.model.clusterStats[currentCluster];
+        stats = this.model.clusterStats[CLUSTER_TYPE.ORIGINAL][origLSTClustColCat[rowIdx]];
+      origMembersColData[rowIdx] = stats.count;
+      origWebLogoPlots[rowIdx] = this.createWebLogoPlot(pepCol, bsMask);
+      origDistPlots[rowIdx] = this.createDistributionPlot(activityCol,
+        DG.Column.fromBitSet(C.COLUMNS_NAMES.SPLIT_COL, bsMask));
+      origMDColData[rowIdx] = stats.meanDifference;
+      origPValColData[rowIdx] = stats.pValue;
+      origRatioColData[rowIdx] = stats.ratio;
+    }
-      const tCol = DG.Column.string('peptides', stats.count);
-      let tColIdx = 0;
-      for (let j = 0; j < filteredDfClustersColLength; ++j) {
-        if (isValidIndex(j))
-          tCol.set(tColIdx++, peptideColCategories[peptideColData[j]]);
-      }
+    origWebLogoCol.setTag(DG.TAGS.CELL_RENDERER, 'html');
+    origDistCol.setTag(DG.TAGS.CELL_RENDERER, 'html');
-      for (const tag of peptideColTags)
-        tCol.setTag(tag[0], tag[1]);
-      const uh = new UnitsHandler(tCol);
-      tCol.setTag(bioTAGS.alphabetSize, uh.getAlphabetSize().toString());
-      const distributionTable = DG.DataFrame.fromColumns([activityCol, splitCol]);
-      const dfSlice = DG.DataFrame.fromColumns([tCol]);
-      this.webLogoDfPlot[summaryTableRowIndex] = dfSlice.plot;
-      this.distributionDfPlot[summaryTableRowIndex] = distributionTable.plot;
-      membersColData[summaryTableRowIndex] = stats.count;
-      meanDifferenceColData[summaryTableRowIndex] = stats.meanDifference;
-      pValColData[summaryTableRowIndex] = stats.pValue;
-      ratioColData[summaryTableRowIndex] = stats.ratio;
-      //Setting aggregated col values
-      if (!isOriginalCluster) {
-        for (const [colName, aggregationFunc] of aggregateColumnsEntries) {
-          const arrayBuffer = filteredDf.getCol(colName).getRawData();
-          const clusterMask = DG.BitSet.fromBytes(arrayBuffer.buffer, arrayBuffer.byteLength / 4);
-          const subDf = filteredDf.clone(clusterMask, [colName]);
-          const newColName = getAggregatedColName(aggregationFunc, colName);
-          const aggregatedDf = subDf.groupBy()
-            .add(aggregationFunc as any, colName, newColName)
-            .aggregate();
-          const value = aggregatedDf.get(newColName, 0);
-          summaryTable.set(newColName, summaryTableRowIndex, value);
-        }
-      }
-    }
-    webLogoCol.setTag(DG.TAGS.CELL_RENDERER, 'html');
-    distributionCol.setTag(DG.TAGS.CELL_RENDERER, 'html');
+    // END
+    // combine LSTs and create a grid
+    const summaryTable = origLST.append(customLST);
+    this.webLogoDfPlot = origWebLogoPlots.concat(customWebLogoPlots);
+    this.distributionDfPlot = origDistPlots.concat(customDistPlots);
     this.viewerGrid = summaryTable.plot.grid();
     this.updateFilter();
@@ -225,12 +239,12 @@ export class LogoSummary extends DG.JsViewer {
       if (!cell.isTableCell || currentRowIdx == null || currentRowIdx == -1)
         return;
-      if (cell.tableColumn?.name == 'WebLogo') {
+      if (cell.tableColumn?.name == C.LST_COLUMN_NAMES.WEB_LOGO) {
         this.webLogoDfPlot[currentRowIdx]
           .fromType('WebLogo', {maxHeight: cell.grid.props.rowHeight - 5, positionHeight: this.webLogoMode,
             horizontalAlignment: 'left'})
           .then((viewer) => cell.element = viewer.root);
-      } else if (cell.tableColumn?.name == 'Distribution') {
+      } else if (cell.tableColumn?.name == C.LST_COLUMN_NAMES.DISTRIBUTION) {
         const viewerRoot = this.distributionDfPlot[currentRowIdx].histogram({
           filteringEnabled: false,
           valueColumnName: C.COLUMNS_NAMES.ACTIVITY_SCALED,
@@ -276,8 +290,12 @@ export class LogoSummary extends DG.JsViewer {
       canvasContext.restore();
     });
     this.viewerGrid.onCellTooltip((cell, x, y) => {
-      if (!cell.isColHeader && cell.tableColumn?.name === clustersColName)
-        this.model.showTooltipCluster(cell.cell.rowIndex, x, y, cell.cell.value);
+      if (!cell.isColHeader && cell.tableColumn?.name === clustersColName) {
+        const clustName = cell.cell.value;
+        const clustColCat = this.dataFrame.getCol(this.model.settings.clustersColumnName!).categories;
+        const clustType = clustColCat.includes(clustName) ? CLUSTER_TYPE.ORIGINAL : CLUSTER_TYPE.CUSTOM;
+        this.showTooltip(clustName, x, y, clustType);
+      }
       return true;
     });
     const webLogoGridCol = this.viewerGrid.columns.byName('WebLogo')!;
@@ -318,7 +336,8 @@ export class LogoSummary extends DG.JsViewer {
       falseCount: selection.falseCount,
     };
     const stats = getStats(activityScaledCol.getRawData(), maskInfo);
-    const distributionTable = DG.DataFrame.fromColumns([activityScaledCol, filteredDf.getCol(this.model.splitCol.name)]);
+    const distributionTable =
+      DG.DataFrame.fromColumns([activityScaledCol, filteredDf.getCol(this.model.splitCol.name)]);
     const peptideCol: DG.Column<string> = filteredDf.getCol(this.model.settings.sequenceColumnName!);
     const peptideColData = peptideCol.getRawData();
@@ -339,16 +358,15 @@ export class LogoSummary extends DG.JsViewer {
     this.webLogoDfPlot.push(webLogoTable.plot);
     this.distributionDfPlot.push(distributionTable.plot);
-    const colCategories = viewerDfCols.byName(this.model.settings.clustersColumnName!).categories;
-    let newClusterName = this.newClusterName;
-    let clusterNum = 1;
-    const getString = !isNaN(parseInt(newClusterName)) ? () => `${parseInt(newClusterName) + 1}` :
-      newClusterName == '' ? () => `${clusterNum++}` :
-        () => `${this.newClusterName} ${clusterNum++}`;
-    while (colCategories.includes(newClusterName))
-      newClusterName = getString();
+    const newClusterName = viewerDfCols.getUnusedName('New Cluster');
-    this.getProperty('newClusterName')?.set(this, getString());
+    const aggregatedValues: {[colName: string]: number} = {};
+    const aggColsEntries = Object.entries(this.model.settings.columns ?? {});
+    for (const [colName, aggFn] of aggColsEntries) {
+      const newColName = getAggregatedColName(aggFn, colName);
+      const col = filteredDf.getCol(colName);
+      aggregatedValues[newColName] = getAggregatedValue(col, aggFn, selection);
+    }
     for (let i = 0; i < viewerDfColsLength; ++i) {
       const col = viewerDfCols.byIndex(i);
@@ -359,11 +377,12 @@ export class LogoSummary extends DG.JsViewer {
               col.name == C.LST_COLUMN_NAMES.MEAN_DIFFERENCE ? stats.meanDifference:
                 col.name == C.LST_COLUMN_NAMES.P_VALUE ? stats.pValue:
                   col.name == C.LST_COLUMN_NAMES.RATIO ? stats.ratio:
+                    col.name in aggregatedValues ? aggregatedValues[col.name] :
         console.warn(`PeptidesLSTWarn: value for column ${col.name} is undefined`)! || null;
     }
     viewerDf.rows.addNew(newClusterVals);
-    this.model.clusterStats[newClusterName] = stats;
+    this.model.clusterStats[CLUSTER_TYPE.CUSTOM][newClusterName] = stats;
     this.model.addNewCluster(newClusterName);
   }
@@ -371,18 +390,98 @@ export class LogoSummary extends DG.JsViewer {
     const lss = this.model.logoSummarySelection;
     const dfCols = this.dataFrame.columns;
-    const removeClusterIndexesList = lss.filter((cluster) => dfCols.contains(cluster));
-    if (removeClusterIndexesList.length == 0)
-      return grok.shell.info('Nothing removed. Please select a created cluster to remove');
+    // Names of the clusters to remove
+    const clustNames = lss.filter((cluster) => dfCols.contains(cluster));
+    if (clustNames.length == 0)
+      return grok.shell.warning('Only custom clusters can be removed');
-    for (const cluster of removeClusterIndexesList) {
+    const viewerDf = this.viewerGrid.dataFrame;
+    const viewerDfRows = viewerDf.rows;
+    const clustColCat = viewerDf.getCol(this.model.settings.clustersColumnName!).categories;
+    for (const cluster of clustNames) {
       lss.splice(lss.indexOf(cluster), 1);
       dfCols.remove(cluster);
+      delete this.model.clusterStats[CLUSTER_TYPE.CUSTOM][cluster];
+      const clustIdx = clustColCat.indexOf(cluster);
+      viewerDfRows.removeAt(clustIdx);
+      this.webLogoDfPlot.splice(clustIdx, 1);
+      this.distributionDfPlot.splice(clustIdx, 1);
     }
     this.model.logoSummarySelection = lss;
-    this.model.clusterStats = this.model.calculateClusterStatistics();
-    this.createLogoSummaryGrid();
     this.render();
   }
+  showTooltip(clustName: string, x: number, y: number, clustType: ClusterType = 'original'): HTMLDivElement | null {
+    const bs = this.dataFrame.filter;
+    const filteredDf = bs.anyFalse ? this.dataFrame.clone(bs) : this.dataFrame;
+    const rowCount = filteredDf.rowCount;
+    const maskInfo: MaskInfo = {mask: new Array(rowCount), trueCount: 0, falseCount: 0};
+    const activityCol = filteredDf.getCol(C.COLUMNS_NAMES.ACTIVITY_SCALED);
+    const activityColData = activityCol.getRawData();
+    if (clustType == CLUSTER_TYPE.ORIGINAL) {
+      //TODO: use bitset instead of splitCol
+      const origClustCol = filteredDf.getCol(this.model.settings.clustersColumnName!);
+      const origClustColData = origClustCol.getRawData();
+      const origClustColCategories = origClustCol.categories;
+      const seekValue = origClustColCategories.indexOf(clustName);
+      for (let i = 0; i < rowCount; ++i) {
+        maskInfo.mask[i] = origClustColData[i] == seekValue;
+        if (maskInfo.mask[i])
+          maskInfo.trueCount++;
+        else
+          maskInfo.falseCount++;
+      }
+    } else {
+      const clustCol: DG.Column<boolean> = filteredDf.getCol(clustName);
+      maskInfo.mask = clustCol.toList() as boolean[];
+      for (let i = 0; i < rowCount; ++i) {
+        if (maskInfo.mask[i])
+          maskInfo.trueCount++;
+        else
+          maskInfo.falseCount++;
+      }
+    }
+    const stats = bs.anyFalse ? getStats(activityColData, maskInfo) : this.model.clusterStats[clustType][clustName];
+    if (!stats.count)
+      return null;
+    const colResults: {[colName: string]: number} = {};
+    const mask = DG.BitSet.create(rowCount, (i) => maskInfo.mask[i] as boolean);
+    const splitCol = DG.Column.fromBitSet(C.COLUMNS_NAMES.SPLIT_COL, mask);
+    const distDf = DG.DataFrame.fromColumns([activityCol, splitCol]);
+    for (const [colName, aggFn] of Object.entries(this.model.settings.columns || {})) {
+      const newColName = getAggregatedColName(colName, aggFn);
+      const value = getAggregatedValue(filteredDf.getCol(colName), aggFn, mask);
+      colResults[newColName] = value;
+    }
+    const das = getDistributionAndStats(distDf, stats, `Cluster: ${clustName}`, 'Other', true, splitCol.name);
+    const resultMap: {[key: string]: any} = {...das.tableMap, ...colResults};
+    const tooltip = wrapDistroAndStatsDefault(das.labels, das.histRoot, resultMap, true);
+    ui.tooltip.show(tooltip, x, y);
+    return tooltip;
+  }
+  createWebLogoPlot(pepCol: DG.Column<string>, mask: DG.BitSet): DG.DataFramePlotHelper {
+    const df = DG.DataFrame.fromColumns([pepCol]).clone(mask);
+    return df.plot;
+  }
+  createDistributionPlot(activityCol: DG.Column<number>, splitCol: DG.Column<boolean>): DG.DataFramePlotHelper {
+    const df = DG.DataFrame.fromColumns([activityCol, splitCol]);
+    return df.plot;
+  }
 }

package/src/viewers/peptide-space-viewer.ts CHANGED

@@ -57,7 +57,7 @@ export class PeptideSpaceViewer extends DG.JsViewer {
   }
   async render(computeData=false): Promise<void> {
-    if (computeData && !this.isEmbeddingCreating && !this.model.isChangingEdfBitset) {
+    if (computeData && !this.isEmbeddingCreating /*&& !this.model.isChangingEdfBitset*/) {
       this.isEmbeddingCreating = true;
       $(this.root).empty();
       const viewerHost = ui.waitBox(async () => {
@@ -65,17 +65,17 @@ export class PeptideSpaceViewer extends DG.JsViewer {
         const alignedSeqCol = this.dataFrame.getCol(this.model.settings.sequenceColumnName!);
         const edf = await computeWeights(this.dataFrame, this.method, this.measure, this.cyclesCount, alignedSeqCol);
         this.dataFrame.temp[C.EMBEDDING_STATUS] = true;
-        this.model.edf = edf;
+        // this.model.edf = edf;
         if (edf === null)
           return ui.label('Could not compute embeddings');
         const edfSelection = edf.selection;
         edfSelection.copyFrom(this.dataFrame.selection);
-        edfSelection.onChanged.subscribe(() => {
-          if (!this.model.isChangingEdfBitset)
-            this.model.fireBitsetChanged(true);
-        });
+        // edfSelection.onChanged.subscribe(() => {
+        //   if (!this.model.isChangingEdfBitset)
+        //     this.model.fireBitsetChanged(true);
+        // });
         const colorCol = this.dataFrame.getCol(C.COLUMNS_NAMES.ACTIVITY_SCALED);
         edf.columns.add(colorCol);

package/src/viewers/sar-viewer.ts CHANGED

@@ -5,7 +5,7 @@ import * as DG from 'datagrok-api/dg';
 import $ from 'cash-dom';
 import * as C from '../utils/constants';
 import * as CR from '../utils/cell-renderer';
-import {PeptidesModel} from '../model';
+import {PeptidesModel, VIEWER_TYPE} from '../model';
 export class SARViewerBase extends DG.JsViewer {
   tempName!: string;
@@ -53,7 +53,7 @@ export class SARViewerBase extends DG.JsViewer {
   render(refreshOnly = false): void {
     if (!refreshOnly) {
       $(this.root).empty();
-      let switchHost = ui.divText('Most Potent Residues', {id: 'pep-viewer-title'});
+      let switchHost = ui.divText(VIEWER_TYPE.MOST_POTENT_RESIDUES, {id: 'pep-viewer-title'});
       if (this.name == 'MC') {
         const mutationCliffsMode = ui.boolInput('', this.isMutationCliffsMode === '1');
         mutationCliffsMode.root.addEventListener('click', () => {
@@ -172,7 +172,7 @@ export class MonomerPosition extends SARViewerBase {
 /** Vertical structure activity relationship viewer */
 export class MostPotentResiduesViewer extends SARViewerBase {
   _name = 'MPR';
-  _titleHost = ui.divText('Most Potent Residues', {id: 'pep-viewer-title'});
+  _titleHost = ui.divText(VIEWER_TYPE.MOST_POTENT_RESIDUES, {id: 'pep-viewer-title'});
   _isVertical = true;
   constructor() {

package/src/widgets/distribution.ts CHANGED

@@ -5,10 +5,9 @@ import {StringDictionary} from '@datagrok-libraries/utils/src/type-declarations'
 import $ from 'cash-dom';
 import * as C from '../utils/constants';
-import {getStats, MaskInfo, Stats} from '../utils/statistics';
+import {getAggregatedValue, getStats, MaskInfo, Stats} from '../utils/statistics';
 import {PeptidesModel} from '../model';
 import {wrapDistroAndStatsDefault} from '../utils/misc';
-import wu from 'wu';
 const allConst = 'All';
 const otherConst = 'Other';
@@ -26,7 +25,7 @@ export function getDistributionWidget(table: DG.DataFrame, model: PeptidesModel)
   const positionsLen = positions.length;
   let aarStr = allConst;
   let otherStr = '';
-  const useSelectedStr = model.isPeptideSpaceChangingBitset;
+  // const useSelectedStr = model.isPeptideSpaceChangingBitset;
   const updateDistributionHost = (): void => {
     model.splitByPos = splitByPosition.value!;
@@ -81,13 +80,14 @@ export function getDistributionWidget(table: DG.DataFrame, model: PeptidesModel)
         //OPTIMIZE: don't create Bitset, use bool[]
         const mask = DG.BitSet.create(rowCount, (i) => aarList.includes(posCol.get(i)));
+        const splitCol = DG.Column.fromBitSet(C.COLUMNS_NAMES.SPLIT_COL, mask);
         const maskInfo: MaskInfo = {
-          mask: mask.getBuffer(),
+          mask: splitCol.toList() as boolean[],
           trueCount: mask.trueCount,
           falseCount: mask.falseCount,
         };
         const stats = getStats(activityScaledData, maskInfo);
-        const splitCol = DG.Column.fromBitSet(C.COLUMNS_NAMES.SPLIT_COL, mask);
         const distributionTable = DG.DataFrame.fromColumns([activityScaledCol, splitCol]);
         const indexes = mask.getSelectedIndexes();
@@ -137,13 +137,14 @@ export function getDistributionWidget(table: DG.DataFrame, model: PeptidesModel)
           }
           return false;
         });
+        const splitCol = DG.Column.fromBitSet(C.COLUMNS_NAMES.SPLIT_COL, mask);
         const maskInfo: MaskInfo = {
-          mask: mask.getBuffer(),
+          mask: splitCol.toList() as boolean[],
           trueCount: mask.trueCount,
           falseCount: mask.falseCount,
         };
         const stats = getStats(activityScaledData, maskInfo);
-        const splitCol = DG.Column.fromBitSet(C.COLUMNS_NAMES.SPLIT_COL, mask);
         const distributionTable = DG.DataFrame.fromColumns([activityScaledCol, splitCol]);
         const indexes = mask.getSelectedIndexes();
@@ -169,7 +170,7 @@ export function getDistributionWidget(table: DG.DataFrame, model: PeptidesModel)
         res.push(ui.divText('No distribution'));
       else {
         otherStr = '';
-        if (useSelectedStr) {
+        if (false /*useSelectedStr*/) {
           aarStr = 'Selected';
           otherStr = otherConst;
         } else if (positionsLen) {
@@ -185,22 +186,18 @@ export function getDistributionWidget(table: DG.DataFrame, model: PeptidesModel)
         }
         const distributionTable = DG.DataFrame.fromColumns([activityScaledCol, splitCol]);
-        // distributionTable.filter.copyFrom(table.filter);
-        const indexes = model.getCompoundBitest().getSelectedIndexes();
+        const compoundBs = model.getCompoundBitest();
         const colResults: {[colName: string]: number} = {};
         for (const [col, agg] of Object.entries(model.settings.columns || {})) {
           const currentCol = table.getCol(col);
-          const currentColData = currentCol.getRawData();
-          const tempCol = DG.Column.float('', indexes.length);
-          tempCol.init((i) => currentColData[indexes[i]]);
-          colResults[`${agg}(${col})`] = tempCol.stats[agg as keyof DG.Stats] as number;
+          colResults[`${agg}(${col})`] = getAggregatedValue(currentCol, agg, compoundBs);
         }
         const maskInfo: MaskInfo = {
-          mask: table.selection.getBuffer(),
-          trueCount: table.selection.trueCount,
-          falseCount: table.selection.falseCount,
+          mask: splitCol.toList() as boolean[],
+          trueCount: compoundBs.trueCount,
+          falseCount: compoundBs.falseCount,
         };
         const stats = getStats(activityScaledCol.getRawData(), maskInfo);
         const das = getDistributionAndStats(distributionTable, stats, aarStr, otherStr);

package/src/widgets/peptides.ts CHANGED

@@ -188,7 +188,10 @@ export async function startAnalysis(activityColumn: DG.Column<number>, peptidesC
     newDf.setTag(C.TAGS.UUID, dfUuid);
     newDf.setTag('monomerType', monomerType);
     model = PeptidesModel.getInstance(newDf);
-    await model.addViewers();
+    // await model.addViewers();
+    if (clustersColumn) await model.addLogoSummaryTable();
+    await model.addMonomerPosition();
+    await model.addMostPotentResidues();
   } else
     grok.shell.error('The activity column must be of numeric type!');
   progress.close();