npm - @datagrok/peptides - Versions diffs - 0.3.0 → 0.5.6 - Mend

@datagrok/peptides 0.3.0 → 0.5.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

package/.eslintrc.json +29 -0
package/package.json +4 -4
package/src/describe.ts +92 -41
package/src/package.ts +13 -35
package/src/peptides.ts +93 -0
package/src/utils/cell-renderer.ts +185 -60
package/src/utils/chem-palette.ts +316 -208
package/src/utils/correlation-analysis.ts +165 -70
package/src/utils/peptide-similarity-space.ts +29 -25
package/src/utils/split-aligned.ts +8 -1
package/src/viewers/logo-viewer.ts +48 -5
package/src/viewers/model.ts +56 -16
package/src/viewers/sar-viewer.ts +100 -33
package/src/viewers/spiral-plot.ts +97 -0
package/src/viewers/stacked-barchart-viewer.ts +84 -9
package/src/widgets/analyze-peptides.ts +18 -34
package/src/widgets/manual-alignment.ts +12 -4
package/src/widgets/peptide-molecule.ts +8 -1
package/src/workers/dimensionality-reducer.ts +1 -1

package/.eslintrc.json ADDED Viewed

@@ -0,0 +1,29 @@
+{
+  "env": {
+    "browser": true,
+    "es2021": true
+  },
+  "extends": [
+    "google"
+  ],
+  "parser": "@typescript-eslint/parser",
+  "parserOptions": {
+    "ecmaVersion": 12,
+    "sourceType": "module"
+  },
+  "plugins": [
+    "@typescript-eslint"
+  ],
+  "rules": {
+    "indent": [
+      "error",
+      2
+    ],
+    "max-len": [
+      "error",
+      120
+    ],
+    "spaced-comment": "off",
+    "require-jsdoc": "off"
+  }
+}

package/package.json CHANGED Viewed

@@ -1,20 +1,20 @@
 {
 	"name": "@datagrok/peptides",
-	"version": "0.3.0",
+	"version": "0.5.6",
 	"description": "",
 	"dependencies": {
 		"@keckelt/tsne": "^1.0.2",
 		"cash-dom": "latest",
 		"d3": "latest",
-		"datagrok-api": ">0.95.4",
+		"datagrok-api": ">=0.104.0",
 		"dayjs": "latest",
 		"jaro-winkler-typescript": "^1.0.1",
 		"jstat": "^1.9.5",
 		"logojs-react": "^2.1.1",
 		"rxjs": "^6.5.5",
 		"umap-js": "^1.3.3",
-		"@datagrok-libraries/utils": ">=0.0.10",
-		"@datagrok-libraries/statistics": ">=0.1.4",
+		"@datagrok-libraries/utils": ">=0.0.13",
+		"@datagrok-libraries/statistics": ">=0.1.5",
 		"@types/d3": "^7.0.0",
 		"@types/jquery": "^3.5.6"
 	},

package/src/describe.ts CHANGED Viewed

@@ -1,15 +1,59 @@
-// eslint-disable-next-line no-unused-vars
-import * as grok from 'datagrok-api/grok';
 import * as ui from 'datagrok-api/ui';
 import * as DG from 'datagrok-api/dg';
 import {splitAlignedPeptides} from './utils/split-aligned';
 import {tTest} from '@datagrok-libraries/statistics/src/tests';
-import {fdrcorrection} from '@datagrok-libraries/statistics/src/multiple-tests.js';
+import {fdrcorrection} from '@datagrok-libraries/statistics/src/multiple-tests';
 import {ChemPalette} from './utils/chem-palette';
 import {setAARRenderer} from './utils/cell-renderer';
 const cp = new ChemPalette('grok');
+// Maps a one-letter amino acid residue code to the code of the group that residue belongs to.
+// The group codes are spelled out in groupDescription; describe() applies this mapping when grouping is enabled.
+const aarGroups = {
+  'R': 'PC',
+  'H': 'PC',
+  'K': 'PC',
+  'D': 'NC',
+  'E': 'NC',
+  'S': 'U',
+  'T': 'U',
+  'N': 'U',
+  'Q': 'U',
+  'C': 'SC',
+  'U': 'SC',
+  'G': 'SC',
+  'P': 'SC',
+  'A': 'H',
+  'V': 'H',
+  'I': 'H',
+  'L': 'H',
+  'M': 'H',
+  'F': 'H',
+  'Y': 'H',
+  'W': 'H',
+  // The '-' placeholder is kept as its own group (described as 'Unknown Amino Acid' in groupDescription).
+  '-': '-',
+};
+// Human-readable description and member residues for every group code used as a value in aarGroups.
+// describe() shows these in the tooltip of the amino acid residue column when grouping is enabled.
+const groupDescription: {[key: string]: {'description': string, 'aminoAcids': string[]}} = {
+  'PC': {'description': 'Positive Amino Acids, with Electrically Charged Side Chains', 'aminoAcids': ['R', 'H', 'K']},
+  'NC': {'description': 'Negative Amino Acids, with Electrically Charged Side Chains', 'aminoAcids': ['D', 'E']},
+  'U': {'description': 'Amino Acids with Polar Uncharged Side Chains', 'aminoAcids': ['S', 'T', 'N', 'Q']},
+  'SC': {'description': 'Special Cases', 'aminoAcids': ['C', 'U', 'G', 'P']},
+  'H': {
+    'description': 'Amino Acids with Hydrophobic Side Chain',
+    'aminoAcids': ['A', 'V', 'I', 'L', 'M', 'F', 'Y', 'W'],
+  },
+  '-': {'description': 'Unknown Amino Acid', 'aminoAcids': ['-']},
+};
+/*function customGridColumnHeader(cell: DG.GridCell) {
+  if (cell.isColHeader && cell.tableColumn != null) {
+    if (highlightedColumns.includes(parseInt(cell.tableColumn.name))) {
+      cell.style.backColor = 0xff1f77b4;
+    }
+  }
+}*/
+//TODO: decomposition!
 export async function describe(
   df: DG.DataFrame,
   activityColumn: string,
@@ -17,13 +61,15 @@ export async function describe(
   sourceGrid: DG.Grid,
   twoColorMode: boolean,
   initialBitset: DG.BitSet | null,
-): Promise<[DG.Grid, DG.Grid, DG.DataFrame]> {
+  grouping: boolean,
+): Promise<[DG.Grid, DG.Grid, DG.DataFrame, {[key: string]: string}]> {
   //Split the aligned sequence into separate AARs
   let splitSeqDf: DG.DataFrame | undefined;
   let invalidIndexes: number[];
   const col: DG.Column = df.columns.bySemType('alignedSequence');
   [splitSeqDf, invalidIndexes] = splitAlignedPeptides(col);
   splitSeqDf.name = 'Split sequence';
   const positionColumns = splitSeqDf.columns.names();
   const activityColumnScaled = `${activityColumn}Scaled`;
   const renderColNames: string[] = splitSeqDf.columns.names();
@@ -50,6 +96,7 @@ export async function describe(
       setAARRenderer(col, sourceGrid);
     }
   }
   if (sourceGrid) {
     const colNames:string[] = [];
     for (let i = 0; i < sourceGrid.columns.length; i++) {
@@ -105,6 +152,17 @@ export async function describe(
   let matrixDf = splitSeqDf.unpivot([activityColumnScaled], positionColumns, positionColName, aminoAcidResidue);
+  //TODO: move to chem palette
+  let groupMapping: {[key: string]: string} = {};
+  if (grouping) {
+    groupMapping = aarGroups;
+    const aarCol = matrixDf.getCol(aminoAcidResidue);
+    aarCol.init((index) => groupMapping[aarCol.get(index)[0]] ?? '-');
+    aarCol.compact();
+  } else {
+    Object.keys(aarGroups).forEach((value) => groupMapping[value] = value);
+  }
   //statistics for specific AAR at a specific position
   matrixDf = matrixDf.groupBy([positionColName, aminoAcidResidue])
     .add('count', activityColumnScaled, 'Count')
@@ -135,14 +193,14 @@ export async function describe(
     AAR = matrixDf.get(aminoAcidResidue, i);
     //@ts-ignore
-    splitSeqDf.rows.select((row) => row[position] === AAR);
+    splitSeqDf.rows.select((row) => groupMapping[row[position]] === AAR);
     currentActivity = splitSeqDf
       .clone(splitSeqDf.selection, [activityColumnScaled])
       .getCol(activityColumnScaled)
       .toList();
     //@ts-ignore
-    splitSeqDf.rows.select((row) => row[position] !== AAR);
+    splitSeqDf.rows.select((row) => groupMapping[row[position]] !== AAR);
     otherActivity = splitSeqDf
       .clone(splitSeqDf.selection, [activityColumnScaled])
       .getCol(activityColumnScaled)
@@ -190,11 +248,9 @@ export async function describe(
   aarList.sort((first, second) => getWeight(second) - getWeight(first));
   matrixDf.getCol(aminoAcidResidue).setCategoryOrder(aarList);
-  //const sequenceDf = segregateBestAtAllCateg(statsDf, twoColorMode);
   // SAR vertical table (naive, choose best Mean difference from pVals <= 0.01)
   // TODO: aquire ALL of the positions
   let sequenceDf = statsDf.groupBy(['Mean difference', aminoAcidResidue, positionColName, 'Count', 'Ratio', 'pValue'])
     .where('pValue <= 0.1')
     .aggregate();
@@ -224,17 +280,14 @@ export async function describe(
   SARVgrid.col('pValue')!.format = 'four digits after comma';
   SARVgrid.col('pValue')!.name = 'P-Value';
-  //FIXME: looks inefficient
-  for (const col of matrixDf.columns) {
-    if (col.name === aminoAcidResidue) {
-      setAARRenderer(col, SARgrid);
-      break;
+  if (!grouping) {
+    let tempCol = matrixDf.columns.byName(aminoAcidResidue);
+    if (tempCol) {
+      setAARRenderer(tempCol, SARgrid);
     }
-  }
-  for (const col of sequenceDf.columns) {
-    if (col.name === aminoAcidResidue) {
-      setAARRenderer(col, SARVgrid);
-      break;
+    tempCol = sequenceDf.columns.byName(aminoAcidResidue);
+    if (tempCol) {
+      setAARRenderer(tempCol, SARgrid);
     }
   }
@@ -251,19 +304,6 @@ export async function describe(
       return;
     }
-    // if (args.cell.isColHeader) {
-    //   if (args.cell.gridColumn.name != aminoAcidResidue) {
-    //     const textSize = args.g.measureText(args.cell.gridColumn.name);
-    //     args.g.fillStyle = '#4b4b4a';
-    //     args.g.fillText(
-    //       args.cell.gridColumn.name,
-    //       args.bounds.x + (args.bounds.width - textSize.width) / 2,
-    //       args.bounds.y + (textSize.actualBoundingBoxAscent + textSize.actualBoundingBoxDescent),
-    //     );
-    //   }
-    //   args.preventDefault();
-    // }
     if (
       args.cell.isTableCell &&
       args.cell.tableRowIndex !== null &&
@@ -357,28 +397,39 @@ export async function describe(
     }
     if (
       !cell.isColHeader &&
-        cell.tableColumn !== null &&
-        cell.tableColumn.name == aminoAcidResidue &&
-        cell.cell.value !== null &&
-        cell.tableRowIndex !== null
+      cell.tableColumn !== null &&
+      cell.tableColumn.name == aminoAcidResidue &&
+      cell.cell.value !== null &&
+      cell.tableRowIndex !== null
     ) {
-      cp.showTooltip(cell, x, y);
+      if (grouping) {
+        const currentGroup = groupDescription[cell.cell.value];
+        const divText = ui.divText('Amino Acids in this group: ' + currentGroup['aminoAcids'].join(', '));
+        ui.tooltip.show(ui.divV([ui.h3(currentGroup['description']), divText]), x, y);
+      } else {
+        cp.showTooltip(cell, x, y);
+      }
     }
     return true;
   };
   SARgrid.onCellTooltip(onCellTooltipFunc);
   SARVgrid.onCellTooltip(onCellTooltipFunc);
-  sourceGrid.onCellPrepare((cell) => {
+  sourceGrid.onCellPrepare((cell: DG.GridCell) => {
     const currentRowIndex = cell.tableRowIndex;
     if (currentRowIndex && invalidIndexes.includes(currentRowIndex) && !cell.isRowHeader) {
       cell.style.backColor = DG.Color.lightLightGray;
     }
   });
-  // for (const col of matrixDf.columns.names()) {
-  //   SARgrid.col(col)!.width = SARgrid.props.rowHeight;
-  // }
+  for (const col of matrixDf.columns.names()) {
+    SARgrid.col(col)!.width = SARgrid.props.rowHeight;
+  }
+  if (grouping) {
+    SARgrid.col(aminoAcidResidue)!.name = 'Groups';
+    SARVgrid.col(aminoAcidResidue)!.name = 'Groups';
+  }
-  return [SARgrid, SARVgrid, statsDf];
+  return [SARgrid, SARVgrid, statsDf, groupMapping];
 }

package/src/package.ts CHANGED Viewed

@@ -15,7 +15,7 @@ import {PeptideSimilaritySpaceWidget} from './utils/peptide-similarity-space';
 import {manualAlignmentWidget} from './widgets/manual-alignment';
 import {SARViewer, SARViewerVertical} from './viewers/sar-viewer';
 import {peptideMoleculeWidget} from './widgets/peptide-molecule';
-import {correlationAnalysisPlots} from './utils/correlation-analysis';
+import {SpiralPlot} from './viewers/spiral-plot';
 export const _package = new DG.Package();
 let tableGrid: DG.Grid;
@@ -25,35 +25,13 @@ let view: DG.TableView;
 async function main(chosenFile: string) {
   const pi = DG.TaskBarProgressIndicator.create('Loading Peptides');
-  //let peptides =
-  //  await grok.data.loadTable('https://datagrok.jnj.com/p/ejaeger.il23peptideidp5562/il-23_peptide_idp-5562');
   const path = _package.webRoot + 'files/' + chosenFile;
   const peptides = (await grok.data.loadTable(path));
   peptides.name = 'Peptides';
   peptides.setTag('dataType', 'peptides');
   const view = grok.shell.addTableView(peptides);
   tableGrid = view.grid;
-  // peptides.onSemanticTypeDetecting.subscribe((_: any) => {
-  //   const regexp = new RegExp(/^([^-^\n]*-){2,49}(\w|\(|\))+$/);
-  //   for (const col of peptides.columns) {
-  //     col.semType = DG.Detector.sampleCategories(col, (s: any) => regexp.test(s.trim())) ? 'alignedSequence' : null;
-  //     if (col.semType == 'alignedSequence') {
-  //       expandColumn(col, tableGrid, (ent)=>{
-  //         const subParts:string[] = ent.split('-');
-  //         // eslint-disable-next-line no-unused-vars
-  //         const [text, _] = processSequence(subParts);
-  //         let textSize = 0;
-  //         text.forEach((aar)=>{
-  //           textSize += aar.length;
-  //         });
-  //         return textSize;
-  //       });
-  //     }
-  //   }
-  // });
   view.name = 'PeptidesView';
   grok.shell.windows.showProperties = true;
   pi.close();
@@ -67,7 +45,6 @@ export function Peptides() {
   const appDescription = ui.info(
     [
-      // ui.divText('\n To start the application :', {style: {'font-weight': 'bolder'}}),
       ui.list([
         '- automatic recognition of peptide sequences',
         '- native integration with tons of Datagrok out-of-the box features (visualization, filtering, clustering, ' +
@@ -197,18 +174,19 @@ export function manualAlignment(monomer: string) {
 //input: column col {semType: alignedSequence}
 //output: widget result
 export async function peptideSpacePanel(col: DG.Column): Promise<DG.Widget> {
-  const widget = new PeptideSimilaritySpaceWidget(col);
+  const widget = new PeptideSimilaritySpaceWidget(col, view ?? grok.shell.v);
   return await widget.draw();
 }
-//name: Correllation analysis
-export async function correlationAnalysis() {
-  view = (grok.shell.v as DG.TableView);
-  const df = await grok.data.files.openTable('Demo:TestJobs:Files:DemoFiles/bio/peptides.csv');
-  const tview = grok.shell.addTableView(df);
-  const [cpviewer, bpviewer] = correlationAnalysisPlots(df.getCol('AlignedSequence'));
-  tview.dockManager.dock(cpviewer, 'right');
-  tview.dockManager.dock(bpviewer, 'down');
+//name: Spiral Plot
+////input: dataframe table
+////input: column activity
+//tags: viewer, panel
+//output: viewer result
+export async function spiralPlot(): Promise<DG.Viewer> {//(table: DG.DataFrame, activity: DG.Column) {
+  // The table/activity inputs are commented out in the annotations above, so a fixed demo file is loaded instead.
+  const table = await grok.data.files.openTable('Demo:TestJobs:Files:DemoFiles/bio/peptides.csv');
+  // Add the calculated column that the plot uses as its values (formula: 0 - Log10 of the Activity column).
+  const activity = await table.columns.addNewCalculated('-log10(Activity)', '0-Log10(${Activity})');
+  // Open the table in a new table view, remember it in the module-level `view`, and attach the spiral plot to it.
+  view = grok.shell.addTableView(table);
+  return view.addViewer(SpiralPlot.fromTable(table, {valuesColumnName: activity.name}));
+}

package/src/peptides.ts ADDED Viewed

@@ -0,0 +1,93 @@
+import * as ui from 'datagrok-api/ui';
+import * as DG from 'datagrok-api/dg';
+import {createPeptideSimilaritySpaceViewer} from './utils/peptide-similarity-space';
+import {addViewerToHeader} from './viewers/stacked-barchart-viewer';
+/**
+ * Peptides controller class.
+ *
+ * Sets up the peptide analysis layout on a table view (SAR viewers, peptide space viewer, stacked bar chart
+ * in the grid header) and provides a ribbon icon that tears the layout down again.
+ *
+ * @export
+ * @class Peptides
+ */
+export class Peptides {
+  /**
+   * Class initializer.
+   *
+   * Hides every named grid column whose semantic type is not 'aminoAcids', docks the SAR viewers and the
+   * peptide space viewer, adds the stacked bar chart to the grid header and replaces the ribbon panels with a
+   * single icon. Clicking the icon closes all non-grid viewers, removes the columns added to the dataframe
+   * after this method started, resets selection and filter, and restores the previous ribbon panels.
+   *
+   * @param {DG.Grid} tableGrid Working table grid.
+   * @param {DG.TableView} view Working view.
+   * @param {DG.DataFrame} currentDf Working table.
+   * @param {{[key: string]: string}} options SAR viewer options
+   * @param {DG.Column} col Aligned sequences column.
+   * @param {string} activityColumnChoice Activity column name.
+   * @return {Promise<void>} Resolves once all viewers are docked and the ribbon is replaced.
+   * @memberof Peptides
+   */
+  async init(
+    tableGrid: DG.Grid,
+    view: DG.TableView,
+    currentDf: DG.DataFrame,
+    options: {[key: string]: string},
+    col: DG.Column,
+    activityColumnChoice: string,
+  ): Promise<void> {
+    for (let i = 0; i < tableGrid.columns.length; i++) {
+      const aarCol = tableGrid.columns.byIndex(i);
+      if (aarCol &&
+          aarCol.name &&
+          aarCol.column?.semType != 'aminoAcids'
+      ) {
+        // aarCol is already null-checked above, so assign to it directly. Assigning through an optional
+        // chain (`byIndex(i)?.visible = false`) is not a valid assignment target and needed a ts-ignore.
+        aarCol.visible = false;
+      }
+    }
+    // Snapshot of the column names before the viewers are added; used by the close icon to find added columns.
+    const originalDfColumns = (currentDf.columns as DG.ColumnList).names();
+    const sarViewer = view.addViewer('peptide-sar-viewer', options);
+    const sarNode = view.dockManager.dock(sarViewer, DG.DOCK_TYPE.DOWN, null, 'SAR Viewer');
+    const sarViewerVertical = view.addViewer('peptide-sar-viewer-vertical');
+    view.dockManager.dock(sarViewerVertical, DG.DOCK_TYPE.RIGHT, sarNode, 'SAR Vertical Viewer');
+    const peptideSpaceViewer = await createPeptideSimilaritySpaceViewer(
+      currentDf,
+      col,
+      't-SNE',
+      'Levenshtein',
+      100,
+      view,
+      `${activityColumnChoice}Scaled`,
+    );
+    view.dockManager.dock(peptideSpaceViewer, DG.DOCK_TYPE.LEFT, sarNode, 'Peptide Space Viewer', 0.3);
+    const StackedBarchartProm = currentDf.plot.fromType('StackedBarChartAA');
+    addViewerToHeader(tableGrid, StackedBarchartProm);
+    // Remember the current ribbon panels before they are replaced, so the close icon can restore them.
+    const ribbonPanels = view.getRibbonPanels();
+    const hideIcon = ui.iconFA('window-close', () => { //undo?, times?
+      // Collect first, close afterwards (presumably so that closing does not disturb the iteration).
+      const viewers = [];
+      for (const viewer of view.viewers) {
+        if (viewer.type !== DG.VIEWER.GRID) {
+          viewers.push(viewer);
+        }
+      }
+      viewers.forEach((v) => v.close());
+      // Drop every column that was not part of the dataframe when init() started.
+      const cols = (currentDf.columns as DG.ColumnList);
+      for (const colName of cols.names()) {
+        if (!originalDfColumns.includes(colName)) {
+          cols.remove(colName);
+        }
+      }
+      currentDf.selection.setAll(false);
+      currentDf.filter.setAll(true);
+      tableGrid.setOptions({'colHeaderHeight': 20});
+      tableGrid.columns.setVisible(originalDfColumns);
+      view.setRibbonPanels(ribbonPanels);
+    }, 'Close viewers and restore dataframe');
+    view.setRibbonPanels([[hideIcon]]);
+  }
+}