npm - @datagrok/peptides - Versions diffs - 0.4.2 → 0.6.1 - Mend

@datagrok/peptides 0.4.2 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

package/detectors.js +1 -1
package/package.json +5 -3
package/src/describe.ts +36 -40
package/src/package.ts +27 -44
package/src/peptides.ts +70 -6
package/src/styles.css +37 -0
package/src/utils/cell-renderer.ts +126 -19
package/src/utils/chem-palette.ts +317 -214
package/src/utils/correlation-analysis.ts +149 -71
package/src/utils/peptide-similarity-space.ts +23 -19
package/src/utils/split-aligned.ts +8 -1
package/src/viewers/logo-viewer.ts +48 -5
package/src/viewers/model.ts +27 -0
package/src/viewers/sar-viewer.ts +99 -38
package/src/viewers/spiral-plot.ts +97 -0
package/src/viewers/stacked-barchart-viewer.ts +82 -7
package/src/viewers/subst-viewer.ts +276 -0
package/src/widgets/analyze-peptides.ts +14 -4
package/src/widgets/manual-alignment.ts +11 -4
package/src/widgets/peptide-molecule.ts +7 -0
package/webpack.config.js +4 -0

package/src/utils/correlation-analysis.ts CHANGED Viewed

@@ -1,11 +1,9 @@
 /* Do not change these import lines. Datagrok will import API library in exactly the same manner */
-//import * as grok from 'datagrok-api/grok';
-//import * as ui from 'datagrok-api/ui';
 import * as DG from 'datagrok-api/dg';
 import {AlignedSequenceEncoder} from '@datagrok-libraries/utils/src/sequence-encoder';
-import {assert, transposeMatrix} from '@datagrok-libraries/utils/src/operations';
-import {Vector, Matrix} from '@datagrok-libraries/utils/src/type-declarations';
+import {assert} from '@datagrok-libraries/utils/src/operations';
+import {Matrix} from '@datagrok-libraries/utils/src/type-declarations';
 import {kendallsTau} from '@datagrok-libraries/statistics/src/correlation-coefficient';
 /**
@@ -15,112 +13,192 @@ import {kendallsTau} from '@datagrok-libraries/statistics/src/correlation-coeffi
  * @param {Matrix} matrix A matrix.
  * @return {DG.DataFrame} The data frame.
  */
-export function matrix2DataFrame(matrix: Matrix): DG.DataFrame {
+function matrix2DataFrame(matrix: Matrix): DG.DataFrame {
   return DG.DataFrame.fromColumns(matrix.map((v, i) => DG.Column.fromFloat32Array(`${i+1}`, v)));
 }
 /**
- * Encodes amino acid sequences into a numeric representation.
+ * Encodes sequence into a certain scale.
  *
- * @param {DG.Column} col A column containing the sequences.
- * @return {DG.DataFrame} The resulting data frame.
+ * @param {DG.DataFrame} df A data frame containing the sequences.
+ * @param {string[]} [positionColumns] If given, specifies which columns to treat as containing sequences.
+ * @return {DG.DataFrame} The data frame with sequences encoded.
  */
-function calcPositions(col: DG.Column): DG.DataFrame {
-  const sequences = col.toList().map((v, _) => AlignedSequenceEncoder.clean(v));
-  const enc = new AlignedSequenceEncoder();
-  const encSeqs = sequences.map((v) => Vector.from(enc.encode(v)));
-  const positions = transposeMatrix(encSeqs);
-  return matrix2DataFrame(positions);
+function encodeSequences(df: DG.DataFrame, positionColumns?: string[]): DG.DataFrame {
+  const [nCols, nRows] = [positionColumns ? positionColumns.length : df.columns.length, df.rowCount];
+  const enc = new AlignedSequenceEncoder('WimleyWhite');
+  const positions = new Array(nCols).fill(0).map((_) => new Float32Array(nRows));
+  for (let i = 0; i < nCols; ++i) {
+    const col: DG.Column = positionColumns ? df.getCol(positionColumns[i]) : df.columns.byIndex(i);
+    for (let j = 0; j < nRows; ++j) {
+      const letter = col.get(j);
+      positions[i][j] = enc.encodeLettter(letter);
+    }
+  }
+  const posDF = DG.DataFrame.fromColumns(positions.map((v, i) => DG.Column.fromFloat32Array(df.columns.names()[i], v)));
+  return posDF;
 }
 /**
- * Unfolds a data frame into <category>-<value> format.
+ * Formats an adjacency matrix into <category1>-<category2>-<value> format.
  *
- * @param {DG.DataFrame} df A data frame to unfold.
+ * @param {DG.DataFrame} adjMatrix A data matrix to deal with.
  * @return {DG.DataFrame} The resulting data frame.
  */
-function melt(df: DG.DataFrame): DG.DataFrame {
-  let keys: string[] = [];
-  const values: Float32Array = new Float32Array(df.columns.length*df.rowCount);
-  let i = 0;
-  for (const c of df.columns.toList()) {
-    keys = keys.concat(Array<string>(c.length).fill(c.name));
-    values.set(c.getRawData(), i);
-    i += df.rowCount;
+function createNetwork(adjMatrix: DG.DataFrame): DG.DataFrame {
+  const nCols = adjMatrix.columns.length;
+  const nRows = adjMatrix.rowCount;
+  assert(nCols == nRows);
+  const pos1: Array<number> = [];
+  const pos2: Array<number> = [];
+  const weight: Array<number> = [];
+  for (let i = 0; i < nCols; ++i) {
+    const c = adjMatrix.columns.byIndex(i);
+    for (let j = i+1; j < nRows; ++j) {
+      const r = c.getRawData()[j];
+      if (Math.abs(r) > 0) {
+        pos1.push(i+1);
+        pos2.push(j+1);
+        weight.push(r);
+      }
+    }
   }
-  assert(keys.length == values.length);
-  return DG.DataFrame.fromColumns([DG.Column.fromStrings('keys', keys), DG.Column.fromFloat32Array('values', values)]);
+  const pos1Col = DG.Column.fromList('int', 'pos1', pos1);
+  const pos2Col = DG.Column.fromList('int', 'pos2', pos2);
+  const weightCol = DG.Column.fromList('double', 'weight', weight);
+  return DG.DataFrame.fromColumns([pos1Col, pos2Col, weightCol]);
 }
 /**
- * Calculates Spearman's rho rank correlation coefficient.
+ * Calculates Kendall's tau rank correlation matrix.
  *
  * @param {DG.DataFrame} df A data frame to process.
+ * @param {number} [alpha=0.05] The significance threshold.
+ * @param {number} [rAbsCutoff=0.5] The absolute R cutoff.
  * @return {DG.DataFrame} The correlation matrix.
  */
-// eslint-disable-next-line no-unused-vars
-function calcSpearmanRhoMatrix(df: DG.DataFrame): DG.DataFrame {
+function calcKendallTauMatrix(df: DG.DataFrame, alpha: number = 0.05, rAbsCutoff = 0.5): DG.DataFrame {
   const nItems = df.columns.length;
-  const rho = new Array(nItems).fill(0).map((_) => new Float32Array(nItems).fill(0));
+  const tau = new Array(nItems).fill(0).map((_) => new Float32Array(nItems).fill(0));
   for (let i = 0; i < nItems; ++i) {
     for (let j = i+1; j < nItems; ++j) {
-      rho[i][j] = df.columns.byIndex(i).stats.spearmanCorr(df.columns.byIndex(j));
-      rho[j][i] = rho[i][j];
+      const res = kendallsTau(df.columns.byIndex(i).getRawData(), df.columns.byIndex(j).getRawData());
+      tau[i][j] = (res.prob < alpha) && (Math.abs(res.test) >= rAbsCutoff) ? res.test : 0;
+      tau[j][i] = tau[i][j];
     }
   }
-  return matrix2DataFrame(rho);
+  return matrix2DataFrame(tau);
 }
 /**
- * Calculates Kendall's tau rank correlation coefficient.
+ * Calculates a correlation matrix via the chosen method.
  *
- * @param {DG.DataFrame} df A data frame to process.
- * @param {number} [alpha=0.05] The significance threshold.
+ * @param {DG.DataFrame} df A data frame.
  * @return {DG.DataFrame} The correlation matrix.
  */
-function calcKendallTauMatrix(df: DG.DataFrame, alpha: number = 0.05): DG.DataFrame {
-  const nItems = df.columns.length;
-  const tau = new Array(nItems).fill(0).map((_) => new Float32Array(nItems).fill(0));
+function calcCorrelationMatrix(df: DG.DataFrame): DG.DataFrame {
+  return calcKendallTauMatrix(df);
+}
-  for (let i = 0; i < nItems; ++i) {
-    for (let j = i+1; j < nItems; ++j) {
-      const res = kendallsTau(df.columns.byIndex(i).getRawData(), df.columns.byIndex(j).getRawData());
-      tau[i][j] = res.prob < alpha ? res.test : 0;
-      tau[j][i] = tau[i][j];
+type Weights = {[pos: number]: number};
+type Guide = {[pos: number]: Weights};
+/**
+ * Builds a dictionary keyed by each correlating position.
+ * Each value is itself a dictionary that maps the positions the key position
+ * correlates with to the corresponding R-value.
+ *
+ * @param {DG.DataFrame} network A network to process.
+ * @return {Guide} The formatted dictionary.
+ */
+function calcGuide(network: DG.DataFrame): Guide {
+  assert(network.columns.length == 3);
+  const guide: Guide = {};
+  let [pos1Col, pos2Col, weightCol] = Array.from(network.columns);
+  pos1Col = pos1Col.getRawData();
+  pos2Col = pos2Col.getRawData();
+  weightCol = weightCol.getRawData();
+  function _addWeight(pos1: number, pos2: number, weight: number) {
+    if (guide[pos1] == undefined) {
+      guide[pos1] = {};
     }
+    guide[pos1][pos2] = weight;
   }
-  return matrix2DataFrame(tau);
+  for (let i = 0; i < network.rowCount; ++i) {
+    const [pos1, pos2, weight] = [pos1Col[i], pos2Col[i], weightCol[i]];
+    _addWeight(pos1, pos2, weight);
+    _addWeight(pos2, pos1, weight);
+  }
+  return guide;
+}
+function calcCorrelations(df: DG.DataFrame, positionColumns?: string[]): Guide {
+  const posDF = encodeSequences(df, positionColumns);
+  const ccDF = calcCorrelationMatrix(posDF);
+  const nwDF = createNetwork(ccDF);
+  const guide = calcGuide(nwDF);
+  return guide;
 }
 /**
- * Creates acorrelation plot and a box plot to perform correlation analysis.
+ * Formats correlating positions to place in the corresponding tooltips.
+ * Highlights correlating positions' headers.
  *
  * @export
- * @param {DG.Column} sequencesColumn A column containing amino acid sequences.
- * @return {[DG.Viewer, DG.Viewer]} These two plots.
+ * @class CorrelationAnalysisVisualizer
  */
-export function correlationAnalysisPlots(sequencesColumn: DG.Column): [DG.Viewer, DG.Viewer] {
-  const posDF = calcPositions(sequencesColumn);
-  const cpviewer = DG.Viewer.fromType(
-    DG.VIEWER.CORR_PLOT,
-    posDF,
-    {
-      'xColumnNames': posDF.columns.names(),
-      'yColumnNames': posDF.columns.names(),
-      'correlationType': 'Spearman',
-    });
-  const rhoDF = calcKendallTauMatrix(posDF);
-  const meltDF = melt(rhoDF);
-  const bpviewer = DG.Viewer.fromType(
-    DG.VIEWER.BOX_PLOT,
-    meltDF, {
-      'categoryColumnName': 'keys',
-      'valueColumnName': 'values',
-      'statistics': ['min', 'max', 'avg', 'med'],
-    });
-  return [cpviewer, bpviewer];
+export class CorrelationAnalysisVisualizer {
+  protected guide: Guide;
+  protected highlightedColumns: number[];
+  /**
+   * Creates an instance of CorrelationAnalysisVisualizer.
+   * @param {DG.DataFrame} df A data frame to take sequences from.
+   * @param {string[]} positionColumns Optional columns list to take the sequences from.
+   * @memberof CorrelationAnalysisVisualizer
+   */
+  constructor(df: DG.DataFrame, positionColumns: string[]) {
+    if (df) {
+      this.guide = calcCorrelations(df, positionColumns);
+      this.highlightedColumns = Object.keys(this.guide).map((v) => parseInt(v));
+    } else {
+      throw new Error('Dataframe was not found in the grid.');
+    }
+  }
+  /**
+   * Returns a dictionary with the correlating positions and their R-value.
+   *
+   * @readonly
+   * @type {Guide} The dictionary.
+   * @memberof CorrelationAnalysisVisualizer
+   */
+  get path(): Guide {
+    return this.guide;
+  }
+  /**
+   * Checks if the position column name is found among correlating ones.
+   *
+   * @param {string} name The name of the column.
+   * @return {boolean} True if the position is correlating with any other.
+   * @memberof CorrelationAnalysisVisualizer
+   */
+  public isPositionCorrelating(name: string): boolean {
+    return this.highlightedColumns.includes(parseInt(name));
+  }
 }

package/src/utils/peptide-similarity-space.ts CHANGED Viewed

@@ -1,6 +1,4 @@
 /* Do not change these import lines. Datagrok will import API library in exactly the same manner */
-// eslint-disable-next-line no-unused-vars
-import * as grok from 'datagrok-api/grok';
 import * as ui from 'datagrok-api/ui';
 import * as DG from 'datagrok-api/dg';
@@ -10,6 +8,15 @@ import {DimensionalityReducer} from '@datagrok-libraries/utils/src/reduce-dimens
 import {Measurer} from '@datagrok-libraries/utils/src/string-measure';
 import {Coordinates} from '@datagrok-libraries/utils/src/type-declarations';
+/**
+ * A worker to perform dimensionality reduction.
+ *
+ * @param {any[]} columnData The data to process.
+ * @param {string} method A method of dimensionality reduction.
+ * @param {string} measure A distance metric.
+ * @param {number} cyclesCount Number of iterations to run.
+ * @return {Promise<unknown>} Resulting embedding.
+ */
 function createDimensinalityReducingWorker(
   columnData: any[],
   method: string,
@@ -40,7 +47,6 @@ function inferActivityColumnsName(table: DG.DataFrame): string | null {
   const re = /activity|ic50/i;
   for (const name of table.columns.names()) {
     if (name.match(re)) {
-      console.log(`${name} found.`);
       return name;
     }
   }
@@ -103,29 +109,27 @@ export async function createPeptideSimilaritySpaceViewer(
   // Add new axes.
   for (const axis of axesNames) {
     const col = table.col(axis);
+    const newCol = edf.getCol(axis);
-    if (col == null) {
-      table.columns.insert(edf.getCol(axis));
+    if (col != null) {
+      for (let i = 0; i < newCol.length; ++i) {
+        const v = newCol.get(i);
+        table.set(axis, i, v);
+      }
     } else {
-      table.columns.replace(col, edf.getCol(axis));
+      table.columns.insert(newCol);
     }
   }
-  // const viewer = DG.Viewer.scatterPlot(table, {x: '~X', y: '~Y', color: activityColumnName ?? '~MW', size: '~MW'});
   const viewerOptions = {x: '~X', y: '~Y', color: activityColumnName ?? '~MW', size: '~MW'};
-  const viewer = view !== null ?
-    view.addViewer(DG.VIEWER.SCATTER_PLOT, viewerOptions) : DG.Viewer.scatterPlot(table, viewerOptions);
-  // Fit view if needed.
-  /*if (zoom) {
-    viewer.zoom(
-      table.getCol('~X').min,
-      table.getCol('~Y').min,
-      table.getCol('~X').max,
-      table.getCol('~Y').max,
-    );
-  }*/
+  const viewer = DG.Viewer.scatterPlot(table, viewerOptions);
+  if (view !== null) {
+    view.addViewer(viewer);
+  }
   pi.close();
-  return (viewer as DG.ScatterPlotViewer);
+  return viewer;
 }
 /**

package/src/utils/split-aligned.ts CHANGED Viewed

@@ -1,5 +1,13 @@
 import * as DG from 'datagrok-api/dg';
+/**
+ * Split aligned sequence string into separate parts containing amino acid residues.
+ *
+ * @export
+ * @param {DG.Column} peptideColumn Column containing aligned sequences.
+ * @param {boolean} [filter=true] Filter out columns with all the same residues.
+ * @return {[DG.DataFrame, number[]]} DataFrame containing split sequence and a list of invalid indexes.
+ */
 export function splitAlignedPeptides(peptideColumn: DG.Column, filter: boolean = true): [DG.DataFrame, number[]] {
   const splitPeptidesArray: string[][] = [];
   let currentSplitPeptide: string[];
@@ -45,7 +53,6 @@ export function splitAlignedPeptides(peptideColumn: DG.Column, filter: boolean =
   columnNames.push('C-terminal');
   // filter out the columns with the same values
   if (filter) {
     splitColumns = splitColumns.filter((positionArray, index) => {
       const isRetained = new Set(positionArray).size > 1;

package/src/viewers/logo-viewer.ts CHANGED Viewed

@@ -7,6 +7,13 @@ import * as logojs from 'logojs-react';
 import {splitAlignedPeptides} from '../utils/split-aligned';
 import {ChemPalette} from '../utils/chem-palette';
+/**
+ * Logo viewer.
+ *
+ * @export
+ * @class Logo
+ * @extends {DG.JsViewer}
+ */
 export class Logo extends DG.JsViewer {
   initialized: boolean;
   option: any;
@@ -18,6 +25,11 @@ export class Logo extends DG.JsViewer {
   LET_COLORS: Array<any>;
   target: DG.DataFrame | undefined | null;
+  /**
+   * Creates an instance of Logo.
+   *
+   * @memberof Logo
+   */
   constructor() {
     super();
     this.initialized = false;
@@ -59,9 +71,13 @@ export class Logo extends DG.JsViewer {
     ];
   }
+  /**
+   * Initializer function.
+   *
+   * @memberof Logo
+   */
   init() {
     this.initialized = true;
-    // this.reactHost = ui.div([]);
     console.log('INIT');
     this.target = this.dataFrame;
     [this.splitted] = splitAlignedPeptides(this.dataFrame!.columns.bySemType(this.colSemType));
@@ -70,6 +86,11 @@ export class Logo extends DG.JsViewer {
     this.root.style.maxHeight = '200px';
   }
+  /**
+   * Function to execute when the table is attached.
+   *
+   * @memberof Logo
+   */
   onTableAttached() {
     if (typeof this.dataFrame !== 'undefined') {
       if (!this.initialized) {
@@ -84,16 +105,32 @@ export class Logo extends DG.JsViewer {
     this.render();
   }
+  /**
+   * Function that is executed when the viewer is detached.
+   *
+   * @memberof Logo
+   */
   detach() {
     this.subs.forEach((sub) => sub.unsubscribe());
   }
+  /**
+   * Function that is executed when the viewer property is changed.
+   *
+   * @param {DG.Property} property
+   * @memberof Logo
+   */
   onPropertyChanged(property: DG.Property) {
     super.onPropertyChanged(property);
     this.render();
   }
+  /**
+   * Function that renders the viewer.
+   *
+   * @memberof Logo
+   */
   async render() {
     const bits = this.dataFrame!.selection;
     let selected = false;
@@ -111,18 +148,24 @@ export class Logo extends DG.JsViewer {
     if (typeof this.dataFrame !== 'undefined') {
       this.findLogo();
-      // if (this.reactHost !== null) {
-      //   this.root.appendChild(this.reactHost);
-      // }
     }
   }
+  /**
+   * Create logo.
+   *
+   * @memberof Logo
+   */
   async findLogo() {
     this.getInfoFromDf();
     logojs.embedProteinLogo(this.root, {alphabet: this.LET_COLORS, ppm: this.ppm});
   }
+  /**
+   * Retrieves information for building logo from the dataframe.
+   *
+   * @memberof Logo
+   */
   getInfoFromDf() {
     let index: number = 0;
     this.ppm = [];

package/src/viewers/model.ts CHANGED Viewed

@@ -3,6 +3,11 @@ import * as DG from 'datagrok-api/dg';
 import {describe} from '../describe';
 import {Subject} from 'rxjs';
+/**
+ * Model class for SAR viewers that retrieves and stores data.
+ *
+ * @class SARViewerModel
+ */
 class SARViewerModel {
   private viewerGrid: Subject<DG.Grid> = new Subject<DG.Grid>();
   private viewerVGrid: Subject<DG.Grid> = new Subject<DG.Grid>();
@@ -21,6 +26,11 @@ class SARViewerModel {
   private isUpdating = false;
   grouping: boolean;
+  /**
+   * Creates an instance of SARViewerModel.
+   *
+   * @memberof SARViewerModel
+   */
   constructor() {
     this.dataFrame = null;
     this.activityColumn = null;
@@ -35,6 +45,18 @@ class SARViewerModel {
     this.groupMapping$ = this.groupMapping.asObservable();
   }
+  /**
+   * Updates data using the specified parameters.
+   *
+   * @param {DG.DataFrame} df Working table.
+   * @param {string} activityCol Activity column name.
+   * @param {string} activityScaling Activity scaling method.
+   * @param {DG.Grid} sourceGrid Working table grid.
+   * @param {boolean} twoColorMode Bidirectional analysis enabled.
+   * @param {(DG.BitSet | null)} initialBitset Initial bitset.
+   * @param {boolean} grouping Grouping enabled.
+   * @memberof SARViewerModel
+   */
   async updateData(
     df: DG.DataFrame,
     activityCol: string,
@@ -54,6 +76,11 @@ class SARViewerModel {
     await this.updateDefault();
   }
+  /**
+   * Update data using current parameters.
+   *
+   * @memberof SARViewerModel
+   */
   async updateDefault() {
     if (
       this.dataFrame && this.activityColumn && this.activityScaling &&