npm - @datagrok/bio - Versions diffs - 2.8.6 → 2.9.0 - Mend

@datagrok/bio 2.8.6 → 2.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16)

package/CHANGELOG.md +19 -19
package/README.md +39 -20
package/dist/package-test.js +1 -1
package/dist/package-test.js.map +1 -1
package/dist/package.js +1 -1
package/dist/package.js.map +1 -1
package/package.json +3 -3
package/src/package-test.ts +1 -0
package/src/package-types.ts +0 -1
package/src/package.ts +50 -9
package/src/substructure-search/substructure-search.ts +84 -55
package/src/tests/activity-cliffs-tests.ts +1 -1
package/src/tests/detectors-tests.ts +2 -2
package/src/tests/scoring.ts +38 -0
package/src/utils/cell-renderer.ts +5 -3
package/src/utils/ui-utils.ts +4 -4

package/package.json CHANGED Viewed

@@ -5,7 +5,7 @@
     "name": "Leonid Stolbov",
     "email": "lstolbov@datagrok.ai"
   },
-  "version": "2.8.6",
+  "version": "2.9.0",
   "description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
   "repository": {
     "type": "git",
@@ -34,14 +34,14 @@
   ],
   "dependencies": {
     "@biowasm/aioli": "^3.1.0",
-    "@datagrok-libraries/bio": "^5.36.0",
+    "@datagrok-libraries/bio": "^5.37.0",
     "@datagrok-libraries/chem-meta": "^1.0.1",
     "@datagrok-libraries/ml": "^6.3.39",
     "@datagrok-libraries/tutorials": "^1.3.6",
     "@datagrok-libraries/utils": "^4.0.17",
     "cash-dom": "^8.0.0",
     "css-loader": "^6.7.3",
-    "datagrok-api": "^1.13.3",
+    "datagrok-api": "^1.16.0",
     "dayjs": "^1.11.4",
     "fastest-levenshtein": "^1.0.16",
     "openchemlib": "6.0.1",

package/src/package-test.ts CHANGED Viewed

@@ -28,6 +28,7 @@ import './tests/to-atomic-level-tests';
 import './tests/mm-distance-tests';
 import './tests/activity-cliffs-tests';
 import './tests/sequence-space-test';
+import './tests/scoring';
 export const _package = new DG.Package();

package/src/package-types.ts CHANGED Viewed

@@ -14,7 +14,6 @@ export const enum BioPackagePropertiesNames {
 export class BioPackageProperties extends Map<string, any> {
   private _onPropertyChanged: Subject<string> = new Subject<string>();
   public get onPropertyChanged(): Observable<string> { return this._onPropertyChanged; }

package/src/package.ts CHANGED Viewed

@@ -21,7 +21,7 @@ import {removeEmptyStringRows} from '@datagrok-libraries/utils/src/dataframe-uti
 import {SequenceSimilarityViewer} from './analysis/sequence-similarity-viewer';
 import {SequenceDiversityViewer} from './analysis/sequence-diversity-viewer';
-import {substructureSearchDialog} from './substructure-search/substructure-search';
+import {SubstructureSearchDialog} from './substructure-search/substructure-search';
 import {saveAsFastaUI} from './utils/save-as-fasta';
 import {BioSubstructureFilter} from './widgets/bio-substructure-filter';
 import {delay} from '@datagrok-libraries/utils/src/test';
@@ -40,10 +40,11 @@ import {
   getLibFileNameList,
   getLibraryPanelUI
 } from './utils/monomer-lib';
-import {getMacromoleculeColumn} from './utils/ui-utils';
+import {getMacromoleculeColumns} from './utils/ui-utils';
 import {DimReductionMethods, ITSNEOptions, IUMAPOptions} from '@datagrok-libraries/ml/src/reduce-dimensionality';
 import {SequenceSpaceFunctionEditor} from '@datagrok-libraries/ml/src/functionEditors/seq-space-editor';
 import {ActivityCliffsFunctionEditor} from '@datagrok-libraries/ml/src/functionEditors/activity-cliffs-editor';
+import {SCORE, calculateScores} from '@datagrok-libraries/bio/src/utils/macromolecule/scoring';
 import {demoBio01UI} from './demo/bio01-similarity-diversity';
 import {demoBio01aUI} from './demo/bio01a-hierarchical-clustering-and-sequence-space';
@@ -602,7 +603,7 @@ export function importBam(fileContent: string): DG.DataFrame [] {
 //top-menu: Bio | Convert | Notation...
 //name: convertDialog
 export function convertDialog() {
-  const col = getMacromoleculeColumn();
+  const col = getMacromoleculeColumns()[0];
   convert(col);
 }
@@ -734,12 +735,52 @@ export function diversitySearchTopMenu() {
   view.dockManager.dock(viewer, 'down');
 }
-//top-menu: Bio | Search | Substructure...
-//name: bioSubstructureSearch
-//description: Finds sequence with the given subsequence
-export function bioSubstructureSearch(): void {
-  const col = getMacromoleculeColumn();
-  substructureSearchDialog(col);
+//name: SearchSubsequenceEditor
+//tags: editor
+//input: funccall call
+export function searchSubsequenceEditor(call: DG.FuncCall) {
+  const columns = getMacromoleculeColumns();
+  if (columns.length === 1)
+    call.func.prepare({macromolecules: columns[0]}).call(true);
+  else
+    new SubstructureSearchDialog(columns);
+}
+//top-menu: Bio | Search | Subsequence...
+//name: Subsequence Search
+//input: column macromolecules
+//editor: Bio:SearchSubsequenceEditor
+export function SubsequenceSearchTopMenu(macromolecules: DG.Column): void {
+  grok.shell.tv.getFiltersGroup({createDefaultFilters: false}).updateOrAdd({
+    type: 'Bio:bioSubstructureFilter',
+    column: macromolecules.name,
+    columnName: macromolecules.name,
+  });
+  grok.shell.tv.grid.scrollToCell(macromolecules, 0);
+}
+//top-menu: Bio | Caclulate | Identity...
+//name: Identity Scoring
+//description: Adds a column with fraction of matching monomers
+//input: dataframe table [Table containing Macromolecule column]
+//input: column macromolecules {semType: Macromolecule} [Sequences to score]
+//input: string reference [Sequence, matching column format]
+//output: column scores
+export async function sequenceIdentityScoring(table: DG.DataFrame, macromolecule: DG.Column, reference: string): Promise<DG.Column<number>> {
+  const scores = calculateScores(table, macromolecule, reference, SCORE.IDENTITY);
+  return scores;
+}
+//top-menu: Bio | Caclulate | Similarity...
+//name: Similarity Scoring
+//description: Adds a column with similarity scores, calculated as sum of monomer fingerprint similarities
+//input: dataframe table [Table containing Macromolecule column]
+//input: column macromolecules {semType: Macromolecule} [Sequences to score]
+//input: string reference [Sequence, matching column format]
+//output: column scores
+export async function sequenceSimilarityScoring(table: DG.DataFrame, macromolecule: DG.Column, reference: string): Promise<DG.Column<number>> {
+  const scores = calculateScores(table, macromolecule, reference, SCORE.SIMILARITY);
+  return scores;
 }
 //name: saveAsFasta

package/src/substructure-search/substructure-search.ts CHANGED Viewed

@@ -17,64 +17,93 @@ export const enum MONOMERIC_COL_TAGS {
 const SUBSTR_HELM_COL_NAME = 'substr_helm';
-/**
- * Searches substructure in each row of Macromolecule column
- *
- * @param {DG.column} col Column with 'Macromolecule' semantic type
- */
-export function substructureSearchDialog(col: DG.Column<string>): void {
-  const units = col.getTag(DG.TAGS.UNITS);
-  const separator = col.getTag(bioTAGS.separator);
-  // const notations = [NOTATION.FASTA, NOTATION.SEPARATOR, NOTATION.HELM];
-  const substructureInput = ui.textInput('Substructure', '');
-  const editHelmLink = ui.link('Edit helm', async () => {
-    updateDivInnerHTML(inputsDiv, grid.root);
-    await ui.tools.waitForElementInDom(grid.root);
-    setTimeout(() => {
-      grid.cell(SUBSTR_HELM_COL_NAME, 0).element.children[0].dispatchEvent(
-        new KeyboardEvent('keydown', {key: 'Enter'}));
-    }, 100);
-  });
+export class SubstructureSearchDialog {
+  units: string;
+  separator: string;
+  inputsDiv: HTMLDivElement;
+  substructureInput: DG.InputBase<string>;
+  separatorInput: DG.InputBase<string>;
+  editHelmLink: HTMLAnchorElement;
+  columnsInput: DG.InputBase<DG.Column | null>;
+  grid: DG.Grid;
+  col: DG.Column;
+  dialog: DG.Dialog;
+  constructor(columns: DG.Column<string>[]) {
+    this.col = columns[0];
+    this.createUI();
+  }
+  editHelmLinkAction(): void {
+    updateDivInnerHTML(this.inputsDiv, this.grid.root);
+    ui.tools.waitForElementInDom(this.grid.root).then(() => {
+      setTimeout(() => {
+        this.grid.cell(SUBSTR_HELM_COL_NAME, 0).element.children[0].dispatchEvent(
+          new KeyboardEvent('keydown', {key: 'Enter'})
+        );
+      }, 100);
+    });
+  }
+  updateInputs(): void {
+    const selectedInput = this.units === NOTATION.HELM ? ui.divV([this.columnsInput, this.editHelmLink]) :
+      this.units === NOTATION.SEPARATOR ? ui.inputs([this.columnsInput, this.substructureInput, this.separatorInput]) :
+        ui.inputs([this.columnsInput, this.substructureInput]);
-  const df = DG.DataFrame.create(1);
-  df.columns.addNewString(SUBSTR_HELM_COL_NAME).init((_i) => '');
-  df.col(SUBSTR_HELM_COL_NAME)!.semType = col.semType;
-  df.col(SUBSTR_HELM_COL_NAME)!.setTag(DG.TAGS.UNITS, NOTATION.HELM);
-  const grid = df.plot.grid();
-  const separatorInput = ui.textInput('Separator', separator);
-  const inputsDiv = ui.div();
-  const inputs = units === NOTATION.HELM ? ui.divV([editHelmLink]) :
-    units === NOTATION.SEPARATOR ? ui.inputs([substructureInput, separatorInput]) :
-      ui.inputs([substructureInput]);
-  updateDivInnerHTML(inputsDiv, inputs);
-  ui.dialog('Substructure Search')
-    .add(ui.divV([
-      ui.divText(`Notation: ${units}`),
-      inputsDiv,
-    ]))
-    .onOK(async () => {
-      let substructure = units === NOTATION.HELM ? df.get(SUBSTR_HELM_COL_NAME, 0) : substructureInput.value;
-      if (units === NOTATION.SEPARATOR && separatorInput.value !== separator && separatorInput.value !== '')
-        substructure = substructure.replaceAll(separatorInput.value, separator);
-      const matchesColName = `Matches: ${substructure}`;
-      const colExists = col.dataFrame.columns.names()
-        .filter((it) => it.toLocaleLowerCase() === matchesColName.toLocaleLowerCase()).length > 0;
-      if (!colExists) {
+    updateDivInnerHTML(this.inputsDiv, selectedInput);
+  }
+  updateNotationDiv(): void {
+    this.units = this.col.getTag(DG.TAGS.UNITS);
+    this.separator = this.col.getTag(bioTAGS.separator);
+    const notationDiv = this.dialog.root.getElementsByClassName('notation-text')[0];
+    if (notationDiv)
+      notationDiv.textContent = `Notation: ${this.units}`;
+  }
+  createUI(): void {
+    const dataframe = grok.shell.tv.dataFrame;
+    const seqColOptions = {filter: (col: DG.Column) => col.semType === DG.SEMTYPE.MACROMOLECULE};
+    this.columnsInput = ui.columnInput('Column', dataframe, this.col, (column: DG.Column) => {
+      this.col = column;
+      this.updateNotationDiv();
+      this.updateInputs();
+    }, seqColOptions);
+    this.substructureInput = ui.stringInput('Substructure', '');
+    this.editHelmLink = ui.link('Edit helm', () => this.editHelmLinkAction(), undefined, {style: {position: 'relative', left: '95px'}});
+    const df = DG.DataFrame.create(1);
+    df.columns.addNewString(SUBSTR_HELM_COL_NAME).init((_i) => '');
+    df.col(SUBSTR_HELM_COL_NAME)!.semType = this.col.semType;
+    df.col(SUBSTR_HELM_COL_NAME)!.setTag(DG.TAGS.UNITS, NOTATION.HELM);
+    this.grid = df.plot.grid();
+    this.separatorInput = ui.stringInput('Separator', this.separator);
+    this.inputsDiv = ui.div();
+    this.units = this.col.getTag(DG.TAGS.UNITS);
+    this.separator = this.col.getTag(bioTAGS.separator);
+    this.updateInputs();
+    this.dialog = ui.dialog('Substructure Search')
+      .add(ui.divV([
+        ui.divText(`Notation: ${this.units}`, 'notation-text'),
+        this.inputsDiv,
+      ]))
+      .onOK(async () => {
+        let substructure = this.units === NOTATION.HELM ? df.get(SUBSTR_HELM_COL_NAME, 0) : this.substructureInput.value;
+        if (this.units === NOTATION.SEPARATOR && this.separatorInput.value !== this.separator && this.separatorInput.value !== '')
+          substructure = substructure.replaceAll(this.separatorInput.value, this.separator);
         let matches: DG.BitSet;
-        if (units === NOTATION.HELM)
-          matches = await helmSubstructureSearch(substructure, col);
+        if (this.units === NOTATION.HELM)
+          matches = await helmSubstructureSearch(substructure, this.col);
         else
-          matches = linearSubstructureSearch(substructure, col);
-        col.dataFrame.columns.add(DG.Column.fromBitSet(matchesColName, matches));
-      } else { grok.shell.warning(`Search ${substructure} is already performed`); }
-    })
-    .show();
+          matches = linearSubstructureSearch(substructure, this.col);
+        this.col.dataFrame.filter.and(matches);
+      })
+      .show();
+  }
 }
 export function linearSubstructureSearch(substructure: string, col: DG.Column<string>, separator?: string): DG.BitSet {

package/src/tests/activity-cliffs-tests.ts CHANGED Viewed

@@ -47,5 +47,5 @@ category('activityCliffs', async () => {
     viewList.push(actCliffsTableViewWithEmptyRows);
     await _testActivityCliffsOpen(actCliffsDfWithEmptyRows, 3, DimReductionMethods.UMAP, 'sequence');
-  });
+  }, {skipReason: 'GROK-13851: Unhandled exceptions'});
 });

package/src/tests/detectors-tests.ts CHANGED Viewed

@@ -274,13 +274,13 @@ MWRSWY-CKHP`;
     await _testDf(readSamples(Samples.fastaCsv), {
       'Sequence': new PosCol(NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.PT, 20, false),
     });
-  });
+  }, {skipReason: 'GROK-13851: Unhandled exceptions'});
   test('samplesFastaFasta', async () => {
     await _testDf(readSamples(Samples.fastaFasta), {
       'sequence': new PosCol(NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.PT, 20, false),
     });
-  });
+  }, {skipReason: 'GROK-13851: Unhandled exceptions'});
   // peptidesComplex contains monomers with spaces in AlignedSequence columns, which are forbidden
   // test('samplesPeptidesComplexPositiveAlignedSequence', async () => {

package/src/tests/scoring.ts ADDED Viewed

@@ -0,0 +1,38 @@
+import * as DG from 'datagrok-api/dg';
+import {category, test, expectFloat, before} from '@datagrok-libraries/utils/src/test';
+import {NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
+import {sequenceIdentityScoring, sequenceSimilarityScoring} from '../package';
+import {getMonomerLibHelper} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
+category('Scoring', () => {
+  const sequence = 'sequence';
+  const expectedSimilarity = 'expected_similarity';
+  const expectedIdentity = 'expected_identity';
+  const table = DG.DataFrame.fromCsv(`${sequence},${expectedSimilarity},${expectedIdentity}
+  PEPTIDE1{Aca.Orn.gGlu.Pqa.D-His_1Bn.dH.hHis.4Abz.D-Tic.D-Dap.Y.Iva.meS.F.P.F.D-1Nal}$$$$,1.0,1.0
+  PEPTIDE1{Iva.Gly_allyl.gGlu.Pqa.D-Dip.dH.hHis.4Abz.D-aHyp.D-Dap.Y.Iva.I.Tyr_26diMe.P.Asu.meC}$$$$,0.68,0.53
+  PEPTIDE1{[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal].[1Nal]}$$$$V2.0,0.34,0.0
+  `);
+  const seqCol: DG.Column<string> = table.getCol(sequence);
+  seqCol.setTag(DG.TAGS.UNITS, NOTATION.HELM);
+  seqCol.semType = DG.SEMTYPE.MACROMOLECULE;
+  const reference = seqCol.get(0)!;
+  before(async () => {
+    const monomerLibHelper = await getMonomerLibHelper();
+    await monomerLibHelper.loadLibraries(true);
+  });
+  test('Identity', async () => {
+    const scoresCol = await sequenceIdentityScoring(table, seqCol, reference);
+    for (let i = 0; i < scoresCol.length; i++)
+      expectFloat(scoresCol.get(i)!, table.get(expectedIdentity, i), 0.01, `Wrong identity score for sequence at position ${i}`);
+  });
+  test('Similarity', async () => {
+    const scoresCol = await sequenceSimilarityScoring(table, seqCol, reference);
+    for (let i = 0; i < scoresCol.length; i++)
+      expectFloat(scoresCol.get(i)!, table.get(expectedSimilarity, i), 0.01, `Wrong similarity score for sequence at position ${i}`);
+  });
+});

package/src/utils/cell-renderer.ts CHANGED Viewed

@@ -26,6 +26,7 @@ import * as C from './constants';
 import {_package, getBioLib} from '../package';
 import {ISeqSplitted} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
+import {getSplitter} from '@datagrok-libraries/bio/src/utils/macromolecule/utils';
 type TempType = { [tagName: string]: any };
@@ -292,14 +293,15 @@ export class MacromoleculeDifferenceCellRenderer extends DG.GridCellRenderer {
     const cell = gridCell.cell;
     const tableCol = gridCell.tableColumn as DG.Column<string>;
     const s: string = cell.value ?? '';
+    const separator = tableCol.tags[bioTAGS.separator];
+    const units: string = tableCol.tags[DG.TAGS.UNITS];
     w = getUpdatedWidth(grid, g, x, w);
     //TODO: can this be replaced/merged with splitSequence?
     const [s1, s2] = s.split('#');
-    const uh = UnitsHandler.getOrCreate(tableCol);
-    const splitter = uh.getSplitter();
+    const splitter = getSplitter(units, separator);
     const subParts1 = splitter(s1);
     const subParts2 = splitter(s2);
-    drawMoleculeDifferenceOnCanvas(g, x, y, w, h, subParts1, subParts2, uh.units);
+    drawMoleculeDifferenceOnCanvas(g, x, y, w, h, subParts1, subParts2, units);
   }
 }

package/src/utils/ui-utils.ts CHANGED Viewed

@@ -1,13 +1,13 @@
 import * as grok from 'datagrok-api/grok';
 import * as DG from 'datagrok-api/dg';
-export function getMacromoleculeColumn(): DG.Column | any {
-  const col = grok.shell.t.columns.bySemType(DG.SEMTYPE.MACROMOLECULE);
-  if (col === null) {
+export function getMacromoleculeColumns(): DG.Column<any>[] | any {
+  const columns = grok.shell.t.columns.bySemTypeAll(DG.SEMTYPE.MACROMOLECULE);
+  if (columns === null) {
     grok.shell.error('Current table does not contain macromolecules');
     return;
   }
-  return col;
+  return columns;
 }
 export function updateDivInnerHTML(div: HTMLElement, content: string | Node): void {