npm - @datagrok/bio - Versions diffs - 2.18.2 → 2.18.4 - Mend

@datagrok/bio 2.18.2 → 2.18.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19)

package/.eslintrc.json +1 -0
package/CHANGELOG.md +9 -0
package/dist/455.js +1 -1
package/dist/455.js.map +1 -1
package/dist/package-test.js +3 -3
package/dist/package-test.js.map +1 -1
package/dist/package.js +2 -2
package/dist/package.js.map +1 -1
package/package.json +2 -2
package/src/analysis/sequence-search-base-viewer.ts +1 -1
package/src/package.ts +2 -1
package/src/tests/seq-handler-get-region-tests.ts +2 -1
package/src/utils/monomer-lib/monomer-manager/duplicate-monomer-manager.ts +2 -0
package/src/utils/monomer-lib/monomer-manager/monomer-manager.ts +33 -3
package/src/utils/seq-helper/seq-handler.ts +10 -1
package/src/utils/seq-helper/seq-helper.ts +1 -1
package/src/widgets/composition-analysis-widget.ts +3 -1
package/test-console-output-1.log +346 -348
package/test-record-1.mp4 +0 -0

package/package.json CHANGED Viewed

@@ -5,7 +5,7 @@
     "name": "Leonid Stolbov",
     "email": "lstolbov@datagrok.ai"
   },
-  "version": "2.18.2",
+  "version": "2.18.4",
   "description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
   "repository": {
     "type": "git",
@@ -44,7 +44,7 @@
   ],
   "dependencies": {
     "@biowasm/aioli": "^3.1.0",
-    "@datagrok-libraries/bio": "^5.48.1",
+    "@datagrok-libraries/bio": "^5.49.1",
     "@datagrok-libraries/chem-meta": "^1.2.7",
     "@datagrok-libraries/math": "^1.2.4",
     "@datagrok-libraries/ml": "^6.7.6",

package/src/analysis/sequence-search-base-viewer.ts CHANGED Viewed

@@ -70,7 +70,7 @@ export class SequenceSearchBaseViewer extends DG.JsViewer {
     this.render();
   }
-  /** For tests */ public computeRequested: boolean;
+  /** For tests */ public computeRequested: boolean = false;
   public renderPromise: Promise<void> = Promise.resolve();
   protected render(computeData = true): void {

package/src/package.ts CHANGED Viewed

@@ -979,7 +979,8 @@ export async function manageLibrariesApp(): Promise<DG.View> {
 export async function manageLibrariesAppTreeBrowser(treeNode: DG.TreeViewGroup, browseView: DG.BrowseView) {
   const libraries = (await (await MonomerLibManager.getInstance()).getFileManager()).getValidLibraryPaths();
   libraries.forEach((libName) => {
-    const libNode = treeNode.item(libName);
+    const nodeName = libName.endsWith('.json') ? libName.substring(0, libName.length - 5) : libName;
+    const libNode = treeNode.item(nodeName);
     // eslint-disable-next-line rxjs/no-ignored-subscription, rxjs/no-async-subscribe
     libNode.onSelected.subscribe(async () => {
       const monomerManager = await MonomerManager.getNewInstance();

package/src/tests/seq-handler-get-region-tests.ts CHANGED Viewed

@@ -1,3 +1,4 @@
+/* eslint-disable max-lines-per-function */
 import * as grok from 'datagrok-api/grok';
 import * as DG from 'datagrok-api/dg';
@@ -68,7 +69,7 @@ PEPTIDE1{N.T.[dE].[Thr_PO3H2]}$$$$
 PEPTIDE1{[Cys_SEt].T.[dK].[Thr_PO3H2]}$$$$
 PEPTIDE1{[Cys_SEt].T.*.*}$$$$`,
       units: NOTATION.HELM,
-      alphabet: null,
+      alphabet: ALPHABET.UN,
       positionNames: {tag: null, start: '4', end: '7'}
     }

package/src/utils/monomer-lib/monomer-manager/duplicate-monomer-manager.ts CHANGED Viewed

@@ -97,6 +97,8 @@ export class DuplicateMonomerManager {
       DuplicateMonomerManager._instance = new DuplicateMonomerManager();
       await DuplicateMonomerManager._instance.refresh();
       const libManager = await MonomerLibManager.getInstance();
+      // reason: subscription happens only once, and is needed throughout the lifetime of the app
+      // eslint-disable-next-line rxjs/no-async-subscribe, rxjs/no-ignored-subscription
       libManager.getMonomerLib().onChanged.subscribe(async () => await DuplicateMonomerManager._instance.refresh());
     }
     DuplicateMonomerManager._instance.refresh();

package/src/utils/monomer-lib/monomer-manager/monomer-manager.ts CHANGED Viewed

@@ -1028,6 +1028,7 @@ function getCorrectedMolBlock(molBlock: string) {
   // 2. RGP field is present in the correct format
   // 3. R group labels are written as R# and not just R
   // 4. there is no ISO field in the molblock. If there is, it needs to be substituted with RGP field and that's it.
+  // 5. make sure that R groups have no metadata in the atomblocks
   const lines = molBlock.split('\n');
@@ -1055,7 +1056,7 @@ function getCorrectedMolBlock(molBlock: string) {
       rgroupLineNumbers[atomI - molStartIdx] = rgroupNum;
   }
-  const rgroupLineNums = Object.values(rgroupLineNumbers);
+  const rgroupLineNums = Object.keys(rgroupLineNumbers);
   // find and possibly add rgp field
   const rgpLineIdx = lines.findIndex((line) => line.startsWith('M') && line.includes('RGP'));
@@ -1066,6 +1067,23 @@ function getCorrectedMolBlock(molBlock: string) {
     const mEndIdx = lines.findIndex((line) => line.startsWith('M') && line.includes('END'));
     lines.splice(mEndIdx, 0, rgpLine);
   }
+  //make sure that R# lines do not have any metadata that can be interpreted as isotopes or anything else
+  //for example, following line could be interpreted as isotope with mass 2 in some cases
+  //"    3.9970    0.3462    0.0000 R#  0  0  0  0  0  1  0  0  0  0  2  0"
+  const rGroupActualLines = rgroupLineNums.filter((rLine) => !!Number.parseInt(rLine)).map((atomLine) => Number.parseInt(atomLine) + molStartIdx);
+  rGroupActualLines.forEach((lineIdx) => {
+    const splitLine = lines[lineIdx].split(' ');
+    const rIdx = splitLine.findIndex((s) => s === 'R#');
+    if (rIdx === -1)
+      return;
+    for (let i = rIdx + 1; i < splitLine.length; i++) {
+      if (!!splitLine[i] && splitLine[i].length == 1 && (Number.parseInt(splitLine[i]) ?? 0) > 0)
+        splitLine[i] = '0';
+    }
+    lines[lineIdx] = splitLine.join(' ');
+  });
   return lines.join('\n');
 }
@@ -1087,12 +1105,24 @@ function monomerFromDfRow(dfRow: DG.Row): Monomer {
     if (typeof metaJSON[key] === 'object')
       metaJSON[key] = JSON.stringify(metaJSON[key]);
   }
+  const smiles = dfRow.get(MONOMER_DF_COLUMN_NAMES.MONOMER);
+  if (!smiles)
+    throw new Error('Monomer SMILES is empty');
+  let molfile = '';
+  try {
+    molfile = grok.chem.convert(smiles, DG.chem.Notation.Smiles, DG.chem.Notation.MolBlock);
+    molfile = getCorrectedMolBlock(molfile);
+  } catch (e) {
+    grok.shell.error(`Error converting SMILES to molfile, \n ${smiles}`);
+    console.error(e);
+  }
   return {
     symbol: dfRow.get(MONOMER_DF_COLUMN_NAMES.SYMBOL),
     name: dfRow.get(MONOMER_DF_COLUMN_NAMES.NAME),
-    molfile: '',
-    smiles: dfRow.get(MONOMER_DF_COLUMN_NAMES.MONOMER),
+    molfile: molfile,
+    smiles: smiles,
     polymerType: dfRow.get(MONOMER_DF_COLUMN_NAMES.POLYMER_TYPE),
     monomerType: dfRow.get(MONOMER_DF_COLUMN_NAMES.MONOMER_TYPE),
     naturalAnalog: dfRow.get(MONOMER_DF_COLUMN_NAMES.NATURAL_ANALOG),

package/src/utils/seq-helper/seq-handler.ts CHANGED Viewed

@@ -1,3 +1,5 @@
+/* eslint-disable max-lines */
+/* eslint-disable @typescript-eslint/no-unused-vars */
 import * as DG from 'datagrok-api/dg';
 import wu from 'wu';
@@ -5,7 +7,7 @@ import wu from 'wu';
 /* eslint-disable max-len */
 import {ALIGNMENT, ALPHABET, candidateAlphabets, getSplitterWithSeparator, NOTATION, positionSeparator, splitterAsFasta, splitterAsHelm, TAGS} from '@datagrok-libraries/bio/src/utils/macromolecule/index';
 import {INotationProvider, ISeqSplitted, SeqColStats, SplitterFunc,} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
-import {detectAlphabet, splitterAsFastaSimple, StringListSeqSplitted} from '@datagrok-libraries/bio/src/utils/macromolecule/utils';
+import {detectAlphabet, detectHelmAlphabet, splitterAsFastaSimple, StringListSeqSplitted} from '@datagrok-libraries/bio/src/utils/macromolecule/utils';
 import {mmDistanceFunctions, MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
 import {mmDistanceFunctionType} from '@datagrok-libraries/ml/src/macromolecule-distance-functions/types';
 import {getMonomerLibHelper, IMonomerLibHelper} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
@@ -123,6 +125,12 @@ export class SeqHandler implements ISeqHandler {
         uh.column.setTag(TAGS.alphabetSize, alphabetSize.toString());
         uh.column.setTag(TAGS.alphabetIsMultichar, alphabetIsMultichar ? 'true' : 'false');
       }
+    } else if (units === NOTATION.HELM) {
+      let alphabet = uh.column.getTag(TAGS.alphabet);
+      if (alphabet === null) {
+        alphabet = detectHelmAlphabet(uh.stats.freq, candidateAlphabets, uh.defaultGapOriginal);
+        uh.column.setTag(TAGS.alphabet, alphabet);
+      }
     }
   }
@@ -539,6 +547,7 @@ export class SeqHandler implements ISeqHandler {
     // convert the peptides list to a set for faster lookup
     const peptidesSet = new Set(peptides);
     // get splitter for given separator and check if all monomers are in the lib
+    // eslint-disable-next-line @typescript-eslint/no-unused-vars
     const splitterFunc = getSplitterWithSeparator(this.separator!);
     // iterate over the columns, split them and check if all monomers are in the lib
     //TODO maybe add missing threshold so that if there are not too many missing monomers

package/src/utils/seq-helper/seq-helper.ts CHANGED Viewed

@@ -87,7 +87,7 @@ export class SeqHelper implements ISeqHelper {
     //#endregion From HelmToMolfileConverter
-    const molHlList = molfilesV3K.map((item: MolfileWithMap) => getMolHighlight(item.monomers.values(), monomerLib));
+    //const molHlList = molfilesV3K.map((item: MolfileWithMap) => getMolHighlight(item.monomers.values(), monomerLib));
     const molCol = DG.Column.fromStrings(molColName, molList);
     molCol.semType = DG.SEMTYPE.MOLECULE;

package/src/widgets/composition-analysis-widget.ts CHANGED Viewed

@@ -26,7 +26,9 @@ export function getCompositionAnalysisWidget(
   const rowIdx = val.cell.rowIndex;
   const seqSS = sh.getSplitted(rowIdx);
   wu.count(0).take(seqSS.length).filter((posIdx) => !seqSS.isGap(posIdx)).forEach((posIdx) => {
-    const cm = seqSS.getCanonical(posIdx);
+    let cm = seqSS.getCanonical(posIdx);
+    if (biotype === HelmTypes.NUCLEOTIDE && sh.isHelm() && cm[1] === '(' && cm[cm.length - 2] === ')')
+      cm = cm.substring(2, cm.length - 2);
     const count = counts[cm] || 0;
     counts[cm] = count + 1;
   });