npm - @datagrok/bio - Versions diffs - 2.25.0 → 2.25.2 - Mend

@datagrok/bio 2.25.0 → 2.25.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

package/CHANGELOG.md +14 -0
package/detectors.js +26 -12
package/dist/package-test.js +5 -5
package/dist/package-test.js.map +1 -1
package/dist/package.js +3 -3
package/dist/package.js.map +1 -1
package/package.json +2 -2
package/scripts/mol-to-helm.py +1279 -0
package/src/package-api.ts +14 -0
package/src/package.g.ts +9 -0
package/src/package.ts +27 -1
package/src/utils/monomer-lib/library-file-manager/ui.ts +23 -4
package/src/utils/monomer-lib/monomer-manager/monomer-manager.ts +34 -13
package/src/utils/seq-helper/seq-handler.ts +15 -6
package/src/widgets/sequence-scrolling-widget.ts +195 -183
package/test-console-output-1.log +774 -766
package/test-record-1.mp4 +0 -0

package/src/package-api.ts CHANGED Viewed

@@ -12,6 +12,13 @@ export namespace scripts {
     return await grok.functions.call('Bio:Embed', { molecule });
   }
+  /**
+  Converts molecules to HELM notation based on monomer library
+  */
+  export async function molToHelmConverterPy(moleculesDataframe: DG.DataFrame , moleculesColumn: DG.Column , libraryJSON: string ): Promise<DG.DataFrame> {
+    return await grok.functions.call('Bio:MolToHelmConverterPy', { moleculesDataframe, moleculesColumn, libraryJSON });
+  }
   /**
   Create the model peptides/DNA sequences with peptides data
   */
@@ -172,6 +179,13 @@ export namespace funcs {
     return await grok.functions.call('Bio:SequenceSpaceTopMenu', { table, molecules, methodName, similarityMetric, plotEmbeddings, preprocessingFunction, options, clusterEmbeddings, isDemo });
   }
+  /**
+  Converts Peptide molecules to HELM notation by matching with monomer library
+  */
+  export async function moleculesToHelmTopMenu(table: DG.DataFrame , molecules: DG.Column ): Promise<void> {
+    return await grok.functions.call('Bio:MoleculesToHelmTopMenu', { table, molecules });
+  }
   /**
   Converts sequences to molblocks
   */

package/src/package.g.ts CHANGED Viewed

@@ -264,6 +264,15 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, molecules: DG.Co
   return await PackageFunctions.sequenceSpaceTopMenu(table, molecules, methodName, similarityMetric, plotEmbeddings, preprocessingFunction, options, clusterEmbeddings, isDemo);
 }
+//name: Molecules to HELM
+//description: Converts Peptide molecules to HELM notation by matching with monomer library
+//input: dataframe table { description: Input data table }
+//input: column molecules { semType: Molecule; description: Molecule column }
+//top-menu: Bio | Transform | Molecules to HELM...
+export async function moleculesToHelmTopMenu(table: DG.DataFrame, molecules: DG.Column) : Promise<void> {
+  await PackageFunctions.moleculesToHelmTopMenu(table, molecules);
+}
 //name: To Atomic Level
 //description: Converts sequences to molblocks
 //input: dataframe table { description: Input data table }

package/src/package.ts CHANGED Viewed

@@ -78,10 +78,12 @@ import {molecular3DStructureWidget, toAtomicLevelWidget} from './widgets/to-atom
 import {handleSequenceHeaderRendering} from './widgets/sequence-scrolling-widget';
 import {PolymerType} from '@datagrok-libraries/js-draw-lite/src/types/org';
 import {BilnNotationProvider} from './utils/biln';
-import {MonomerLibFromFilesProvider} from './utils/monomer-lib/library-file-manager/monomers-lib-provider';
+import * as api from './package-api';
 export const _package = new BioPackage(/*{debug: true}/**/);
 export * from './package.g';
 // /** Avoid reassigning {@link monomerLib} because consumers subscribe to {@link IMonomerLib.onChanged} event */
 // let monomerLib: MonomerLib | null = null;
 let initBioPromise: Promise<void> | null = null;
@@ -617,6 +619,30 @@ export class PackageFunctions {
     return res;
   }
+  @grok.decorators.func({
+    name: 'Molecules to HELM',
+    'top-menu': 'Bio | Transform | Molecules to HELM...',
+    description: 'Converts Peptide molecules to HELM notation by matching with monomer library',
+  })
+  static async moleculesToHelmTopMenu(
+    @grok.decorators.param({name: 'table', options: {description: 'Input data table'}})table: DG.DataFrame,
+    @grok.decorators.param({name: 'molecules', options: {semType: 'Molecule', description: 'Molecule column'}})molecules: DG.Column,
+  ) {
+    // collect current monomer library
+    const monomerLib = _package.monomerLib;
+    const libJSON = JSON.stringify(monomerLib.toJSON());
+    await api.scripts.molToHelmConverterPy(table, molecules, libJSON);
+    // semtype is not automatically set, so we set it manually
+    const newCol = table.columns.toList().find((c) => c.name.toLowerCase().includes('regenerated sequence') && c.semType !== DG.SEMTYPE.MACROMOLECULE);
+    if (newCol) {
+      newCol.meta.units = NOTATION.HELM;
+      newCol.semType = DG.SEMTYPE.MACROMOLECULE;
+      newCol.setTag('cell.renderer', 'helm');
+    }
+  }
   @grok.decorators.func({
     name: 'To Atomic Level',
     description: 'Converts sequences to molblocks',

package/src/utils/monomer-lib/library-file-manager/ui.ts CHANGED Viewed

@@ -1,3 +1,5 @@
+/* eslint-disable rxjs/no-async-subscribe */
+/* eslint-disable rxjs/no-ignored-subscription */
 /* eslint-disable max-lines */
 /* Do not change these import lines to match external modules in webpack configuration */
 import * as grok from 'datagrok-api/grok';
@@ -5,7 +7,7 @@ import * as ui from 'datagrok-api/ui';
 import * as DG from 'datagrok-api/dg';
 import $ from 'cash-dom';
-import {Subject} from 'rxjs';
+import {Subject, Subscription} from 'rxjs';
 import {
   getUserLibSettings, setUserLibSettings
@@ -67,10 +69,12 @@ class MonomerLibraryManagerWidget {
     instance._widget = await instance.createWidget();
   }
+  private _fileUploadSubscription: Subscription | null = null;
   private async createWidget() {
     const content = await this.getWidgetContent();
     const monomerLibHelper = await getMonomerLibHelper();
-    // eslint-disable-next-line rxjs/no-ignored-subscription
+    this._fileUploadSubscription?.unsubscribe();
+    this._fileUploadSubscription =
     monomerLibHelper.fileUploadRequested.subscribe(
       () => this.promptToAddLibraryFiles()
     );
@@ -92,14 +96,29 @@ class MonomerLibraryManagerWidget {
       accept: '.json',
       open: async (selectedFile) => {
         const doAdd = async (provider: IMonomerLibProvider) => {
-          const content = await selectedFile.text();
           const name = selectedFile.name;
+          const existingLibs = await provider.listLibraries();
+          // check if library already exists
+          if (existingLibs.includes(name)) {
+            const confirm = await new Promise<boolean>((resolve) => {
+              ui.dialog('Confirm Library Update')
+                .add(ui.divText(`Library '${name}' already exists. Do you want to overwrite it?`))
+                .onOK(() => resolve(true))
+                .onCancel(() => resolve(false))
+                .show();
+            });
+            if (!confirm)
+              return;
+          }
+          const content = await selectedFile.text();
           const progressIndicator = DG.TaskBarProgressIndicator.create(`Adding ${name} as a monomer library`);
           try {
             await provider.addOrUpdateLibraryString(name, content);
           // this.eventManager.updateLibrarySelectionStatus(name, true);
           } catch (e) {
             grok.shell.error(`File ${name} is not a valid monomer library, verify it is aligned to HELM JSON schema.`);
+            console.error(e);
           } finally {
             progressIndicator.close();
           }
@@ -122,7 +141,7 @@ class MonomerLibraryManagerWidget {
           .onOK(async () => {
             const provider = providers.find((p) => p.name === providersInput.value)!; // should not be null
             await doAdd(provider);
-          });
+          }).show();
       },
     });
   }

package/src/utils/monomer-lib/monomer-manager/monomer-manager.ts CHANGED Viewed

@@ -62,24 +62,44 @@ export async function standardiseMonomers(monomers: Monomer[]) {
 /// matches molecules in the dataframe with monomers in the library by canonical smiles
 export async function matchMoleculesWithMonomers(molDf: DG.DataFrame, molColName: string, monomerLib: IMonomerLib, polymerType: PolymerType = 'PEPTIDE'): Promise<DG.DataFrame> {
+  const duplicates = monomerLib.duplicateMonomers?.[polymerType] ?? {};
   const converterFunc = DG.Func.find({package: 'Chem', name: 'convertMoleculeNotation'})[0];
   if (!converterFunc)
     throw new Error('Function convertMoleculeNotation not found, please install Chem package');
   // first: standardize monomers
   const monomers = monomerLib.getMonomerSymbolsByType(polymerType).map((s) => monomerLib.getMonomer(polymerType, s)!).filter((m) => m && (m.smiles || m.molfile));
   const fixedMonomers = await standardiseMonomers(monomers);
-  const cappedSmilse = fixedMonomers.map((m, i) => ({sym: m.symbol, smiles: capSmiles(m.smiles ?? '', m.rgroups ?? []), original: m.smiles, source: monomers[i]?.lib?.source})).filter((s) => !!s?.smiles && !s.smiles.includes('[*:'));
+  const unCappedMonomerSmilesMap = fixedMonomers.filter((m) => !!m.smiles).reduce((acc, m) => {
+    acc[m.smiles] = {symbol: m.symbol, smiles: m.smiles, original: m.smiles, source: m.lib?.source}; return acc;
+  }, {} as {[smiles: string]: {symbol: string, smiles: string, original: string | undefined, source: string | undefined}});
+  const cappedMonomerSmiles = fixedMonomers.map((m, i) => ({symbol: m.symbol, smiles: capSmiles(m.smiles ?? '', m.rgroups ?? []), original: m.smiles, source: monomers[i]?.lib?.source}))
+    .filter((s) => !!s?.smiles && !s.smiles.includes('[*:'));
   // canonicalize all monomer smiles
-  const monomerSmilesCol = DG.Column.fromList(DG.COLUMN_TYPE.STRING, 'MonomerSmiles', cappedSmilse.map((m) => m.smiles!));
+  const monomerSmilesCol = DG.Column.fromList(DG.COLUMN_TYPE.STRING, 'MonomerSmiles', cappedMonomerSmiles.map((m) => m.smiles!));
   monomerSmilesCol.semType = DG.SEMTYPE.MOLECULE;
   const canonicalizedMonomersSmilesCol: DG.Column = await converterFunc.apply({molecule: monomerSmilesCol, targetNotation: DG.chem.Notation.Smiles});
   if (!canonicalizedMonomersSmilesCol || canonicalizedMonomersSmilesCol.length !== monomerSmilesCol.length)
     throw new Error('Error canonicalizing monomer smiles');
-  canonicalizedMonomersSmilesCol.toList().forEach((s, i) => cappedSmilse[i].smiles = s);
+  canonicalizedMonomersSmilesCol.toList().forEach((s, i) => cappedMonomerSmiles[i].smiles = s);
+  const cappedMonomerSmilesMap = cappedMonomerSmiles.reduce((acc, m) => { acc[m.smiles] = m; return acc; }, {} as {[smiles: string]: {symbol: string, smiles: string, original: string | undefined, source: string | undefined}});
-  const molecules = molDf.col(molColName)!;
-  const canonicalizedMoleculesCol: DG.Column = await converterFunc.apply({molecule: molecules, targetNotation: DG.chem.Notation.Smiles});
-  if (!canonicalizedMoleculesCol || canonicalizedMoleculesCol.length !== molecules.length)
+  const moleculesOriginalCol = molDf.col(molColName)!;
+  const correctedOriginalList = moleculesOriginalCol.toList().map((s) => {
+    if (!s) return s;
+    try {
+      const isMolBlock = s.includes('\n');
+      return getCorrectedSmiles([], isMolBlock ? undefined : s, isMolBlock ? s : undefined);
+    } catch (_e) {
+      return s;
+    }
+  });
+  const moleculesOriginalColCorrected = DG.Column.fromList(DG.COLUMN_TYPE.STRING, 'MoleculesOriginalCorrected', correctedOriginalList);
+  // create dummy df
+  moleculesOriginalColCorrected.semType = DG.SEMTYPE.MOLECULE;
+  const _ddf = DG.DataFrame.fromColumns([moleculesOriginalColCorrected]);
+  const canonicalizedMoleculesCol: DG.Column = await converterFunc.apply({molecule: moleculesOriginalColCorrected, targetNotation: DG.chem.Notation.Smiles});
+  if (!canonicalizedMoleculesCol || canonicalizedMoleculesCol.length !== moleculesOriginalColCorrected.length)
     throw new Error('Error canonicalizing molecules');
   const canonicalizedMolecules = canonicalizedMoleculesCol.toList();
@@ -95,13 +115,14 @@ export async function matchMoleculesWithMonomers(molDf: DG.DataFrame, molColName
   for (let i = 0; i < canonicalizedMolecules.length; i++) {
     const mol = canonicalizedMolecules[i];
     if (!mol) continue;
-    for (let j = 0; j < cappedSmilse.length; j++) {
-      if (cappedSmilse[j].smiles === mol) {
-        matchingMonomerSmilesCol.set(i, cappedSmilse[j].original!, false);
-        matchingMonomerSymbolCol.set(i, cappedSmilse[j].sym, false);
-        sourceLibCol.set(i, cappedSmilse[j].source ?? '', false);
-        break;
-      }
+    const match = cappedMonomerSmilesMap[mol] ?? unCappedMonomerSmilesMap[mol];
+    if (match) {
+      const matchSymbol = match.symbol;
+      const sources = (duplicates[matchSymbol]?.length ?? 0) > 0 ? duplicates[matchSymbol].map((m) => m?.lib?.source).filter((s) => !!s).join(', ') : (match.source ?? '');
+      const originalSmiles = match.original ?? match.smiles;
+      matchingMonomerSmilesCol.set(i, originalSmiles, false);
+      matchingMonomerSymbolCol.set(i, matchSymbol, false);
+      sourceLibCol.set(i, sources, false);
     }
   }
   return resultDf;

package/src/utils/seq-helper/seq-handler.ts CHANGED Viewed

@@ -1049,21 +1049,30 @@ function joinToHelm(srcSS: ISeqSplitted, wrappers: string[], isDnaOrRna: boolean
 }
 function joinToBiln(srcSS: ISeqSplitted): string {
+  const needsSquareBrackets = (cm: string | null) => {
+    return cm && (cm.includes('-') || cm.includes('*') || cm.includes('[R'));
+  };
   if (!srcSS.graphInfo || !((srcSS.graphInfo.connections?.length ?? 0) > 0)) {
     const resOMList: string[] = new Array<string>(srcSS.length);
     for (let posIdx: number = 0; posIdx < srcSS.length; ++posIdx) {
-      resOMList[posIdx] = srcSS.getCanonical(posIdx);
-      if (resOMList[posIdx]?.includes('-')) // Biln uses '-' as a separator, need to enclose in []
-        resOMList[posIdx] = `[${resOMList[posIdx]}]`;
+      const canonical = srcSS.getCanonical(posIdx);
+      if (needsSquareBrackets(canonical)) // Biln uses '-' as a separator, need to enclose in []. also there might be smiles in there, where Rs are represented as '*' or R
+        resOMList[posIdx] = `[${canonical}]`;
+      else
+        resOMList[posIdx] = canonical;
     }
     return resOMList.join('-'); // Biln uses '-' as a separator
   } else { // conversion happens only if there is a graph info
     const disjointSequenceIdxs = srcSS.graphInfo.disjointSeqStarts;
     const allSeqParts = new Array<string>(srcSS.length);
     for (let posIdx = 0; posIdx < srcSS.length; ++posIdx) {
-      allSeqParts[posIdx] = srcSS.getCanonical(posIdx);
-      if (allSeqParts[posIdx]?.includes('-')) // Biln uses '-' as a separator, need to enclose in []
-        allSeqParts[posIdx] = `[${allSeqParts[posIdx]}]`;
+      const canonical = srcSS.getCanonical(posIdx);
+      // allSeqParts[posIdx] = srcSS.getCanonical(posIdx);
+      if (needsSquareBrackets(canonical)) // Biln uses '-' as a separator, need to enclose in []
+        allSeqParts[posIdx] = `[${canonical}]`;
+      else
+        allSeqParts[posIdx] = canonical;
     }
     for (let i = 0; i < srcSS.graphInfo.connections.length; i++) {
       const conn: ISeqConnection = srcSS.graphInfo.connections[i];