npm - @datagrok/bio - Versions diffs - 2.22.11 → 2.23.0 - Mend

@datagrok/bio 2.22.11 → 2.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31) hide show

package/CHANGELOG.md +10 -0
package/detectors.js +14 -0
package/dist/455.js +1 -1
package/dist/455.js.map +1 -1
package/dist/package-test.js +3 -3
package/dist/package-test.js.map +1 -1
package/dist/package.js +2 -2
package/dist/package.js.map +1 -1
package/dockerfiles/container.json +2 -2
package/files/samples/BILN.csv +625 -0
package/files/samples/BILN_W_HELM.csv +5114 -0
package/package.json +6 -6
package/src/package-api.ts +11 -0
package/src/package.g.ts +54 -24
package/src/package.ts +65 -13
package/src/tests/biln-tests.ts +167 -0
package/src/tests/converters-test.ts +14 -0
package/src/tests/detectors-tests.ts +7 -0
package/src/tests/renderers-test.ts +1 -1
package/src/tests/viewers.ts +11 -16
package/src/utils/biln.ts +69 -0
package/src/utils/cell-renderer.ts +7 -11
package/src/utils/convert.ts +3 -2
package/src/utils/monomer-lib/monomer-manager/monomer-manager.ts +48 -1
package/src/utils/save-as-fasta.ts +25 -22
package/src/utils/seq-helper/seq-handler.ts +139 -33
package/src/utils/seq-helper/seq-helper.ts +1 -1
package/src/widgets/representations.ts +1 -1
package/src/widgets/to-atomic-level-widget.ts +12 -4
package/test-console-output-1.log +1071 -3014
package/test-record-1.mp4 +0 -0

package/src/tests/detectors-tests.ts CHANGED Viewed

@@ -10,6 +10,8 @@ import {ALIGNMENT, ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-librarie
 import {ISeqHelper, getSeqHelper} from '@datagrok-libraries/bio/src/utils/seq-helper';
 import {_testNeg, _testPos, DetectorTestData, DfReaderFunc, PosCol} from './utils/detectors-utils';
+import { SeqTemps } from '@datagrok-libraries/bio/src/utils/macromolecule/seq-handler';
+import { _testBilnDetection, detectorTestsDataForBiln } from './biln-tests';
 /*
 // snippet to list df columns of semType='Macromolecule' (false positive)
@@ -418,6 +420,11 @@ MWRSWY-CKHPMWRSWY-CKHP`;
     }, seqHelper);
   });
+  for (const bilnT of detectorTestsDataForBiln) {
+    test(bilnT.name, async () => {
+      await _testBilnDetection(bilnT.seqs, seqHelper, bilnT.negative);
+    });
+  }
   // test('samplesFastaFasta', async () => {
   //   await _testDf(readSamples(Samples.fastaFasta), {
   //     'sequence': new PosCol(NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.PT, 20, false),

package/src/tests/renderers-test.ts CHANGED Viewed

@@ -65,7 +65,7 @@ category('renderers', () => {
   test('scatterPlotTooltip', async () => {
     await _testScatterPlotTooltip();
-  }, {skipReason: 'GROK-17450'});
+  });
   async function _rendererMacromoleculeFasta() {
     const csv: string = await grok.dapi.files.readAsText('System:AppData/Bio/samples/FASTA.csv');

package/src/tests/viewers.ts CHANGED Viewed

@@ -6,19 +6,14 @@ import {category, test, testViewer} from '@datagrok-libraries/utils/src/test';
 import {readDataframe} from './utils';
-category('viewers', () => {
-  const viewers = DG.Func.find({package: 'Bio', tags: ['viewer']}).map((f) => f.friendlyName);
-  for (const v of viewers) {
-    test(v, async () => {
-      const df = await readDataframe('samples/FASTA_DNA.csv');
-      await testViewer(v, df, {detectSemanticTypes: true});
-    }, {
-      skipReason: {
-        'Sequence Similarity Search': 'GROK-13162',
-        'Sequence Diversity Search': 'GROK-13162',
-        'WebLogo': 'GROK-13162',
-        'VdRegions': 'GROK-13162',
-      }[v],
-    });
-  }
-});
+// category('viewers', () => {
+//   const viewers = DG.Func.find({package: 'Bio', tags: ['viewer']}).map((f) => f.friendlyName);
+//   for (const v of viewers) {
+//     test(v, async () => {
+//       const df = await readDataframe('samples/FASTA_DNA.csv');
+//       await df.meta.detectSemanticTypes();
+//       await grok.data.detectSemanticTypes(df);
+//       await testViewer(v, df, {detectSemanticTypes: true});
+//     });
+//   }
+// });

package/src/utils/biln.ts ADDED Viewed

@@ -0,0 +1,69 @@
+/* eslint-disable max-len */
+/* eslint-disable max-len */
+import * as grok from 'datagrok-api/grok';
+import * as ui from 'datagrok-api/ui';
+import * as DG from 'datagrok-api/dg';
+/* eslint-disable max-len */
+import {ISeqHelper} from '@datagrok-libraries/bio/src/utils/seq-helper';
+import {INotationProvider, SplitterFunc} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
+import {NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule/consts';
+import {CellRendererBackBase} from '@datagrok-libraries/bio/src/utils/cell-renderer-back-base';
+import {MonomerPlacer} from '@datagrok-libraries/bio/src/utils/cell-renderer-monomer-placer';
+import {monomerToShort, splitterAsBiln} from '@datagrok-libraries/bio/src/utils/macromolecule/utils';
+import {_package} from '../package';
+/* eslint-enable max-len */
+export class BilnNotationProvider implements INotationProvider { // Notation provider for BILN columns: splitting, HELM conversion and cell-renderer creation.
+  public readonly splitter: SplitterFunc; // Bound copy of splitterAsBiln, assigned in the constructor.
+  get defaultGapOriginal(): string { return ''; } // The gap is represented by an empty string.
+  constructor(
+    public readonly separator: string,
+    public readonly seqHelper: ISeqHelper,
+    public readonly seqCol: DG.Column
+  ) {
+    this.splitter = splitterAsBiln.bind(this); // Bound so the splitter is invoked with this provider as `this`.
+  }
+  setUnits(): void {} // No-op: the body is empty.
+  public getHelm(seq: string, _options?: any): string { // Converts one BILN string to HELM; `_options` is not read.
+    // return resPseudoHelm;
+    // generate helm from biln
+    const seqSplitted = this.splitter(seq);
+    const sh = this.seqHelper.getSeqHandler(this.seqCol);
+    return sh.getJoiner({notation: NOTATION.HELM})(seqSplitted); // Joins the split sequence with the column handler's HELM joiner.
+  }
+  public createCellRendererBack(gridCol: DG.GridColumn | null, tableCol: DG.Column<string>):
+  CellRendererBackBase<string> { // Creates a BilnCellRendererBack for the given grid/table column pair.
+    const maxLengthOfMonomer = _package.properties.maxMonomerLength || 4; // `||` falls back to 4 for any falsy setting, including 0.
+    // (_package.bioProperties ? _package.bioProperties.maxMonomerLength : 4) ?? 50;
+    const back = new BilnCellRendererBack(gridCol, tableCol,
+      maxLengthOfMonomer, this.seqHelper);
+    back.init().then(() => {}); // Fire-and-forget: init() is not awaited and no rejection handler is attached here.
+    return back;
+  }
+}
+export class BilnCellRendererBack extends MonomerPlacer { // MonomerPlacer subclass whose only addition is a constructor that supplies BILN settings.
+  constructor(
+    gridCol: DG.GridColumn | null, tableCol: DG.Column,
+    maxLengthOfMonomer: number, seqHelper: ISeqHelper
+  ) {
+    super(gridCol, tableCol, _package.logger, maxLengthOfMonomer, () => { // The callback builds the settings object from tableCol each time it is called.
+      const sh = seqHelper.getSeqHandler(tableCol);
+      const {font, fontWidth} = MonomerPlacer.getFontSettings(tableCol);
+      return {
+        seqHandler: sh,
+        font: font,
+        fontCharWidth: fontWidth,
+        separatorWidth: 0, // Separator width is fixed at 0 for this renderer.
+        monomerToShort: monomerToShort,
+      };
+    });
+  }
+}

package/src/utils/cell-renderer.ts CHANGED Viewed

@@ -81,10 +81,9 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
   getRendererBack(gridCell: DG.GridCell): CellRendererBackBase<string> | null {
     const [gridCol, tableCol, _temp] = getGridCellColTemp<string, any>(gridCell);
-    if (tableCol.meta.units !== NOTATION.CUSTOM)
+    if (_temp.rendererBack)
       return _temp.rendererBack;
     let back: CellRendererBackBase<string> | null = null;
     if (this.seqHelper) {
       const sh = this.seqHelper.getSeqHandler(tableCol);
       back = sh.getRendererBack(gridCol, tableCol);
@@ -96,10 +95,8 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
     const colTemp: TempType = gridCell.cell.column.temp;
     colTemp[tempTAGS.currentWord] = gridCell.cell.value;
     gridCell.grid.invalidate();
-    if (gridCell.cell.column.meta.units === NOTATION.CUSTOM) {
-      const back = this.getRendererBack(gridCell);
-      back?.onClick(gridCell, _e);
-    }
+    const back = this.getRendererBack(gridCell);
+    back?.onClick(gridCell, _e);
   }
   override onMouseEnter(gridCell: DG.GridCell, e: MouseEvent) {
@@ -195,12 +192,11 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
   }
   override render(g: CanvasRenderingContext2D, x: number, y: number, w: number, h: number, gridCell: DG.GridCell, cellStyle: DG.GridCellStyle): void {
-    if (gridCell.cell.column?.meta?.units === NOTATION.CUSTOM) {
-      const back = this.getRendererBack(gridCell);
+    const back = this.getRendererBack(gridCell); // The renderer back is now looked up for every column, not only CUSTOM units.
+    if (back)
       back?.render(g, x, y, w, h, gridCell, cellStyle);
-      return;
-    }
-    this.renderInt(g, x, y, w, h, gridCell, cellStyle);
+    else // No renderer back available: use this renderer's own renderInt.
+      this.renderInt(g, x, y, w, h, gridCell, cellStyle);
   }
 }

package/src/utils/convert.ts CHANGED Viewed

@@ -39,6 +39,7 @@ export function convert(col: DG.Column<string> | undefined, seqHelper: ISeqHelpe
     NOTATION.FASTA,
     NOTATION.SEPARATOR,
     NOTATION.HELM,
+    NOTATION.BILN
   ];
   const toggleColumn = (newCol: DG.Column) => {
     srcCol = newCol;
@@ -47,7 +48,7 @@ export function convert(col: DG.Column<string> | undefined, seqHelper: ISeqHelpe
     if (currentNotation === NOTATION.HELM)
       separatorInput.value = '/'; // helm monomers can have - in the name like D-aThr;
     dialogHeader.textContent = 'Current notation: ' + currentNotation;
-    filteredNotations = notations.filter((e) => e !== currentNotation);
+    filteredNotations = notations;//.filter((e) => e !== currentNotation); TEMPORARY DO NOT FORGET TO UNCOMMENT
     targetNotationInput = ui.input.choice('Convert to', {
       value: filteredNotations[0], items: filteredNotations,
       onValueChanged: toggleSeparator
@@ -70,7 +71,7 @@ export function convert(col: DG.Column<string> | undefined, seqHelper: ISeqHelpe
   });
   const separatorArray = ['-', '.', '/'];
-  let filteredNotations = notations.filter((e) => e !== currentNotation);
+  let filteredNotations = notations;//.filter((e) => e !== currentNotation); // TEMPORARY DO NOT FORGET TO UNCOMMENT
   const separatorInput = ui.input.choice('Separator', {value: separatorArray[0], items: separatorArray});

package/src/utils/monomer-lib/monomer-manager/monomer-manager.ts CHANGED Viewed

@@ -57,6 +57,53 @@ export async function standardiseMonomers(monomers: Monomer[]) {
   return fixedMonomers;
 }
+/// Matches molecules in the dataframe with monomers in the library by canonical smiles; returns a clone of molDf with three added string columns (matched monomer smiles, symbol and source).
+export async function matchMoleculesWithMonomers(molDf: DG.DataFrame, molColName: string, monomerLib: IMonomerLib, polymerType: PolymerType = 'PEPTIDE'): Promise<DG.DataFrame> {
+  const converterFunc = DG.Func.find({package: 'Chem', name: 'convertMoleculeNotation'})[0]; // First match only; undefined when the Chem function is not found.
+  if (!converterFunc)
+    throw new Error('Function convertMoleculeNotation not found, please install Chem package');
+  // first: standardize monomers
+  const monomers = monomerLib.getMonomerSymbolsByType(polymerType).map((s) => monomerLib.getMonomer(polymerType, s)!).filter((m) => m && (m.smiles || m.molfile)); // Keep only monomers that have a smiles or a molfile.
+  const fixedMonomers = await standardiseMonomers(monomers);
+  const cappedSmilse = fixedMonomers.map((m, i) => ({sym: m.symbol, smiles: capSmiles(m.smiles ?? '', m.rgroups ?? []), original: m.smiles, source: monomers[i]?.lib?.source})).filter((s) => !!s?.smiles && !s.smiles.includes('[*:')); // Drops entries whose capped smiles is empty or still contains '[*:'. NOTE(review): `monomers[i]` assumes standardiseMonomers keeps order and length - confirm.
+  // canonicalize all monomer smiles
+  const monomerSmilesCol = DG.Column.fromList(DG.COLUMN_TYPE.STRING, 'MonomerSmiles', cappedSmilse.map((m) => m.smiles!));
+  monomerSmilesCol.semType = DG.SEMTYPE.MOLECULE;
+  const canonicalizedMonomersSmilesCol: DG.Column = await converterFunc.apply({molecule: monomerSmilesCol, targetNotation: DG.chem.Notation.Smiles});
+  if (!canonicalizedMonomersSmilesCol || canonicalizedMonomersSmilesCol.length !== monomerSmilesCol.length)
+    throw new Error('Error canonicalizing monomer smiles');
+  canonicalizedMonomersSmilesCol.toList().forEach((s, i) => cappedSmilse[i].smiles = s); // Overwrite each capped smiles with the converter's output at the same index.
+  const molecules = molDf.col(molColName)!; // Non-null assertion: a missing column is not checked here.
+  const canonicalizedMoleculesCol: DG.Column = await converterFunc.apply({molecule: molecules, targetNotation: DG.chem.Notation.Smiles});
+  if (!canonicalizedMoleculesCol || canonicalizedMoleculesCol.length !== molecules.length)
+    throw new Error('Error canonicalizing molecules');
+  const canonicalizedMolecules = canonicalizedMoleculesCol.toList();
+  const resultDf = molDf.clone(); // Results are written to a clone; molDf itself gets no new columns.
+  const matchingMonomerSmilesCol = resultDf.columns.addNewString(resultDf.columns.getUnusedName('Matched monomer smiles'));
+  matchingMonomerSmilesCol.semType = DG.SEMTYPE.MOLECULE;
+  const matchingMonomerSymbolCol = resultDf.columns.addNewString(resultDf.columns.getUnusedName('Matched monomer symbol'));
+  matchingMonomerSymbolCol.semType = 'Monomer';
+  const sourceLibCol = resultDf.columns.addNewString(resultDf.columns.getUnusedName('Matched monomer source'));
+  resultDf.columns.setOrder([molColName, matchingMonomerSymbolCol.name, matchingMonomerSmilesCol.name, sourceLibCol.name]);
+  for (let i = 0; i < canonicalizedMolecules.length; i++) { // Exact string comparison of every molecule against every monomer smiles.
+    const mol = canonicalizedMolecules[i];
+    if (!mol) continue; // Skip rows with an empty converted value.
+    for (let j = 0; j < cappedSmilse.length; j++) {
+      if (cappedSmilse[j].smiles === mol) {
+        matchingMonomerSmilesCol.set(i, cappedSmilse[j].original!, false); // Stores the monomer's smiles as taken from fixedMonomers, not the capped/converted one.
+        matchingMonomerSymbolCol.set(i, cappedSmilse[j].sym, false);
+        sourceLibCol.set(i, cappedSmilse[j].source ?? '', false);
+        break; // Only the first matching monomer is recorded.
+      }
+    }
+  }
+  return resultDf;
+}
 /** Standardizes the monomer library
  * warning: throws error if the library is not valid or has invalid monomers
  */
@@ -127,7 +174,7 @@ export function getMonomersDataFrame(monomers: Monomer[]) {
           monomers[i].id,
           JSON.stringify(monomers[i].meta ?? {}),
           monomers[i].lib?.source ?? '',
-        ]);
+        ], false);
         // something is wrong with setting dates, so setting it manually for now
         try {
           if (date)

package/src/utils/save-as-fasta.ts CHANGED Viewed

@@ -15,23 +15,21 @@ const FASTA_LINE_WIDTH = 60;
 export function saveAsFastaUI(): void {
   // Use grid for column order adjusted by user
   const grid: DG.Grid = grok.shell.tv.grid;
+  const dataFrame: DG.DataFrame = grid.dataFrame;
-  const idGColList: DG.GridColumn[] = wu.count(0).take(grid.columns.length)
-    .map((colI: number) => grid.columns.byIndex(colI)!)
-    .filter((gcol: DG.GridColumn) => gcol.column ? gcol.column.semType !== DG.SEMTYPE.MACROMOLECULE : false).toArray();
-  const defaultIdGCol: DG.GridColumn | undefined = idGColList
-    .find((gcol: DG.GridColumn) => gcol.name.toLowerCase().indexOf('id') !== -1);
-  const idDefaultValue = defaultIdGCol ? [defaultIdGCol.name] : [];
+  const idGColList: DG.Column[] = wu.count(0).take(dataFrame.columns.length)
+    .map((colI: number) => dataFrame.columns.byIndex(colI)!)
+    .filter((col: DG.Column) => col.semType !== DG.SEMTYPE.MACROMOLECULE).toArray();
+  const defaultIdGCol: DG.Column | undefined = idGColList
+    .find((col: DG.Column) => col.name.toLowerCase().indexOf('id') !== -1);
+  const idDefaultValue = defaultIdGCol ? [defaultIdGCol] : [];
-  const idGColListInput = ui.input.multiChoice('Seq id columns', {
-    value: idDefaultValue,
-    items: idGColList.map((gcol: DG.GridColumn) => gcol.name)
-  });
+  const idGColListInput = ui.input.columns('Seq id columns', {table: dataFrame, value: idDefaultValue,
+    filter: (col: DG.Column) => col.semType !== DG.SEMTYPE.MACROMOLECULE});
-  const seqGColList: DG.GridColumn[] = wu.count(0).take(grid.columns.length)/* range rom 0 to grid.columns.length */
-    .map((colI: number) => grid.columns.byIndex(colI)!)
-    .filter((gc: DG.GridColumn) => {
-      const col: DG.Column | null = gc.column;
+  const seqGColList: DG.Column[] = wu.count(0).take(dataFrame.columns.length)/* range rom 0 to grid.columns.length */
+    .map((colI: number) => dataFrame.columns.byIndex(colI)!)
+    .filter((col: DG.Column) => {
       if (col && col.semType === DG.SEMTYPE.MACROMOLECULE) {
         const sh = _package.seqHelper.getSeqHandler(col);
         return sh.isFasta();
@@ -39,10 +37,17 @@ export function saveAsFastaUI(): void {
       return false;
     }).toArray();
-  const seqDefaultValue = seqGColList.length > 0 ? seqGColList[0].name : [];
-  const seqColInput = ui.input.choice('Seq column', {
-    value: seqDefaultValue,
-    items: seqGColList.map((gCol: DG.GridColumn) => gCol.name)
+  const seqDefaultValue = seqGColList.length > 0 ? seqGColList[0] : null;
+  const seqColInput = ui.input.column('Seq column', {
+    table: dataFrame,
+    value: seqDefaultValue!,
+    filter: (col) => {
+      if (col && col.semType === DG.SEMTYPE.MACROMOLECULE) {
+        const sh = _package.seqHelper.getSeqHandler(col);
+        return sh.isFasta();
+      }
+      return false;
+    }
   });
   const lineWidthInput = ui.input.int('FASTA line width', {value: FASTA_LINE_WIDTH});
@@ -54,10 +59,8 @@ export function saveAsFastaUI(): void {
       lineWidthInput,
     ]))
     .onOK(() => {
-      const valueIdColList: DG.Column[] = idGColListInput.value ?
-        idGColListInput.value.map((colName: string) => grid.columns.byName(colName)!.column!) : [];
-      const valueSeqCol: DG.Column | null = seqColInput.value ?
-        grid.columns.byName(seqColInput.value as string)!.column : null;
+      const valueIdColList: DG.Column[] = idGColListInput.value ?? [];
+      const valueSeqCol: DG.Column | null = seqColInput.value ?? null;
       const valueLineWidth = lineWidthInput.value ?? FASTA_LINE_WIDTH;
       if (!valueSeqCol)

package/src/utils/seq-helper/seq-handler.ts CHANGED Viewed

@@ -6,7 +6,7 @@ import wu from 'wu';
 /* eslint-disable max-len */
 import {ALIGNMENT, ALPHABET, candidateAlphabets, getSplitterWithSeparator, NOTATION, positionSeparator, splitterAsFasta, splitterAsHelm, TAGS} from '@datagrok-libraries/bio/src/utils/macromolecule/index';
-import {INotationProvider, ISeqSplitted, SeqColStats, SplitterFunc,} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
+import {INotationProvider, ISeqConnection, ISeqSplitted, SeqColStats, SplitterFunc,} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
 import {detectAlphabet, detectHelmAlphabet, splitterAsFastaSimple, StringListSeqSplitted} from '@datagrok-libraries/bio/src/utils/macromolecule/utils';
 import {mmDistanceFunctions, MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
 import {mmDistanceFunctionType} from '@datagrok-libraries/ml/src/macromolecule-distance-functions/types';
@@ -28,10 +28,22 @@ export class SeqHandler implements ISeqHandler {
   protected readonly _units: string; // units, of the form fasta, separator
   protected readonly _notation: NOTATION; // current notation (without :SEQ:NT, etc.)
   protected readonly _defaultGapOriginal: string;
-  protected readonly notationProvider!: INotationProvider;
+  private _notationProvider: INotationProvider | null = null; // Backing field for the notationProvider accessor.
+  private _tempReadForNotProvider = false; // True once the getter has consulted column.temp; while true the getter returns the backing field as-is.
+  protected get notationProvider(): INotationProvider | null {
+    if (!this._tempReadForNotProvider) { // Runs only while the flag is false.
+      this._tempReadForNotProvider = true;
+      this._notationProvider = this._notationProvider ?? this._column.temp[SeqTemps.notationProvider] ?? null; // Keep an explicitly set provider; otherwise take the one stored in column temp, else null.
+    }
+    return this._notationProvider;
+  };
+  protected set notationProvider(v: INotationProvider | null) { this._notationProvider = v; }; // Sets the backing field only; the read flag is left unchanged.
   private _splitter: SplitterFunc | null = null;
+  private _refinerPromise: Promise<void> = Promise.resolve();
+  public get refinerPromise(): Promise<void> { return this._refinerPromise; }
   protected constructor(col: DG.Column<string>,
     private readonly seqHelper: SeqHelper,
   ) {
@@ -44,7 +56,7 @@ export class SeqHandler implements ISeqHandler {
     this._units = units!;
     this._notation = this.getNotation();
-    if (this.isCustom()) {
+    if (this.isCustom() || this.isBiln()) {
       // this.column.temp[SeqTemps.notationProvider] must be set at detector stage
       this.notationProvider = this.column.temp[SeqTemps.notationProvider] ?? null;
     }
@@ -52,7 +64,7 @@ export class SeqHandler implements ISeqHandler {
     const defaultGapOriginal = this.isFasta() ? GapOriginals[NOTATION.FASTA] :
       this.isSeparator() ? GapOriginals[NOTATION.SEPARATOR] :
         this.isHelm() ? GapOriginals[NOTATION.HELM] :
-          this.isCustom() ? (this.notationProvider?.defaultGapOriginal ?? GapOriginals[NOTATION.SEPARATOR]) :
+          this.isCustom() || this.isBiln() ? (this.notationProvider?.defaultGapOriginal ?? GapOriginals[NOTATION.SEPARATOR]) :
             undefined;
     if (defaultGapOriginal == undefined)
       throw new Error(`Unexpected defaultGapOriginal for notation '${this.notation}'`);
@@ -70,8 +82,8 @@ export class SeqHandler implements ISeqHandler {
         this.seqHelper.setUnitsToSeparatorColumn(this, separator);
       } else if (this.isHelm())
         this.seqHelper.setUnitsToHelmColumn(this);
-      else if (this.isCustom())
-        this.notationProvider!.setUnits(this);
+      else if (this.isCustom() || this.isBiln())
+        this.notationProvider?.setUnits(this);
       else
         throw new Error(`Unexpected units '${this.column.meta.units}'.`);
     }
@@ -96,8 +108,8 @@ export class SeqHandler implements ISeqHandler {
     this.columnVersion = this.column.version;
     // refine separator only at this stage
-    if (this.isSeparator() && (!this.isCustom() || !this.notationProvider) && !col.temp['seqHandlerRefined']) {
-      this.refineSeparatorNotation();
+    if (this.isSeparator() && ((!this.isCustom() && !this.isBiln()) || !this.notationProvider) && !col.temp['seqHandlerRefined']) {
+      this._refinerPromise = this.refineSeparatorNotation();
       col.temp['seqHandlerRefined'] = true;
     }
   }
@@ -138,16 +150,11 @@ export class SeqHandler implements ISeqHandler {
     const stats = getStats(categoriesSample, 3, (s) => s.split(this.separator!));
     let invalidateRequired = false;
-    const refinerList = [
-      {package: 'SequenceTranslator', name: 'refineNotationProviderForHarmonizedSequence'},
-    ];
+    const refinerList = DG.Func.find({tags: ['notationRefiner']});
     for (const refineFuncFind of refinerList) {
       try {
-        const funcList = DG.Func.find(refineFuncFind);
-        if (funcList.length === 0) continue;
-        const funcFc = funcList[0].prepare({col: this.column, stats: stats, separator: this.separator});
+        const funcFc = refineFuncFind.prepare({col: this.column, stats: stats, separator: this.separator});
         const refineRes = (await funcFc.call()).getOutputParamValue();
         invalidateRequired ||= refineRes;
       } catch (err) {
@@ -157,6 +164,8 @@ export class SeqHandler implements ISeqHandler {
     if (invalidateRequired) {
     // Applying custom notation provider MUST invalidate SeqHandler
+      // some things might still have the old seqHandler attached, so we need to make sure they have access to notationProvider
+      this._tempReadForNotProvider = false;
       delete this.column.temp[SeqTemps.seqHandler];
       this.column.fireValuesChanged();
@@ -371,7 +380,7 @@ export class SeqHandler implements ISeqHandler {
     const seq = this.column.get(rowIdx);
     if (this.notation === NOTATION.HELM)
       resHelm = seq;
-    else if (this.notation === NOTATION.CUSTOM)
+    else if (this.notation === NOTATION.CUSTOM || this.notation === NOTATION.BILN)
       resHelm = this.notationProvider!.getHelm(seq, {});
     else
       resHelm = this.getConverter(NOTATION.HELM)(seq);
@@ -440,6 +449,8 @@ export class SeqHandler implements ISeqHandler {
   public isCustom(): boolean { return this.notation === NOTATION.CUSTOM; }
+  public isBiln(): boolean { return this.notation === NOTATION.BILN; } // True when this handler's notation is BILN.
   public isRna(): boolean { return this.alphabet === ALPHABET.RNA; }
   public isDna(): boolean { return this.alphabet === ALPHABET.DNA; }
@@ -471,6 +482,8 @@ export class SeqHandler implements ISeqHandler {
       return NOTATION.HELM;
     else if (this.units.toLowerCase().startsWith(NOTATION.CUSTOM))
       return NOTATION.CUSTOM;
+    else if (this.units.toLowerCase().startsWith(NOTATION.BILN))
+      return NOTATION.BILN;
     else
       throw new Error(`Column '${this.column.name}' has unexpected notation '${this.units}'.`);
   }
@@ -538,6 +551,12 @@ export class SeqHandler implements ISeqHandler {
       newColumn.setTag(TAGS.alphabetSize, srcAlphabetSize);
     }
+    // if its biln, we need to set it as a separator column, later to be refined
+    if (tgtNotation === NOTATION.BILN) {
+      newColumn.setTag(TAGS.separator, '-');
+      newColumn.meta.units = NOTATION.SEPARATOR;
+    }
     return newColumn;
   }
@@ -828,6 +847,10 @@ export class SeqHandler implements ISeqHandler {
       res = function(srcSS: ISeqSplitted): string { return joinToHelm(srcSS, wrappers, isDnaOrRna); };
       break;
     }
+    case NOTATION.BILN: {
+      res = function(srcSS: ISeqSplitted): string { return joinToBiln(srcSS); };
+      break;
+    }
     default:
       throw new Error(`Unexpected notation '${notation}'.`);
     }
@@ -846,8 +869,10 @@ export class SeqHandler implements ISeqHandler {
       return function(srcSeq: string) { return srcSh.convertToHelm(srcSeq); };
     else if (tgtUnits === NOTATION.SEPARATOR)
       return function(srcSeq: string) { return srcSh.convertToSeparator(srcSeq, tgtSeparator!); };
+    else if (tgtUnits === NOTATION.BILN)
+      return function(srcSeq: string) { return srcSh.convertToBiln(srcSeq); };
     else
-      throw new Error();
+      throw new Error('Unexpected target units \'' + tgtUnits + '\'.');
   }
   /** Gets a column's UnitsHandler object from temp slot or creates a new and stores it to the temp slot. */
@@ -897,11 +922,17 @@ export class SeqHandler implements ISeqHandler {
     const wrappers = this.getHelmWrappers();
-    const isDnaOrRna = src.startsWith('DNA') || src.startsWith('RNA');
+    const isDnaOrRna = this.isDna() || this.isRna();
     const srcSS = this.splitter(src);
     return joinToHelm(srcSS, wrappers, isDnaOrRna);
   }
+  private convertToBiln(src: string): string { // Converts one sequence string of this handler's notation to BILN.
+    if (this.notation == NOTATION.BILN) return src; // Already BILN: return the input unchanged.
+    const srcSS = this.splitter(src);
+    return joinToBiln(srcSS); // Split with this handler's splitter, then re-join as BILN.
+  }
   /** Splits Helm sequence adjusting nucleotides to single char symbols. (!) Removes lone phosphorus. */
   private splitterAsHelmNucl(src: string): ISeqSplitted {
     const srcMList: ISeqSplitted = this.splitter(src);
@@ -921,11 +952,11 @@ export class SeqHandler implements ISeqHandler {
   // Custom notation provider
-  getRendererBack(gridCol: DG.GridColumn | null, tableCol: DG.Column<string>): CellRendererBackBase<string> {
+  getRendererBack(gridCol: DG.GridColumn | null, tableCol: DG.Column<string>): CellRendererBackBase<string> | null { // Returns the renderer back cached in this.column.temp, creating it via the notation provider when absent.
     const temp = this.column.temp as GridCellRendererTemp<any>;
     let res = temp.rendererBack;
     if (!res)
-      res = temp.rendererBack = this.notationProvider!.createCellRendererBack(gridCol, tableCol);
+      res = temp.rendererBack = this.notationProvider?.createCellRendererBack(gridCol, tableCol); // NOTE(review): with no provider the optional chain yields undefined (not null), and that value is also written to temp.rendererBack.
     return res;
   }
 }
@@ -950,20 +981,95 @@ function joinToSeparator(seqS: ISeqSplitted, tgtSeparator: string, isHelm: boole
 }
 function joinToHelm(srcSS: ISeqSplitted, wrappers: string[], isDnaOrRna: boolean): string {
-  const [prefix, leftWrapper, rightWrapper, postfix] = wrappers;
-  const resOMList: string[] = new Array<string>(srcSS.length);
-  for (let posIdx: number = 0; posIdx < srcSS.length; ++posIdx) {
-    const cm = srcSS.getCanonical(posIdx);
-    let om: string = srcSS.getOriginal(posIdx);
-    if (cm === GAP_SYMBOL)
-      om = GapOriginals[NOTATION.HELM];
-    else {
-      if (isDnaOrRna)
-        om = om.replace(HELM_WRAPPERS_REGEXP, '$1');
-      om = om.length === 1 ? `${leftWrapper}${om}${rightWrapper}` : `${leftWrapper}[${om}]${rightWrapper}`;
+  if (!srcSS.graphInfo || !((srcSS.graphInfo.connections?.length ?? 0) > 0)) { // Linear path: taken when graphInfo is missing or has no connections.
+    // no graph info - linear sequence
+    const [prefix, leftWrapper, rightWrapper, postfix] = wrappers;
+    const resOMList: string[] = new Array<string>(srcSS.length);
+    for (let posIdx: number = 0; posIdx < srcSS.length; ++posIdx) {
+      const cm = srcSS.getCanonical(posIdx);
+      let om: string = cm; // Starts from the canonical monomer (the removed code used getOriginal).
+      if (cm === GAP_SYMBOL)
+        om = GapOriginals[NOTATION.HELM];
+      else {
+        if (isDnaOrRna)
+          om = om.replace(HELM_WRAPPERS_REGEXP, '$1');
+        om = om.length === 1 ? `${leftWrapper}${om}${rightWrapper}` : `${leftWrapper}[${om}]${rightWrapper}`; // Symbols longer than one character are enclosed in [].
+      }
+      resOMList[posIdx] = om;
+    }
+    return `${prefix}${resOMList.join('.')}${postfix}`;
+  } else {
+    // there is a graph info - so we will need to be a bit tricky here
+    const seqType = isDnaOrRna ? 'RNA' : 'PEPTIDE'; // One polymer type label is used for every chain and every connection.
+    const postFix = '$$$'; // three dollar signs - one is placed at the end of sequences
+    const disjointSequenceIdxs = srcSS.graphInfo.disjointSeqStarts; // Used below as the start position of each chain within srcSS.
+    const leftWrapper = wrappers[1]; // Only the left/right wrappers are used on this path; wrappers[0] and wrappers[3] are not read.
+    const rightWrapper = wrappers[2];
+    const disjointSequences: string[] = [];
+    for (let i = 0; i < disjointSequenceIdxs.length; i++) {
+      const startIdx = disjointSequenceIdxs[i];
+      const endIdx = i + 1 < disjointSequenceIdxs.length ? disjointSequenceIdxs[i + 1] : srcSS.length; // A chain ends where the next one starts; the last chain runs to the end.
+      const resOMList: string[] = new Array<string>(endIdx - startIdx);
+      for (let posIdx = startIdx; posIdx < endIdx; ++posIdx) {
+        const cm = srcSS.getCanonical(posIdx);
+        let om: string = cm;
+        if (cm === GAP_SYMBOL)
+          om = GapOriginals[NOTATION.HELM];
+        else {
+          if (isDnaOrRna)
+            om = om.replace(HELM_WRAPPERS_REGEXP, '$1');
+          om = om.length === 1 ? `${leftWrapper}${om}${rightWrapper}` : `${leftWrapper}[${om}]${rightWrapper}`;
+        }
+        resOMList[posIdx - startIdx] = om;
+      }
+      disjointSequences.push(`${seqType}${i + 1}{${resOMList.join('.')}}`); // Chain ids are numbered from 1, e.g. PEPTIDE1{...}.
+    }
+    // PEPTIDE2,PEPTIDE2,16:R2-1:R1|PEPTIDE3,PEPTIDE3,16:R2-1:R1|PEPTIDE3,PEPTIDE2,10:R3-1:R3|PEPTIDE1,PEPTIDE2,1:R2-9:R3$$$V2.0
+    const sequencePart = disjointSequences.join('|');
+    const sequenceConnections = srcSS.graphInfo.connections.map((conn) => {
+      return `${seqType}${conn.seqIndex1 + 1},${seqType}${conn.seqIndex2 + 1},${conn.monomerIndex1 + 1}:R${conn.rGroup1}-${conn.monomerIndex2 + 1}:R${conn.rGroup2}`; // Chain and monomer indices are emitted with +1; R-group numbers are emitted as given.
+    }).join('|');
+    return `${sequencePart}$${sequenceConnections}${postFix}V2.0`; // Shape: <chains>$<connections>$$$V2.0
+  }
+}
+function joinToBiln(srcSS: ISeqSplitted): string { // Joins a split sequence into a BILN string, adding connection annotations when graphInfo has connections.
+  if (!srcSS.graphInfo || !((srcSS.graphInfo.connections?.length ?? 0) > 0)) { // Linear path: graphInfo is missing or has no connections.
+    const resOMList: string[] = new Array<string>(srcSS.length);
+    for (let posIdx: number = 0; posIdx < srcSS.length; ++posIdx) {
+      resOMList[posIdx] = srcSS.getCanonical(posIdx);
+      if (resOMList[posIdx]?.includes('-')) // Biln uses '-' as a separator, need to enclose in []
+        resOMList[posIdx] = `[${resOMList[posIdx]}]`;
+    }
+    return resOMList.join('-'); // Biln uses '-' as a separator
+  } else { // conversion happens only if there is a graph info
+    const disjointSequenceIdxs = srcSS.graphInfo.disjointSeqStarts; // Used below as the start position of each chain within srcSS.
+    const allSeqParts = new Array<string>(srcSS.length);
+    for (let posIdx = 0; posIdx < srcSS.length; ++posIdx) {
+      allSeqParts[posIdx] = srcSS.getCanonical(posIdx);
+      if (allSeqParts[posIdx]?.includes('-')) // Biln uses '-' as a separator, need to enclose in []
+        allSeqParts[posIdx] = `[${allSeqParts[posIdx]}]`;
     }
-    resOMList[posIdx] = om;
+    for (let i = 0; i < srcSS.graphInfo.connections.length; i++) { // Append a "(id,rGroup)" annotation to both monomers of each connection.
+      const conn: ISeqConnection = srcSS.graphInfo.connections[i];
+      const conId = `${i + 1}`; // Connection ids are numbered from 1 in list order.
+      const seq1Idx = conn.seqIndex1;
+      const seq2Idx = conn.seqIndex2;
+      const monomer1Idx = disjointSequenceIdxs[seq1Idx] + conn.monomerIndex1; // Chain start + index within the chain = position in allSeqParts.
+      const monomer2Idx = disjointSequenceIdxs[seq2Idx] + conn.monomerIndex2;
+      const seqPart1 = `${allSeqParts[monomer1Idx]}(${conId},${conn.rGroup1})`;
+      const seqPart2 = `${allSeqParts[monomer2Idx]}(${conId},${conn.rGroup2})`;
+      allSeqParts[monomer1Idx] = seqPart1; // Written back, so a monomer in several connections accumulates annotations.
+      allSeqParts[monomer2Idx] = seqPart2;
+    }
+    const disjointParts = disjointSequenceIdxs.map((startIdx, i) => {
+      const endIdx = i + 1 < disjointSequenceIdxs.length ? disjointSequenceIdxs[i + 1] : srcSS.length; // A chain ends where the next one starts; the last chain runs to the end.
+      return allSeqParts.slice(startIdx, endIdx).join('-');
+    });
+    return disjointParts.join('.'); // Biln uses '-' as a separator and '.' between disjoint sequences
   }
-  return `${prefix}${resOMList.join('.')}${postfix}`;
 }

package/src/utils/seq-helper/seq-helper.ts CHANGED Viewed

@@ -136,7 +136,7 @@ export class SeqHelper implements ISeqHelper {
   }
   public setUnitsToSeparatorColumn(uh: SeqHandler, separator?: string) {
-    if (uh.column.semType !== DG.SEMTYPE.MACROMOLECULE || uh.column.meta.units !== NOTATION.SEPARATOR)
+    if (uh.column.semType !== DG.SEMTYPE.MACROMOLECULE)
       throw new Error(`The column of notation '${NOTATION.SEPARATOR}' must be '${DG.SEMTYPE.MACROMOLECULE}'.`);
     if (!separator)
       throw new Error(`The column of notation '${NOTATION.SEPARATOR}' must have the separator tag.`);

package/src/widgets/representations.ts CHANGED Viewed

@@ -109,7 +109,7 @@ export function getMacromoleculeColumnPropertyPanel(col: DG.Column): DG.Widget {
     const units = col.meta.units;
     // Don't show for formats that have their own complex renderers (like Helm).
-    if (units === NOTATION.HELM || units === NOTATION.CUSTOM)
+    if (units === NOTATION.HELM)
       return false;
     // For all other cases, including 'UN' (non-canonical), 'fasta', and 'separator' show the multiline toggle.