npm - @datagrok/bio - Versions diffs - 2.1.12 → 2.4.2 - Mend

@datagrok/bio 2.1.12 → 2.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (59) hide show

package/README.md +11 -12
package/css/helm.css +10 -0
package/detectors.js +83 -59
package/dist/package-test.js +2 -13168
package/dist/package-test.js.map +1 -0
package/dist/package.js +2 -10560
package/dist/package.js.map +1 -0
package/dockerfiles/Dockerfile +86 -0
package/files/icons/composition-analysis.svg +17 -0
package/files/icons/sequence-diversity-viewer.svg +4 -0
package/files/icons/sequence-similarity-viewer.svg +4 -0
package/files/icons/vdregions-viewer.svg +22 -0
package/files/icons/weblogo-viewer.svg +7 -0
package/files/tests/testUrl.csv +11 -0
package/files/tests/toAtomicLevelTest.csv +4 -0
package/package.json +24 -25
package/src/analysis/sequence-activity-cliffs.ts +11 -9
package/src/analysis/sequence-search-base-viewer.ts +2 -1
package/src/analysis/sequence-similarity-viewer.ts +3 -3
package/src/analysis/sequence-space.ts +2 -1
package/src/calculations/monomerLevelMols.ts +4 -4
package/src/package-test.ts +9 -2
package/src/package.ts +215 -131
package/src/substructure-search/substructure-search.ts +19 -16
package/src/tests/Palettes-test.ts +1 -1
package/src/tests/WebLogo-positions-test.ts +113 -57
package/src/tests/_first-tests.ts +9 -0
package/src/tests/activity-cliffs-tests.ts +8 -7
package/src/tests/activity-cliffs-utils.ts +17 -9
package/src/tests/bio-tests.ts +4 -5
package/src/tests/checkInputColumn-tests.ts +1 -1
package/src/tests/converters-test.ts +52 -17
package/src/tests/detectors-benchmark-tests.ts +3 -2
package/src/tests/detectors-tests.ts +177 -172
package/src/tests/fasta-export-tests.ts +1 -1
package/src/tests/monomer-libraries-tests.ts +34 -0
package/src/tests/pepsea-tests.ts +21 -0
package/src/tests/renderers-test.ts +21 -19
package/src/tests/sequence-space-test.ts +6 -4
package/src/tests/similarity-diversity-tests.ts +4 -4
package/src/tests/splitters-test.ts +4 -5
package/src/tests/substructure-filters-tests.ts +23 -1
package/src/tests/utils/sequences-generators.ts +1 -1
package/src/tests/utils.ts +2 -1
package/src/tests/viewers.ts +16 -0
package/src/utils/cell-renderer.ts +88 -35
package/src/utils/constants.ts +7 -6
package/src/utils/convert.ts +8 -2
package/src/utils/monomer-lib.ts +174 -0
package/src/utils/multiple-sequence-alignment.ts +44 -20
package/src/utils/pepsea.ts +78 -0
package/src/utils/save-as-fasta.ts +2 -1
package/src/utils/ui-utils.ts +15 -3
package/src/viewers/vd-regions-viewer.ts +113 -72
package/src/viewers/web-logo-viewer.ts +1031 -0
package/src/widgets/bio-substructure-filter.ts +38 -24
package/tsconfig.json +71 -72
package/webpack.config.js +4 -11
package/dist/vendors-node_modules_datagrok-libraries_ml_src_workers_dimensionality-reducer_js.js +0 -9039

package/src/tests/renderers-test.ts CHANGED Viewed

@@ -5,10 +5,11 @@ import {after, before, category, delay, expect, test} from '@datagrok-libraries/
 import {importFasta, multipleSequenceAlignmentAny} from '../package';
 import {convertDo} from '../utils/convert';
-import {SEM_TYPES, TAGS} from '../utils/constants';
+import * as C from '../utils/constants';
 import {generateLongSequence, generateManySequences, performanceTest} from './utils/sequences-generators';
 import {errorToConsole} from '@datagrok-libraries/utils/src/to-console';
-import {ALIGNMENT, ALPHABET, NOTATION, TAGS as bioTAGS, UnitsHandler} from '@datagrok-libraries/bio';
+import {ALIGNMENT, ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
+import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
 category('renderers', () => {
   let tvList: DG.TableView[];
@@ -20,8 +21,9 @@ category('renderers', () => {
   });
   after(async () => {
-    dfList.forEach((df: DG.DataFrame) => { grok.shell.closeTable(df); });
-    tvList.forEach((tv: DG.TableView) => tv.close());
+    // Closing viewes and data frames leads to exception
+    // dfList.forEach((df: DG.DataFrame) => { grok.shell.closeTable(df); });
+    // tvList.forEach((tv: DG.TableView) => tv.close());
   });
   test('long sequence performance ', async () => {
@@ -50,7 +52,7 @@ category('renderers', () => {
   test('afterConvert', async () => {
     await _testAfterConvert();
-  });
+  }, {skipReason: 'GROK-12765'});
   test('selectRendererBySemType', async () => {
     await _selectRendererBySemType();
@@ -104,8 +106,8 @@ category('renderers', () => {
     const seqDiffCol: DG.Column = DG.Column.fromStrings('SequencesDiff',
       ['meI/hHis/Aca/N/T/dK/Thr_PO3H2/Aca#D-Tyr_Et/Tyr_ab-dehydroMe/meN/E/N/dV']);
     seqDiffCol.tags[DG.TAGS.UNITS] = NOTATION.SEPARATOR;
-    seqDiffCol.tags[TAGS.SEPARATOR] = '/';
-    seqDiffCol.semType = SEM_TYPES.MACROMOLECULE_DIFFERENCE;
+    seqDiffCol.tags[bioTAGS.separator] = '/';
+    seqDiffCol.semType = C.SEM_TYPES.MACROMOLECULE_DIFFERENCE;
     const df = DG.DataFrame.fromColumns([seqDiffCol]);
     const tv: DG.TableView = grok.shell.addTableView(df);
@@ -116,7 +118,7 @@ category('renderers', () => {
     tvList.push(tv);
     const resCellRenderer = seqDiffCol.getTag(DG.TAGS.CELL_RENDERER);
-    expect(resCellRenderer, 'MacromoleculeDifference');
+    expect(resCellRenderer, C.SEM_TYPES.MACROMOLECULE_DIFFERENCE);
   }
   async function _testAfterMsa() {
@@ -143,17 +145,17 @@ category('renderers', () => {
     expect(srcSeqCol.getTag(bioTAGS.alphabet), ALPHABET.PT);
     expect(srcSeqCol.getTag(DG.TAGS.CELL_RENDERER), 'sequence');
-    const msaSeqCol: DG.Column = (await multipleSequenceAlignmentAny(df, srcSeqCol!))!;
+    const msaSeqCol = multipleSequenceAlignmentAny(srcSeqCol);
     tv.grid.invalidate();
-    expect(msaSeqCol.semType, DG.SEMTYPE.MACROMOLECULE);
-    expect(msaSeqCol.getTag(DG.TAGS.UNITS), NOTATION.FASTA);
-    expect(msaSeqCol.getTag(bioTAGS.aligned), ALIGNMENT.SEQ_MSA);
-    expect(msaSeqCol.getTag(bioTAGS.alphabet), ALPHABET.PT);
-    expect(msaSeqCol.getTag(DG.TAGS.CELL_RENDERER), 'sequence');
+    // expect(msaSeqCol.semType, DG.SEMTYPE.MACROMOLECULE);
+    // expect(msaSeqCol.getTag(DG.TAGS.UNITS), NOTATION.FASTA);
+    // expect(msaSeqCol.getTag(bioTAGS.aligned), ALIGNMENT.SEQ_MSA);
+    // expect(msaSeqCol.getTag(bioTAGS.alphabet), ALPHABET.PT);
+    // expect(msaSeqCol.getTag(DG.TAGS.CELL_RENDERER), 'sequence');
     // check newColumn with UnitsHandler constructor
-    const uh: UnitsHandler = new UnitsHandler(msaSeqCol);
+    // const uh: UnitsHandler = new UnitsHandler(msaSeqCol);
     dfList.push(df);
     tvList.push(tv);
@@ -191,8 +193,8 @@ category('renderers', () => {
     const seqDiffCol: DG.Column = DG.Column.fromStrings('SequencesDiff',
       ['meI/hHis/Aca/N/T/dK/Thr_PO3H2/Aca#D-Tyr_Et/Tyr_ab-dehydroMe/meN/E/N/dV']);
     seqDiffCol.tags[DG.TAGS.UNITS] = NOTATION.SEPARATOR;
-    seqDiffCol.tags[TAGS.SEPARATOR] = '/';
-    seqDiffCol.semType = SEM_TYPES.MACROMOLECULE_DIFFERENCE;
+    seqDiffCol.tags[bioTAGS.separator] = '/';
+    seqDiffCol.semType = C.SEM_TYPES.MACROMOLECULE_DIFFERENCE;
     const df = DG.DataFrame.fromColumns([seqDiffCol]);
     const tv = grok.shell.addTableView(df);
     dfList.push(df);
@@ -213,8 +215,8 @@ category('renderers', () => {
     const seqDiffCol: DG.Column = DG.Column.fromStrings('SequencesDiff',
       ['meI/hHis/Aca/N/T/dK/Thr_PO3H2/Aca#D-Tyr_Et/Tyr_ab-dehydroMe/meN/E/N/dV']);
     seqDiffCol.tags[DG.TAGS.UNITS] = NOTATION.SEPARATOR;
-    seqDiffCol.tags[TAGS.SEPARATOR] = '/';
-    seqDiffCol.semType = SEM_TYPES.MACROMOLECULE;
+    seqDiffCol.tags[bioTAGS.separator] = '/';
+    seqDiffCol.semType = DG.SEMTYPE.MACROMOLECULE;
     const tgtCellRenderer = 'MacromoleculeDifference';
     seqDiffCol.setTag(DG.TAGS.CELL_RENDERER, tgtCellRenderer);
     const df = DG.DataFrame.fromColumns([seqDiffCol]);

package/src/tests/sequence-space-test.ts CHANGED Viewed

@@ -1,7 +1,9 @@
-import {after, before, category, test, expect, delay} from '@datagrok-libraries/utils/src/test';
+import * as grok from 'datagrok-api/grok';
+import * as ui from 'datagrok-api/ui';
 import * as DG from 'datagrok-api/dg';
+import {after, before, category, test, expect, delay} from '@datagrok-libraries/utils/src/test';
 import {readDataframe} from './utils';
-import * as grok from 'datagrok-api/grok';
 import {_testSequenceSpaceReturnsResult} from './sequence-space-utils';
 category('sequenceSpace', async () => {
@@ -16,7 +18,7 @@ category('sequenceSpace', async () => {
     await _testSequenceSpaceReturnsResult(testFastaDf, 'UMAP', 'MSA');
     grok.shell.closeTable(testFastaDf);
     testFastaTableView.close();
-  });
+  }, {skipReason: 'GROK-12775'});
   test('sequenceSpaceWithEmptyRows', async () => {
     testHelmWithEmptyRows = await readDataframe('tests/sample_MSA_data_empty_vals.csv');
@@ -24,5 +26,5 @@ category('sequenceSpace', async () => {
     await _testSequenceSpaceReturnsResult(testHelmWithEmptyRows, 'UMAP', 'MSA');
     grok.shell.closeTable(testHelmWithEmptyRows);
     testHelmWithEmptyRowsTableView.close();
-  });
+  }, {skipReason: 'GROK-12775'});
 });

package/src/tests/similarity-diversity-tests.ts CHANGED Viewed

@@ -31,9 +31,9 @@ category('similarity/diversity', async () => {
 async function _testSimilaritySearchViewer() {
   const molecules = await createTableView('tests/sample_MSA_data.csv');
-  const viewer = molecules.addViewer('SequenceSimilaritySearchViewer');
+  const viewer = molecules.addViewer('Sequence Similarity Search');
   await delay(100);
-  const similaritySearchViewer = getSearchViewer(viewer, 'SequenceSimilaritySearchViewer');
+  const similaritySearchViewer = getSearchViewer(viewer, 'Sequence Similarity Search');
   viewList.push(similaritySearchViewer);
   viewList.push(molecules);
   if (!similaritySearchViewer.molCol)
@@ -59,9 +59,9 @@ async function _testSimilaritySearchViewer() {
 async function _testDiversitySearchViewer() {
   const molecules = await createTableView('tests/sample_MSA_data.csv');
-  const viewer = molecules.addViewer('SequenceDiversitySearchViewer');
+  const viewer = molecules.addViewer('Sequence Diversity Search');
   await delay(10);
-  const diversitySearchviewer = getSearchViewer(viewer, 'SequenceDiversitySearchViewer');
+  const diversitySearchviewer = getSearchViewer(viewer, 'Sequence Diversity Search');
   viewList.push(diversitySearchviewer);
   viewList.push(molecules);
   if (!diversitySearchviewer.renderMolIds)

package/src/tests/splitters-test.ts CHANGED Viewed

@@ -6,7 +6,7 @@ import {after, before, category, test, expect, expectArray, expectObject} from '
 import * as C from '../utils/constants';
 import {splitToMonomers, _package, getHelmMonomers} from '../package';
 import {errorToConsole} from '@datagrok-libraries/utils/src/to-console';
-import {splitterAsFasta, splitterAsHelm} from '@datagrok-libraries/bio';
+import {TAGS as bioTAGS, splitterAsFasta, splitterAsHelm} from '@datagrok-libraries/bio/src/utils/macromolecule';
 category('splitters', () => {
@@ -83,7 +83,7 @@ category('splitters', () => {
     const semType = await grok.functions.call('Bio:detectMacromolecule', {col: seqCol});
     if (semType)
       seqCol.semType = semType;
-    seqCol.setTag(C.TAGS.ALIGNED, C.MSA);
+    seqCol.setTag(bioTAGS.aligned, C.MSA);
     const tv: DG.TableView = grok.shell.addTableView(df);
     // call to calculate 'cell.renderer' tag
@@ -92,9 +92,9 @@ category('splitters', () => {
     dfList.push(df);
     tvList.push(tv);
-    splitToMonomers(seqCol);
+    splitToMonomers();
     expect(df.columns.names().includes('17'), true);
-  });
+  }, {skipReason: 'GROK-12766'});
   test('getHelmMonomers', async () => {
     const df: DG.DataFrame = DG.DataFrame.fromCsv(
@@ -132,4 +132,3 @@ export async function _testHelmSplitter(src: string, tgt: string[]) {
   console.debug(`Bio: tests: splitters: src=${JSON.stringify(src)}, res=${JSON.stringify(res)} .`);
   expectArray(res, tgt);
 }

package/src/tests/substructure-filters-tests.ts CHANGED Viewed

@@ -1,11 +1,33 @@
 import {after, before, category, test, expect, delay} from '@datagrok-libraries/utils/src/test';
 import * as DG from 'datagrok-api/dg';
 import * as grok from 'datagrok-api/grok';
 import {readDataframe} from './utils';
 import {BioSubstructureFilter, HelmFilter, SeparatorFilter} from '../widgets/bio-substructure-filter';
+import {getMonomerLibHelper, IMonomerLibHelper} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
+import {LIB_DEFAULT, LIB_STORAGE_NAME} from '../utils/monomer-lib';
 category('substructureFilters', async () => {
+  let monomerLibHelper: IMonomerLibHelper;
+  /** Backup actual user's monomer libraries settings */
+  let userLibrariesSettings: {};
+  before(async () => {
+    monomerLibHelper = await getMonomerLibHelper();
+    userLibrariesSettings = await grok.dapi.userDataStorage.get(LIB_STORAGE_NAME, true);
+    // Test 'helm' requires default monomer library loaded
+    await grok.dapi.userDataStorage.post(LIB_STORAGE_NAME, LIB_DEFAULT, true);
+    await monomerLibHelper.loadLibraries(true); // load default libraries
+  });
+  after(async () => {
+    // UserDataStorage.put() replaces existing data
+    await grok.dapi.userDataStorage.put(LIB_STORAGE_NAME, userLibrariesSettings, true);
+    await monomerLibHelper.loadLibraries(true); // load user settings libraries
+  });
   test('fasta', async () => {
     const fasta = await readDataframe('tests/filter_FASTA.csv');
     const filter = new BioSubstructureFilter();
@@ -69,5 +91,5 @@ category('substructureFilters', async () => {
     expect(filter.dataFrame!.filter.trueCount, 1);
     expect(filter.dataFrame!.filter.get(3), true);
     helmTableView.close();
-  }, {skipReason: '#1206'});
+  }, {skipReason: 'GROK-12779'});
 });

package/src/tests/utils/sequences-generators.ts CHANGED Viewed

@@ -1,7 +1,7 @@
 import * as grok from 'datagrok-api/grok';
 import * as ui from 'datagrok-api/ui';
 import * as DG from 'datagrok-api/dg';
-import {ALIGNMENT, ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio';
+import {ALIGNMENT, ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
 export function generateManySequences(): DG.Column[] {

package/src/tests/utils.ts CHANGED Viewed

@@ -1,8 +1,9 @@
 import * as DG from 'datagrok-api/dg';
 import * as grok from 'datagrok-api/grok';
+import {_package} from '../package-test';
 import {expect} from '@datagrok-libraries/utils/src/test';
 import {runKalign} from '../utils/multiple-sequence-alignment';
-import {_package} from '../package-test';
 export async function loadFileAsText(name: string): Promise<string> {
   return await _package.files.readAsText(name);

package/src/tests/viewers.ts ADDED Viewed

@@ -0,0 +1,16 @@
+import * as DG from 'datagrok-api/dg';
+// import * as grok from 'datagrok-api/grok';
+//import * as ui from 'datagrok-api/ui';
+import {category, test, testViewer} from '@datagrok-libraries/utils/src/test';
+import {readDataframe} from './utils';
+category('viewers', () => {
+  const viewers = DG.Func.find({package: 'Bio', tags: ['viewer']}).map((f) => f.friendlyName);
+  for (const v of viewers) {
+    test(v, async () => {
+      await testViewer(v, await readDataframe('data/sample_FASTA_DNA.csv'), true);
+    });
+  }
+});

package/src/utils/cell-renderer.ts CHANGED Viewed

@@ -9,11 +9,23 @@ import {
   getPaletteByType,
   getSplitter,
   monomerToShort,
-  SeqPalette,
+  NOTATION,
   SplitterFunc,
   TAGS as bioTAGS,
-  UnknownSeqPalettes
-} from '@datagrok-libraries/bio';
+} from '@datagrok-libraries/bio/src/utils/macromolecule';
+import {SeqPalette} from '@datagrok-libraries/bio/src/seq-palettes';
+import {UnknownSeqPalettes} from '@datagrok-libraries/bio/src/unknown';
+const enum tempTAGS {
+  referenceSequence = 'reference-sequence',
+  currentWord = 'current-word',
+  monomerWidth = 'monomer-width',
+  bioSumMaxLengthWords = 'bio-sum-maxLengthWords',
+  bioMaxIndex = 'bio-maxIndex',
+  bioMaxLengthWords = 'bio-maxLengthWords',
+}
+type TempType = { [tagName: string]: any };
 const undefinedColor = 'rgb(100,100,100)';
 const monomerToShortFunction: (amino: string, maxLengthOfMonomer: number) => string = monomerToShort;
@@ -50,7 +62,8 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
   get defaultWidth(): number { return 230; }
   onClick(gridCell: DG.GridCell, e: MouseEvent): void {
-    gridCell.cell.column.temp['current-word'] = gridCell.cell.value;
+    const colTemp: TempType = gridCell.cell.column.temp;
+    colTemp[tempTAGS.currentWord] = gridCell.cell.value;
     gridCell.grid.invalidate();
   }
@@ -58,8 +71,9 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
     if (gridCell.cell.column.getTag(bioTAGS.aligned) !== ALIGNMENT.SEQ_MSA)
       return;
-    const maxLengthWordsSum = gridCell.cell.column.temp['bio-sum-maxLengthWords'];
-    const maxIndex = gridCell.cell.column.temp['bio-maxIndex'];
+    const colTemp: TempType = gridCell.cell.column.temp;
+    const maxLengthWordsSum = colTemp[tempTAGS.bioSumMaxLengthWords];
+    const maxIndex = colTemp[tempTAGS.bioMaxIndex];
     const argsX = e.offsetX - gridCell.gridColumn.left + (gridCell.gridColumn.left - gridCell.bounds.x);
     let left = 0;
     let right = maxIndex;
@@ -107,7 +121,7 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
   ) {
     const grid = gridCell.gridRow !== -1 ? gridCell.grid : null;
     const cell = gridCell.cell;
-    const paletteType = gridCell.cell.column.getTag(C.TAGS.ALPHABET);
+    const paletteType = gridCell.cell.column.getTag(bioTAGS.alphabet);
     const minDistanceRenderer = 50;
     w = getUpdatedWidth(grid, g, x, w);
     g.save();
@@ -119,19 +133,28 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
     //TODO: can this be replaced/merged with splitSequence?
     const units = gridCell.cell.column.getTag(DG.TAGS.UNITS);
+    const aligned: string = gridCell.cell.column.getTag(bioTAGS.aligned);
     const palette = getPaletteByType(paletteType);
-    const separator = gridCell.cell.column.getTag('separator') ?? '';
-    const splitLimit = gridCell.bounds.width / 5;
+    const separator = gridCell.cell.column.getTag(bioTAGS.separator) ?? '';
+    const splitLimit = w / 5;
     const splitterFunc: SplitterFunc = getSplitter(units, separator, splitLimit);
-    const referenceSequence: string[] = splitterFunc(((gridCell.cell.column?.temp['reference-sequence'] != null) && (gridCell.cell.column?.temp['reference-sequence'] != ''))
-      ? gridCell.cell.column.temp['reference-sequence'] : gridCell.cell.column.temp['current-word'] ?? '');
-    const monomerWidth = (gridCell.cell.column?.temp['monomer-width'] != null) ? gridCell.cell.column.temp['monomer-width'] : 'short';
-    let gapRenderer = 5;
-    let maxIndex = 0;
+    // TODO: Store temp data to GridColumn
+    // Now the renderer requires data frame table Column underlying GridColumn
+    const colTemp: TempType = gridCell.cell.column.temp;
+    const tempReferenceSequence: string | null = colTemp[tempTAGS.referenceSequence];
+    const tempCurrentWord: string | null = colTemp[tempTAGS.currentWord];
+    const tempMonomerWidth: string | null = colTemp[tempTAGS.monomerWidth];
+    const referenceSequence: string[] = splitterFunc(
+      ((tempReferenceSequence != null) && (tempReferenceSequence != '')) ?
+        tempReferenceSequence : tempCurrentWord ?? '');
+    const monomerWidth: string = (tempMonomerWidth != null) ? tempMonomerWidth : 'short';
+    let gapRenderer = 5;
+    let maxIndex = 0;
     let maxLengthOfMonomer = 8;
     if (monomerWidth === 'short') {
@@ -149,44 +172,42 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
           const textSize = monomerToShortFunction(amino, maxLengthOfMonomer).length * 7 + gapRenderer;
           if (textSize > (maxLengthWords[index] ?? 0))
             maxLengthWords[index] = textSize;
-          if (index > maxIndex) {
-            maxIndex = index;
-          }
+          if (index > maxIndex) maxIndex = index;
         });
         samples += 1;
       }
       const minLength = 3 * 7;
       for (let i = 0; i <= maxIndex; i++) {
-        if (maxLengthWords[i] < minLength) {
-          maxLengthWords[i] = minLength;
-        }
+        if (maxLengthWords[i] < minLength) maxLengthWords[i] = minLength;
         const maxLengthWordSum: any = {};
         maxLengthWordSum[0] = maxLengthWords[0];
-        for (let i = 1; i <= maxIndex; i++) {
-          maxLengthWordSum[i] = maxLengthWordSum[i - 1] + maxLengthWords[i];
-        }
-        gridCell.cell.column.temp['bio-sum-maxLengthWords'] = maxLengthWordSum;
-        gridCell.cell.column.temp['bio-maxIndex'] = maxIndex;
-        gridCell.cell.column.temp['bio-maxLengthWords'] = maxLengthWords;
+        for (let i = 1; i <= maxIndex; i++) maxLengthWordSum[i] = maxLengthWordSum[i - 1] + maxLengthWords[i];
+        colTemp[tempTAGS.bioSumMaxLengthWords] = maxLengthWordSum;
+        colTemp[tempTAGS.bioMaxIndex] = maxIndex;
+        colTemp[tempTAGS.bioMaxLengthWords] = maxLengthWords;
         gridCell.cell.column.setTag('.calculatedCellRender', splitLimit.toString());
       }
     } else {
-      maxLengthWords = gridCell.cell.column.temp['bio-maxLengthWords'];
+      maxLengthWords = colTemp[tempTAGS.bioMaxLengthWords];
     }
     const subParts: string[] = splitterFunc(cell.value);
     let x1 = x;
     let color = undefinedColor;
     let drawStyle = DrawStyle.classic;
-    if (gridCell.cell.column.getTag('aligned').includes('MSA') && gridCell.cell.column.getTag('units') === 'separator')
+    if (aligned && aligned.includes('MSA') && units == NOTATION.SEPARATOR)
       drawStyle = DrawStyle.MSA;
     subParts.every((amino, index) => {
       color = palette.get(amino);
       g.fillStyle = undefinedColor;
       const last = index === subParts.length - 1;
-      x1 = printLeftOrCentered(x1, y, w, h, g, amino, color, 0, true, 1.0, separator, last, drawStyle, maxLengthWords, index, gridCell, referenceSequence, maxLengthOfMonomer);
-      return x1 - minDistanceRenderer - gridCell.gridColumn.left + (gridCell.gridColumn.left - gridCell.bounds.x) <= gridCell.bounds.width;
+      x1 = printLeftOrCentered(x1, y, w, h,
+        g, amino, color, 0, true, 1.0, separator, last, drawStyle,
+        maxLengthWords, index, gridCell, referenceSequence, maxLengthOfMonomer);
+      return minDistanceRenderer <= w;
     });
     g.restore();
@@ -221,7 +242,7 @@ export class MonomerCellRenderer extends DG.GridCellRenderer {
     g.textBaseline = 'middle';
     g.textAlign = 'center';
-    const palette = getPaletteByType(gridCell.cell.column.getTag(C.TAGS.ALPHABET));
+    const palette = getPaletteByType(gridCell.cell.column.getTag(bioTAGS.alphabet));
     const s: string = gridCell.cell.value;
     if (!s)
       return;
@@ -259,7 +280,7 @@ export class MacromoleculeDifferenceCellRenderer extends DG.GridCellRenderer {
     const grid = gridCell.grid;
     const cell = gridCell.cell;
     const s: string = cell.value ?? '';
-    const separator = gridCell.tableColumn!.tags[C.TAGS.SEPARATOR];
+    const separator = gridCell.tableColumn!.tags[bioTAGS.separator];
     const units: string = gridCell.tableColumn!.tags[DG.TAGS.UNITS];
     w = getUpdatedWidth(grid, g, x, w);
     //TODO: can this be replaced/merged with splitSequence?
@@ -284,9 +305,9 @@ export function drawMoleculeDifferenceOnCanvas(
   molDifferences?: { [key: number]: HTMLCanvasElement }
 ): void {
   if (subParts1.length !== subParts2.length) {
-    const emptyMonomersArray = new Array<string>(Math.abs(subParts1.length - subParts2.length)).fill('');
-    subParts1.length > subParts2.length ?
-      subParts2 = subParts2.concat(emptyMonomersArray) : subParts1 = subParts1.concat(emptyMonomersArray);
+    const sequences: IComparedSequences = fillShorterSequence(subParts1, subParts2);
+    subParts1 = sequences.subParts1;
+    subParts2 = sequences.subParts2;
   }
   const textSize1 = g.measureText(processSequence(subParts1).join(''));
   const textSize2 = g.measureText(processSequence(subParts2).join(''));
@@ -329,6 +350,11 @@ export function drawMoleculeDifferenceOnCanvas(
   g.restore();
 }
+interface IComparedSequences{
+  subParts1: string[];
+  subParts2: string[];
+}
 function createDifferenceCanvas(
   amino1: string,
   amino2: string,
@@ -351,3 +377,30 @@ function createDifferenceCanvas(
   printLeftOrCentered(0, y + shift, width, h, context, amino2, color2, 0, true);
   return canvas;
 }
+function fillShorterSequence(subParts1: string[], subParts2: string[]): IComparedSequences {
+  let numIdenticalStart = 0;
+  let numIdenticalEnd = 0;
+  const longerSeq = subParts1.length > subParts2.length ? subParts1 : subParts2;
+  let shorterSeq = subParts1.length > subParts2.length ? subParts2 : subParts1;
+  for (let i = 0; i < shorterSeq.length; i++) {
+    if (longerSeq[i] === shorterSeq[i])
+      numIdenticalStart++;
+  }
+  const lengthDiff = longerSeq.length - shorterSeq.length;
+  for (let i = longerSeq.length - 1; i > lengthDiff; i--) {
+    if (longerSeq[i] === shorterSeq[i - lengthDiff])
+      numIdenticalEnd++;
+  }
+  const emptyMonomersArray = new Array<string>(Math.abs(subParts1.length - subParts2.length)).fill('');
+  function concatWithEmptyVals(subparts: string[]): string[] {
+    return numIdenticalStart > numIdenticalEnd ? subparts.concat(emptyMonomersArray) : emptyMonomersArray.concat(subparts);
+  }
+  subParts1.length > subParts2.length ?  subParts2 = concatWithEmptyVals(subParts2) : subParts1 = concatWithEmptyVals(subParts1);
+  return {subParts1: subParts1, subParts2: subParts2};
+}

package/src/utils/constants.ts CHANGED Viewed

@@ -12,18 +12,14 @@ export enum COLUMNS_NAMES {
 export enum TAGS {
   AAR = 'AAR',
   POSITION = 'Pos',
-  SEPARATOR = 'separator',
   SELECTION = 'selection',
-  ALPHABET = 'alphabet',
-  ALIGNED = 'aligned',
 }
 export enum SEM_TYPES {
   MONOMER = 'Monomer',
   MACROMOLECULE_DIFFERENCE = 'MacromoleculeDifference',
   ACTIVITY = 'activity',
-  ACTIVITY_SCALED = 'activityScaled',
-  MACROMOLECULE = 'Macromolecule',
+  ACTIVITY_SCALED = 'activityScaled'
 }
 export const MSA = 'MSA';
@@ -47,7 +43,7 @@ export const aarGroups = {
   '-': '-',
 };
-export const groupDescription: {[key: string]: {'description': string, aminoAcids: string[]}} = {
+export const groupDescription: { [key: string]: { 'description': string, aminoAcids: string[] } } = {
   'PC': {'description': 'Positive Amino Acids, with Electrically Charged Side Chains', 'aminoAcids': ['R', 'H', 'K']},
   'NC': {'description': 'Negative Amino Acids, with Electrically Charged Side Chains', 'aminoAcids': ['D', 'E']},
   'U': {'description': 'Amino Acids with Polar Uncharged Side Chains', 'aminoAcids': ['S', 'T', 'N', 'Q']},
@@ -58,3 +54,8 @@ export const groupDescription: {[key: string]: {'description': string, aminoAcid
   },
   '-': {'description': 'Unknown Amino Acid', 'aminoAcids': ['-']},
 };
+export namespace PEPSEA {
+  export const SEPARATOR = '.';
+}

package/src/utils/convert.ts CHANGED Viewed

@@ -4,7 +4,8 @@ import * as grok from 'datagrok-api/grok';
 import $ from 'cash-dom';
 import {Subscription} from 'rxjs';
-import {NOTATION, NotationConverter} from '@datagrok-libraries/bio';
+import {NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
+import {NotationConverter} from '@datagrok-libraries/bio/src/utils/notation-converter';
 let convertDialog: DG.Dialog | null = null;
@@ -46,7 +47,7 @@ export function convert(col: DG.Column): void {
   });
   if (convertDialog == null) {
-    convertDialog = ui.dialog('Convert sequence notation')
+    convertDialog = ui.dialog('Convert Sequence Notation')
       .add(ui.div([
         ui.divText(
           'Current notation: ' + currentNotation,
@@ -86,6 +87,11 @@ export async function convertDo(
   const newColumn = converter.convert(targetNotation, separator);
   srcCol.dataFrame.columns.add(newColumn);
+  // Call detector directly to escape some error on detectSemanticTypes
+  const semType = await grok.functions.call('Bio:detectMacromolecule', {col: newColumn});
+  if (semType)
+    newColumn.semType = semType;
   // call to calculate 'cell.renderer' tag
   await grok.data.detectSemanticTypes(srcCol.dataFrame);