@datagrok/bio 2.21.9 → 2.21.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "name": "Leonid Stolbov",
6
6
  "email": "lstolbov@datagrok.ai"
7
7
  },
8
- "version": "2.21.9",
8
+ "version": "2.21.11",
9
9
  "description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
10
10
  "repository": {
11
11
  "type": "git",
@@ -44,7 +44,7 @@
44
44
  ],
45
45
  "dependencies": {
46
46
  "@biowasm/aioli": "^3.1.0",
47
- "@datagrok-libraries/bio": "^5.53.0",
47
+ "@datagrok-libraries/bio": "^5.53.2",
48
48
  "@datagrok-libraries/chem-meta": "^1.2.7",
49
49
  "@datagrok-libraries/math": "^1.2.4",
50
50
  "@datagrok-libraries/ml": "^6.10.0",
package/src/package.ts CHANGED
@@ -462,6 +462,11 @@ export async function activityCliffs(table: DG.DataFrame, molecules: DG.Column<s
462
462
  similarity: number, methodName: DimReductionMethods,
463
463
  similarityMetric: MmDistanceFunctionsNames | BitArrayMetrics, preprocessingFunction: DG.Func,
464
464
  options?: (IUMAPOptions | ITSNEOptions) & Options, demo?: boolean): Promise<DG.Viewer | undefined> {
465
+ //workaround for functions which add viewers to tableView (can be run only on active table view)
466
+ if (table.name !== grok.shell.tv.dataFrame.name) {
467
+ grok.shell.error(`Table ${table.name} is not an current table view`);
468
+ return;
469
+ }
465
470
  if (!checkInputColumnUI(molecules, 'Activity Cliffs'))
466
471
  return;
467
472
  const axesNames = getEmbeddingColsNames(table);
@@ -588,6 +593,11 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, molecules: DG.Co
588
593
  plotEmbeddings: boolean, preprocessingFunction?: DG.Func, options?: (IUMAPOptions | ITSNEOptions) & Options,
589
594
  clusterEmbeddings?: boolean, isDemo?: boolean
590
595
  ): Promise<DG.ScatterPlotViewer | undefined> {
596
+ //workaround for functions which add viewers to tableView (can be run only on active table view)
597
+ if (table.name !== grok.shell.tv.dataFrame.name) {
598
+ grok.shell.error(`Table ${table.name} is not an current table view`);
599
+ return;
600
+ }
591
601
  const tableView =
592
602
  grok.shell.tv.dataFrame == table ? grok.shell.tv : undefined;
593
603
  if (!checkInputColumnUI(molecules, 'Sequence Space'))
@@ -194,6 +194,10 @@ export class MonomerManager implements IMonomerManager {
194
194
  args.menu.item('Edit Monomer', async () => {
195
195
  await this.editMonomer(this.tv!.dataFrame.rows.get(rowIdx));
196
196
  });
197
+
198
+ args.menu.item('Fix all monomers', () => {
199
+ this.fixAllMonomers();
200
+ });
197
201
  if (this.tv!.dataFrame.selection.trueCount > 0) {
198
202
  args.menu.item('Remove Selected Monomers', async () => {
199
203
  const monomers = await Promise.all(Array.from(this.tv!.dataFrame.selection.getSelectedIndexes())
@@ -271,6 +275,8 @@ export class MonomerManager implements IMonomerManager {
271
275
  this._newMonomerForm.setEmptyMonomer();
272
276
  }, 'Add New Monomer');
273
277
 
278
+ const fixAllMonomersIcon = ui.iconFA('wand-magic', () => { this.fixAllMonomers(); }, 'Fix all monomers');
279
+
274
280
  const editButton = ui.icons.edit(async () => {
275
281
  if ((this.tv?.dataFrame?.currentRowIdx ?? -1) < 0) return;
276
282
  await this.editMonomer(this.tv!.dataFrame.rows.get(this.tv!.dataFrame.currentRowIdx));
@@ -309,7 +315,7 @@ export class MonomerManager implements IMonomerManager {
309
315
  DG.Utils.download(libName!, lib!, 'text/plain');
310
316
  }, 'Download Monomer Library');
311
317
 
312
- ribbons.push([newMonomerButton, editButton, deleteButton, downloadButton]);
318
+ ribbons.push([newMonomerButton, editButton, fixAllMonomersIcon, deleteButton, downloadButton]);
313
319
  this.tv.setRibbonPanels(ribbons);
314
320
 
315
321
 
@@ -443,6 +449,36 @@ export class MonomerManager implements IMonomerManager {
443
449
  }
444
450
  }
445
451
 
452
+ async fixAllMonomers() {
453
+ ui.dialog('Fix All Monomers')
454
+ .add(ui.divText('This action will fix all monomers in the library, standardize their smiles, molblocks and r-groups, assign correct natural analogs and save the library.'))
455
+ .add(ui.divText('Do you wish to continue?'))
456
+ .onOK(async () => {
457
+ const monomerDf = this.tv?.dataFrame;
458
+ let libName = this.libInput.value;
459
+ if (!monomerDf || !libName) {
460
+ grok.shell.error('No monomer library loaded');
461
+ return;
462
+ }
463
+ this.tv?.grid && ui.setUpdateIndicator(this.tv.grid.root, true);
464
+ try {
465
+ const monomers = await Promise.all(new Array(monomerDf.rowCount).fill(0).map((_, i) => monomerFromDfRow(monomerDf.rows.get(i))));
466
+ const monomersString = JSON.stringify(monomers.map((m) => ({...m, lib: undefined, wem: undefined})), null, 2);
467
+ if (!libName.endsWith('.json'))
468
+ libName += '.json';
469
+ await grok.dapi.files.writeAsText(LIB_PATH + libName, monomersString);
470
+ await this.monomerLibManamger.loadLibraries(true);
471
+ //await this.monomerLibManamger.loadLibraries(true);
472
+ grok.shell.v = await this.getViewRoot(libName);
473
+ } catch (e) {
474
+ grok.shell.error('Error saving library');
475
+ console.error(e);
476
+ } finally {
477
+ this.tv?.grid && ui.setUpdateIndicator(this.tv.grid.root, false);
478
+ }
479
+ }).show();
480
+ }
481
+
446
482
  public resetCurrentRowFollowing() {
447
483
  this._newMonomerForm.molChanged = false;
448
484
  }
@@ -1056,9 +1092,14 @@ function getCorrectedMolBlock(molBlock: string) {
1056
1092
  lines[isoLineIdx] = lines[isoLineIdx].substring(0, isoIndex) + 'RGP' + lines[isoLineIdx].substring(isoIndex + 3);
1057
1093
  }
1058
1094
 
1059
- const molStartIdx = lines.findIndex((line) => line.includes('V2000') || line.includes('V3000'));
1095
+ const molStartIdx = lines.findIndex((line) => line.includes('V2000'));
1060
1096
 
1061
- const atomCount = Number.parseInt(lines[molStartIdx].trim().split(' ')[0]);
1097
+ if (molStartIdx === -1) {
1098
+ console.error('Mol start line not found');
1099
+ return molBlock;
1100
+ }
1101
+ // the atom count occupies at most 3 characters of the counts line, so only those are parsed
1102
+ const atomCount = Number.parseInt(lines[molStartIdx].trim().split(' ')[0].slice(0, 3).trim());
1062
1103
  const rgroupLineNumbers: { [atomLine: number]: number } = {};
1063
1104
  for (let atomI = molStartIdx + 1; atomI < molStartIdx + 1 + atomCount; atomI++) {
1064
1105
  const rIdx = lines[atomI].indexOf('R ');
@@ -20,8 +20,6 @@ import {ConvertFunc, ISeqHandler, JoinerFunc, SeqTemps, SeqValueBase} from '@dat
20
20
 
21
21
  import {SeqHelper} from './seq-helper';
22
22
 
23
- /* eslint-enable max-len */
24
-
25
23
  /** Class for handling notation units in Macromolecule columns and
26
24
  * conversion of notation systems in Macromolecule columns
27
25
  */
@@ -322,6 +320,44 @@ export class SeqHandler implements ISeqHandler {
322
320
  }
323
321
  }
324
322
 
323
+ /// Faster way to get the monomer at a given position.
324
+ /// For canonical sequences in FASTA notation (large proteins/nucleotides)
325
+ /// this is faster than getSplitted(rowIdx).getCanonical(posIdx).
326
+ getMonomerAtPosition(rowIdx: number, posIdx: number, canonical: boolean): string {
327
+ if (this.isCanonicalAlphabet && this.isFasta() && !this.getAlphabetIsMultichar()) {
328
+ const seq = this.column.get(rowIdx) ?? '';
329
+ const res = seq[posIdx];
330
+ return canonical ? (res === GapOriginals[NOTATION.FASTA] ? GAP_SYMBOL : (res ?? GAP_SYMBOL)) : (res ?? '');
331
+ }
332
+ const mSeq: ISeqSplitted = this.getSplitted(rowIdx);
333
+ if (posIdx < 0 || posIdx >= mSeq.length)
334
+ return this.defaultGapOriginal;
335
+ return canonical ? mSeq.getCanonical(posIdx) : mSeq.getOriginal(posIdx);
336
+ }
337
+
338
+ getMonomersAtPosition(position: number, canonical: boolean): string[] {
339
+ const length = this.column.length;
340
+ const res: string[] = new Array(length).fill(GAP_SYMBOL);
341
+ if (this.isCanonicalAlphabet && this.isFasta() && !this.getAlphabetIsMultichar()) {
342
+ const colCategories = this.column.categories;
343
+ const colIndexes = this.column.getRawData();
344
+ for (let i = 0; i < length; i++) {
345
+ const seq = colCategories[colIndexes[i]] ?? '';
346
+ if (position < seq.length) {
347
+ const resChar = seq[position];
348
+ res[i] = canonical ? (resChar === GapOriginals[NOTATION.FASTA] ? GAP_SYMBOL : (resChar ?? GAP_SYMBOL)) : (resChar ?? '');
349
+ }
350
+ }
351
+ } else {
352
+ for (let i = 0; i < length; i++) {
353
+ const mSeq: ISeqSplitted = this.getSplitted(i);
354
+ if (position >= 0 && position < mSeq.length)
355
+ res[i] = canonical ? mSeq.getCanonical(position) : mSeq.getOriginal(position);
356
+ }
357
+ }
358
+ return res;
359
+ }
360
+
325
361
  /** Any Macromolecule can be represented on Helm format. The reverse is not always possible. */
326
362
  public getValue(rowIdx: number, options?: any): SeqValueBase {
327
363
  const seq: string = this.column.get(rowIdx);
@@ -344,6 +380,11 @@ export class SeqHandler implements ISeqHandler {
344
380
 
345
381
  private _stats: SeqColStats | null = null;
346
382
 
383
+ public get isCanonicalAlphabet(): boolean {
384
+ const alphabet = this.alphabet;
385
+ return alphabet === ALPHABET.DNA || alphabet === ALPHABET.RNA || alphabet === ALPHABET.PT;
386
+ }
387
+
347
388
  public get stats(): SeqColStats {
348
389
  if (this._stats === null) {
349
390
  const freq: { [m: string]: number } = {};
@@ -393,6 +434,8 @@ export class SeqHandler implements ISeqHandler {
393
434
 
394
435
  public isSeparator(): boolean { return this.notation === NOTATION.SEPARATOR || !!this.separator; }
395
436
 
437
+ public isFastaOrSeparator(): boolean { return this.isFasta() || this.isSeparator(); }
438
+
396
439
  public isHelm(): boolean { return this.notation === NOTATION.HELM; }
397
440
 
398
441
  public isCustom(): boolean { return this.notation === NOTATION.CUSTOM; }
@@ -51,10 +51,6 @@ export function handleSequenceHeaderRendering() {
51
51
  const split = sh.splitter(seq);
52
52
  const maxSeqLen = split ? split.length : 30;
53
53
 
54
- // makes no sense to have scroller if we have shorter than 50 positions
55
- if (maxSeqLen < 50)
56
- continue;
57
-
58
54
  const defaultHeaderHeight = 40;
59
55
  const scroller = new MSAScrollingHeader({
60
56
  canvas: grid.overlay,
@@ -77,7 +73,10 @@ export function handleSequenceHeaderRendering() {
77
73
  });
78
74
  },
79
75
  });
80
- grid.props.colHeaderHeight = 65;
76
+ // adjust header height automatically only if the sequences are long enough
77
+ if (maxSeqLen > 40)
78
+ grid.props.colHeaderHeight = 65;
79
+
81
80
  setTimeout(() => { if (grid.isDetached) return; gCol.width = 400; }, 300); // needed because renderer sets its width
82
81
  grid.sub(grid.onCellRender.subscribe((e) => {
83
82
  const cell = e.cell;