@datagrok/bio 2.10.16 → 2.10.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "name": "Leonid Stolbov",
6
6
  "email": "lstolbov@datagrok.ai"
7
7
  },
8
- "version": "2.10.16",
8
+ "version": "2.10.22",
9
9
  "description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
10
10
  "repository": {
11
11
  "type": "git",
@@ -34,9 +34,9 @@
34
34
  ],
35
35
  "dependencies": {
36
36
  "@biowasm/aioli": "^3.1.0",
37
- "@datagrok-libraries/bio": "^5.38.8",
37
+ "@datagrok-libraries/bio": "^5.38.11",
38
38
  "@datagrok-libraries/chem-meta": "^1.0.1",
39
- "@datagrok-libraries/ml": "^6.3.43",
39
+ "@datagrok-libraries/ml": "^6.3.49",
40
40
  "@datagrok-libraries/tutorials": "^1.3.6",
41
41
  "@datagrok-libraries/utils": "^4.0.17",
42
42
  "cash-dom": "^8.0.0",
@@ -64,7 +64,8 @@
64
64
  "webpack-bundle-analyzer": "latest",
65
65
  "webpack-cli": "^4.9.1",
66
66
  "@datagrok/chem": "1.7.2",
67
- "@datagrok/helm": "2.1.17"
67
+ "@datagrok/helm": "2.1.17",
68
+ "@datagrok/dendrogram": "^1.2.20"
68
69
  },
69
70
  "scripts": {
70
71
  "link-api": "npm link datagrok-api",
@@ -3,7 +3,9 @@ import {reduceDimensinalityWithNormalization} from '@datagrok-libraries/ml/src/s
3
3
  import {BitArrayMetrics, StringMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
4
4
  import {ISequenceSpaceParams} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
5
5
  import {invalidateMols, MONOMERIC_COL_TAGS} from '../substructure-search/substructure-search';
6
+ import {mmDistanceFunctionArgs} from '@datagrok-libraries/ml/src/macromolecule-distance-functions/types';
6
7
  import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
8
+ import {calculateMonomerSimilarity} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
7
9
  import * as grok from 'datagrok-api/grok';
8
10
  import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
9
11
 
@@ -53,41 +55,59 @@ export async function sequenceSpaceByFingerprints(spaceParams: ISequenceSpacePar
53
55
  return result;
54
56
  }
55
57
 
56
- export async function getSequenceSpace(spaceParams: ISequenceSpaceParams): Promise<ISequenceSpaceResult> {
58
+ export async function getSequenceSpace(spaceParams: ISequenceSpaceParams,
59
+ progressFunc?: (epochNum: number, epochsLength: number, embedding: number[][]) => void
60
+ ): Promise<ISequenceSpaceResult> {
57
61
  const ncUH = UnitsHandler.getOrCreate(spaceParams.seqCol);
58
62
 
59
- const distanceFName = ncUH.isMsa() ? MmDistanceFunctionsNames.HAMMING : MmDistanceFunctionsNames.LEVENSHTEIN;
63
+ //const distanceFName = ncUH.isMsa() ? MmDistanceFunctionsNames.HAMMING : MmDistanceFunctionsNames.LEVENSHTEIN;
60
64
  const seqList = spaceParams.seqCol.toList();
61
- if (ncUH.getAlphabetIsMultichar()) {
62
- const splitter = ncUH.getSplitter();
63
- const seqColLength = seqList.length;
64
- let charCodeCounter = 36;
65
- const charCodeMap = new Map<string, string>();
66
- for (let i = 0; i < seqColLength; i++) {
67
- const seq = seqList[i];
68
- if (seqList[i] === null || spaceParams.seqCol.isNone(i)) {
69
- seqList[i] = null;
70
- continue;
71
- }
72
- seqList[i] = '';
73
- const splittedSeq = splitter(seq);
74
- for (let j = 0; j < splittedSeq.length; j++) {
75
- const char = splittedSeq[j];
76
- if (!charCodeMap.has(char)) {
77
- charCodeMap.set(char, String.fromCharCode(charCodeCounter));
78
- charCodeCounter++;
79
- }
80
- seqList[i] += charCodeMap.get(char)!;
65
+
66
+ const splitter = ncUH.getSplitter();
67
+ const seqColLength = seqList.length;
68
+ let charCodeCounter = 36;
69
+ const charCodeMap = new Map<string, string>();
70
+ for (let i = 0; i < seqColLength; i++) {
71
+ const seq = seqList[i];
72
+ if (seqList[i] === null || spaceParams.seqCol.isNone(i)) {
73
+ seqList[i] = null;
74
+ continue;
75
+ }
76
+ seqList[i] = '';
77
+ const splittedSeq = splitter(seq);
78
+ for (let j = 0; j < splittedSeq.length; j++) {
79
+ const char = splittedSeq[j];
80
+ if (!charCodeMap.has(char)) {
81
+ charCodeMap.set(char, String.fromCharCode(charCodeCounter));
82
+ charCodeCounter++;
81
83
  }
84
+ seqList[i] += charCodeMap.get(char)!;
82
85
  }
83
86
  }
84
87
 
88
+ if (spaceParams.similarityMetric === MmDistanceFunctionsNames.MONOMER_CHEMICAL_DISTANCE) {
89
+ const monomers = Array.from(charCodeMap.keys());
90
+ const monomerRes = await calculateMonomerSimilarity(monomers);
91
+ // the substitution matrix contains similarities, but we need distances
92
+ monomerRes.scoringMatrix.forEach((row, i) => {
93
+ row.forEach((val, j) => {
94
+ monomerRes.scoringMatrix[i][j] = 1 - val;
95
+ });
96
+ });
97
+ const monomerHashToMatrixMap: {[_: string]: number} = {};
98
+ Object.entries(monomerRes.alphabetIndexes).forEach(([key, value]) => {
99
+ monomerHashToMatrixMap[charCodeMap.get(key)!] = value;
100
+ });
101
+ spaceParams.options.distanceFnArgs = {scoringMatrix: monomerRes.scoringMatrix,
102
+ alphabetIndexes: monomerHashToMatrixMap} satisfies mmDistanceFunctionArgs;
103
+ }
104
+
85
105
  const sequenceSpaceResult = await reduceDimensinalityWithNormalization(
86
106
  seqList,
87
107
  spaceParams.methodName,
88
- distanceFName,
108
+ spaceParams.similarityMetric,
89
109
  spaceParams.options,
90
- true);
110
+ true, progressFunc);
91
111
  const cols: DG.Column[] = spaceParams.embedAxesNames.map(
92
112
  (name: string, index: number) => DG.Column.fromFloat32Array(name, sequenceSpaceResult.embedding[index]));
93
113
  return {distance: sequenceSpaceResult.distance, coordinates: new DG.ColumnList(cols)};
package/src/package.ts CHANGED
@@ -69,9 +69,13 @@ import {_getEnumeratorWidget, _setPeptideColumn} from './utils/enumerator-tools'
69
69
  import {getRegionDo} from './utils/get-region';
70
70
  import {GetRegionApp} from './apps/get-region-app';
71
71
  import {GetRegionFuncEditor} from './utils/get-region-func-editor';
72
+ import {DIMENSIONALITY_REDUCER_TERMINATE_EVENT}
73
+ from '@datagrok-libraries/ml/src/workers/dimensionality-reducing-worker-creator';
74
+ import {Options} from '@datagrok-libraries/utils/src/type-declarations';
72
75
 
73
76
  export const _package = new BioPackage();
74
77
 
78
+ export const BYPASS_LARGE_DATA_WARNING = 'bypassLargeDataWarning';
75
79
  // /** Avoid reassigning {@link monomerLib} because consumers subscribe to {@link IMonomerLib.onChanged} event */
76
80
  // let monomerLib: MonomerLib | null = null;
77
81
 
@@ -221,7 +225,7 @@ export function SequenceSpaceEditor(call: DG.FuncCall) {
221
225
  ui.dialog({title: 'Sequence Space'})
222
226
  .add(funcEditor.paramsUI)
223
227
  .onOK(async () => {
224
- return call.func.prepare(funcEditor.funcParams).call(true);
228
+ return call.func.prepare(funcEditor.funcParams).call();
225
229
  })
226
230
  .show();
227
231
  }
@@ -382,7 +386,7 @@ export async function getRegionTopMenu(
382
386
  //output: viewer result
383
387
  //editor: Bio:SeqActivityCliffsEditor
384
388
  export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column, activities: DG.Column,
385
- similarity: number, methodName: DimReductionMethods, options?: IUMAPOptions | ITSNEOptions,
389
+ similarity: number, methodName: DimReductionMethods, options?: (IUMAPOptions | ITSNEOptions) & Options,
386
390
  ): Promise<DG.Viewer | undefined> {
387
391
  if (!checkInputColumnUI(macroMolecule, 'Activity Cliffs'))
388
392
  return;
@@ -435,7 +439,7 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
435
439
  return;
436
440
  }
437
441
 
438
- if (df.rowCount > fastRowCount) {
442
+ if (df.rowCount > fastRowCount && !options?.[BYPASS_LARGE_DATA_WARNING]) {
439
443
  ui.dialog().add(ui.divText(`Activity cliffs analysis might take several minutes.
440
444
  Do you want to continue?`))
441
445
  .onOK(async () => {
@@ -457,69 +461,128 @@ export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column,
457
461
  //input: dataframe table
458
462
  //input: column molecules { semType: Macromolecule }
459
463
  //input: string methodName { choices:["UMAP", "t-SNE"] }
460
- //input: string similarityMetric { choices:["Tanimoto", "Asymmetric", "Cosine", "Sokal"] }
464
+ //input: string similarityMetric { choices:["Hamming", "Levenshtein", "Monomer chemical distance"] }
461
465
  //input: bool plotEmbeddings = true
462
466
  //input: double sparseMatrixThreshold = 0.8 [Similarity Threshold for sparse matrix calculation]
463
467
  //input: object options {optional: true}
464
468
  //editor: Bio:SequenceSpaceEditor
465
469
  export async function sequenceSpaceTopMenu(
466
470
  table: DG.DataFrame, macroMolecule: DG.Column, methodName: DimReductionMethods,
467
- similarityMetric: BitArrayMetrics | MmDistanceFunctionsNames = BitArrayMetricsNames.Tanimoto,
468
- plotEmbeddings: boolean, sparseMatrixThreshold?: number, options?: IUMAPOptions | ITSNEOptions,
471
+ similarityMetric: BitArrayMetrics | MmDistanceFunctionsNames = MmDistanceFunctionsNames.LEVENSHTEIN,
472
+ plotEmbeddings: boolean, sparseMatrixThreshold?: number, options?: (IUMAPOptions | ITSNEOptions) & Options,
469
473
  ): Promise<DG.Viewer | undefined> {
470
474
  // A delay is required so that the initial function dialog closes before invalidation of molfiles starts.
471
475
  // Otherwise, the dialog freezes
472
476
  await delay(10);
473
477
  if (!checkInputColumnUI(macroMolecule, 'Sequence space')) return;
478
+ let scatterPlot: DG.ScatterPlotViewer | undefined = undefined;
479
+ const pg = DG.TaskBarProgressIndicator.create('Initializing sequence space ...');
480
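+ // progress callback (reported per epoch, e.g. by UMAP): writes the intermediate embedding into the table and updates the progress bar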
481
+ try {
482
+ function progressFunc(_nEpoch: number, epochsLength: number, embeddings: number[][]) {
483
+ let embedXCol: DG.Column | null = null;
484
+ let embedYCol: DG.Column | null = null;
485
+ if (!table.columns.names().includes(embedColsNames[0])) {
486
+ embedXCol = table.columns.add(DG.Column.float(embedColsNames[0], table.rowCount));
487
+ embedYCol = table.columns.add(DG.Column.float(embedColsNames[1], table.rowCount));
488
+ if (plotEmbeddings) {
489
+ scatterPlot = grok.shell
490
+ .tableView(table.name)
491
+ .scatterPlot({x: embedColsNames[0], y: embedColsNames[1], title: 'Sequence space'});
492
+ }
493
+ } else {
494
+ embedXCol = table.columns.byName(embedColsNames[0]);
495
+ embedYCol = table.columns.byName(embedColsNames[1]);
496
+ }
474
497
 
475
- const embedColsNames = getEmbeddingColsNames(table);
476
- const withoutEmptyValues = DG.DataFrame.fromColumns([macroMolecule]).clone();
477
- const emptyValsIdxs = removeEmptyStringRows(withoutEmptyValues, macroMolecule);
478
-
479
- const chemSpaceParams: ISequenceSpaceParams = {
480
- seqCol: withoutEmptyValues.col(macroMolecule.name)!,
481
- methodName: methodName,
482
- similarityMetric: similarityMetric,
483
- embedAxesNames: embedColsNames,
484
- options: {...options, sparseMatrixThreshold: sparseMatrixThreshold ?? 0.8,
485
- usingSparseMatrix: table.rowCount > 20000},
486
- };
498
+ embedXCol.init((i) => embeddings[i][0]);
499
+ embedYCol.init((i) => embeddings[i][1]);
500
+ const progress = (_nEpoch / epochsLength * 100);
501
+ pg.update(progress, `Running sequence space ... ${progress.toFixed(0)}%`);
502
+ }
503
+ const embedColsNames = getEmbeddingColsNames(table);
504
+ const withoutEmptyValues = DG.DataFrame.fromColumns([macroMolecule]).clone();
505
+ const emptyValsIdxs = removeEmptyStringRows(withoutEmptyValues, macroMolecule);
506
+
507
+ const chemSpaceParams: ISequenceSpaceParams = {
508
+ seqCol: withoutEmptyValues.col(macroMolecule.name)!,
509
+ methodName: methodName,
510
+ similarityMetric: similarityMetric,
511
+ embedAxesNames: embedColsNames,
512
+ options: {...options, sparseMatrixThreshold: sparseMatrixThreshold ?? 0.8,
513
+ usingSparseMatrix: table.rowCount > 20000},
514
+ };
515
+
516
+ const allowedRowCount = methodName === DimReductionMethods.UMAP ? 100000 : 15000;
517
+ // number of rows which will be processed relatively fast
518
+ const fastRowCount = methodName === DimReductionMethods.UMAP ? 5000 : 2000;
519
+ if (table.rowCount > allowedRowCount) {
520
+ grok.shell.warning(`Too many rows, maximum for sequence space is ${allowedRowCount}`);
521
+ return;
522
+ }
487
523
 
488
- const allowedRowCount = methodName === DimReductionMethods.UMAP ? 100000 : 15000;
489
- // number of rows which will be processed relatively fast
490
- const fastRowCount = methodName === DimReductionMethods.UMAP ? 5000 : 2000;
491
- if (table.rowCount > allowedRowCount) {
492
- grok.shell.warning(`Too many rows, maximum for sequence space is ${allowedRowCount}`);
493
- return;
494
- }
524
+ async function getSeqSpace() {
525
+ let resolveF: Function | null = null;
526
+
527
+ const sub = grok.events.onViewerClosed.subscribe((args) => {
528
+ const v = args.args.viewer as unknown as DG.Viewer<any>;
529
+ if (v?.getOptions()?.look?.title && scatterPlot?.getOptions()?.look?.title &&
530
+ v?.getOptions()?.look?.title === scatterPlot?.getOptions()?.look?.title) {
531
+ grok.events.fireCustomEvent(DIMENSIONALITY_REDUCER_TERMINATE_EVENT, {});
532
+ sub.unsubscribe();
533
+ resolveF?.();
534
+ pg.close();
535
+ }
536
+ });
537
+ const sequenceSpaceResPromise = new Promise<ISequenceSpaceResult | undefined>(async (resolve) => {
538
+ resolveF = resolve;
539
+ const res = await getSequenceSpace(chemSpaceParams,
540
+ options?.[BYPASS_LARGE_DATA_WARNING] ? undefined : progressFunc);
541
+ resolve(res);
542
+ });
543
+ const sequenceSpaceRes = await sequenceSpaceResPromise;
544
+ pg.close();
545
+ sub.unsubscribe();
546
+ return sequenceSpaceRes ? processResult(sequenceSpaceRes) : sequenceSpaceRes;
547
+ }
495
548
 
496
- if (table.rowCount > fastRowCount) {
497
- ui.dialog().add(ui.divText(`Sequence space analysis might take several minutes.
549
+ if (table.rowCount > fastRowCount && !options?.[BYPASS_LARGE_DATA_WARNING]) {
550
+ ui.dialog().add(ui.divText(`Sequence space analysis might take several minutes.
498
551
  Do you want to continue?`))
499
- .onOK(async () => {
500
- const progressBar = DG.TaskBarProgressIndicator.create(`Running Sequence space...`);
501
- const sequenceSpaceRes = await getSequenceSpace(chemSpaceParams);
502
- progressBar.close();
503
- return processResult(sequenceSpaceRes);
504
- })
505
- .show();
506
- } else {
507
- const sequenceSpaceRes = await getSequenceSpace(chemSpaceParams);
508
- return processResult(sequenceSpaceRes);
509
- }
510
-
511
- function processResult(sequenceSpaceRes: ISequenceSpaceResult): DG.ScatterPlotViewer | undefined {
512
- const embeddings = sequenceSpaceRes.coordinates;
513
- for (const col of embeddings) {
514
- const listValues = col.toList();
515
- emptyValsIdxs.forEach((ind: number) => listValues.splice(ind, 0, null));
516
- table.columns.add(DG.Column.float(col.name, table.rowCount).init((i) => listValues[i]));
552
+ .onOK(async () => {
553
+ await getSeqSpace();
554
+ })
555
+ .onCancel(() => { pg.close(); })
556
+ .show();
557
+ } else {
558
+ return await getSeqSpace();
517
559
  }
518
- if (plotEmbeddings) {
519
- return grok.shell
520
- .tableView(table.name)
521
- .scatterPlot({x: embedColsNames[0], y: embedColsNames[1], title: 'Sequence space'});
560
+
561
+ function processResult(sequenceSpaceRes: ISequenceSpaceResult): DG.ScatterPlotViewer | undefined {
562
+ const embeddings = sequenceSpaceRes.coordinates;
563
+ for (const col of embeddings) {
564
+ const listValues = col.toList();
565
+ emptyValsIdxs.forEach((ind: number) => listValues.splice(ind, 0, null));
566
+ let embedCol = table.columns.byName(col.name);
567
+ if (!embedCol) {
568
+ embedCol = DG.Column.float(col.name, listValues.length);
569
+ table.columns.add(embedCol);
570
+ }
571
+ embedCol.init((i) => listValues[i]);
572
+ //table.columns.add(DG.Column.float(col.name, table.rowCount).init((i) => listValues[i]));
573
+ }
574
+ if (plotEmbeddings) {
575
+ if (!scatterPlot) {
576
+ scatterPlot = grok.shell
577
+ .tableView(table.name)
578
+ .scatterPlot({x: embedColsNames[0], y: embedColsNames[1], title: 'Sequence space'});
579
+ }
580
+ return scatterPlot;
581
+ }
522
582
  }
583
+ } catch (e) {
584
+ console.error(e);
585
+ pg.close();
523
586
  }
524
587
 
525
588
 
@@ -38,7 +38,7 @@ category('activityCliffs', async () => {
38
38
  const cliffsNum = DG.Test.isInBenchmark ? 6 : 3;
39
39
 
40
40
  await _testActivityCliffsOpen(actCliffsDf, cliffsNum, DimReductionMethods.UMAP, 'sequence');
41
- }, {skipReason: 'GROK-13952'});
41
+ });
42
42
 
43
43
  test('activityCliffsWithEmptyRows', async () => {
44
44
  actCliffsDfWithEmptyRows = await readDataframe('tests/100_3_clustests_empty_vals.csv');
@@ -47,5 +47,5 @@ category('activityCliffs', async () => {
47
47
  viewList.push(actCliffsTableViewWithEmptyRows);
48
48
 
49
49
  await _testActivityCliffsOpen(actCliffsDfWithEmptyRows, 3, DimReductionMethods.UMAP, 'sequence');
50
- }, {skipReason: 'GROK-13851: Unhandled exceptions'});
50
+ });
51
51
  });
@@ -2,7 +2,7 @@ import * as DG from 'datagrok-api/dg';
2
2
  import * as grok from 'datagrok-api/grok';
3
3
 
4
4
  import {expect} from '@datagrok-libraries/utils/src/test';
5
- import {activityCliffs} from '../package';
5
+ import {activityCliffs, BYPASS_LARGE_DATA_WARNING} from '../package';
6
6
  import {DimReductionMethods} from '@datagrok-libraries/ml/src/reduce-dimensionality';
7
7
 
8
8
  export async function _testActivityCliffsOpen(df: DG.DataFrame, numberCliffs: number, method: DimReductionMethods,
@@ -10,7 +10,7 @@ export async function _testActivityCliffsOpen(df: DG.DataFrame, numberCliffs: nu
10
10
  await grok.data.detectSemanticTypes(df);
11
11
  const scatterPlot = await activityCliffs(
12
12
  df, df.getCol(colName), df.getCol('activity'),
13
- 90, method);
13
+ 90, method, {[`${BYPASS_LARGE_DATA_WARNING}`]: true});
14
14
  // const scatterPlot = (await grok.functions.call('Bio:activityCliffs', {
15
15
  // table: df, molecules: df.getCol(colName), activities: df.getCol('Activity'),
16
16
  // similarity: 50, methodName: method
@@ -62,9 +62,9 @@ A/C/G/T/C
62
62
  C/A/G/T/G/T
63
63
  T/T/C/A/A/C`,
64
64
  helmDna: `seq
65
- DNA1{D(A)P.D(C)P.D(G)P.D(T)P.D(C)P}$$$$
66
- DNA1{D(C)P.D(A)P.D(G)P.D(T)P.D(G)P.D(T)P}$$$$
67
- DNA1{D(T)P.D(T)P.D(C)P.D(A)P.D(A)P.D(C)P}$$$$`,
65
+ RNA1{d(A)p.d(C)p.d(G)p.d(T)p.d(C)p}$$$$
66
+ RNA1{d(C)p.d(A)p.d(G)p.d(T)p.d(G)p.d(T)p}$$$$
67
+ RNA1{d(T)p.d(T)p.d(C)p.d(A)p.d(A)p.d(C)p}$$$$`,
68
68
  fastaRna: `seq
69
69
  ACGUC
70
70
  CAGUGU
@@ -74,9 +74,9 @@ A*C*G*U*C
74
74
  C*A*G*U*G*U
75
75
  U*U*C*A*A*C`,
76
76
  helmRna: `seq
77
- RNA1{R(A)P.R(C)P.R(G)P.R(U)P.R(C)P}$$$$
78
- RNA1{R(C)P.R(A)P.R(G)P.R(U)P.R(G)P.R(U)P}$$$$
79
- RNA1{R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P}$$$$`,
77
+ RNA1{r(A)p.r(C)p.r(G)p.r(U)p.r(C)p}$$$$
78
+ RNA1{r(C)p.r(A)p.r(G)p.r(U)p.r(G)p.r(U)p}$$$$
79
+ RNA1{r(U)p.r(U)p.r(C)p.r(A)p.r(A)p.r(C)p}$$$$`,
80
80
  fastaGaps: `seq
81
81
  FW-PH-EYY
82
82
  FYNRQWYV-
@@ -103,17 +103,17 @@ PEPTIDE1{meI.hHis.Aca.N.T.dE.Thr_PO3H2.Aca.D}$$$$
103
103
  PEPTIDE1{meI.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2}$$$$
104
104
  PEPTIDE1{Lys_Boc.hHis.Aca.Cys_SEt.T.dK.Thr_PO3H2.Aca.Tyr_PO3H2}$$$$`,
105
105
  helmLoneDeoxyribose: `seq
106
- DNA1{D(A).D(C).D(G).D(T).D(C)}$$$$
107
- DNA1{D(C).D(A).D(G).D(T).D(G).D(T)P}$$$$
108
- DNA1{D(T).D(T).D(C).D(A).D(A).D(C)P}$$$$`,
106
+ RNA1{d(A).d(C).d(G).d(T).d(C)}$$$$
107
+ RNA1{d(C).d(A).d(G).d(T).d(G).d(T)p}$$$$
108
+ RNA1{d(T).d(T).d(C).d(A).d(A).d(C)p}$$$$`,
109
109
  helmLoneRibose: `seq
110
- RNA1{R(A).R(C).R(G).R(U).R(C)}$$$$
111
- RNA1{R(C).R(A).R(G).R(U).R(G).R(U)P}$$$$
112
- RNA1{R(U).R(U).R(C).R(A).R(A).R(C)P}$$$$`,
110
+ RNA1{r(A).r(C).r(G).r(U).r(C)}$$$$
111
+ RNA1{r(C).r(A).r(G).r(U).r(G).r(U)p}$$$$
112
+ RNA1{r(U).r(U).r(C).r(A).r(A).r(C)p}$$$$`,
113
113
  helmLonePhosphorus: `seq
114
- RNA1{P.P.R(A)P.R(C)P.R(G)P.R(U)P.R(C)P}$$$$
115
- RNA1{P.P.R(C)P.R(A)P.P.R(G)P.R(U)P.R(G)P.R(U)P}$$$$
116
- RNA1{P.R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P.P.P}$$$$`,
114
+ RNA1{p.p.r(A)p.r(C)p.r(G)p.r(U)p.r(C)p}$$$$
115
+ RNA1{p.p.r(C)p.r(A)p.p.r(G)p.r(U)p.r(G)p.r(U)p}$$$$
116
+ RNA1{p.r(U)p.r(U)p.r(C)p.r(A)p.r(A)p.r(C)p.p.p}$$$$`,
117
117
  };
118
118
 
119
119
  /** Also detects semantic types
@@ -19,15 +19,15 @@ category('sequenceSpace', async () => {
19
19
  );
20
20
  testFastaTableView = grok.shell.addTableView(testFastaDf);
21
21
  await _testSequenceSpaceReturnsResult(testFastaDf, DimReductionMethods.UMAP, 'sequence');
22
- grok.shell.closeTable(testFastaDf);
23
- testFastaTableView.close();
22
+ //grok.shell.closeTable(testFastaDf);
23
+ //testFastaTableView.close();
24
24
  });
25
25
 
26
26
  test('sequenceSpaceWithEmptyRows', async () => {
27
27
  testHelmWithEmptyRows = await readDataframe('tests/100_3_clustests_empty_vals.csv');
28
28
  testHelmWithEmptyRowsTableView = grok.shell.addTableView(testHelmWithEmptyRows);
29
29
  await _testSequenceSpaceReturnsResult(testHelmWithEmptyRows, DimReductionMethods.UMAP, 'sequence');
30
- grok.shell.closeTable(testHelmWithEmptyRows);
31
- testHelmWithEmptyRowsTableView.close();
30
+ //grok.shell.closeTable(testHelmWithEmptyRows);
31
+ //testHelmWithEmptyRowsTableView.close();
32
32
  });
33
33
  });
@@ -1,7 +1,7 @@
1
1
  import * as DG from 'datagrok-api/dg';
2
2
  import * as grok from 'datagrok-api/grok';
3
3
  import {expect} from '@datagrok-libraries/utils/src/test';
4
- import {sequenceSpaceTopMenu} from '../package';
4
+ import {BYPASS_LARGE_DATA_WARNING, sequenceSpaceTopMenu} from '../package';
5
5
  import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
6
6
  import {DimReductionMethods} from '@datagrok-libraries/ml/src/reduce-dimensionality';
7
7
 
@@ -14,6 +14,7 @@ export async function _testSequenceSpaceReturnsResult(
14
14
  if (semType)
15
15
  col.semType = semType;
16
16
 
17
- const sp = await sequenceSpaceTopMenu(df, df.col(colName)!, algorithm, MmDistanceFunctionsNames.LEVENSHTEIN, true);
17
+ const sp = await sequenceSpaceTopMenu(df, df.col(colName)!, algorithm, MmDistanceFunctionsNames.LEVENSHTEIN, true,
18
+ 0.6, {[`${BYPASS_LARGE_DATA_WARNING}`]: true});
18
19
  expect(sp != null, true);
19
20
  }
@@ -138,6 +138,8 @@ export class MacromoleculeSequenceCellRenderer extends DG.GridCellRenderer {
138
138
  // Now the renderer requires data frame table Column underlying GridColumn
139
139
  const grid = gridCell.grid;
140
140
  const tableCol: DG.Column = gridCell.cell.column;
141
+ if (!grid || !tableCol) return;
142
+
141
143
  const tableColTemp: TempType = tableCol.temp;
142
144
 
143
145
  // Cell renderer settings
@@ -303,6 +303,7 @@ export class VdRegionsViewer extends DG.JsViewer implements IVdRegionsViewer {
303
303
  skipEmptyPositions: this.skipEmptyPositions,
304
304
  positionWidth: this.positionWidth,
305
305
  positionHeight: this.positionHeight,
306
+ filterSource: this.filterSource,
306
307
  }) as WebLogoViewer;
307
308
  wl.onSizeChanged.subscribe(() => { this.calcSize(); });
308
309
  return [orderI, chain, wl];
@@ -363,7 +364,7 @@ export class VdRegionsViewer extends DG.JsViewer implements IVdRegionsViewer {
363
364
  // this.mainLayout.style.height = '100%';
364
365
  // this.mainLayout.style.border = '1px solid black';
365
366
 
366
- this.filterSourceInput = ui.choiceInput<FilterSources>('Data source', defaults.filterSource,
367
+ this.filterSourceInput = ui.choiceInput<FilterSources>('Data source', this.filterSource,
367
368
  Object.values(FilterSources), this.filterSourceInputOnValueChanged.bind(this));
368
369
  this.filterSourceInput.root.style.position = 'absolute';
369
370
  this.filterSourceInput.root.style.right = '9px';