@datagrok/bio 2.26.4 → 2.26.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "name": "Davit Rizhinashvili",
6
6
  "email": "drizhinashvili@datagrok.ai"
7
7
  },
8
- "version": "2.26.4",
8
+ "version": "2.26.5",
9
9
  "description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
10
10
  "repository": {
11
11
  "type": "git",
@@ -44,10 +44,10 @@
44
44
  ],
45
45
  "dependencies": {
46
46
  "@biowasm/aioli": "^3.1.0",
47
- "@datagrok-libraries/bio": "^5.63.3",
47
+ "@datagrok-libraries/bio": "^5.63.5",
48
48
  "@datagrok-libraries/chem-meta": "^1.2.9",
49
49
  "@datagrok-libraries/math": "^1.2.6",
50
- "@datagrok-libraries/ml": "^6.10.9",
50
+ "@datagrok-libraries/ml": "^6.10.11",
51
51
  "@datagrok-libraries/test": "^1.1.0",
52
52
  "@datagrok-libraries/tutorials": "^1.7.4",
53
53
  "@datagrok-libraries/utils": "^4.6.9",
@@ -77,7 +77,7 @@
77
77
  "@types/wu": "^2.1.44",
78
78
  "@typescript-eslint/eslint-plugin": "^8.8.1",
79
79
  "@typescript-eslint/parser": "^8.8.1",
80
- "datagrok-tools": "^5.1.5",
80
+ "datagrok-tools": "^5.1.9",
81
81
  "eslint": "^8.57.1",
82
82
  "eslint-config-google": "^0.14.0",
83
83
  "eslint-plugin-rxjs": "^5.0.3",
@@ -15,8 +15,20 @@ import {HelmType} from '@datagrok-libraries/bio/src/helm/types';
15
15
  import {drawMoleculeDifferenceOnCanvas} from '../utils/cell-renderer';
16
16
  import {invalidateMols, MONOMERIC_COL_TAGS} from '../substructure-search/substructure-search';
17
17
 
18
+ import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
19
+ import {BitArrayMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
20
+
18
21
  import {_package} from '../package';
19
22
 
/**
 * Settings of a sequence activity cliffs run.
 *
 * Within this package the object is serialized with `JSON.stringify` into the
 * `seqActivityCliffsParams` dataframe tag and read back with `JSON.parse` when
 * the scatter plot is (re)initialized.
 */
export type SeqActivityCliffsParams = {
  /** Name of the dataframe column that holds the sequences. */
  seqColName: string;
  /** Name of the dataframe column that holds the activity values. */
  activityColName: string;
  /** Distance / similarity metric used to compare sequences. */
  similarityMetric: MmDistanceFunctionsNames | BitArrayMetrics;
  /** Similarity cutoff. */
  similarity: number;
  /** Extra options forwarded to the activity cliffs routine (shape is not fixed by this type). */
  options: any;
  /** Optional demo-mode flag. */
  isDemo?: boolean;
};
31
+
20
32
  export async function getDistances(col: DG.Column, seq: string): Promise<Array<number>> {
21
33
  const stringArray = col.toList();
22
34
  const distances = new Array(stringArray.length).fill(0);
@@ -192,6 +192,14 @@ export namespace funcs {
192
192
  return await grok.functions.call('Bio:ActivityCliffs', { table, molecules, activities, similarity, methodName, similarityMetric, preprocessingFunction, options, demo });
193
193
  }
194
194
 
/**
 * Calls the registered function `Bio:SeqActivityCliffsInitFunction`,
 * passing `sp` as its only named argument.
 */
export async function seqActivityCliffsInitFunction(sp: any ): Promise<void> {
  const callArgs = {sp};
  return await grok.functions.call('Bio:SeqActivityCliffsInitFunction', callArgs);
}
198
+
/**
 * Calls the registered function `Bio:SeqActivityCliffsTransform`, forwarding
 * every parameter under its own name.
 */
export async function seqActivityCliffsTransform(table: DG.DataFrame , molecules: DG.Column , activities: DG.Column , similarity: number , methodName: string , similarityMetric: string , options?: string , isDemo?: boolean , axesNames?: any ): Promise<void> {
  const callArgs = {
    table,
    molecules,
    activities,
    similarity,
    methodName,
    similarityMetric,
    options,
    isDemo,
    axesNames,
  };
  return await grok.functions.call('Bio:SeqActivityCliffsTransform', callArgs);
}
202
+
195
203
  export async function macromoleculePreprocessingFunction(col: DG.Column , metric: string , gapOpen?: number , gapExtend?: number , fingerprintType?: string ): Promise<any> {
196
204
  return await grok.functions.call('Bio:MacromoleculePreprocessingFunction', { col, metric, gapOpen, gapExtend, fingerprintType });
197
205
  }
@@ -203,10 +211,14 @@ export namespace funcs {
203
211
  /**
204
212
  Creates 2D sequence space with projected sequences by pairwise distance
205
213
  */
206
- export async function sequenceSpaceTopMenu(table: DG.DataFrame , molecules: DG.Column , methodName: string , similarityMetric: string , plotEmbeddings: boolean , preprocessingFunction?: any , options?: any , clusterEmbeddings?: boolean , isDemo?: boolean ): Promise<void> {
214
+ export async function sequenceSpaceTopMenu(table: DG.DataFrame , molecules: DG.Column , methodName: string , similarityMetric: string , plotEmbeddings: boolean , preprocessingFunction?: any , options?: any , clusterEmbeddings?: boolean , isDemo?: boolean ): Promise<any> {
207
215
  return await grok.functions.call('Bio:SequenceSpaceTopMenu', { table, molecules, methodName, similarityMetric, plotEmbeddings, preprocessingFunction, options, clusterEmbeddings, isDemo });
208
216
  }
209
217
 
/**
 * Calls the registered function `Bio:SequenceSpaceTransform`, forwarding
 * every parameter under its own name, and returns whatever that call yields.
 */
export async function sequenceSpaceTransform(table: DG.DataFrame , molecules: DG.Column , methodName: string , similarityMetric: string , plotEmbeddings: boolean , options?: string , clusterEmbeddings?: boolean , embedColsNames?: any , clusterColName?: string ): Promise<any> {
  const callArgs = {
    table,
    molecules,
    methodName,
    similarityMetric,
    plotEmbeddings,
    options,
    clusterEmbeddings,
    embedColsNames,
    clusterColName,
  };
  return await grok.functions.call('Bio:SequenceSpaceTransform', callArgs);
}
221
+
210
222
  /**
211
223
  Converts Peptide molecules to HELM notation by matching with monomer library
212
224
  */
@@ -37,6 +37,7 @@ import './tests/mm-distance-tests';
37
37
  import './tests/activity-cliffs-tests';
38
38
  import './tests/sequence-space-test';
39
39
  import './tests/scoring';
40
+ import './tests/projects-tests';
40
41
 
41
42
 
42
43
  export const _package = new DG.Package();
package/src/package.g.ts CHANGED
@@ -270,6 +270,25 @@ export async function activityCliffs(table: DG.DataFrame, molecules: DG.Column<a
270
270
  return await PackageFunctions.activityCliffs(table, molecules, activities, similarity, methodName, similarityMetric, preprocessingFunction, options, demo);
271
271
  }
272
272
 
273
+ //input: viewer sp
274
+ export async function seqActivityCliffsInitFunction(sp: any) : Promise<void> {
275
+ await PackageFunctions.seqActivityCliffsInitFunction(sp);
276
+ }
277
+
278
+ //input: dataframe table { description: Input data table }
279
+ //input: column molecules { semType: Macromolecule }
280
+ //input: column activities { type: numerical }
281
+ //input: double similarity = 80 { description: Similarity cutoff }
282
+ //input: string methodName
283
+ //input: string similarityMetric
284
+ //input: string options { optional: true }
285
+ //input: bool isDemo { optional: true }
286
+ //input: list<string> axesNames { optional: true }
287
+ //meta.role: transform
288
+ export async function seqActivityCliffsTransform(table: DG.DataFrame, molecules: DG.Column, activities: DG.Column, similarity: number, methodName: any, similarityMetric: any, options?: string, isDemo?: boolean, axesNames?: string[]) : Promise<void> {
289
+ await PackageFunctions.seqActivityCliffsTransform(table, molecules, activities, similarity, methodName, similarityMetric, options, isDemo, axesNames);
290
+ }
291
+
273
292
  //name: Encode Sequences
274
293
  //tags: dim-red-preprocessing-function
275
294
  //input: column col { semType: Macromolecule }
@@ -309,12 +328,28 @@ export async function helmPreprocessingFunction(col: DG.Column<any>, _metric: an
309
328
  //input: object options { optional: true }
310
329
  //input: bool clusterEmbeddings = true { optional: true }
311
330
  //input: bool isDemo { optional: true }
331
+ //output: viewer result
312
332
  //top-menu: Bio | Analyze | Sequence Space...
313
333
  //editor: Bio:SequenceSpaceEditor
314
334
  export async function sequenceSpaceTopMenu(table: DG.DataFrame, molecules: DG.Column, methodName: any, similarityMetric: any, plotEmbeddings: boolean, preprocessingFunction?: any, options?: any, clusterEmbeddings?: boolean, isDemo?: boolean) : Promise<any> {
315
335
  return await PackageFunctions.sequenceSpaceTopMenu(table, molecules, methodName, similarityMetric, plotEmbeddings, preprocessingFunction, options, clusterEmbeddings, isDemo);
316
336
  }
317
337
 
338
+ //input: dataframe table
339
+ //input: column molecules { semType: Macromolecule }
340
+ //input: string methodName
341
+ //input: string similarityMetric
342
+ //input: bool plotEmbeddings = true
343
+ //input: string options { optional: true }
344
+ //input: bool clusterEmbeddings { optional: true }
345
+ //input: list<string> embedColsNames { optional: true }
346
+ //input: string clusterColName { optional: true }
347
+ //output: viewer result
348
+ //meta.role: transform
349
+ export async function sequenceSpaceTransform(table: DG.DataFrame, molecules: DG.Column, methodName: any, similarityMetric: any, plotEmbeddings: boolean, options?: string, clusterEmbeddings?: boolean, embedColsNames?: string[], clusterColName?: string) : Promise<any> {
350
+ return await PackageFunctions.sequenceSpaceTransform(table, molecules, methodName, similarityMetric, plotEmbeddings, options, clusterEmbeddings, embedColsNames, clusterColName);
351
+ }
352
+
318
353
  //name: Molecules to HELM
319
354
  //description: Converts Peptide molecules to HELM notation by matching with monomer library
320
355
  //input: dataframe table { description: Input data table }
package/src/package.ts CHANGED
@@ -9,7 +9,7 @@ import * as DG from 'datagrok-api/dg';
9
9
 
10
10
  import {Options} from '@datagrok-libraries/utils/src/type-declarations';
11
11
  import {DimReductionBaseEditor, PreprocessFunctionReturnType} from '@datagrok-libraries/ml/src/functionEditors/dimensionality-reduction-editor';
12
- import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
12
+ import {getActivityCliffsEmbeddings, runActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
13
13
  import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
14
14
  import {BitArrayMetrics, KnownMetrics} from '@datagrok-libraries/ml/src/typed-metrics';
15
15
  import {ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
@@ -38,7 +38,7 @@ import {MacromoleculeDifferenceCellRenderer, MacromoleculeSequenceCellRenderer,}
38
38
  import {VdRegionsViewer} from './viewers/vd-regions-viewer';
39
39
  import {SequenceAlignment} from './seq_align';
40
40
  import {getEncodedSeqSpaceCol} from './analysis/sequence-space';
41
- import {createLinesGrid, createPropPanelElement, createTooltipElement,} from './analysis/sequence-activity-cliffs';
41
+ import {createLinesGrid, createPropPanelElement, createTooltipElement, SeqActivityCliffsParams} from './analysis/sequence-activity-cliffs';
42
42
  import {SequenceSimilarityViewer} from './analysis/sequence-similarity-viewer';
43
43
  import {SequenceDiversityViewer} from './analysis/sequence-diversity-viewer';
44
44
  import {invalidateMols, MONOMERIC_COL_TAGS, SubstructureSearchDialog} from './substructure-search/substructure-search';
@@ -522,38 +522,6 @@ export class PackageFunctions {
522
522
  }
523
523
  if (!checkInputColumnUI(molecules, 'Activity Cliffs'))
524
524
  return;
525
- const axesNames = getEmbeddingColsNames(table);
526
- const tags = {
527
- 'units': molecules.meta.units!,
528
- 'aligned': molecules.getTag(bioTAGS.aligned),
529
- 'separator': molecules.getTag(bioTAGS.separator),
530
- 'alphabet': molecules.getTag(bioTAGS.alphabet),
531
- };
532
- const columnDistanceMetric: MmDistanceFunctionsNames | BitArrayMetrics = similarityMetric;
533
- const seqCol = molecules;
534
-
535
- const runCliffs = async () => {
536
- const sp = await getActivityCliffs(
537
- table,
538
- seqCol,
539
- axesNames,
540
- 'Activity cliffs', //scatterTitle
541
- activities,
542
- similarity,
543
- columnDistanceMetric, //similarityMetric
544
- methodName,
545
- {...(options ?? {})},
546
- DG.SEMTYPE.MACROMOLECULE,
547
- tags,
548
- preprocessingFunction,
549
- createTooltipElement,
550
- createPropPanelElement,
551
- createLinesGrid,
552
- undefined,
553
- demo
554
- );
555
- return sp;
556
- };
557
525
 
558
526
  const allowedRowCount = methodName === DimReductionMethods.UMAP ? 200_000 : 20_000;
559
527
  const fastRowCount = methodName === DimReductionMethods.UMAP ? 5_000 : 2_000;
@@ -562,29 +530,125 @@ export class PackageFunctions {
562
530
  return;
563
531
  }
564
532
 
533
+ const axesNames = getEmbeddingColsNames(table);
534
+
// Runs the `seqActivityCliffsTransform` function on the captured inputs and then
// adds an 'Activity cliffs' scatter plot to the current table view. The plot names
// `seqActivityCliffsInitFunction` as its initialization function.
const runCliffs = async (): Promise<void> => {
  const transformFunc = DG.Func.find({name: 'seqActivityCliffsTransform'})[0];
  const transformCall = transformFunc.prepare({
    table,
    molecules,
    activities,
    similarity,
    methodName,
    similarityMetric,
    // the transform takes its options as a JSON string
    options: JSON.stringify(options),
    isDemo: demo,
    axesNames,
  });
  await transformCall.call(undefined, undefined, {processed: false});

  const view = grok.shell.tv;

  // human-readable summary of the run, stored on the viewer but never displayed
  const optionsNote = options ? `options: ${JSON.stringify(options)},` : ``;
  const description = `Molecules: ${molecules.name}, activities: ${activities.name}, method: ${methodName}, ${optionsNote} similarity: ${similarityMetric}, similarity cutoff: ${similarity}`;

  const scatterPlotSettings = {
    xColumnName: axesNames[0],
    yColumnName: axesNames[1],
    color: activities.name,
    showXSelector: false,
    showYSelector: false,
    showSizeSelector: false,
    showColorSelector: false,
    markerMinSize: 5,
    markerMaxSize: 25,
    title: 'Activity cliffs',
    initializationFunction: 'seqActivityCliffsInitFunction',
    description,
    descriptionVisibilityMode: 'Never',
  };
  view.addViewer(DG.VIEWER.SCATTER_PLOT, scatterPlotSettings);
};
567
+
565
568
  const pi = DG.TaskBarProgressIndicator.create(`Running sequence activity cliffs ...`);
566
- const scRes = (await new Promise<DG.Viewer | undefined>((resolve, reject) => {
569
+ try {
567
570
  if (table.rowCount > fastRowCount && !options?.[BYPASS_LARGE_DATA_WARNING]) {
568
- ui.dialog().add(ui.divText(`Activity cliffs analysis might take several minutes.
571
+ await new Promise<void>((resolve, reject) => {
572
+ ui.dialog().add(ui.divText(`Activity cliffs analysis might take several minutes.
569
573
  Do you want to continue?`))
570
- .onOK(async () => {
571
- runCliffs().then((res) => resolve(res)).catch((err) => reject(err));
572
- })
573
- .onCancel(() => { resolve(undefined); })
574
- .show();
574
+ .onOK(async () => {
575
+ runCliffs().then(() => resolve()).catch((err) => reject(err));
576
+ })
577
+ .onCancel(() => { resolve(); })
578
+ .show();
579
+ });
575
580
  } else
576
- runCliffs().then((res) => resolve(res)).catch((err) => reject(err));
577
- }).catch((err: any) => {
581
+ await runCliffs();
582
+ } catch (err: any) {
578
583
  const [errMsg, errStack] = errInfo(err);
579
584
  _package.logger.error(errMsg, undefined, errStack);
580
585
  throw err;
581
- }).finally(() => { pi.close(); })) as DG.ScatterPlotViewer | undefined;
582
- if (scRes?.props?.xColumnName && scRes?.props?.yColumnName && table.col(scRes.props.xColumnName) && table.col(scRes.props.yColumnName)) {
583
- table.col(scRes.props.xColumnName)!.set(0, table.col(scRes.props.xColumnName)!.get(0)); // to trigger rendering
584
- table.col(scRes.props.yColumnName)!.set(0, table.col(scRes.props.yColumnName)!.get(0)); // to trigger rendering
586
+ } finally {
587
+ pi.close();
585
588
  }
589
+ }
586
590
 
587
- return scRes;
/**
 * Initializes activity cliffs on an existing scatter plot.
 *
 * Reads the run parameters from the `seqActivityCliffsParams` tag of the viewer's
 * dataframe, encodes the sequence column with `macromoleculePreprocessingFunction`
 * and hands everything to `runActivityCliffs`. Reports an error through
 * `grok.shell.error` and returns early when the tag or a referenced column is missing.
 *
 * @param sp scatter plot whose dataframe carries the parameters tag and whose
 *   x/y column settings name the embedding axes
 */
@grok.decorators.func({
  name: 'seqActivityCliffsInitFunction',
})
static async seqActivityCliffsInitFunction(
  @grok.decorators.param({type: 'viewer'}) sp: DG.ScatterPlotViewer): Promise<void> {
  const df = sp.dataFrame;
  const tag = df.getTag('seqActivityCliffsParams');
  if (!tag) {
    grok.shell.error(`Sequence activity cliffs parameters not found in table tags`);
    return;
  }
  const actCliffsParams: SeqActivityCliffsParams = JSON.parse(tag);

  // The tag only stores column names; the columns may have been renamed or removed since.
  const molCol = df.col(actCliffsParams.seqColName);
  const actCol = df.col(actCliffsParams.activityColName);
  if (!molCol || !actCol) {
    const missingName = !molCol ? actCliffsParams.seqColName : actCliffsParams.activityColName;
    grok.shell.error(`Sequence activity cliffs column '${missingName}' not found in table`);
    return;
  }

  // The tag may carry `options` either as an object or as a JSON string
  // (the transform receives its options as a string); normalize to an object.
  const rawOptions: unknown = actCliffsParams.options;
  let cliffsOptions: any;
  if (typeof rawOptions === 'string')
    cliffsOptions = rawOptions.trim() ? JSON.parse(rawOptions) : {};
  else
    cliffsOptions = rawOptions ?? {};

  const preprocessingFunction = DG.Func.find({name: 'macromoleculePreprocessingFunction', package: 'Bio'})[0];
  const encodedColWithOptions = await preprocessingFunction.apply({
    col: molCol, metric: actCliffsParams.similarityMetric,
  });

  // The embedding axes are whatever columns the scatter plot currently shows.
  const look = sp.getOptions().look;
  const axesNames = [look['xColumnName'], look['yColumnName']];
  const tags = {
    'units': molCol.meta.units!,
    'aligned': molCol.getTag(bioTAGS.aligned),
    'separator': molCol.getTag(bioTAGS.separator),
    'alphabet': molCol.getTag(bioTAGS.alphabet),
  };

  await runActivityCliffs(sp, df, molCol, encodedColWithOptions, actCol, axesNames,
    actCliffsParams.similarity, actCliffsParams.similarityMetric, cliffsOptions,
    DG.SEMTYPE.MACROMOLECULE, tags,
    createTooltipElement, createPropPanelElement, createLinesGrid, undefined, actCliffsParams.isDemo);
}
623
+
/**
 * Computes the activity cliffs embeddings for `molecules` and records the run
 * parameters in the `seqActivityCliffsParams` table tag as JSON.
 *
 * @param table input data table
 * @param molecules macromolecule column
 * @param activities numerical activity column (only its name is stored in the tag here)
 * @param similarity similarity cutoff
 * @param methodName dimensionality reduction method
 * @param similarityMetric sequence distance / similarity metric
 * @param options JSON-encoded options object; missing or empty means `{}`
 * @param isDemo demo-mode flag, stored in the tag
 * @param axesNames embedding column names; derived with `getEmbeddingColsNames(table)` when omitted
 */
@grok.decorators.func({
  meta: {role: 'transform'},
})
static async seqActivityCliffsTransform(
  @grok.decorators.param({options: {description: 'Input data table'}}) table: DG.DataFrame,
  @grok.decorators.param({type: 'column', options: {semType: 'Macromolecule'}}) molecules: DG.Column,
  @grok.decorators.param({type: 'column', options: {type: 'numerical'}}) activities: DG.Column,
  @grok.decorators.param({options: {description: 'Similarity cutoff', initialValue: '80'}}) similarity: number,
  @grok.decorators.param({type: 'string'}) methodName: DimReductionMethods,
  @grok.decorators.param({type: 'string'}) similarityMetric: MmDistanceFunctionsNames | BitArrayMetrics,
  @grok.decorators.param({options: {optional: true}}) options?: string,
  @grok.decorators.param({options: {optional: true}}) isDemo?: boolean,
  @grok.decorators.param({options: {optional: true}}) axesNames?: string[]): Promise<void> {
  await table.meta.detectSemanticTypes();
  const preprocessingFunction = DG.Func.find({name: 'macromoleculePreprocessingFunction', package: 'Bio'})[0];
  if (!axesNames)
    axesNames = getEmbeddingColsNames(table);

  // `options` arrives as a JSON string; a missing or empty value is treated as "no options".
  const parseOptions = (): any => (options ? JSON.parse(options) : {});

  await getActivityCliffsEmbeddings(table, molecules, axesNames, similarity,
    similarityMetric, methodName, parseOptions(), preprocessingFunction);

  const tagContent: SeqActivityCliffsParams = {
    seqColName: molecules.name,
    activityColName: activities.name,
    similarityMetric: similarityMetric,
    similarity: similarity,
    // Store the parsed object (not the raw string) so the tag always holds an options object.
    // Parsed afresh so the tag reflects the caller's input even if the embeddings routine
    // modified the object it was given.
    options: parseOptions(),
    isDemo: isDemo,
  };
  table.setTag('seqActivityCliffsParams', JSON.stringify(tagContent));
}
589
653
 
590
654
  @grok.decorators.func({
@@ -644,7 +708,7 @@ export class PackageFunctions {
644
708
  description: 'Creates 2D sequence space with projected sequences by pairwise distance',
645
709
  'top-menu': 'Bio | Analyze | Sequence Space...',
646
710
  editor: 'Bio:SequenceSpaceEditor',
647
- outputs: [],
711
+ outputs: [{type: 'viewer', name: 'result'}],
648
712
  })
649
713
  static async sequenceSpaceTopMenu(
650
714
  table: DG.DataFrame,
@@ -662,22 +726,65 @@ export class PackageFunctions {
662
726
  grok.shell.error(`Table ${table.name} is not a current table view`);
663
727
  return;
664
728
  }
665
- const tableView =
666
- grok.shell.tv.dataFrame == table ? grok.shell.tv : undefined;
667
729
  if (!checkInputColumnUI(molecules, 'Sequence Space'))
668
730
  return;
669
- if (!preprocessingFunction)
670
- preprocessingFunction = DG.Func.find({name: 'macromoleculePreprocessingFunction', package: 'Bio'})[0];
671
- options ??= {};
731
+ const clusterColName = table.columns.getUnusedName('Cluster (DBSCAN)');
732
+ const embedColsNames: string[] = getEmbeddingColsNames(table);
733
+ await DG.Func.find({name: 'sequenceSpaceTransform'})[0].prepare({
734
+ table: table,
735
+ molecules: molecules,
736
+ methodName: methodName,
737
+ similarityMetric: similarityMetric,
738
+ plotEmbeddings: false,
739
+ options: JSON.stringify(options),
740
+ clusterEmbeddings: clusterEmbeddings,
741
+ embedColsNames: embedColsNames,
742
+ clusterColName: clusterColName,
743
+ }).call(undefined, undefined, {processed: false});
744
+
745
+ let res: DG.ScatterPlotViewer | undefined;
746
+ if (plotEmbeddings) {
747
+ const tv = grok.shell.tv;
748
+ res = tv.scatterPlot({x: embedColsNames[0], y: embedColsNames[1], title: 'Sequence space'});
749
+ const description = `Molecules column: ${molecules.name}, method: ${methodName}, ${options ? `options: ${JSON.stringify(options)},` : ``} similarity: ${similarityMetric}`;
750
+ res.setOptions({description: description, descriptionVisibilityMode: 'Never'});
751
+ if (clusterEmbeddings)
752
+ res.props.colorColumnName = clusterColName;
753
+ }
754
+ return res;
755
+ }
756
+
757
+ @grok.decorators.func({
758
+ outputs: [{type: 'viewer', name: 'result'}],
759
+ meta: {role: 'transform'},
760
+ })
761
+ static async sequenceSpaceTransform(
762
+ table: DG.DataFrame,
763
+ @grok.decorators.param({options: {semType: 'Macromolecule'}}) molecules: DG.Column,
764
+ @grok.decorators.param({type: 'string'}) methodName: DimReductionMethods,
765
+ @grok.decorators.param({type: 'string'}) similarityMetric: BitArrayMetrics | MmDistanceFunctionsNames,
766
+ @grok.decorators.param({options: {initialValue: 'true'}}) plotEmbeddings: boolean,
767
+ @grok.decorators.param({options: {optional: true}}) options?: string,
768
+ @grok.decorators.param({options: {optional: true}}) clusterEmbeddings?: boolean,
769
+ @grok.decorators.param({options: {optional: true}}) embedColsNames?: string[],
770
+ @grok.decorators.param({options: {optional: true}}) clusterColName?: string,
771
+ ): Promise<DG.ScatterPlotViewer | undefined> {
772
+ await table.meta.detectSemanticTypes();
773
+ const preprocessingFunction = DG.Func.find({name: 'macromoleculePreprocessingFunction', package: 'Bio'})[0];
774
+ const parsedOptions: any = JSON.parse(options ?? '{}');
775
+ const tableView =
776
+ grok.shell.tv?.dataFrame == table ? grok.shell.tv : undefined;
672
777
  const res = await multiColReduceDimensionality(table, [molecules], methodName,
673
778
  [similarityMetric as KnownMetrics], [1], [preprocessingFunction], 'MANHATTAN',
674
779
  plotEmbeddings, clusterEmbeddings ?? false,
675
- /* dimRedOptions */ {...options, preprocessingFuncArgs: [options.preprocessingFuncArgs ?? {}]},
780
+ /* dimRedOptions */ {...parsedOptions, preprocessingFuncArgs: [parsedOptions.preprocessingFuncArgs ?? {}]},
676
781
  /* uiOptions */{
677
782
  fastRowCount: 10000,
678
783
  scatterPlotName: 'Sequence space',
679
- bypassLargeDataWarning: options?.[BYPASS_LARGE_DATA_WARNING],
784
+ bypassLargeDataWarning: parsedOptions?.[BYPASS_LARGE_DATA_WARNING],
680
785
  tableView: tableView,
786
+ embedColsNames: embedColsNames,
787
+ clusterColName: clusterColName,
681
788
  });
682
789
  return res;
683
790
  }
@@ -0,0 +1,202 @@
1
+ import * as DG from 'datagrok-api/dg';
2
+ import * as grok from 'datagrok-api/grok';
3
+
4
+ import {after, awaitCheck, category, delay, expect, test} from '@datagrok-libraries/test/src/test';
5
+ import {MmDistanceFunctionsNames} from '@datagrok-libraries/ml/src/macromolecule-distance-functions';
6
+ import {BYPASS_LARGE_DATA_WARNING} from '@datagrok-libraries/ml/src/functionEditors/consts';
7
+ import {getMonomerLibHelper, IMonomerLibHelper} from '@datagrok-libraries/bio/src/types/monomer-library';
8
+ import {getUserLibSettings, setUserLibSettings} from '@datagrok-libraries/bio/src/monomer-works/lib-settings';
9
+ import {UserLibSettings} from '@datagrok-libraries/bio/src/monomer-works/types';
10
+
11
+ import {readDataframe} from './utils';
12
+
13
+
14
+ category('projects', () => {
15
+ let monomerLibHelper: IMonomerLibHelper;
16
+ let userLibSettings: UserLibSettings;
17
+
/**
 * Reads `tableName` with `readDataframe`, names the dataframe after the file
 * (without `.csv`), detects semantic types and opens it in a new table view.
 */
async function createTableView(tableName: string): Promise<DG.TableView> {
  const dataFrame = await readDataframe(tableName);
  dataFrame.name = tableName.replace('.csv', '');
  await grok.data.detectSemanticTypes(dataFrame);
  return grok.shell.addTableView(dataFrame);
}
25
+
/**
 * Saves the table view as a project (table + layout), closes everything in the
 * shell and re-opens the saved project.
 *
 * @param tv table view to persist
 * @param dataSync when truthy, marks the table info for data sync and copies the
 *   creation script tag of the current table view's dataframe onto it
 */
async function saveAndOpenProject(tv: DG.TableView, dataSync?: boolean): Promise<void> {
  const project = DG.Project.create();
  const tableInfo = tv.dataFrame.getTableInfo();
  if (dataSync) {
    const creationScript = grok.shell.tv.dataFrame.getTag(DG.Tags.CreationScript);
    //@ts-ignore
    tableInfo.tags[DG.Tags.DataSync] = 'sync';
    //@ts-ignore
    tableInfo.tags[DG.Tags.CreationScript] = creationScript;
  }
  const layoutInfo = tv.getInfo();
  project.addChild(tableInfo);
  project.addChild(layoutInfo);

  // persist the data first, then the entities that refer to it
  await grok.dapi.tables.uploadDataFrame(tv.dataFrame);
  await grok.dapi.tables.save(tableInfo);
  await grok.dapi.views.save(layoutInfo);
  await grok.dapi.projects.save(project);

  // remember the id before closing so the project can be fetched again
  const savedProjectId = project.id;
  grok.shell.closeAll();
  const reopened = await grok.dapi.projects.find(savedProjectId);
  await reopened.open();
}
47
+
/**
 * Waits (up to 5 seconds) until the current table view's dataframe contains every
 * column listed in `colArr`.
 *
 * @param colArr names of the columns that must be present
 * @throws Error naming the column that was still missing at the last check
 */
async function dataFrameContainsColumns(colArr: string[]): Promise<void> {
  // Updated on every failed poll, so the final error describes what was actually missing.
  // (Building the message before polling starts would always yield an empty column name.)
  let failure = 'Current table view has no dataframe';
  const allColumnsPresent = (): boolean => {
    const df = grok.shell.tv?.dataFrame;
    if (!df) {
      failure = 'Current table view has no dataframe';
      return false;
    }
    for (const colName of colArr) {
      if (!df.col(colName)) {
        failure = `${colName} hasn't been added to dataframe`;
        return false;
      }
    }
    return true;
  };

  try {
    await awaitCheck(allColumnsPresent, 'Columns check timed out', 5000);
  } catch (err: unknown) {
    const details = err instanceof Error ? err.message : String(err);
    throw new Error(`${failure} (${details})`);
  }
}
63
+
/** Waits (up to 5 seconds) until the current table view has a viewer of type `viewerType`. */
async function checkViewerAdded(viewerType: string): Promise<void> {
  const hasViewerOfType = (): boolean =>
    Array.from(grok.shell.tv.viewers).some((viewer) => viewer.type === viewerType);
  await awaitCheck(hasViewerOfType, `${viewerType} hasn\'t been added`, 5000);
}
73
+
/**
 * Opens a table, runs an analysis on it, saves and re-opens it as a project, then
 * verifies that the expected columns and viewer are present after re-opening.
 *
 * @param tableName table file, relative to the package test data (or to
 *   `System:AppData/Bio/` when `dataSync` is set)
 * @param analysisFunc analysis to run on the freshly opened table view
 * @param colList columns expected in the re-opened dataframe
 * @param viewerType viewer type expected in the re-opened view; skipped when empty
 * @param dataSync when truthy, the table is opened through `OpenFile` and the project
 *   is saved with data sync
 * @param additionalChecks extra verification, called with the re-opened table view
 */
async function runSaveAndOpenProjectTest(tableName: string, analysisFunc: (tv: DG.TableView) => Promise<void>,
  colList: string[], viewerType: string, dataSync?: boolean,
  additionalChecks?: (tv: DG.TableView) => Promise<void>): Promise<void> {
  let tv: DG.TableView;
  if (dataSync) {
    await DG.Func.find({name: 'OpenFile'})[0].prepare({
      fullPath: `System:AppData/Bio/${tableName}`,
    }).call(undefined, undefined, {processed: false});
    tv = grok.shell.tv;
    await grok.data.detectSemanticTypes(tv.dataFrame);
  } else
    tv = await createTableView(tableName);
  await delay(100);
  await analysisFunc(tv);
  await delay(10);
  await saveAndOpenProject(tv, dataSync);
  await delay(10);
  await dataFrameContainsColumns(colList);
  if (viewerType)
    await checkViewerAdded(viewerType);
  // `tv` was closed by saveAndOpenProject; hand the checks the view of the re-opened project.
  if (additionalChecks)
    await additionalChecks(grok.shell.tv);
}
97
+
/**
 * Runs `Bio:sequenceSpaceTopMenu` (UMAP, Levenshtein, with plotting and clustering)
 * on the `sequence` column of the view's dataframe.
 */
async function runSequenceSpace(tv: DG.TableView): Promise<void> {
  const df = tv.dataFrame;
  const seqCol = df.col('sequence')!;

  // apply the detected semantic type, if the detector returned one
  const semType: string = await grok.functions.call('Bio:detectMacromolecule', {col: seqCol});
  if (semType)
    seqCol.semType = semType;

  const seqSpaceFunc = DG.Func.find({package: 'Bio', name: 'sequenceSpaceTopMenu'})[0];
  const seqSpaceCall = seqSpaceFunc.prepare({
    table: df,
    molecules: seqCol,
    methodName: 'UMAP',
    similarityMetric: MmDistanceFunctionsNames.LEVENSHTEIN,
    plotEmbeddings: true,
    options: {[BYPASS_LARGE_DATA_WARNING]: true},
    clusterEmbeddings: true,
  });
  await seqSpaceCall.call(undefined, undefined, {processed: false});
  await delay(10);
}
114
+
/**
 * Runs `Bio:activityCliffs` (UMAP, Levenshtein, similarity 90) on the `sequence`
 * and `Activity` columns of the view's dataframe.
 */
async function runActivityCliffs(tv: DG.TableView): Promise<void> {
  const df = tv.dataFrame;
  const seqCol = df.col('sequence')!;

  // apply the detected semantic type, if the detector returned one
  const semType: string = await grok.functions.call('Bio:detectMacromolecule', {col: seqCol});
  if (semType)
    seqCol.semType = semType;

  const cliffsFunc = DG.Func.find({package: 'Bio', name: 'activityCliffs'})[0];
  const cliffsCall = cliffsFunc.prepare({
    table: df,
    molecules: seqCol,
    activities: df.col('Activity'),
    similarity: 90,
    methodName: 'UMAP',
    similarityMetric: MmDistanceFunctionsNames.LEVENSHTEIN,
    preprocessingFunction: DG.Func.find({name: 'macromoleculePreprocessingFunction', package: 'Bio'})[0],
    options: {[BYPASS_LARGE_DATA_WARNING]: true},
  });
  await cliffsCall.call(undefined, undefined, {processed: false});
  await delay(10);
}
132
+
/**
 * Waits (up to 5 seconds) until the last scatter plot of the current table view
 * shows a `scatter_plot_link` element whose text mentions "cliffs".
 * The `tv` argument is not consulted; the current table view is used instead.
 */
async function checkActivityCliffsInit(tv: DG.TableView): Promise<void> {
  // the last scatter plot in iteration order wins, if there is any
  const scatterPlots = Array.from(grok.shell.tv.viewers)
    .filter((viewer) => viewer.type === DG.VIEWER.SCATTER_PLOT);
  const sp: DG.Viewer | null = scatterPlots.length > 0 ? scatterPlots[scatterPlots.length - 1] : null;

  const linkMentionsCliffs = (): boolean => {
    const links = sp?.root.getElementsByClassName('scatter_plot_link');
    if (!links || !links.length)
      return false;
    return (links[0] as HTMLElement).innerText.toLowerCase().includes('cliffs');
  };
  await awaitCheck(linkMentionsCliffs, 'Initialization function hasn\'t been applied on scatter plot', 5000);
}
144
+
// Sequence space survives a project save / re-open (no data sync).
test('sequence_space', async () => {
  monomerLibHelper = await getMonomerLibHelper();
  userLibSettings = await getUserLibSettings();
  try {
    await monomerLibHelper.loadMonomerLibForTests();

    await runSaveAndOpenProjectTest('tests/100_3_clustests.csv', runSequenceSpace,
      ['sequence', 'Embed_X_1', 'Embed_Y_1', 'Cluster (DBSCAN)'], DG.VIEWER.SCATTER_PLOT);
    await delay(100);
  } finally {
    // restore the user's monomer libraries even when the test fails
    await setUserLibSettings(userLibSettings);
    await monomerLibHelper.loadMonomerLib(true);
  }
}, {timeout: 60000});
157
+
// Sequence space survives a project save / re-open with data sync enabled.
test('sequence_space_sync', async () => {
  monomerLibHelper = await getMonomerLibHelper();
  userLibSettings = await getUserLibSettings();
  try {
    await monomerLibHelper.loadMonomerLibForTests();

    await runSaveAndOpenProjectTest('tests/100_3_clustests.csv', runSequenceSpace,
      ['sequence', 'Embed_X_1', 'Embed_Y_1', 'Cluster (DBSCAN)'], DG.VIEWER.SCATTER_PLOT, true);
    await delay(100);
  } finally {
    // restore the user's monomer libraries even when the test fails
    await setUserLibSettings(userLibSettings);
    await monomerLibHelper.loadMonomerLib(true);
  }
}, {timeout: 60000});
170
+
// Activity cliffs (columns, scatter plot, init function) survive a project save / re-open.
test('activity_cliffs', async () => {
  monomerLibHelper = await getMonomerLibHelper();
  userLibSettings = await getUserLibSettings();
  try {
    await monomerLibHelper.loadMonomerLibForTests();

    await runSaveAndOpenProjectTest('tests/100_3_clustests.csv', runActivityCliffs,
      ['sequence', 'Activity', 'Embed_X_1', 'Embed_Y_1'],
      DG.VIEWER.SCATTER_PLOT, false, checkActivityCliffsInit);
    await delay(100);
  } finally {
    // restore the user's monomer libraries even when the test fails
    await setUserLibSettings(userLibSettings);
    await monomerLibHelper.loadMonomerLib(true);
  }
}, {timeout: 60000});
184
+
// Activity cliffs survive a project save / re-open with data sync enabled.
test('activity_cliffs_sync', async () => {
  monomerLibHelper = await getMonomerLibHelper();
  userLibSettings = await getUserLibSettings();
  try {
    await monomerLibHelper.loadMonomerLibForTests();

    await runSaveAndOpenProjectTest('tests/100_3_clustests.csv', runActivityCliffs,
      ['sequence', 'Activity', 'Embed_X_1', 'Embed_Y_1'],
      DG.VIEWER.SCATTER_PLOT, true, checkActivityCliffsInit);
    await delay(100);
  } finally {
    // restore the user's monomer libraries even when the test fails
    await setUserLibSettings(userLibSettings);
    await monomerLibHelper.loadMonomerLib(true);
  }
}, {timeout: 60000});
198
+
// Category teardown: close everything that is open in the shell.
after(async () => grok.shell.closeAll());
202
+ });