@datagrok/bio 2.4.3 → 2.4.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "name": "Leonid Stolbov",
6
6
  "email": "lstolbov@datagrok.ai"
7
7
  },
8
- "version": "2.4.3",
8
+ "version": "2.4.6",
9
9
  "description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
10
10
  "repository": {
11
11
  "type": "git",
@@ -16,12 +16,13 @@
16
16
  "@biowasm/aioli": "^3.1.0",
17
17
  "@datagrok-libraries/bio": "^5.27.0",
18
18
  "@datagrok-libraries/chem-meta": "^1.0.1",
19
- "@datagrok-libraries/ml": "^6.3.13",
19
+ "@datagrok-libraries/ml": "^6.3.16",
20
20
  "@datagrok-libraries/utils": "^2.1.3",
21
21
  "cash-dom": "^8.0.0",
22
22
  "css-loader": "^6.7.3",
23
23
  "datagrok-api": "^1.13.3",
24
24
  "dayjs": "^1.11.4",
25
+ "fastest-levenshtein": "^1.0.16",
25
26
  "openchemlib": "6.0.1",
26
27
  "rxjs": "^6.5.5",
27
28
  "source-map-loader": "^4.0.1",
@@ -0,0 +1,45 @@
1
+ import * as grok from 'datagrok-api/grok';
2
+ import * as ui from 'datagrok-api/ui';
3
+ import * as DG from 'datagrok-api/dg';
4
+
5
+ import {_package} from '../package';
6
+ import {delay} from '@datagrok-libraries/utils/src/test';
7
+ import {step} from './utils';
8
+
9
+ const dataFn = 'data/sample_FASTA_DNA.csv';
10
+
11
/**
 * Runs the "Similarity & Diversity" demo script.
 *
 * Loads the sample DNA table into a new table view, docks the
 * 'Sequence Similarity Search' and 'Sequence Diversity Search' viewers,
 * then sets the current row to 3 and to 7.
 * Errors are logged through the package logger and are not rethrown.
 *
 * @param funcPath Value assigned to both `path` and `basePath` of the created table view.
 */
export async function demoBio01UI(funcPath: string) {
  let view: DG.TableView;
  let df: DG.DataFrame;

  try {
    await step(`Loading DNA notation 'fasta'.`, async () => {
      df = await _package.files.readCsv(dataFn);
      view = grok.shell.addTableView(df);
      view.path = view.basePath = funcPath;
    })();

    await step('Sequence similarity search.', async () => {
      const simViewer = await df.plot.fromType('Sequence Similarity Search') as DG.Viewer;
      view.dockManager.dock(simViewer, DG.DOCK_TYPE.RIGHT, null, 'Similarity search', 0.35);
    })();

    await step('Sequence diversity search.', async () => {
      const divViewer = await df.plot.fromType('Sequence Diversity Search') as DG.Viewer;
      view.dockManager.dock(divViewer, DG.DOCK_TYPE.DOWN, null, 'Diversity search', 0.27);
    })();

    await step('Current row 3.', async () => {
      df.currentRowIdx = 3;
    })();

    // `step()` only builds the step function; the trailing `()` is what runs it.
    await step('Current row 7', async () => {
      df.currentRowIdx = 7;
    })();
  } catch (err: unknown) {
    if (err instanceof Error)
      _package.logger.error(err.message, undefined, err.stack);
    else
      _package.logger.error(String(err)); // String() is safe for null / undefined
  }
}
@@ -0,0 +1,68 @@
1
+ import * as grok from 'datagrok-api/grok';
2
+ import * as ui from 'datagrok-api/ui';
3
+ import * as DG from 'datagrok-api/dg';
4
+
5
+ import {_package} from '../package';
6
+
7
+ import * as lev from 'fastest-levenshtein';
8
+ import {DistanceMatrix} from '@datagrok-libraries/bio/src/trees/distance-matrix';
9
+ import {getTreeHelper, ITreeHelper} from '@datagrok-libraries/bio/src/trees/tree-helper';
10
+ import {getDendrogramService, IDendrogramService} from '@datagrok-libraries/bio/src/trees/dendrogram';
11
+ import {demoSequenceSpace, step} from './utils';
12
+
13
+ const dataFn = 'data/sample_FASTA_DNA.csv';
14
+ const seqColName = 'sequence';
15
+
16
/**
 * Runs the "Hierarchical Clustering & Sequence Space" demo script.
 *
 * Loads the sample DNA table, builds the sequence space scatter plot,
 * injects a dendrogram (Ward linkage over a normalised Levenshtein distance matrix)
 * into the grid and finally demonstrates row selection.
 * Errors are logged through the package logger and are not rethrown.
 *
 * @param funcPath Value assigned to both `path` and `basePath` of the created table view.
 */
export async function demoBio01aUI(funcPath: string) {
  let treeHelper: ITreeHelper;
  let dendrogramSvc: IDendrogramService;
  let view: DG.TableView;
  let df: DG.DataFrame;

  const method: string = 'UMAP';

  try {
    await step(`Loading DNA notation 'fasta'.`, async () => {
      // The table and both services are independent, so load them in parallel.
      [df, treeHelper, dendrogramSvc] = await Promise.all([
        _package.files.readCsv(dataFn),
        getTreeHelper(),
        getDendrogramService()
      ]);
      view = grok.shell.addTableView(df);
      view.grid.props.rowHeight = 22;
      view.path = view.basePath = funcPath;
    })();

    await step('Building sequence space.', async () => {
      await demoSequenceSpace(view, df, seqColName, method);
    })();

    await step('Hierarchical clustering.', async () => {
      const seqCol: DG.Column<string> = df.getCol(seqColName);
      const seqList = seqCol.toList();
      const distance: DistanceMatrix = DistanceMatrix.calc(seqList, (aSeq: string, bSeq: string) => {
        const meanLength = (aSeq.length + bSeq.length) / 2;
        // Two empty sequences are identical; without this guard the result would be 0 / 0 = NaN.
        if (meanLength === 0)
          return 0;
        return lev.distance(aSeq, bSeq) / meanLength;
      });
      const treeRoot = await treeHelper.hierarchicalClusteringByDistance(distance, 'ward');
      dendrogramSvc.injectTreeForGrid(view.grid, treeRoot, undefined, 150, undefined);
    })();

    await step('Selection.', async () => {
      df.selection.init((idx: number) => [15].includes(idx));
    })();

    await step('Select bunch of sequences.', async () => {
      df.selection.init((idx: number) => [21, 9, 58].includes(idx));
      df.currentRowIdx = 27;
    })();
  } catch (err: unknown) {
    if (err instanceof Error)
      _package.logger.error(err.message, undefined, err.stack);
    else
      _package.logger.error(String(err)); // String() is safe for null / undefined
  }
}
@@ -0,0 +1,94 @@
1
+ import * as grok from 'datagrok-api/grok';
2
+ import * as ui from 'datagrok-api/ui';
3
+ import * as DG from 'datagrok-api/dg';
4
+
5
+ import {_package, activityCliffs,} from '../package';
6
+ import $ from 'cash-dom';
7
+
8
+ import {TEMPS as acTEMPS} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
9
+ import * as lev from 'fastest-levenshtein';
10
+ import {DistanceMatrix} from '@datagrok-libraries/bio/src/trees/distance-matrix';
11
+ import {getTreeHelper, ITreeHelper} from '@datagrok-libraries/bio/src/trees/tree-helper';
12
+ import {getDendrogramService, IDendrogramService} from '@datagrok-libraries/bio/src/trees/dendrogram';
13
+ import {step} from './utils';
14
+
15
+ const dataFn = 'samples/sample_FASTA.csv';
16
+
17
/**
 * Runs the "Hierarchical Clustering & Activity Cliffs" demo script.
 *
 * Loads the sample table, runs the activity cliffs analysis and opens its cliffs grid,
 * injects a dendrogram (Ward linkage over a normalised Levenshtein distance matrix)
 * into the main grid and makes the first cliff current.
 * Errors are logged through the package logger and are not rethrown.
 *
 * @param funcPath Value assigned to both `path` and `basePath` of the created table view.
 */
export async function demoBio01bUI(funcPath: string) {
  let treeHelper: ITreeHelper;
  let dendrogramSvc: IDendrogramService;
  let view: DG.TableView;
  let df: DG.DataFrame;
  let activityCliffsViewer: DG.ScatterPlotViewer;

  const method: string = 'UMAP';
  // One spelling of the column name for every step (the original mixed 'Sequence' and 'sequence').
  const seqColName: string = 'Sequence';

  try {
    await step('Loading DNA notation \'fasta\'.', async () => {
      // The table and both services are independent, so load them in parallel.
      [df, treeHelper, dendrogramSvc] = await Promise.all([
        _package.files.readCsv(dataFn),
        getTreeHelper(),
        getDendrogramService()
      ]);

      view = grok.shell.addTableView(df);
      view.path = view.basePath = funcPath;
      view.grid.props.rowHeight = 22;
      const uniProtKbGCol = view.grid.columns.byName('UniProtKB')!;
      uniProtKbGCol.width = 75;
      const lengthGCol = view.grid.columns.byName('Length')!;
      lengthGCol.width = 0;
    })();

    await step('Analyze for activity cliffs.', async () => {
      activityCliffsViewer = (await activityCliffs(
        df, df.getCol(seqColName), df.getCol('Activity'),
        80, method)) as DG.ScatterPlotViewer;
      view.dockManager.dock(activityCliffsViewer, DG.DOCK_TYPE.RIGHT, null, 'Activity Cliffs', 0.35);

      // Show grid viewer with the cliffs.
      // NOTE(review): the comma makes this a selector list ('button.scatter_plot_link' OR a <cliffs_grid> element);
      // 'button.scatter_plot_link.cliffs_grid' may have been intended - confirm against the viewer markup.
      const cliffsLink = $(activityCliffsViewer.root)
        .find('button.scatter_plot_link,cliffs_grid').get()[0] as HTMLButtonElement | undefined;
      if (!cliffsLink)
        throw new Error('Activity cliffs link button is not found in the viewer.');
      cliffsLink.click();
    })();

    await step('Hierarchical clustering.', async () => {
      const seqCol: DG.Column<string> = df.getCol(seqColName);
      const seqList = seqCol.toList();
      const distance: DistanceMatrix = DistanceMatrix.calc(seqList, (aSeq: string, bSeq: string) => {
        const meanLength = (aSeq.length + bSeq.length) / 2;
        // Two empty sequences are identical; without this guard the result would be 0 / 0 = NaN.
        if (meanLength === 0)
          return 0;
        return lev.distance(aSeq, bSeq) / meanLength;
      });
      const treeRoot = await treeHelper.hierarchicalClusteringByDistance(distance, 'ward');
      dendrogramSvc.injectTreeForGrid(view.grid, treeRoot, undefined, 150, undefined);

      // adjust for visual
      const activityGCol = view.grid.columns.byName('Activity')!;
      activityGCol.scrollIntoView();
    })();

    await step('Browse the cliff.', async () => {
      //cliffsDfGrid.dataFrame.currentRowIdx = -1; // reset
      const cliffsDfGrid: DG.Grid | undefined = activityCliffsViewer.dataFrame.temp[acTEMPS.cliffsDfGrid];
      if (!cliffsDfGrid)
        throw new Error('Activity cliffs grid is not available on the viewer data frame.');
      //cliffsDfGrid.dataFrame.selection.init((i) => i == currentCliffIdx);
      cliffsDfGrid.dataFrame.currentRowIdx = 0;
      //cliffsDfGrid.dataFrame.selection.set(currentCliffIdx, true, true);

      // /* workaround to select rows of the cliff */
      // const entryCol: DG.Column = df.getCol('Entry');
      // df.selection.init((rowIdx) => ['UPI00000BFE1D', 'UPI00000BFE17'].includes(entryCol.get(rowIdx)));
      //
      // const selectionIdxList: Int32Array = df.selection.getSelectedIndexes();
      // if (selectionIdxList.length > 0) {
      //   df.currentRowIdx = selectionIdxList[0];
      //   view.grid.scrollToCell('UniProtKB', view.grid.tableRowToGrid(selectionIdxList[0]));
      // }
    })();
  } catch (err: unknown) {
    if (err instanceof Error)
      _package.logger.error(err.message, undefined, err.stack);
    else
      _package.logger.error(String(err)); // String() is safe for null / undefined
  }
}
@@ -0,0 +1,59 @@
1
+ import * as grok from 'datagrok-api/grok';
2
+ import * as ui from 'datagrok-api/ui';
3
+ import * as DG from 'datagrok-api/dg';
4
+
5
+ import {_package, sequenceSpaceTopMenu} from '../package';
6
+ import {step} from './utils';
7
+
8
+ import {IWebLogoViewer} from '@datagrok-libraries/bio/src/viewers/web-logo';
9
+ import {pepseaMethods, runPepsea} from '../utils/pepsea';
10
+ import {StringMetricsNames} from '@datagrok-libraries/ml/src/typed-metrics';
11
+
12
+ const helmFn: string = 'samples/sample_HELM.csv';
13
+
14
/**
 * Runs the "Helm, MSA, Sequence Space" demo script.
 *
 * Loads the sample HELM table, aligns the 'HELM' column with PepSeA and adds the result
 * as the 'msa(HELM)' column, docks a WebLogo viewer over the aligned column and
 * finally docks the sequence space scatter plot built from the aligned column.
 * Errors are logged through the package logger and are not rethrown.
 *
 * @param funcPath Value assigned to both `path` and `basePath` of the created table view.
 */
export async function demoBio05UI(funcPath: string): Promise<void> {
  let view: DG.TableView;
  let df: DG.DataFrame;
  let msaHelmCol: DG.Column<string>;

  const helmColName: string = 'HELM';
  const msaHelmColName: string = 'msa(HELM)';

  try {
    await step(`Loading peptides notation 'HELM'.`, async () => {
      df = await _package.files.readCsv(helmFn);
      view = grok.shell.addTableView(df);
      view.path = view.basePath = funcPath;
    })();

    await step('MSA on non-natural aminoacids with PepSeA.', async () => {
      const helmCol: DG.Column<string> = df.getCol(helmColName);
      const msaMethod: string = pepseaMethods[0];
      const gapOpen: number = 1.53;
      const gapExtend: number = 0;
      msaHelmCol = await runPepsea(helmCol, msaHelmColName, msaMethod, gapOpen, gapExtend, undefined);
      // runPepsea() only returns the column, so it has to be added to the table here.
      df.columns.add(msaHelmCol);
    })();

    await step('Composition analysis on MSA results', async () => {
      const wlViewer = await df.plot.fromType('WebLogo', {
        sequenceColumnName: msaHelmColName
      }) as DG.Viewer & IWebLogoViewer;
      view.dockManager.dock(wlViewer, DG.DOCK_TYPE.DOWN, null, 'Composition analysis', 0.2);
    })();

    await step('Building sequence space.', async () => {
      const embedMethod: string = 'UMAP';
      const ssViewer = (await sequenceSpaceTopMenu(df, msaHelmCol,
        embedMethod, StringMetricsNames.Levenshtein, true)) as DG.ScatterPlotViewer;
      view.dockManager.dock(ssViewer, DG.DOCK_TYPE.RIGHT, null, 'Sequence Space', 0.35);
    })();
  } catch (err: unknown) {
    if (err instanceof Error)
      _package.logger.error(err.message, undefined, err.stack);
    else
      _package.logger.error(String(err)); // String() is safe for null / undefined
  }
}
@@ -0,0 +1,95 @@
1
+ import * as grok from 'datagrok-api/grok';
2
+ import * as DG from 'datagrok-api/dg';
3
+ import * as ui from 'datagrok-api/ui';
4
+
5
+ import {_package, sequenceSpaceTopMenu} from '../package';
6
+ import {reduceDimensinalityWithNormalization} from '@datagrok-libraries/ml/src/sequence-space';
7
+ import {StringMetricsNames} from '@datagrok-libraries/ml/src/typed-metrics';
8
+ import {delay} from '@datagrok-libraries/utils/src/test';
9
+
10
/**
 * Wraps a demo action into a runnable step.
 *
 * The returned function shows {@link message} as a shell info balloon and as a task bar
 * progress indicator, awaits {@link action}, then - whether the action succeeded or threw -
 * closes the indicator and waits {@link delayMs} milliseconds.
 * Nothing happens until the returned function is called.
 *
 * @param message Text shown to the user while the step is running.
 * @param action  Asynchronous body of the step.
 * @param delayMs Pause after the step, in milliseconds.
 * @returns A function that executes the step when invoked.
 */
export function step(message: string, action: () => Promise<void>, delayMs: number = 1600): () => Promise<void> {
  const runStep = async (): Promise<void> => {
    grok.shell.info(message);
    const progress = DG.TaskBarProgressIndicator.create(message);
    try {
      await action();
    } finally {
      // Runs on failure as well, so the indicator never stays open.
      progress.close();
      await delay(delayMs);
    }
  };
  return runStep;
}
22
+
23
/** Names of the data frame columns that hold the two embedding coordinates. */
enum EMBED_COL_NAMES {
  X = 'Embed_X',
  Y = 'Embed_Y'
}

/**
 * Builds a sequence space scatter plot for the demo and docks it to the right of {@link view}.
 *
 * Custom sequence space implementation for closer resembling of hierarchical clustering results;
 * the stock `sequenceSpaceTopMenu()` is intentionally not used here.
 * The embedding is calculated with Levenshtein metric and written to the 'Embed_X' / 'Embed_Y'
 * columns of {@link df}, which are created when missing.
 *
 * @param view    Table view to dock the scatter plot to.
 * @param df      Data frame with the sequences; receives the embedding columns.
 * @param colName Name of the column with sequences.
 * @param method  Dimensionality reduction method name passed to the ml library.
 * @returns The docked scatter plot viewer.
 */
export async function demoSequenceSpace(
  view: DG.TableView, df: DG.DataFrame, colName: string, method: string
): Promise<DG.ScatterPlotViewer> {
  const embedColNameList: string[] = Object.values(EMBED_COL_NAMES);

  // ensure embed columns exist
  for (let embedI = 0; embedI < embedColNameList.length; embedI++) {
    const embedColName = embedColNameList[embedI];
    if (df.col(embedColName))
      continue;
    // Notification is required to reflect added data frame Embed_<X> columns to grid columns
    // MolecularLiabilityBrowser.setView() corrects grid columns' names with .replace('_', ' ');
    const notify = embedI === embedColNameList.length - 1; // notify on adding last Embed_<X> column
    df.columns.add(DG.Column.float(embedColName, df.rowCount), notify);
  }

  if (df.rowCount >= 1) {
    const seqCol: DG.Column<string> = df.getCol(colName);
    const seqList = seqCol.toList();

    const t1 = Date.now();
    _package.logger.debug('Bio: demoSequenceSpace(), calc reduceDimensionality start...');
    const redDimRes = await reduceDimensinalityWithNormalization( // TODO: Rename method typo
      seqList, method, StringMetricsNames.Levenshtein, {});
    const t2 = Date.now();
    _package.logger.debug('Bio: demoSequenceSpace(), calc reduceDimensionality ' +
      `ET: ${((t2 - t1) / 1000)} s`);

    for (let embedI = 0; embedI < embedColNameList.length; embedI++) {
      const embedCol: DG.Column = df.getCol(embedColNameList[embedI]);
      const embedColData: Float32Array = redDimRes.embedding[embedI];
      // TODO: Use DG.Column.setRawData()
      // embedCol.setRawData(embedColData);
      embedCol.init((rowI) => embedColData[rowI]);
    }

    const t3 = Date.now();
    _package.logger.debug('Bio: demoSequenceSpace(), postprocess reduceDimensionality ' +
      `ET: ${((t3 - t2) / 1000)} s`);
  }

  const resSpaceViewer = (await df.plot.fromType(DG.VIEWER.SCATTER_PLOT, {
    'xColumnName': EMBED_COL_NAMES.X,
    'yColumnName': EMBED_COL_NAMES.Y,
    'lassoTool': true,
  })) as DG.ScatterPlotViewer;

  view.dockManager.dock(resSpaceViewer, DG.DOCK_TYPE.RIGHT, null, 'Sequence Space', 0.35);
  return resSpaceViewer;
}
package/src/package.ts CHANGED
@@ -48,6 +48,10 @@ import {getMacromoleculeColumn} from './utils/ui-utils';
48
48
  import {IUMAPOptions, ITSNEOptions} from '@datagrok-libraries/ml/src/reduce-dimensionality';
49
49
  import {SequenceSpaceFunctionEditor} from '@datagrok-libraries/ml/src/functionEditors/seq-space-editor';
50
50
  import {ActivityCliffsFunctionEditor} from '@datagrok-libraries/ml/src/functionEditors/activity-cliffs-editor';
51
+ import {demoBio01UI} from './demo/bio01-similarity-diversity';
52
+ import {demoBio01aUI} from './demo/bio01a-hierarchical-clustering-and-sequence-space';
53
+ import {demoBio01bUI} from './demo/bio01b-hierarchical-clustering-and-activity-cliffs';
54
+ import {demoBio05UI} from './demo/bio05-helm-msa-sequence-space';
51
55
 
52
56
  // /** Avoid reassigning {@link monomerLib} because consumers subscribe to {@link IMonomerLib.onChanged} event */
53
57
  // let monomerLib: MonomerLib | null = null;
@@ -303,7 +307,7 @@ export function SeqActivityCliffsEditor(call: DG.FuncCall) {
303
307
  .show();
304
308
  }
305
309
 
306
- //top-menu: Bio | Sequence Activity Cliffs...
310
+ //top-menu: Bio | SAR | Activity Cliffs...
307
311
  //name: Sequence Activity Cliffs
308
312
  //description: detect activity cliffs
309
313
  //input: dataframe table [Input data table]
@@ -360,7 +364,7 @@ export function SequenceSpaceEditor(call: DG.FuncCall) {
360
364
  .show();
361
365
  }
362
366
 
363
- //top-menu: Bio | Sequence Space...
367
+ //top-menu: Bio | Structure | Sequence Space...
364
368
  //name: Sequence Space
365
369
  //input: dataframe table
366
370
  //input: column molecules { semType: Macromolecule }
@@ -370,7 +374,8 @@ export function SequenceSpaceEditor(call: DG.FuncCall) {
370
374
  //input: object options {optional: true}
371
375
  //editor: Bio:SequenceSpaceEditor
372
376
  export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: DG.Column, methodName: string,
373
- similarityMetric: string = 'Tanimoto', plotEmbeddings: boolean, options?: IUMAPOptions | ITSNEOptions): Promise<DG.Viewer | undefined> {
377
+ similarityMetric: string = 'Tanimoto', plotEmbeddings: boolean, options?: IUMAPOptions | ITSNEOptions
378
+ ): Promise<DG.Viewer | undefined> {
374
379
  // Delay is required for initial function dialog to close before starting invalidating of molfiles.
375
380
  // Otherwise, dialog is freezing
376
381
  await delay(10);
@@ -430,7 +435,7 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: D
430
435
  } */
431
436
  };
432
437
 
433
- //top-menu: Bio | To Atomic Level...
438
+ //top-menu: Bio | Atomic Level | To Atomic Level...
434
439
  //name: To Atomic Level
435
440
  //description: returns molfiles for each monomer from HELM library
436
441
  //input: dataframe df [Input data table]
@@ -447,7 +452,7 @@ export async function toAtomicLevel(df: DG.DataFrame, macroMolecule: DG.Column):
447
452
  await _toAtomicLevel(df, macroMolecule, monomersLibObject);
448
453
  }
449
454
 
450
- //top-menu: Bio | MSA...
455
+ //top-menu: Bio | Alignment | MSA...
451
456
  //name: MSA...
452
457
  //tags: bio, panel
453
458
  export function multipleSequenceAlignmentAny(col: DG.Column<string> | null = null): void {
@@ -515,8 +520,8 @@ export function multipleSequenceAlignmentAny(col: DG.Column<string> | null = nul
515
520
  .show();
516
521
  }
517
522
 
523
+ //top-menu: Bio | Structure | Composition Analysis
518
524
  //name: Composition Analysis
519
- //top-menu: Bio | Composition Analysis
520
525
  //meta.icon: files/icons/composition-analysis.svg
521
526
  //output: viewer result
522
527
  export async function compositionAnalysis(): Promise<void> {
@@ -574,8 +579,8 @@ export async function compositionAnalysis(): Promise<void> {
574
579
  await handler(col);
575
580
  }
576
581
 
577
- //top-menu: Bio | SDF to JSON lib...
578
- //name: SDF to JSON Lib
582
+ //top-menu: Bio | Atomic Level | SDF to JSON Library...
583
+ //name: SDF to JSON Library
579
584
  //input: dataframe table
580
585
  export async function sdfToJsonLib(table: DG.DataFrame) {
581
586
  const jsonMonomerLibrary = createJsonMonomerLibFromSdf(table);
@@ -728,7 +733,7 @@ export function diversitySearchTopMenu() {
728
733
  view.dockManager.dock(viewer, 'down');
729
734
  }
730
735
 
731
- //top-menu: Bio | Substructure Search ...
736
+ //top-menu: Bio | Structure | Substructure Search ...
732
737
  //name: bioSubstructureSearch
733
738
  export function bioSubstructureSearch(): void {
734
739
  const col = getMacromoleculeColumn();
@@ -750,3 +755,33 @@ export function saveAsFasta() {
750
755
  export function bioSubstructureFilter(): BioSubstructureFilter {
751
756
  return new BioSubstructureFilter();
752
757
  }
758
+
759
+ // -- Demo --
760
+
761
//name: demoBio01
//meta.demoPath: Bioinformatics | Similarity & Diversity
//description: Sequence similarity and diversity search on a sample DNA dataset
export async function demoBio01(): Promise<void> {
  await demoBio01UI('func/Bio.demoBio01');
}
767
+
768
//name: demoBio01a
//meta.demoPath: Bioinformatics | Hierarchical Clustering & Sequence Space
//description: Hierarchical clustering and sequence space on a sample DNA dataset
export async function demoBio01a(): Promise<void> {
  await demoBio01aUI('func/Bio.demoBio01a');
}
774
+
775
//name: demoBio01b
//meta.demoPath: Bioinformatics | Hierarchical Clustering & Activity Cliffs
//description: Hierarchical clustering and activity cliffs analysis on a sample FASTA dataset
export async function demoBio01b(): Promise<void> {
  await demoBio01bUI('func/Bio.demoBio01b');
}
781
+
782
//name: demoBio05
//meta.demoPath: Bioinformatics | Helm, MSA, Sequence Space
//description: PepSeA multiple sequence alignment, composition analysis and sequence space on a sample HELM dataset
export async function demoBio05(): Promise<void> {
  await demoBio05UI('func/Bio.demoBio05');
}
@@ -19,8 +19,10 @@ category('detectors:weak-and-likely', () => {
19
19
  fastaPtWeak1 = 'fastaPtWeak1',
20
20
  fastaPtWeak1LikelyName = 'fastaPtWeak1LikelyName',
21
21
 
22
+ /* Notation 'fasta' alphabet 'UN' is forbidden for likely columns too. */
22
23
  fastaUn1 = 'fastaUn1',
23
24
  fastaUn1LikelyName = 'fastaUn1LikelyName',
25
+ fastaUn2LikelyName = 'fastaUn2LikelyName',
24
26
  fastaUnMsa1LikelyName = 'fastaUnMsa1LikelyName',
25
27
  }
26
28
 
@@ -72,6 +74,11 @@ category('detectors:weak-and-likely', () => {
72
74
  2,other
73
75
  3,some
74
76
  4,another
77
+ `,
78
+ [csvTests.fastaUn2LikelyName]: `protein
79
+ Boombastic
80
+ Megafantastic
81
+ "just-a-random-thought,oy!"
75
82
  `,
76
83
  [csvTests.fastaUnMsa1LikelyName]: `id,seq
77
84
  1,word
@@ -119,11 +126,12 @@ category('detectors:weak-and-likely', () => {
119
126
  await _testNeg(readCsv(csvTests.fastaUn1), 'colName');
120
127
  });
121
128
  test(csvTests.fastaUn1LikelyName, async () => {
122
- await _testPos(readCsv(csvTests.fastaUn1LikelyName), 'seq',
123
- NOTATION.FASTA, ALIGNMENT.SEQ, ALPHABET.UN, 11, false);
129
+ await _testNeg(readCsv(csvTests.fastaUn1LikelyName), 'seq');
130
+ });
131
+ test(csvTests.fastaUn2LikelyName, async () => {
132
+ await _testNeg(readCsv(csvTests.fastaUn2LikelyName), 'protein');
124
133
  });
125
134
  test(csvTests.fastaUnMsa1LikelyName, async () => {
126
- await _testPos(readCsv(csvTests.fastaUnMsa1LikelyName), 'seq',
127
- NOTATION.FASTA, ALIGNMENT.SEQ_MSA, ALPHABET.UN, 10, false);
135
+ await _testNeg(readCsv(csvTests.fastaUnMsa1LikelyName), 'seq');
128
136
  });
129
137
  });
@@ -73,6 +73,7 @@ async function _testDiversitySearchViewer() {
73
73
  }
74
74
 
75
75
  function getSearchViewer(viewer: DG.Viewer, name: string) {
76
+ //@ts-ignore
76
77
  for (const v of viewer.view.viewers) {
77
78
  if (v.type === name)
78
79
  return v;
@@ -6,18 +6,21 @@ import * as C from './constants';
6
6
 
7
7
  export const pepseaMethods = ['mafft --auto', 'mafft', 'linsi', 'ginsi', 'einsi', 'fftns', 'fftnsi', 'nwns', 'nwnsi'];
8
8
  const alignmentObjectMetaKeys = ['AlignedSeq', 'AlignedSubpeptide', 'HELM', 'ID', 'PolymerID'];
9
- type PepseaRepsonse = {
9
+ type PepseaResponse = {
10
10
  Alignment: {
11
11
  PolymerID: string, AlignedSubpeptide: string, HELM: string, ID: string, AlignedSeq: string, [key: string]: string,
12
12
  }[],
13
- AlignmentScore: {[key: string]: number | null},
13
+ AlignmentScore: { [key: string]: number | null },
14
14
  };
15
- type PepseaBodyUnit = {ID: string, HELM: string};
15
+ type PepseaBodyUnit = { ID: string, HELM: string };
16
16
 
17
+ /** Gets the column containing MSA sequences produced by the 'PepSeA' tool from the {@link srcCol} column.
18
+ * Does not add the result column to the dataframe of {@link srcCol}.
19
+ */
17
20
  export async function runPepsea(srcCol: DG.Column<string>, unUsedName: string,
18
21
  method: typeof pepseaMethods[number] = 'ginsi', gapOpen: number = 1.53, gapExtend: number = 0.0,
19
22
  clustersCol: DG.Column<string | number> | null = null,
20
- ): Promise<DG.Column<string>> {
23
+ ): Promise<DG.Column<string>> {
21
24
  const peptideCount = srcCol.length;
22
25
  clustersCol ??= DG.Column.int('Clusters', peptideCount).init(0);
23
26
  if (clustersCol.type != DG.COLUMN_TYPE.STRING)
@@ -58,13 +61,14 @@ export async function runPepsea(srcCol: DG.Column<string>, unUsedName: string,
58
61
  alignedSequencesCol.setTag(bioTAGS.separator, C.PEPSEA.SEPARATOR);
59
62
  alignedSequencesCol.setTag(bioTAGS.aligned, ALIGNMENT.SEQ_MSA);
60
63
  alignedSequencesCol.setTag(bioTAGS.alphabet, ALPHABET.UN);
64
+ alignedSequencesCol.setTag(bioTAGS.alphabetIsMultichar, 'true');
61
65
  alignedSequencesCol.semType = DG.SEMTYPE.MACROMOLECULE;
62
66
 
63
67
  return alignedSequencesCol;
64
68
  }
65
69
 
66
70
  async function requestAlignedObjects(dockerfileId: string, body: PepseaBodyUnit[], method: string, gapOpen: number,
67
- gapExtend: number): Promise<PepseaRepsonse> {
71
+ gapExtend: number): Promise<PepseaResponse> {
68
72
  const params = {
69
73
  method: 'POST',
70
74
  headers: {'Accept': 'application/json', 'Content-Type': 'application/json'},