@datagrok/bio 2.4.9 → 2.4.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "name": "Leonid Stolbov",
6
6
  "email": "lstolbov@datagrok.ai"
7
7
  },
8
- "version": "2.4.9",
8
+ "version": "2.4.11",
9
9
  "description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
10
10
  "repository": {
11
11
  "type": "git",
@@ -55,7 +55,7 @@ export async function demoBio01aUI(funcPath: string) {
55
55
  df.selection.init((idx: number) => [15].includes(idx));
56
56
  })();
57
57
 
58
- await step('Select bunch of sequences.', async () => {
58
+ await step('Select a bunch of sequences.', async () => {
59
59
  df.selection.init((idx: number) => [21, 9, 58].includes(idx));
60
60
  df.currentRowIdx = 27;
61
61
  })();
package/src/package.ts CHANGED
@@ -2,20 +2,22 @@
2
2
  import * as grok from 'datagrok-api/grok';
3
3
  import * as ui from 'datagrok-api/ui';
4
4
  import * as DG from 'datagrok-api/dg';
5
-
6
- export const _package = new DG.Package();
7
-
8
- import {MacromoleculeDifferenceCellRenderer, MonomerCellRenderer} from './utils/cell-renderer';
5
+ import {
6
+ MacromoleculeDifferenceCellRenderer,
7
+ MacromoleculeSequenceCellRenderer,
8
+ MonomerCellRenderer
9
+ } from './utils/cell-renderer';
9
10
  import {VdRegionsViewer} from './viewers/vd-regions-viewer';
10
- import {runKalign} from './utils/multiple-sequence-alignment';
11
11
  import {SequenceAlignment} from './seq_align';
12
12
  import {getEmbeddingColsNames, sequenceSpaceByFingerprints} from './analysis/sequence-space';
13
13
  import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
14
14
  import {
15
- createLinesGrid, createPropPanelElement, createTooltipElement, getChemSimilaritiesMatrix,
15
+ createLinesGrid,
16
+ createPropPanelElement,
17
+ createTooltipElement,
18
+ getChemSimilaritiesMatrix,
16
19
  } from './analysis/sequence-activity-cliffs';
17
20
  import {HELM_CORE_LIB_FILENAME} from '@datagrok-libraries/bio/src/utils/const';
18
- import {MacromoleculeSequenceCellRenderer} from './utils/cell-renderer';
19
21
  import {convert} from './utils/convert';
20
22
  import {getMacroMolColumnPropertyPanel, representationsWidget} from './widgets/representations';
21
23
  import {_toAtomicLevel} from '@datagrok-libraries/bio/src/monomer-works/to-atomic-level';
@@ -30,14 +32,7 @@ import {substructureSearchDialog} from './substructure-search/substructure-searc
30
32
  import {saveAsFastaUI} from './utils/save-as-fasta';
31
33
  import {BioSubstructureFilter} from './widgets/bio-substructure-filter';
32
34
  import {delay} from '@datagrok-libraries/utils/src/test';
33
- import {
34
- getStats,
35
- NOTATION,
36
- splitterAsHelm,
37
- TAGS as bioTAGS,
38
- ALPHABET
39
- } from '@datagrok-libraries/bio/src/utils/macromolecule';
40
- import {pepseaMethods, runPepsea} from './utils/pepsea';
35
+ import {getStats, splitterAsHelm, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
41
36
  import {IMonomerLib} from '@datagrok-libraries/bio/src/types';
42
37
  import {SeqPalette} from '@datagrok-libraries/bio/src/seq-palettes';
43
38
  import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
@@ -45,13 +40,17 @@ import {WebLogoViewer} from './viewers/web-logo-viewer';
45
40
  import {createJsonMonomerLibFromSdf, IMonomerLibHelper} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
46
41
  import {LIB_PATH, LIB_STORAGE_NAME, MonomerLibHelper} from './utils/monomer-lib';
47
42
  import {getMacromoleculeColumn} from './utils/ui-utils';
48
- import {IUMAPOptions, ITSNEOptions} from '@datagrok-libraries/ml/src/reduce-dimensionality';
43
+ import {ITSNEOptions, IUMAPOptions} from '@datagrok-libraries/ml/src/reduce-dimensionality';
49
44
  import {SequenceSpaceFunctionEditor} from '@datagrok-libraries/ml/src/functionEditors/seq-space-editor';
50
45
  import {ActivityCliffsFunctionEditor} from '@datagrok-libraries/ml/src/functionEditors/activity-cliffs-editor';
51
46
  import {demoBio01UI} from './demo/bio01-similarity-diversity';
52
47
  import {demoBio01aUI} from './demo/bio01a-hierarchical-clustering-and-sequence-space';
53
48
  import {demoBio01bUI} from './demo/bio01b-hierarchical-clustering-and-activity-cliffs';
54
49
  import {demoBio05UI} from './demo/bio05-helm-msa-sequence-space';
50
+ import {checkInputColumnUI} from './utils/check-input-column';
51
+ import {multipleSequenceAlignmentUI} from './utils/multiple-sequence-alignment-ui';
52
+
53
+ export const _package = new DG.Package();
55
54
 
56
55
  // /** Avoid reassigning {@link monomerLib} because consumers subscribe to {@link IMonomerLib.onChanged} event */
57
56
  // let monomerLib: MonomerLib | null = null;
@@ -217,51 +216,6 @@ export function macromoleculeDifferenceCellRenderer(): MacromoleculeDifferenceCe
217
216
  }
218
217
 
219
218
 
220
- function checkInputColumnUi(col: DG.Column, name: string, allowedNotations: string[] = [],
221
- allowedAlphabets: string[] = [], notify: boolean = true): boolean {
222
- const [res, msg]: [boolean, string] = checkInputColumn(col, name, allowedNotations, allowedAlphabets);
223
- if (notify && !res)
224
- grok.shell.warning(msg);
225
- return res;
226
- }
227
-
228
- export function checkInputColumn(
229
- col: DG.Column, name: string, allowedNotations: string[] = [], allowedAlphabets: string[] = []
230
- ): [boolean, string] {
231
- let res: boolean = true;
232
- let msg: string = '';
233
-
234
- const uh = new UnitsHandler(col);
235
- if (col.semType !== DG.SEMTYPE.MACROMOLECULE) {
236
- grok.shell.warning(name + ' analysis is allowed for Macromolecules semantic type');
237
- res = false;
238
- } else {
239
- const notation: string = uh.notation;
240
- if (allowedNotations.length > 0 &&
241
- !allowedNotations.some((n) => notation.toUpperCase() == (n.toUpperCase()))
242
- ) {
243
- const notationAdd = allowedNotations.length == 0 ? 'any notation' :
244
- (`notation${allowedNotations.length > 1 ? 's' : ''} ${allowedNotations.map((n) => `"${n}"`).join(', ')} `);
245
- msg = `${name} + ' analysis is allowed for Macromolecules with notation ${notationAdd}.`;
246
- res = false;
247
- } else if (!uh.isHelm()) {
248
- // alphabet is not specified for 'helm' notation
249
- const alphabet: string = uh.alphabet;
250
- if (
251
- allowedAlphabets.length > 0 &&
252
- !allowedAlphabets.some((a) => alphabet.toUpperCase() == (a.toUpperCase()))
253
- ) {
254
- const alphabetAdd = allowedAlphabets.length == 0 ? 'any alphabet' :
255
- (`alphabet${allowedAlphabets.length > 1 ? 's' : ''} ${allowedAlphabets.map((a) => `"${a}"`).join(', ')}.`);
256
- msg = `${name} + ' analysis is allowed for Macromolecules with alphabet ${alphabetAdd}.`;
257
- res = false;
258
- }
259
- }
260
- }
261
-
262
- return [res, msg];
263
- }
264
-
265
219
  //name: sequenceAlignment
266
220
  //input: string alignType {choices: ['Local alignment', 'Global alignment']}
267
221
  // eslint-disable-next-line max-len
@@ -321,7 +275,7 @@ export function SeqActivityCliffsEditor(call: DG.FuncCall) {
321
275
  export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column, activities: DG.Column,
322
276
  similarity: number, methodName: string, options?: IUMAPOptions | ITSNEOptions
323
277
  ): Promise<DG.Viewer | undefined> {
324
- if (!checkInputColumnUi(macroMolecule, 'Activity Cliffs'))
278
+ if (!checkInputColumnUI(macroMolecule, 'Activity Cliffs'))
325
279
  return;
326
280
  const axesNames = getEmbeddingColsNames(df);
327
281
  const tags = {
@@ -379,7 +333,7 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: D
379
333
  // Delay is required for initial function dialog to close before starting invalidating of molfiles.
380
334
  // Otherwise, dialog is freezing
381
335
  await delay(10);
382
- if (!checkInputColumnUi(macroMolecule, 'Sequence space'))
336
+ if (!checkInputColumnUI(macroMolecule, 'Sequence space'))
383
337
  return;
384
338
 
385
339
  const embedColsNames = getEmbeddingColsNames(table);
@@ -445,7 +399,7 @@ export async function toAtomicLevel(df: DG.DataFrame, macroMolecule: DG.Column):
445
399
  grok.shell.warning('Transformation to atomic level requires package "Chem" installed.');
446
400
  return;
447
401
  }
448
- if (!checkInputColumnUi(macroMolecule, 'To Atomic Level'))
402
+ if (!checkInputColumnUI(macroMolecule, 'To Atomic Level'))
449
403
  return;
450
404
  const monomersLibFile = await _package.files.readAsText(HELM_CORE_LIB_FILENAME);
451
405
  const monomersLibObject: any[] = JSON.parse(monomersLibFile);
@@ -456,68 +410,7 @@ export async function toAtomicLevel(df: DG.DataFrame, macroMolecule: DG.Column):
456
410
  //name: MSA...
457
411
  //tags: bio, panel
458
412
  export function multipleSequenceAlignmentAny(col: DG.Column<string> | null = null): void {
459
- const table = col?.dataFrame ?? grok.shell.t;
460
- const seqCol = col ?? table.columns.bySemType(DG.SEMTYPE.MACROMOLECULE);
461
- if (seqCol == null) {
462
- grok.shell.warning(`MSAError: dataset doesn't conain any Macromolecule column`);
463
- return;
464
- }
465
-
466
- let performAlignment: () => Promise<DG.Column<string> | null> = async () => null;
467
- const methodInput = ui.choiceInput('Method', pepseaMethods[0], pepseaMethods);
468
- methodInput.setTooltip('Alignment method');
469
- const gapOpenInput = ui.floatInput('Gap open', 1.53);
470
- gapOpenInput.setTooltip('Gap opening penalty at group-to-group alignment');
471
- const gapExtendInput = ui.floatInput('Gap extend', 0);
472
- gapExtendInput.setTooltip('Gap extension penalty to skip the alignment');
473
- const inputRootStyles = [methodInput.root.style, gapOpenInput.root.style, gapExtendInput.root.style];
474
-
475
- const colInput = ui.columnInput('Sequence', table, seqCol, () => {
476
- const potentialCol = colInput.value;
477
- const unusedName = table.columns.getUnusedName(`msa(${potentialCol.name})`);
478
-
479
- if (checkInputColumnUi(
480
- potentialCol, potentialCol.name, [NOTATION.FASTA], [ALPHABET.DNA, ALPHABET.RNA, ALPHABET.PT], false)) {
481
- for (const inputRootStyle of inputRootStyles)
482
- inputRootStyle.display = 'none';
483
-
484
- performAlignment = () => runKalign(potentialCol, false, unusedName, clustersColInput.value);
485
- } else if (checkInputColumnUi(potentialCol, potentialCol.name, [NOTATION.HELM], [], false)) {
486
- for (const inputRootStyle of inputRootStyles)
487
- inputRootStyle.display = 'initial';
488
-
489
- performAlignment = () => runPepsea(potentialCol, unusedName, methodInput.value!, gapOpenInput.value!,
490
- gapExtendInput.value!, clustersColInput.value);
491
- } else {
492
- for (const inputRootStyle of inputRootStyles)
493
- inputRootStyle.display = 'none';
494
-
495
- performAlignment = async () => null;
496
- }
497
- }) as DG.InputBase<DG.Column<string>>;
498
- colInput.setTooltip('Sequences column to use for alignment');
499
- colInput.fireChanged();
500
-
501
- const clustersColInput = ui.columnInput('Clusters', table, null);
502
- clustersColInput.nullable = true;
503
-
504
- let msaCol: DG.Column<string> | null = null;
505
- ui.dialog('MSA')
506
- .add(colInput)
507
- .add(clustersColInput)
508
- .add(methodInput)
509
- .add(gapOpenInput)
510
- .add(gapExtendInput)
511
- .onOK(async () => {
512
- colInput.fireChanged();
513
- msaCol = await performAlignment();
514
- if (msaCol == null)
515
- return grok.shell.warning('Wrong column format');
516
-
517
- table.columns.add(msaCol);
518
- await grok.data.detectSemanticTypes(table);
519
- })
520
- .show();
413
+ multipleSequenceAlignmentUI(col);
521
414
  }
522
415
 
523
416
  //top-menu: Bio | Structure | Composition Analysis
@@ -539,7 +432,7 @@ export async function compositionAnalysis(): Promise<void> {
539
432
  });
540
433
 
541
434
  const handler = async (col: DG.Column) => {
542
- if (!checkInputColumnUi(col, 'Composition'))
435
+ if (!checkInputColumnUI(col, 'Composition'))
543
436
  return;
544
437
 
545
438
  const wlViewer = tv.addViewer('WebLogo', {sequenceColumnName: col.name});
@@ -758,30 +651,34 @@ export function bioSubstructureFilter(): BioSubstructureFilter {
758
651
 
759
652
  // -- Demo --
760
653
 
761
- //name: demoBio01
762
- //meta.demoPath: Bioinformatics | Similarity & Diversity
654
+ // demoBio01
655
+ //name: demoBioSimilarityDiversity
656
+ //meta.demoPath: Bioinformatics | Similarity, Diversity
763
657
  //description:
764
- export async function demoBio01(): Promise<void> {
658
+ export async function demoBioSimilarityDiversity(): Promise<void> {
765
659
  await demoBio01UI('func/Bio.demoBio01');
766
660
  }
767
661
 
768
- //name:demoBio01a
769
- //meta.demoPath: Bioinformatics | Hierarchical Clustering & Sequence Space
662
+ // demoBio01a
663
+ //name:demoBioSequenceSpace
664
+ //meta.demoPath: Bioinformatics | Sequence Space
770
665
  //description:
771
- export async function demoBio01a(): Promise<void> {
666
+ export async function demoBioSequenceSpace(): Promise<void> {
772
667
  await demoBio01aUI('func/Bio.demoBio01a');
773
668
  }
774
669
 
775
- //name: demoBio01c
776
- //meta.demoPath: Bioinformatics | Hierarchical Clustering & Activity Cliffs
670
+ // demoBio01b
671
+ //name: demoBioActivityCliffs
672
+ //meta.demoPath: Bioinformatics | Activity Cliffs
777
673
  //description:
778
- export async function demoBio01b(): Promise<void> {
674
+ export async function demoBioActivityCliffs(): Promise<void> {
779
675
  await demoBio01bUI('func/Bio.demoBio01b');
780
676
  }
781
677
 
782
- //name: demoBio05
678
+ // demoBio05
679
+ //name: demoBioHelmMsaSequenceSpace
783
680
  //meta.demoPath: Bioinformatics | Helm, MSA, Sequence Space
784
681
  //description:
785
- export async function demoBio05(): Promise<void> {
682
+ export async function demoBioHelmMsaSequenceSpace(): Promise<void> {
786
683
  await demoBio05UI('func/demoBio05');
787
684
  }
@@ -2,11 +2,10 @@ import * as grok from 'datagrok-api/grok';
2
2
  import * as ui from 'datagrok-api/ui';
3
3
  import * as DG from 'datagrok-api/dg';
4
4
 
5
-
6
5
  import {after, before, category, test, expect, expectArray} from '@datagrok-libraries/utils/src/test';
7
6
 
8
- import {checkInputColumn, multipleSequenceAlignmentAny} from '../package';
9
7
  import {ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
8
+ import {checkInputColumn} from '../utils/check-input-column';
10
9
 
11
10
  category('checkInputColumn', () => {
12
11
  const csv = `seq
@@ -149,7 +149,7 @@ RNA1{P.R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P.P.P}$$$
149
149
 
150
150
  function converter(tgtNotation: NOTATION, tgtSeparator: string | null = null): ConverterFunc {
151
151
  if (tgtNotation === NOTATION.SEPARATOR && !tgtSeparator)
152
- throw new Error(`Argument 'separator' is missed for notation '${tgtNotation.toString()}'.`);
152
+ throw new Error(`Argument 'separator' is mandatory for target notation '${tgtNotation.toString()}'.`);
153
153
 
154
154
  return function(srcCol: DG.Column): DG.Column {
155
155
  const converter = new NotationConverter(srcCol);
@@ -429,7 +429,7 @@ class PosCol {
429
429
  public readonly alphabetIsMultichar: boolean,
430
430
  public readonly separator?: string
431
431
  ) { };
432
- };
432
+ }
433
433
 
434
434
  export async function _testDf(readDf: DfReaderFunc, posCols: { [colName: string]: PosCol }): Promise<void> {
435
435
  const df: DG.DataFrame = await readDf();
@@ -9,7 +9,7 @@ import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
9
9
 
10
10
 
11
11
  category('fastaFileHandler', () => {
12
- const fastaNormalFormatting = `>description:1
12
+ const fastaNormalFormatting: string = `>description:1
13
13
  MDYKETLLMPKTDFPMRGGLPNKEPQIQEKW
14
14
 
15
15
  >description:2
@@ -22,7 +22,7 @@ MMELVLKTIIGPIVVGVVLRIVDKWLNKDK
22
22
  MDRTDEVSNHTHDKPTLTWFEEIFEEYHSPFHN
23
23
  `;
24
24
 
25
- const fastaExtraSpaces = `>description:1
25
+ const fastaExtraSpaces: string = `>description:1
26
26
  MDYKETLLMPKTDFPMRGGLPNKEPQIQEKW
27
27
 
28
28
  >description:2
@@ -35,7 +35,7 @@ M MELVLKTI IGPI VVGVVLR IVDKWLNKDK
35
35
  MDR TDEVSNHTHDKP TLTWFEEIFEEYHSPFHN
36
36
  `;
37
37
 
38
- const fastaExtraNewlines = `>description:1
38
+ const fastaExtraNewlines: string = `>description:1
39
39
 
40
40
  MDYKETLLMPKTDFPMRGGLPNKEPQIQEKW
41
41
 
@@ -67,39 +67,6 @@ TLTWFEEIFEE
67
67
 
68
68
  YHSPFHN
69
69
  `;
70
- // a "broken" fasta file
71
- // const fastaBroken = `
72
-
73
- // >description:1
74
- // MDYKETLLM
75
- // PKTDFPMRGGLPN
76
- // KEPQIQEKW
77
-
78
-
79
-
80
- // >description:2
81
- // MIEVFL FGIVLGLIPI TLAGLFVTAYLQYRRGDQLDL
82
-
83
- // >description:3
84
-
85
- // M
86
- // MELVLKTIIGP
87
- // IVVGVVLR
88
- // IVDKWLNKD
89
-
90
- // K
91
-
92
- // >description:4
93
- // MDRTDEV
94
-
95
- // SNHTHDKP
96
- // TLTWFEEI
97
- // FEE
98
-
99
- // YHSPFHN
100
-
101
-
102
- // `;
103
70
 
104
71
  const descriptionsArray = [
105
72
  'description:1', 'description:2', 'description:3', 'description:4',
@@ -112,11 +79,6 @@ YHSPFHN
112
79
  'MMELVLKTIIGPIVVGVVLRIVDKWLNKDK',
113
80
  'MDRTDEVSNHTHDKPTLTWFEEIFEEYHSPFHN',
114
81
  ];
115
- const sequencesCol = DG.Column.fromStrings('sequence', sequencesArray);
116
- sequencesCol.semType = DG.SEMTYPE.MACROMOLECULE;
117
- UnitsHandler.setUnitsToFastaColumn(sequencesCol);
118
-
119
- const fastaDf = DG.DataFrame.fromColumns([descriptionCol, sequencesCol]);
120
82
 
121
83
  function _testColumnsParser(inputFasta: string) {
122
84
  const ffh = new FastaFileHandler(inputFasta);
@@ -0,0 +1,52 @@
1
+ import * as grok from 'datagrok-api/grok';
2
+ import * as DG from 'datagrok-api/dg';
3
+ import * as ui from 'datagrok-api/ui';
4
+
5
+ import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
6
+
/**
 * Validates a column with {@link checkInputColumn} and optionally reports a failure to the user.
 *
 * @param col Column to validate.
 * @param name Analysis name, passed through to {@link checkInputColumn}.
 * @param allowedNotations Notations passed through to {@link checkInputColumn}.
 * @param allowedAlphabets Alphabets passed through to {@link checkInputColumn}.
 * @param notify When true (default) and the check fails, the returned message is shown
 *   with `grok.shell.warning`.
 * @returns The boolean result of the check.
 */
export function checkInputColumnUI(col: DG.Column, name: string, allowedNotations: string[] = [],
  allowedAlphabets: string[] = [], notify: boolean = true): boolean {
  const outcome: [boolean, string] = checkInputColumn(col, name, allowedNotations, allowedAlphabets);
  const isValid: boolean = outcome[0];
  const message: string = outcome[1];

  if (!isValid && notify)
    grok.shell.warning(message);

  return isValid;
}
15
+
/**
 * Checks whether a column is acceptable input for a macromolecule analysis.
 *
 * The column must have the Macromolecule semantic type. When `allowedNotations` is not empty,
 * the column notation must match one of them (case-insensitively). When the column is not in
 * HELM notation and `allowedAlphabets` is not empty, its alphabet must match one of them
 * (case-insensitively).
 *
 * The function does not notify the user itself: the reason of a failure is returned to the
 * caller, which decides whether to show it.
 *
 * @param col Column to check.
 * @param name Analysis name, used as the prefix of the failure message.
 * @param allowedNotations Accepted notations; an empty list accepts any notation.
 * @param allowedAlphabets Accepted alphabets; an empty list accepts any alphabet.
 * @returns `[true, '']` when the column is acceptable, otherwise `[false, message]`.
 */
export function checkInputColumn(
  col: DG.Column, name: string, allowedNotations: string[] = [], allowedAlphabets: string[] = []
): [boolean, string] {
  if (col.semType !== DG.SEMTYPE.MACROMOLECULE)
    return [false, name + ' analysis is allowed for Macromolecules semantic type'];

  const quoted = (items: string[]): string => items.map((item) => `"${item}"`).join(', ');

  // Created only after the semantic type check: it is used for Macromolecule columns only.
  const uh = new UnitsHandler(col);

  const notation: string = uh.notation;
  if (
    allowedNotations.length > 0 &&
    !allowedNotations.some((n) => notation.toUpperCase() === n.toUpperCase())
  ) {
    const noun = allowedNotations.length > 1 ? 'notations' : 'notation';
    return [false, `${name} analysis is allowed for Macromolecules with ${noun} ${quoted(allowedNotations)}.`];
  }

  // Alphabet is not specified for 'helm' notation, so it is checked for the other notations only.
  if (!uh.isHelm()) {
    const alphabet: string = uh.alphabet;
    if (
      allowedAlphabets.length > 0 &&
      !allowedAlphabets.some((a) => alphabet.toUpperCase() === a.toUpperCase())
    ) {
      const noun = allowedAlphabets.length > 1 ? 'alphabets' : 'alphabet';
      return [false, `${name} analysis is allowed for Macromolecules with ${noun} ${quoted(allowedAlphabets)}.`];
    }
  }

  return [true, ''];
}
@@ -0,0 +1,81 @@
1
+ import * as grok from 'datagrok-api/grok';
2
+ import * as DG from 'datagrok-api/dg';
3
+ import * as ui from 'datagrok-api/ui';
4
+
5
+ import {ALPHABET, NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
6
+ import {runKalign} from './multiple-sequence-alignment';
7
+ import {pepseaMethods, runPepsea} from './pepsea';
8
+ import {checkInputColumnUI} from './check-input-column';
9
+
/**
 * Shows the 'MSA' dialog and, on OK, adds the alignment result to the table as a new column.
 *
 * A column accepted as FASTA notation with DNA, RNA or PT alphabet is aligned with `runKalign`.
 * A column accepted as HELM notation is aligned with `runPepsea`; the method and gap penalty
 * inputs are shown only in this case. For any other column the dialog warns
 * 'Wrong column format' on OK.
 *
 * @param col Sequence column to preselect. When null, the first Macromolecule column of the
 *   current table (`grok.shell.t`) is used; if there is none, a warning is shown and no dialog opens.
 */
export function multipleSequenceAlignmentUI(col: DG.Column<string> | null = null): void {
  const table = col?.dataFrame ?? grok.shell.t;
  const seqCol = col ?? table.columns.bySemType(DG.SEMTYPE.MACROMOLECULE);
  if (seqCol == null) {
    grok.shell.warning(`MSAError: dataset doesn't contain any Macromolecule column`);
    return;
  }

  const noAlignment = async (): Promise<DG.Column<string> | null> => null;
  let performAlignment: () => Promise<DG.Column<string> | null> = noAlignment;

  const methodInput = ui.choiceInput('Method', pepseaMethods[0], pepseaMethods);
  methodInput.setTooltip('Alignment method');
  const gapOpenInput = ui.floatInput('Gap open', 1.53);
  gapOpenInput.setTooltip('Gap opening penalty at group-to-group alignment');
  const gapExtendInput = ui.floatInput('Gap extend', 0);
  gapExtendInput.setTooltip('Gap extension penalty to skip the alignment');

  // These inputs are passed to runPepsea only, so they are shown for HELM columns only.
  const pepseaInputStyles = [methodInput.root.style, gapOpenInput.root.style, gapExtendInput.root.style];
  const showPepseaInputs = (visible: boolean): void => {
    for (const style of pepseaInputStyles)
      style.display = visible ? 'initial' : 'none';
  };

  // Declared before the sequence input because the alignment closures assigned below read it.
  const clustersColInput = ui.columnInput('Clusters', table, null);
  clustersColInput.nullable = true;

  const colInput = ui.columnInput('Sequence', table, seqCol, () => {
    const potentialCol = colInput.value;
    // NOTE(review): guards against an empty selection; confirm whether the input can be cleared.
    if (potentialCol == null) {
      showPepseaInputs(false);
      performAlignment = noAlignment;
      return;
    }

    const unusedName = table.columns.getUnusedName(`msa(${potentialCol.name})`);

    if (checkInputColumnUI(
      potentialCol, potentialCol.name, [NOTATION.FASTA], [ALPHABET.DNA, ALPHABET.RNA, ALPHABET.PT], false)) {
      showPepseaInputs(false);
      performAlignment = () => runKalign(potentialCol, false, unusedName, clustersColInput.value);
    } else if (checkInputColumnUI(potentialCol, potentialCol.name, [NOTATION.HELM], [], false)) {
      showPepseaInputs(true);
      performAlignment = () => runPepsea(potentialCol, unusedName, methodInput.value!, gapOpenInput.value!,
        gapExtendInput.value!, clustersColInput.value);
    } else {
      showPepseaInputs(false);
      performAlignment = noAlignment;
    }
  }) as DG.InputBase<DG.Column<string>>;
  colInput.setTooltip('Sequences column to use for alignment');
  // Applies the handler to the preselected column so the dialog opens in a consistent state.
  colInput.fireChanged();

  ui.dialog('MSA')
    .add(colInput)
    .add(clustersColInput)
    .add(methodInput)
    .add(gapOpenInput)
    .add(gapExtendInput)
    .onOK(async () => {
      const pi = DG.TaskBarProgressIndicator.create('Analyze for MSA ...');
      try {
        // Re-evaluates the handler so the alignment uses the inputs as they are at OK time.
        colInput.fireChanged();
        const msaCol: DG.Column<string> | null = await performAlignment();
        if (msaCol == null) {
          grok.shell.warning('Wrong column format');
          return;
        }

        table.columns.add(msaCol);
        await grok.data.detectSemanticTypes(table);
      } catch (err: unknown) {
        // String() also covers thrown null/undefined, where calling .toString() would itself throw.
        const errMsg: string = err instanceof Error ? err.message : String(err);
        grok.shell.error(errMsg);
      } finally {
        // The progress indicator is closed on every path, including the early return above.
        pi.close();
      }
    })
    .show();
}