@datagrok/bio 2.4.9 → 2.4.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/package-test.js +1 -1
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +1 -1
- package/dist/package.js.map +1 -1
- package/package.json +1 -1
- package/src/demo/bio01a-hierarchical-clustering-and-sequence-space.ts +1 -1
- package/src/package.ts +35 -138
- package/src/tests/checkInputColumn-tests.ts +1 -2
- package/src/tests/converters-test.ts +1 -1
- package/src/tests/detectors-tests.ts +1 -1
- package/src/tests/fasta-handler-test.ts +3 -41
- package/src/utils/check-input-column.ts +52 -0
- package/src/utils/multiple-sequence-alignment-ui.ts +81 -0
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Leonid Stolbov",
|
|
6
6
|
"email": "lstolbov@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.4.9",
|
|
8
|
+
"version": "2.4.11",
|
|
9
9
|
"description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
|
@@ -55,7 +55,7 @@ export async function demoBio01aUI(funcPath: string) {
|
|
|
55
55
|
df.selection.init((idx: number) => [15].includes(idx));
|
|
56
56
|
})();
|
|
57
57
|
|
|
58
|
-
await step('Select bunch of sequences.', async () => {
|
|
58
|
+
await step('Select a bunch of sequences.', async () => {
|
|
59
59
|
df.selection.init((idx: number) => [21, 9, 58].includes(idx));
|
|
60
60
|
df.currentRowIdx = 27;
|
|
61
61
|
})();
|
package/src/package.ts
CHANGED
|
@@ -2,20 +2,22 @@
|
|
|
2
2
|
import * as grok from 'datagrok-api/grok';
|
|
3
3
|
import * as ui from 'datagrok-api/ui';
|
|
4
4
|
import * as DG from 'datagrok-api/dg';
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
5
|
+
import {
|
|
6
|
+
MacromoleculeDifferenceCellRenderer,
|
|
7
|
+
MacromoleculeSequenceCellRenderer,
|
|
8
|
+
MonomerCellRenderer
|
|
9
|
+
} from './utils/cell-renderer';
|
|
9
10
|
import {VdRegionsViewer} from './viewers/vd-regions-viewer';
|
|
10
|
-
import {runKalign} from './utils/multiple-sequence-alignment';
|
|
11
11
|
import {SequenceAlignment} from './seq_align';
|
|
12
12
|
import {getEmbeddingColsNames, sequenceSpaceByFingerprints} from './analysis/sequence-space';
|
|
13
13
|
import {getActivityCliffs} from '@datagrok-libraries/ml/src/viewers/activity-cliffs';
|
|
14
14
|
import {
|
|
15
|
-
createLinesGrid,
|
|
15
|
+
createLinesGrid,
|
|
16
|
+
createPropPanelElement,
|
|
17
|
+
createTooltipElement,
|
|
18
|
+
getChemSimilaritiesMatrix,
|
|
16
19
|
} from './analysis/sequence-activity-cliffs';
|
|
17
20
|
import {HELM_CORE_LIB_FILENAME} from '@datagrok-libraries/bio/src/utils/const';
|
|
18
|
-
import {MacromoleculeSequenceCellRenderer} from './utils/cell-renderer';
|
|
19
21
|
import {convert} from './utils/convert';
|
|
20
22
|
import {getMacroMolColumnPropertyPanel, representationsWidget} from './widgets/representations';
|
|
21
23
|
import {_toAtomicLevel} from '@datagrok-libraries/bio/src/monomer-works/to-atomic-level';
|
|
@@ -30,14 +32,7 @@ import {substructureSearchDialog} from './substructure-search/substructure-searc
|
|
|
30
32
|
import {saveAsFastaUI} from './utils/save-as-fasta';
|
|
31
33
|
import {BioSubstructureFilter} from './widgets/bio-substructure-filter';
|
|
32
34
|
import {delay} from '@datagrok-libraries/utils/src/test';
|
|
33
|
-
import {
|
|
34
|
-
getStats,
|
|
35
|
-
NOTATION,
|
|
36
|
-
splitterAsHelm,
|
|
37
|
-
TAGS as bioTAGS,
|
|
38
|
-
ALPHABET
|
|
39
|
-
} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
40
|
-
import {pepseaMethods, runPepsea} from './utils/pepsea';
|
|
35
|
+
import {getStats, splitterAsHelm, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
41
36
|
import {IMonomerLib} from '@datagrok-libraries/bio/src/types';
|
|
42
37
|
import {SeqPalette} from '@datagrok-libraries/bio/src/seq-palettes';
|
|
43
38
|
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
@@ -45,13 +40,17 @@ import {WebLogoViewer} from './viewers/web-logo-viewer';
|
|
|
45
40
|
import {createJsonMonomerLibFromSdf, IMonomerLibHelper} from '@datagrok-libraries/bio/src/monomer-works/monomer-utils';
|
|
46
41
|
import {LIB_PATH, LIB_STORAGE_NAME, MonomerLibHelper} from './utils/monomer-lib';
|
|
47
42
|
import {getMacromoleculeColumn} from './utils/ui-utils';
|
|
48
|
-
import {
|
|
43
|
+
import {ITSNEOptions, IUMAPOptions} from '@datagrok-libraries/ml/src/reduce-dimensionality';
|
|
49
44
|
import {SequenceSpaceFunctionEditor} from '@datagrok-libraries/ml/src/functionEditors/seq-space-editor';
|
|
50
45
|
import {ActivityCliffsFunctionEditor} from '@datagrok-libraries/ml/src/functionEditors/activity-cliffs-editor';
|
|
51
46
|
import {demoBio01UI} from './demo/bio01-similarity-diversity';
|
|
52
47
|
import {demoBio01aUI} from './demo/bio01a-hierarchical-clustering-and-sequence-space';
|
|
53
48
|
import {demoBio01bUI} from './demo/bio01b-hierarchical-clustering-and-activity-cliffs';
|
|
54
49
|
import {demoBio05UI} from './demo/bio05-helm-msa-sequence-space';
|
|
50
|
+
import {checkInputColumnUI} from './utils/check-input-column';
|
|
51
|
+
import {multipleSequenceAlignmentUI} from './utils/multiple-sequence-alignment-ui';
|
|
52
|
+
|
|
53
|
+
export const _package = new DG.Package();
|
|
55
54
|
|
|
56
55
|
// /** Avoid reassinging {@link monomerLib} because consumers subscribe to {@link IMonomerLib.onChanged} event */
|
|
57
56
|
// let monomerLib: MonomerLib | null = null;
|
|
@@ -217,51 +216,6 @@ export function macromoleculeDifferenceCellRenderer(): MacromoleculeDifferenceCe
|
|
|
217
216
|
}
|
|
218
217
|
|
|
219
218
|
|
|
220
|
-
function checkInputColumnUi(col: DG.Column, name: string, allowedNotations: string[] = [],
|
|
221
|
-
allowedAlphabets: string[] = [], notify: boolean = true): boolean {
|
|
222
|
-
const [res, msg]: [boolean, string] = checkInputColumn(col, name, allowedNotations, allowedAlphabets);
|
|
223
|
-
if (notify && !res)
|
|
224
|
-
grok.shell.warning(msg);
|
|
225
|
-
return res;
|
|
226
|
-
}
|
|
227
|
-
|
|
228
|
-
export function checkInputColumn(
|
|
229
|
-
col: DG.Column, name: string, allowedNotations: string[] = [], allowedAlphabets: string[] = []
|
|
230
|
-
): [boolean, string] {
|
|
231
|
-
let res: boolean = true;
|
|
232
|
-
let msg: string = '';
|
|
233
|
-
|
|
234
|
-
const uh = new UnitsHandler(col);
|
|
235
|
-
if (col.semType !== DG.SEMTYPE.MACROMOLECULE) {
|
|
236
|
-
grok.shell.warning(name + ' analysis is allowed for Macromolecules semantic type');
|
|
237
|
-
res = false;
|
|
238
|
-
} else {
|
|
239
|
-
const notation: string = uh.notation;
|
|
240
|
-
if (allowedNotations.length > 0 &&
|
|
241
|
-
!allowedNotations.some((n) => notation.toUpperCase() == (n.toUpperCase()))
|
|
242
|
-
) {
|
|
243
|
-
const notationAdd = allowedNotations.length == 0 ? 'any notation' :
|
|
244
|
-
(`notation${allowedNotations.length > 1 ? 's' : ''} ${allowedNotations.map((n) => `"${n}"`).join(', ')} `);
|
|
245
|
-
msg = `${name} + ' analysis is allowed for Macromolecules with notation ${notationAdd}.`;
|
|
246
|
-
res = false;
|
|
247
|
-
} else if (!uh.isHelm()) {
|
|
248
|
-
// alphabet is not specified for 'helm' notation
|
|
249
|
-
const alphabet: string = uh.alphabet;
|
|
250
|
-
if (
|
|
251
|
-
allowedAlphabets.length > 0 &&
|
|
252
|
-
!allowedAlphabets.some((a) => alphabet.toUpperCase() == (a.toUpperCase()))
|
|
253
|
-
) {
|
|
254
|
-
const alphabetAdd = allowedAlphabets.length == 0 ? 'any alphabet' :
|
|
255
|
-
(`alphabet${allowedAlphabets.length > 1 ? 's' : ''} ${allowedAlphabets.map((a) => `"${a}"`).join(', ')}.`);
|
|
256
|
-
msg = `${name} + ' analysis is allowed for Macromolecules with alphabet ${alphabetAdd}.`;
|
|
257
|
-
res = false;
|
|
258
|
-
}
|
|
259
|
-
}
|
|
260
|
-
}
|
|
261
|
-
|
|
262
|
-
return [res, msg];
|
|
263
|
-
}
|
|
264
|
-
|
|
265
219
|
//name: sequenceAlignment
|
|
266
220
|
//input: string alignType {choices: ['Local alignment', 'Global alignment']}
|
|
267
221
|
// eslint-disable-next-line max-len
|
|
@@ -321,7 +275,7 @@ export function SeqActivityCliffsEditor(call: DG.FuncCall) {
|
|
|
321
275
|
export async function activityCliffs(df: DG.DataFrame, macroMolecule: DG.Column, activities: DG.Column,
|
|
322
276
|
similarity: number, methodName: string, options?: IUMAPOptions | ITSNEOptions
|
|
323
277
|
): Promise<DG.Viewer | undefined> {
|
|
324
|
-
if (!
|
|
278
|
+
if (!checkInputColumnUI(macroMolecule, 'Activity Cliffs'))
|
|
325
279
|
return;
|
|
326
280
|
const axesNames = getEmbeddingColsNames(df);
|
|
327
281
|
const tags = {
|
|
@@ -379,7 +333,7 @@ export async function sequenceSpaceTopMenu(table: DG.DataFrame, macroMolecule: D
|
|
|
379
333
|
// Delay is required for initial function dialog to close before starting invalidating of molfiles.
|
|
380
334
|
// Otherwise, dialog is freezing
|
|
381
335
|
await delay(10);
|
|
382
|
-
if (!
|
|
336
|
+
if (!checkInputColumnUI(macroMolecule, 'Sequence space'))
|
|
383
337
|
return;
|
|
384
338
|
|
|
385
339
|
const embedColsNames = getEmbeddingColsNames(table);
|
|
@@ -445,7 +399,7 @@ export async function toAtomicLevel(df: DG.DataFrame, macroMolecule: DG.Column):
|
|
|
445
399
|
grok.shell.warning('Transformation to atomic level requires package "Chem" installed.');
|
|
446
400
|
return;
|
|
447
401
|
}
|
|
448
|
-
if (!
|
|
402
|
+
if (!checkInputColumnUI(macroMolecule, 'To Atomic Level'))
|
|
449
403
|
return;
|
|
450
404
|
const monomersLibFile = await _package.files.readAsText(HELM_CORE_LIB_FILENAME);
|
|
451
405
|
const monomersLibObject: any[] = JSON.parse(monomersLibFile);
|
|
@@ -456,68 +410,7 @@ export async function toAtomicLevel(df: DG.DataFrame, macroMolecule: DG.Column):
|
|
|
456
410
|
//name: MSA...
|
|
457
411
|
//tags: bio, panel
|
|
458
412
|
export function multipleSequenceAlignmentAny(col: DG.Column<string> | null = null): void {
|
|
459
|
-
|
|
460
|
-
const seqCol = col ?? table.columns.bySemType(DG.SEMTYPE.MACROMOLECULE);
|
|
461
|
-
if (seqCol == null) {
|
|
462
|
-
grok.shell.warning(`MSAError: dataset doesn't conain any Macromolecule column`);
|
|
463
|
-
return;
|
|
464
|
-
}
|
|
465
|
-
|
|
466
|
-
let performAlignment: () => Promise<DG.Column<string> | null> = async () => null;
|
|
467
|
-
const methodInput = ui.choiceInput('Method', pepseaMethods[0], pepseaMethods);
|
|
468
|
-
methodInput.setTooltip('Alignment method');
|
|
469
|
-
const gapOpenInput = ui.floatInput('Gap open', 1.53);
|
|
470
|
-
gapOpenInput.setTooltip('Gap opening penalty at group-to-group alignment');
|
|
471
|
-
const gapExtendInput = ui.floatInput('Gap extend', 0);
|
|
472
|
-
gapExtendInput.setTooltip('Gap extension penalty to skip the alignment');
|
|
473
|
-
const inputRootStyles = [methodInput.root.style, gapOpenInput.root.style, gapExtendInput.root.style];
|
|
474
|
-
|
|
475
|
-
const colInput = ui.columnInput('Sequence', table, seqCol, () => {
|
|
476
|
-
const potentialCol = colInput.value;
|
|
477
|
-
const unusedName = table.columns.getUnusedName(`msa(${potentialCol.name})`);
|
|
478
|
-
|
|
479
|
-
if (checkInputColumnUi(
|
|
480
|
-
potentialCol, potentialCol.name, [NOTATION.FASTA], [ALPHABET.DNA, ALPHABET.RNA, ALPHABET.PT], false)) {
|
|
481
|
-
for (const inputRootStyle of inputRootStyles)
|
|
482
|
-
inputRootStyle.display = 'none';
|
|
483
|
-
|
|
484
|
-
performAlignment = () => runKalign(potentialCol, false, unusedName, clustersColInput.value);
|
|
485
|
-
} else if (checkInputColumnUi(potentialCol, potentialCol.name, [NOTATION.HELM], [], false)) {
|
|
486
|
-
for (const inputRootStyle of inputRootStyles)
|
|
487
|
-
inputRootStyle.display = 'initial';
|
|
488
|
-
|
|
489
|
-
performAlignment = () => runPepsea(potentialCol, unusedName, methodInput.value!, gapOpenInput.value!,
|
|
490
|
-
gapExtendInput.value!, clustersColInput.value);
|
|
491
|
-
} else {
|
|
492
|
-
for (const inputRootStyle of inputRootStyles)
|
|
493
|
-
inputRootStyle.display = 'none';
|
|
494
|
-
|
|
495
|
-
performAlignment = async () => null;
|
|
496
|
-
}
|
|
497
|
-
}) as DG.InputBase<DG.Column<string>>;
|
|
498
|
-
colInput.setTooltip('Sequences column to use for alignment');
|
|
499
|
-
colInput.fireChanged();
|
|
500
|
-
|
|
501
|
-
const clustersColInput = ui.columnInput('Clusters', table, null);
|
|
502
|
-
clustersColInput.nullable = true;
|
|
503
|
-
|
|
504
|
-
let msaCol: DG.Column<string> | null = null;
|
|
505
|
-
ui.dialog('MSA')
|
|
506
|
-
.add(colInput)
|
|
507
|
-
.add(clustersColInput)
|
|
508
|
-
.add(methodInput)
|
|
509
|
-
.add(gapOpenInput)
|
|
510
|
-
.add(gapExtendInput)
|
|
511
|
-
.onOK(async () => {
|
|
512
|
-
colInput.fireChanged();
|
|
513
|
-
msaCol = await performAlignment();
|
|
514
|
-
if (msaCol == null)
|
|
515
|
-
return grok.shell.warning('Wrong column format');
|
|
516
|
-
|
|
517
|
-
table.columns.add(msaCol);
|
|
518
|
-
await grok.data.detectSemanticTypes(table);
|
|
519
|
-
})
|
|
520
|
-
.show();
|
|
413
|
+
multipleSequenceAlignmentUI(col);
|
|
521
414
|
}
|
|
522
415
|
|
|
523
416
|
//top-menu: Bio | Structure | Composition Analysis
|
|
@@ -539,7 +432,7 @@ export async function compositionAnalysis(): Promise<void> {
|
|
|
539
432
|
});
|
|
540
433
|
|
|
541
434
|
const handler = async (col: DG.Column) => {
|
|
542
|
-
if (!
|
|
435
|
+
if (!checkInputColumnUI(col, 'Composition'))
|
|
543
436
|
return;
|
|
544
437
|
|
|
545
438
|
const wlViewer = tv.addViewer('WebLogo', {sequenceColumnName: col.name});
|
|
@@ -758,30 +651,34 @@ export function bioSubstructureFilter(): BioSubstructureFilter {
|
|
|
758
651
|
|
|
759
652
|
// -- Demo --
|
|
760
653
|
|
|
761
|
-
//
|
|
762
|
-
//
|
|
654
|
+
// demoBio01
|
|
655
|
+
//name: demoBioSimilarityDiversity
|
|
656
|
+
//meta.demoPath: Bioinformatics | Similarity, Diversity
|
|
763
657
|
//description:
|
|
764
|
-
export async function
|
|
658
|
+
export async function demoBioSimilarityDiversity(): Promise<void> {
|
|
765
659
|
await demoBio01UI('func/Bio.demoBio01');
|
|
766
660
|
}
|
|
767
661
|
|
|
768
|
-
//
|
|
769
|
-
//
|
|
662
|
+
// demoBio01a
|
|
663
|
+
//name:demoBioSequenceSpace
|
|
664
|
+
//meta.demoPath: Bioinformatics | Sequence Space
|
|
770
665
|
//description:
|
|
771
|
-
export async function
|
|
666
|
+
export async function demoBioSequenceSpace(): Promise<void> {
|
|
772
667
|
await demoBio01aUI('func/Bio.demoBio01a');
|
|
773
668
|
}
|
|
774
669
|
|
|
775
|
-
//
|
|
776
|
-
//
|
|
670
|
+
// demoBio01b
|
|
671
|
+
//name: demoBioActivityCliffs
|
|
672
|
+
//meta.demoPath: Bioinformatics | Activity Cliffs
|
|
777
673
|
//description:
|
|
778
|
-
export async function
|
|
674
|
+
export async function demoBioActivityCliffs(): Promise<void> {
|
|
779
675
|
await demoBio01bUI('func/Bio.demoBio01b');
|
|
780
676
|
}
|
|
781
677
|
|
|
782
|
-
//
|
|
678
|
+
// demoBio05
|
|
679
|
+
//name: demoBioHelmMsaSequenceSpace
|
|
783
680
|
//meta.demoPath: Bioinformatics | Helm, MSA, Sequence Space
|
|
784
681
|
//description:
|
|
785
|
-
export async function
|
|
682
|
+
export async function demoBioHelmMsaSequenceSpace(): Promise<void> {
|
|
786
683
|
await demoBio05UI('func/demoBio05');
|
|
787
684
|
}
|
|
@@ -2,11 +2,10 @@ import * as grok from 'datagrok-api/grok';
|
|
|
2
2
|
import * as ui from 'datagrok-api/ui';
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
4
|
|
|
5
|
-
|
|
6
5
|
import {after, before, category, test, expect, expectArray} from '@datagrok-libraries/utils/src/test';
|
|
7
6
|
|
|
8
|
-
import {checkInputColumn, multipleSequenceAlignmentAny} from '../package';
|
|
9
7
|
import {ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
8
|
+
import {checkInputColumn} from '../utils/check-input-column';
|
|
10
9
|
|
|
11
10
|
category('checkInputColumn', () => {
|
|
12
11
|
const csv = `seq
|
|
@@ -149,7 +149,7 @@ RNA1{P.R(U)P.R(U)P.R(C)P.R(A)P.R(A)P.R(C)P.P.P}$$$
|
|
|
149
149
|
|
|
150
150
|
function converter(tgtNotation: NOTATION, tgtSeparator: string | null = null): ConverterFunc {
|
|
151
151
|
if (tgtNotation === NOTATION.SEPARATOR && !tgtSeparator)
|
|
152
|
-
throw new Error(`Argument 'separator' is
|
|
152
|
+
throw new Error(`Argument 'separator' is mandatory for target notation '${tgtNotation.toString()}'.`);
|
|
153
153
|
|
|
154
154
|
return function(srcCol: DG.Column): DG.Column {
|
|
155
155
|
const converter = new NotationConverter(srcCol);
|
|
@@ -429,7 +429,7 @@ class PosCol {
|
|
|
429
429
|
public readonly alphabetIsMultichar: boolean,
|
|
430
430
|
public readonly separator?: string
|
|
431
431
|
) { };
|
|
432
|
-
}
|
|
432
|
+
}
|
|
433
433
|
|
|
434
434
|
export async function _testDf(readDf: DfReaderFunc, posCols: { [colName: string]: PosCol }): Promise<void> {
|
|
435
435
|
const df: DG.DataFrame = await readDf();
|
|
@@ -9,7 +9,7 @@ import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
category('fastaFileHandler', () => {
|
|
12
|
-
const fastaNormalFormatting = `>description:1
|
|
12
|
+
const fastaNormalFormatting: string = `>description:1
|
|
13
13
|
MDYKETLLMPKTDFPMRGGLPNKEPQIQEKW
|
|
14
14
|
|
|
15
15
|
>description:2
|
|
@@ -22,7 +22,7 @@ MMELVLKTIIGPIVVGVVLRIVDKWLNKDK
|
|
|
22
22
|
MDRTDEVSNHTHDKPTLTWFEEIFEEYHSPFHN
|
|
23
23
|
`;
|
|
24
24
|
|
|
25
|
-
const fastaExtraSpaces = `>description:1
|
|
25
|
+
const fastaExtraSpaces: string = `>description:1
|
|
26
26
|
MDYKETLLMPKTDFPMRGGLPNKEPQIQEKW
|
|
27
27
|
|
|
28
28
|
>description:2
|
|
@@ -35,7 +35,7 @@ M MELVLKTI IGPI VVGVVLR IVDKWLNKDK
|
|
|
35
35
|
MDR TDEVSNHTHDKP TLTWFEEIFEEYHSPFHN
|
|
36
36
|
`;
|
|
37
37
|
|
|
38
|
-
const fastaExtraNewlines = `>description:1
|
|
38
|
+
const fastaExtraNewlines: string = `>description:1
|
|
39
39
|
|
|
40
40
|
MDYKETLLMPKTDFPMRGGLPNKEPQIQEKW
|
|
41
41
|
|
|
@@ -67,39 +67,6 @@ TLTWFEEIFEE
|
|
|
67
67
|
|
|
68
68
|
YHSPFHN
|
|
69
69
|
`;
|
|
70
|
-
// a "broken" fasta file
|
|
71
|
-
// const fastaBroken = `
|
|
72
|
-
|
|
73
|
-
// >description:1
|
|
74
|
-
// MDYKETLLM
|
|
75
|
-
// PKTDFPMRGGLPN
|
|
76
|
-
// KEPQIQEKW
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
// >description:2
|
|
81
|
-
// MIEVFL FGIVLGLIPI TLAGLFVTAYLQYRRGDQLDL
|
|
82
|
-
|
|
83
|
-
// >description:3
|
|
84
|
-
|
|
85
|
-
// M
|
|
86
|
-
// MELVLKTIIGP
|
|
87
|
-
// IVVGVVLR
|
|
88
|
-
// IVDKWLNKD
|
|
89
|
-
|
|
90
|
-
// K
|
|
91
|
-
|
|
92
|
-
// >description:4
|
|
93
|
-
// MDRTDEV
|
|
94
|
-
|
|
95
|
-
// SNHTHDKP
|
|
96
|
-
// TLTWFEEI
|
|
97
|
-
// FEE
|
|
98
|
-
|
|
99
|
-
// YHSPFHN
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
// `;
|
|
103
70
|
|
|
104
71
|
const descriptionsArray = [
|
|
105
72
|
'description:1', 'description:2', 'description:3', 'description:4',
|
|
@@ -112,11 +79,6 @@ YHSPFHN
|
|
|
112
79
|
'MMELVLKTIIGPIVVGVVLRIVDKWLNKDK',
|
|
113
80
|
'MDRTDEVSNHTHDKPTLTWFEEIFEEYHSPFHN',
|
|
114
81
|
];
|
|
115
|
-
const sequencesCol = DG.Column.fromStrings('sequence', sequencesArray);
|
|
116
|
-
sequencesCol.semType = DG.SEMTYPE.MACROMOLECULE;
|
|
117
|
-
UnitsHandler.setUnitsToFastaColumn(sequencesCol);
|
|
118
|
-
|
|
119
|
-
const fastaDf = DG.DataFrame.fromColumns([descriptionCol, sequencesCol]);
|
|
120
82
|
|
|
121
83
|
function _testColumnsParser(inputFasta: string) {
|
|
122
84
|
const ffh = new FastaFileHandler(inputFasta);
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import * as grok from 'datagrok-api/grok';
|
|
2
|
+
import * as DG from 'datagrok-api/dg';
|
|
3
|
+
import * as ui from 'datagrok-api/ui';
|
|
4
|
+
|
|
5
|
+
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
6
|
+
|
|
7
|
+
/** */
|
|
8
|
+
export function checkInputColumnUI(col: DG.Column, name: string, allowedNotations: string[] = [],
|
|
9
|
+
allowedAlphabets: string[] = [], notify: boolean = true): boolean {
|
|
10
|
+
const [res, msg]: [boolean, string] = checkInputColumn(col, name, allowedNotations, allowedAlphabets);
|
|
11
|
+
if (notify && !res)
|
|
12
|
+
grok.shell.warning(msg);
|
|
13
|
+
return res;
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
/** */
|
|
17
|
+
export function checkInputColumn(
|
|
18
|
+
col: DG.Column, name: string, allowedNotations: string[] = [], allowedAlphabets: string[] = []
|
|
19
|
+
): [boolean, string] {
|
|
20
|
+
let res: boolean = true;
|
|
21
|
+
let msg: string = '';
|
|
22
|
+
|
|
23
|
+
const uh = new UnitsHandler(col);
|
|
24
|
+
if (col.semType !== DG.SEMTYPE.MACROMOLECULE) {
|
|
25
|
+
grok.shell.warning(name + ' analysis is allowed for Macromolecules semantic type');
|
|
26
|
+
res = false;
|
|
27
|
+
} else {
|
|
28
|
+
const notation: string = uh.notation;
|
|
29
|
+
if (allowedNotations.length > 0 &&
|
|
30
|
+
!allowedNotations.some((n) => notation.toUpperCase() == (n.toUpperCase()))
|
|
31
|
+
) {
|
|
32
|
+
const notationAdd = allowedNotations.length == 0 ? 'any notation' :
|
|
33
|
+
(`notation${allowedNotations.length > 1 ? 's' : ''} ${allowedNotations.map((n) => `"${n}"`).join(', ')} `);
|
|
34
|
+
msg = `${name} + ' analysis is allowed for Macromolecules with notation ${notationAdd}.`;
|
|
35
|
+
res = false;
|
|
36
|
+
} else if (!uh.isHelm()) {
|
|
37
|
+
// alphabet is not specified for 'helm' notation
|
|
38
|
+
const alphabet: string = uh.alphabet;
|
|
39
|
+
if (
|
|
40
|
+
allowedAlphabets.length > 0 &&
|
|
41
|
+
!allowedAlphabets.some((a) => alphabet.toUpperCase() == (a.toUpperCase()))
|
|
42
|
+
) {
|
|
43
|
+
const alphabetAdd = allowedAlphabets.length == 0 ? 'any alphabet' :
|
|
44
|
+
(`alphabet${allowedAlphabets.length > 1 ? 's' : ''} ${allowedAlphabets.map((a) => `"${a}"`).join(', ')}.`);
|
|
45
|
+
msg = `${name} + ' analysis is allowed for Macromolecules with alphabet ${alphabetAdd}.`;
|
|
46
|
+
res = false;
|
|
47
|
+
}
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
return [res, msg];
|
|
52
|
+
}
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
import * as grok from 'datagrok-api/grok';
|
|
2
|
+
import * as DG from 'datagrok-api/dg';
|
|
3
|
+
import * as ui from 'datagrok-api/ui';
|
|
4
|
+
|
|
5
|
+
import {ALPHABET, NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
6
|
+
import {runKalign} from './multiple-sequence-alignment';
|
|
7
|
+
import {pepseaMethods, runPepsea} from './pepsea';
|
|
8
|
+
import {checkInputColumnUI} from './check-input-column';
|
|
9
|
+
|
|
10
|
+
export function multipleSequenceAlignmentUI(col: DG.Column<string> | null = null) {
|
|
11
|
+
const table = col?.dataFrame ?? grok.shell.t;
|
|
12
|
+
const seqCol = col ?? table.columns.bySemType(DG.SEMTYPE.MACROMOLECULE);
|
|
13
|
+
if (seqCol == null) {
|
|
14
|
+
grok.shell.warning(`MSAError: dataset doesn't conain any Macromolecule column`);
|
|
15
|
+
return;
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
let performAlignment: () => Promise<DG.Column<string> | null> = async () => null;
|
|
19
|
+
const methodInput = ui.choiceInput('Method', pepseaMethods[0], pepseaMethods);
|
|
20
|
+
methodInput.setTooltip('Alignment method');
|
|
21
|
+
const gapOpenInput = ui.floatInput('Gap open', 1.53);
|
|
22
|
+
gapOpenInput.setTooltip('Gap opening penalty at group-to-group alignment');
|
|
23
|
+
const gapExtendInput = ui.floatInput('Gap extend', 0);
|
|
24
|
+
gapExtendInput.setTooltip('Gap extension penalty to skip the alignment');
|
|
25
|
+
const inputRootStyles = [methodInput.root.style, gapOpenInput.root.style, gapExtendInput.root.style];
|
|
26
|
+
|
|
27
|
+
const colInput = ui.columnInput('Sequence', table, seqCol, () => {
|
|
28
|
+
const potentialCol = colInput.value;
|
|
29
|
+
const unusedName = table.columns.getUnusedName(`msa(${potentialCol.name})`);
|
|
30
|
+
|
|
31
|
+
if (checkInputColumnUI(
|
|
32
|
+
potentialCol, potentialCol.name, [NOTATION.FASTA], [ALPHABET.DNA, ALPHABET.RNA, ALPHABET.PT], false)) {
|
|
33
|
+
for (const inputRootStyle of inputRootStyles)
|
|
34
|
+
inputRootStyle.display = 'none';
|
|
35
|
+
|
|
36
|
+
performAlignment = () => runKalign(potentialCol, false, unusedName, clustersColInput.value);
|
|
37
|
+
} else if (checkInputColumnUI(potentialCol, potentialCol.name, [NOTATION.HELM], [], false)) {
|
|
38
|
+
for (const inputRootStyle of inputRootStyles)
|
|
39
|
+
inputRootStyle.display = 'initial';
|
|
40
|
+
|
|
41
|
+
performAlignment = () => runPepsea(potentialCol, unusedName, methodInput.value!, gapOpenInput.value!,
|
|
42
|
+
gapExtendInput.value!, clustersColInput.value);
|
|
43
|
+
} else {
|
|
44
|
+
for (const inputRootStyle of inputRootStyles)
|
|
45
|
+
inputRootStyle.display = 'none';
|
|
46
|
+
|
|
47
|
+
performAlignment = async () => null;
|
|
48
|
+
}
|
|
49
|
+
}) as DG.InputBase<DG.Column<string>>;
|
|
50
|
+
colInput.setTooltip('Sequences column to use for alignment');
|
|
51
|
+
colInput.fireChanged();
|
|
52
|
+
|
|
53
|
+
const clustersColInput = ui.columnInput('Clusters', table, null);
|
|
54
|
+
clustersColInput.nullable = true;
|
|
55
|
+
|
|
56
|
+
let msaCol: DG.Column<string> | null = null;
|
|
57
|
+
ui.dialog('MSA')
|
|
58
|
+
.add(colInput)
|
|
59
|
+
.add(clustersColInput)
|
|
60
|
+
.add(methodInput)
|
|
61
|
+
.add(gapOpenInput)
|
|
62
|
+
.add(gapExtendInput)
|
|
63
|
+
.onOK(async () => {
|
|
64
|
+
const pi = DG.TaskBarProgressIndicator.create('Analyze for MSA ...');
|
|
65
|
+
try {
|
|
66
|
+
colInput.fireChanged();
|
|
67
|
+
msaCol = await performAlignment(); // progress
|
|
68
|
+
if (msaCol == null)
|
|
69
|
+
return grok.shell.warning('Wrong column format');
|
|
70
|
+
|
|
71
|
+
table.columns.add(msaCol);
|
|
72
|
+
await grok.data.detectSemanticTypes(table);
|
|
73
|
+
} catch (err: any) {
|
|
74
|
+
const errMsg: string = err instanceof Error ? err.message : err.toString();
|
|
75
|
+
grok.shell.error(errMsg);
|
|
76
|
+
} finally {
|
|
77
|
+
pi.close();
|
|
78
|
+
}
|
|
79
|
+
})
|
|
80
|
+
.show();
|
|
81
|
+
}
|