@datagrok/bio 2.4.12 → 2.4.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +2 -1
- package/dist/package-test.js +1 -1
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +1 -1
- package/dist/package.js.map +1 -1
- package/package.json +6 -5
- package/scripts/motif_generator.py +119 -0
- package/src/demo/bio01-similarity-diversity.ts +40 -29
- package/src/demo/bio01a-hierarchical-clustering-and-sequence-space.ts +51 -40
- package/src/demo/bio01b-hierarchical-clustering-and-activity-cliffs.ts +71 -62
- package/src/demo/bio05-helm-msa-sequence-space.ts +43 -34
- package/src/demo/utils.ts +7 -13
- package/src/package.ts +10 -6
- package/src/tests/msa-tests.ts +92 -4
- package/src/tests/renderers-test.ts +8 -7
- package/src/utils/multiple-sequence-alignment-ui.ts +126 -58
- package/src/utils/multiple-sequence-alignment.ts +5 -7
package/src/package.ts
CHANGED
|
@@ -409,8 +409,8 @@ export async function toAtomicLevel(df: DG.DataFrame, macroMolecule: DG.Column):
|
|
|
409
409
|
//top-menu: Bio | Alignment | MSA...
|
|
410
410
|
//name: MSA...
|
|
411
411
|
//tags: bio, panel
|
|
412
|
-
export function multipleSequenceAlignmentAny(
|
|
413
|
-
multipleSequenceAlignmentUI(
|
|
412
|
+
export function multipleSequenceAlignmentAny(): void {
|
|
413
|
+
multipleSequenceAlignmentUI();
|
|
414
414
|
}
|
|
415
415
|
|
|
416
416
|
//top-menu: Bio | Structure | Composition Analysis
|
|
@@ -655,30 +655,34 @@ export function bioSubstructureFilter(): BioSubstructureFilter {
|
|
|
655
655
|
//name: demoBioSimilarityDiversity
|
|
656
656
|
//meta.demoPath: Bioinformatics | Similarity, Diversity
|
|
657
657
|
//description:
|
|
658
|
+
//meta.path: /apps/Tutorials/Demo/Bioinformatics/Similarity,%20Diversity
|
|
658
659
|
export async function demoBioSimilarityDiversity(): Promise<void> {
|
|
659
|
-
await demoBio01UI(
|
|
660
|
+
await demoBio01UI();
|
|
660
661
|
}
|
|
661
662
|
|
|
662
663
|
// demoBio01a
|
|
663
664
|
//name:demoBioSequenceSpace
|
|
664
665
|
//meta.demoPath: Bioinformatics | Sequence Space
|
|
665
666
|
//description:
|
|
667
|
+
//meta.path: /apps/Tutorials/Demo/Bioinformatics/Sequence%20Space
|
|
666
668
|
export async function demoBioSequenceSpace(): Promise<void> {
|
|
667
|
-
await demoBio01aUI(
|
|
669
|
+
await demoBio01aUI();
|
|
668
670
|
}
|
|
669
671
|
|
|
670
672
|
// demoBio01b
|
|
671
673
|
//name: demoBioActivityCliffs
|
|
672
674
|
//meta.demoPath: Bioinformatics | Activity Cliffs
|
|
673
675
|
//description:
|
|
676
|
+
//meta.path: /apps/Tutorials/Demo/Bioinformatics/Activity%20Cliffs
|
|
674
677
|
export async function demoBioActivityCliffs(): Promise<void> {
|
|
675
|
-
await demoBio01bUI(
|
|
678
|
+
await demoBio01bUI();
|
|
676
679
|
}
|
|
677
680
|
|
|
678
681
|
// demoBio05
|
|
679
682
|
//name: demoBioHelmMsaSequenceSpace
|
|
680
683
|
//meta.demoPath: Bioinformatics | Helm, MSA, Sequence Space
|
|
681
684
|
//description:
|
|
685
|
+
//meta.path: /apps/Tutorials/Demo/Bioinformatics/Helm,%20MSA,%20Sequence%20Space
|
|
682
686
|
export async function demoBioHelmMsaSequenceSpace(): Promise<void> {
|
|
683
|
-
await demoBio05UI(
|
|
687
|
+
await demoBio05UI();
|
|
684
688
|
}
|
package/src/tests/msa-tests.ts
CHANGED
|
@@ -3,8 +3,9 @@ import * as ui from 'datagrok-api/ui';
|
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
4
|
|
|
5
5
|
import {category, expect, expectArray, test} from '@datagrok-libraries/utils/src/test';
|
|
6
|
-
|
|
6
|
+
import {ALIGNMENT, ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
7
7
|
import {runKalign} from '../utils/multiple-sequence-alignment';
|
|
8
|
+
import {multipleSequenceAlignmentUI} from '../utils/multiple-sequence-alignment-ui';
|
|
8
9
|
//import * as grok from 'datagrok-api/grok';
|
|
9
10
|
|
|
10
11
|
export const _package = new DG.Package();
|
|
@@ -31,9 +32,46 @@ FWR-WYVKHPFWR-WYVKHPFWR-WYVKHPFWR-WYVKHPFWR-WYVKHPFWR-WYVKHPFWR-WYVKHPFWR-WYVKHP
|
|
|
31
32
|
YNR-WYVKHPYNR-WYVKHPYNR-WYVKHPYNR-WYVKHPYNR-WYVKHPYNR-WYVKHPYNR-WYVKHPYNR-WYVKHP
|
|
32
33
|
MWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHP`;
|
|
33
34
|
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
35
|
+
|
|
36
|
+
const helmFromCsv = `seq
|
|
37
|
+
PEPTIDE1{meI.hHis.Aca.N.T.dE.Thr_PO3H2}$$$$
|
|
38
|
+
PEPTIDE1{meI.Aca.N.T.dE.Thr_PO3H2}$$$$
|
|
39
|
+
PEPTIDE1{hHis.Aca.N.T.dE.Thr_PO3H2}$$$$`;
|
|
40
|
+
|
|
41
|
+
const helmToCsv = `seq
|
|
42
|
+
meI.hHis.Aca.N.T.dE.Thr_PO3H2
|
|
43
|
+
.meI.Aca.N.T.dE.Thr_PO3H2
|
|
44
|
+
.hHis.Aca.N.T.dE.Thr_PO3H2`;
|
|
45
|
+
|
|
46
|
+
const longHelmFromCsv = `seq
|
|
47
|
+
PEPTIDE1{meI.hHis.Aca.N.T.dE.Thr_PO3H2.Aca.D-Tyr_Et.dV.E.N.D-Orn.D-aThr.Phe_4Me.Thr_PO3H2}$$$$
|
|
48
|
+
PEPTIDE1{meI.hHis.Aca.Cys_SEt.T.dK.Tyr_PO3H2.D-Chg.dV.Phe_ab-dehydro.N.D-Orn.D-aThr.Phe_4Me}$$$$
|
|
49
|
+
PEPTIDE1{Lys_Boc.hHis.Aca.Cys_SEt.T.dK.Tyr_PO3H2.D-Chg.dV.Thr_PO3H2.N.D-Orn.D-aThr.Phe_4Me}$$$$`;
|
|
50
|
+
|
|
51
|
+
const longHelmToCsv = `seq
|
|
52
|
+
meI.hHis.Aca.N.T.dE.Thr_PO3H2.Aca.D-Tyr_Et.dV.E.N.D-Orn.D-aThr.Phe_4Me.Thr_PO3H2
|
|
53
|
+
meI.hHis.Aca.Cys_SEt.T.dK..Tyr_PO3H2.D-Chg.dV.Phe_ab-dehydro.N.D-Orn.D-aThr.Phe_4Me.
|
|
54
|
+
Lys_Boc.hHis.Aca.Cys_SEt.T.dK..Tyr_PO3H2.D-Chg.dV.Thr_PO3H2.N.D-Orn.D-aThr.Phe_4Me.`;
|
|
55
|
+
|
|
56
|
+
const SeparatorFromCsv = `seq
|
|
57
|
+
F-W-P-H-E-Y
|
|
58
|
+
Y-N-R-Q-W-Y-V
|
|
59
|
+
M-K-P-S-E-Y-V`;
|
|
60
|
+
|
|
61
|
+
const SeparatorToCsv = `seq
|
|
62
|
+
FWPHEY-
|
|
63
|
+
YNRQWYV
|
|
64
|
+
MKPSEYV`;
|
|
65
|
+
|
|
66
|
+
const SeparatorLongFromCsv = `seq
|
|
67
|
+
M-I-E-V-F-L-F-G-I-V-L-G-L-I-P-I-T-L-A-G-L-F-V-T-A-Y-L-Q-Y-R-R-G-D-Q-L-D-L
|
|
68
|
+
M-M-E-L-V-L-K-T-I-I-G-P-I-V-V-G-V-V-L-R-I-V-D-K-W-L-N-K-D-K
|
|
69
|
+
M-D-R-T-D-E-V-S-N-H-T-H-D-K-P-T-L-T-W-F-E-E-I-F-E-E-Y-H-S-P-F-H-N`;
|
|
70
|
+
|
|
71
|
+
const SeparatorLongToCsv = `seq
|
|
72
|
+
MIEV-FLFGIVLGLIPITLAGLFVTAYLQYRRGDQLDL
|
|
73
|
+
MMEL-VLKTII-GPIVVGVVLRIVDKWLNKDK------
|
|
74
|
+
MDRTDEVSNHTHDKPTLTWFEEIFEEYHSPFHN-----`;
|
|
37
75
|
|
|
38
76
|
test('isCorrect', async () => {
|
|
39
77
|
await _testMsaIsCorrect(fromCsv, toCsv);
|
|
@@ -42,6 +80,26 @@ MWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHP
|
|
|
42
80
|
test('isCorrectLong', async () => {
|
|
43
81
|
await _testMsaIsCorrect(longFromCsv, longToCsv);
|
|
44
82
|
});
|
|
83
|
+
|
|
84
|
+
test('isCorrectHelm', async () => {
|
|
85
|
+
await _testMSAOnColumn(helmFromCsv, helmToCsv, NOTATION.HELM, NOTATION.SEPARATOR, undefined, 'mafft');
|
|
86
|
+
}, {skipReason: 'GROK-13053'});
|
|
87
|
+
|
|
88
|
+
test('isCorrectHelmLong', async () => {
|
|
89
|
+
await _testMSAOnColumn(longHelmFromCsv, longHelmToCsv, NOTATION.HELM, NOTATION.SEPARATOR, undefined, 'mafft');
|
|
90
|
+
}, {skipReason: 'GROK-13053'});
|
|
91
|
+
|
|
92
|
+
test('isCorrectSeparator', async () => {
|
|
93
|
+
await _testMSAOnColumn(
|
|
94
|
+
SeparatorFromCsv, SeparatorToCsv, NOTATION.SEPARATOR, NOTATION.FASTA, ALPHABET.PT
|
|
95
|
+
);
|
|
96
|
+
});
|
|
97
|
+
|
|
98
|
+
test('isCorrectSeparatorLong', async () => {
|
|
99
|
+
await _testMSAOnColumn(
|
|
100
|
+
SeparatorLongFromCsv, SeparatorLongToCsv, NOTATION.SEPARATOR, NOTATION.FASTA, ALPHABET.PT
|
|
101
|
+
);
|
|
102
|
+
});
|
|
45
103
|
});
|
|
46
104
|
|
|
47
105
|
async function _testMsaIsCorrect(srcCsv: string, tgtCsv: string): Promise<void> {
|
|
@@ -58,3 +116,33 @@ async function _testMsaIsCorrect(srcCsv: string, tgtCsv: string): Promise<void>
|
|
|
58
116
|
const msaCol: DG.Column = await runKalign(srcCol, true);
|
|
59
117
|
expectArray(msaCol.toList(), tgtCol.toList());
|
|
60
118
|
}
|
|
119
|
+
|
|
120
|
+
async function _testMSAOnColumn(
|
|
121
|
+
srcCsv: string, tgtCsv: string,
|
|
122
|
+
srcNotation: NOTATION, tgtNotation: NOTATION, alphabet?: ALPHABET, pepseaMethod?: string
|
|
123
|
+
): Promise<void> {
|
|
124
|
+
const srcDf: DG.DataFrame = DG.DataFrame.fromCsv(srcCsv);
|
|
125
|
+
const tgtDf: DG.DataFrame = DG.DataFrame.fromCsv(tgtCsv);
|
|
126
|
+
|
|
127
|
+
const srcSeqCol = srcDf.getCol('seq')!;
|
|
128
|
+
const tgtCol = tgtDf.getCol('seq')!;
|
|
129
|
+
const srcCol: DG.Column = srcDf.getCol('seq')!;
|
|
130
|
+
const semType: string = await grok.functions
|
|
131
|
+
.call('Bio:detectMacromolecule', {col: srcCol}) as unknown as string;
|
|
132
|
+
if (semType)
|
|
133
|
+
srcCol.semType = semType;
|
|
134
|
+
|
|
135
|
+
await grok.data.detectSemanticTypes(srcDf);
|
|
136
|
+
expect(srcSeqCol.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
137
|
+
expect(srcSeqCol.getTag(DG.TAGS.UNITS), srcNotation);
|
|
138
|
+
if (alphabet)
|
|
139
|
+
expect(srcSeqCol.getTag(bioTAGS.alphabet), alphabet);
|
|
140
|
+
|
|
141
|
+
const msaSeqCol = await multipleSequenceAlignmentUI(srcSeqCol, pepseaMethod);
|
|
142
|
+
expect(msaSeqCol.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
143
|
+
expect(msaSeqCol.getTag(DG.TAGS.UNITS), tgtNotation);
|
|
144
|
+
expect(msaSeqCol.getTag(bioTAGS.aligned), ALIGNMENT.SEQ_MSA);
|
|
145
|
+
if (alphabet)
|
|
146
|
+
expect(msaSeqCol.getTag(bioTAGS.alphabet), alphabet);
|
|
147
|
+
expectArray(msaSeqCol.toList(), tgtCol.toList());
|
|
148
|
+
}
|
|
@@ -10,6 +10,7 @@ import {generateLongSequence, generateManySequences, performanceTest} from './ut
|
|
|
10
10
|
import {errorToConsole} from '@datagrok-libraries/utils/src/to-console';
|
|
11
11
|
import {ALIGNMENT, ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
12
12
|
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
13
|
+
import {multipleSequenceAlignmentUI} from '../utils/multiple-sequence-alignment-ui';
|
|
13
14
|
|
|
14
15
|
category('renderers', () => {
|
|
15
16
|
let tvList: DG.TableView[];
|
|
@@ -145,17 +146,17 @@ category('renderers', () => {
|
|
|
145
146
|
expect(srcSeqCol.getTag(bioTAGS.alphabet), ALPHABET.PT);
|
|
146
147
|
expect(srcSeqCol.getTag(DG.TAGS.CELL_RENDERER), 'sequence');
|
|
147
148
|
|
|
148
|
-
const msaSeqCol =
|
|
149
|
+
const msaSeqCol = await multipleSequenceAlignmentUI(srcSeqCol);
|
|
149
150
|
tv.grid.invalidate();
|
|
150
151
|
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
152
|
+
expect(msaSeqCol.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
153
|
+
expect(msaSeqCol.getTag(DG.TAGS.UNITS), NOTATION.FASTA);
|
|
154
|
+
expect(msaSeqCol.getTag(bioTAGS.aligned), ALIGNMENT.SEQ_MSA);
|
|
155
|
+
expect(msaSeqCol.getTag(bioTAGS.alphabet), ALPHABET.PT);
|
|
156
|
+
expect(msaSeqCol.getTag(DG.TAGS.CELL_RENDERER), 'sequence');
|
|
156
157
|
|
|
157
158
|
// check newColumn with UnitsHandler constructor
|
|
158
|
-
|
|
159
|
+
const uh: UnitsHandler = new UnitsHandler(msaSeqCol);
|
|
159
160
|
|
|
160
161
|
dfList.push(df);
|
|
161
162
|
tvList.push(tv);
|
|
@@ -6,76 +6,144 @@ import {ALPHABET, NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecul
|
|
|
6
6
|
import {runKalign} from './multiple-sequence-alignment';
|
|
7
7
|
import {pepseaMethods, runPepsea} from './pepsea';
|
|
8
8
|
import {checkInputColumnUI} from './check-input-column';
|
|
9
|
+
import {NotationConverter} from '@datagrok-libraries/bio/src/utils/notation-converter';
|
|
10
|
+
import {_package} from '../package';
|
|
9
11
|
|
|
10
|
-
export
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
if (seqCol == null) {
|
|
14
|
-
grok.shell.warning(`MSAError: dataset doesn't conain any Macromolecule column`);
|
|
15
|
-
return;
|
|
12
|
+
export class MsaWarning extends Error {
|
|
13
|
+
constructor(message: string, options?: ErrorOptions) {
|
|
14
|
+
super(message, options);
|
|
16
15
|
}
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
export async function multipleSequenceAlignmentUI(
|
|
19
|
+
col: DG.Column<string> | null = null,
|
|
20
|
+
pepseaMethod: typeof pepseaMethods[number] = pepseaMethods[0]
|
|
21
|
+
): Promise<DG.Column> {
|
|
22
|
+
return new Promise(async (resolve, reject) => {
|
|
23
|
+
const table = col?.dataFrame ?? grok.shell.t;
|
|
24
|
+
const seqCol = col ?? table.columns.bySemType(DG.SEMTYPE.MACROMOLECULE);
|
|
25
|
+
if (seqCol == null) {
|
|
26
|
+
const errMsg = `MSAError: dataset doesn't conain any Macromolecule column`;
|
|
27
|
+
grok.shell.warning(errMsg);
|
|
28
|
+
reject(new MsaWarning(errMsg));
|
|
29
|
+
}
|
|
17
30
|
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
31
|
+
// UI
|
|
32
|
+
const methodInput = ui.choiceInput('Method', pepseaMethod, pepseaMethods);
|
|
33
|
+
methodInput.setTooltip('Alignment method');
|
|
34
|
+
const gapOpenInput = ui.floatInput('Gap open', 1.53);
|
|
35
|
+
gapOpenInput.setTooltip('Gap opening penalty at group-to-group alignment');
|
|
36
|
+
const gapExtendInput = ui.floatInput('Gap extend', 0);
|
|
37
|
+
gapExtendInput.setTooltip('Gap extension penalty to skip the alignment');
|
|
38
|
+
const inputRootStyles = [methodInput.root.style, gapOpenInput.root.style, gapExtendInput.root.style];
|
|
39
|
+
let performAlignment: (() => Promise<DG.Column<string>>) | undefined;
|
|
26
40
|
|
|
27
|
-
|
|
28
|
-
const
|
|
29
|
-
|
|
41
|
+
// TODO: allow only macromolecule colums to be chosen
|
|
42
|
+
const colInput = ui.columnInput('Sequence', table, seqCol, () => {
|
|
43
|
+
performAlignment = onColInputChange(
|
|
44
|
+
colInput.value, table, inputRootStyles, methodInput, clustersColInput, gapOpenInput, gapExtendInput
|
|
45
|
+
);
|
|
46
|
+
}
|
|
47
|
+
) as DG.InputBase<DG.Column<string>>;
|
|
48
|
+
colInput.setTooltip('Sequences column to use for alignment');
|
|
49
|
+
const clustersColInput = ui.columnInput('Clusters', table, null);
|
|
50
|
+
clustersColInput.nullable = true;
|
|
51
|
+
colInput.fireChanged();
|
|
52
|
+
//if column is specified (from tests), run alignment and resolve with the result
|
|
53
|
+
if (col) {
|
|
54
|
+
performAlignment = onColInputChange(
|
|
55
|
+
col, table, inputRootStyles, methodInput, clustersColInput, gapOpenInput, gapExtendInput
|
|
56
|
+
);
|
|
30
57
|
|
|
31
|
-
|
|
32
|
-
|
|
58
|
+
await onDialogOk(colInput, table, performAlignment, resolve, reject);
|
|
59
|
+
return;
|
|
60
|
+
}
|
|
61
|
+
const dlg = ui.dialog('MSA')
|
|
62
|
+
.add(colInput)
|
|
63
|
+
.add(clustersColInput)
|
|
64
|
+
.add(methodInput)
|
|
65
|
+
.add(gapOpenInput)
|
|
66
|
+
.add(gapExtendInput)
|
|
67
|
+
.onOK(async () => {
|
|
68
|
+
await onDialogOk(colInput, table, performAlignment, resolve, reject);
|
|
69
|
+
})
|
|
70
|
+
.show();
|
|
71
|
+
});
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
async function onDialogOk(
|
|
75
|
+
colInput: DG.InputBase< DG.Column<any>>,
|
|
76
|
+
table: DG.DataFrame,
|
|
77
|
+
performAlignment: (() => Promise<DG.Column<string>>) | undefined,
|
|
78
|
+
resolve: (value: DG.Column<any>) => void,
|
|
79
|
+
reject: (reason: any) => void
|
|
80
|
+
): Promise<void> {
|
|
81
|
+
let msaCol: DG.Column<string> | null = null;
|
|
82
|
+
const pi = DG.TaskBarProgressIndicator.create('Analyze for MSA ...');
|
|
83
|
+
try {
|
|
84
|
+
colInput.fireChanged();
|
|
85
|
+
if (colInput.value.semType !== DG.SEMTYPE.MACROMOLECULE)
|
|
86
|
+
throw new Error('Chosen column has to be of Macromolecule semantic type');
|
|
87
|
+
if (performAlignment === undefined) // value can only be undefined when column can't be processed with either method
|
|
88
|
+
throw new Error('Invalid column format');
|
|
89
|
+
msaCol = await performAlignment(); // progress
|
|
90
|
+
if (msaCol == null)
|
|
91
|
+
return grok.shell.warning('Wrong column format');
|
|
92
|
+
|
|
93
|
+
table.columns.add(msaCol);
|
|
94
|
+
await grok.data.detectSemanticTypes(table);
|
|
95
|
+
|
|
96
|
+
resolve(msaCol);
|
|
97
|
+
} catch (err: any) {
|
|
98
|
+
const errMsg: string = err instanceof Error ? err.message : err.toString();
|
|
99
|
+
grok.shell.error(errMsg);
|
|
100
|
+
reject(err);
|
|
101
|
+
} finally {
|
|
102
|
+
pi.close();
|
|
103
|
+
}
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
function onColInputChange(
|
|
108
|
+
col: DG.Column<string>,
|
|
109
|
+
table: DG.DataFrame,
|
|
110
|
+
inputRootStyles: CSSStyleDeclaration[],
|
|
111
|
+
methodInput: DG.InputBase<string | null>,
|
|
112
|
+
clustersColInput: DG.InputBase<DG.Column<any> | null>,
|
|
113
|
+
gapOpenInput: DG.InputBase<number | null>,
|
|
114
|
+
gapExtendInput: DG.InputBase<number | null>
|
|
115
|
+
): (() => Promise<DG.Column<string>>) | undefined {
|
|
116
|
+
try {
|
|
117
|
+
if (col.semType !== DG.SEMTYPE.MACROMOLECULE)
|
|
118
|
+
return;
|
|
119
|
+
const unusedName = table.columns.getUnusedName(`msa(${col.name})`);
|
|
120
|
+
|
|
121
|
+
if (checkInputColumnUI(col, col.name,
|
|
122
|
+
[NOTATION.FASTA, NOTATION.SEPARATOR], [ALPHABET.DNA, ALPHABET.RNA, ALPHABET.PT], false)
|
|
123
|
+
) { // Kalign - natural alphabets. if the notation is separator, convert to fasta and then run kalign
|
|
33
124
|
for (const inputRootStyle of inputRootStyles)
|
|
34
125
|
inputRootStyle.display = 'none';
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
126
|
+
const potentialColNC = new NotationConverter(col);
|
|
127
|
+
const performCol: DG.Column<string> = potentialColNC.isFasta() ? col :
|
|
128
|
+
potentialColNC.convert(NOTATION.FASTA);
|
|
129
|
+
return async () => await runKalign(performCol, false, unusedName, clustersColInput.value);
|
|
130
|
+
} else if (checkInputColumnUI(col, col.name,
|
|
131
|
+
[NOTATION.HELM], [], false)
|
|
132
|
+
) { // PepSeA branch - Helm notation or separator notation with unknown alphabets
|
|
38
133
|
for (const inputRootStyle of inputRootStyles)
|
|
39
|
-
inputRootStyle.display
|
|
134
|
+
inputRootStyle.removeProperty('display');
|
|
40
135
|
|
|
41
|
-
|
|
42
|
-
|
|
136
|
+
return async () => await runPepsea(col, unusedName, methodInput.value!,
|
|
137
|
+
gapOpenInput.value!, gapExtendInput.value!, clustersColInput.value);
|
|
43
138
|
} else {
|
|
44
139
|
for (const inputRootStyle of inputRootStyles)
|
|
45
140
|
inputRootStyle.display = 'none';
|
|
46
141
|
|
|
47
|
-
|
|
142
|
+
return;
|
|
48
143
|
}
|
|
49
|
-
})
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
clustersColInput.nullable = true;
|
|
55
|
-
|
|
56
|
-
let msaCol: DG.Column<string> | null = null;
|
|
57
|
-
ui.dialog('MSA')
|
|
58
|
-
.add(colInput)
|
|
59
|
-
.add(clustersColInput)
|
|
60
|
-
.add(methodInput)
|
|
61
|
-
.add(gapOpenInput)
|
|
62
|
-
.add(gapExtendInput)
|
|
63
|
-
.onOK(async () => {
|
|
64
|
-
const pi = DG.TaskBarProgressIndicator.create('Analyze for MSA ...');
|
|
65
|
-
try {
|
|
66
|
-
colInput.fireChanged();
|
|
67
|
-
msaCol = await performAlignment(); // progress
|
|
68
|
-
if (msaCol == null)
|
|
69
|
-
return grok.shell.warning('Wrong column format');
|
|
70
|
-
|
|
71
|
-
table.columns.add(msaCol);
|
|
72
|
-
await grok.data.detectSemanticTypes(table);
|
|
73
|
-
} catch (err: any) {
|
|
74
|
-
const errMsg: string = err instanceof Error ? err.message : err.toString();
|
|
75
|
-
grok.shell.error(errMsg);
|
|
76
|
-
} finally {
|
|
77
|
-
pi.close();
|
|
78
|
-
}
|
|
79
|
-
})
|
|
80
|
-
.show();
|
|
144
|
+
} catch (err: any) {
|
|
145
|
+
const errMsg: string = err instanceof Error ? err.message : err.toString();
|
|
146
|
+
grok.shell.error(errMsg);
|
|
147
|
+
_package.logger.error(errMsg);
|
|
148
|
+
}
|
|
81
149
|
}
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
import * as DG from 'datagrok-api/dg';
|
|
3
3
|
|
|
4
4
|
import {FastaFileHandler} from '@datagrok-libraries/bio/src/utils/fasta-handler';
|
|
5
|
-
import {TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
5
|
+
import {ALIGNMENT, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
6
6
|
//@ts-ignore: there are no types for this library
|
|
7
7
|
import Aioli from '@biowasm/aioli';
|
|
8
8
|
|
|
@@ -25,7 +25,8 @@ function _stringsToFasta(sequences: string[]): string {
|
|
|
25
25
|
*
|
|
26
26
|
* @param {DG.Column} srcCol Column with sequences.
|
|
27
27
|
* @param {boolean} isAligned Whether the column is aligned.
|
|
28
|
-
* @param {string} unUsedName
|
|
28
|
+
* @param {string | undefined} unUsedName
|
|
29
|
+
* @param {DG.Column | null} clustersCol Column with clusters.
|
|
29
30
|
* @return {Promise<DG.Column>} Aligned sequences.
|
|
30
31
|
*/
|
|
31
32
|
export async function runKalign(srcCol: DG.Column<string>, isAligned: boolean = false, unUsedName: string = '',
|
|
@@ -40,7 +41,7 @@ export async function runKalign(srcCol: DG.Column<string>, isAligned: boolean =
|
|
|
40
41
|
if (clustersCol.type != DG.COLUMN_TYPE.STRING)
|
|
41
42
|
clustersCol = clustersCol.convertTo(DG.TYPE.STRING);
|
|
42
43
|
clustersCol.compact();
|
|
43
|
-
|
|
44
|
+
|
|
44
45
|
//TODO: use fixed-size inner arrays, but first need to expose the method to get each category count
|
|
45
46
|
const clustersColCategories = clustersCol.categories;
|
|
46
47
|
const clustersColData = clustersCol.getRawData();
|
|
@@ -61,8 +62,6 @@ export async function runKalign(srcCol: DG.Column<string>, isAligned: boolean =
|
|
|
61
62
|
for (let clusterIdx = 0; clusterIdx < clustersColCategories.length; ++clusterIdx) {
|
|
62
63
|
const clusterSequences = fastaSequences[clusterIdx];
|
|
63
64
|
const fasta = _stringsToFasta(clusterSequences);
|
|
64
|
-
|
|
65
|
-
console.log(['fasta.length =', fasta.length]);
|
|
66
65
|
|
|
67
66
|
await CLI.fs.writeFile(fastaInputFilename, fasta);
|
|
68
67
|
const output = await CLI.exec(`kalign ${fastaInputFilename} -f fasta -o ${fastaOutputFilename}`);
|
|
@@ -82,8 +81,7 @@ export async function runKalign(srcCol: DG.Column<string>, isAligned: boolean =
|
|
|
82
81
|
// units
|
|
83
82
|
const srcUnits = srcCol.getTag(DG.TAGS.UNITS);
|
|
84
83
|
//aligned
|
|
85
|
-
const
|
|
86
|
-
const tgtAligned = srcAligned + '.MSA';
|
|
84
|
+
const tgtAligned = ALIGNMENT.SEQ_MSA;
|
|
87
85
|
//alphabet
|
|
88
86
|
const srcAlphabet = srcCol.getTag(bioTAGS.alphabet);
|
|
89
87
|
|