@datagrok/bio 2.4.13 → 2.4.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.json +2 -1
- package/dist/package-test.js +1 -1
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +1 -1
- package/dist/package.js.map +1 -1
- package/package.json +1 -1
- package/src/package.ts +2 -2
- package/src/tests/msa-tests.ts +92 -4
- package/src/tests/renderers-test.ts +8 -7
- package/src/utils/multiple-sequence-alignment-ui.ts +126 -58
- package/src/utils/multiple-sequence-alignment.ts +5 -7
package/package.json
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"name": "Leonid Stolbov",
|
|
6
6
|
"email": "lstolbov@datagrok.ai"
|
|
7
7
|
},
|
|
8
|
-
"version": "2.4.13",
|
|
8
|
+
"version": "2.4.14",
|
|
9
9
|
"description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
|
|
10
10
|
"repository": {
|
|
11
11
|
"type": "git",
|
package/src/package.ts
CHANGED
|
@@ -409,8 +409,8 @@ export async function toAtomicLevel(df: DG.DataFrame, macroMolecule: DG.Column):
|
|
|
409
409
|
//top-menu: Bio | Alignment | MSA...
|
|
410
410
|
//name: MSA...
|
|
411
411
|
//tags: bio, panel
|
|
412
|
-
export function multipleSequenceAlignmentAny(
|
|
413
|
-
multipleSequenceAlignmentUI(
|
|
412
|
+
export function multipleSequenceAlignmentAny(): void {
|
|
413
|
+
multipleSequenceAlignmentUI();
|
|
414
414
|
}
|
|
415
415
|
|
|
416
416
|
//top-menu: Bio | Structure | Composition Analysis
|
package/src/tests/msa-tests.ts
CHANGED
|
@@ -3,8 +3,9 @@ import * as ui from 'datagrok-api/ui';
|
|
|
3
3
|
import * as DG from 'datagrok-api/dg';
|
|
4
4
|
|
|
5
5
|
import {category, expect, expectArray, test} from '@datagrok-libraries/utils/src/test';
|
|
6
|
-
|
|
6
|
+
import {ALIGNMENT, ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
7
7
|
import {runKalign} from '../utils/multiple-sequence-alignment';
|
|
8
|
+
import {multipleSequenceAlignmentUI} from '../utils/multiple-sequence-alignment-ui';
|
|
8
9
|
//import * as grok from 'datagrok-api/grok';
|
|
9
10
|
|
|
10
11
|
export const _package = new DG.Package();
|
|
@@ -31,9 +32,46 @@ FWR-WYVKHPFWR-WYVKHPFWR-WYVKHPFWR-WYVKHPFWR-WYVKHPFWR-WYVKHPFWR-WYVKHPFWR-WYVKHP
|
|
|
31
32
|
YNR-WYVKHPYNR-WYVKHPYNR-WYVKHPYNR-WYVKHPYNR-WYVKHPYNR-WYVKHPYNR-WYVKHPYNR-WYVKHP
|
|
32
33
|
MWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHP`;
|
|
33
34
|
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
35
|
+
|
|
36
|
+
const helmFromCsv = `seq
|
|
37
|
+
PEPTIDE1{meI.hHis.Aca.N.T.dE.Thr_PO3H2}$$$$
|
|
38
|
+
PEPTIDE1{meI.Aca.N.T.dE.Thr_PO3H2}$$$$
|
|
39
|
+
PEPTIDE1{hHis.Aca.N.T.dE.Thr_PO3H2}$$$$`;
|
|
40
|
+
|
|
41
|
+
const helmToCsv = `seq
|
|
42
|
+
meI.hHis.Aca.N.T.dE.Thr_PO3H2
|
|
43
|
+
.meI.Aca.N.T.dE.Thr_PO3H2
|
|
44
|
+
.hHis.Aca.N.T.dE.Thr_PO3H2`;
|
|
45
|
+
|
|
46
|
+
const longHelmFromCsv = `seq
|
|
47
|
+
PEPTIDE1{meI.hHis.Aca.N.T.dE.Thr_PO3H2.Aca.D-Tyr_Et.dV.E.N.D-Orn.D-aThr.Phe_4Me.Thr_PO3H2}$$$$
|
|
48
|
+
PEPTIDE1{meI.hHis.Aca.Cys_SEt.T.dK.Tyr_PO3H2.D-Chg.dV.Phe_ab-dehydro.N.D-Orn.D-aThr.Phe_4Me}$$$$
|
|
49
|
+
PEPTIDE1{Lys_Boc.hHis.Aca.Cys_SEt.T.dK.Tyr_PO3H2.D-Chg.dV.Thr_PO3H2.N.D-Orn.D-aThr.Phe_4Me}$$$$`;
|
|
50
|
+
|
|
51
|
+
const longHelmToCsv = `seq
|
|
52
|
+
meI.hHis.Aca.N.T.dE.Thr_PO3H2.Aca.D-Tyr_Et.dV.E.N.D-Orn.D-aThr.Phe_4Me.Thr_PO3H2
|
|
53
|
+
meI.hHis.Aca.Cys_SEt.T.dK..Tyr_PO3H2.D-Chg.dV.Phe_ab-dehydro.N.D-Orn.D-aThr.Phe_4Me.
|
|
54
|
+
Lys_Boc.hHis.Aca.Cys_SEt.T.dK..Tyr_PO3H2.D-Chg.dV.Thr_PO3H2.N.D-Orn.D-aThr.Phe_4Me.`;
|
|
55
|
+
|
|
56
|
+
const SeparatorFromCsv = `seq
|
|
57
|
+
F-W-P-H-E-Y
|
|
58
|
+
Y-N-R-Q-W-Y-V
|
|
59
|
+
M-K-P-S-E-Y-V`;
|
|
60
|
+
|
|
61
|
+
const SeparatorToCsv = `seq
|
|
62
|
+
FWPHEY-
|
|
63
|
+
YNRQWYV
|
|
64
|
+
MKPSEYV`;
|
|
65
|
+
|
|
66
|
+
const SeparatorLongFromCsv = `seq
|
|
67
|
+
M-I-E-V-F-L-F-G-I-V-L-G-L-I-P-I-T-L-A-G-L-F-V-T-A-Y-L-Q-Y-R-R-G-D-Q-L-D-L
|
|
68
|
+
M-M-E-L-V-L-K-T-I-I-G-P-I-V-V-G-V-V-L-R-I-V-D-K-W-L-N-K-D-K
|
|
69
|
+
M-D-R-T-D-E-V-S-N-H-T-H-D-K-P-T-L-T-W-F-E-E-I-F-E-E-Y-H-S-P-F-H-N`;
|
|
70
|
+
|
|
71
|
+
const SeparatorLongToCsv = `seq
|
|
72
|
+
MIEV-FLFGIVLGLIPITLAGLFVTAYLQYRRGDQLDL
|
|
73
|
+
MMEL-VLKTII-GPIVVGVVLRIVDKWLNKDK------
|
|
74
|
+
MDRTDEVSNHTHDKPTLTWFEEIFEEYHSPFHN-----`;
|
|
37
75
|
|
|
38
76
|
test('isCorrect', async () => {
|
|
39
77
|
await _testMsaIsCorrect(fromCsv, toCsv);
|
|
@@ -42,6 +80,26 @@ MWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHP
|
|
|
42
80
|
test('isCorrectLong', async () => {
|
|
43
81
|
await _testMsaIsCorrect(longFromCsv, longToCsv);
|
|
44
82
|
});
|
|
83
|
+
|
|
84
|
+
test('isCorrectHelm', async () => {
|
|
85
|
+
await _testMSAOnColumn(helmFromCsv, helmToCsv, NOTATION.HELM, NOTATION.SEPARATOR, undefined, 'mafft');
|
|
86
|
+
}, {skipReason: 'GROK-13053'});
|
|
87
|
+
|
|
88
|
+
test('isCorrectHelmLong', async () => {
|
|
89
|
+
await _testMSAOnColumn(longHelmFromCsv, longHelmToCsv, NOTATION.HELM, NOTATION.SEPARATOR, undefined, 'mafft');
|
|
90
|
+
}, {skipReason: 'GROK-13053'});
|
|
91
|
+
|
|
92
|
+
test('isCorrectSeparator', async () => {
|
|
93
|
+
await _testMSAOnColumn(
|
|
94
|
+
SeparatorFromCsv, SeparatorToCsv, NOTATION.SEPARATOR, NOTATION.FASTA, ALPHABET.PT
|
|
95
|
+
);
|
|
96
|
+
});
|
|
97
|
+
|
|
98
|
+
test('isCorrectSeparatorLong', async () => {
|
|
99
|
+
await _testMSAOnColumn(
|
|
100
|
+
SeparatorLongFromCsv, SeparatorLongToCsv, NOTATION.SEPARATOR, NOTATION.FASTA, ALPHABET.PT
|
|
101
|
+
);
|
|
102
|
+
});
|
|
45
103
|
});
|
|
46
104
|
|
|
47
105
|
async function _testMsaIsCorrect(srcCsv: string, tgtCsv: string): Promise<void> {
|
|
@@ -58,3 +116,33 @@ async function _testMsaIsCorrect(srcCsv: string, tgtCsv: string): Promise<void>
|
|
|
58
116
|
const msaCol: DG.Column = await runKalign(srcCol, true);
|
|
59
117
|
expectArray(msaCol.toList(), tgtCol.toList());
|
|
60
118
|
}
|
|
119
|
+
|
|
120
|
+
async function _testMSAOnColumn(
|
|
121
|
+
srcCsv: string, tgtCsv: string,
|
|
122
|
+
srcNotation: NOTATION, tgtNotation: NOTATION, alphabet?: ALPHABET, pepseaMethod?: string
|
|
123
|
+
): Promise<void> {
|
|
124
|
+
const srcDf: DG.DataFrame = DG.DataFrame.fromCsv(srcCsv);
|
|
125
|
+
const tgtDf: DG.DataFrame = DG.DataFrame.fromCsv(tgtCsv);
|
|
126
|
+
|
|
127
|
+
const srcSeqCol = srcDf.getCol('seq')!;
|
|
128
|
+
const tgtCol = tgtDf.getCol('seq')!;
|
|
129
|
+
const srcCol: DG.Column = srcDf.getCol('seq')!;
|
|
130
|
+
const semType: string = await grok.functions
|
|
131
|
+
.call('Bio:detectMacromolecule', {col: srcCol}) as unknown as string;
|
|
132
|
+
if (semType)
|
|
133
|
+
srcCol.semType = semType;
|
|
134
|
+
|
|
135
|
+
await grok.data.detectSemanticTypes(srcDf);
|
|
136
|
+
expect(srcSeqCol.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
137
|
+
expect(srcSeqCol.getTag(DG.TAGS.UNITS), srcNotation);
|
|
138
|
+
if (alphabet)
|
|
139
|
+
expect(srcSeqCol.getTag(bioTAGS.alphabet), alphabet);
|
|
140
|
+
|
|
141
|
+
const msaSeqCol = await multipleSequenceAlignmentUI(srcSeqCol, pepseaMethod);
|
|
142
|
+
expect(msaSeqCol.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
143
|
+
expect(msaSeqCol.getTag(DG.TAGS.UNITS), tgtNotation);
|
|
144
|
+
expect(msaSeqCol.getTag(bioTAGS.aligned), ALIGNMENT.SEQ_MSA);
|
|
145
|
+
if (alphabet)
|
|
146
|
+
expect(msaSeqCol.getTag(bioTAGS.alphabet), alphabet);
|
|
147
|
+
expectArray(msaSeqCol.toList(), tgtCol.toList());
|
|
148
|
+
}
|
|
@@ -10,6 +10,7 @@ import {generateLongSequence, generateManySequences, performanceTest} from './ut
|
|
|
10
10
|
import {errorToConsole} from '@datagrok-libraries/utils/src/to-console';
|
|
11
11
|
import {ALIGNMENT, ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
12
12
|
import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
|
|
13
|
+
import {multipleSequenceAlignmentUI} from '../utils/multiple-sequence-alignment-ui';
|
|
13
14
|
|
|
14
15
|
category('renderers', () => {
|
|
15
16
|
let tvList: DG.TableView[];
|
|
@@ -145,17 +146,17 @@ category('renderers', () => {
|
|
|
145
146
|
expect(srcSeqCol.getTag(bioTAGS.alphabet), ALPHABET.PT);
|
|
146
147
|
expect(srcSeqCol.getTag(DG.TAGS.CELL_RENDERER), 'sequence');
|
|
147
148
|
|
|
148
|
-
const msaSeqCol =
|
|
149
|
+
const msaSeqCol = await multipleSequenceAlignmentUI(srcSeqCol);
|
|
149
150
|
tv.grid.invalidate();
|
|
150
151
|
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
152
|
+
expect(msaSeqCol.semType, DG.SEMTYPE.MACROMOLECULE);
|
|
153
|
+
expect(msaSeqCol.getTag(DG.TAGS.UNITS), NOTATION.FASTA);
|
|
154
|
+
expect(msaSeqCol.getTag(bioTAGS.aligned), ALIGNMENT.SEQ_MSA);
|
|
155
|
+
expect(msaSeqCol.getTag(bioTAGS.alphabet), ALPHABET.PT);
|
|
156
|
+
expect(msaSeqCol.getTag(DG.TAGS.CELL_RENDERER), 'sequence');
|
|
156
157
|
|
|
157
158
|
// check newColumn with UnitsHandler constructor
|
|
158
|
-
|
|
159
|
+
const uh: UnitsHandler = new UnitsHandler(msaSeqCol);
|
|
159
160
|
|
|
160
161
|
dfList.push(df);
|
|
161
162
|
tvList.push(tv);
|
|
@@ -6,76 +6,144 @@ import {ALPHABET, NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecul
|
|
|
6
6
|
import {runKalign} from './multiple-sequence-alignment';
|
|
7
7
|
import {pepseaMethods, runPepsea} from './pepsea';
|
|
8
8
|
import {checkInputColumnUI} from './check-input-column';
|
|
9
|
+
import {NotationConverter} from '@datagrok-libraries/bio/src/utils/notation-converter';
|
|
10
|
+
import {_package} from '../package';
|
|
9
11
|
|
|
10
|
-
export
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
if (seqCol == null) {
|
|
14
|
-
grok.shell.warning(`MSAError: dataset doesn't conain any Macromolecule column`);
|
|
15
|
-
return;
|
|
12
|
+
export class MsaWarning extends Error {
|
|
13
|
+
constructor(message: string, options?: ErrorOptions) {
|
|
14
|
+
super(message, options);
|
|
16
15
|
}
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
export async function multipleSequenceAlignmentUI(
|
|
19
|
+
col: DG.Column<string> | null = null,
|
|
20
|
+
pepseaMethod: typeof pepseaMethods[number] = pepseaMethods[0]
|
|
21
|
+
): Promise<DG.Column> {
|
|
22
|
+
return new Promise(async (resolve, reject) => {
|
|
23
|
+
const table = col?.dataFrame ?? grok.shell.t;
|
|
24
|
+
const seqCol = col ?? table.columns.bySemType(DG.SEMTYPE.MACROMOLECULE);
|
|
25
|
+
if (seqCol == null) {
|
|
26
|
+
const errMsg = `MSAError: dataset doesn't conain any Macromolecule column`;
|
|
27
|
+
grok.shell.warning(errMsg);
|
|
28
|
+
reject(new MsaWarning(errMsg));
|
|
29
|
+
}
|
|
17
30
|
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
31
|
+
// UI
|
|
32
|
+
const methodInput = ui.choiceInput('Method', pepseaMethod, pepseaMethods);
|
|
33
|
+
methodInput.setTooltip('Alignment method');
|
|
34
|
+
const gapOpenInput = ui.floatInput('Gap open', 1.53);
|
|
35
|
+
gapOpenInput.setTooltip('Gap opening penalty at group-to-group alignment');
|
|
36
|
+
const gapExtendInput = ui.floatInput('Gap extend', 0);
|
|
37
|
+
gapExtendInput.setTooltip('Gap extension penalty to skip the alignment');
|
|
38
|
+
const inputRootStyles = [methodInput.root.style, gapOpenInput.root.style, gapExtendInput.root.style];
|
|
39
|
+
let performAlignment: (() => Promise<DG.Column<string>>) | undefined;
|
|
26
40
|
|
|
27
|
-
|
|
28
|
-
const
|
|
29
|
-
|
|
41
|
+
// TODO: allow only macromolecule colums to be chosen
|
|
42
|
+
const colInput = ui.columnInput('Sequence', table, seqCol, () => {
|
|
43
|
+
performAlignment = onColInputChange(
|
|
44
|
+
colInput.value, table, inputRootStyles, methodInput, clustersColInput, gapOpenInput, gapExtendInput
|
|
45
|
+
);
|
|
46
|
+
}
|
|
47
|
+
) as DG.InputBase<DG.Column<string>>;
|
|
48
|
+
colInput.setTooltip('Sequences column to use for alignment');
|
|
49
|
+
const clustersColInput = ui.columnInput('Clusters', table, null);
|
|
50
|
+
clustersColInput.nullable = true;
|
|
51
|
+
colInput.fireChanged();
|
|
52
|
+
//if column is specified (from tests), run alignment and resolve with the result
|
|
53
|
+
if (col) {
|
|
54
|
+
performAlignment = onColInputChange(
|
|
55
|
+
col, table, inputRootStyles, methodInput, clustersColInput, gapOpenInput, gapExtendInput
|
|
56
|
+
);
|
|
30
57
|
|
|
31
|
-
|
|
32
|
-
|
|
58
|
+
await onDialogOk(colInput, table, performAlignment, resolve, reject);
|
|
59
|
+
return;
|
|
60
|
+
}
|
|
61
|
+
const dlg = ui.dialog('MSA')
|
|
62
|
+
.add(colInput)
|
|
63
|
+
.add(clustersColInput)
|
|
64
|
+
.add(methodInput)
|
|
65
|
+
.add(gapOpenInput)
|
|
66
|
+
.add(gapExtendInput)
|
|
67
|
+
.onOK(async () => {
|
|
68
|
+
await onDialogOk(colInput, table, performAlignment, resolve, reject);
|
|
69
|
+
})
|
|
70
|
+
.show();
|
|
71
|
+
});
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
async function onDialogOk(
|
|
75
|
+
colInput: DG.InputBase< DG.Column<any>>,
|
|
76
|
+
table: DG.DataFrame,
|
|
77
|
+
performAlignment: (() => Promise<DG.Column<string>>) | undefined,
|
|
78
|
+
resolve: (value: DG.Column<any>) => void,
|
|
79
|
+
reject: (reason: any) => void
|
|
80
|
+
): Promise<void> {
|
|
81
|
+
let msaCol: DG.Column<string> | null = null;
|
|
82
|
+
const pi = DG.TaskBarProgressIndicator.create('Analyze for MSA ...');
|
|
83
|
+
try {
|
|
84
|
+
colInput.fireChanged();
|
|
85
|
+
if (colInput.value.semType !== DG.SEMTYPE.MACROMOLECULE)
|
|
86
|
+
throw new Error('Chosen column has to be of Macromolecule semantic type');
|
|
87
|
+
if (performAlignment === undefined) // value can only be undefined when column can't be processed with either method
|
|
88
|
+
throw new Error('Invalid column format');
|
|
89
|
+
msaCol = await performAlignment(); // progress
|
|
90
|
+
if (msaCol == null)
|
|
91
|
+
return grok.shell.warning('Wrong column format');
|
|
92
|
+
|
|
93
|
+
table.columns.add(msaCol);
|
|
94
|
+
await grok.data.detectSemanticTypes(table);
|
|
95
|
+
|
|
96
|
+
resolve(msaCol);
|
|
97
|
+
} catch (err: any) {
|
|
98
|
+
const errMsg: string = err instanceof Error ? err.message : err.toString();
|
|
99
|
+
grok.shell.error(errMsg);
|
|
100
|
+
reject(err);
|
|
101
|
+
} finally {
|
|
102
|
+
pi.close();
|
|
103
|
+
}
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
function onColInputChange(
|
|
108
|
+
col: DG.Column<string>,
|
|
109
|
+
table: DG.DataFrame,
|
|
110
|
+
inputRootStyles: CSSStyleDeclaration[],
|
|
111
|
+
methodInput: DG.InputBase<string | null>,
|
|
112
|
+
clustersColInput: DG.InputBase<DG.Column<any> | null>,
|
|
113
|
+
gapOpenInput: DG.InputBase<number | null>,
|
|
114
|
+
gapExtendInput: DG.InputBase<number | null>
|
|
115
|
+
): (() => Promise<DG.Column<string>>) | undefined {
|
|
116
|
+
try {
|
|
117
|
+
if (col.semType !== DG.SEMTYPE.MACROMOLECULE)
|
|
118
|
+
return;
|
|
119
|
+
const unusedName = table.columns.getUnusedName(`msa(${col.name})`);
|
|
120
|
+
|
|
121
|
+
if (checkInputColumnUI(col, col.name,
|
|
122
|
+
[NOTATION.FASTA, NOTATION.SEPARATOR], [ALPHABET.DNA, ALPHABET.RNA, ALPHABET.PT], false)
|
|
123
|
+
) { // Kalign - natural alphabets. if the notation is separator, convert to fasta and then run kalign
|
|
33
124
|
for (const inputRootStyle of inputRootStyles)
|
|
34
125
|
inputRootStyle.display = 'none';
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
126
|
+
const potentialColNC = new NotationConverter(col);
|
|
127
|
+
const performCol: DG.Column<string> = potentialColNC.isFasta() ? col :
|
|
128
|
+
potentialColNC.convert(NOTATION.FASTA);
|
|
129
|
+
return async () => await runKalign(performCol, false, unusedName, clustersColInput.value);
|
|
130
|
+
} else if (checkInputColumnUI(col, col.name,
|
|
131
|
+
[NOTATION.HELM], [], false)
|
|
132
|
+
) { // PepSeA branch - Helm notation or separator notation with unknown alphabets
|
|
38
133
|
for (const inputRootStyle of inputRootStyles)
|
|
39
|
-
inputRootStyle.display
|
|
134
|
+
inputRootStyle.removeProperty('display');
|
|
40
135
|
|
|
41
|
-
|
|
42
|
-
|
|
136
|
+
return async () => await runPepsea(col, unusedName, methodInput.value!,
|
|
137
|
+
gapOpenInput.value!, gapExtendInput.value!, clustersColInput.value);
|
|
43
138
|
} else {
|
|
44
139
|
for (const inputRootStyle of inputRootStyles)
|
|
45
140
|
inputRootStyle.display = 'none';
|
|
46
141
|
|
|
47
|
-
|
|
142
|
+
return;
|
|
48
143
|
}
|
|
49
|
-
})
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
clustersColInput.nullable = true;
|
|
55
|
-
|
|
56
|
-
let msaCol: DG.Column<string> | null = null;
|
|
57
|
-
ui.dialog('MSA')
|
|
58
|
-
.add(colInput)
|
|
59
|
-
.add(clustersColInput)
|
|
60
|
-
.add(methodInput)
|
|
61
|
-
.add(gapOpenInput)
|
|
62
|
-
.add(gapExtendInput)
|
|
63
|
-
.onOK(async () => {
|
|
64
|
-
const pi = DG.TaskBarProgressIndicator.create('Analyze for MSA ...');
|
|
65
|
-
try {
|
|
66
|
-
colInput.fireChanged();
|
|
67
|
-
msaCol = await performAlignment(); // progress
|
|
68
|
-
if (msaCol == null)
|
|
69
|
-
return grok.shell.warning('Wrong column format');
|
|
70
|
-
|
|
71
|
-
table.columns.add(msaCol);
|
|
72
|
-
await grok.data.detectSemanticTypes(table);
|
|
73
|
-
} catch (err: any) {
|
|
74
|
-
const errMsg: string = err instanceof Error ? err.message : err.toString();
|
|
75
|
-
grok.shell.error(errMsg);
|
|
76
|
-
} finally {
|
|
77
|
-
pi.close();
|
|
78
|
-
}
|
|
79
|
-
})
|
|
80
|
-
.show();
|
|
144
|
+
} catch (err: any) {
|
|
145
|
+
const errMsg: string = err instanceof Error ? err.message : err.toString();
|
|
146
|
+
grok.shell.error(errMsg);
|
|
147
|
+
_package.logger.error(errMsg);
|
|
148
|
+
}
|
|
81
149
|
}
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
import * as DG from 'datagrok-api/dg';
|
|
3
3
|
|
|
4
4
|
import {FastaFileHandler} from '@datagrok-libraries/bio/src/utils/fasta-handler';
|
|
5
|
-
import {TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
5
|
+
import {ALIGNMENT, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
6
6
|
//@ts-ignore: there are no types for this library
|
|
7
7
|
import Aioli from '@biowasm/aioli';
|
|
8
8
|
|
|
@@ -25,7 +25,8 @@ function _stringsToFasta(sequences: string[]): string {
|
|
|
25
25
|
*
|
|
26
26
|
* @param {DG.Column} srcCol Column with sequences.
|
|
27
27
|
* @param {boolean} isAligned Whether the column is aligned.
|
|
28
|
-
* @param {string} unUsedName
|
|
28
|
+
* @param {string | undefined} unUsedName
|
|
29
|
+
* @param {DG.Column | null} clustersCol Column with clusters.
|
|
29
30
|
* @return {Promise<DG.Column>} Aligned sequences.
|
|
30
31
|
*/
|
|
31
32
|
export async function runKalign(srcCol: DG.Column<string>, isAligned: boolean = false, unUsedName: string = '',
|
|
@@ -40,7 +41,7 @@ export async function runKalign(srcCol: DG.Column<string>, isAligned: boolean =
|
|
|
40
41
|
if (clustersCol.type != DG.COLUMN_TYPE.STRING)
|
|
41
42
|
clustersCol = clustersCol.convertTo(DG.TYPE.STRING);
|
|
42
43
|
clustersCol.compact();
|
|
43
|
-
|
|
44
|
+
|
|
44
45
|
//TODO: use fixed-size inner arrays, but first need to expose the method to get each category count
|
|
45
46
|
const clustersColCategories = clustersCol.categories;
|
|
46
47
|
const clustersColData = clustersCol.getRawData();
|
|
@@ -61,8 +62,6 @@ export async function runKalign(srcCol: DG.Column<string>, isAligned: boolean =
|
|
|
61
62
|
for (let clusterIdx = 0; clusterIdx < clustersColCategories.length; ++clusterIdx) {
|
|
62
63
|
const clusterSequences = fastaSequences[clusterIdx];
|
|
63
64
|
const fasta = _stringsToFasta(clusterSequences);
|
|
64
|
-
|
|
65
|
-
console.log(['fasta.length =', fasta.length]);
|
|
66
65
|
|
|
67
66
|
await CLI.fs.writeFile(fastaInputFilename, fasta);
|
|
68
67
|
const output = await CLI.exec(`kalign ${fastaInputFilename} -f fasta -o ${fastaOutputFilename}`);
|
|
@@ -82,8 +81,7 @@ export async function runKalign(srcCol: DG.Column<string>, isAligned: boolean =
|
|
|
82
81
|
// units
|
|
83
82
|
const srcUnits = srcCol.getTag(DG.TAGS.UNITS);
|
|
84
83
|
//aligned
|
|
85
|
-
const
|
|
86
|
-
const tgtAligned = srcAligned + '.MSA';
|
|
84
|
+
const tgtAligned = ALIGNMENT.SEQ_MSA;
|
|
87
85
|
//alphabet
|
|
88
86
|
const srcAlphabet = srcCol.getTag(bioTAGS.alphabet);
|
|
89
87
|
|