@datagrok/bio 2.26.8 → 2.27.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +4 -0
- package/CLAUDE.md +35 -5
- package/detectors.js +4 -2
- package/dist/287.js +1 -1
- package/dist/287.js.map +1 -1
- package/dist/422.js +1 -1
- package/dist/422.js.map +1 -1
- package/dist/package-test.js +1 -1
- package/dist/package-test.js.map +1 -1
- package/dist/package.js +3 -3
- package/dist/package.js.map +1 -1
- package/package.json +2 -2
- package/scripts/mol-to-helm.py +642 -170
- package/src/analysis/sequence-activity-cliffs.ts +8 -6
- package/src/package-api.ts +9 -2
- package/src/package.g.ts +12 -0
- package/src/package.ts +23 -9
- package/src/tests/msa-tests.ts +6 -2
- package/src/utils/annotations/annotation-manager-ui.ts +1 -1
- package/src/utils/constants.ts +3 -7
- package/src/utils/monomer-lib/library-file-manager/ui.ts +1 -1
- package/src/utils/monomer-lib/monomer-manager/monomer-manager.ts +1 -1
- package/src/utils/multiple-sequence-alignment-ui.ts +403 -194
- package/src/utils/pepsea.ts +138 -116
- package/src/utils/types.ts +7 -4
- package/test-console-output-1.log +584 -615
- package/test-record-1.mp4 +0 -0
|
@@ -1,234 +1,443 @@
|
|
|
1
|
-
/* eslint-disable max-len */
|
|
2
|
-
/* eslint-disable max-params */
|
|
3
1
|
/* eslint-disable max-lines-per-function */
|
|
4
2
|
import * as grok from 'datagrok-api/grok';
|
|
5
3
|
import * as DG from 'datagrok-api/dg';
|
|
6
4
|
import * as ui from 'datagrok-api/ui';
|
|
7
5
|
|
|
6
|
+
import {Subscription} from 'rxjs';
|
|
7
|
+
|
|
8
8
|
import {ColumnInputOptions} from '@datagrok-libraries/utils/src/type-declarations';
|
|
9
|
-
import {ALPHABET, NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
9
|
+
import {ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
|
|
10
|
+
import {NotationProviderBase} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
|
|
11
|
+
import {SeqTemps} from '@datagrok-libraries/bio/src/utils/macromolecule/seq-handler';
|
|
10
12
|
import {ISeqHelper} from '@datagrok-libraries/bio/src/utils/seq-helper';
|
|
11
13
|
|
|
12
|
-
import {MsaWarning, runKalign} from './multiple-sequence-alignment';
|
|
13
|
-
import {pepseaMethods, runPepsea} from './pepsea';
|
|
14
|
+
import {MsaWarning, runKalign, checkForSingleSeqClusters} from './multiple-sequence-alignment';
|
|
14
15
|
import {checkInputColumn} from './check-input-column';
|
|
15
16
|
import {MultipleSequenceAlignmentUIOptions} from './types';
|
|
16
|
-
import {kalignVersion,
|
|
17
|
+
import {kalignVersion, MSA_ENGINE_ROLE} from './constants';
|
|
18
|
+
import {_package} from '../package';
|
|
17
19
|
|
|
18
20
|
import '../../css/msa.css';
|
|
19
|
-
|
|
21
|
+
|
|
22
|
+
/** Alignment back-end the dialog is currently configured for. */
type AlignmentMode = 'kalign' | 'engine';

/**
 * Mutable state shared between the MSA dialog's nested handlers.
 * Kept on an object (rather than in `let` locals) to avoid TypeScript
 * narrowing issues with closures.
 */
class MsaDialogState {
  /** Which alignment path is active. */
  mode: AlignmentMode;
  /** Engine function currently selected in the dialog, if any. */
  currentFunc: DG.Func | null;
  /** Prepared call for the selected engine, if any. */
  currentFuncCall: DG.FuncCall | null;

  constructor() {
    this.mode = 'kalign';
    this.currentFunc = null;
    this.currentFuncCall = null;
  }
}
|
|
30
|
+
|
|
20
31
|
|
|
21
32
|
/**
 * Builds the MSA dialog and performs multiple sequence alignment.
 *
 * Canonical sequences (FASTA/separator notation with DNA, RNA or PT alphabet) are aligned with
 * Kalign. HELM, and separator/custom/BILN sequences with an unknown alphabet, are aligned with
 * an engine function discovered through `DG.Func.find` by the `MSA_ENGINE_ROLE` role.
 *
 * When `options.col` is provided the alignment runs immediately and the dialog is not shown;
 * otherwise the dialog is shown and the alignment runs when OK is pressed.
 *
 * @param options - Input column, optional clusters column, Kalign parameters and optional
 *   engine name / engine parameters.
 * @param seqHelper - Helper used to obtain sequence handlers for notation checks and conversion.
 * @returns Promise resolved with the result column after it has been added to the table.
 *   Rejected with `MsaWarning` when there is no table or no macromolecule column, or with the
 *   error raised by the alignment itself. No cancel handler is registered here, so dismissing
 *   the dialog does not settle the promise.
 */
export async function multipleSequenceAlignmentUI(
  options: MultipleSequenceAlignmentUIOptions, seqHelper: ISeqHelper,
): Promise<DG.Column> {
  return new Promise(async (resolve, reject) => {
    try {
      const table = options.col?.dataFrame ?? grok.shell.t;
      if (!table) {
        reject(new MsaWarning(ui.divText('MSA requires a dataset with a macromolecule column.')));
        return;
      }

      const seqCol = options.col ?? table.columns.bySemType(DG.SEMTYPE.MACROMOLECULE);
      if (!seqCol) {
        reject(new MsaWarning(ui.divText('MSA requires a dataset with a macromolecule column.')));
        return;
      }

      const state = new MsaDialogState();

      // --- Common UI ---

      // Last valid selection; restored when the user picks a non-macromolecule column.
      let prevSeqCol = seqCol;
      const colInput = ui.input.column('Sequence', {
        table, value: seqCol,
        onValueChanged: async (value: DG.Column<any>) => {
          if (!value || value.semType !== DG.SEMTYPE.MACROMOLECULE) {
            okBtn.disabled = true;
            await DG.delay(0);
            colInput.value = prevSeqCol as DG.Column<string>;
            return;
          }
          prevSeqCol = value;
          okBtn.disabled = false;
          await onColumnChanged(value);
        },
        filter: (col: DG.Column) => col.semType === DG.SEMTYPE.MACROMOLECULE,
      } as ColumnInputOptions) as DG.InputBase<DG.Column<string>>;
      colInput.setTooltip('Sequences column to use for alignment');

      const clustersColInput = ui.input.column('Clusters', {table, value: options.clustersCol!});
      clustersColInput.nullable = true;

      const onlySelectedInput = ui.input.bool('Selected Rows Only', {value: false});

      // --- Kalign UI (canonical sequences) ---

      const kalignGapOpen = ui.input.float('Gap open', {value: options?.kalign?.gapOpen});
      kalignGapOpen.setTooltip('Gap opening penalty at group-to-group alignment');
      const kalignGapExtend = ui.input.float('Gap extend', {value: options?.kalign?.gapExtend});
      kalignGapExtend.setTooltip('Gap extension penalty to skip the alignment');
      const kalignTerminalGap = ui.input.float('Terminal gap', {value: options?.kalign?.terminalGap});
      kalignTerminalGap.setTooltip('Penalty for opening a gap at the beginning or end of the sequence');
      const kalignVersionDiv = ui.p(`Kalign version: ${kalignVersion}`, 'kalign-version');

      const kalignParamsDiv = ui.inputs([kalignGapOpen, kalignGapExtend, kalignTerminalGap]);
      kalignParamsDiv.hidden = true;
      const kalignParamsButton = ui.button('Alignment parameters', () => {
        kalignParamsDiv.hidden = !kalignParamsDiv.hidden;
        [kalignGapOpen, kalignGapExtend, kalignTerminalGap].forEach((input) => {
          input.root.style.removeProperty('max-width');
          input.captionLabel.style.removeProperty('max-width');
        });
      }, 'Adjust alignment parameters such as penalties for opening and extending gaps');
      kalignParamsButton.classList.add('msa-params-button');
      kalignParamsButton.prepend(ui.icons.settings(() => null, 'Settings'));

      const kalignElements = [kalignParamsDiv, kalignParamsButton, kalignVersionDiv];

      // --- Engine UI (non-canonical sequences, dynamically discovered) ---

      const msaEngines = DG.Func.find({meta: {role: MSA_ENGINE_ROLE}});
      // Sort so the default engine (meta.defaultAlignment) comes first
      msaEngines.sort((a, b) => {
        const aDefault = a.options['defaultAlignment'] === 'true' ? 1 : 0;
        const bDefault = b.options['defaultAlignment'] === 'true' ? 1 : 0;
        return bDefault - aDefault;
      });

      const engineInput = ui.input.choice('Engine', {
        value: msaEngines.length > 0 ? msaEngines[0].friendlyName : '',
        items: msaEngines.map((f) => f.friendlyName),
      });

      const engineDescDiv = ui.div('', {style: {fontSize: '12px', opacity: '0.7', marginBottom: '6px'}});
      const engineParamsDiv = ui.div();
      const engineParamsButton = ui.button('Alignment parameters', () => {
        engineParamsDiv.hidden = !engineParamsDiv.hidden;
      }, 'Adjust engine-specific alignment parameters');
      engineParamsButton.classList.add('msa-params-button');
      engineParamsButton.prepend(ui.icons.settings(() => null, 'Settings'));

      // "Include HELM" checkbox: shown when column has a notation provider with fromHelm
      const includeHelmInput = ui.input.bool('Include HELM', {value: true});
      includeHelmInput.setTooltip('Also add the aligned HELM column alongside the converted notation');
      includeHelmInput.root.style.display = 'none';

      const engineElements = [
        engineInput.root, engineDescDiv, engineParamsButton, engineParamsDiv, includeHelmInput.root,
      ];

      // --- State management ---

      /** Rebuilds the parameter editor and the prepared call for the currently selected engine. */
      async function updateEngineEditor(): Promise<void> {
        engineParamsDiv.innerHTML = '';
        state.currentFuncCall = null;

        const selectedName = engineInput.value;
        state.currentFunc = msaEngines.find((f) => f.friendlyName === selectedName) ?? null;
        engineDescDiv.textContent = state.currentFunc?.description ?? '';
        if (!state.currentFunc) return;

        state.currentFuncCall = state.currentFunc.prepare({});
        const inputs = await state.currentFuncCall.buildEditor(engineParamsDiv);

        // Hide the first input (sequence column) - managed by the dialog's column selector
        if (inputs.length > 0 && inputs[0].inputType === 'column')
          inputs[0].root.style.display = 'none';
      }

      const _engineSub: Subscription = engineInput.onChanged.subscribe(() => updateEngineEditor());

      /** Records the active mode and shows only the controls belonging to it. */
      function switchMode(newMode: AlignmentMode): void {
        state.mode = newMode;
        for (const el of kalignElements)
          el.style.display = newMode === 'kalign' ? '' : 'none';
        for (const el of engineElements)
          el.style.display = newMode === 'engine' ? '' : 'none';
      }

      /** Selects the alignment mode from the column's notation/alphabet; errors are reported, not thrown. */
      async function onColumnChanged(col: DG.Column<string>): Promise<void> {
        try {
          if (col.semType !== DG.SEMTYPE.MACROMOLECULE) return;

          const isCanonical = checkInputColumn(
            col, col.name, seqHelper,
            [NOTATION.FASTA, NOTATION.SEPARATOR], [ALPHABET.DNA, ALPHABET.RNA, ALPHABET.PT],
          )[0];
          const isHelm = checkInputColumn(col, col.name, seqHelper, [NOTATION.HELM], [])[0];
          const isSepUnknown = checkInputColumn(
            col, col.name, seqHelper, [NOTATION.SEPARATOR, NOTATION.CUSTOM, NOTATION.BILN], [ALPHABET.UN],
          )[0];

          if (isCanonical) {
            switchMode('kalign');
            // NOTE(review): these resets also discard values seeded from options.kalign above,
            // so programmatic Kalign parameters appear to be ignored - confirm this is intended.
            kalignGapOpen.value = null;
            kalignGapExtend.value = null;
            kalignTerminalGap.value = null;
          } else if (isHelm || isSepUnknown) {
            if (msaEngines.length === 0) {
              grok.shell.warning('No MSA engines found for non-canonical sequences.');
              switchMode('kalign');
              return;
            }
            switchMode('engine');
            await updateEngineEditor();

            // Show "Include HELM" checkbox if the column has a notation provider with fromHelm
            const np = col.temp?.[SeqTemps.notationProvider];
            const npCons = np ? np.constructor as typeof NotationProviderBase : null;
            const hasFromHelm = npCons?.implementsFromHelm === true;
            includeHelmInput.root.style.display = hasFromHelm ? '' : 'none';
          } else
            switchMode('kalign');
        } catch (err: unknown) {
          const errMsg = err instanceof Error ? err.message : String(err);
          grok.shell.error(errMsg);
          _package.logger.error(errMsg);
        }
      }

      // --- Alignment execution ---

      /** Validates the selected column and dispatches to the Kalign or engine path. */
      async function doAlignment(): Promise<DG.Column<string>> {
        const col = colInput.value;
        if (!col || col.semType !== DG.SEMTYPE.MACROMOLECULE)
          throw new Error('Chosen column must be of Macromolecule semantic type');

        if (state.mode === 'kalign')
          return doKalign(col, table);

        return doEngineMsa(col, table);
      }

      /** Runs Kalign on the column, converting it to FASTA first when it is not FASTA already. */
      async function doKalign(col: DG.Column<string>, df: DG.DataFrame): Promise<DG.Column<string>> {
        const unusedName = df.columns.getUnusedName(`msa(${col.name})`);
        const sh = seqHelper.getSeqHandler(col);
        const fastaCol = sh.isFasta() ? col : sh.convert(NOTATION.FASTA);
        return runKalign(
          df, fastaCol, false, unusedName, clustersColInput.value,
          kalignGapOpen.value ?? undefined, kalignGapExtend.value ?? undefined,
          kalignTerminalGap.value ?? undefined, onlySelectedInput.value,
        );
      }

      /**
       * Runs the selected engine on the column in HELM notation. When the column's notation
       * provider implements fromHelm, the aligned HELM is converted back and that column is
       * returned; the HELM column is then added to `df` only if "Include HELM" is checked.
       */
      async function doEngineMsa(col: DG.Column<string>, df: DG.DataFrame): Promise<DG.Column<string>> {
        if (!state.currentFunc || !state.currentFuncCall)
          throw new Error('No MSA engine selected');

        const func = state.currentFunc;
        if (func.inputs.length === 0)
          throw new Error(`MSA engine '${func.friendlyName}' declares no input parameters`);

        // Convert to HELM if needed
        const sh = seqHelper.getSeqHandler(col);
        const srcCol: DG.Column<string> = sh.isHelm() ? col : sh.convert(NOTATION.HELM);

        const firstParamName = func.inputs[0].name;

        // Read config params from the editor (all params except the first column param)
        const configParams: Record<string, any> = {};
        for (let i = 1; i < func.inputs.length; i++) {
          const name = func.inputs[i].name;
          configParams[name] = state.currentFuncCall.inputs[name];
        }

        const helmResultCol = await runEngineWithClustering(
          func, firstParamName, configParams, srcCol,
          clustersColInput.value, onlySelectedInput.value, df,
        );

        // If column has a notation provider with fromHelm, convert result back to original notation
        const np = col.temp?.[SeqTemps.notationProvider];
        const npCons = np ? np.constructor as typeof NotationProviderBase : null;
        if (npCons?.implementsFromHelm) {
          // Add the HELM column first (if requested) so that the name picked below for the
          // converted column cannot collide with it.
          if (includeHelmInput.value)
            df.columns.add(helmResultCol);

          const convertedName = df.columns.getUnusedName(`msa(${col.name})`);
          const convertedCol = DG.Column.string(convertedName, helmResultCol.length);
          let failedCount = 0;
          convertedCol.init((i) => {
            const helm = helmResultCol.get(i);
            if (!helm) return '';
            try {
              return npCons.convertFromHelm(helm, {});
            } catch {
              // Best effort: leave the row empty, but count it so the user is told once below.
              failedCount++;
              return '';
            }
          });
          if (failedCount > 0) {
            const msg = `MSA: ${failedCount} sequence(s) could not be converted from HELM and were left empty.`;
            grok.shell.warning(msg);
          }
          convertedCol.semType = DG.SEMTYPE.MACROMOLECULE;
          convertedCol.meta.units = NOTATION.CUSTOM;
          convertedCol.setTag(bioTAGS.aligned, 'SEQ.MSA');
          convertedCol.setTag(bioTAGS.alphabet, ALPHABET.UN);

          return convertedCol;
        }

        return helmResultCol;
      }

      /**
       * Runs the alignment, adds the result column to the table and settles the outer promise.
       * Shared by the dialog's OK handler and the programmatic path.
       */
      const runAndSettle = async (): Promise<void> => {
        const pi = DG.TaskBarProgressIndicator.create('Performing MSA...');
        try {
          const resultCol = await doAlignment();
          table.columns.add(resultCol);
          await grok.data.detectSemanticTypes(table);
          if (resultCol.meta.units !== NOTATION.HELM)
            resultCol.setTag(bioTAGS.aligned, 'SEQ.MSA');
          resolve(resultCol);
        } catch (err: unknown) {
          reject(err);
        } finally {
          pi.close();
        }
      };

      /** Apply engine and params from options (for programmatic/test use). */
      async function applyEngineOptions(): Promise<void> {
        if (!options.engine || state.mode !== 'engine') return;

        const engine = msaEngines.find(
          (f) => f.name === options.engine || f.friendlyName === options.engine,
        );
        if (!engine) return;

        engineInput.value = engine.friendlyName;
        await updateEngineEditor();
        if (options.engineParams && state.currentFuncCall) {
          for (const [key, value] of Object.entries(options.engineParams))
            state.currentFuncCall.inputs[key] = value;
        }
      }

      // --- Dialog ---

      const dlg = ui.dialog('MSA')
        .add(colInput)
        .add(clustersColInput)
        .add(engineInput)
        .add(engineDescDiv)
        .add(engineParamsButton)
        .add(engineParamsDiv)
        .add(includeHelmInput)
        .add(kalignParamsDiv)
        .add(kalignParamsButton)
        .add(kalignVersionDiv)
        .add(onlySelectedInput)
        .onOK(() => runAndSettle());
      const okBtn = dlg.getButton('OK');

      // Start in kalign mode; the column handler switches to engine mode when required.
      switchMode('kalign');

      // If column is pre-specified (tests/programmatic), run immediately without dialog.
      // The mode is detected by the explicit call only: also firing the input's change event
      // here would run the same detection a second time, concurrently.
      if (options.col) {
        await onColumnChanged(options.col);
        await applyEngineOptions();
        await runAndSettle();
        return;
      }

      // Interactive path: detect the mode from the initial column, then show the dialog.
      colInput.fireChanged();
      dlg.show();
    } catch (err: unknown) {
      reject(err);
    }
  });
}
|
|
126
364
|
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
365
|
+
|
|
366
|
+
/**
 * Runs a discovered MSA engine function with per-cluster alignment support.
 * Groups rows by cluster, creates subset columns, calls the engine per cluster,
 * and merges results into a single output column.
 *
 * @param func - Engine function to call once per cluster.
 * @param colParamName - Name of the engine input that receives the sequence column.
 * @param configParams - Remaining engine inputs, passed unchanged to every call.
 * @param srcCol - Source sequences.
 * @param clustersCol - Cluster labels; when null all rows form a single cluster. Non-string
 *   columns are converted to string before grouping.
 * @param onlySelected - When true, only rows selected in `table` are aligned.
 * @param table - Table used for the row selection and for choosing an unused result name.
 * @returns A new column named `msa(<srcCol.name>)` (made unique within `table`) holding the
 *   aligned value for every processed row and an empty string for rows that were not processed.
 *   The column is not added to `table`.
 * @throws Error when `onlySelected` is set but nothing is selected, or when an engine call
 *   returns no output column.
 */
async function runEngineWithClustering(
  func: DG.Func, colParamName: string, configParams: Record<string, any>,
  srcCol: DG.Column<string>, clustersCol: DG.Column | null,
  onlySelected: boolean, table: DG.DataFrame,
): Promise<DG.Column<string>> {
  const rowCount = srcCol.length;

  // Group rows by cluster; without a clusters column every row gets the same label '0'.
  clustersCol ??= DG.Column.string('Clusters', rowCount).init('0');
  if (clustersCol.type !== DG.COLUMN_TYPE.STRING)
    clustersCol = clustersCol.convertTo(DG.TYPE.STRING);

  const categories = clustersCol.categories;
  // Raw values are used as indexes into clusterIndexes, one slot per category.
  const data = clustersCol.getRawData();
  const clusterIndexes: number[][] = new Array(categories.length);

  if (onlySelected) {
    const sel = table.selection;
    if (sel.trueCount === 0)
      throw new Error('No selected rows in the table.');
    for (let i = -1; (i = sel.findNext(i, true)) !== -1;)
      (clusterIndexes[data[i]] ??= []).push(i);
  } else {
    for (let i = 0; i < rowCount; i++)
      (clusterIndexes[data[i]] ??= []).push(i);
  }
  checkForSingleSeqClusters(clusterIndexes, categories);

  const unusedName = table.columns.getUnusedName(`msa(${srcCol.name})`);
  const resultValues: string[] = new Array(rowCount).fill('');
  let lastResultCol: DG.Column<string> | null = null;

  for (const rowIds of clusterIndexes) {
    // Slots stay empty for categories that have no (selected) rows.
    if (!rowIds || rowIds.length === 0) continue;

    // Create a subset column with just this cluster's sequences
    const subsetSeqs = rowIds.map((i) => srcCol.get(i)!);
    const subsetCol = DG.Column.fromStrings('seq', subsetSeqs);
    copyColumnMetadata(srcCol, subsetCol);
    DG.DataFrame.fromColumns([subsetCol]); // attach to a DataFrame for column operations

    // Call the engine function with the subset
    const call = func.prepare({[colParamName]: subsetCol, ...configParams});
    await call.call();
    const clusterResult = call.getOutputParamValue() as DG.Column<string> | null;
    if (!clusterResult)
      throw new Error(`MSA engine '${func.friendlyName}' returned no result column.`);
    lastResultCol = clusterResult;

    // Map cluster results back to original row positions
    for (let i = 0; i < rowIds.length; i++)
      resultValues[rowIds[i]] = clusterResult.get(i) ?? '';
  }

  // Build final column with metadata from the engine's output (the last cluster's result).
  const finalCol = DG.Column.fromStrings(unusedName, resultValues);
  if (lastResultCol)
    copyColumnMetadata(lastResultCol, finalCol);

  return finalCol;
}
|
|
217
434
|
|
|
218
|
-
type MSADialogType = 'kalign' | 'pepsea';
|
|
219
435
|
|
|
220
|
-
function
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
for (const inputRootStyle of kalignInputRootStyles)
|
|
227
|
-
inputRootStyle.removeProperty('display');
|
|
228
|
-
} else {
|
|
229
|
-
for (const inputRootStyle of kalignInputRootStyles)
|
|
230
|
-
inputRootStyle.display = 'none';
|
|
231
|
-
for (const inputRootStyle of pepseaInputRootStyles)
|
|
232
|
-
inputRootStyle.removeProperty('display');
|
|
436
|
+
/**
 * Copies sequence-related metadata from one column to another: the semantic type, the units,
 * and the alphabet / separator / alphabetIsMultichar tags. A tag is copied only when its value
 * on the source column is truthy.
 *
 * @param src - Column to read metadata from.
 * @param dst - Column to write metadata to.
 */
function copyColumnMetadata(src: DG.Column, dst: DG.Column): void {
  dst.semType = src.semType;
  dst.meta.units = src.meta.units;

  const propagatedTags = [bioTAGS.alphabet, bioTAGS.separator, bioTAGS.alphabetIsMultichar];
  propagatedTags.forEach((tagName) => {
    const tagValue = src.getTag(tagName);
    if (!tagValue) return;
    dst.setTag(tagName, tagValue);
  });
}
|