@datagrok/bio 2.26.7 → 2.27.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,234 +1,443 @@
1
- /* eslint-disable max-len */
2
- /* eslint-disable max-params */
3
1
  /* eslint-disable max-lines-per-function */
4
2
  import * as grok from 'datagrok-api/grok';
5
3
  import * as DG from 'datagrok-api/dg';
6
4
  import * as ui from 'datagrok-api/ui';
7
5
 
6
+ import {Subscription} from 'rxjs';
7
+
8
8
  import {ColumnInputOptions} from '@datagrok-libraries/utils/src/type-declarations';
9
- import {ALPHABET, NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecule';
9
+ import {ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
10
+ import {NotationProviderBase} from '@datagrok-libraries/bio/src/utils/macromolecule/types';
11
+ import {SeqTemps} from '@datagrok-libraries/bio/src/utils/macromolecule/seq-handler';
10
12
  import {ISeqHelper} from '@datagrok-libraries/bio/src/utils/seq-helper';
11
13
 
12
- import {MsaWarning, runKalign} from './multiple-sequence-alignment';
13
- import {pepseaMethods, runPepsea} from './pepsea';
14
+ import {MsaWarning, runKalign, checkForSingleSeqClusters} from './multiple-sequence-alignment';
14
15
  import {checkInputColumn} from './check-input-column';
15
16
  import {MultipleSequenceAlignmentUIOptions} from './types';
16
- import {kalignVersion, msaDefaultOptions} from './constants';
17
+ import {kalignVersion, MSA_ENGINE_ROLE} from './constants';
18
+ import {_package} from '../package';
17
19
 
18
20
  import '../../css/msa.css';
19
- import {_package} from '../package';
21
+
22
+ type AlignmentMode = 'kalign' | 'engine';
23
+
24
+ /** State holder for the MSA dialog, avoids TypeScript narrowing issues with closures. */
25
+ class MsaDialogState {
26
// Active alignment path: switchMode() writes it, doAlignment() dispatches on it. Starts as 'kalign'.
+ mode: AlignmentMode = 'kalign';
27
// Engine function matching the 'Engine' choice input; updateEngineEditor() sets it, null when nothing matches.
+ currentFunc: DG.Func | null = null;
28
// Prepared call for currentFunc; doEngineMsa() reads the engine parameters from its inputs.
+ currentFuncCall: DG.FuncCall | null = null;
29
+ }
30
+
20
31
 
21
32
  export async function multipleSequenceAlignmentUI(
22
33
  options: MultipleSequenceAlignmentUIOptions, seqHelper: ISeqHelper,
23
34
  ): Promise<DG.Column> {
24
35
  return new Promise(async (resolve, reject) => {
25
- options.clustersCol ??= null;
26
- options.pepsea ??= {};
27
- options.pepsea.method ??= msaDefaultOptions.pepsea.method;
28
- options.pepsea.gapOpen ??= msaDefaultOptions.pepsea.gapOpen;
29
- options.pepsea.gapExtend ??= msaDefaultOptions.pepsea.gapExtend;
30
-
31
- const table = options.col?.dataFrame ?? grok.shell.t;
32
- if (!table) {
33
- const errMsg: string = `Multiple Sequence Alignment analysis requires a dataset with a macromolecule column.`;
34
- reject(new MsaWarning(ui.divText(errMsg)));
35
- return; // Prevents creating the MSA dialog
36
- }
36
+ try {
37
+ const table = options.col?.dataFrame ?? grok.shell.t;
38
+ if (!table) {
39
+ reject(new MsaWarning(ui.divText('MSA requires a dataset with a macromolecule column.')));
40
+ return;
41
+ }
37
42
 
38
- const seqCol = options.col ?? table.columns.bySemType(DG.SEMTYPE.MACROMOLECULE);
39
- if (seqCol == null) {
40
- const errMsg: string = `Multiple Sequence Alignment analysis requires a dataset with a macromolecule column.`;
41
- reject(new MsaWarning(ui.divText(errMsg)));
42
- return; // Prevents creating the MSA dialog
43
- }
43
+ const seqCol = options.col ?? table.columns.bySemType(DG.SEMTYPE.MACROMOLECULE);
44
+ if (!seqCol) {
45
+ reject(new MsaWarning(ui.divText('MSA requires a dataset with a macromolecule column.')));
46
+ return;
47
+ }
44
48
 
45
- // UI for PepSea alignment
46
- const methodInput = ui.input.choice('Method', {value: options.pepsea.method, items: pepseaMethods});
47
- methodInput.setTooltip('Alignment method');
48
-
49
- // UI for Kalign alignment
50
- const terminalGapInput = ui.input.float('Terminal gap', {value: options?.kalign?.terminalGap});
51
- terminalGapInput.setTooltip('Penalty for opening a gap at the beginning or end of the sequence');
52
- const kalignVersionDiv = ui.p(`Kalign version: ${kalignVersion}`, 'kalign-version');
53
-
54
- // shared UI
55
- const gapOpenInput = ui.input.float('Gap open', {value: options.pepsea.gapOpen});
56
- gapOpenInput.setTooltip('Gap opening penalty at group-to-group alignment');
57
- const gapExtendInput = ui.input.float('Gap extend', {value: options.pepsea.gapExtend});
58
- gapExtendInput.setTooltip('Gap extension penalty to skip the alignment');
59
-
60
- const onlySelectedRowsInput = ui.input.bool('Selected Rows Only', {value: false});
61
-
62
- const msaParamsDiv = ui.inputs([gapOpenInput, gapExtendInput, terminalGapInput]);
63
- const msaParamsButton = ui.button('Alignment parameters', () => {
64
- msaParamsDiv.hidden = !msaParamsDiv.hidden;
65
- [gapOpenInput, gapExtendInput, terminalGapInput].forEach((input) => {
66
- input.root.style.removeProperty('max-width');
67
- input.captionLabel.style.removeProperty('max-width');
68
- });
69
- }, 'Adjust alignment parameters such as penalties for opening and extending gaps');
70
- msaParamsButton.classList.add('msa-params-button');
71
- msaParamsDiv.hidden = true;
72
- msaParamsButton.prepend(ui.icons.settings(() => null));
73
- const pepseaInputRootStyles: CSSStyleDeclaration[] = [methodInput.root.style];
74
- const kalignInputRootStyles: CSSStyleDeclaration[] = [terminalGapInput.root.style, kalignVersionDiv.style];
75
-
76
- let performAlignment: (() => Promise<DG.Column<string> | null>) | undefined;
77
-
78
- let prevSeqCol = seqCol;
79
- const colInput = ui.input.column(
80
- 'Sequence', {
81
- table: table, value: seqCol, onValueChanged: async (value: DG.Column<any>) => {
49
+ const state = new MsaDialogState();
50
+
51
+ // --- Common UI ---
52
+
53
+ let prevSeqCol = seqCol;
54
+ const colInput = ui.input.column('Sequence', {
55
+ table, value: seqCol,
56
+ onValueChanged: async (value: DG.Column<any>) => {
82
57
  if (!value || value.semType !== DG.SEMTYPE.MACROMOLECULE) {
83
58
  okBtn.disabled = true;
84
- await DG.delay(0); // to
59
+ await DG.delay(0);
85
60
  colInput.value = prevSeqCol as DG.Column<string>;
86
61
  return;
87
62
  }
88
63
  prevSeqCol = value;
89
64
  okBtn.disabled = false;
90
- performAlignment = await onColInputChange(
91
- colInput.value, table, seqHelper, pepseaInputRootStyles, kalignInputRootStyles,
92
- methodInput, clustersColInput, gapOpenInput, gapExtendInput, terminalGapInput, onlySelectedRowsInput
93
- );
94
- }, filter: (col: DG.Column) => col.semType === DG.SEMTYPE.MACROMOLECULE
95
- } as ColumnInputOptions
96
- ) as DG.InputBase<DG.Column<string>>;
97
- colInput.setTooltip('Sequences column to use for alignment');
98
- const clustersColInput = ui.input.column('Clusters', {table: table, value: options.clustersCol!});
99
- clustersColInput.nullable = true;
100
-
101
- const dlg = ui.dialog('MSA')
102
- .add(colInput)
103
- .add(clustersColInput)
104
- .add(methodInput)
105
- .add(msaParamsDiv)
106
- .add(msaParamsButton)
107
- .add(onlySelectedRowsInput)
108
- .add(kalignVersionDiv)
109
- .onOK(async () => { await onDialogOk(colInput, table, performAlignment, resolve, reject); });
110
- const okBtn = dlg.getButton('OK');
111
-
112
- colInput.fireChanged(); // changes okBtn
113
- //if column is specified (from tests), run alignment and resolve with the result
114
- if (options.col) {
115
- performAlignment = await onColInputChange(
116
- options.col, table, seqHelper, pepseaInputRootStyles, kalignInputRootStyles,
117
- methodInput, clustersColInput, gapOpenInput, gapExtendInput, terminalGapInput, onlySelectedRowsInput
118
- );
119
- await onDialogOk(colInput, table, performAlignment, resolve, reject);
120
- return; // Prevents show the dialog
121
- }
65
+ await onColumnChanged(value);
66
+ },
67
+ filter: (col: DG.Column) => col.semType === DG.SEMTYPE.MACROMOLECULE,
68
+ } as ColumnInputOptions) as DG.InputBase<DG.Column<string>>;
69
+ colInput.setTooltip('Sequences column to use for alignment');
70
+
71
+ const clustersColInput = ui.input.column('Clusters', {table, value: options.clustersCol!});
72
+ clustersColInput.nullable = true;
73
+
74
+ const onlySelectedInput = ui.input.bool('Selected Rows Only', {value: false});
75
+
76
+ // --- Kalign UI (canonical sequences) ---
77
+
78
+ const kalignGapOpen = ui.input.float('Gap open', {value: options?.kalign?.gapOpen});
79
+ kalignGapOpen.setTooltip('Gap opening penalty at group-to-group alignment');
80
+ const kalignGapExtend = ui.input.float('Gap extend', {value: options?.kalign?.gapExtend});
81
+ kalignGapExtend.setTooltip('Gap extension penalty to skip the alignment');
82
+ const kalignTerminalGap = ui.input.float('Terminal gap', {value: options?.kalign?.terminalGap});
83
+ kalignTerminalGap.setTooltip('Penalty for opening a gap at the beginning or end of the sequence');
84
+ const kalignVersionDiv = ui.p(`Kalign version: ${kalignVersion}`, 'kalign-version');
85
+
86
+ const kalignParamsDiv = ui.inputs([kalignGapOpen, kalignGapExtend, kalignTerminalGap]);
87
+ kalignParamsDiv.hidden = true;
88
+ const kalignParamsButton = ui.button('Alignment parameters', () => {
89
+ kalignParamsDiv.hidden = !kalignParamsDiv.hidden;
90
+ [kalignGapOpen, kalignGapExtend, kalignTerminalGap].forEach((input) => {
91
+ input.root.style.removeProperty('max-width');
92
+ input.captionLabel.style.removeProperty('max-width');
93
+ });
94
+ }, 'Adjust alignment parameters such as penalties for opening and extending gaps');
95
+ kalignParamsButton.classList.add('msa-params-button');
96
+ kalignParamsButton.prepend(ui.icons.settings(() => null));
97
+
98
+ const kalignElements = [kalignParamsDiv, kalignParamsButton, kalignVersionDiv];
99
+
100
+ // --- Engine UI (non-canonical sequences, dynamically discovered) ---
101
+
102
+ const msaEngines = DG.Func.find({meta: {role: MSA_ENGINE_ROLE}});
103
+ // Sort so the default engine (meta.defaultAlignment) comes first
104
+ msaEngines.sort((a, b) => {
105
+ const aDefault = a.options['defaultAlignment'] === 'true' ? 1 : 0;
106
+ const bDefault = b.options['defaultAlignment'] === 'true' ? 1 : 0;
107
+ return bDefault - aDefault;
108
+ });
109
+
110
+ const engineInput = ui.input.choice('Engine', {
111
+ value: msaEngines.length > 0 ? msaEngines[0].friendlyName : '',
112
+ items: msaEngines.map((f) => f.friendlyName),
113
+ });
114
+
115
+ const engineDescDiv = ui.div('', {style: {fontSize: '12px', opacity: '0.7', marginBottom: '6px'}});
116
+ const engineParamsDiv = ui.div();
117
+ const engineParamsButton = ui.button('Alignment parameters', () => {
118
+ engineParamsDiv.hidden = !engineParamsDiv.hidden;
119
+ }, 'Adjust engine-specific alignment parameters');
120
+ engineParamsButton.classList.add('msa-params-button');
121
+ engineParamsButton.prepend(ui.icons.settings(() => null));
122
+
123
+ // "Include HELM" checkbox: shown when column has a notation provider with fromHelm
124
+ const includeHelmInput = ui.input.bool('Include HELM', {value: true});
125
+ includeHelmInput.setTooltip('Also add the aligned HELM column alongside the converted notation');
126
+ includeHelmInput.root.style.display = 'none';
127
+
128
+ const engineElements = [
129
+ engineInput.root, engineDescDiv, engineParamsButton, engineParamsDiv, includeHelmInput.root,
130
+ ];
131
+
132
+ // --- State management ---
133
+
134
/**
 * Rebuilds the engine parameter editor for the engine currently chosen in engineInput.
 * Clears engineParamsDiv, resolves the engine by friendlyName, shows its description,
 * prepares a fresh FuncCall and lets that call build its inputs into engineParamsDiv.
 * When no engine matches, state.currentFunc and state.currentFuncCall are left null.
 */
+ async function updateEngineEditor(): Promise<void> {
135
+ engineParamsDiv.innerHTML = '';
136
// Reset up front: if the lookup below finds nothing, the call stays null.
+ state.currentFuncCall = null;
137
+
138
+ const selectedName = engineInput.value;
139
+ state.currentFunc = msaEngines.find((f) => f.friendlyName === selectedName) ?? null;
140
+ engineDescDiv.textContent = state.currentFunc?.description ?? '';
141
+ if (!state.currentFunc) return;
142
+
143
+ state.currentFuncCall = state.currentFunc.prepare({});
144
// NOTE(review): overlapping invocations are not serialized; confirm that a buildEditor()
// finishing late cannot add inputs of a stale call to the already-cleared div.
+ const inputs = await state.currentFuncCall.buildEditor(engineParamsDiv);
145
+
146
+ // Hide the first input (sequence column) - managed by the dialog's column selector
147
+ if (inputs.length > 0 && inputs[0].inputType === 'column')
148
+ inputs[0].root.style.display = 'none';
149
+ }
150
+
151
+ const _engineSub: Subscription = engineInput.onChanged.subscribe(() => updateEngineEditor());
152
+
153
/**
 * Records the new alignment mode in state.mode and toggles visibility of the two UI groups:
 * elements of the active group get an empty inline display value, the other group gets 'none'.
 */
+ function switchMode(newMode: AlignmentMode): void {
153
+ state.mode = newMode;
154
+ for (const el of kalignElements)
155
+ el.style.display = newMode === 'kalign' ? '' : 'none';
156
// engineElements includes includeHelmInput.root, so entering 'engine' mode un-hides it here;
// onColumnChanged() sets its display again after calling switchMode('engine').
+ for (const el of engineElements)
157
+ el.style.display = newMode === 'engine' ? '' : 'none';
158
+ }
160
+
161
/**
 * Chooses the alignment mode for the given column and updates the dialog accordingly.
 * Does nothing for columns whose semType is not Macromolecule.
 * Errors are shown via grok.shell.error and logged through _package.logger; they are not rethrown.
 */
+ async function onColumnChanged(col: DG.Column<string>): Promise<void> {
162
+ try {
163
+ if (col.semType !== DG.SEMTYPE.MACROMOLECULE) return;
164
+
165
// Only element [0] of each checkInputColumn() result is used
// (presumably the "column is acceptable" flag - confirm in check-input-column).
+ const isCanonical = checkInputColumn(
166
+ col, col.name, seqHelper,
167
+ [NOTATION.FASTA, NOTATION.SEPARATOR], [ALPHABET.DNA, ALPHABET.RNA, ALPHABET.PT],
168
+ )[0];
169
+ const isHelm = checkInputColumn(col, col.name, seqHelper, [NOTATION.HELM], [])[0];
170
+ const isSepUnknown = checkInputColumn(
171
+ col, col.name, seqHelper, [NOTATION.SEPARATOR, NOTATION.CUSTOM, NOTATION.BILN], [ALPHABET.UN],
172
+ )[0];
173
+
174
+ if (isCanonical) {
175
+ switchMode('kalign');
176
// The three Kalign inputs are cleared; doKalign() passes null inputs on as undefined.
+ kalignGapOpen.value = null;
177
+ kalignGapExtend.value = null;
178
+ kalignTerminalGap.value = null;
179
+ } else if (isHelm || isSepUnknown) {
180
+ if (msaEngines.length === 0) {
181
+ grok.shell.warning('No MSA engines found for non-canonical sequences.');
182
// NOTE(review): this leaves state.mode === 'kalign' for a non-canonical column, so a later
// doAlignment() takes the Kalign path - confirm that this fallback is intended.
+ switchMode('kalign');
183
+ return;
184
+ }
185
+ switchMode('engine');
186
+ await updateEngineEditor();
187
+
188
+ // Show "Include HELM" checkbox if the column has a notation provider with fromHelm
189
+ const np = col.temp?.[SeqTemps.notationProvider];
190
// implementsFromHelm is read from the provider's constructor, i.e. as a static member.
+ const npCons = np ? np.constructor as typeof NotationProviderBase : null;
191
+ const hasFromHelm = npCons?.implementsFromHelm === true;
192
+ includeHelmInput.root.style.display = hasFromHelm ? '' : 'none';
193
+ } else
194
+ switchMode('kalign');
195
+ } catch (err: any) {
196
+ const errMsg = err instanceof Error ? err.message : err.toString();
197
+ grok.shell.error(errMsg);
198
+ _package.logger.error(errMsg);
199
+ }
200
+ }
122
201
 
123
- dlg.show();
202
+ // --- Alignment execution ---
203
+
204
/**
 * Runs the alignment for the column currently selected in colInput.
 * Dispatches to doKalign() when state.mode is 'kalign', otherwise to doEngineMsa().
 * Throws an Error when no column is selected or its semType is not Macromolecule.
 * The returned column is not added to the table here; both callers do that themselves.
 */
+ async function doAlignment(): Promise<DG.Column<string>> {
205
+ const col = colInput.value;
206
+ if (!col || col.semType !== DG.SEMTYPE.MACROMOLECULE)
207
+ throw new Error('Chosen column must be of Macromolecule semantic type');
208
+
209
+ if (state.mode === 'kalign')
210
+ return doKalign(col, table);
211
+
212
+ return doEngineMsa(col, table);
213
+ }
214
+
215
/**
 * Aligns the column with Kalign and returns whatever runKalign() resolves to.
 * The result name is an unused name in df derived from `msa(<column name>)`.
 */
+ async function doKalign(col: DG.Column<string>, df: DG.DataFrame): Promise<DG.Column<string>> {
216
+ const unusedName = df.columns.getUnusedName(`msa(${col.name})`);
217
+ const sh = seqHelper.getSeqHandler(col);
218
// Columns that are not already FASTA are converted to FASTA before being passed to Kalign.
+ const fastaCol = sh.isFasta() ? col : sh.convert(NOTATION.FASTA);
219
// The meaning of the literal `false` argument is defined by runKalign (not visible here).
// Gap inputs left empty (null) are forwarded as undefined.
+ return runKalign(
220
+ df, fastaCol, false, unusedName, clustersColInput.value,
221
+ kalignGapOpen.value ?? undefined, kalignGapExtend.value ?? undefined,
222
+ kalignTerminalGap.value ?? undefined, onlySelectedInput.value,
223
+ );
224
+ }
225
+
226
/**
 * Aligns the column with the engine function held in state (state.currentFunc / state.currentFuncCall).
 * The source column is used as is when it is HELM, otherwise it is converted to HELM first.
 * Engine parameters are read from the prepared call, and the engine is run per cluster via
 * runEngineWithClustering(). When the column's notation provider class reports implementsFromHelm,
 * the HELM result is converted back through convertFromHelm() and that converted column is returned;
 * otherwise the HELM result column is returned.
 * Throws an Error when no engine function or prepared call is present in state.
 */
+ async function doEngineMsa(col: DG.Column<string>, df: DG.DataFrame): Promise<DG.Column<string>> {
227
+ if (!state.currentFunc || !state.currentFuncCall)
228
+ throw new Error('No MSA engine selected');
229
+
230
+ // Convert to HELM if needed - prefer notation provider's toHelm if available
231
// NOTE(review): no notation-provider toHelm call is made in this function, and the two
// non-HELM branches below execute the same sh.convert(NOTATION.HELM) - confirm whether the
// separator/unknown-alphabet test was meant to select a different conversion.
+ const sh = seqHelper.getSeqHandler(col);
232
+ let srcCol: DG.Column<string>;
233
+ if (sh.isHelm())
234
+ srcCol = col;
235
+ else if (sh.isSeparator() && sh.alphabet === ALPHABET.UN)
236
+ srcCol = sh.convert(NOTATION.HELM);
237
+ else
238
+ srcCol = sh.convert(NOTATION.HELM);
239
+
240
+ const func = state.currentFunc;
241
// NOTE(review): assumes the engine function declares at least one input (the sequence column);
// an engine without inputs would fail on this line - confirm the engine contract.
+ const firstParamName = func.inputs[0].name;
242
+
243
+ // Read config params from the editor (all params except the first column param)
244
+ const configParams: Record<string, any> = {};
245
+ for (let i = 1; i < func.inputs.length; i++) {
246
+ const name = func.inputs[i].name;
247
+ configParams[name] = state.currentFuncCall.inputs[name];
248
+ }
249
+
250
+ const helmResultCol = await runEngineWithClustering(
251
+ func, firstParamName, configParams, srcCol,
252
+ clustersColInput.value, onlySelectedInput.value, df,
253
+ );
254
+
255
+ // If column has a notation provider with fromHelm, convert result back to original notation
256
+ const np = col.temp?.[SeqTemps.notationProvider];
257
+ const npCons = np ? np.constructor as typeof NotationProviderBase : null;
258
+ if (npCons?.implementsFromHelm) {
259
// NOTE(review): this name is reserved before helmResultCol is (optionally) added to df below;
// if both columns end up with the same name they would clash - confirm the names always differ.
+ const convertedName = df.columns.getUnusedName(`msa(${col.name})`);
260
+ const convertedCol = DG.Column.string(convertedName, helmResultCol.length);
261
+ convertedCol.init((i) => {
262
+ const helm = helmResultCol.get(i);
263
+ if (!helm) return '';
264
// A row whose conversion throws becomes an empty string; the error itself is discarded.
+ try {
265
+ return npCons.convertFromHelm(helm, {});
266
+ } catch {
267
+ return '';
268
+ }
269
+ });
270
+ convertedCol.semType = DG.SEMTYPE.MACROMOLECULE;
271
+ convertedCol.meta.units = NOTATION.CUSTOM;
272
+ convertedCol.setTag(bioTAGS.aligned, 'SEQ.MSA');
273
+ convertedCol.setTag(bioTAGS.alphabet, ALPHABET.UN);
274
+
275
+ // Add HELM column too if requested
276
// Only the HELM column is added to df here; the returned converted column is added by the caller.
+ if (includeHelmInput.value)
277
+ df.columns.add(helmResultCol);
278
+
279
+ return convertedCol;
280
+ }
281
+
282
+ return helmResultCol;
283
+ }
284
+
285
+ /** Apply engine and params from options (for programmatic/test use). */
286
// No-op unless options.engine is set and the dialog is in 'engine' mode, or when no engine matches.
+ async function applyEngineOptions(): Promise<void> {
287
+ if (!options.engine || state.mode !== 'engine') return;
288
+
289
// options.engine may match either the function name or its friendlyName.
+ const engine = msaEngines.find(
290
+ (f) => f.name === options.engine || f.friendlyName === options.engine,
291
+ );
292
+ if (!engine) return;
293
+
294
// NOTE(review): engineInput.onChanged is subscribed to updateEngineEditor(); if assigning
// .value fires that event, the editor is rebuilt twice (here and by the subscription) - confirm.
+ engineInput.value = engine.friendlyName;
295
+ await updateEngineEditor();
296
// The overrides are applied after the editor rebuild, which creates a fresh prepared call.
+ if (options.engineParams && state.currentFuncCall) {
297
+ for (const [key, value] of Object.entries(options.engineParams))
298
+ state.currentFuncCall.inputs[key] = value;
299
+ }
300
+ }
301
+
302
+ // --- Dialog ---
303
+
304
+ const dlg = ui.dialog('MSA')
305
+ .add(colInput)
306
+ .add(clustersColInput)
307
+ .add(engineInput)
308
+ .add(engineDescDiv)
309
+ .add(engineParamsButton)
310
+ .add(engineParamsDiv)
311
+ .add(includeHelmInput)
312
+ .add(kalignParamsDiv)
313
+ .add(kalignParamsButton)
314
+ .add(kalignVersionDiv)
315
+ .add(onlySelectedInput)
316
+ .onOK(async () => {
317
+ const pi = DG.TaskBarProgressIndicator.create('Performing MSA...');
318
+ try {
319
+ const resultCol = await doAlignment();
320
+ table.columns.add(resultCol);
321
+ await grok.data.detectSemanticTypes(table);
322
+ if (resultCol.meta.units !== NOTATION.HELM)
323
+ resultCol.setTag(bioTAGS.aligned, 'SEQ.MSA');
324
+ resolve(resultCol);
325
+ } catch (err: any) {
326
+ reject(err);
327
+ } finally {
328
+ pi.close();
329
+ }
330
+ });
331
+ const okBtn = dlg.getButton('OK');
332
+
333
+ // Initialize: detect mode from initial column
334
+ switchMode('kalign');
335
+ colInput.fireChanged();
336
+
337
+ // If column is pre-specified (tests/programmatic), run immediately without dialog
338
+ if (options.col) {
339
+ await onColumnChanged(options.col);
340
+ await applyEngineOptions();
341
+
342
+ const pi = DG.TaskBarProgressIndicator.create('Performing MSA...');
343
+ try {
344
+ const resultCol = await doAlignment();
345
+ table.columns.add(resultCol);
346
+ await grok.data.detectSemanticTypes(table);
347
+ if (resultCol.meta.units !== NOTATION.HELM)
348
+ resultCol.setTag(bioTAGS.aligned, 'SEQ.MSA');
349
+ resolve(resultCol);
350
+ } catch (err: any) {
351
+ reject(err);
352
+ } finally {
353
+ pi.close();
354
+ }
355
+ return;
356
+ }
357
+
358
+ dlg.show();
359
+ } catch (err: any) {
360
+ reject(err);
361
+ }
124
362
  });
125
363
  }
126
364
 
127
- async function onDialogOk(
128
- colInput: DG.InputBase<DG.Column<any>>,
129
- table: DG.DataFrame,
130
- performAlignment: (() => Promise<DG.Column<string> | null>) | undefined,
131
- resolve: (value: DG.Column<any>) => void,
132
- reject: (reason: any) => void,
133
- ): Promise<void> {
134
- let msaCol: DG.Column<string> | null = null;
135
- const pi = DG.TaskBarProgressIndicator.create('Analyze for MSA ...');
136
- try {
137
- colInput.fireChanged();
138
- if (colInput.value.semType !== DG.SEMTYPE.MACROMOLECULE)
139
- throw new Error('Chosen column has to be of Macromolecule semantic type');
140
- if (performAlignment == undefined) // value can only be undefined when column can't be processed with either method
141
- throw new Error('Invalid column format');
142
- msaCol = await performAlignment(); // progress
143
- if (msaCol == null)
144
- return reject('PepSeA container has not started');
145
-
146
- table.columns.add(msaCol);
147
- await grok.data.detectSemanticTypes(table);
148
-
149
- resolve(msaCol);
150
- } catch (err: any) {
151
- reject(err);
152
- } finally {
153
- pi.close();
365
+
366
+ /** Runs a discovered MSA engine function with per-cluster alignment support.
367
+ * Groups rows by cluster, creates subset columns, calls the engine per cluster,
368
+ * and merges results into a single output column. */
369
// Parameters:
//   func          - engine function; it is prepared and called once per non-empty cluster.
//   colParamName  - name of the engine input that receives the subset sequence column.
//   configParams  - remaining engine inputs, spread into every call.
//   srcCol        - sequence column to align; its length defines the result length.
//   clustersCol   - optional cluster column; when null, all rows form one cluster '0'.
//   onlySelected  - when true, only rows selected in `table` are grouped and aligned.
//   table         - used for the row selection and for choosing an unused result name.
// Returns a new string column named after `msa(<srcCol name>)`; rows that were not part of any
// processed cluster keep the empty string. Throws when onlySelected is true and nothing is selected.
// The new column is not added to `table` here.
+ async function runEngineWithClustering(
370
+ func: DG.Func, colParamName: string, configParams: Record<string, any>,
371
+ srcCol: DG.Column<string>, clustersCol: DG.Column | null,
372
+ onlySelected: boolean, table: DG.DataFrame,
373
+ ): Promise<DG.Column<string>> {
374
+ const rowCount = srcCol.length;
375
+
376
+ // Group rows by cluster
377
+ clustersCol ??= DG.Column.string('Clusters', rowCount).init('0');
378
+ if (clustersCol.type !== DG.COLUMN_TYPE.STRING)
379
+ clustersCol = clustersCol.convertTo(DG.TYPE.STRING);
380
+
381
+ const categories = clustersCol.categories;
382
// The raw values are used below as indexes into an array sized by categories.length
// (presumably the raw data of a string column holds category indexes - confirm).
+ const data = clustersCol.getRawData();
383
+ const clusterIndexes: number[][] = new Array(categories.length);
384
+
385
+ if (onlySelected) {
386
+ const sel = table.selection;
387
+ if (sel.trueCount === 0)
388
+ throw new Error('No selected rows in the table.');
389
// Walk the selected rows only; findNext() returning -1 ends the loop.
+ for (let i = -1; (i = sel.findNext(i, true)) !== -1;)
390
+ (clusterIndexes[data[i]] ??= []).push(i);
391
+ } else {
392
+ for (let i = 0; i < rowCount; i++)
393
+ (clusterIndexes[data[i]] ??= []).push(i);
154
394
  }
155
- }
395
// Imported from './multiple-sequence-alignment'; its behavior is not visible in this file.
+ checkForSingleSeqClusters(clusterIndexes, categories);
396
+
397
+ const unusedName = table.columns.getUnusedName(`msa(${srcCol.name})`);
398
+ const resultValues: string[] = new Array(rowCount).fill('');
399
+ let lastResultCol: DG.Column<string> | null = null;
400
+
401
// Clusters are processed one after another; slots left empty by the grouping are skipped.
+ for (const rowIds of clusterIndexes) {
402
+ if (!rowIds || rowIds.length === 0) continue;
403
+
404
+ // Create a subset column with just this cluster's sequences
405
+ const subsetSeqs = rowIds.map((i) => srcCol.get(i)!);
406
+ const subsetCol = DG.Column.fromStrings('seq', subsetSeqs);
407
+ copyColumnMetadata(srcCol, subsetCol);
408
+ DG.DataFrame.fromColumns([subsetCol]); // attach to a DataFrame for column operations
156
409
 
410
+ // Call the engine function with the subset
411
+ const call = func.prepare({[colParamName]: subsetCol, ...configParams});
412
+ await call.call();
413
// NOTE(review): the output is cast without a null or length check; an engine that returns
// nothing, or a column shorter than the subset, is not detected here.
+ const clusterResult = call.getOutputParamValue() as DG.Column<string>;
414
+ lastResultCol = clusterResult;
157
415
 
158
- async function onColInputChange(
159
- col: DG.Column<string>, table: DG.DataFrame, seqHelper: ISeqHelper,
160
- pepseaInputRootStyles: CSSStyleDeclaration[], kalignInputRootStyles: CSSStyleDeclaration[],
161
- methodInput: DG.InputBase<string | null>, clustersColInput: DG.InputBase<DG.Column<any> | null>,
162
- gapOpenInput: DG.InputBase<number | null>, gapExtendInput: DG.InputBase<number | null>,
163
- terminalGapInput: DG.InputBase<number | null>, selectedRowsOnlyInput: DG.InputBase<boolean>,
164
- ): Promise<(() => Promise<DG.Column<string> | null>) | undefined> {
165
- try {
166
- if (col.semType !== DG.SEMTYPE.MACROMOLECULE)
167
- return;
168
- const unusedName = table.columns.getUnusedName(`msa(${col.name})`);
169
-
170
- if (checkInputColumn(col, col.name, seqHelper,
171
- [NOTATION.FASTA, NOTATION.SEPARATOR], [ALPHABET.DNA, ALPHABET.RNA, ALPHABET.PT])[0]
172
- ) { // Kalign - natural alphabets. if the notation is separator, convert to fasta and then run kalign
173
- switchDialog(pepseaInputRootStyles, kalignInputRootStyles, 'kalign');
174
- gapOpenInput.value = null;
175
- gapExtendInput.value = null;
176
- terminalGapInput.value = null;
177
- const potentialColSh = seqHelper.getSeqHandler(col);
178
- const performCol: DG.Column<string> = potentialColSh.isFasta() ? col :
179
- potentialColSh.convert(NOTATION.FASTA);
180
- return async () => await runKalign(table, performCol, false, unusedName, clustersColInput.value, undefined, undefined, undefined, selectedRowsOnlyInput.value);
181
- } else if (checkInputColumn(col, col.name, seqHelper, [NOTATION.HELM], [])[0]) {
182
- // PepSeA branch - Helm notation or separator notation with unknown alphabets
183
- switchDialog(pepseaInputRootStyles, kalignInputRootStyles, 'pepsea');
184
- gapOpenInput.value ??= msaDefaultOptions.pepsea.gapOpen;
185
- gapExtendInput.value ??= msaDefaultOptions.pepsea.gapExtend;
186
-
187
- return async () => {
188
- return runPepsea(table, col, unusedName, methodInput.value!,
189
- gapOpenInput.value!, gapExtendInput.value!, clustersColInput.value, undefined, selectedRowsOnlyInput.value);
190
- };
191
- } else if (checkInputColumn(col, col.name, seqHelper, [NOTATION.SEPARATOR], [ALPHABET.UN])[0]) {
192
- //if the column is separator with unknown alphabet, it might be helm. check if it can be converted to helm
193
- const potentialColSh = seqHelper.getSeqHandler(col);
194
- const helmCol = potentialColSh.convert(NOTATION.HELM);
195
- switchDialog(pepseaInputRootStyles, kalignInputRootStyles, 'pepsea');
196
- gapOpenInput.value ??= msaDefaultOptions.pepsea.gapOpen;
197
- gapExtendInput.value ??= msaDefaultOptions.pepsea.gapExtend;
198
- // convert to helm and assign alignment function to PepSea
199
-
200
- return async () => {
201
- return runPepsea(table, helmCol, unusedName, methodInput.value!,
202
- gapOpenInput.value!, gapExtendInput.value!, clustersColInput.value, undefined, selectedRowsOnlyInput.value);
203
- };
204
- } else {
205
- gapOpenInput.value = null;
206
- gapExtendInput.value = null;
207
- terminalGapInput.value = null;
208
- switchDialog(pepseaInputRootStyles, kalignInputRootStyles, 'kalign');
209
- return;
416
+ // Map cluster results back to original row positions
417
+ for (let i = 0; i < rowIds.length; i++)
418
+ resultValues[rowIds[i]] = clusterResult.get(i) ?? '';
419
+ }
420
+
421
+ // Build final column with metadata from the engine's output
422
+ const finalCol = DG.Column.fromStrings(unusedName, resultValues);
423
// Units, semType and the three tags are taken from the result of the last processed cluster only.
// When no cluster was processed, the final column carries none of this metadata.
+ if (lastResultCol) {
424
+ finalCol.meta.units = lastResultCol.meta.units;
425
+ finalCol.semType = lastResultCol.semType;
426
+ for (const tag of [bioTAGS.alphabet, bioTAGS.separator, bioTAGS.alphabetIsMultichar]) {
427
+ const val = lastResultCol.getTag(tag);
428
+ if (val) finalCol.setTag(tag, val);
210
429
  }
211
- } catch (err: any) {
212
- const errMsg: string = err instanceof Error ? err.message : err.toString();
213
- grok.shell.error(errMsg);
214
- _package.logger.error(errMsg);
215
430
  }
431
+
432
+ return finalCol;
216
433
  }
217
434
 
218
- type MSADialogType = 'kalign' | 'pepsea';
219
435
 
220
- function switchDialog(
221
- pepseaInputRootStyles: CSSStyleDeclaration[], kalignInputRootStyles: CSSStyleDeclaration[], dialogType: MSADialogType,
222
- ) {
223
- if (dialogType === 'kalign') {
224
- for (const inputRootStyle of pepseaInputRootStyles)
225
- inputRootStyle.display = 'none';
226
- for (const inputRootStyle of kalignInputRootStyles)
227
- inputRootStyle.removeProperty('display');
228
- } else {
229
- for (const inputRootStyle of kalignInputRootStyles)
230
- inputRootStyle.display = 'none';
231
- for (const inputRootStyle of pepseaInputRootStyles)
232
- inputRootStyle.removeProperty('display');
436
/**
 * Copies sequence-related metadata from `src` to `dst`: semType, meta.units and the
 * alphabet / separator / alphabetIsMultichar tags. A tag is copied only when its value on
 * `src` is truthy, so missing or empty tags are left untouched on `dst`.
 */
+ function copyColumnMetadata(src: DG.Column, dst: DG.Column): void {
437
+ dst.semType = src.semType;
438
+ dst.meta.units = src.meta.units;
439
+ for (const tag of [bioTAGS.alphabet, bioTAGS.separator, bioTAGS.alphabetIsMultichar]) {
440
+ const val = src.getTag(tag);
441
+ if (val) dst.setTag(tag, val);
233
442
  }
234
443
  }