@datagrok/bio 2.4.13 → 2.4.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -5,7 +5,7 @@
5
5
  "name": "Leonid Stolbov",
6
6
  "email": "lstolbov@datagrok.ai"
7
7
  },
8
- "version": "2.4.13",
8
+ "version": "2.4.15",
9
9
  "description": "Bioinformatics support (import/export of sequences, conversion, visualization, analysis). [See more](https://github.com/datagrok-ai/public/blob/master/packages/Bio/README.md) for details.",
10
10
  "repository": {
11
11
  "type": "git",
package/src/package.ts CHANGED
@@ -230,6 +230,8 @@ export function sequenceAlignment(alignType: string, alignTable: string, gap: nu
230
230
  return res;
231
231
  }
232
232
 
233
+ // -- Viewers --
234
+
233
235
  //name: WebLogo
234
236
  //description: WebLogo
235
237
  //tags: viewer, panel
@@ -409,8 +411,8 @@ export async function toAtomicLevel(df: DG.DataFrame, macroMolecule: DG.Column):
409
411
  //top-menu: Bio | Alignment | MSA...
410
412
  //name: MSA...
411
413
  //tags: bio, panel
412
- export function multipleSequenceAlignmentAny(col: DG.Column<string> | null = null): void {
413
- multipleSequenceAlignmentUI(col);
414
+ export function multipleSequenceAlignmentAny(): void {
415
+ multipleSequenceAlignmentUI();
414
416
  }
415
417
 
416
418
  //top-menu: Bio | Structure | Composition Analysis
@@ -3,8 +3,9 @@ import * as ui from 'datagrok-api/ui';
3
3
  import * as DG from 'datagrok-api/dg';
4
4
 
5
5
  import {category, expect, expectArray, test} from '@datagrok-libraries/utils/src/test';
6
-
6
+ import {ALIGNMENT, ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
7
7
  import {runKalign} from '../utils/multiple-sequence-alignment';
8
+ import {multipleSequenceAlignmentUI} from '../utils/multiple-sequence-alignment-ui';
8
9
  //import * as grok from 'datagrok-api/grok';
9
10
 
10
11
  export const _package = new DG.Package();
@@ -31,9 +32,46 @@ FWR-WYVKHPFWR-WYVKHPFWR-WYVKHPFWR-WYVKHPFWR-WYVKHPFWR-WYVKHPFWR-WYVKHPFWR-WYVKHP
31
32
  YNR-WYVKHPYNR-WYVKHPYNR-WYVKHPYNR-WYVKHPYNR-WYVKHPYNR-WYVKHPYNR-WYVKHPYNR-WYVKHP
32
33
  MWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHP`;
33
34
 
34
- // test('test_table.is_not_empty', async () => {
35
- // await _testTableIsNotEmpty(table);
36
- // });
35
+
36
+ const helmFromCsv = `seq
37
+ PEPTIDE1{meI.hHis.Aca.N.T.dE.Thr_PO3H2}$$$$
38
+ PEPTIDE1{meI.Aca.N.T.dE.Thr_PO3H2}$$$$
39
+ PEPTIDE1{hHis.Aca.N.T.dE.Thr_PO3H2}$$$$`;
40
+
41
+ const helmToCsv = `seq
42
+ meI.hHis.Aca.N.T.dE.Thr_PO3H2
43
+ .meI.Aca.N.T.dE.Thr_PO3H2
44
+ .hHis.Aca.N.T.dE.Thr_PO3H2`;
45
+
46
+ const longHelmFromCsv = `seq
47
+ PEPTIDE1{meI.hHis.Aca.N.T.dE.Thr_PO3H2.Aca.D-Tyr_Et.dV.E.N.D-Orn.D-aThr.Phe_4Me.Thr_PO3H2}$$$$
48
+ PEPTIDE1{meI.hHis.Aca.Cys_SEt.T.dK.Tyr_PO3H2.D-Chg.dV.Phe_ab-dehydro.N.D-Orn.D-aThr.Phe_4Me}$$$$
49
+ PEPTIDE1{Lys_Boc.hHis.Aca.Cys_SEt.T.dK.Tyr_PO3H2.D-Chg.dV.Thr_PO3H2.N.D-Orn.D-aThr.Phe_4Me}$$$$`;
50
+
51
+ const longHelmToCsv = `seq
52
+ meI.hHis.Aca.N.T.dE.Thr_PO3H2.Aca.D-Tyr_Et.dV.E.N.D-Orn.D-aThr.Phe_4Me.Thr_PO3H2
53
+ meI.hHis.Aca.Cys_SEt.T.dK..Tyr_PO3H2.D-Chg.dV.Phe_ab-dehydro.N.D-Orn.D-aThr.Phe_4Me.
54
+ Lys_Boc.hHis.Aca.Cys_SEt.T.dK..Tyr_PO3H2.D-Chg.dV.Thr_PO3H2.N.D-Orn.D-aThr.Phe_4Me.`;
55
+
56
+ const SeparatorFromCsv = `seq
57
+ F-W-P-H-E-Y
58
+ Y-N-R-Q-W-Y-V
59
+ M-K-P-S-E-Y-V`;
60
+
61
+ const SeparatorToCsv = `seq
62
+ FWPHEY-
63
+ YNRQWYV
64
+ MKPSEYV`;
65
+
66
+ const SeparatorLongFromCsv = `seq
67
+ M-I-E-V-F-L-F-G-I-V-L-G-L-I-P-I-T-L-A-G-L-F-V-T-A-Y-L-Q-Y-R-R-G-D-Q-L-D-L
68
+ M-M-E-L-V-L-K-T-I-I-G-P-I-V-V-G-V-V-L-R-I-V-D-K-W-L-N-K-D-K
69
+ M-D-R-T-D-E-V-S-N-H-T-H-D-K-P-T-L-T-W-F-E-E-I-F-E-E-Y-H-S-P-F-H-N`;
70
+
71
+ const SeparatorLongToCsv = `seq
72
+ MIEV-FLFGIVLGLIPITLAGLFVTAYLQYRRGDQLDL
73
+ MMEL-VLKTII-GPIVVGVVLRIVDKWLNKDK------
74
+ MDRTDEVSNHTHDKPTLTWFEEIFEEYHSPFHN-----`;
37
75
 
38
76
  test('isCorrect', async () => {
39
77
  await _testMsaIsCorrect(fromCsv, toCsv);
@@ -42,6 +80,26 @@ MWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHPMWRSWYCKHP
42
80
  test('isCorrectLong', async () => {
43
81
  await _testMsaIsCorrect(longFromCsv, longToCsv);
44
82
  });
83
+
84
+ test('isCorrectHelm', async () => {
85
+ await _testMSAOnColumn(helmFromCsv, helmToCsv, NOTATION.HELM, NOTATION.SEPARATOR, undefined, 'mafft');
86
+ }, {skipReason: 'GROK-13053'});
87
+
88
+ test('isCorrectHelmLong', async () => {
89
+ await _testMSAOnColumn(longHelmFromCsv, longHelmToCsv, NOTATION.HELM, NOTATION.SEPARATOR, undefined, 'mafft');
90
+ }, {skipReason: 'GROK-13053'});
91
+
92
+ test('isCorrectSeparator', async () => {
93
+ await _testMSAOnColumn(
94
+ SeparatorFromCsv, SeparatorToCsv, NOTATION.SEPARATOR, NOTATION.FASTA, ALPHABET.PT
95
+ );
96
+ });
97
+
98
+ test('isCorrectSeparatorLong', async () => {
99
+ await _testMSAOnColumn(
100
+ SeparatorLongFromCsv, SeparatorLongToCsv, NOTATION.SEPARATOR, NOTATION.FASTA, ALPHABET.PT
101
+ );
102
+ });
45
103
  });
46
104
 
47
105
  async function _testMsaIsCorrect(srcCsv: string, tgtCsv: string): Promise<void> {
@@ -58,3 +116,33 @@ async function _testMsaIsCorrect(srcCsv: string, tgtCsv: string): Promise<void>
58
116
  const msaCol: DG.Column = await runKalign(srcCol, true);
59
117
  expectArray(msaCol.toList(), tgtCol.toList());
60
118
  }
119
+
120
+ async function _testMSAOnColumn(
121
+ srcCsv: string, tgtCsv: string,
122
+ srcNotation: NOTATION, tgtNotation: NOTATION, alphabet?: ALPHABET, pepseaMethod?: string
123
+ ): Promise<void> {
124
+ const srcDf: DG.DataFrame = DG.DataFrame.fromCsv(srcCsv);
125
+ const tgtDf: DG.DataFrame = DG.DataFrame.fromCsv(tgtCsv);
126
+
127
+ const srcSeqCol = srcDf.getCol('seq')!;
128
+ const tgtCol = tgtDf.getCol('seq')!;
129
+ const srcCol: DG.Column = srcDf.getCol('seq')!;
130
+ const semType: string = await grok.functions
131
+ .call('Bio:detectMacromolecule', {col: srcCol}) as unknown as string;
132
+ if (semType)
133
+ srcCol.semType = semType;
134
+
135
+ await grok.data.detectSemanticTypes(srcDf);
136
+ expect(srcSeqCol.semType, DG.SEMTYPE.MACROMOLECULE);
137
+ expect(srcSeqCol.getTag(DG.TAGS.UNITS), srcNotation);
138
+ if (alphabet)
139
+ expect(srcSeqCol.getTag(bioTAGS.alphabet), alphabet);
140
+
141
+ const msaSeqCol = await multipleSequenceAlignmentUI(srcSeqCol, pepseaMethod);
142
+ expect(msaSeqCol.semType, DG.SEMTYPE.MACROMOLECULE);
143
+ expect(msaSeqCol.getTag(DG.TAGS.UNITS), tgtNotation);
144
+ expect(msaSeqCol.getTag(bioTAGS.aligned), ALIGNMENT.SEQ_MSA);
145
+ if (alphabet)
146
+ expect(msaSeqCol.getTag(bioTAGS.alphabet), alphabet);
147
+ expectArray(msaSeqCol.toList(), tgtCol.toList());
148
+ }
@@ -10,6 +10,7 @@ import {generateLongSequence, generateManySequences, performanceTest} from './ut
10
10
  import {errorToConsole} from '@datagrok-libraries/utils/src/to-console';
11
11
  import {ALIGNMENT, ALPHABET, NOTATION, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
12
12
  import {UnitsHandler} from '@datagrok-libraries/bio/src/utils/units-handler';
13
+ import {multipleSequenceAlignmentUI} from '../utils/multiple-sequence-alignment-ui';
13
14
 
14
15
  category('renderers', () => {
15
16
  let tvList: DG.TableView[];
@@ -145,17 +146,17 @@ category('renderers', () => {
145
146
  expect(srcSeqCol.getTag(bioTAGS.alphabet), ALPHABET.PT);
146
147
  expect(srcSeqCol.getTag(DG.TAGS.CELL_RENDERER), 'sequence');
147
148
 
148
- const msaSeqCol = multipleSequenceAlignmentAny(srcSeqCol);
149
+ const msaSeqCol = await multipleSequenceAlignmentUI(srcSeqCol);
149
150
  tv.grid.invalidate();
150
151
 
151
- // expect(msaSeqCol.semType, DG.SEMTYPE.MACROMOLECULE);
152
- // expect(msaSeqCol.getTag(DG.TAGS.UNITS), NOTATION.FASTA);
153
- // expect(msaSeqCol.getTag(bioTAGS.aligned), ALIGNMENT.SEQ_MSA);
154
- // expect(msaSeqCol.getTag(bioTAGS.alphabet), ALPHABET.PT);
155
- // expect(msaSeqCol.getTag(DG.TAGS.CELL_RENDERER), 'sequence');
152
+ expect(msaSeqCol.semType, DG.SEMTYPE.MACROMOLECULE);
153
+ expect(msaSeqCol.getTag(DG.TAGS.UNITS), NOTATION.FASTA);
154
+ expect(msaSeqCol.getTag(bioTAGS.aligned), ALIGNMENT.SEQ_MSA);
155
+ expect(msaSeqCol.getTag(bioTAGS.alphabet), ALPHABET.PT);
156
+ expect(msaSeqCol.getTag(DG.TAGS.CELL_RENDERER), 'sequence');
156
157
 
157
158
  // check newColumn with UnitsHandler constructor
158
- // const uh: UnitsHandler = new UnitsHandler(msaSeqCol);
159
+ const uh: UnitsHandler = new UnitsHandler(msaSeqCol);
159
160
 
160
161
  dfList.push(df);
161
162
  tvList.push(tv);
@@ -6,76 +6,144 @@ import {ALPHABET, NOTATION} from '@datagrok-libraries/bio/src/utils/macromolecul
6
6
  import {runKalign} from './multiple-sequence-alignment';
7
7
  import {pepseaMethods, runPepsea} from './pepsea';
8
8
  import {checkInputColumnUI} from './check-input-column';
9
+ import {NotationConverter} from '@datagrok-libraries/bio/src/utils/notation-converter';
10
+ import {_package} from '../package';
9
11
 
10
- export function multipleSequenceAlignmentUI(col: DG.Column<string> | null = null) {
11
- const table = col?.dataFrame ?? grok.shell.t;
12
- const seqCol = col ?? table.columns.bySemType(DG.SEMTYPE.MACROMOLECULE);
13
- if (seqCol == null) {
14
- grok.shell.warning(`MSAError: dataset doesn't conain any Macromolecule column`);
15
- return;
12
+ export class MsaWarning extends Error {
13
+ constructor(message: string, options?: ErrorOptions) {
14
+ super(message, options);
16
15
  }
16
+ }
17
+
18
+ export async function multipleSequenceAlignmentUI(
19
+ col: DG.Column<string> | null = null,
20
+ pepseaMethod: typeof pepseaMethods[number] = pepseaMethods[0]
21
+ ): Promise<DG.Column> {
22
+ return new Promise(async (resolve, reject) => {
23
+ const table = col?.dataFrame ?? grok.shell.t;
24
+ const seqCol = col ?? table.columns.bySemType(DG.SEMTYPE.MACROMOLECULE);
25
+ if (seqCol == null) {
26
+ const errMsg = `MSAError: dataset doesn't conain any Macromolecule column`;
27
+ grok.shell.warning(errMsg);
28
+ reject(new MsaWarning(errMsg));
29
+ }
17
30
 
18
- let performAlignment: () => Promise<DG.Column<string> | null> = async () => null;
19
- const methodInput = ui.choiceInput('Method', pepseaMethods[0], pepseaMethods);
20
- methodInput.setTooltip('Alignment method');
21
- const gapOpenInput = ui.floatInput('Gap open', 1.53);
22
- gapOpenInput.setTooltip('Gap opening penalty at group-to-group alignment');
23
- const gapExtendInput = ui.floatInput('Gap extend', 0);
24
- gapExtendInput.setTooltip('Gap extension penalty to skip the alignment');
25
- const inputRootStyles = [methodInput.root.style, gapOpenInput.root.style, gapExtendInput.root.style];
31
+ // UI
32
+ const methodInput = ui.choiceInput('Method', pepseaMethod, pepseaMethods);
33
+ methodInput.setTooltip('Alignment method');
34
+ const gapOpenInput = ui.floatInput('Gap open', 1.53);
35
+ gapOpenInput.setTooltip('Gap opening penalty at group-to-group alignment');
36
+ const gapExtendInput = ui.floatInput('Gap extend', 0);
37
+ gapExtendInput.setTooltip('Gap extension penalty to skip the alignment');
38
+ const inputRootStyles = [methodInput.root.style, gapOpenInput.root.style, gapExtendInput.root.style];
39
+ let performAlignment: (() => Promise<DG.Column<string>>) | undefined;
26
40
 
27
- const colInput = ui.columnInput('Sequence', table, seqCol, () => {
28
- const potentialCol = colInput.value;
29
- const unusedName = table.columns.getUnusedName(`msa(${potentialCol.name})`);
41
+ // TODO: allow only macromolecule colums to be chosen
42
+ const colInput = ui.columnInput('Sequence', table, seqCol, () => {
43
+ performAlignment = onColInputChange(
44
+ colInput.value, table, inputRootStyles, methodInput, clustersColInput, gapOpenInput, gapExtendInput
45
+ );
46
+ }
47
+ ) as DG.InputBase<DG.Column<string>>;
48
+ colInput.setTooltip('Sequences column to use for alignment');
49
+ const clustersColInput = ui.columnInput('Clusters', table, null);
50
+ clustersColInput.nullable = true;
51
+ colInput.fireChanged();
52
+ //if column is specified (from tests), run alignment and resolve with the result
53
+ if (col) {
54
+ performAlignment = onColInputChange(
55
+ col, table, inputRootStyles, methodInput, clustersColInput, gapOpenInput, gapExtendInput
56
+ );
30
57
 
31
- if (checkInputColumnUI(
32
- potentialCol, potentialCol.name, [NOTATION.FASTA], [ALPHABET.DNA, ALPHABET.RNA, ALPHABET.PT], false)) {
58
+ await onDialogOk(colInput, table, performAlignment, resolve, reject);
59
+ return;
60
+ }
61
+ const dlg = ui.dialog('MSA')
62
+ .add(colInput)
63
+ .add(clustersColInput)
64
+ .add(methodInput)
65
+ .add(gapOpenInput)
66
+ .add(gapExtendInput)
67
+ .onOK(async () => {
68
+ await onDialogOk(colInput, table, performAlignment, resolve, reject);
69
+ })
70
+ .show();
71
+ });
72
+ }
73
+
74
+ async function onDialogOk(
75
+ colInput: DG.InputBase< DG.Column<any>>,
76
+ table: DG.DataFrame,
77
+ performAlignment: (() => Promise<DG.Column<string>>) | undefined,
78
+ resolve: (value: DG.Column<any>) => void,
79
+ reject: (reason: any) => void
80
+ ): Promise<void> {
81
+ let msaCol: DG.Column<string> | null = null;
82
+ const pi = DG.TaskBarProgressIndicator.create('Analyze for MSA ...');
83
+ try {
84
+ colInput.fireChanged();
85
+ if (colInput.value.semType !== DG.SEMTYPE.MACROMOLECULE)
86
+ throw new Error('Chosen column has to be of Macromolecule semantic type');
87
+ if (performAlignment === undefined) // value can only be undefined when column can't be processed with either method
88
+ throw new Error('Invalid column format');
89
+ msaCol = await performAlignment(); // progress
90
+ if (msaCol == null)
91
+ return grok.shell.warning('Wrong column format');
92
+
93
+ table.columns.add(msaCol);
94
+ await grok.data.detectSemanticTypes(table);
95
+
96
+ resolve(msaCol);
97
+ } catch (err: any) {
98
+ const errMsg: string = err instanceof Error ? err.message : err.toString();
99
+ grok.shell.error(errMsg);
100
+ reject(err);
101
+ } finally {
102
+ pi.close();
103
+ }
104
+ }
105
+
106
+
107
+ function onColInputChange(
108
+ col: DG.Column<string>,
109
+ table: DG.DataFrame,
110
+ inputRootStyles: CSSStyleDeclaration[],
111
+ methodInput: DG.InputBase<string | null>,
112
+ clustersColInput: DG.InputBase<DG.Column<any> | null>,
113
+ gapOpenInput: DG.InputBase<number | null>,
114
+ gapExtendInput: DG.InputBase<number | null>
115
+ ): (() => Promise<DG.Column<string>>) | undefined {
116
+ try {
117
+ if (col.semType !== DG.SEMTYPE.MACROMOLECULE)
118
+ return;
119
+ const unusedName = table.columns.getUnusedName(`msa(${col.name})`);
120
+
121
+ if (checkInputColumnUI(col, col.name,
122
+ [NOTATION.FASTA, NOTATION.SEPARATOR], [ALPHABET.DNA, ALPHABET.RNA, ALPHABET.PT], false)
123
+ ) { // Kalign - natural alphabets. if the notation is separator, convert to fasta and then run kalign
33
124
  for (const inputRootStyle of inputRootStyles)
34
125
  inputRootStyle.display = 'none';
35
-
36
- performAlignment = () => runKalign(potentialCol, false, unusedName, clustersColInput.value);
37
- } else if (checkInputColumnUI(potentialCol, potentialCol.name, [NOTATION.HELM], [], false)) {
126
+ const potentialColNC = new NotationConverter(col);
127
+ const performCol: DG.Column<string> = potentialColNC.isFasta() ? col :
128
+ potentialColNC.convert(NOTATION.FASTA);
129
+ return async () => await runKalign(performCol, false, unusedName, clustersColInput.value);
130
+ } else if (checkInputColumnUI(col, col.name,
131
+ [NOTATION.HELM], [], false)
132
+ ) { // PepSeA branch - Helm notation or separator notation with unknown alphabets
38
133
  for (const inputRootStyle of inputRootStyles)
39
- inputRootStyle.display = 'initial';
134
+ inputRootStyle.removeProperty('display');
40
135
 
41
- performAlignment = () => runPepsea(potentialCol, unusedName, methodInput.value!, gapOpenInput.value!,
42
- gapExtendInput.value!, clustersColInput.value);
136
+ return async () => await runPepsea(col, unusedName, methodInput.value!,
137
+ gapOpenInput.value!, gapExtendInput.value!, clustersColInput.value);
43
138
  } else {
44
139
  for (const inputRootStyle of inputRootStyles)
45
140
  inputRootStyle.display = 'none';
46
141
 
47
- performAlignment = async () => null;
142
+ return;
48
143
  }
49
- }) as DG.InputBase<DG.Column<string>>;
50
- colInput.setTooltip('Sequences column to use for alignment');
51
- colInput.fireChanged();
52
-
53
- const clustersColInput = ui.columnInput('Clusters', table, null);
54
- clustersColInput.nullable = true;
55
-
56
- let msaCol: DG.Column<string> | null = null;
57
- ui.dialog('MSA')
58
- .add(colInput)
59
- .add(clustersColInput)
60
- .add(methodInput)
61
- .add(gapOpenInput)
62
- .add(gapExtendInput)
63
- .onOK(async () => {
64
- const pi = DG.TaskBarProgressIndicator.create('Analyze for MSA ...');
65
- try {
66
- colInput.fireChanged();
67
- msaCol = await performAlignment(); // progress
68
- if (msaCol == null)
69
- return grok.shell.warning('Wrong column format');
70
-
71
- table.columns.add(msaCol);
72
- await grok.data.detectSemanticTypes(table);
73
- } catch (err: any) {
74
- const errMsg: string = err instanceof Error ? err.message : err.toString();
75
- grok.shell.error(errMsg);
76
- } finally {
77
- pi.close();
78
- }
79
- })
80
- .show();
144
+ } catch (err: any) {
145
+ const errMsg: string = err instanceof Error ? err.message : err.toString();
146
+ grok.shell.error(errMsg);
147
+ _package.logger.error(errMsg);
148
+ }
81
149
  }
@@ -2,7 +2,7 @@
2
2
  import * as DG from 'datagrok-api/dg';
3
3
 
4
4
  import {FastaFileHandler} from '@datagrok-libraries/bio/src/utils/fasta-handler';
5
- import {TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
5
+ import {ALIGNMENT, TAGS as bioTAGS} from '@datagrok-libraries/bio/src/utils/macromolecule';
6
6
  //@ts-ignore: there are no types for this library
7
7
  import Aioli from '@biowasm/aioli';
8
8
 
@@ -25,7 +25,8 @@ function _stringsToFasta(sequences: string[]): string {
25
25
  *
26
26
  * @param {DG.Column} srcCol Column with sequences.
27
27
  * @param {boolean} isAligned Whether the column is aligned.
28
- * @param {string} unUsedName
28
+ * @param {string | undefined} unUsedName
29
+ * @param {DG.Column | null} clustersCol Column with clusters.
29
30
  * @return {Promise<DG.Column>} Aligned sequences.
30
31
  */
31
32
  export async function runKalign(srcCol: DG.Column<string>, isAligned: boolean = false, unUsedName: string = '',
@@ -40,7 +41,7 @@ export async function runKalign(srcCol: DG.Column<string>, isAligned: boolean =
40
41
  if (clustersCol.type != DG.COLUMN_TYPE.STRING)
41
42
  clustersCol = clustersCol.convertTo(DG.TYPE.STRING);
42
43
  clustersCol.compact();
43
-
44
+
44
45
  //TODO: use fixed-size inner arrays, but first need to expose the method to get each category count
45
46
  const clustersColCategories = clustersCol.categories;
46
47
  const clustersColData = clustersCol.getRawData();
@@ -61,8 +62,6 @@ export async function runKalign(srcCol: DG.Column<string>, isAligned: boolean =
61
62
  for (let clusterIdx = 0; clusterIdx < clustersColCategories.length; ++clusterIdx) {
62
63
  const clusterSequences = fastaSequences[clusterIdx];
63
64
  const fasta = _stringsToFasta(clusterSequences);
64
-
65
- console.log(['fasta.length =', fasta.length]);
66
65
 
67
66
  await CLI.fs.writeFile(fastaInputFilename, fasta);
68
67
  const output = await CLI.exec(`kalign ${fastaInputFilename} -f fasta -o ${fastaOutputFilename}`);
@@ -82,8 +81,7 @@ export async function runKalign(srcCol: DG.Column<string>, isAligned: boolean =
82
81
  // units
83
82
  const srcUnits = srcCol.getTag(DG.TAGS.UNITS);
84
83
  //aligned
85
- const srcAligned = srcCol.getTag(bioTAGS.aligned);
86
- const tgtAligned = srcAligned + '.MSA';
84
+ const tgtAligned = ALIGNMENT.SEQ_MSA;
87
85
  //alphabet
88
86
  const srcAlphabet = srcCol.getTag(bioTAGS.alphabet);
89
87