msa-parsers 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/gff/gffToInterPro.d.ts +20 -0
- package/dist/gff/gffToInterPro.js +83 -0
- package/dist/gff/gffToInterPro.js.map +1 -0
- package/dist/gff/gffToInterPro.test.d.ts +1 -0
- package/dist/gff/gffToInterPro.test.js +181 -0
- package/dist/gff/gffToInterPro.test.js.map +1 -0
- package/dist/gff/index.d.ts +3 -0
- package/dist/gff/index.js +4 -0
- package/dist/gff/index.js.map +1 -0
- package/dist/gff/interProToGFF.d.ts +9 -0
- package/dist/gff/interProToGFF.js +48 -0
- package/dist/gff/interProToGFF.js.map +1 -0
- package/dist/gff/interProToGFF.test.d.ts +1 -0
- package/dist/gff/interProToGFF.test.js +189 -0
- package/dist/gff/interProToGFF.test.js.map +1 -0
- package/dist/gff/parseGFF.d.ts +2 -0
- package/dist/gff/parseGFF.js +41 -0
- package/dist/gff/parseGFF.js.map +1 -0
- package/dist/gff/parseGFF.test.d.ts +1 -0
- package/dist/gff/parseGFF.test.js +92 -0
- package/dist/gff/parseGFF.test.js.map +1 -0
- package/dist/index.d.ts +5 -0
- package/dist/index.js +9 -0
- package/dist/index.js.map +1 -0
- package/dist/msa/A3mMSA.d.ts +33 -0
- package/dist/msa/A3mMSA.js +280 -0
- package/dist/msa/A3mMSA.js.map +1 -0
- package/dist/msa/A3mMSA.test.d.ts +1 -0
- package/dist/msa/A3mMSA.test.js +155 -0
- package/dist/msa/A3mMSA.test.js.map +1 -0
- package/dist/msa/ClustalMSA.d.ts +30 -0
- package/dist/msa/ClustalMSA.js +53 -0
- package/dist/msa/ClustalMSA.js.map +1 -0
- package/dist/msa/EmfMSA.d.ts +27 -0
- package/dist/msa/EmfMSA.js +53 -0
- package/dist/msa/EmfMSA.js.map +1 -0
- package/dist/msa/FastaMSA.d.ts +19 -0
- package/dist/msa/FastaMSA.js +69 -0
- package/dist/msa/FastaMSA.js.map +1 -0
- package/dist/msa/StockholmMSA.d.ts +54 -0
- package/dist/msa/StockholmMSA.js +113 -0
- package/dist/msa/StockholmMSA.js.map +1 -0
- package/dist/msa/index.d.ts +18 -0
- package/dist/msa/index.js +34 -0
- package/dist/msa/index.js.map +1 -0
- package/dist/msa/index.test.d.ts +1 -0
- package/dist/msa/index.test.js +60 -0
- package/dist/msa/index.test.js.map +1 -0
- package/dist/msa/parseNewick.d.ts +60 -0
- package/dist/msa/parseNewick.js +95 -0
- package/dist/msa/parseNewick.js.map +1 -0
- package/dist/msa/stockholmParser.d.ts +22 -0
- package/dist/msa/stockholmParser.js +141 -0
- package/dist/msa/stockholmParser.js.map +1 -0
- package/dist/msa/stockholmParser.test.d.ts +1 -0
- package/dist/msa/stockholmParser.test.js +111 -0
- package/dist/msa/stockholmParser.test.js.map +1 -0
- package/dist/types.d.ts +66 -0
- package/dist/types.js +2 -0
- package/dist/types.js.map +1 -0
- package/dist/util.d.ts +2 -0
- package/dist/util.js +10 -0
- package/dist/util.js.map +1 -0
- package/package.json +25 -0
- package/src/gff/gffToInterPro.test.ts +202 -0
- package/src/gff/gffToInterPro.ts +113 -0
- package/src/gff/index.ts +3 -0
- package/src/gff/interProToGFF.test.ts +206 -0
- package/src/gff/interProToGFF.ts +59 -0
- package/src/gff/parseGFF.test.ts +106 -0
- package/src/gff/parseGFF.ts +46 -0
- package/src/index.ts +29 -0
- package/src/msa/A3mMSA.test.ts +192 -0
- package/src/msa/A3mMSA.ts +320 -0
- package/src/msa/ClustalMSA.ts +67 -0
- package/src/msa/EmfMSA.ts +67 -0
- package/src/msa/FastaMSA.ts +82 -0
- package/src/msa/StockholmMSA.ts +141 -0
- package/src/msa/index.test.ts +74 -0
- package/src/msa/index.ts +44 -0
- package/src/msa/parseNewick.ts +94 -0
- package/src/msa/stockholmParser.test.ts +123 -0
- package/src/msa/stockholmParser.ts +157 -0
- package/src/types.ts +68 -0
- package/src/util.ts +19 -0
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
import { describe, expect, test } from 'vitest';
|
|
2
|
+
import { parseGFF } from './parseGFF';
|
|
3
|
+
// Behavioural tests for parseGFF: each case feeds one GFF3 snippet to the
// parser and checks the resulting feature records.
describe('parseGFF', () => {
    // Empty and missing input both yield an empty record list.
    test('parses empty string', () => {
        for (const input of ['', undefined]) {
            expect(parseGFF(input)).toEqual([]);
        }
    });

    // The eight fixed columns map to named fields; '.' score is reported as 0.
    test('parses basic GFF3 line', () => {
        const records = parseGFF('seq1\tInterProScan\tprotein_match\t10\t50\t.\t+\t.\tName=PF00001');
        const expected = {
            seq_id: 'seq1',
            source: 'InterProScan',
            type: 'protein_match',
            start: 10,
            end: 50,
            score: 0,
            strand: '+',
            phase: '.',
            Name: 'PF00001',
        };
        expect(records).toHaveLength(1);
        expect(records[0]).toEqual(expected);
    });

    // Every key=value pair of column nine becomes a property on the record.
    test('parses multiple attributes', () => {
        const records = parseGFF('seq1\tPfam\tprotein_match\t10\t50\t1.5\t.\t.\tName=PF00001;signature_desc=7tm_1;description=GPCR');
        const expectedAttributes = {
            Name: 'PF00001',
            signature_desc: '7tm_1',
            description: 'GPCR',
        };
        expect(records[0]).toMatchObject(expectedAttributes);
    });

    // Percent-escapes in attribute values are decoded.
    test('handles URL-encoded attribute values', () => {
        const records = parseGFF('seq1\tSource\ttype\t1\t10\t.\t.\t.\tNote=Hello%20World%3B%3D');
        expect(records[0]?.Note).toBe('Hello World;=');
    });

    // Directive (##) and comment (#) lines produce no records.
    test('skips comment lines', () => {
        const input = [
            '##gff-version 3',
            '# This is a comment',
            'seq1\tSource\ttype\t1\t10\t.\t.\t.\tName=test',
        ].join('\n');
        const records = parseGFF(input);
        expect(records).toHaveLength(1);
        expect(records[0]?.seq_id).toBe('seq1');
    });

    // A blank line between two features is ignored.
    test('skips empty lines', () => {
        const input = [
            'seq1\tSource\ttype\t1\t10\t.\t.\t.\tName=test1',
            '',
            'seq2\tSource\ttype\t20\t30\t.\t.\t.\tName=test2',
        ].join('\n');
        expect(parseGFF(input)).toHaveLength(2);
    });

    // A line with only the eight fixed columns still yields a record.
    test('handles missing attributes column', () => {
        const records = parseGFF('seq1\tSource\ttype\t1\t10\t.\t.\t.');
        expect(records).toHaveLength(1);
        expect(records[0]?.seq_id).toBe('seq1');
    });

    // Truncated lines are kept; missing coordinates fall back to 0.
    test('handles partial GFF lines gracefully', () => {
        const records = parseGFF('seq1\tSource\ttype');
        expect(records).toHaveLength(1);
        expect(records[0]).toMatchObject({
            seq_id: 'seq1',
            source: 'Source',
            type: 'type',
            start: 0,
            end: 0,
        });
    });

    // A numeric score column is returned as a number.
    test('parses numeric score', () => {
        const records = parseGFF('seq1\tSource\ttype\t1\t10\t45.6\t.\t.\tName=test');
        expect(records[0]?.score).toBe(45.6);
    });

    // Comma-separated attribute values come back joined by single spaces.
    test('handles comma-separated values in attributes', () => {
        const records = parseGFF('seq1\tSource\ttype\t1\t10\t.\t.\t.\tOntology_term=GO:0001,GO:0002');
        expect(records[0]?.Ontology_term).toBe('GO:0001 GO:0002');
    });

    // Records are returned in input order, one per feature line.
    test('parses multiple lines', () => {
        const input = [
            'seq1\tPfam\tprotein_match\t10\t50\t.\t.\t.\tName=PF00001',
            'seq1\tSMART\tprotein_match\t60\t100\t.\t.\t.\tName=SM00001',
            'seq2\tPfam\tprotein_match\t5\t40\t.\t.\t.\tName=PF00002',
        ].join('\n');
        const records = parseGFF(input);
        expect(records).toHaveLength(3);
        expect(records[0]?.seq_id).toBe('seq1');
        expect(records[1]?.seq_id).toBe('seq1');
        expect(records[2]?.seq_id).toBe('seq2');
    });
});
|
|
92
|
+
//# sourceMappingURL=parseGFF.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"parseGFF.test.js","sourceRoot":"","sources":["../../src/gff/parseGFF.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,QAAQ,CAAA;AAE/C,OAAO,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAA;AAErC,QAAQ,CAAC,UAAU,EAAE,GAAG,EAAE;IACxB,IAAI,CAAC,qBAAqB,EAAE,GAAG,EAAE;QAC/B,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAA;QAChC,MAAM,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAA;IACzC,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,wBAAwB,EAAE,GAAG,EAAE;QAClC,MAAM,GAAG,GACP,kEAAkE,CAAA;QACpE,MAAM,MAAM,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAA;QAC5B,MAAM,CAAC,MAAM,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;QAC9B,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC;YACxB,MAAM,EAAE,MAAM;YACd,MAAM,EAAE,cAAc;YACtB,IAAI,EAAE,eAAe;YACrB,KAAK,EAAE,EAAE;YACT,GAAG,EAAE,EAAE;YACP,KAAK,EAAE,CAAC;YACR,MAAM,EAAE,GAAG;YACX,KAAK,EAAE,GAAG;YACV,IAAI,EAAE,SAAS;SAChB,CAAC,CAAA;IACJ,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,4BAA4B,EAAE,GAAG,EAAE;QACtC,MAAM,GAAG,GACP,kGAAkG,CAAA;QACpG,MAAM,MAAM,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAA;QAC5B,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,aAAa,CAAC;YAC9B,IAAI,EAAE,SAAS;YACf,cAAc,EAAE,OAAO;YACvB,WAAW,EAAE,MAAM;SACpB,CAAC,CAAA;IACJ,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,sCAAsC,EAAE,GAAG,EAAE;QAChD,MAAM,GAAG,GAAG,8DAA8D,CAAA;QAC1E,MAAM,MAAM,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAA;QAC5B,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,IAAI,CAAC,eAAe,CAAC,CAAA;IAC/C,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,qBAAqB,EAAE,GAAG,EAAE;QAC/B,MAAM,GAAG,GAAG;;8CAE8B,CAAA;QAC1C,MAAM,MAAM,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAA;QAC5B,MAAM,CAAC,MAAM,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;QAC9B,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;IACxC,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,mBAAmB,EAAE,GAAG,EAAE;QAC7B,MAAM,GAAG,GAAG;;gDAEgC,CAAA;QAC5C,MAAM,MAAM,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAA;QAC5B,MAAM,CAAC,MAAM,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;IAChC,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,mCAAmC,EAAE,GAAG,EAAE;QAC7C,MAAM,GAAG,GAAG,oCAAoC,CAAA;QAChD,MAAM,MAAM,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAA;QAC5B,MAAM,CAAC,MAA
M,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;QAC9B,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;IACxC,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,sCAAsC,EAAE,GAAG,EAAE;QAChD,MAAM,GAAG,GAAG,oBAAoB,CAAA;QAChC,MAAM,MAAM,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAA;QAC5B,MAAM,CAAC,MAAM,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;QAC9B,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,aAAa,CAAC;YAC9B,MAAM,EAAE,MAAM;YACd,MAAM,EAAE,QAAQ;YAChB,IAAI,EAAE,MAAM;YACZ,KAAK,EAAE,CAAC;YACR,GAAG,EAAE,CAAC;SACP,CAAC,CAAA;IACJ,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,sBAAsB,EAAE,GAAG,EAAE;QAChC,MAAM,GAAG,GAAG,kDAAkD,CAAA;QAC9D,MAAM,MAAM,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAA;QAC5B,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;IACrC,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,8CAA8C,EAAE,GAAG,EAAE;QACxD,MAAM,GAAG,GACP,mEAAmE,CAAA;QACrE,MAAM,MAAM,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAA;QAC5B,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,aAAa,CAAC,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAA;IAC1D,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,uBAAuB,EAAE,GAAG,EAAE;QACjC,MAAM,GAAG,GAAG;;wDAEwC,CAAA;QACpD,MAAM,MAAM,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAA;QAC5B,MAAM,CAAC,MAAM,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;QAC9B,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;QACtC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;QACtC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;IACxC,CAAC,CAAC,CAAA;AACJ,CAAC,CAAC,CAAA"}
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
export * from './types';
|
|
2
|
+
export { generateNodeIds } from './util';
|
|
3
|
+
export { A3mMSA, ClustalMSA, EmfMSA, FastaMSA, StockholmMSA, getUngappedSequence, parseEmfTree, parseMSA, parseNewick, stockholmSniff, } from './msa';
|
|
4
|
+
export type { MSAParserType } from './msa';
|
|
5
|
+
export { gffToInterProResponse, gffToInterProResults, interProResponseToGFF, interProToGFF, parseGFF, } from './gff';
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
// Types
|
|
2
|
+
export * from './types';
|
|
3
|
+
// Utilities
|
|
4
|
+
export { generateNodeIds } from './util';
|
|
5
|
+
// MSA parsers
|
|
6
|
+
export { A3mMSA, ClustalMSA, EmfMSA, FastaMSA, StockholmMSA, getUngappedSequence, parseEmfTree, parseMSA, parseNewick, stockholmSniff, } from './msa';
|
|
7
|
+
// GFF parsing
|
|
8
|
+
export { gffToInterProResponse, gffToInterProResults, interProResponseToGFF, interProToGFF, parseGFF, } from './gff';
|
|
9
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,QAAQ;AACR,cAAc,SAAS,CAAA;AAEvB,YAAY;AACZ,OAAO,EAAE,eAAe,EAAE,MAAM,QAAQ,CAAA;AAExC,cAAc;AACd,OAAO,EACL,MAAM,EACN,UAAU,EACV,MAAM,EACN,QAAQ,EACR,YAAY,EACZ,mBAAmB,EACnB,YAAY,EACZ,QAAQ,EACR,WAAW,EACX,cAAc,GACf,MAAM,OAAO,CAAA;AAGd,cAAc;AACd,OAAO,EACL,qBAAqB,EACrB,oBAAoB,EACpB,qBAAqB,EACrB,aAAa,EACb,QAAQ,GACT,MAAM,OAAO,CAAA"}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import type { NodeWithIds } from '../types';
|
|
2
|
+
/**
 * Parser for A3M-formatted multiple sequence alignments.
 *
 * The constructor parses the text once; the accessors below expose the
 * expanded alignment (all rows padded to a common width).
 */
export default class A3mMSA {
    private MSA;
    private orderedNames;
    /**
     * @param text - Raw A3M text (FASTA-like records introduced by `>`).
     */
    constructor(text: string);
    /**
     * Detect if text is likely A3M format
     */
    static sniff(text: string): boolean;
    /**
     * @returns The expanded alignment, keyed by sequence name.
     */
    getMSA(): {
        seqdata: Record<string, string>;
    };
    /**
     * @returns Sequence names in the order they appear in the input.
     */
    getNames(): string[];
    /**
     * @param name - Sequence name as reported by {@link getNames}.
     * @returns The expanded row, or an empty string when the name is unknown.
     */
    getRow(name: string): string;
    /**
     * @returns Number of columns in the expanded alignment (0 when empty).
     */
    getWidth(): number;
    /**
     * @returns A flat placeholder tree: a root node with one leaf per sequence.
     */
    getTree(): NodeWithIds;
    /** A3M carries no per-row annotations; always `undefined`. */
    getRowData(): undefined;
    /** A3M carries no structure links; always an empty object. */
    getStructures(): {};
    /** A3M carries no header metadata; always an empty object. */
    getHeader(): {};
    /** A3M text holds a single alignment; always an empty list. */
    get alignmentNames(): never[];
    /** No consensus line exists in A3M; always `undefined`. */
    get seqConsensus(): undefined;
    /** No secondary-structure consensus exists in A3M; always `undefined`. */
    get secondaryStructureConsensus(): undefined;
    /** No annotation tracks exist in A3M; always an empty list. */
    get tracks(): never[];
    /**
     * Expand A3M format to standard aligned format.
     *
     * In A3M, lowercase characters are insertions that implicitly introduce
     * gaps in sequences that don't have an insert at that position.
     * Gaps (-) following match columns in sequences without inserts align
     * with lowercase inserts in other sequences.
     */
    private expandA3M;
}
|
|
@@ -0,0 +1,280 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* A3M Format Parser
|
|
3
|
+
*
|
|
4
|
+
* The A3M format consists of aligned FASTA, in which alignments are shown with:
|
|
5
|
+
* - Inserts as lowercase characters
|
|
6
|
+
* - Matches as uppercase characters
|
|
7
|
+
* - Deletions as '-'
|
|
8
|
+
* - Gaps aligned to inserts as '.'
|
|
9
|
+
*
|
|
10
|
+
* Note that gaps aligned to inserts can be omitted in the A3M format.
|
|
11
|
+
*
|
|
12
|
+
* Example:
|
|
13
|
+
* >query
|
|
14
|
+
* ETESMKTVRIREKIKKFLGDRPRNTAEILEHINSTMRHGTTSQQLGNVLSKDKDIVKVGYIKRSGILSGGYDICEWATRNWVAEHCPEWTE
|
|
15
|
+
* >seq1
|
|
16
|
+
* ----MRTTRLRQKIKKFLNERGeANTTEILEHVNSTMRHGTTPQQLGNVLSKDKDILKVATTKRGGALSGRYEICVWTLRP-----------
|
|
17
|
+
*
|
|
18
|
+
* In the above, 'e' after 'G' in seq1 is a lowercase insert.
|
|
19
|
+
*
|
|
20
|
+
* @see https://yanglab.qd.sdu.edu.cn/trRosetta/msa_format.html
|
|
21
|
+
*/
|
|
22
|
+
// Char code helpers for fast character classification
const CODE_A = 65; // 'A'
const CODE_Z = 90; // 'Z'
const CODE_a = 97; // 'a'
const CODE_z = 122; // 'z'
const CODE_DASH = 45; // '-'
const CODE_DOT = 46; // '.'

// Hoisted so the record-splitting loop does not rebuild them per record.
const NON_WHITESPACE = /\S/;
const ALL_WHITESPACE = /\s/g;

/** True when `code` is the char code of an ASCII lowercase letter (insert). */
function isLower(code) {
    return code >= CODE_a && code <= CODE_z;
}

/** True when `code` is the char code of an ASCII uppercase letter (match). */
function isUpper(code) {
    return code >= CODE_A && code <= CODE_Z;
}

/**
 * Split FASTA-like text into records.
 *
 * Blank records and records without a newline (a header with no sequence
 * line) are dropped. A trailing '\r' is removed from the definition line so
 * that CRLF input does not leak a carriage return into sequence ids.
 *
 * @param {string} text raw A3M/FASTA text
 * @returns {{defLine: string, seq: string}[]} definition line (without '>')
 *   and whitespace-free sequence of every record, in input order
 */
function splitEntries(text) {
    const entries = [];
    for (const entry of text.split('>')) {
        if (!NON_WHITESPACE.test(entry)) {
            continue;
        }
        const newlineIdx = entry.indexOf('\n');
        if (newlineIdx === -1) {
            continue;
        }
        let defLine = entry.slice(0, newlineIdx);
        if (defLine.endsWith('\r')) {
            defLine = defLine.slice(0, -1);
        }
        entries.push({
            defLine,
            seq: entry.slice(newlineIdx + 1).replace(ALL_WHITESPACE, ''),
        });
    }
    return entries;
}

/**
 * Break one raw A3M sequence into match columns and the insert that follows
 * each of them.
 *
 * - An uppercase letter opens a match column; the lowercase letters in the
 *   run of lowercase/'-'/'.' characters after it form its insert.
 * - A lowercase run that is not attached to a match (e.g. at the start of
 *   the sequence) opens a column with an empty match character.
 * - '-' and '.' outside such a run, and any other character, are skipped.
 *
 * NOTE(review): '-' is therefore never counted as a match column, although
 * the file header describes '-' as a deletion — confirm this is intended.
 *
 * @param {string} seq whitespace-free raw sequence
 * @returns {{matches: string[], inserts: string[]}} parallel arrays; inserts
 *   are already uppercased and contain residues only (no gap characters)
 */
function splitColumns(seq) {
    const matches = [];
    const inserts = [];
    let i = 0;
    while (i < seq.length) {
        const code = seq.charCodeAt(i);
        if (isUpper(code)) {
            matches.push(seq[i]);
            let residues = '';
            let j = i + 1;
            for (; j < seq.length; j++) {
                const c = seq.charCodeAt(j);
                if (isLower(c)) {
                    residues += seq[j];
                }
                else if (c !== CODE_DASH && c !== CODE_DOT) {
                    // Anything but insert residues and gaps ends the run
                    break;
                }
            }
            inserts.push(residues.toUpperCase());
            i = j;
        }
        else if (isLower(code)) {
            let j = i;
            while (j < seq.length && isLower(seq.charCodeAt(j))) {
                j++;
            }
            // Insert without a preceding match: rendered later as '-' + insert
            matches.push('');
            inserts.push(seq.slice(i, j).toUpperCase());
            i = j;
        }
        else {
            i++;
        }
    }
    return { matches, inserts };
}

/**
 * A3M multiple sequence alignment.
 *
 * Parses the text once in the constructor and stores the expanded alignment,
 * in which every row has the same width.
 */
export default class A3mMSA {
    MSA;
    orderedNames;
    /**
     * @param {string} text raw A3M text; both LF and CRLF line endings work
     */
    constructor(text) {
        const rawSeqs = [];
        const names = [];
        // Parse records like FASTA, preserving input order
        for (const { defLine, seq } of splitEntries(text)) {
            // The id is the definition line up to the first space
            const spaceIdx = defLine.indexOf(' ');
            const id = spaceIdx === -1 ? defLine : defLine.slice(0, spaceIdx);
            if (id) {
                rawSeqs.push(seq);
                names.push(id);
            }
        }
        this.orderedNames = names;
        this.MSA = { seqdata: this.expandA3M(rawSeqs, names) };
    }
    /**
     * Detect if text is likely A3M format
     *
     * The text must start with '>', contain at least two non-empty sequences,
     * contain at least one lowercase letter, and have the same number of
     * uppercase letters in every sequence.
     *
     * @param {string} text candidate file content
     * @returns {boolean} true when all of the above hold
     */
    static sniff(text) {
        if (!text.startsWith('>')) {
            return false;
        }
        const seqs = splitEntries(text)
            .map(entry => entry.seq)
            .filter(seq => seq.length > 0);
        if (seqs.length < 2) {
            return false;
        }
        let hasLowercase = false;
        let expectedUppercase = -1;
        for (const seq of seqs) {
            let uppercaseLen = 0;
            for (let i = 0; i < seq.length; i++) {
                const code = seq.charCodeAt(i);
                if (isLower(code)) {
                    hasLowercase = true;
                }
                else if (isUpper(code)) {
                    uppercaseLen++;
                }
            }
            if (expectedUppercase === -1) {
                expectedUppercase = uppercaseLen;
            }
            else if (uppercaseLen !== expectedUppercase) {
                // Differing match-column counts: cannot be A3M
                return false;
            }
        }
        return hasLowercase;
    }
    /**
     * Expand A3M format to standard aligned format.
     *
     * In A3M, lowercase characters are insertions that implicitly introduce
     * gaps in sequences that don't have an insert at that position. Every
     * column is rendered as its match character ('-' when empty or missing),
     * followed by its insert uppercased, padded with '.' to the longest
     * insert any sequence has at that column.
     *
     * @param {string[]} rawSeqs whitespace-free raw sequences
     * @param {string[]} names sequence ids, parallel to `rawSeqs`
     * @returns {Object<string, string>} expanded rows keyed by id; when an id
     *   occurs more than once the last occurrence wins
     */
    expandA3M(rawSeqs, names) {
        const numSeqs = names.length;
        if (numSeqs === 0) {
            return {};
        }
        const columnsBySeq = [];
        // Number of columns = longest row. Tracked with a running maximum
        // rather than Math.max(...lengths), which can exceed the engine's
        // argument limit on alignments with very many sequences.
        let numPositions = 0;
        for (let seqIdx = 0; seqIdx < numSeqs; seqIdx++) {
            const columns = splitColumns(rawSeqs[seqIdx]);
            if (columns.matches.length > numPositions) {
                numPositions = columns.matches.length;
            }
            columnsBySeq.push(columns);
        }
        // Longest insert at each column across all sequences
        const maxInserts = new Array(numPositions).fill(0);
        for (const { inserts } of columnsBySeq) {
            for (let pos = 0; pos < inserts.length; pos++) {
                if (inserts[pos].length > maxInserts[pos]) {
                    maxInserts[pos] = inserts[pos].length;
                }
            }
        }
        // Build expanded sequences
        const expanded = {};
        for (let seqIdx = 0; seqIdx < numSeqs; seqIdx++) {
            const { matches, inserts } = columnsBySeq[seqIdx];
            const parts = [];
            for (let pos = 0; pos < numPositions; pos++) {
                const maxIns = maxInserts[pos];
                if (pos < matches.length) {
                    const insert = inserts[pos];
                    parts.push(matches[pos] || '-', insert, '.'.repeat(maxIns - insert.length));
                }
                else {
                    // This sequence has fewer columns - fill with gaps
                    parts.push('-', '.'.repeat(maxIns));
                }
            }
            expanded[names[seqIdx]] = parts.join('');
        }
        return expanded;
    }
    /** @returns {{seqdata: Object<string, string>}} the expanded alignment */
    getMSA() {
        return this.MSA;
    }
    /** A3M has no per-row annotations. */
    getRowData() {
        return undefined;
    }
    /** @returns {string[]} sequence ids in input order */
    getNames() {
        return this.orderedNames;
    }
    /**
     * @param {string} name sequence id
     * @returns {string} the expanded row, or '' when the id is unknown
     */
    getRow(name) {
        return this.MSA.seqdata[name] || '';
    }
    /** @returns {number} alignment width, taken from the first stored row */
    getWidth() {
        const name = Object.keys(this.MSA.seqdata)[0];
        return name ? this.getRow(name).length : 0;
    }
    /** A3M has no structure links. */
    getStructures() {
        return {};
    }
    /** A3M text holds a single, unnamed alignment. */
    get alignmentNames() {
        return [];
    }
    /** A3M has no header metadata. */
    getHeader() {
        return {};
    }
    /**
     * @returns a placeholder tree (flagged `noTree`) whose root has one
     *   childless node per sequence, in input order
     */
    getTree() {
        return {
            id: 'root',
            name: 'root',
            noTree: true,
            children: this.getNames().map(name => ({
                id: name,
                children: [],
                name,
            })),
        };
    }
    /** A3M has no consensus line. */
    get seqConsensus() {
        return undefined;
    }
    /** A3M has no secondary-structure consensus line. */
    get secondaryStructureConsensus() {
        return undefined;
    }
    /** A3M has no annotation tracks. */
    get tracks() {
        return [];
    }
}
|
|
280
|
+
//# sourceMappingURL=A3mMSA.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"A3mMSA.js","sourceRoot":"","sources":["../../src/msa/A3mMSA.ts"],"names":[],"mappings":"AAEA;;;;;;;;;;;;;;;;;;;;GAoBG;AAEH,sDAAsD;AACtD,MAAM,MAAM,GAAG,EAAE,CAAA,CAAC,MAAM;AACxB,MAAM,MAAM,GAAG,EAAE,CAAA,CAAC,MAAM;AACxB,MAAM,MAAM,GAAG,EAAE,CAAA,CAAC,MAAM;AACxB,MAAM,MAAM,GAAG,GAAG,CAAA,CAAC,MAAM;AACzB,MAAM,SAAS,GAAG,EAAE,CAAA,CAAC,MAAM;AAC3B,MAAM,QAAQ,GAAG,EAAE,CAAA,CAAC,MAAM;AAE1B,SAAS,OAAO,CAAC,IAAY;IAC3B,OAAO,IAAI,IAAI,MAAM,IAAI,IAAI,IAAI,MAAM,CAAA;AACzC,CAAC;AAED,MAAM,CAAC,OAAO,OAAO,MAAM;IACjB,GAAG,CAAqC;IACxC,YAAY,CAAU;IAE9B,YAAY,IAAY;QACtB,MAAM,OAAO,GAAa,EAAE,CAAA;QAC5B,MAAM,KAAK,GAAa,EAAE,CAAA;QAE1B,6DAA6D;QAC7D,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,CAAC;YACpC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;gBACtB,SAAQ;YACV,CAAC;YACD,MAAM,UAAU,GAAG,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,CAAA;YACtC,IAAI,UAAU,KAAK,CAAC,CAAC,EAAE,CAAC;gBACtB,SAAQ;YACV,CAAC;YACD,MAAM,OAAO,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,UAAU,CAAC,CAAA;YAC1C,MAAM,QAAQ,GAAG,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,CAAA;YACrC,MAAM,EAAE,GAAG,QAAQ,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAA;YACjE,IAAI,EAAE,EAAE,CAAC;gBACP,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,UAAU,GAAG,CAAC,CAAC,CAAC,UAAU,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC,CAAA;gBAC/D,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;YAChB,CAAC;QACH,CAAC;QAED,IAAI,CAAC,YAAY,GAAG,KAAK,CAAA;QACzB,IAAI,CAAC,GAAG,GAAG,EAAE,OAAO,EAAE,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,KAAK,CAAC,EAAE,CAAA;IACxD,CAAC;IAED;;OAEG;IACH,MAAM,CAAC,KAAK,CAAC,IAAY;QACvB,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;YAC1B,OAAO,KAAK,CAAA;QACd,CAAC;QAED,MAAM,IAAI,GAAa,EAAE,CAAA;QACzB,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,CAAC;YACpC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;gBACtB,SAAQ;YACV,CAAC;YACD,MAAM,UAAU,GAAG,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,CAAA;YACtC,IAAI,UAAU,KAAK,CAAC,CAAC,EAAE,CAAC;gBACtB,SAAQ;YACV,CAAC;YACD,MAAM,GAAG,GAAG,KAAK,CAAC,KAAK,CAAC,UAAU,GAAG,CAAC,CAAC,CAAC,UAAU,CAAC,KAAK,EAAE,EAAE,CAAC,CAAA;
YAC7D,IAAI,GAAG,EAAE,CAAC;gBACR,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;YAChB,CAAC;QACH,CAAC;QAED,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACpB,OAAO,KAAK,CAAA;QACd,CAAC;QAED,sEAAsE;QACtE,8DAA8D;QAC9D,IAAI,YAAY,GAAG,KAAK,CAAA;QACxB,IAAI,iBAAiB,GAAG,CAAC,CAAC,CAAA;QAC1B,IAAI,mBAAmB,GAAG,IAAI,CAAA;QAE9B,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;YACvB,IAAI,YAAY,GAAG,CAAC,CAAA;YACpB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBACpC,MAAM,IAAI,GAAG,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,CAAA;gBAC9B,IAAI,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC;oBAClB,YAAY,GAAG,IAAI,CAAA;gBACrB,CAAC;qBAAM,IAAI,IAAI,IAAI,MAAM,IAAI,IAAI,IAAI,MAAM,EAAE,CAAC;oBAC5C,YAAY,EAAE,CAAA;gBAChB,CAAC;YACH,CAAC;YAED,IAAI,iBAAiB,KAAK,CAAC,CAAC,EAAE,CAAC;gBAC7B,iBAAiB,GAAG,YAAY,CAAA;YAClC,CAAC;iBAAM,CAAC;gBACN,IAAI,YAAY,KAAK,iBAAiB,EAAE,CAAC;oBACvC,mBAAmB,GAAG,KAAK,CAAA;gBAC7B,CAAC;YACH,CAAC;QACH,CAAC;QAED,OAAO,YAAY,IAAI,mBAAmB,CAAA;IAC5C,CAAC;IAED;;;;;;;OAOG;IACK,SAAS,CACf,OAAiB,EACjB,KAAe;QAEf,MAAM,OAAO,GAAG,KAAK,CAAC,MAAM,CAAA;QAC5B,IAAI,OAAO,KAAK,CAAC,EAAE,CAAC;YAClB,OAAO,EAAE,CAAA;QACX,CAAC;QAED,2EAA2E;QAC3E,yEAAyE;QACzE,MAAM,UAAU,GAAe,EAAE,CAAA;QACjC,MAAM,aAAa,GAAe,EAAE,CAAA;QAEpC,KAAK,IAAI,MAAM,GAAG,CAAC,EAAE,MAAM,GAAG,OAAO,EAAE,MAAM,EAAE,EAAE,CAAC;YAChD,MAAM,GAAG,GAAG,OAAO,CAAC,MAAM,CAAE,CAAA;YAC5B,MAAM,OAAO,GAAa,EAAE,CAAA;YAC5B,MAAM,OAAO,GAAa,EAAE,CAAA;YAC5B,IAAI,CAAC,GAAG,CAAC,CAAA;YAET,OAAO,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC;gBACtB,MAAM,IAAI,GAAG,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,CAAA;gBAE9B,IAAI,IAAI,IAAI,MAAM,IAAI,IAAI,IAAI,MAAM,EAAE,CAAC;oBACrC,kCAAkC;oBAClC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAE,CAAC,CAAA;oBACrB,+DAA+D;oBAC/D,IAAI,GAAG,GAAG,EAAE,CAAA;oBACZ,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAA;oBACb,OAAO,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC;wBACtB,MAAM,CAAC,GAAG,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,CAAA;wBAC3B,IAAI,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,SAAS,IAAI,CAAC,KAAK,QAAQ,EAAE,CAAC;4BACpD,GAAG,IAAI,GAAG,CAAC,CAAC,CAAE,CAAA;4BACd,CAAC,EAAE,CAAA;wBACL,CAAC;6BAAM,CAAC;4BACN,MAAK;wBACP,CAAC;oBACH,CAAC;o
BACD,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;oBACjB,CAAC,GAAG,CAAC,CAAA;gBACP,CAAC;qBAAM,IAAI,IAAI,KAAK,SAAS,IAAI,IAAI,KAAK,QAAQ,EAAE,CAAC;oBACnD,wCAAwC;oBACxC,CAAC,EAAE,CAAA;gBACL,CAAC;qBAAM,IAAI,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC;oBACzB,oCAAoC;oBACpC,IAAI,GAAG,GAAG,EAAE,CAAA;oBACZ,OAAO,CAAC,GAAG,GAAG,CAAC,MAAM,IAAI,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;wBACpD,GAAG,IAAI,GAAG,CAAC,CAAC,CAAE,CAAA;wBACd,CAAC,EAAE,CAAA;oBACL,CAAC;oBACD,mCAAmC;oBACnC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;oBAChB,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;gBACnB,CAAC;qBAAM,CAAC;oBACN,CAAC,EAAE,CAAA;gBACL,CAAC;YACH,CAAC;YAED,UAAU,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;YACxB,aAAa,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;QAC7B,CAAC;QAED,oEAAoE;QACpE,MAAM,YAAY,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAA;QAElE,2EAA2E;QAC3E,MAAM,UAAU,GAAG,IAAI,KAAK,CAAS,YAAY,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QAC1D,KAAK,IAAI,MAAM,GAAG,CAAC,EAAE,MAAM,GAAG,OAAO,EAAE,MAAM,EAAE,EAAE,CAAC;YAChD,MAAM,OAAO,GAAG,aAAa,CAAC,MAAM,CAAE,CAAA;YACtC,KAAK,IAAI,GAAG,GAAG,CAAC,EAAE,GAAG,GAAG,OAAO,CAAC,MAAM,EAAE,GAAG,EAAE,EAAE,CAAC;gBAC9C,oDAAoD;gBACpD,IAAI,OAAO,GAAG,CAAC,CAAA;gBACf,KAAK,MAAM,CAAC,IAAI,OAAO,CAAC,GAAG,CAAE,EAAE,CAAC;oBAC9B,IAAI,OAAO,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;wBAC7B,OAAO,EAAE,CAAA;oBACX,CAAC;gBACH,CAAC;gBACD,IAAI,OAAO,GAAG,UAAU,CAAC,GAAG,CAAE,EAAE,CAAC;oBAC/B,UAAU,CAAC,GAAG,CAAC,GAAG,OAAO,CAAA;gBAC3B,CAAC;YACH,CAAC;QACH,CAAC;QAED,2BAA2B;QAC3B,MAAM,QAAQ,GAA2B,EAAE,CAAA;QAE3C,KAAK,IAAI,MAAM,GAAG,CAAC,EAAE,MAAM,GAAG,OAAO,EAAE,MAAM,EAAE,EAAE,CAAC;YAChD,MAAM,OAAO,GAAG,UAAU,CAAC,MAAM,CAAE,CAAA;YACnC,MAAM,OAAO,GAAG,aAAa,CAAC,MAAM,CAAE,CAAA;YACtC,MAAM,MAAM,GAAa,EAAE,CAAA;YAE3B,KAAK,IAAI,GAAG,GAAG,CAAC,EAAE,GAAG,GAAG,YAAY,EAAE,GAAG,EAAE,EAAE,CAAC;gBAC5C,MAAM,MAAM,GAAG,UAAU,CAAC,GAAG,CAAE,CAAA;gBAE/B,IAAI,GAAG,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;oBACzB,MAAM,SAAS,GAAG,OAAO,CAAC,GAAG,CAAE,CAAA;oBAC/B,MAAM,UAAU,GAAG,OAAO,CAAC,GAAG,CAAC,IAAI,EAAE,CAAA;oBAErC,wCAAwC;oBAC
xC,MAAM,CAAC,IAAI,CAAC,SAAS,IAAI,GAAG,CAAC,CAAA;oBAE7B,yBAAyB;oBACzB,IAAI,SAAS,GAAG,EAAE,CAAA;oBAClB,KAAK,MAAM,CAAC,IAAI,UAAU,EAAE,CAAC;wBAC3B,IAAI,OAAO,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;4BAC7B,SAAS,IAAI,CAAC,CAAC,WAAW,EAAE,CAAA;wBAC9B,CAAC;oBACH,CAAC;oBAED,sCAAsC;oBACtC,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,CAAA;oBAEtB,2CAA2C;oBAC3C,MAAM,OAAO,GAAG,MAAM,GAAG,SAAS,CAAC,MAAM,CAAA;oBACzC,IAAI,OAAO,GAAG,CAAC,EAAE,CAAC;wBAChB,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAA;oBAClC,CAAC;gBACH,CAAC;qBAAM,CAAC;oBACN,sCAAsC;oBACtC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;oBAChB,IAAI,MAAM,GAAG,CAAC,EAAE,CAAC;wBACf,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAA;oBACjC,CAAC;gBACH,CAAC;YACH,CAAC;YAED,QAAQ,CAAC,KAAK,CAAC,MAAM,CAAE,CAAC,GAAG,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;QAC5C,CAAC;QAED,OAAO,QAAQ,CAAA;IACjB,CAAC;IAED,MAAM;QACJ,OAAO,IAAI,CAAC,GAAG,CAAA;IACjB,CAAC;IAED,UAAU;QACR,OAAO,SAAS,CAAA;IAClB,CAAC;IAED,QAAQ;QACN,OAAO,IAAI,CAAC,YAAY,CAAA;IAC1B,CAAC;IAED,MAAM,CAAC,IAAY;QACjB,OAAO,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,EAAE,CAAA;IACrC,CAAC;IAED,QAAQ;QACN,MAAM,IAAI,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAA;QAC7C,OAAO,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAA;IAC5C,CAAC;IAED,aAAa;QACX,OAAO,EAAE,CAAA;IACX,CAAC;IAED,IAAI,cAAc;QAChB,OAAO,EAAE,CAAA;IACX,CAAC;IAED,SAAS;QACP,OAAO,EAAE,CAAA;IACX,CAAC;IAED,OAAO;QACL,OAAO;YACL,EAAE,EAAE,MAAM;YACV,IAAI,EAAE,MAAM;YACZ,MAAM,EAAE,IAAI;YACZ,QAAQ,EAAE,IAAI,CAAC,QAAQ,EAAE,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;gBACrC,EAAE,EAAE,IAAI;gBACR,QAAQ,EAAE,EAAE;gBACZ,IAAI;aACL,CAAC,CAAC;SACJ,CAAA;IACH,CAAC;IAED,IAAI,YAAY;QACd,OAAO,SAAS,CAAA;IAClB,CAAC;IAED,IAAI,2BAA2B;QAC7B,OAAO,SAAS,CAAA;IAClB,CAAC;IAED,IAAI,MAAM;QACR,OAAO,EAAE,CAAA;IACX,CAAC;CACF"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
import { describe, expect, test } from 'vitest';
|
|
2
|
+
import A3mMSA from './A3mMSA';
|
|
3
|
+
describe('A3mMSA', () => {
|
|
4
|
+
describe('sniff', () => {
|
|
5
|
+
test('returns false for non-FASTA text', () => {
|
|
6
|
+
expect(A3mMSA.sniff('not fasta')).toBe(false);
|
|
7
|
+
expect(A3mMSA.sniff('CLUSTAL W')).toBe(false);
|
|
8
|
+
});
|
|
9
|
+
test('returns false for regular FASTA', () => {
|
|
10
|
+
const fasta = `>seq1
|
|
11
|
+
ACDEFGHIKLMNPQRSTVWY
|
|
12
|
+
>seq2
|
|
13
|
+
ACDEFGHIKLMNPQRSTVWY`;
|
|
14
|
+
expect(A3mMSA.sniff(fasta)).toBe(false);
|
|
15
|
+
});
|
|
16
|
+
test('returns true for A3M format', () => {
|
|
17
|
+
const a3m = `>seq1
|
|
18
|
+
ACDEFghiKLMNPQ
|
|
19
|
+
>seq2
|
|
20
|
+
ACDEF---KLMNPQ`;
|
|
21
|
+
expect(A3mMSA.sniff(a3m)).toBe(true);
|
|
22
|
+
});
|
|
23
|
+
test('returns false for single sequence', () => {
|
|
24
|
+
const a3m = `>seq1
|
|
25
|
+
ACDEFghiKLMNPQ`;
|
|
26
|
+
expect(A3mMSA.sniff(a3m)).toBe(false);
|
|
27
|
+
});
|
|
28
|
+
});
|
|
29
|
+
describe('parsing', () => {
|
|
30
|
+
test('parses simple A3M', () => {
|
|
31
|
+
const a3m = `>seq1
|
|
32
|
+
ACDEFghiKLMNPQ
|
|
33
|
+
>seq2
|
|
34
|
+
ACDEF---KLMNPQ`;
|
|
35
|
+
const msa = new A3mMSA(a3m);
|
|
36
|
+
expect(msa.getNames()).toEqual(['seq1', 'seq2']);
|
|
37
|
+
const seq1 = msa.getRow('seq1');
|
|
38
|
+
const seq2 = msa.getRow('seq2');
|
|
39
|
+
expect(seq1.length).toBe(seq2.length);
|
|
40
|
+
expect(seq1).toContain('GHI');
|
|
41
|
+
});
|
|
42
|
+
test('expands lowercase insertions', () => {
|
|
43
|
+
const a3m = `>seq1
|
|
44
|
+
ACabc
|
|
45
|
+
>seq2
|
|
46
|
+
AC---`;
|
|
47
|
+
const msa = new A3mMSA(a3m);
|
|
48
|
+
const seq1 = msa.getRow('seq1');
|
|
49
|
+
const seq2 = msa.getRow('seq2');
|
|
50
|
+
expect(seq1).toBe('ACABC');
|
|
51
|
+
expect(seq2).toBe('AC...');
|
|
52
|
+
});
|
|
53
|
+
test('handles multiple insertions', () => {
|
|
54
|
+
const a3m = `>seq1
|
|
55
|
+
AabcDdefG
|
|
56
|
+
>seq2
|
|
57
|
+
A---D---G`;
|
|
58
|
+
const msa = new A3mMSA(a3m);
|
|
59
|
+
const seq1 = msa.getRow('seq1');
|
|
60
|
+
const seq2 = msa.getRow('seq2');
|
|
61
|
+
expect(seq1.length).toBe(seq2.length);
|
|
62
|
+
});
|
|
63
|
+
test('getWidth returns correct width', () => {
|
|
64
|
+
const a3m = `>seq1
|
|
65
|
+
ACDEF
|
|
66
|
+
>seq2
|
|
67
|
+
ACDEF`;
|
|
68
|
+
const msa = new A3mMSA(a3m);
|
|
69
|
+
expect(msa.getWidth()).toBe(5);
|
|
70
|
+
});
|
|
71
|
+
test('getMSA returns seqdata', () => {
|
|
72
|
+
const a3m = `>seq1
|
|
73
|
+
ACDEF
|
|
74
|
+
>seq2
|
|
75
|
+
GHIKL`;
|
|
76
|
+
const msa = new A3mMSA(a3m);
|
|
77
|
+
const data = msa.getMSA();
|
|
78
|
+
expect(data.seqdata).toHaveProperty('seq1');
|
|
79
|
+
expect(data.seqdata).toHaveProperty('seq2');
|
|
80
|
+
});
|
|
81
|
+
test('getTree returns noTree structure', () => {
|
|
82
|
+
const a3m = `>seq1
|
|
83
|
+
ACDEF
|
|
84
|
+
>seq2
|
|
85
|
+
GHIKL`;
|
|
86
|
+
const msa = new A3mMSA(a3m);
|
|
87
|
+
const tree = msa.getTree();
|
|
88
|
+
expect(tree.noTree).toBe(true);
|
|
89
|
+
expect(tree.children).toHaveLength(2);
|
|
90
|
+
});
|
|
91
|
+
test('handles empty sequences', () => {
|
|
92
|
+
const a3m = `>seq1
|
|
93
|
+
ACDEF`;
|
|
94
|
+
const msa = new A3mMSA(a3m);
|
|
95
|
+
expect(msa.getNames()).toEqual(['seq1']);
|
|
96
|
+
});
|
|
97
|
+
test('handles sequences with only ID on defline', () => {
|
|
98
|
+
const a3m = `>seq1 description here
|
|
99
|
+
ACDEF
|
|
100
|
+
>seq2 another description
|
|
101
|
+
GHIKL`;
|
|
102
|
+
const msa = new A3mMSA(a3m);
|
|
103
|
+
expect(msa.getNames()).toEqual(['seq1', 'seq2']);
|
|
104
|
+
});
|
|
105
|
+
test('preserves sequence order', () => {
|
|
106
|
+
const a3m = `>z_seq
|
|
107
|
+
AAAAA
|
|
108
|
+
>a_seq
|
|
109
|
+
CCCCC
|
|
110
|
+
>m_seq
|
|
111
|
+
DDDDD`;
|
|
112
|
+
const msa = new A3mMSA(a3m);
|
|
113
|
+
expect(msa.getNames()).toEqual(['z_seq', 'a_seq', 'm_seq']);
|
|
114
|
+
});
|
|
115
|
+
});
|
|
116
|
+
describe('properties', () => {
|
|
117
|
+
test('alignmentNames is empty array', () => {
|
|
118
|
+
const a3m = `>seq1
|
|
119
|
+
ACDEF`;
|
|
120
|
+
const msa = new A3mMSA(a3m);
|
|
121
|
+
expect(msa.alignmentNames).toEqual([]);
|
|
122
|
+
});
|
|
123
|
+
test('seqConsensus is undefined', () => {
|
|
124
|
+
const a3m = `>seq1
|
|
125
|
+
ACDEF`;
|
|
126
|
+
const msa = new A3mMSA(a3m);
|
|
127
|
+
expect(msa.seqConsensus).toBeUndefined();
|
|
128
|
+
});
|
|
129
|
+
test('secondaryStructureConsensus is undefined', () => {
|
|
130
|
+
const a3m = `>seq1
|
|
131
|
+
ACDEF`;
|
|
132
|
+
const msa = new A3mMSA(a3m);
|
|
133
|
+
expect(msa.secondaryStructureConsensus).toBeUndefined();
|
|
134
|
+
});
|
|
135
|
+
test('tracks is empty array', () => {
|
|
136
|
+
const a3m = `>seq1
|
|
137
|
+
ACDEF`;
|
|
138
|
+
const msa = new A3mMSA(a3m);
|
|
139
|
+
expect(msa.tracks).toEqual([]);
|
|
140
|
+
});
|
|
141
|
+
test('getStructures returns empty object', () => {
|
|
142
|
+
const a3m = `>seq1
|
|
143
|
+
ACDEF`;
|
|
144
|
+
const msa = new A3mMSA(a3m);
|
|
145
|
+
expect(msa.getStructures()).toEqual({});
|
|
146
|
+
});
|
|
147
|
+
test('getHeader returns empty object', () => {
|
|
148
|
+
const a3m = `>seq1
|
|
149
|
+
ACDEF`;
|
|
150
|
+
const msa = new A3mMSA(a3m);
|
|
151
|
+
expect(msa.getHeader()).toEqual({});
|
|
152
|
+
});
|
|
153
|
+
});
|
|
154
|
+
});
|
|
155
|
+
//# sourceMappingURL=A3mMSA.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"A3mMSA.test.js","sourceRoot":"","sources":["../../src/msa/A3mMSA.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,QAAQ,CAAA;AAE/C,OAAO,MAAM,MAAM,UAAU,CAAA;AAE7B,QAAQ,CAAC,QAAQ,EAAE,GAAG,EAAE;IACtB,QAAQ,CAAC,OAAO,EAAE,GAAG,EAAE;QACrB,IAAI,CAAC,kCAAkC,EAAE,GAAG,EAAE;YAC5C,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;YAC7C,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;QAC/C,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,iCAAiC,EAAE,GAAG,EAAE;YAC3C,MAAM,KAAK,GAAG;;;qBAGC,CAAA;YACf,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;QACzC,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,6BAA6B,EAAE,GAAG,EAAE;YACvC,MAAM,GAAG,GAAG;;;eAGH,CAAA;YACT,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QACtC,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,mCAAmC,EAAE,GAAG,EAAE;YAC7C,MAAM,GAAG,GAAG;eACH,CAAA;YACT,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;QACvC,CAAC,CAAC,CAAA;IACJ,CAAC,CAAC,CAAA;IAEF,QAAQ,CAAC,SAAS,EAAE,GAAG,EAAE;QACvB,IAAI,CAAC,mBAAmB,EAAE,GAAG,EAAE;YAC7B,MAAM,GAAG,GAAG;;;eAGH,CAAA;YACT,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAE3B,MAAM,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAA;YAChD,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,CAAA;YAC/B,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,CAAA;YAE/B,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;YACrC,MAAM,CAAC,IAAI,CAAC,CAAC,SAAS,CAAC,KAAK,CAAC,CAAA;QAC/B,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,8BAA8B,EAAE,GAAG,EAAE;YACxC,MAAM,GAAG,GAAG;;;MAGZ,CAAA;YACA,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAE3B,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,CAAA;YAC/B,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,CAAA;YAE/B,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;YAC1B,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;QAC5B,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,6BAA6B,EAAE,GAAG,EAAE;YACvC,MAAM,GAAG,GAAG;;;UAGR,CAAA;YACJ,MAAM,GAAG,GAA
G,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAE3B,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,CAAA;YAC/B,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,CAAA;YAE/B,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;QACvC,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,gCAAgC,EAAE,GAAG,EAAE;YAC1C,MAAM,GAAG,GAAG;;;MAGZ,CAAA;YACA,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAE3B,MAAM,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QAChC,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,wBAAwB,EAAE,GAAG,EAAE;YAClC,MAAM,GAAG,GAAG;;;MAGZ,CAAA;YACA,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAC3B,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,EAAE,CAAA;YAEzB,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,cAAc,CAAC,MAAM,CAAC,CAAA;YAC3C,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,cAAc,CAAC,MAAM,CAAC,CAAA;QAC7C,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,kCAAkC,EAAE,GAAG,EAAE;YAC5C,MAAM,GAAG,GAAG;;;MAGZ,CAAA;YACA,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAC3B,MAAM,IAAI,GAAG,GAAG,CAAC,OAAO,EAAE,CAAA;YAE1B,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;YAC9B,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;QACvC,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,yBAAyB,EAAE,GAAG,EAAE;YACnC,MAAM,GAAG,GAAG;MACZ,CAAA;YACA,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAE3B,MAAM,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,CAAC,CAAA;QAC1C,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,2CAA2C,EAAE,GAAG,EAAE;YACrD,MAAM,GAAG,GAAG;;;MAGZ,CAAA;YACA,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAE3B,MAAM,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAA;QAClD,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,0BAA0B,EAAE,GAAG,EAAE;YACpC,MAAM,GAAG,GAAG;;;;;MAKZ,CAAA;YACA,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAE3B,MAAM,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,OAAO,EAAE,OAAO,EAAE,OAAO,CAAC,CAAC,CAAA;QAC7D,CAAC,CAAC,CAAA;IACJ,CAAC,CAAC,CAAA;IAEF,QAAQ,CAAC,YAAY,EAAE,GAAG,EAAE;QAC1B,IAAI,CAAC,+BAA+B,EAAE,GAAG,EAAE;YACzC,MAAM,GAAG,GAAG;MACZ,CAAA;YACA,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAE3B,MAAM,CAAC,GAAG,CAAC,cAAc,CAAC,CAA
C,OAAO,CAAC,EAAE,CAAC,CAAA;QACxC,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,2BAA2B,EAAE,GAAG,EAAE;YACrC,MAAM,GAAG,GAAG;MACZ,CAAA;YACA,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAE3B,MAAM,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC,aAAa,EAAE,CAAA;QAC1C,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,0CAA0C,EAAE,GAAG,EAAE;YACpD,MAAM,GAAG,GAAG;MACZ,CAAA;YACA,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAE3B,MAAM,CAAC,GAAG,CAAC,2BAA2B,CAAC,CAAC,aAAa,EAAE,CAAA;QACzD,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,uBAAuB,EAAE,GAAG,EAAE;YACjC,MAAM,GAAG,GAAG;MACZ,CAAA;YACA,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAE3B,MAAM,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAA;QAChC,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,oCAAoC,EAAE,GAAG,EAAE;YAC9C,MAAM,GAAG,GAAG;MACZ,CAAA;YACA,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAE3B,MAAM,CAAC,GAAG,CAAC,aAAa,EAAE,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAA;QACzC,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,gCAAgC,EAAE,GAAG,EAAE;YAC1C,MAAM,GAAG,GAAG;MACZ,CAAA;YACA,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAE3B,MAAM,CAAC,GAAG,CAAC,SAAS,EAAE,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAA;QACrC,CAAC,CAAC,CAAA;IACJ,CAAC,CAAC,CAAA;AACJ,CAAC,CAAC,CAAA"}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import type { NodeWithIds } from '../types';
|
|
2
|
+
export default class ClustalMSA {
|
|
3
|
+
private MSA;
|
|
4
|
+
constructor(text: string);
|
|
5
|
+
getMSA(): {
|
|
6
|
+
consensus: string;
|
|
7
|
+
alns: {
|
|
8
|
+
id: string;
|
|
9
|
+
seq: string;
|
|
10
|
+
}[];
|
|
11
|
+
header: {
|
|
12
|
+
info: string;
|
|
13
|
+
version: string | undefined;
|
|
14
|
+
};
|
|
15
|
+
};
|
|
16
|
+
getRow(name: string): string;
|
|
17
|
+
getWidth(): number;
|
|
18
|
+
getRowData(): undefined;
|
|
19
|
+
getHeader(): {
|
|
20
|
+
info: string;
|
|
21
|
+
version: string | undefined;
|
|
22
|
+
};
|
|
23
|
+
getNames(): string[];
|
|
24
|
+
getStructures(): {};
|
|
25
|
+
get alignmentNames(): never[];
|
|
26
|
+
getTree(): NodeWithIds;
|
|
27
|
+
get seqConsensus(): string;
|
|
28
|
+
get secondaryStructureConsensus(): undefined;
|
|
29
|
+
get tracks(): never[];
|
|
30
|
+
}
|