msa-parsers 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. package/dist/gff/gffToInterPro.d.ts +20 -0
  2. package/dist/gff/gffToInterPro.js +83 -0
  3. package/dist/gff/gffToInterPro.js.map +1 -0
  4. package/dist/gff/gffToInterPro.test.d.ts +1 -0
  5. package/dist/gff/gffToInterPro.test.js +181 -0
  6. package/dist/gff/gffToInterPro.test.js.map +1 -0
  7. package/dist/gff/index.d.ts +3 -0
  8. package/dist/gff/index.js +4 -0
  9. package/dist/gff/index.js.map +1 -0
  10. package/dist/gff/interProToGFF.d.ts +9 -0
  11. package/dist/gff/interProToGFF.js +48 -0
  12. package/dist/gff/interProToGFF.js.map +1 -0
  13. package/dist/gff/interProToGFF.test.d.ts +1 -0
  14. package/dist/gff/interProToGFF.test.js +189 -0
  15. package/dist/gff/interProToGFF.test.js.map +1 -0
  16. package/dist/gff/parseGFF.d.ts +2 -0
  17. package/dist/gff/parseGFF.js +41 -0
  18. package/dist/gff/parseGFF.js.map +1 -0
  19. package/dist/gff/parseGFF.test.d.ts +1 -0
  20. package/dist/gff/parseGFF.test.js +92 -0
  21. package/dist/gff/parseGFF.test.js.map +1 -0
  22. package/dist/index.d.ts +5 -0
  23. package/dist/index.js +9 -0
  24. package/dist/index.js.map +1 -0
  25. package/dist/msa/A3mMSA.d.ts +33 -0
  26. package/dist/msa/A3mMSA.js +280 -0
  27. package/dist/msa/A3mMSA.js.map +1 -0
  28. package/dist/msa/A3mMSA.test.d.ts +1 -0
  29. package/dist/msa/A3mMSA.test.js +155 -0
  30. package/dist/msa/A3mMSA.test.js.map +1 -0
  31. package/dist/msa/ClustalMSA.d.ts +30 -0
  32. package/dist/msa/ClustalMSA.js +53 -0
  33. package/dist/msa/ClustalMSA.js.map +1 -0
  34. package/dist/msa/EmfMSA.d.ts +27 -0
  35. package/dist/msa/EmfMSA.js +53 -0
  36. package/dist/msa/EmfMSA.js.map +1 -0
  37. package/dist/msa/FastaMSA.d.ts +19 -0
  38. package/dist/msa/FastaMSA.js +69 -0
  39. package/dist/msa/FastaMSA.js.map +1 -0
  40. package/dist/msa/StockholmMSA.d.ts +54 -0
  41. package/dist/msa/StockholmMSA.js +113 -0
  42. package/dist/msa/StockholmMSA.js.map +1 -0
  43. package/dist/msa/index.d.ts +18 -0
  44. package/dist/msa/index.js +34 -0
  45. package/dist/msa/index.js.map +1 -0
  46. package/dist/msa/index.test.d.ts +1 -0
  47. package/dist/msa/index.test.js +60 -0
  48. package/dist/msa/index.test.js.map +1 -0
  49. package/dist/msa/parseNewick.d.ts +60 -0
  50. package/dist/msa/parseNewick.js +95 -0
  51. package/dist/msa/parseNewick.js.map +1 -0
  52. package/dist/msa/stockholmParser.d.ts +22 -0
  53. package/dist/msa/stockholmParser.js +141 -0
  54. package/dist/msa/stockholmParser.js.map +1 -0
  55. package/dist/msa/stockholmParser.test.d.ts +1 -0
  56. package/dist/msa/stockholmParser.test.js +111 -0
  57. package/dist/msa/stockholmParser.test.js.map +1 -0
  58. package/dist/types.d.ts +66 -0
  59. package/dist/types.js +2 -0
  60. package/dist/types.js.map +1 -0
  61. package/dist/util.d.ts +2 -0
  62. package/dist/util.js +10 -0
  63. package/dist/util.js.map +1 -0
  64. package/package.json +25 -0
  65. package/src/gff/gffToInterPro.test.ts +202 -0
  66. package/src/gff/gffToInterPro.ts +113 -0
  67. package/src/gff/index.ts +3 -0
  68. package/src/gff/interProToGFF.test.ts +206 -0
  69. package/src/gff/interProToGFF.ts +59 -0
  70. package/src/gff/parseGFF.test.ts +106 -0
  71. package/src/gff/parseGFF.ts +46 -0
  72. package/src/index.ts +29 -0
  73. package/src/msa/A3mMSA.test.ts +192 -0
  74. package/src/msa/A3mMSA.ts +320 -0
  75. package/src/msa/ClustalMSA.ts +67 -0
  76. package/src/msa/EmfMSA.ts +67 -0
  77. package/src/msa/FastaMSA.ts +82 -0
  78. package/src/msa/StockholmMSA.ts +141 -0
  79. package/src/msa/index.test.ts +74 -0
  80. package/src/msa/index.ts +44 -0
  81. package/src/msa/parseNewick.ts +94 -0
  82. package/src/msa/stockholmParser.test.ts +123 -0
  83. package/src/msa/stockholmParser.ts +157 -0
  84. package/src/types.ts +68 -0
  85. package/src/util.ts +19 -0
@@ -0,0 +1,92 @@
1
+ import { describe, expect, test } from 'vitest';
2
+ import { parseGFF } from './parseGFF';
3
// Behavioural checks for parseGFF: one tab-separated feature line in, one
// flat record out, with column-9 attributes merged onto the record.
describe('parseGFF', () => {
    test('parses empty string', () => {
        // Both the empty string and a missing argument yield no records.
        for (const blank of ['', undefined]) {
            expect(parseGFF(blank)).toEqual([]);
        }
    });

    test('parses basic GFF3 line', () => {
        const records = parseGFF('seq1\tInterProScan\tprotein_match\t10\t50\t.\t+\t.\tName=PF00001');
        const expected = {
            seq_id: 'seq1',
            source: 'InterProScan',
            type: 'protein_match',
            start: 10,
            end: 50,
            score: 0,
            strand: '+',
            phase: '.',
            Name: 'PF00001',
        };
        expect(records).toHaveLength(1);
        expect(records[0]).toEqual(expected);
    });

    test('parses multiple attributes', () => {
        const [first] = parseGFF('seq1\tPfam\tprotein_match\t10\t50\t1.5\t.\t.\tName=PF00001;signature_desc=7tm_1;description=GPCR');
        expect(first).toMatchObject({
            Name: 'PF00001',
            signature_desc: '7tm_1',
            description: 'GPCR',
        });
    });

    test('handles URL-encoded attribute values', () => {
        const [first] = parseGFF('seq1\tSource\ttype\t1\t10\t.\t.\t.\tNote=Hello%20World%3B%3D');
        expect(first?.Note).toBe('Hello World;=');
    });

    test('skips comment lines', () => {
        const input = `##gff-version 3
# This is a comment
seq1\tSource\ttype\t1\t10\t.\t.\t.\tName=test`;
        const records = parseGFF(input);
        expect(records).toHaveLength(1);
        expect(records[0]?.seq_id).toBe('seq1');
    });

    test('skips empty lines', () => {
        const input = `seq1\tSource\ttype\t1\t10\t.\t.\t.\tName=test1

seq2\tSource\ttype\t20\t30\t.\t.\t.\tName=test2`;
        expect(parseGFF(input)).toHaveLength(2);
    });

    test('handles missing attributes column', () => {
        const records = parseGFF('seq1\tSource\ttype\t1\t10\t.\t.\t.');
        expect(records).toHaveLength(1);
        expect(records[0]?.seq_id).toBe('seq1');
    });

    test('handles partial GFF lines gracefully', () => {
        // Only three of the nine columns are present; coordinates default to 0.
        const records = parseGFF('seq1\tSource\ttype');
        expect(records).toHaveLength(1);
        expect(records[0]).toMatchObject({
            seq_id: 'seq1',
            source: 'Source',
            type: 'type',
            start: 0,
            end: 0,
        });
    });

    test('parses numeric score', () => {
        const [first] = parseGFF('seq1\tSource\ttype\t1\t10\t45.6\t.\t.\tName=test');
        expect(first?.score).toBe(45.6);
    });

    test('handles comma-separated values in attributes', () => {
        // Comma-separated attribute values come back space-separated.
        const [first] = parseGFF('seq1\tSource\ttype\t1\t10\t.\t.\t.\tOntology_term=GO:0001,GO:0002');
        expect(first?.Ontology_term).toBe('GO:0001 GO:0002');
    });

    test('parses multiple lines', () => {
        const input = `seq1\tPfam\tprotein_match\t10\t50\t.\t.\t.\tName=PF00001
seq1\tSMART\tprotein_match\t60\t100\t.\t.\t.\tName=SM00001
seq2\tPfam\tprotein_match\t5\t40\t.\t.\t.\tName=PF00002`;
        const records = parseGFF(input);
        expect(records).toHaveLength(3);
        const [a, b, c] = records;
        expect(a?.seq_id).toBe('seq1');
        expect(b?.seq_id).toBe('seq1');
        expect(c?.seq_id).toBe('seq2');
    });
});
92
+ //# sourceMappingURL=parseGFF.test.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"parseGFF.test.js","sourceRoot":"","sources":["../../src/gff/parseGFF.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,QAAQ,CAAA;AAE/C,OAAO,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAA;AAErC,QAAQ,CAAC,UAAU,EAAE,GAAG,EAAE;IACxB,IAAI,CAAC,qBAAqB,EAAE,GAAG,EAAE;QAC/B,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAA;QAChC,MAAM,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAA;IACzC,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,wBAAwB,EAAE,GAAG,EAAE;QAClC,MAAM,GAAG,GACP,kEAAkE,CAAA;QACpE,MAAM,MAAM,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAA;QAC5B,MAAM,CAAC,MAAM,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;QAC9B,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC;YACxB,MAAM,EAAE,MAAM;YACd,MAAM,EAAE,cAAc;YACtB,IAAI,EAAE,eAAe;YACrB,KAAK,EAAE,EAAE;YACT,GAAG,EAAE,EAAE;YACP,KAAK,EAAE,CAAC;YACR,MAAM,EAAE,GAAG;YACX,KAAK,EAAE,GAAG;YACV,IAAI,EAAE,SAAS;SAChB,CAAC,CAAA;IACJ,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,4BAA4B,EAAE,GAAG,EAAE;QACtC,MAAM,GAAG,GACP,kGAAkG,CAAA;QACpG,MAAM,MAAM,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAA;QAC5B,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,aAAa,CAAC;YAC9B,IAAI,EAAE,SAAS;YACf,cAAc,EAAE,OAAO;YACvB,WAAW,EAAE,MAAM;SACpB,CAAC,CAAA;IACJ,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,sCAAsC,EAAE,GAAG,EAAE;QAChD,MAAM,GAAG,GAAG,8DAA8D,CAAA;QAC1E,MAAM,MAAM,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAA;QAC5B,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,IAAI,CAAC,eAAe,CAAC,CAAA;IAC/C,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,qBAAqB,EAAE,GAAG,EAAE;QAC/B,MAAM,GAAG,GAAG;;8CAE8B,CAAA;QAC1C,MAAM,MAAM,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAA;QAC5B,MAAM,CAAC,MAAM,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;QAC9B,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;IACxC,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,mBAAmB,EAAE,GAAG,EAAE;QAC7B,MAAM,GAAG,GAAG;;gDAEgC,CAAA;QAC5C,MAAM,MAAM,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAA;QAC5B,MAAM,CAAC,MAAM,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;IAChC,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,mCAAmC,EAAE,GAAG,EAAE;QAC7C,MAAM,GAAG,GAAG,oCAAoC,CAAA;QAChD,MAAM,MAAM,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAA;QAC5B,MAAM,CAAC,M
AAM,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;QAC9B,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;IACxC,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,sCAAsC,EAAE,GAAG,EAAE;QAChD,MAAM,GAAG,GAAG,oBAAoB,CAAA;QAChC,MAAM,MAAM,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAA;QAC5B,MAAM,CAAC,MAAM,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;QAC9B,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,aAAa,CAAC;YAC9B,MAAM,EAAE,MAAM;YACd,MAAM,EAAE,QAAQ;YAChB,IAAI,EAAE,MAAM;YACZ,KAAK,EAAE,CAAC;YACR,GAAG,EAAE,CAAC;SACP,CAAC,CAAA;IACJ,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,sBAAsB,EAAE,GAAG,EAAE;QAChC,MAAM,GAAG,GAAG,kDAAkD,CAAA;QAC9D,MAAM,MAAM,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAA;QAC5B,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;IACrC,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,8CAA8C,EAAE,GAAG,EAAE;QACxD,MAAM,GAAG,GACP,mEAAmE,CAAA;QACrE,MAAM,MAAM,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAA;QAC5B,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,aAAa,CAAC,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAA;IAC1D,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,uBAAuB,EAAE,GAAG,EAAE;QACjC,MAAM,GAAG,GAAG;;wDAEwC,CAAA;QACpD,MAAM,MAAM,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAA;QAC5B,MAAM,CAAC,MAAM,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;QAC9B,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;QACtC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;QACtC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;IACxC,CAAC,CAAC,CAAA;AACJ,CAAC,CAAC,CAAA"}
@@ -0,0 +1,5 @@
1
+ export * from './types';
2
+ export { generateNodeIds } from './util';
3
+ export { A3mMSA, ClustalMSA, EmfMSA, FastaMSA, StockholmMSA, getUngappedSequence, parseEmfTree, parseMSA, parseNewick, stockholmSniff, } from './msa';
4
+ export type { MSAParserType } from './msa';
5
+ export { gffToInterProResponse, gffToInterProResults, interProResponseToGFF, interProToGFF, parseGFF, } from './gff';
package/dist/index.js ADDED
@@ -0,0 +1,9 @@
1
+ // Types
2
+ export * from './types';
3
+ // Utilities
4
+ export { generateNodeIds } from './util';
5
+ // MSA parsers
6
+ export { A3mMSA, ClustalMSA, EmfMSA, FastaMSA, StockholmMSA, getUngappedSequence, parseEmfTree, parseMSA, parseNewick, stockholmSniff, } from './msa';
7
+ // GFF parsing
8
+ export { gffToInterProResponse, gffToInterProResults, interProResponseToGFF, interProToGFF, parseGFF, } from './gff';
9
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,QAAQ;AACR,cAAc,SAAS,CAAA;AAEvB,YAAY;AACZ,OAAO,EAAE,eAAe,EAAE,MAAM,QAAQ,CAAA;AAExC,cAAc;AACd,OAAO,EACL,MAAM,EACN,UAAU,EACV,MAAM,EACN,QAAQ,EACR,YAAY,EACZ,mBAAmB,EACnB,YAAY,EACZ,QAAQ,EACR,WAAW,EACX,cAAc,GACf,MAAM,OAAO,CAAA;AAGd,cAAc;AACd,OAAO,EACL,qBAAqB,EACrB,oBAAoB,EACpB,qBAAqB,EACrB,aAAa,EACb,QAAQ,GACT,MAAM,OAAO,CAAA"}
@@ -0,0 +1,33 @@
1
+ import type { NodeWithIds } from '../types';
2
+ export default class A3mMSA { // Parser/accessor for A3M-formatted multiple sequence alignments
3
+ private MSA; // `{ seqdata }`: expanded alignment rows keyed by sequence id
4
+ private orderedNames; // sequence ids in the order they appear in the input text
5
+ constructor(text: string); // splits `text` on '>' records, then expands the rows into `seqdata`
6
+ /**
7
+ * Detect if text is likely A3M format
8
+ */
9
+ static sniff(text: string): boolean;
10
+ /**
11
+ * Expand A3M format to standard aligned format.
12
+ *
13
+ * In A3M, lowercase characters are insertions that implicitly introduce
14
+ * gaps in sequences that don't have an insert at that position.
15
+ * Gaps (-) following match columns in sequences without inserts align
16
+ * with lowercase inserts in other sequences.
17
+ */
18
+ private expandA3M;
19
+ getMSA(): { // returns the stored alignment object itself (not a copy)
20
+ seqdata: Record<string, string>;
21
+ };
22
+ getRowData(): undefined; // always undefined for this format
23
+ getNames(): string[]; // ids in input order
24
+ getRow(name: string): string; // expanded row for `name`, or '' when the id is unknown
25
+ getWidth(): number; // length of the first stored row, 0 when there are no rows
26
+ getStructures(): {}; // always an empty object for this format
27
+ get alignmentNames(): never[]; // always an empty array for this format
28
+ getHeader(): {}; // always an empty object for this format
29
+ getTree(): NodeWithIds; // flat placeholder: a 'root' node flagged `noTree` with one childless leaf per name
30
+ get seqConsensus(): undefined; // always undefined for this format
31
+ get secondaryStructureConsensus(): undefined; // always undefined for this format
32
+ get tracks(): never[]; // always an empty array for this format
33
+ }
@@ -0,0 +1,280 @@
1
+ /**
2
+ * A3M Format Parser
3
+ *
4
+ * The A3M format consists of aligned FASTA, in which alignments are shown with:
5
+ * - Inserts as lowercase characters
6
+ * - Matches as uppercase characters
7
+ * - Deletions as '-'
8
+ * - Gaps aligned to inserts as '.'
9
+ *
10
+ * Note that gaps aligned to inserts can be omitted in the A3M format.
11
+ *
12
+ * Example:
13
+ * >query
14
+ * ETESMKTVRIREKIKKFLGDRPRNTAEILEHINSTMRHGTTSQQLGNVLSKDKDIVKVGYIKRSGILSGGYDICEWATRNWVAEHCPEWTE
15
+ * >seq1
16
+ * ----MRTTRLRQKIKKFLNERGeANTTEILEHVNSTMRHGTTPQQLGNVLSKDKDILKVATTKRGGALSGRYEICVWTLRP-----------
17
+ *
18
+ * In the above, 'e' after 'G' in seq1 is a lowercase insert.
19
+ *
20
+ * @see https://yanglab.qd.sdu.edu.cn/trRosetta/msa_format.html
21
+ */
22
// Char code helpers for fast character classification
const CODE_A = 65; // 'A'
const CODE_Z = 90; // 'Z'
const CODE_a = 97; // 'a'
const CODE_z = 122; // 'z'

/** True when `code` is an ASCII lowercase letter (an insert residue). */
function isLower(code) {
    return code >= CODE_a && code <= CODE_z;
}

/** True when `code` is an ASCII uppercase letter (a match-column residue). */
function isUpper(code) {
    return code >= CODE_A && code <= CODE_Z;
}

/**
 * Split FASTA-like text into `{ id, seq }` records.
 *
 * Records without any newline (no sequence body) and whitespace-only chunks
 * are dropped. `id` is the definition line up to the first space and may be
 * empty; `seq` has all whitespace removed and may be empty. Callers decide
 * which of those they keep.
 */
function splitFastaEntries(text) {
    const entries = [];
    for (const chunk of text.split('>')) {
        if (!/\S/.test(chunk)) {
            continue;
        }
        const newlineIdx = chunk.indexOf('\n');
        if (newlineIdx === -1) {
            continue;
        }
        // Drop the '\r' of a CRLF line ending so it never becomes part of the id.
        const defLine = chunk.slice(0, newlineIdx).replace(/\r$/, '');
        const spaceIdx = defLine.indexOf(' ');
        entries.push({
            id: spaceIdx === -1 ? defLine : defLine.slice(0, spaceIdx),
            seq: chunk.slice(newlineIdx + 1).replace(/\s/g, ''),
        });
    }
    return entries;
}

/**
 * Break one raw A3M row into its match residues and the (uppercased) insert
 * residues that follow each match.
 *
 * - `leading`: insert residues that occur before the first match column.
 * - `matches[i]`: the i-th uppercase residue.
 * - `inserts[i]`: insert residues between match i and match i + 1.
 *
 * Every character that is neither an ASCII upper- nor lowercase letter
 * ('-', '.', '*', digits, ...) is discarded.
 */
function splitMatchesAndInserts(seq) {
    const matches = [];
    const inserts = [];
    let leading = '';
    for (let i = 0; i < seq.length; i++) {
        const code = seq.charCodeAt(i);
        if (isUpper(code)) {
            matches.push(seq[i]);
            inserts.push('');
        }
        else if (isLower(code)) {
            const residue = seq[i].toUpperCase();
            if (matches.length === 0) {
                leading += residue;
            }
            else {
                inserts[inserts.length - 1] += residue;
            }
        }
    }
    return { leading, matches, inserts };
}

export default class A3mMSA {
    MSA;
    orderedNames;
    /**
     * Parse A3M text.
     *
     * @param text '>'-delimited records; the id of each record is its
     *   definition line up to the first space. Records with an empty id are
     *   skipped. Input order is preserved in `getNames()`.
     */
    constructor(text) {
        const rawSeqs = [];
        const names = [];
        for (const { id, seq } of splitFastaEntries(text)) {
            if (id) {
                rawSeqs.push(seq);
                names.push(id);
            }
        }
        this.orderedNames = names;
        this.MSA = { seqdata: this.expandA3M(rawSeqs, names) };
    }
    /**
     * Detect if text is likely A3M format.
     *
     * Returns true only when the text starts with '>', holds at least two
     * non-empty sequences, at least one lowercase letter is present, and every
     * sequence has the same number of uppercase letters.
     */
    static sniff(text) {
        if (!text.startsWith('>')) {
            return false;
        }
        const seqs = splitFastaEntries(text)
            .map(entry => entry.seq)
            .filter(seq => seq.length > 0);
        if (seqs.length < 2) {
            return false;
        }
        // Single pass per sequence: look for lowercase and count uppercase.
        let hasLowercase = false;
        let firstUppercaseLen = -1;
        let sameUppercaseLength = true;
        for (const seq of seqs) {
            let uppercaseLen = 0;
            for (let i = 0; i < seq.length; i++) {
                const code = seq.charCodeAt(i);
                if (isLower(code)) {
                    hasLowercase = true;
                }
                else if (isUpper(code)) {
                    uppercaseLen++;
                }
            }
            if (firstUppercaseLen === -1) {
                firstUppercaseLen = uppercaseLen;
            }
            else if (uppercaseLen !== firstUppercaseLen) {
                sameUppercaseLength = false;
            }
        }
        return hasLowercase && sameUppercaseLength;
    }
    /**
     * Expand A3M rows to equal-width aligned rows.
     *
     * Uppercase letters are match columns. Lowercase letters are inserts; they
     * are uppercased in the output and every row that has fewer insert
     * residues at that slot is padded with '.'. Inserts that precede the first
     * match column get their own leading slot shared by all rows, so they no
     * longer shift that row's match columns relative to the others. Rows with
     * fewer match columns than the longest row are padded with '-'.
     *
     * NOTE(review): '-' and '.' in the input are discarded rather than treated
     * as match-column deletions, so a row written as `AC---` next to `ACabc`
     * comes out as `AC...`. Standard A3M uses '-' for a deletion in a match
     * column; confirm this interpretation is intended before relying on it.
     *
     * @param rawSeqs whitespace-free sequence strings, parallel to `names`
     * @param names   sequence ids, used as keys of the result
     * @returns expanded rows keyed by id (a repeated id keeps its last row)
     */
    expandA3M(rawSeqs, names) {
        const numSeqs = names.length;
        if (numSeqs === 0) {
            return {};
        }
        // Pass 1: tokenise each row and track the widest leading insert and the
        // largest match count. A plain loop avoids spreading one argument per
        // sequence into Math.max, which can overflow on very deep alignments.
        const parsed = [];
        let numPositions = 0;
        let maxLeading = 0;
        for (let seqIdx = 0; seqIdx < numSeqs; seqIdx++) {
            const row = splitMatchesAndInserts(rawSeqs[seqIdx]);
            parsed.push(row);
            if (row.matches.length > numPositions) {
                numPositions = row.matches.length;
            }
            if (row.leading.length > maxLeading) {
                maxLeading = row.leading.length;
            }
        }
        // Pass 2: widest insert after each match column.
        const maxInserts = new Array(numPositions).fill(0);
        for (const { inserts } of parsed) {
            for (let pos = 0; pos < inserts.length; pos++) {
                if (inserts[pos].length > maxInserts[pos]) {
                    maxInserts[pos] = inserts[pos].length;
                }
            }
        }
        // Pass 3: emit each row, padding every insert slot to its maximum.
        const expanded = {};
        for (let seqIdx = 0; seqIdx < numSeqs; seqIdx++) {
            const { leading, matches, inserts } = parsed[seqIdx];
            const result = [leading, '.'.repeat(maxLeading - leading.length)];
            for (let pos = 0; pos < numPositions; pos++) {
                const present = pos < matches.length;
                const insert = present ? inserts[pos] : '';
                // Rows that ran out of match columns get a '-' placeholder.
                result.push(present ? matches[pos] : '-');
                result.push(insert, '.'.repeat(maxInserts[pos] - insert.length));
            }
            expanded[names[seqIdx]] = result.join('');
        }
        return expanded;
    }
    /** Returns the stored `{ seqdata }` object (not a copy). */
    getMSA() {
        return this.MSA;
    }
    /** Always undefined for this format. */
    getRowData() {
        return undefined;
    }
    /** Sequence ids in input order. */
    getNames() {
        return this.orderedNames;
    }
    /** Expanded row for `name`, or '' when the id is unknown. */
    getRow(name) {
        return this.MSA.seqdata[name] || '';
    }
    /** Length of the first stored row, or 0 when there are no rows. */
    getWidth() {
        const name = Object.keys(this.MSA.seqdata)[0];
        return name ? this.getRow(name).length : 0;
    }
    /** Always an empty object for this format. */
    getStructures() {
        return {};
    }
    /** Always an empty array for this format. */
    get alignmentNames() {
        return [];
    }
    /** Always an empty object for this format. */
    getHeader() {
        return {};
    }
    /**
     * Flat placeholder tree: a 'root' node flagged `noTree` whose children are
     * one childless leaf per sequence id, in input order.
     */
    getTree() {
        return {
            id: 'root',
            name: 'root',
            noTree: true,
            children: this.getNames().map(name => ({
                id: name,
                children: [],
                name,
            })),
        };
    }
    /** Always undefined for this format. */
    get seqConsensus() {
        return undefined;
    }
    /** Always undefined for this format. */
    get secondaryStructureConsensus() {
        return undefined;
    }
    /** Always an empty array for this format. */
    get tracks() {
        return [];
    }
}
280
+ //# sourceMappingURL=A3mMSA.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"A3mMSA.js","sourceRoot":"","sources":["../../src/msa/A3mMSA.ts"],"names":[],"mappings":"AAEA;;;;;;;;;;;;;;;;;;;;GAoBG;AAEH,sDAAsD;AACtD,MAAM,MAAM,GAAG,EAAE,CAAA,CAAC,MAAM;AACxB,MAAM,MAAM,GAAG,EAAE,CAAA,CAAC,MAAM;AACxB,MAAM,MAAM,GAAG,EAAE,CAAA,CAAC,MAAM;AACxB,MAAM,MAAM,GAAG,GAAG,CAAA,CAAC,MAAM;AACzB,MAAM,SAAS,GAAG,EAAE,CAAA,CAAC,MAAM;AAC3B,MAAM,QAAQ,GAAG,EAAE,CAAA,CAAC,MAAM;AAE1B,SAAS,OAAO,CAAC,IAAY;IAC3B,OAAO,IAAI,IAAI,MAAM,IAAI,IAAI,IAAI,MAAM,CAAA;AACzC,CAAC;AAED,MAAM,CAAC,OAAO,OAAO,MAAM;IACjB,GAAG,CAAqC;IACxC,YAAY,CAAU;IAE9B,YAAY,IAAY;QACtB,MAAM,OAAO,GAAa,EAAE,CAAA;QAC5B,MAAM,KAAK,GAAa,EAAE,CAAA;QAE1B,6DAA6D;QAC7D,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,CAAC;YACpC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;gBACtB,SAAQ;YACV,CAAC;YACD,MAAM,UAAU,GAAG,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,CAAA;YACtC,IAAI,UAAU,KAAK,CAAC,CAAC,EAAE,CAAC;gBACtB,SAAQ;YACV,CAAC;YACD,MAAM,OAAO,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,UAAU,CAAC,CAAA;YAC1C,MAAM,QAAQ,GAAG,OAAO,CAAC,OAAO,CAAC,GAAG,CAAC,CAAA;YACrC,MAAM,EAAE,GAAG,QAAQ,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAA;YACjE,IAAI,EAAE,EAAE,CAAC;gBACP,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,UAAU,GAAG,CAAC,CAAC,CAAC,UAAU,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC,CAAA;gBAC/D,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;YAChB,CAAC;QACH,CAAC;QAED,IAAI,CAAC,YAAY,GAAG,KAAK,CAAA;QACzB,IAAI,CAAC,GAAG,GAAG,EAAE,OAAO,EAAE,IAAI,CAAC,SAAS,CAAC,OAAO,EAAE,KAAK,CAAC,EAAE,CAAA;IACxD,CAAC;IAED;;OAEG;IACH,MAAM,CAAC,KAAK,CAAC,IAAY;QACvB,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;YAC1B,OAAO,KAAK,CAAA;QACd,CAAC;QAED,MAAM,IAAI,GAAa,EAAE,CAAA;QACzB,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,CAAC;YACpC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;gBACtB,SAAQ;YACV,CAAC;YACD,MAAM,UAAU,GAAG,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,CAAA;YACtC,IAAI,UAAU,KAAK,CAAC,CAAC,EAAE,CAAC;gBACtB,SAAQ;YACV,CAAC;YACD,MAAM,GAAG,GAAG,KAAK,CAAC,KAAK,CAAC,UAAU,GAAG,CAAC,CAAC,CAAC,UAAU,CAAC,KAAK,EAAE,EAAE,CAAC,CAA
A;YAC7D,IAAI,GAAG,EAAE,CAAC;gBACR,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;YAChB,CAAC;QACH,CAAC;QAED,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACpB,OAAO,KAAK,CAAA;QACd,CAAC;QAED,sEAAsE;QACtE,8DAA8D;QAC9D,IAAI,YAAY,GAAG,KAAK,CAAA;QACxB,IAAI,iBAAiB,GAAG,CAAC,CAAC,CAAA;QAC1B,IAAI,mBAAmB,GAAG,IAAI,CAAA;QAE9B,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;YACvB,IAAI,YAAY,GAAG,CAAC,CAAA;YACpB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBACpC,MAAM,IAAI,GAAG,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,CAAA;gBAC9B,IAAI,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC;oBAClB,YAAY,GAAG,IAAI,CAAA;gBACrB,CAAC;qBAAM,IAAI,IAAI,IAAI,MAAM,IAAI,IAAI,IAAI,MAAM,EAAE,CAAC;oBAC5C,YAAY,EAAE,CAAA;gBAChB,CAAC;YACH,CAAC;YAED,IAAI,iBAAiB,KAAK,CAAC,CAAC,EAAE,CAAC;gBAC7B,iBAAiB,GAAG,YAAY,CAAA;YAClC,CAAC;iBAAM,CAAC;gBACN,IAAI,YAAY,KAAK,iBAAiB,EAAE,CAAC;oBACvC,mBAAmB,GAAG,KAAK,CAAA;gBAC7B,CAAC;YACH,CAAC;QACH,CAAC;QAED,OAAO,YAAY,IAAI,mBAAmB,CAAA;IAC5C,CAAC;IAED;;;;;;;OAOG;IACK,SAAS,CACf,OAAiB,EACjB,KAAe;QAEf,MAAM,OAAO,GAAG,KAAK,CAAC,MAAM,CAAA;QAC5B,IAAI,OAAO,KAAK,CAAC,EAAE,CAAC;YAClB,OAAO,EAAE,CAAA;QACX,CAAC;QAED,2EAA2E;QAC3E,yEAAyE;QACzE,MAAM,UAAU,GAAe,EAAE,CAAA;QACjC,MAAM,aAAa,GAAe,EAAE,CAAA;QAEpC,KAAK,IAAI,MAAM,GAAG,CAAC,EAAE,MAAM,GAAG,OAAO,EAAE,MAAM,EAAE,EAAE,CAAC;YAChD,MAAM,GAAG,GAAG,OAAO,CAAC,MAAM,CAAE,CAAA;YAC5B,MAAM,OAAO,GAAa,EAAE,CAAA;YAC5B,MAAM,OAAO,GAAa,EAAE,CAAA;YAC5B,IAAI,CAAC,GAAG,CAAC,CAAA;YAET,OAAO,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC;gBACtB,MAAM,IAAI,GAAG,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,CAAA;gBAE9B,IAAI,IAAI,IAAI,MAAM,IAAI,IAAI,IAAI,MAAM,EAAE,CAAC;oBACrC,kCAAkC;oBAClC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAE,CAAC,CAAA;oBACrB,+DAA+D;oBAC/D,IAAI,GAAG,GAAG,EAAE,CAAA;oBACZ,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAA;oBACb,OAAO,CAAC,GAAG,GAAG,CAAC,MAAM,EAAE,CAAC;wBACtB,MAAM,CAAC,GAAG,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,CAAA;wBAC3B,IAAI,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,SAAS,IAAI,CAAC,KAAK,QAAQ,EAAE,CAAC;4BACpD,GAAG,IAAI,GAAG,CAAC,CAAC,CAAE,CAAA;4BACd,CAAC,EAAE,CAAA;wBACL,CAAC;6BAAM,CAAC;4BACN,MAAK;wBACP,CAAC;oBACH,CAAC
;oBACD,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;oBACjB,CAAC,GAAG,CAAC,CAAA;gBACP,CAAC;qBAAM,IAAI,IAAI,KAAK,SAAS,IAAI,IAAI,KAAK,QAAQ,EAAE,CAAC;oBACnD,wCAAwC;oBACxC,CAAC,EAAE,CAAA;gBACL,CAAC;qBAAM,IAAI,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC;oBACzB,oCAAoC;oBACpC,IAAI,GAAG,GAAG,EAAE,CAAA;oBACZ,OAAO,CAAC,GAAG,GAAG,CAAC,MAAM,IAAI,OAAO,CAAC,GAAG,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;wBACpD,GAAG,IAAI,GAAG,CAAC,CAAC,CAAE,CAAA;wBACd,CAAC,EAAE,CAAA;oBACL,CAAC;oBACD,mCAAmC;oBACnC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;oBAChB,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;gBACnB,CAAC;qBAAM,CAAC;oBACN,CAAC,EAAE,CAAA;gBACL,CAAC;YACH,CAAC;YAED,UAAU,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;YACxB,aAAa,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;QAC7B,CAAC;QAED,oEAAoE;QACpE,MAAM,YAAY,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC,CAAA;QAElE,2EAA2E;QAC3E,MAAM,UAAU,GAAG,IAAI,KAAK,CAAS,YAAY,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QAC1D,KAAK,IAAI,MAAM,GAAG,CAAC,EAAE,MAAM,GAAG,OAAO,EAAE,MAAM,EAAE,EAAE,CAAC;YAChD,MAAM,OAAO,GAAG,aAAa,CAAC,MAAM,CAAE,CAAA;YACtC,KAAK,IAAI,GAAG,GAAG,CAAC,EAAE,GAAG,GAAG,OAAO,CAAC,MAAM,EAAE,GAAG,EAAE,EAAE,CAAC;gBAC9C,oDAAoD;gBACpD,IAAI,OAAO,GAAG,CAAC,CAAA;gBACf,KAAK,MAAM,CAAC,IAAI,OAAO,CAAC,GAAG,CAAE,EAAE,CAAC;oBAC9B,IAAI,OAAO,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;wBAC7B,OAAO,EAAE,CAAA;oBACX,CAAC;gBACH,CAAC;gBACD,IAAI,OAAO,GAAG,UAAU,CAAC,GAAG,CAAE,EAAE,CAAC;oBAC/B,UAAU,CAAC,GAAG,CAAC,GAAG,OAAO,CAAA;gBAC3B,CAAC;YACH,CAAC;QACH,CAAC;QAED,2BAA2B;QAC3B,MAAM,QAAQ,GAA2B,EAAE,CAAA;QAE3C,KAAK,IAAI,MAAM,GAAG,CAAC,EAAE,MAAM,GAAG,OAAO,EAAE,MAAM,EAAE,EAAE,CAAC;YAChD,MAAM,OAAO,GAAG,UAAU,CAAC,MAAM,CAAE,CAAA;YACnC,MAAM,OAAO,GAAG,aAAa,CAAC,MAAM,CAAE,CAAA;YACtC,MAAM,MAAM,GAAa,EAAE,CAAA;YAE3B,KAAK,IAAI,GAAG,GAAG,CAAC,EAAE,GAAG,GAAG,YAAY,EAAE,GAAG,EAAE,EAAE,CAAC;gBAC5C,MAAM,MAAM,GAAG,UAAU,CAAC,GAAG,CAAE,CAAA;gBAE/B,IAAI,GAAG,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;oBACzB,MAAM,SAAS,GAAG,OAAO,CAAC,GAAG,CAAE,CAAA;oBAC/B,MAAM,UAAU,GAAG,OAAO,CAAC,GAAG,CAAC,IAAI,EAAE,CAAA;oBAErC,wCAAwC;oB
ACxC,MAAM,CAAC,IAAI,CAAC,SAAS,IAAI,GAAG,CAAC,CAAA;oBAE7B,yBAAyB;oBACzB,IAAI,SAAS,GAAG,EAAE,CAAA;oBAClB,KAAK,MAAM,CAAC,IAAI,UAAU,EAAE,CAAC;wBAC3B,IAAI,OAAO,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;4BAC7B,SAAS,IAAI,CAAC,CAAC,WAAW,EAAE,CAAA;wBAC9B,CAAC;oBACH,CAAC;oBAED,sCAAsC;oBACtC,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,CAAA;oBAEtB,2CAA2C;oBAC3C,MAAM,OAAO,GAAG,MAAM,GAAG,SAAS,CAAC,MAAM,CAAA;oBACzC,IAAI,OAAO,GAAG,CAAC,EAAE,CAAC;wBAChB,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAA;oBAClC,CAAC;gBACH,CAAC;qBAAM,CAAC;oBACN,sCAAsC;oBACtC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;oBAChB,IAAI,MAAM,GAAG,CAAC,EAAE,CAAC;wBACf,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAA;oBACjC,CAAC;gBACH,CAAC;YACH,CAAC;YAED,QAAQ,CAAC,KAAK,CAAC,MAAM,CAAE,CAAC,GAAG,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;QAC5C,CAAC;QAED,OAAO,QAAQ,CAAA;IACjB,CAAC;IAED,MAAM;QACJ,OAAO,IAAI,CAAC,GAAG,CAAA;IACjB,CAAC;IAED,UAAU;QACR,OAAO,SAAS,CAAA;IAClB,CAAC;IAED,QAAQ;QACN,OAAO,IAAI,CAAC,YAAY,CAAA;IAC1B,CAAC;IAED,MAAM,CAAC,IAAY;QACjB,OAAO,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,EAAE,CAAA;IACrC,CAAC;IAED,QAAQ;QACN,MAAM,IAAI,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,CAAA;QAC7C,OAAO,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAA;IAC5C,CAAC;IAED,aAAa;QACX,OAAO,EAAE,CAAA;IACX,CAAC;IAED,IAAI,cAAc;QAChB,OAAO,EAAE,CAAA;IACX,CAAC;IAED,SAAS;QACP,OAAO,EAAE,CAAA;IACX,CAAC;IAED,OAAO;QACL,OAAO;YACL,EAAE,EAAE,MAAM;YACV,IAAI,EAAE,MAAM;YACZ,MAAM,EAAE,IAAI;YACZ,QAAQ,EAAE,IAAI,CAAC,QAAQ,EAAE,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;gBACrC,EAAE,EAAE,IAAI;gBACR,QAAQ,EAAE,EAAE;gBACZ,IAAI;aACL,CAAC,CAAC;SACJ,CAAA;IACH,CAAC;IAED,IAAI,YAAY;QACd,OAAO,SAAS,CAAA;IAClB,CAAC;IAED,IAAI,2BAA2B;QAC7B,OAAO,SAAS,CAAA;IAClB,CAAC;IAED,IAAI,MAAM;QACR,OAAO,EAAE,CAAA;IACX,CAAC;CACF"}
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,155 @@
1
+ import { describe, expect, test } from 'vitest';
2
+ import A3mMSA from './A3mMSA';
3
// Behavioural checks for the A3M parser: format detection, insert expansion,
// and the fixed-value accessors.
describe('A3mMSA', () => {
    // Single-record input shared by the accessor checks.
    const oneRecord = `>seq1
ACDEF`;
    // Two records, uppercase only.
    const plainPair = `>seq1
ACDEF
>seq2
GHIKL`;
    // Two records where the first carries a lowercase insert.
    const insertPair = `>seq1
ACDEFghiKLMNPQ
>seq2
ACDEF---KLMNPQ`;

    describe('sniff', () => {
        test('returns false for non-FASTA text', () => {
            for (const text of ['not fasta', 'CLUSTAL W']) {
                expect(A3mMSA.sniff(text)).toBe(false);
            }
        });

        test('returns false for regular FASTA', () => {
            const fasta = `>seq1
ACDEFGHIKLMNPQRSTVWY
>seq2
ACDEFGHIKLMNPQRSTVWY`;
            expect(A3mMSA.sniff(fasta)).toBe(false);
        });

        test('returns true for A3M format', () => {
            expect(A3mMSA.sniff(insertPair)).toBe(true);
        });

        test('returns false for single sequence', () => {
            const single = `>seq1
ACDEFghiKLMNPQ`;
            expect(A3mMSA.sniff(single)).toBe(false);
        });
    });

    describe('parsing', () => {
        test('parses simple A3M', () => {
            const alignment = new A3mMSA(insertPair);
            expect(alignment.getNames()).toEqual(['seq1', 'seq2']);
            const rowA = alignment.getRow('seq1');
            const rowB = alignment.getRow('seq2');
            expect(rowA.length).toBe(rowB.length);
            expect(rowA).toContain('GHI');
        });

        test('expands lowercase insertions', () => {
            const alignment = new A3mMSA(`>seq1
ACabc
>seq2
AC---`);
            expect(alignment.getRow('seq1')).toBe('ACABC');
            expect(alignment.getRow('seq2')).toBe('AC...');
        });

        test('handles multiple insertions', () => {
            const alignment = new A3mMSA(`>seq1
AabcDdefG
>seq2
A---D---G`);
            const rowA = alignment.getRow('seq1');
            const rowB = alignment.getRow('seq2');
            expect(rowA.length).toBe(rowB.length);
        });

        test('getWidth returns correct width', () => {
            const alignment = new A3mMSA(`>seq1
ACDEF
>seq2
ACDEF`);
            expect(alignment.getWidth()).toBe(5);
        });

        test('getMSA returns seqdata', () => {
            const { seqdata } = new A3mMSA(plainPair).getMSA();
            expect(seqdata).toHaveProperty('seq1');
            expect(seqdata).toHaveProperty('seq2');
        });

        test('getTree returns noTree structure', () => {
            const tree = new A3mMSA(plainPair).getTree();
            expect(tree.noTree).toBe(true);
            expect(tree.children).toHaveLength(2);
        });

        test('handles empty sequences', () => {
            expect(new A3mMSA(oneRecord).getNames()).toEqual(['seq1']);
        });

        test('handles sequences with only ID on defline', () => {
            // Everything after the first space on the defline is not part of the id.
            const alignment = new A3mMSA(`>seq1 description here
ACDEF
>seq2 another description
GHIKL`);
            expect(alignment.getNames()).toEqual(['seq1', 'seq2']);
        });

        test('preserves sequence order', () => {
            // Names are deliberately out of alphabetical order.
            const alignment = new A3mMSA(`>z_seq
AAAAA
>a_seq
CCCCC
>m_seq
DDDDD`);
            expect(alignment.getNames()).toEqual(['z_seq', 'a_seq', 'm_seq']);
        });
    });

    describe('properties', () => {
        // Fresh instance per check so the tests stay independent.
        const build = () => new A3mMSA(oneRecord);

        test('alignmentNames is empty array', () => {
            expect(build().alignmentNames).toEqual([]);
        });

        test('seqConsensus is undefined', () => {
            expect(build().seqConsensus).toBeUndefined();
        });

        test('secondaryStructureConsensus is undefined', () => {
            expect(build().secondaryStructureConsensus).toBeUndefined();
        });

        test('tracks is empty array', () => {
            expect(build().tracks).toEqual([]);
        });

        test('getStructures returns empty object', () => {
            expect(build().getStructures()).toEqual({});
        });

        test('getHeader returns empty object', () => {
            expect(build().getHeader()).toEqual({});
        });
    });
});
155
+ //# sourceMappingURL=A3mMSA.test.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"A3mMSA.test.js","sourceRoot":"","sources":["../../src/msa/A3mMSA.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,QAAQ,CAAA;AAE/C,OAAO,MAAM,MAAM,UAAU,CAAA;AAE7B,QAAQ,CAAC,QAAQ,EAAE,GAAG,EAAE;IACtB,QAAQ,CAAC,OAAO,EAAE,GAAG,EAAE;QACrB,IAAI,CAAC,kCAAkC,EAAE,GAAG,EAAE;YAC5C,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;YAC7C,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;QAC/C,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,iCAAiC,EAAE,GAAG,EAAE;YAC3C,MAAM,KAAK,GAAG;;;qBAGC,CAAA;YACf,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;QACzC,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,6BAA6B,EAAE,GAAG,EAAE;YACvC,MAAM,GAAG,GAAG;;;eAGH,CAAA;YACT,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QACtC,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,mCAAmC,EAAE,GAAG,EAAE;YAC7C,MAAM,GAAG,GAAG;eACH,CAAA;YACT,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;QACvC,CAAC,CAAC,CAAA;IACJ,CAAC,CAAC,CAAA;IAEF,QAAQ,CAAC,SAAS,EAAE,GAAG,EAAE;QACvB,IAAI,CAAC,mBAAmB,EAAE,GAAG,EAAE;YAC7B,MAAM,GAAG,GAAG;;;eAGH,CAAA;YACT,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAE3B,MAAM,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAA;YAChD,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,CAAA;YAC/B,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,CAAA;YAE/B,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;YACrC,MAAM,CAAC,IAAI,CAAC,CAAC,SAAS,CAAC,KAAK,CAAC,CAAA;QAC/B,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,8BAA8B,EAAE,GAAG,EAAE;YACxC,MAAM,GAAG,GAAG;;;MAGZ,CAAA;YACA,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAE3B,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,CAAA;YAC/B,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,CAAA;YAE/B,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;YAC1B,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;QAC5B,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,6BAA6B,EAAE,GAAG,EAAE;YACvC,MAAM,GAAG,GAAG;;;UAGR,CAAA;YACJ,MAAM,GAAG,G
AAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAE3B,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,CAAA;YAC/B,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,CAAA;YAE/B,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;QACvC,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,gCAAgC,EAAE,GAAG,EAAE;YAC1C,MAAM,GAAG,GAAG;;;MAGZ,CAAA;YACA,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAE3B,MAAM,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QAChC,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,wBAAwB,EAAE,GAAG,EAAE;YAClC,MAAM,GAAG,GAAG;;;MAGZ,CAAA;YACA,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAC3B,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,EAAE,CAAA;YAEzB,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,cAAc,CAAC,MAAM,CAAC,CAAA;YAC3C,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,cAAc,CAAC,MAAM,CAAC,CAAA;QAC7C,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,kCAAkC,EAAE,GAAG,EAAE;YAC5C,MAAM,GAAG,GAAG;;;MAGZ,CAAA;YACA,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAC3B,MAAM,IAAI,GAAG,GAAG,CAAC,OAAO,EAAE,CAAA;YAE1B,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;YAC9B,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;QACvC,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,yBAAyB,EAAE,GAAG,EAAE;YACnC,MAAM,GAAG,GAAG;MACZ,CAAA;YACA,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAE3B,MAAM,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,CAAC,CAAA;QAC1C,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,2CAA2C,EAAE,GAAG,EAAE;YACrD,MAAM,GAAG,GAAG;;;MAGZ,CAAA;YACA,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAE3B,MAAM,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAA;QAClD,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,0BAA0B,EAAE,GAAG,EAAE;YACpC,MAAM,GAAG,GAAG;;;;;MAKZ,CAAA;YACA,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAE3B,MAAM,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,OAAO,EAAE,OAAO,EAAE,OAAO,CAAC,CAAC,CAAA;QAC7D,CAAC,CAAC,CAAA;IACJ,CAAC,CAAC,CAAA;IAEF,QAAQ,CAAC,YAAY,EAAE,GAAG,EAAE;QAC1B,IAAI,CAAC,+BAA+B,EAAE,GAAG,EAAE;YACzC,MAAM,GAAG,GAAG;MACZ,CAAA;YACA,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAE3B,MAAM,CAAC,GAAG,CAAC,cAAc,CAAC,C
AAC,OAAO,CAAC,EAAE,CAAC,CAAA;QACxC,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,2BAA2B,EAAE,GAAG,EAAE;YACrC,MAAM,GAAG,GAAG;MACZ,CAAA;YACA,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAE3B,MAAM,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC,aAAa,EAAE,CAAA;QAC1C,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,0CAA0C,EAAE,GAAG,EAAE;YACpD,MAAM,GAAG,GAAG;MACZ,CAAA;YACA,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAE3B,MAAM,CAAC,GAAG,CAAC,2BAA2B,CAAC,CAAC,aAAa,EAAE,CAAA;QACzD,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,uBAAuB,EAAE,GAAG,EAAE;YACjC,MAAM,GAAG,GAAG;MACZ,CAAA;YACA,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAE3B,MAAM,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAA;QAChC,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,oCAAoC,EAAE,GAAG,EAAE;YAC9C,MAAM,GAAG,GAAG;MACZ,CAAA;YACA,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAE3B,MAAM,CAAC,GAAG,CAAC,aAAa,EAAE,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAA;QACzC,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,gCAAgC,EAAE,GAAG,EAAE;YAC1C,MAAM,GAAG,GAAG;MACZ,CAAA;YACA,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAE3B,MAAM,CAAC,GAAG,CAAC,SAAS,EAAE,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAA;QACrC,CAAC,CAAC,CAAA;IACJ,CAAC,CAAC,CAAA;AACJ,CAAC,CAAC,CAAA"}
@@ -0,0 +1,30 @@
1
+ import type { NodeWithIds } from '../types';
2
+ export default class ClustalMSA { // Declaration only: member signatures of the default-exported ClustalMSA class, no bodies
3
+ private MSA; // private member; its type is not emitted in this declaration
4
+ constructor(text: string); // built from one string — presumably Clustal-format alignment text; confirm against the implementation
5
+ getMSA(): { // returns an object with three fields: consensus, alns, header
6
+ consensus: string;
7
+ alns: { // array of { id, seq } records, both strings
8
+ id: string;
9
+ seq: string;
10
+ }[];
11
+ header: {
12
+ info: string;
13
+ version: string | undefined; // version may be undefined
14
+ };
15
+ };
16
+ getRow(name: string): string; // takes a row name and returns a string — presumably that row's sequence; result for an unknown name is not visible here
17
+ getWidth(): number; // presumably the alignment width in columns — TODO confirm
18
+ getRowData(): undefined; // typed to return undefined only
19
+ getHeader(): { // same field types as the `header` member of getMSA()'s return type
20
+ info: string;
21
+ version: string | undefined;
22
+ };
23
+ getNames(): string[];
24
+ getStructures(): {}; // return type is `{}`; NOTE(review): presumably always an empty object — confirm
25
+ get alignmentNames(): never[]; // read-only accessor typed never[]; presumably always an empty array — confirm
26
+ getTree(): NodeWithIds; // NodeWithIds is the type imported from '../types'
27
+ get seqConsensus(): string; // read-only accessor (getter only, no setter declared)
28
+ get secondaryStructureConsensus(): undefined; // read-only accessor typed undefined only
29
+ get tracks(): never[]; // read-only accessor typed never[]; presumably always an empty array — confirm
30
+ }