msa-parsers 5.0.3 → 5.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,181 @@
1
+ import { describe, expect, test } from 'vitest';
2
+ import { gffToInterProResponse, gffToInterProResults } from "./gffToInterPro.js";
3
/**
 * Specs for `gffToInterProResults`: an array of parsed GFF records goes in,
 * an object keyed by `seq_id` (each value holding `matches` and `xref`)
 * comes out.
 */
describe('gffToInterProResults', () => {
  // The eight fixed columns shared by every fixture. Individual tests override
  // values in place and append attribute keys (Name, ID, ...) after them.
  const BASE_RECORD = {
    seq_id: 'seq1',
    source: 'Pfam',
    type: 'protein_match',
    start: 10,
    end: 50,
    score: 0,
    strand: '.',
    phase: '.',
  };

  /** Returns a fresh record: the base columns plus any per-test fields. */
  const gffRecord = (fields = {}) => ({ ...BASE_RECORD, ...fields });

  test('converts empty array', () => {
    const converted = gffToInterProResults([]);
    expect(converted).toEqual({});
  });

  test('converts single GFF record', () => {
    const input = [
      gffRecord({
        Name: 'PF00001',
        signature_desc: '7tm_1',
        description: 'GPCR family',
      }),
    ];
    const converted = gffToInterProResults(input);

    expect(converted).toHaveProperty('seq1');
    expect(converted.seq1?.matches).toHaveLength(1);

    // Name -> accession, signature_desc -> name, description -> description.
    const expectedEntry = {
      accession: 'PF00001',
      name: '7tm_1',
      description: 'GPCR family',
    };
    expect(converted.seq1?.matches[0]?.signature.entry).toEqual(expectedEntry);
    expect(converted.seq1?.matches[0]?.locations).toEqual([{ start: 10, end: 50 }]);
    expect(converted.seq1?.xref).toEqual([{ id: 'seq1' }]);
  });

  test('groups multiple records for same sequence', () => {
    // Two different signatures on the same sequence id.
    const input = [
      gffRecord({ Name: 'PF00001' }),
      gffRecord({ source: 'SMART', start: 60, end: 100, Name: 'SM00001' }),
    ];
    const converted = gffToInterProResults(input);

    expect(Object.keys(converted)).toHaveLength(1);
    expect(converted.seq1?.matches).toHaveLength(2);
  });

  test('handles multiple sequences', () => {
    const input = [
      gffRecord({ Name: 'PF00001' }),
      gffRecord({ seq_id: 'seq2', start: 5, end: 40, Name: 'PF00002' }),
    ];
    const converted = gffToInterProResults(input);

    expect(Object.keys(converted)).toHaveLength(2);
    for (const seqId of ['seq1', 'seq2']) {
      expect(converted).toHaveProperty(seqId);
    }
  });

  test('combines locations for same accession', () => {
    // Same accession twice: one match is expected, carrying both locations
    // in input order.
    const input = [
      gffRecord({ Name: 'PF00001' }),
      gffRecord({ start: 100, end: 150, Name: 'PF00001' }),
    ];
    const converted = gffToInterProResults(input);

    expect(converted.seq1?.matches).toHaveLength(1);
    expect(converted.seq1?.matches[0]?.locations).toHaveLength(2);
    expect(converted.seq1?.matches[0]?.locations).toEqual([
      { start: 10, end: 50 },
      { start: 100, end: 150 },
    ]);
  });

  test('uses ID as fallback for Name', () => {
    const input = [gffRecord({ source: 'Source', ID: 'domain_123' })];
    const converted = gffToInterProResults(input);

    expect(converted.seq1?.matches[0]?.signature.entry?.accession).toBe('domain_123');
  });

  test('generates fallback accession from source and positions', () => {
    // Neither Name nor ID is present on this record.
    const input = [gffRecord({ source: 'CustomSource' })];
    const converted = gffToInterProResults(input);

    expect(converted.seq1?.matches[0]?.signature.entry?.accession).toBe('CustomSource_10_50');
  });
});
160
/**
 * Specs for `gffToInterProResponse`: the same GFF record input, but the
 * output is an object exposing a `results` array.
 */
describe('gffToInterProResponse', () => {
  test('wraps results in response format', () => {
    const pfamRecord = {
      seq_id: 'seq1',
      source: 'Pfam',
      type: 'protein_match',
      start: 10,
      end: 50,
      score: 0,
      strand: '.',
      phase: '.',
      Name: 'PF00001',
    };
    const wrapped = gffToInterProResponse([pfamRecord]);

    expect(wrapped).toHaveProperty('results');
    expect(wrapped.results).toHaveLength(1);
    // The single entry must still identify its sequence through xref.
    expect(wrapped.results[0]?.xref[0]?.id).toBe('seq1');
  });
});
181
+ //# sourceMappingURL=gffToInterPro.test.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"gffToInterPro.test.js","sourceRoot":"","sources":["../../src/gff/gffToInterPro.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,QAAQ,CAAA;AAE/C,OAAO,EAAE,qBAAqB,EAAE,oBAAoB,EAAE,MAAM,oBAAoB,CAAA;AAIhF,QAAQ,CAAC,sBAAsB,EAAE,GAAG,EAAE;IACpC,IAAI,CAAC,sBAAsB,EAAE,GAAG,EAAE;QAChC,MAAM,CAAC,oBAAoB,CAAC,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAA;IAC9C,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,4BAA4B,EAAE,GAAG,EAAE;QACtC,MAAM,OAAO,GAAgB;YAC3B;gBACE,MAAM,EAAE,MAAM;gBACd,MAAM,EAAE,MAAM;gBACd,IAAI,EAAE,eAAe;gBACrB,KAAK,EAAE,EAAE;gBACT,GAAG,EAAE,EAAE;gBACP,KAAK,EAAE,CAAC;gBACR,MAAM,EAAE,GAAG;gBACX,KAAK,EAAE,GAAG;gBACV,IAAI,EAAE,SAAS;gBACf,cAAc,EAAE,OAAO;gBACvB,WAAW,EAAE,aAAa;aAC3B;SACF,CAAA;QACD,MAAM,MAAM,GAAG,oBAAoB,CAAC,OAAO,CAAC,CAAA;QAE5C,MAAM,CAAC,MAAM,CAAC,CAAC,cAAc,CAAC,MAAM,CAAC,CAAA;QACrC,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;QAC5C,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,CAAC,EAAE,SAAS,CAAC,KAAK,CAAC,CAAC,OAAO,CAAC;YACvD,SAAS,EAAE,SAAS;YACpB,IAAI,EAAE,OAAO;YACb,WAAW,EAAE,aAAa;SAC3B,CAAC,CAAA;QACF,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC,OAAO,CAAC,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE,CAAC,CAAC,CAAA;QAC5E,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,EAAE,EAAE,EAAE,MAAM,EAAE,CAAC,CAAC,CAAA;IACrD,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,2CAA2C,EAAE,GAAG,EAAE;QACrD,MAAM,OAAO,GAAgB;YAC3B;gBACE,MAAM,EAAE,MAAM;gBACd,MAAM,EAAE,MAAM;gBACd,IAAI,EAAE,eAAe;gBACrB,KAAK,EAAE,EAAE;gBACT,GAAG,EAAE,EAAE;gBACP,KAAK,EAAE,CAAC;gBACR,MAAM,EAAE,GAAG;gBACX,KAAK,EAAE,GAAG;gBACV,IAAI,EAAE,SAAS;aAChB;YACD;gBACE,MAAM,EAAE,MAAM;gBACd,MAAM,EAAE,OAAO;gBACf,IAAI,EAAE,eAAe;gBACrB,KAAK,EAAE,EAAE;gBACT,GAAG,EAAE,GAAG;gBACR,KAAK,EAAE,CAAC;gBACR,MAAM,EAAE,GAAG;gBACX,KAAK,EAAE,GAAG;gBACV,IAAI,EAAE,SAAS;aAChB;SACF,CAAA;QACD,MAAM,MAAM,GAAG,oBAAoB,CAAC,OAAO,CAAC,CAAA;QAE5C,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;QAC3C,MAAM,CAAC,MAAM,CAAC,IAAI,EAA
E,OAAO,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;IAC9C,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,4BAA4B,EAAE,GAAG,EAAE;QACtC,MAAM,OAAO,GAAgB;YAC3B;gBACE,MAAM,EAAE,MAAM;gBACd,MAAM,EAAE,MAAM;gBACd,IAAI,EAAE,eAAe;gBACrB,KAAK,EAAE,EAAE;gBACT,GAAG,EAAE,EAAE;gBACP,KAAK,EAAE,CAAC;gBACR,MAAM,EAAE,GAAG;gBACX,KAAK,EAAE,GAAG;gBACV,IAAI,EAAE,SAAS;aAChB;YACD;gBACE,MAAM,EAAE,MAAM;gBACd,MAAM,EAAE,MAAM;gBACd,IAAI,EAAE,eAAe;gBACrB,KAAK,EAAE,CAAC;gBACR,GAAG,EAAE,EAAE;gBACP,KAAK,EAAE,CAAC;gBACR,MAAM,EAAE,GAAG;gBACX,KAAK,EAAE,GAAG;gBACV,IAAI,EAAE,SAAS;aAChB;SACF,CAAA;QACD,MAAM,MAAM,GAAG,oBAAoB,CAAC,OAAO,CAAC,CAAA;QAE5C,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;QAC3C,MAAM,CAAC,MAAM,CAAC,CAAC,cAAc,CAAC,MAAM,CAAC,CAAA;QACrC,MAAM,CAAC,MAAM,CAAC,CAAC,cAAc,CAAC,MAAM,CAAC,CAAA;IACvC,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,uCAAuC,EAAE,GAAG,EAAE;QACjD,MAAM,OAAO,GAAgB;YAC3B;gBACE,MAAM,EAAE,MAAM;gBACd,MAAM,EAAE,MAAM;gBACd,IAAI,EAAE,eAAe;gBACrB,KAAK,EAAE,EAAE;gBACT,GAAG,EAAE,EAAE;gBACP,KAAK,EAAE,CAAC;gBACR,MAAM,EAAE,GAAG;gBACX,KAAK,EAAE,GAAG;gBACV,IAAI,EAAE,SAAS;aAChB;YACD;gBACE,MAAM,EAAE,MAAM;gBACd,MAAM,EAAE,MAAM;gBACd,IAAI,EAAE,eAAe;gBACrB,KAAK,EAAE,GAAG;gBACV,GAAG,EAAE,GAAG;gBACR,KAAK,EAAE,CAAC;gBACR,MAAM,EAAE,GAAG;gBACX,KAAK,EAAE,GAAG;gBACV,IAAI,EAAE,SAAS;aAChB;SACF,CAAA;QACD,MAAM,MAAM,GAAG,oBAAoB,CAAC,OAAO,CAAC,CAAA;QAE5C,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;QAC5C,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;QAC1D,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC,OAAO,CAAC;YACjD,EAAE,KAAK,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE;YACtB,EAAE,KAAK,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE;SACzB,CAAC,CAAA;IACJ,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,8BAA8B,EAAE,GAAG,EAAE;QACxC,MAAM,OAAO,GAAgB;YAC3B;gBACE,MAAM,EAAE,MAAM;gBACd,MAAM,EAAE,QAAQ;gBAChB,IAAI,EAAE,eAAe;gBACrB,KAAK,EAAE,EAAE;gBACT,GAAG,EAAE,EAAE;gBACP,KAAK,EAAE,CAAC;gBACR,MAAM,EAAE,GAAG;gBACX,KAAK,EAAE,GAAG;gBACV,EAAE,EAAE,YAAY;aACjB;SACF,CAAA;QACD,MAAM,MA
AM,GAAG,oBAAoB,CAAC,OAAO,CAAC,CAAA;QAE5C,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,CAAC,EAAE,SAAS,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC,IAAI,CAC9D,YAAY,CACb,CAAA;IACH,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,wDAAwD,EAAE,GAAG,EAAE;QAClE,MAAM,OAAO,GAAgB;YAC3B;gBACE,MAAM,EAAE,MAAM;gBACd,MAAM,EAAE,cAAc;gBACtB,IAAI,EAAE,eAAe;gBACrB,KAAK,EAAE,EAAE;gBACT,GAAG,EAAE,EAAE;gBACP,KAAK,EAAE,CAAC;gBACR,MAAM,EAAE,GAAG;gBACX,KAAK,EAAE,GAAG;aACX;SACF,CAAA;QACD,MAAM,MAAM,GAAG,oBAAoB,CAAC,OAAO,CAAC,CAAA;QAE5C,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,CAAC,EAAE,SAAS,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC,IAAI,CAC9D,oBAAoB,CACrB,CAAA;IACH,CAAC,CAAC,CAAA;AACJ,CAAC,CAAC,CAAA;AAEF,QAAQ,CAAC,uBAAuB,EAAE,GAAG,EAAE;IACrC,IAAI,CAAC,kCAAkC,EAAE,GAAG,EAAE;QAC5C,MAAM,OAAO,GAAgB;YAC3B;gBACE,MAAM,EAAE,MAAM;gBACd,MAAM,EAAE,MAAM;gBACd,IAAI,EAAE,eAAe;gBACrB,KAAK,EAAE,EAAE;gBACT,GAAG,EAAE,EAAE;gBACP,KAAK,EAAE,CAAC;gBACR,MAAM,EAAE,GAAG;gBACX,KAAK,EAAE,GAAG;gBACV,IAAI,EAAE,SAAS;aAChB;SACF,CAAA;QACD,MAAM,QAAQ,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAA;QAE/C,MAAM,CAAC,QAAQ,CAAC,CAAC,cAAc,CAAC,SAAS,CAAC,CAAA;QAC1C,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;QACxC,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;IACvD,CAAC,CAAC,CAAA;AACJ,CAAC,CAAC,CAAA"}
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,189 @@
1
+ import { describe, expect, test } from 'vitest';
2
+ import { interProResponseToGFF, interProToGFF } from "./interProToGFF.js";
3
/**
 * Specs for `interProToGFF`: an object of per-sequence InterPro results goes
 * in, GFF3 text (a `##gff-version 3` header plus one line per match location)
 * comes out.
 */
describe('interProToGFF', () => {
  const GFF_HEADER = '##gff-version 3';

  /** Builds one match whose signature carries a complete entry. */
  const entryMatch = (accession, name, description, locations) => ({
    signature: { entry: { accession, name, description } },
    locations,
  });

  /** Builds the per-sequence result object for `id`. */
  const sequenceResult = (id, matches) => ({ matches, xref: [{ id }] });

  test('converts empty results', () => {
    expect(interProToGFF({})).toBe(GFF_HEADER);
  });

  test('converts single result', () => {
    const results = {
      seq1: sequenceResult('seq1', [
        entryMatch('PF00001', '7tm_1', 'GPCR family', [{ start: 10, end: 50 }]),
      ]),
    };
    const lines = interProToGFF(results).split('\n');

    expect(lines[0]).toBe(GFF_HEADER);
    expect(lines[1]).toContain('seq1');
    expect(lines[1]).toContain('InterProScan');
    expect(lines[1]).toContain('protein_match');
    // Pin start and end as adjacent tab-separated columns, exactly as the
    // 'handles multiple locations' test does. Bare '10' / '50' substring
    // checks would also accept coordinates such as 100 or 500.
    expect(lines[1]).toContain('10\t50');
    expect(lines[1]).toContain('Name=PF00001');
    expect(lines[1]).toContain('signature_desc=7tm_1');
    expect(lines[1]).toContain('description=GPCR%20family');
  });

  test('handles multiple locations', () => {
    const results = {
      seq1: sequenceResult('seq1', [
        entryMatch('PF00001', 'domain', 'test', [
          { start: 10, end: 50 },
          { start: 100, end: 150 },
        ]),
      ]),
    };
    const lines = interProToGFF(results).split('\n');

    // Header plus one feature line per location.
    expect(lines).toHaveLength(3);
    expect(lines[1]).toContain('10\t50');
    expect(lines[2]).toContain('100\t150');
  });

  test('handles multiple sequences', () => {
    const results = {
      seq1: sequenceResult('seq1', [
        entryMatch('PF00001', 'domain1', 'test1', [{ start: 10, end: 50 }]),
      ]),
      seq2: sequenceResult('seq2', [
        entryMatch('PF00002', 'domain2', 'test2', [{ start: 5, end: 40 }]),
      ]),
    };
    const lines = interProToGFF(results).split('\n');

    expect(lines).toHaveLength(3);
    expect(lines.some((line) => line.includes('seq1'))).toBe(true);
    expect(lines.some((line) => line.includes('seq2'))).toBe(true);
  });

  test('skips matches without entry', () => {
    // The signature has no `entry`, so only the header line is expected.
    const results = {
      seq1: sequenceResult('seq1', [
        { signature: {}, locations: [{ start: 10, end: 50 }] },
      ]),
    };
    const lines = interProToGFF(results).split('\n');

    expect(lines).toHaveLength(1);
    expect(lines[0]).toBe(GFF_HEADER);
  });

  test('URL-encodes special characters in attributes', () => {
    // ';' and '=' delimit GFF3 attributes, so they must be percent-encoded
    // inside values; spaces are expected as %20.
    const results = {
      seq1: sequenceResult('seq1', [
        entryMatch('PF00001', 'test;name=value', 'description with spaces', [
          { start: 10, end: 50 },
        ]),
      ]),
    };
    const gff = interProToGFF(results);

    expect(gff).toContain('signature_desc=test%3Bname%3Dvalue');
    expect(gff).toContain('description=description%20with%20spaces');
  });
});
144
/**
 * Specs for `interProResponseToGFF`: takes the array form of InterPro results
 * (each item carrying its own `xref`) and returns GFF3 text.
 */
describe('interProResponseToGFF', () => {
  /** One result with a single PF00001 match at 10-50 and the given xref list. */
  const resultWithXref = (xref) => ({
    matches: [
      {
        signature: {
          entry: {
            accession: 'PF00001',
            name: 'domain',
            description: 'test',
          },
        },
        locations: [{ start: 10, end: 50 }],
      },
    ],
    xref,
  });

  test('converts array of results', () => {
    const output = interProResponseToGFF([resultWithXref([{ id: 'seq1' }])]);

    expect(output).toContain('##gff-version 3');
    expect(output).toContain('seq1');
  });

  test('handles results without xref', () => {
    // With an empty xref list only the header is expected back.
    const output = interProResponseToGFF([resultWithXref([])]);

    expect(output).toBe('##gff-version 3');
  });
});
189
+ //# sourceMappingURL=interProToGFF.test.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"interProToGFF.test.js","sourceRoot":"","sources":["../../src/gff/interProToGFF.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,QAAQ,CAAA;AAE/C,OAAO,EAAE,qBAAqB,EAAE,aAAa,EAAE,MAAM,oBAAoB,CAAA;AAIzE,QAAQ,CAAC,eAAe,EAAE,GAAG,EAAE;IAC7B,IAAI,CAAC,wBAAwB,EAAE,GAAG,EAAE;QAClC,MAAM,GAAG,GAAG,aAAa,CAAC,EAAE,CAAC,CAAA;QAC7B,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAA;IACrC,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,wBAAwB,EAAE,GAAG,EAAE;QAClC,MAAM,OAAO,GAAwC;YACnD,IAAI,EAAE;gBACJ,OAAO,EAAE;oBACP;wBACE,SAAS,EAAE;4BACT,KAAK,EAAE;gCACL,SAAS,EAAE,SAAS;gCACpB,IAAI,EAAE,OAAO;gCACb,WAAW,EAAE,aAAa;6BAC3B;yBACF;wBACD,SAAS,EAAE,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE,CAAC;qBACpC;iBACF;gBACD,IAAI,EAAE,CAAC,EAAE,EAAE,EAAE,MAAM,EAAE,CAAC;aACvB;SACF,CAAA;QACD,MAAM,GAAG,GAAG,aAAa,CAAC,OAAO,CAAC,CAAA;QAClC,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAA;QAE7B,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAA;QACxC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,MAAM,CAAC,CAAA;QAClC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,cAAc,CAAC,CAAA;QAC1C,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,eAAe,CAAC,CAAA;QAC3C,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,IAAI,CAAC,CAAA;QAChC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,IAAI,CAAC,CAAA;QAChC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,cAAc,CAAC,CAAA;QAC1C,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,sBAAsB,CAAC,CAAA;QAClD,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,2BAA2B,CAAC,CAAA;IACzD,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,4BAA4B,EAAE,GAAG,EAAE;QACtC,MAAM,OAAO,GAAwC;YACnD,IAAI,EAAE;gBACJ,OAAO,EAAE;oBACP;wBACE,SAAS,EAAE;4BACT,KAAK,EAAE;gCACL,SAAS,EAAE,SAAS;gCACpB,IAAI,EAAE,QAAQ;gCACd,WAAW,EAAE,MAAM;6BACpB;yBACF;wBACD,SAAS,EAAE;4BACT,EAAE,KAAK,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE;4BACtB,EAAE,KAAK,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE;yBACzB;qBACF;iBACF;gBACD,IAAI,EAAE,CAAC,EAAE,EAAE,EAAE,MAAM,EAAE,CAAC;aACvB;SACF,CAAA;QACD,MAAM,GAAG,G
AAG,aAAa,CAAC,OAAO,CAAC,CAAA;QAClC,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAA;QAE7B,MAAM,CAAC,KAAK,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;QAC7B,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAA;QACpC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,UAAU,CAAC,CAAA;IACxC,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,4BAA4B,EAAE,GAAG,EAAE;QACtC,MAAM,OAAO,GAAwC;YACnD,IAAI,EAAE;gBACJ,OAAO,EAAE;oBACP;wBACE,SAAS,EAAE;4BACT,KAAK,EAAE;gCACL,SAAS,EAAE,SAAS;gCACpB,IAAI,EAAE,SAAS;gCACf,WAAW,EAAE,OAAO;6BACrB;yBACF;wBACD,SAAS,EAAE,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE,CAAC;qBACpC;iBACF;gBACD,IAAI,EAAE,CAAC,EAAE,EAAE,EAAE,MAAM,EAAE,CAAC;aACvB;YACD,IAAI,EAAE;gBACJ,OAAO,EAAE;oBACP;wBACE,SAAS,EAAE;4BACT,KAAK,EAAE;gCACL,SAAS,EAAE,SAAS;gCACpB,IAAI,EAAE,SAAS;gCACf,WAAW,EAAE,OAAO;6BACrB;yBACF;wBACD,SAAS,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,GAAG,EAAE,EAAE,EAAE,CAAC;qBACnC;iBACF;gBACD,IAAI,EAAE,CAAC,EAAE,EAAE,EAAE,MAAM,EAAE,CAAC;aACvB;SACF,CAAA;QACD,MAAM,GAAG,GAAG,aAAa,CAAC,OAAO,CAAC,CAAA;QAClC,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAA;QAE7B,MAAM,CAAC,KAAK,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;QAC7B,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QACtD,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;IACxD,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,6BAA6B,EAAE,GAAG,EAAE;QACvC,MAAM,OAAO,GAAwC;YACnD,IAAI,EAAE;gBACJ,OAAO,EAAE;oBACP;wBACE,SAAS,EAAE,EAAE;wBACb,SAAS,EAAE,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE,CAAC;qBACpC;iBACF;gBACD,IAAI,EAAE,CAAC,EAAE,EAAE,EAAE,MAAM,EAAE,CAAC;aACvB;SACF,CAAA;QACD,MAAM,GAAG,GAAG,aAAa,CAAC,OAAO,CAAC,CAAA;QAClC,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAA;QAE7B,MAAM,CAAC,KAAK,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;QAC7B,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAA;IAC1C,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,8CAA8C,EAAE,GAAG,EAAE;QACxD,MAAM,OAAO,GAAwC;YACnD,IAAI,EAAE;gBACJ,OAAO,EAAE;oBACP;wBACE
,SAAS,EAAE;4BACT,KAAK,EAAE;gCACL,SAAS,EAAE,SAAS;gCACpB,IAAI,EAAE,iBAAiB;gCACvB,WAAW,EAAE,yBAAyB;6BACvC;yBACF;wBACD,SAAS,EAAE,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE,CAAC;qBACpC;iBACF;gBACD,IAAI,EAAE,CAAC,EAAE,EAAE,EAAE,MAAM,EAAE,CAAC;aACvB;SACF,CAAA;QACD,MAAM,GAAG,GAAG,aAAa,CAAC,OAAO,CAAC,CAAA;QAElC,MAAM,CAAC,GAAG,CAAC,CAAC,SAAS,CAAC,oCAAoC,CAAC,CAAA;QAC3D,MAAM,CAAC,GAAG,CAAC,CAAC,SAAS,CAAC,yCAAyC,CAAC,CAAA;IAClE,CAAC,CAAC,CAAA;AACJ,CAAC,CAAC,CAAA;AAEF,QAAQ,CAAC,uBAAuB,EAAE,GAAG,EAAE;IACrC,IAAI,CAAC,2BAA2B,EAAE,GAAG,EAAE;QACrC,MAAM,OAAO,GAA0B;YACrC;gBACE,OAAO,EAAE;oBACP;wBACE,SAAS,EAAE;4BACT,KAAK,EAAE;gCACL,SAAS,EAAE,SAAS;gCACpB,IAAI,EAAE,QAAQ;gCACd,WAAW,EAAE,MAAM;6BACpB;yBACF;wBACD,SAAS,EAAE,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE,CAAC;qBACpC;iBACF;gBACD,IAAI,EAAE,CAAC,EAAE,EAAE,EAAE,MAAM,EAAE,CAAC;aACvB;SACF,CAAA;QACD,MAAM,GAAG,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAA;QAE1C,MAAM,CAAC,GAAG,CAAC,CAAC,SAAS,CAAC,iBAAiB,CAAC,CAAA;QACxC,MAAM,CAAC,GAAG,CAAC,CAAC,SAAS,CAAC,MAAM,CAAC,CAAA;IAC/B,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,8BAA8B,EAAE,GAAG,EAAE;QACxC,MAAM,OAAO,GAA0B;YACrC;gBACE,OAAO,EAAE;oBACP;wBACE,SAAS,EAAE;4BACT,KAAK,EAAE;gCACL,SAAS,EAAE,SAAS;gCACpB,IAAI,EAAE,QAAQ;gCACd,WAAW,EAAE,MAAM;6BACpB;yBACF;wBACD,SAAS,EAAE,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE,CAAC;qBACpC;iBACF;gBACD,IAAI,EAAE,EAAE;aACT;SACF,CAAA;QACD,MAAM,GAAG,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAA;QAE1C,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAA;IACrC,CAAC,CAAC,CAAA;AACJ,CAAC,CAAC,CAAA"}
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,92 @@
1
+ import { describe, expect, test } from 'vitest';
2
+ import { parseGFF } from "./parseGFF.js";
3
/**
 * Specs for `parseGFF`: tab-separated GFF text goes in, an array of record
 * objects (fixed columns plus one key per attribute) comes out.
 */
describe('parseGFF', () => {
  /** Joins physical lines into one multi-line GFF document. */
  const gffDocument = (...rows) => rows.join('\n');

  test('parses empty string', () => {
    // Both the empty string and a missing argument yield no records.
    for (const blank of ['', undefined]) {
      expect(parseGFF(blank)).toEqual([]);
    }
  });

  test('parses basic GFF3 line', () => {
    const parsed = parseGFF(
      'seq1\tInterProScan\tprotein_match\t10\t50\t.\t+\t.\tName=PF00001',
    );

    expect(parsed).toHaveLength(1);
    // A '.' score is expected as 0; strand and phase stay as given.
    const expectedRecord = {
      seq_id: 'seq1',
      source: 'InterProScan',
      type: 'protein_match',
      start: 10,
      end: 50,
      score: 0,
      strand: '+',
      phase: '.',
      Name: 'PF00001',
    };
    expect(parsed[0]).toEqual(expectedRecord);
  });

  test('parses multiple attributes', () => {
    const parsed = parseGFF(
      'seq1\tPfam\tprotein_match\t10\t50\t1.5\t.\t.\tName=PF00001;signature_desc=7tm_1;description=GPCR',
    );

    expect(parsed[0]).toMatchObject({
      Name: 'PF00001',
      signature_desc: '7tm_1',
      description: 'GPCR',
    });
  });

  test('handles URL-encoded attribute values', () => {
    const [record] = parseGFF(
      'seq1\tSource\ttype\t1\t10\t.\t.\t.\tNote=Hello%20World%3B%3D',
    );

    expect(record?.Note).toBe('Hello World;=');
  });

  test('skips comment lines', () => {
    const text = gffDocument(
      '##gff-version 3',
      '# This is a comment',
      'seq1\tSource\ttype\t1\t10\t.\t.\t.\tName=test',
    );
    const parsed = parseGFF(text);

    expect(parsed).toHaveLength(1);
    expect(parsed[0]?.seq_id).toBe('seq1');
  });

  test('skips empty lines', () => {
    const text = gffDocument(
      'seq1\tSource\ttype\t1\t10\t.\t.\t.\tName=test1',
      '',
      'seq2\tSource\ttype\t20\t30\t.\t.\t.\tName=test2',
    );

    expect(parseGFF(text)).toHaveLength(2);
  });

  test('handles missing attributes column', () => {
    // Only eight columns: the ninth (attributes) column is absent.
    const parsed = parseGFF('seq1\tSource\ttype\t1\t10\t.\t.\t.');

    expect(parsed).toHaveLength(1);
    expect(parsed[0]?.seq_id).toBe('seq1');
  });

  test('handles partial GFF lines gracefully', () => {
    // Three columns only; start and end are expected to come back as 0.
    const parsed = parseGFF('seq1\tSource\ttype');

    expect(parsed).toHaveLength(1);
    expect(parsed[0]).toMatchObject({
      seq_id: 'seq1',
      source: 'Source',
      type: 'type',
      start: 0,
      end: 0,
    });
  });

  test('parses numeric score', () => {
    const [record] = parseGFF('seq1\tSource\ttype\t1\t10\t45.6\t.\t.\tName=test');

    expect(record?.score).toBe(45.6);
  });

  test('handles comma-separated values in attributes', () => {
    const [record] = parseGFF(
      'seq1\tSource\ttype\t1\t10\t.\t.\t.\tOntology_term=GO:0001,GO:0002',
    );

    // Comma-separated values are expected back joined with a single space.
    expect(record?.Ontology_term).toBe('GO:0001 GO:0002');
  });

  test('parses multiple lines', () => {
    const text = gffDocument(
      'seq1\tPfam\tprotein_match\t10\t50\t.\t.\t.\tName=PF00001',
      'seq1\tSMART\tprotein_match\t60\t100\t.\t.\t.\tName=SM00001',
      'seq2\tPfam\tprotein_match\t5\t40\t.\t.\t.\tName=PF00002',
    );
    const parsed = parseGFF(text);

    expect(parsed).toHaveLength(3);
    // Records must come back in input order.
    ['seq1', 'seq1', 'seq2'].forEach((seqId, index) => {
      expect(parsed[index]?.seq_id).toBe(seqId);
    });
  });
});
92
+ //# sourceMappingURL=parseGFF.test.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"parseGFF.test.js","sourceRoot":"","sources":["../../src/gff/parseGFF.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,QAAQ,CAAA;AAE/C,OAAO,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAA;AAExC,QAAQ,CAAC,UAAU,EAAE,GAAG,EAAE;IACxB,IAAI,CAAC,qBAAqB,EAAE,GAAG,EAAE;QAC/B,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAA;QAChC,MAAM,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAA;IACzC,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,wBAAwB,EAAE,GAAG,EAAE;QAClC,MAAM,GAAG,GACP,kEAAkE,CAAA;QACpE,MAAM,MAAM,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAA;QAC5B,MAAM,CAAC,MAAM,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;QAC9B,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC;YACxB,MAAM,EAAE,MAAM;YACd,MAAM,EAAE,cAAc;YACtB,IAAI,EAAE,eAAe;YACrB,KAAK,EAAE,EAAE;YACT,GAAG,EAAE,EAAE;YACP,KAAK,EAAE,CAAC;YACR,MAAM,EAAE,GAAG;YACX,KAAK,EAAE,GAAG;YACV,IAAI,EAAE,SAAS;SAChB,CAAC,CAAA;IACJ,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,4BAA4B,EAAE,GAAG,EAAE;QACtC,MAAM,GAAG,GACP,kGAAkG,CAAA;QACpG,MAAM,MAAM,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAA;QAC5B,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,aAAa,CAAC;YAC9B,IAAI,EAAE,SAAS;YACf,cAAc,EAAE,OAAO;YACvB,WAAW,EAAE,MAAM;SACpB,CAAC,CAAA;IACJ,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,sCAAsC,EAAE,GAAG,EAAE;QAChD,MAAM,GAAG,GAAG,8DAA8D,CAAA;QAC1E,MAAM,MAAM,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAA;QAC5B,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,IAAI,CAAC,eAAe,CAAC,CAAA;IAC/C,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,qBAAqB,EAAE,GAAG,EAAE;QAC/B,MAAM,GAAG,GAAG;;8CAE8B,CAAA;QAC1C,MAAM,MAAM,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAA;QAC5B,MAAM,CAAC,MAAM,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;QAC9B,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;IACxC,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,mBAAmB,EAAE,GAAG,EAAE;QAC7B,MAAM,GAAG,GAAG;;gDAEgC,CAAA;QAC5C,MAAM,MAAM,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAA;QAC5B,MAAM,CAAC,MAAM,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;IAChC,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,mCAAmC,EAAE,GAAG,EAAE;QAC7C,MAAM,GAAG,GAAG,oCAAoC,CAAA;QAChD,MAAM,MAAM,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAA;QAC5B,MAAM,CAAC,M
AAM,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;QAC9B,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;IACxC,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,sCAAsC,EAAE,GAAG,EAAE;QAChD,MAAM,GAAG,GAAG,oBAAoB,CAAA;QAChC,MAAM,MAAM,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAA;QAC5B,MAAM,CAAC,MAAM,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;QAC9B,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,aAAa,CAAC;YAC9B,MAAM,EAAE,MAAM;YACd,MAAM,EAAE,QAAQ;YAChB,IAAI,EAAE,MAAM;YACZ,KAAK,EAAE,CAAC;YACR,GAAG,EAAE,CAAC;SACP,CAAC,CAAA;IACJ,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,sBAAsB,EAAE,GAAG,EAAE;QAChC,MAAM,GAAG,GAAG,kDAAkD,CAAA;QAC9D,MAAM,MAAM,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAA;QAC5B,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;IACrC,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,8CAA8C,EAAE,GAAG,EAAE;QACxD,MAAM,GAAG,GACP,mEAAmE,CAAA;QACrE,MAAM,MAAM,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAA;QAC5B,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,aAAa,CAAC,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAA;IAC1D,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,uBAAuB,EAAE,GAAG,EAAE;QACjC,MAAM,GAAG,GAAG;;wDAEwC,CAAA;QACpD,MAAM,MAAM,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAA;QAC5B,MAAM,CAAC,MAAM,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;QAC9B,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;QACtC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;QACtC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;IACxC,CAAC,CAAC,CAAA;AACJ,CAAC,CAAC,CAAA"}
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,155 @@
1
+ import { describe, expect, test } from 'vitest';
2
+ import A3mMSA from "./A3mMSA.js";
3
/**
 * Specs for the `A3mMSA` class: the static `sniff` format detector, parsing
 * and row access on instances, and the members that return fixed values.
 */
describe('A3mMSA', () => {
  /**
   * Builds fixture text from physical lines, so source indentation can never
   * leak into the sequence data.
   */
  const a3mText = (...lines) => lines.join('\n');

  // seq1 has lowercase residues where seq2 has gap characters.
  const WITH_LOWERCASE = a3mText('>seq1', 'ACDEFghiKLMNPQ', '>seq2', 'ACDEF---KLMNPQ');
  // Two all-uppercase sequences with different residues.
  const TWO_UPPERCASE = a3mText('>seq1', 'ACDEF', '>seq2', 'GHIKL');
  // A single all-uppercase sequence.
  const SINGLE_SEQUENCE = a3mText('>seq1', 'ACDEF');

  describe('sniff', () => {
    test('returns false for non-FASTA text', () => {
      for (const text of ['not fasta', 'CLUSTAL W']) {
        expect(A3mMSA.sniff(text)).toBe(false);
      }
    });

    test('returns false for regular FASTA', () => {
      const fasta = a3mText(
        '>seq1',
        'ACDEFGHIKLMNPQRSTVWY',
        '>seq2',
        'ACDEFGHIKLMNPQRSTVWY',
      );

      expect(A3mMSA.sniff(fasta)).toBe(false);
    });

    test('returns true for A3M format', () => {
      expect(A3mMSA.sniff(WITH_LOWERCASE)).toBe(true);
    });

    test('returns false for single sequence', () => {
      const single = a3mText('>seq1', 'ACDEFghiKLMNPQ');

      expect(A3mMSA.sniff(single)).toBe(false);
    });
  });

  describe('parsing', () => {
    test('parses simple A3M', () => {
      const msa = new A3mMSA(WITH_LOWERCASE);

      expect(msa.getNames()).toEqual(['seq1', 'seq2']);

      const firstRow = msa.getRow('seq1');
      const secondRow = msa.getRow('seq2');
      expect(firstRow.length).toBe(secondRow.length);
      // The lowercase "ghi" is expected back uppercased.
      expect(firstRow).toContain('GHI');
    });

    test('expands lowercase insertions', () => {
      const msa = new A3mMSA(a3mText('>seq1', 'ACabc', '>seq2', 'AC---'));
      const firstRow = msa.getRow('seq1');
      const secondRow = msa.getRow('seq2');

      expect(firstRow).toBe('ACABC');
      expect(secondRow).toBe('AC...');
    });

    test('handles multiple insertions', () => {
      const msa = new A3mMSA(a3mText('>seq1', 'AabcDdefG', '>seq2', 'A---D---G'));
      const firstRow = msa.getRow('seq1');
      const secondRow = msa.getRow('seq2');

      expect(firstRow.length).toBe(secondRow.length);
    });

    test('getWidth returns correct width', () => {
      const msa = new A3mMSA(a3mText('>seq1', 'ACDEF', '>seq2', 'ACDEF'));

      expect(msa.getWidth()).toBe(5);
    });

    test('getMSA returns seqdata', () => {
      const { seqdata } = new A3mMSA(TWO_UPPERCASE).getMSA();

      expect(seqdata).toHaveProperty('seq1');
      expect(seqdata).toHaveProperty('seq2');
    });

    test('getTree returns noTree structure', () => {
      const tree = new A3mMSA(TWO_UPPERCASE).getTree();

      expect(tree.noTree).toBe(true);
      expect(tree.children).toHaveLength(2);
    });

    // Previously titled 'handles empty sequences', although the fixture is a
    // single, non-empty sequence.
    test('parses a single-sequence input', () => {
      const msa = new A3mMSA(SINGLE_SEQUENCE);

      expect(msa.getNames()).toEqual(['seq1']);
    });

    // Previously titled 'handles sequences with only ID on defline', although
    // both deflines carry a description after the ID.
    test('uses only the ID from deflines that carry a description', () => {
      const msa = new A3mMSA(
        a3mText('>seq1 description here', 'ACDEF', '>seq2 another description', 'GHIKL'),
      );

      expect(msa.getNames()).toEqual(['seq1', 'seq2']);
    });

    test('preserves sequence order', () => {
      // Names are deliberately not in alphabetical order.
      const msa = new A3mMSA(
        a3mText('>z_seq', 'AAAAA', '>a_seq', 'CCCCC', '>m_seq', 'DDDDD'),
      );

      expect(msa.getNames()).toEqual(['z_seq', 'a_seq', 'm_seq']);
    });
  });

  describe('properties', () => {
    test('alignmentNames is empty array', () => {
      const msa = new A3mMSA(SINGLE_SEQUENCE);

      expect(msa.alignmentNames).toEqual([]);
    });

    test('seqConsensus is undefined', () => {
      const msa = new A3mMSA(SINGLE_SEQUENCE);

      expect(msa.seqConsensus).toBeUndefined();
    });

    test('secondaryStructureConsensus is undefined', () => {
      const msa = new A3mMSA(SINGLE_SEQUENCE);

      expect(msa.secondaryStructureConsensus).toBeUndefined();
    });

    test('tracks is empty array', () => {
      const msa = new A3mMSA(SINGLE_SEQUENCE);

      expect(msa.tracks).toEqual([]);
    });

    test('getStructures returns empty object', () => {
      const msa = new A3mMSA(SINGLE_SEQUENCE);

      expect(msa.getStructures()).toEqual({});
    });

    test('getHeader returns empty object', () => {
      const msa = new A3mMSA(SINGLE_SEQUENCE);

      expect(msa.getHeader()).toEqual({});
    });
  });
});
155
+ //# sourceMappingURL=A3mMSA.test.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"A3mMSA.test.js","sourceRoot":"","sources":["../../src/msa/A3mMSA.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,QAAQ,CAAA;AAE/C,OAAO,MAAM,MAAM,aAAa,CAAA;AAEhC,QAAQ,CAAC,QAAQ,EAAE,GAAG,EAAE;IACtB,QAAQ,CAAC,OAAO,EAAE,GAAG,EAAE;QACrB,IAAI,CAAC,kCAAkC,EAAE,GAAG,EAAE;YAC5C,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;YAC7C,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;QAC/C,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,iCAAiC,EAAE,GAAG,EAAE;YAC3C,MAAM,KAAK,GAAG;;;qBAGC,CAAA;YACf,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;QACzC,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,6BAA6B,EAAE,GAAG,EAAE;YACvC,MAAM,GAAG,GAAG;;;eAGH,CAAA;YACT,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QACtC,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,mCAAmC,EAAE,GAAG,EAAE;YAC7C,MAAM,GAAG,GAAG;eACH,CAAA;YACT,MAAM,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;QACvC,CAAC,CAAC,CAAA;IACJ,CAAC,CAAC,CAAA;IAEF,QAAQ,CAAC,SAAS,EAAE,GAAG,EAAE;QACvB,IAAI,CAAC,mBAAmB,EAAE,GAAG,EAAE;YAC7B,MAAM,GAAG,GAAG;;;eAGH,CAAA;YACT,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAE3B,MAAM,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAA;YAChD,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,CAAA;YAC/B,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,CAAA;YAE/B,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;YACrC,MAAM,CAAC,IAAI,CAAC,CAAC,SAAS,CAAC,KAAK,CAAC,CAAA;QAC/B,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,8BAA8B,EAAE,GAAG,EAAE;YACxC,MAAM,GAAG,GAAG;;;MAGZ,CAAA;YACA,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAE3B,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,CAAA;YAC/B,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,CAAA;YAE/B,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;YAC1B,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;QAC5B,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,6BAA6B,EAAE,GAAG,EAAE;YACvC,MAAM,GAAG,GAAG;;;UAGR,CAAA;YACJ,MAAM,GAAG,G
AAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAE3B,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,CAAA;YAC/B,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,CAAA;YAE/B,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;QACvC,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,gCAAgC,EAAE,GAAG,EAAE;YAC1C,MAAM,GAAG,GAAG;;;MAGZ,CAAA;YACA,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAE3B,MAAM,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QAChC,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,wBAAwB,EAAE,GAAG,EAAE;YAClC,MAAM,GAAG,GAAG;;;MAGZ,CAAA;YACA,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAC3B,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,EAAE,CAAA;YAEzB,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,cAAc,CAAC,MAAM,CAAC,CAAA;YAC3C,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,cAAc,CAAC,MAAM,CAAC,CAAA;QAC7C,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,kCAAkC,EAAE,GAAG,EAAE;YAC5C,MAAM,GAAG,GAAG;;;MAGZ,CAAA;YACA,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAC3B,MAAM,IAAI,GAAG,GAAG,CAAC,OAAO,EAAE,CAAA;YAE1B,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;YAC9B,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;QACvC,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,yBAAyB,EAAE,GAAG,EAAE;YACnC,MAAM,GAAG,GAAG;MACZ,CAAA;YACA,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAE3B,MAAM,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,CAAC,CAAA;QAC1C,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,2CAA2C,EAAE,GAAG,EAAE;YACrD,MAAM,GAAG,GAAG;;;MAGZ,CAAA;YACA,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAE3B,MAAM,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAA;QAClD,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,0BAA0B,EAAE,GAAG,EAAE;YACpC,MAAM,GAAG,GAAG;;;;;MAKZ,CAAA;YACA,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAE3B,MAAM,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,OAAO,EAAE,OAAO,EAAE,OAAO,CAAC,CAAC,CAAA;QAC7D,CAAC,CAAC,CAAA;IACJ,CAAC,CAAC,CAAA;IAEF,QAAQ,CAAC,YAAY,EAAE,GAAG,EAAE;QAC1B,IAAI,CAAC,+BAA+B,EAAE,GAAG,EAAE;YACzC,MAAM,GAAG,GAAG;MACZ,CAAA;YACA,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAE3B,MAAM,CAAC,GAAG,CAAC,cAAc,CAAC,C
AAC,OAAO,CAAC,EAAE,CAAC,CAAA;QACxC,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,2BAA2B,EAAE,GAAG,EAAE;YACrC,MAAM,GAAG,GAAG;MACZ,CAAA;YACA,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAE3B,MAAM,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC,aAAa,EAAE,CAAA;QAC1C,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,0CAA0C,EAAE,GAAG,EAAE;YACpD,MAAM,GAAG,GAAG;MACZ,CAAA;YACA,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAE3B,MAAM,CAAC,GAAG,CAAC,2BAA2B,CAAC,CAAC,aAAa,EAAE,CAAA;QACzD,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,uBAAuB,EAAE,GAAG,EAAE;YACjC,MAAM,GAAG,GAAG;MACZ,CAAA;YACA,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAE3B,MAAM,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAA;QAChC,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,oCAAoC,EAAE,GAAG,EAAE;YAC9C,MAAM,GAAG,GAAG;MACZ,CAAA;YACA,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAE3B,MAAM,CAAC,GAAG,CAAC,aAAa,EAAE,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAA;QACzC,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,gCAAgC,EAAE,GAAG,EAAE;YAC1C,MAAM,GAAG,GAAG;MACZ,CAAA;YACA,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,GAAG,CAAC,CAAA;YAE3B,MAAM,CAAC,GAAG,CAAC,SAAS,EAAE,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAA;QACrC,CAAC,CAAC,CAAA;IACJ,CAAC,CAAC,CAAA;AACJ,CAAC,CAAC,CAAA"}
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,60 @@
1
+ import { describe, expect, test } from 'vitest';
2
+ import A3mMSA from "./A3mMSA.js";
3
+ import ClustalMSA from "./ClustalMSA.js";
4
+ import FastaMSA from "./FastaMSA.js";
5
+ import { getUngappedSequence, parseMSA } from "./index.js";
6
+ describe('parseMSA', () => {
7
+ test('parses FASTA format', () => {
8
+ const fasta = `>seq1
9
+ ACDEF
10
+ >seq2
11
+ GHIKL`;
12
+ const msa = parseMSA(fasta);
13
+ expect(msa).toBeInstanceOf(FastaMSA);
14
+ expect(msa.getNames()).toEqual(['seq1', 'seq2']);
15
+ });
16
+ test('parses A3M format', () => {
17
+ const a3m = `>seq1
18
+ ACDEFghiKLMNPQ
19
+ >seq2
20
+ ACDEF---KLMNPQ`;
21
+ const msa = parseMSA(a3m);
22
+ expect(msa).toBeInstanceOf(A3mMSA);
23
+ });
24
+ test('parses Clustal format', () => {
25
+ const clustal = `CLUSTAL W (1.83) multiple sequence alignment
26
+
27
+ seq1 ACDEFGHIKL
28
+ seq2 ACDEFGHIKL
29
+ **********`;
30
+ const msa = parseMSA(clustal);
31
+ expect(msa).toBeInstanceOf(ClustalMSA);
32
+ });
33
+ test('defaults to Clustal for unknown format', () => {
34
+ const unknown = `some unknown format
35
+ that doesn't match anything`;
36
+ const msa = parseMSA(unknown);
37
+ expect(msa).toBeInstanceOf(ClustalMSA);
38
+ });
39
+ });
40
+ describe('getUngappedSequence', () => {
41
+ test('removes dashes', () => {
42
+ expect(getUngappedSequence('AC--DEF')).toBe('ACDEF');
43
+ });
44
+ test('removes dots', () => {
45
+ expect(getUngappedSequence('AC..DEF')).toBe('ACDEF');
46
+ });
47
+ test('removes mixed gaps', () => {
48
+ expect(getUngappedSequence('A-C.D--E..F')).toBe('ACDEF');
49
+ });
50
+ test('handles empty string', () => {
51
+ expect(getUngappedSequence('')).toBe('');
52
+ });
53
+ test('handles string with only gaps', () => {
54
+ expect(getUngappedSequence('---...')).toBe('');
55
+ });
56
+ test('handles string with no gaps', () => {
57
+ expect(getUngappedSequence('ACDEF')).toBe('ACDEF');
58
+ });
59
+ });
60
+ //# sourceMappingURL=index.test.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.test.js","sourceRoot":"","sources":["../../src/msa/index.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,QAAQ,CAAA;AAE/C,OAAO,MAAM,MAAM,aAAa,CAAA;AAChC,OAAO,UAAU,MAAM,iBAAiB,CAAA;AACxC,OAAO,QAAQ,MAAM,eAAe,CAAA;AACpC,OAAO,EAAE,mBAAmB,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAA;AAE1D,QAAQ,CAAC,UAAU,EAAE,GAAG,EAAE;IACxB,IAAI,CAAC,qBAAqB,EAAE,GAAG,EAAE;QAC/B,MAAM,KAAK,GAAG;;;MAGZ,CAAA;QACF,MAAM,GAAG,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAA;QAE3B,MAAM,CAAC,GAAG,CAAC,CAAC,cAAc,CAAC,QAAQ,CAAC,CAAA;QACpC,MAAM,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAA;IAClD,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,mBAAmB,EAAE,GAAG,EAAE;QAC7B,MAAM,GAAG,GAAG;;;eAGD,CAAA;QACX,MAAM,GAAG,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAA;QAEzB,MAAM,CAAC,GAAG,CAAC,CAAC,cAAc,CAAC,MAAM,CAAC,CAAA;IACpC,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,uBAAuB,EAAE,GAAG,EAAE;QACjC,MAAM,OAAO,GAAG;;;;qBAIC,CAAA;QACjB,MAAM,GAAG,GAAG,QAAQ,CAAC,OAAO,CAAC,CAAA;QAE7B,MAAM,CAAC,GAAG,CAAC,CAAC,cAAc,CAAC,UAAU,CAAC,CAAA;IACxC,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,wCAAwC,EAAE,GAAG,EAAE;QAClD,MAAM,OAAO,GAAG;4BACQ,CAAA;QACxB,MAAM,GAAG,GAAG,QAAQ,CAAC,OAAO,CAAC,CAAA;QAE7B,MAAM,CAAC,GAAG,CAAC,CAAC,cAAc,CAAC,UAAU,CAAC,CAAA;IACxC,CAAC,CAAC,CAAA;AACJ,CAAC,CAAC,CAAA;AAEF,QAAQ,CAAC,qBAAqB,EAAE,GAAG,EAAE;IACnC,IAAI,CAAC,gBAAgB,EAAE,GAAG,EAAE;QAC1B,MAAM,CAAC,mBAAmB,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;IACtD,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,cAAc,EAAE,GAAG,EAAE;QACxB,MAAM,CAAC,mBAAmB,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;IACtD,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,oBAAoB,EAAE,GAAG,EAAE;QAC9B,MAAM,CAAC,mBAAmB,CAAC,aAAa,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;IAC1D,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,sBAAsB,EAAE,GAAG,EAAE;QAChC,MAAM,CAAC,mBAAmB,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;IAC1C,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,+BAA+B,EAAE,GAAG,EAAE;QACzC,MAAM,CAAC,mBAAmB,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;IAChD,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,6BAA6B,EAAE,GAAG,EAAE;QACvC,MAAM,CAAC,mBAAmB,CAA
C,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;IACpD,CAAC,CAAC,CAAA;AACJ,CAAC,CAAC,CAAA"}
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,111 @@
1
+ import { describe, expect, test } from 'vitest';
2
+ import { parse, parseAll, sniff } from "./stockholmParser.js";
3
+ describe('stockholmParser', () => {
4
+ describe('sniff', () => {
5
+ test('returns true for Stockholm format', () => {
6
+ expect(sniff('# STOCKHOLM 1.0\n')).toBe(true);
7
+ });
8
+ test('returns false for non-Stockholm format', () => {
9
+ expect(sniff('>seq1\nACDEF')).toBe(false);
10
+ expect(sniff('CLUSTAL W')).toBe(false);
11
+ });
12
+ });
13
+ describe('parseAll', () => {
14
+ test('parses simple Stockholm file', () => {
15
+ const stockholm = `# STOCKHOLM 1.0
16
+ seq1 ACDEFGHIKL
17
+ seq2 ACDEFGHIKL
18
+ //`;
19
+ const result = parseAll(stockholm);
20
+ expect(result).toHaveLength(1);
21
+ expect(result[0]?.seqdata).toEqual({
22
+ seq1: 'ACDEFGHIKL',
23
+ seq2: 'ACDEFGHIKL',
24
+ });
25
+ expect(result[0]?.seqname).toEqual(['seq1', 'seq2']);
26
+ });
27
+ test('parses GF lines', () => {
28
+ const stockholm = `# STOCKHOLM 1.0
29
+ #=GF DE Description of the alignment
30
+ #=GF NH (A:0.1,B:0.2);
31
+ seq1 ACDEF
32
+ //`;
33
+ const result = parseAll(stockholm);
34
+ expect(result[0]?.gf.DE).toEqual(['Description of the alignment']);
35
+ expect(result[0]?.gf.NH).toEqual(['(A:0.1,B:0.2);']);
36
+ });
37
+ test('parses GC lines', () => {
38
+ const stockholm = `# STOCKHOLM 1.0
39
+ seq1 ACDEF
40
+ #=GC seq_cons *****
41
+ #=GC SS_cons <<>>>
42
+ //`;
43
+ const result = parseAll(stockholm);
44
+ expect(result[0]?.gc.seq_cons).toBe('*****');
45
+ expect(result[0]?.gc.SS_cons).toBe('<<>>>');
46
+ });
47
+ test('parses GS lines', () => {
48
+ const stockholm = `# STOCKHOLM 1.0
49
+ #=GS seq1 AC P12345
50
+ #=GS seq1 DR PDB; 1ABC A; 1-100
51
+ seq1 ACDEF
52
+ //`;
53
+ const result = parseAll(stockholm);
54
+ expect(result[0]?.gs.AC?.seq1).toEqual(['P12345']);
55
+ expect(result[0]?.gs.DR?.seq1).toEqual(['PDB; 1ABC A; 1-100']);
56
+ });
57
+ test('parses multiple alignments', () => {
58
+ const stockholm = `# STOCKHOLM 1.0
59
+ seq1 ACDEF
60
+ //
61
+ # STOCKHOLM 1.0
62
+ seq2 GHIKL
63
+ //`;
64
+ const result = parseAll(stockholm);
65
+ expect(result).toHaveLength(2);
66
+ expect(result[0]?.seqdata.seq1).toBe('ACDEF');
67
+ expect(result[1]?.seqdata.seq2).toBe('GHIKL');
68
+ });
69
+ test('concatenates interleaved sequence blocks', () => {
70
+ const stockholm = `# STOCKHOLM 1.0
71
+ seq1 ACDEF
72
+ seq2 GHIKL
73
+
74
+ seq1 MNOPQ
75
+ seq2 RSTUV
76
+ //`;
77
+ const result = parseAll(stockholm);
78
+ expect(result[0]?.seqdata.seq1).toBe('ACDEFMNOPQ');
79
+ expect(result[0]?.seqdata.seq2).toBe('GHIKLRSTUV');
80
+ });
81
+ test('handles file without trailing //', () => {
82
+ const stockholm = `# STOCKHOLM 1.0
83
+ seq1 ACDEF`;
84
+ const result = parseAll(stockholm);
85
+ expect(result).toHaveLength(1);
86
+ expect(result[0]?.seqdata.seq1).toBe('ACDEF');
87
+ });
88
+ });
89
+ describe('parse', () => {
90
+ test('returns single alignment', () => {
91
+ const stockholm = `# STOCKHOLM 1.0
92
+ seq1 ACDEF
93
+ //`;
94
+ const result = parse(stockholm);
95
+ expect(result.seqdata.seq1).toBe('ACDEF');
96
+ });
97
+ test('throws on empty input', () => {
98
+ expect(() => parse('')).toThrow('No alignments found');
99
+ });
100
+ test('throws on multiple alignments', () => {
101
+ const stockholm = `# STOCKHOLM 1.0
102
+ seq1 ACDEF
103
+ //
104
+ # STOCKHOLM 1.0
105
+ seq2 GHIKL
106
+ //`;
107
+ expect(() => parse(stockholm)).toThrow('More than one alignment found');
108
+ });
109
+ });
110
+ });
111
+ //# sourceMappingURL=stockholmParser.test.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"stockholmParser.test.js","sourceRoot":"","sources":["../../src/msa/stockholmParser.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,QAAQ,CAAA;AAE/C,OAAO,EAAE,KAAK,EAAE,QAAQ,EAAE,KAAK,EAAE,MAAM,sBAAsB,CAAA;AAE7D,QAAQ,CAAC,iBAAiB,EAAE,GAAG,EAAE;IAC/B,QAAQ,CAAC,OAAO,EAAE,GAAG,EAAE;QACrB,IAAI,CAAC,mCAAmC,EAAE,GAAG,EAAE;YAC7C,MAAM,CAAC,KAAK,CAAC,mBAAmB,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QAC/C,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,wCAAwC,EAAE,GAAG,EAAE;YAClD,MAAM,CAAC,KAAK,CAAC,cAAc,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;YACzC,MAAM,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;QACxC,CAAC,CAAC,CAAA;IACJ,CAAC,CAAC,CAAA;IAEF,QAAQ,CAAC,UAAU,EAAE,GAAG,EAAE;QACxB,IAAI,CAAC,8BAA8B,EAAE,GAAG,EAAE;YACxC,MAAM,SAAS,GAAG;;;GAGrB,CAAA;YACG,MAAM,MAAM,GAAG,QAAQ,CAAC,SAAS,CAAC,CAAA;YAClC,MAAM,CAAC,MAAM,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;YAC9B,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC,OAAO,CAAC;gBACjC,IAAI,EAAE,YAAY;gBAClB,IAAI,EAAE,YAAY;aACnB,CAAC,CAAA;YACF,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC,CAAA;QACtD,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,iBAAiB,EAAE,GAAG,EAAE;YAC3B,MAAM,SAAS,GAAG;;;;GAIrB,CAAA;YACG,MAAM,MAAM,GAAG,QAAQ,CAAC,SAAS,CAAC,CAAA;YAClC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,8BAA8B,CAAC,CAAC,CAAA;YAClE,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,gBAAgB,CAAC,CAAC,CAAA;QACtD,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,iBAAiB,EAAE,GAAG,EAAE;YAC3B,MAAM,SAAS,GAAG;;;;GAIrB,CAAA;YACG,MAAM,MAAM,GAAG,QAAQ,CAAC,SAAS,CAAC,CAAA;YAClC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;YAC5C,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;QAC7C,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,iBAAiB,EAAE,GAAG,EAAE;YAC3B,MAAM,SAAS,GAAG;;;;GAIrB,CAAA;YACG,MAAM,MAAM,GAAG,QAAQ,CAAC,SAAS,CAAC,CAAA;YAClC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,EAAE,IAAI,CAAC,CAAC,
OAAO,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAA;YAClD,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,EAAE,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,oBAAoB,CAAC,CAAC,CAAA;QAChE,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,4BAA4B,EAAE,GAAG,EAAE;YACtC,MAAM,SAAS,GAAG;;;;;GAKrB,CAAA;YACG,MAAM,MAAM,GAAG,QAAQ,CAAC,SAAS,CAAC,CAAA;YAClC,MAAM,CAAC,MAAM,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;YAC9B,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,OAAO,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;YAC7C,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,OAAO,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;QAC/C,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,0CAA0C,EAAE,GAAG,EAAE;YACpD,MAAM,SAAS,GAAG;;;;;;GAMrB,CAAA;YACG,MAAM,MAAM,GAAG,QAAQ,CAAC,SAAS,CAAC,CAAA;YAClC,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,OAAO,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,CAAA;YAClD,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,OAAO,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,CAAA;QACpD,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,kCAAkC,EAAE,GAAG,EAAE;YAC5C,MAAM,SAAS,GAAG;YACZ,CAAA;YACN,MAAM,MAAM,GAAG,QAAQ,CAAC,SAAS,CAAC,CAAA;YAClC,MAAM,CAAC,MAAM,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;YAC9B,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,OAAO,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;QAC/C,CAAC,CAAC,CAAA;IACJ,CAAC,CAAC,CAAA;IAEF,QAAQ,CAAC,OAAO,EAAE,GAAG,EAAE;QACrB,IAAI,CAAC,0BAA0B,EAAE,GAAG,EAAE;YACpC,MAAM,SAAS,GAAG;;GAErB,CAAA;YACG,MAAM,MAAM,GAAG,KAAK,CAAC,SAAS,CAAC,CAAA;YAC/B,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;QAC3C,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,uBAAuB,EAAE,GAAG,EAAE;YACjC,MAAM,CAAC,GAAG,EAAE,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,qBAAqB,CAAC,CAAA;QACxD,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,+BAA+B,EAAE,GAAG,EAAE;YACzC,MAAM,SAAS,GAAG;;;;;GAKrB,CAAA;YACG,MAAM,CAAC,GAAG,EAAE,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,OAAO,CAAC,+BAA+B,CAAC,CAAA;QACzE,CAAC,CAAC,CAAA;IACJ,CAAC,CAAC,CAAA;AACJ,CAAC,CAAC,CAAA"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "msa-parsers",
3
- "version": "5.0.3",
3
+ "version": "5.0.5",
4
4
  "license": "MIT",
5
5
  "type": "module",
6
6
  "exports": {