msa-parsers 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85) hide show
  1. package/dist/gff/gffToInterPro.d.ts +20 -0
  2. package/dist/gff/gffToInterPro.js +83 -0
  3. package/dist/gff/gffToInterPro.js.map +1 -0
  4. package/dist/gff/gffToInterPro.test.d.ts +1 -0
  5. package/dist/gff/gffToInterPro.test.js +181 -0
  6. package/dist/gff/gffToInterPro.test.js.map +1 -0
  7. package/dist/gff/index.d.ts +3 -0
  8. package/dist/gff/index.js +4 -0
  9. package/dist/gff/index.js.map +1 -0
  10. package/dist/gff/interProToGFF.d.ts +9 -0
  11. package/dist/gff/interProToGFF.js +48 -0
  12. package/dist/gff/interProToGFF.js.map +1 -0
  13. package/dist/gff/interProToGFF.test.d.ts +1 -0
  14. package/dist/gff/interProToGFF.test.js +189 -0
  15. package/dist/gff/interProToGFF.test.js.map +1 -0
  16. package/dist/gff/parseGFF.d.ts +2 -0
  17. package/dist/gff/parseGFF.js +41 -0
  18. package/dist/gff/parseGFF.js.map +1 -0
  19. package/dist/gff/parseGFF.test.d.ts +1 -0
  20. package/dist/gff/parseGFF.test.js +92 -0
  21. package/dist/gff/parseGFF.test.js.map +1 -0
  22. package/dist/index.d.ts +5 -0
  23. package/dist/index.js +9 -0
  24. package/dist/index.js.map +1 -0
  25. package/dist/msa/A3mMSA.d.ts +33 -0
  26. package/dist/msa/A3mMSA.js +280 -0
  27. package/dist/msa/A3mMSA.js.map +1 -0
  28. package/dist/msa/A3mMSA.test.d.ts +1 -0
  29. package/dist/msa/A3mMSA.test.js +155 -0
  30. package/dist/msa/A3mMSA.test.js.map +1 -0
  31. package/dist/msa/ClustalMSA.d.ts +30 -0
  32. package/dist/msa/ClustalMSA.js +53 -0
  33. package/dist/msa/ClustalMSA.js.map +1 -0
  34. package/dist/msa/EmfMSA.d.ts +27 -0
  35. package/dist/msa/EmfMSA.js +53 -0
  36. package/dist/msa/EmfMSA.js.map +1 -0
  37. package/dist/msa/FastaMSA.d.ts +19 -0
  38. package/dist/msa/FastaMSA.js +69 -0
  39. package/dist/msa/FastaMSA.js.map +1 -0
  40. package/dist/msa/StockholmMSA.d.ts +54 -0
  41. package/dist/msa/StockholmMSA.js +113 -0
  42. package/dist/msa/StockholmMSA.js.map +1 -0
  43. package/dist/msa/index.d.ts +18 -0
  44. package/dist/msa/index.js +34 -0
  45. package/dist/msa/index.js.map +1 -0
  46. package/dist/msa/index.test.d.ts +1 -0
  47. package/dist/msa/index.test.js +60 -0
  48. package/dist/msa/index.test.js.map +1 -0
  49. package/dist/msa/parseNewick.d.ts +60 -0
  50. package/dist/msa/parseNewick.js +95 -0
  51. package/dist/msa/parseNewick.js.map +1 -0
  52. package/dist/msa/stockholmParser.d.ts +22 -0
  53. package/dist/msa/stockholmParser.js +141 -0
  54. package/dist/msa/stockholmParser.js.map +1 -0
  55. package/dist/msa/stockholmParser.test.d.ts +1 -0
  56. package/dist/msa/stockholmParser.test.js +111 -0
  57. package/dist/msa/stockholmParser.test.js.map +1 -0
  58. package/dist/types.d.ts +66 -0
  59. package/dist/types.js +2 -0
  60. package/dist/types.js.map +1 -0
  61. package/dist/util.d.ts +2 -0
  62. package/dist/util.js +10 -0
  63. package/dist/util.js.map +1 -0
  64. package/package.json +25 -0
  65. package/src/gff/gffToInterPro.test.ts +202 -0
  66. package/src/gff/gffToInterPro.ts +113 -0
  67. package/src/gff/index.ts +3 -0
  68. package/src/gff/interProToGFF.test.ts +206 -0
  69. package/src/gff/interProToGFF.ts +59 -0
  70. package/src/gff/parseGFF.test.ts +106 -0
  71. package/src/gff/parseGFF.ts +46 -0
  72. package/src/index.ts +29 -0
  73. package/src/msa/A3mMSA.test.ts +192 -0
  74. package/src/msa/A3mMSA.ts +320 -0
  75. package/src/msa/ClustalMSA.ts +67 -0
  76. package/src/msa/EmfMSA.ts +67 -0
  77. package/src/msa/FastaMSA.ts +82 -0
  78. package/src/msa/StockholmMSA.ts +141 -0
  79. package/src/msa/index.test.ts +74 -0
  80. package/src/msa/index.ts +44 -0
  81. package/src/msa/parseNewick.ts +94 -0
  82. package/src/msa/stockholmParser.test.ts +123 -0
  83. package/src/msa/stockholmParser.ts +157 -0
  84. package/src/types.ts +68 -0
  85. package/src/util.ts +19 -0
@@ -0,0 +1,20 @@
1
+ import type { GFFRecord, InterProScanResponse, InterProScanResults } from '../types';
2
+ /**
3
+ * Convert GFF records to InterProScan format
4
+ *
5
+ * InterProScan GFF3 output format:
6
+ * - seq_id: sequence identifier
7
+ * - source: database/signature (e.g., "Pfam", "SMART")
8
+ * - type: usually "protein_match"
9
+ * - start/end: domain positions (1-based)
10
+ * - Attributes: Name (accession), signature_desc (name), Dbxref, etc.
11
+ */
12
+ export declare function gffToInterProResults(gffRecords: GFFRecord[]): Record<string, InterProScanResults>;
13
+ /**
14
+ * Convert GFF string directly to InterProScan format
15
+ */
16
+ export declare function parseGFFToInterPro(gffStr: string, parseGFFfn: (str: string) => GFFRecord[]): Record<string, InterProScanResults>;
17
+ /**
18
+ * Create a full InterProScanResponse from GFF records
19
+ */
20
+ export declare function gffToInterProResponse(gffRecords: GFFRecord[]): InterProScanResponse;
@@ -0,0 +1,83 @@
1
/**
 * Convert GFF records to InterProScan format.
 *
 * InterProScan GFF3 output format:
 * - seq_id: sequence identifier
 * - source: database/signature (e.g., "Pfam", "SMART")
 * - type: usually "protein_match"
 * - start/end: domain positions (1-based)
 * - Attributes: Name (accession), signature_desc (name), Dbxref, etc.
 *
 * Records are grouped by `seq_id`, and within a sequence by accession. Every
 * record sharing an accession contributes one `{ start, end }` location to
 * the same match; the name and description of a match come from the first
 * record seen for that accession.
 *
 * Fallbacks (each uses the first truthy value):
 * - accession: `Name`, `ID`, then `${source}_${start}_${end}`
 * - name: `signature_desc`, `Name`, then the accession
 * - description: `Ontology_term`, `description`, `Note`, then the name
 *
 * @param gffRecords parsed GFF records
 * @returns an object keyed by `seq_id`, each value holding `matches` and a
 *   single-element `xref` of the form `[{ id: seq_id }]`
 */
export function gffToInterProResults(gffRecords) {
    // seq_id -> (accession -> match). Maps preserve first-seen order, which
    // determines the order of sequences and of matches in the output.
    const bySequence = new Map();
    for (const record of gffRecords) {
        let matchesByAccession = bySequence.get(record.seq_id);
        if (!matchesByAccession) {
            matchesByAccession = new Map();
            bySequence.set(record.seq_id, matchesByAccession);
        }
        const accession = record.Name ||
            record.ID ||
            `${record.source}_${record.start}_${record.end}`;
        const location = { start: record.start, end: record.end };
        const existingMatch = matchesByAccession.get(accession);
        if (existingMatch) {
            // Repeat hit of a known signature: only the location is new.
            existingMatch.locations.push(location);
            continue;
        }
        const name = record.signature_desc || record.Name || accession;
        const description = record.Ontology_term ||
            record.description ||
            record.Note ||
            name;
        matchesByAccession.set(accession, {
            signature: {
                entry: { name, description, accession },
            },
            locations: [location],
        });
    }
    // Object.fromEntries defines own data properties, so a sequence whose id
    // is "__proto__" is kept as a normal key. Plain `obj[id] = value`
    // assignment would replace the object's prototype and drop the entry.
    return Object.fromEntries(Array.from(bySequence, ([seqId, matchesByAccession]) => [
        seqId,
        {
            matches: [...matchesByAccession.values()],
            xref: [{ id: seqId }],
        },
    ]));
}
67
/**
 * Parse a GFF string with the supplied parser and convert the resulting
 * records to InterProScan results in one step.
 *
 * @param gffStr raw GFF text
 * @param parseGFFfn parser that turns GFF text into GFF records
 * @returns whatever gffToInterProResults returns for the parsed records
 */
export function parseGFFToInterPro(gffStr, parseGFFfn) {
    return gffToInterProResults(parseGFFfn(gffStr));
}
74
/**
 * Build a full InterProScanResponse from GFF records: the per-sequence
 * results produced by gffToInterProResults, flattened into a `results` array.
 *
 * @param gffRecords parsed GFF records
 * @returns an object of the form `{ results: [...] }`
 */
export function gffToInterProResponse(gffRecords) {
    const results = Object.values(gffToInterProResults(gffRecords));
    return { results };
}
83
+ //# sourceMappingURL=gffToInterPro.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"gffToInterPro.js","sourceRoot":"","sources":["../../src/gff/gffToInterPro.ts"],"names":[],"mappings":"AAMA;;;;;;;;;GASG;AACH,MAAM,UAAU,oBAAoB,CAClC,UAAuB;IAEvB,MAAM,UAAU,GAAG,IAAI,GAAG,EAAuB,CAAA;IAEjD,KAAK,MAAM,MAAM,IAAI,UAAU,EAAE,CAAC;QAChC,MAAM,QAAQ,GAAG,UAAU,CAAC,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,CAAA;QAC9C,IAAI,QAAQ,EAAE,CAAC;YACb,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;QACvB,CAAC;aAAM,CAAC;YACN,UAAU,CAAC,GAAG,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC,MAAM,CAAC,CAAC,CAAA;QACzC,CAAC;IACH,CAAC;IAED,MAAM,OAAO,GAAwC,EAAE,CAAA;IAEvD,KAAK,MAAM,CAAC,KAAK,EAAE,OAAO,CAAC,IAAI,UAAU,EAAE,CAAC;QAC1C,MAAM,kBAAkB,GAAG,IAAI,GAAG,EAG/B,CAAA;QACH,MAAM,SAAS,GAAG,IAAI,GAAG,EAGtB,CAAA;QAEH,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;YAC7B,MAAM,SAAS,GACZ,MAAM,CAAC,IAAe;gBACtB,MAAM,CAAC,EAAa;gBACrB,GAAG,MAAM,CAAC,MAAM,IAAI,MAAM,CAAC,KAAK,IAAI,MAAM,CAAC,GAAG,EAAE,CAAA;YAClD,MAAM,IAAI,GACP,MAAM,CAAC,cAAyB;gBAChC,MAAM,CAAC,IAAe;gBACvB,SAAS,CAAA;YACX,MAAM,WAAW,GACd,MAAM,CAAC,aAAwB;gBAC/B,MAAM,CAAC,WAAsB;gBAC7B,MAAM,CAAC,IAAe;gBACvB,IAAI,CAAA;YAEN,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,SAAS,CAAC,EAAE,CAAC;gBAC9B,SAAS,CAAC,GAAG,CAAC,SAAS,EAAE,EAAE,IAAI,EAAE,WAAW,EAAE,SAAS,EAAE,CAAC,CAAA;YAC5D,CAAC;YAED,MAAM,SAAS,GAAG,kBAAkB,CAAC,GAAG,CAAC,SAAS,CAAC,CAAA;YACnD,IAAI,SAAS,EAAE,CAAC;gBACd,SAAS,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,MAAM,CAAC,KAAK,EAAE,GAAG,EAAE,MAAM,CAAC,GAAG,EAAE,CAAC,CAAA;YAC1D,CAAC;iBAAM,CAAC;gBACN,kBAAkB,CAAC,GAAG,CAAC,SAAS,EAAE;oBAChC,EAAE,KAAK,EAAE,MAAM,CAAC,KAAK,EAAE,GAAG,EAAE,MAAM,CAAC,GAAG,EAAE;iBACzC,CAAC,CAAA;YACJ,CAAC;QACH,CAAC;QAED,MAAM,OAAO,GAAG,EAAE,CAAA;QAClB,KAAK,MAAM,CAAC,SAAS,EAAE,SAAS,CAAC,IAAI,kBAAkB,EAAE,CAAC;YACxD,MAAM,IAAI,GAAG,SAAS,CAAC,GAAG,CAAC,SAAS,CAAE,CAAA;YACtC,OAAO,CAAC,IAAI,CAAC;gBACX,SAAS,EAAE;oBACT,KAAK,EAAE,IAAI;iBACZ;gBACD,SAAS;aACV,CAAC,CAAA;QACJ,CAAC;QAED,OAAO,CAAC,KAAK,CAAC,GAAG;YACf,OAAO;YACP,IAAI,EAAE,CAAC,EAAE,EAAE,EAAE,KAAK,EAAE,CAAC;SACtB,CAAA;IACH,CAAC;IAED,OAAO,OAAO,CAAA;AAChB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,kBAAkB,CAChC,MAAc,EACd,UAAwC;IAExC,MAAM,OAAO,GA
AG,UAAU,CAAC,MAAM,CAAC,CAAA;IAClC,OAAO,oBAAoB,CAAC,OAAO,CAAC,CAAA;AACtC,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,qBAAqB,CACnC,UAAuB;IAEvB,MAAM,UAAU,GAAG,oBAAoB,CAAC,UAAU,CAAC,CAAA;IACnD,OAAO;QACL,OAAO,EAAE,MAAM,CAAC,MAAM,CAAC,UAAU,CAAC;KACnC,CAAA;AACH,CAAC"}
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,181 @@
1
+ import { describe, expect, test } from 'vitest';
2
+ import { gffToInterProResponse, gffToInterProResults } from './gffToInterPro';
3
+ describe('gffToInterProResults', () => {
4
+ test('converts empty array', () => {
5
+ expect(gffToInterProResults([])).toEqual({});
6
+ });
7
+ test('converts single GFF record', () => {
8
+ const records = [
9
+ {
10
+ seq_id: 'seq1',
11
+ source: 'Pfam',
12
+ type: 'protein_match',
13
+ start: 10,
14
+ end: 50,
15
+ score: 0,
16
+ strand: '.',
17
+ phase: '.',
18
+ Name: 'PF00001',
19
+ signature_desc: '7tm_1',
20
+ description: 'GPCR family',
21
+ },
22
+ ];
23
+ const result = gffToInterProResults(records);
24
+ expect(result).toHaveProperty('seq1');
25
+ expect(result.seq1?.matches).toHaveLength(1);
26
+ expect(result.seq1?.matches[0]?.signature.entry).toEqual({
27
+ accession: 'PF00001',
28
+ name: '7tm_1',
29
+ description: 'GPCR family',
30
+ });
31
+ expect(result.seq1?.matches[0]?.locations).toEqual([{ start: 10, end: 50 }]);
32
+ expect(result.seq1?.xref).toEqual([{ id: 'seq1' }]);
33
+ });
34
+ test('groups multiple records for same sequence', () => {
35
+ const records = [
36
+ {
37
+ seq_id: 'seq1',
38
+ source: 'Pfam',
39
+ type: 'protein_match',
40
+ start: 10,
41
+ end: 50,
42
+ score: 0,
43
+ strand: '.',
44
+ phase: '.',
45
+ Name: 'PF00001',
46
+ },
47
+ {
48
+ seq_id: 'seq1',
49
+ source: 'SMART',
50
+ type: 'protein_match',
51
+ start: 60,
52
+ end: 100,
53
+ score: 0,
54
+ strand: '.',
55
+ phase: '.',
56
+ Name: 'SM00001',
57
+ },
58
+ ];
59
+ const result = gffToInterProResults(records);
60
+ expect(Object.keys(result)).toHaveLength(1);
61
+ expect(result.seq1?.matches).toHaveLength(2);
62
+ });
63
+ test('handles multiple sequences', () => {
64
+ const records = [
65
+ {
66
+ seq_id: 'seq1',
67
+ source: 'Pfam',
68
+ type: 'protein_match',
69
+ start: 10,
70
+ end: 50,
71
+ score: 0,
72
+ strand: '.',
73
+ phase: '.',
74
+ Name: 'PF00001',
75
+ },
76
+ {
77
+ seq_id: 'seq2',
78
+ source: 'Pfam',
79
+ type: 'protein_match',
80
+ start: 5,
81
+ end: 40,
82
+ score: 0,
83
+ strand: '.',
84
+ phase: '.',
85
+ Name: 'PF00002',
86
+ },
87
+ ];
88
+ const result = gffToInterProResults(records);
89
+ expect(Object.keys(result)).toHaveLength(2);
90
+ expect(result).toHaveProperty('seq1');
91
+ expect(result).toHaveProperty('seq2');
92
+ });
93
+ test('combines locations for same accession', () => {
94
+ const records = [
95
+ {
96
+ seq_id: 'seq1',
97
+ source: 'Pfam',
98
+ type: 'protein_match',
99
+ start: 10,
100
+ end: 50,
101
+ score: 0,
102
+ strand: '.',
103
+ phase: '.',
104
+ Name: 'PF00001',
105
+ },
106
+ {
107
+ seq_id: 'seq1',
108
+ source: 'Pfam',
109
+ type: 'protein_match',
110
+ start: 100,
111
+ end: 150,
112
+ score: 0,
113
+ strand: '.',
114
+ phase: '.',
115
+ Name: 'PF00001',
116
+ },
117
+ ];
118
+ const result = gffToInterProResults(records);
119
+ expect(result.seq1?.matches).toHaveLength(1);
120
+ expect(result.seq1?.matches[0]?.locations).toHaveLength(2);
121
+ expect(result.seq1?.matches[0]?.locations).toEqual([
122
+ { start: 10, end: 50 },
123
+ { start: 100, end: 150 },
124
+ ]);
125
+ });
126
+ test('uses ID as fallback for Name', () => {
127
+ const records = [
128
+ {
129
+ seq_id: 'seq1',
130
+ source: 'Source',
131
+ type: 'protein_match',
132
+ start: 10,
133
+ end: 50,
134
+ score: 0,
135
+ strand: '.',
136
+ phase: '.',
137
+ ID: 'domain_123',
138
+ },
139
+ ];
140
+ const result = gffToInterProResults(records);
141
+ expect(result.seq1?.matches[0]?.signature.entry?.accession).toBe('domain_123');
142
+ });
143
+ test('generates fallback accession from source and positions', () => {
144
+ const records = [
145
+ {
146
+ seq_id: 'seq1',
147
+ source: 'CustomSource',
148
+ type: 'protein_match',
149
+ start: 10,
150
+ end: 50,
151
+ score: 0,
152
+ strand: '.',
153
+ phase: '.',
154
+ },
155
+ ];
156
+ const result = gffToInterProResults(records);
157
+ expect(result.seq1?.matches[0]?.signature.entry?.accession).toBe('CustomSource_10_50');
158
+ });
159
+ });
160
+ describe('gffToInterProResponse', () => {
161
+ test('wraps results in response format', () => {
162
+ const records = [
163
+ {
164
+ seq_id: 'seq1',
165
+ source: 'Pfam',
166
+ type: 'protein_match',
167
+ start: 10,
168
+ end: 50,
169
+ score: 0,
170
+ strand: '.',
171
+ phase: '.',
172
+ Name: 'PF00001',
173
+ },
174
+ ];
175
+ const response = gffToInterProResponse(records);
176
+ expect(response).toHaveProperty('results');
177
+ expect(response.results).toHaveLength(1);
178
+ expect(response.results[0]?.xref[0]?.id).toBe('seq1');
179
+ });
180
+ });
181
+ //# sourceMappingURL=gffToInterPro.test.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"gffToInterPro.test.js","sourceRoot":"","sources":["../../src/gff/gffToInterPro.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,QAAQ,CAAA;AAE/C,OAAO,EAAE,qBAAqB,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAA;AAI7E,QAAQ,CAAC,sBAAsB,EAAE,GAAG,EAAE;IACpC,IAAI,CAAC,sBAAsB,EAAE,GAAG,EAAE;QAChC,MAAM,CAAC,oBAAoB,CAAC,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAA;IAC9C,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,4BAA4B,EAAE,GAAG,EAAE;QACtC,MAAM,OAAO,GAAgB;YAC3B;gBACE,MAAM,EAAE,MAAM;gBACd,MAAM,EAAE,MAAM;gBACd,IAAI,EAAE,eAAe;gBACrB,KAAK,EAAE,EAAE;gBACT,GAAG,EAAE,EAAE;gBACP,KAAK,EAAE,CAAC;gBACR,MAAM,EAAE,GAAG;gBACX,KAAK,EAAE,GAAG;gBACV,IAAI,EAAE,SAAS;gBACf,cAAc,EAAE,OAAO;gBACvB,WAAW,EAAE,aAAa;aAC3B;SACF,CAAA;QACD,MAAM,MAAM,GAAG,oBAAoB,CAAC,OAAO,CAAC,CAAA;QAE5C,MAAM,CAAC,MAAM,CAAC,CAAC,cAAc,CAAC,MAAM,CAAC,CAAA;QACrC,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;QAC5C,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,CAAC,EAAE,SAAS,CAAC,KAAK,CAAC,CAAC,OAAO,CAAC;YACvD,SAAS,EAAE,SAAS;YACpB,IAAI,EAAE,OAAO;YACb,WAAW,EAAE,aAAa;SAC3B,CAAC,CAAA;QACF,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC,OAAO,CAAC,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE,CAAC,CAAC,CAAA;QAC5E,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,EAAE,EAAE,EAAE,MAAM,EAAE,CAAC,CAAC,CAAA;IACrD,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,2CAA2C,EAAE,GAAG,EAAE;QACrD,MAAM,OAAO,GAAgB;YAC3B;gBACE,MAAM,EAAE,MAAM;gBACd,MAAM,EAAE,MAAM;gBACd,IAAI,EAAE,eAAe;gBACrB,KAAK,EAAE,EAAE;gBACT,GAAG,EAAE,EAAE;gBACP,KAAK,EAAE,CAAC;gBACR,MAAM,EAAE,GAAG;gBACX,KAAK,EAAE,GAAG;gBACV,IAAI,EAAE,SAAS;aAChB;YACD;gBACE,MAAM,EAAE,MAAM;gBACd,MAAM,EAAE,OAAO;gBACf,IAAI,EAAE,eAAe;gBACrB,KAAK,EAAE,EAAE;gBACT,GAAG,EAAE,GAAG;gBACR,KAAK,EAAE,CAAC;gBACR,MAAM,EAAE,GAAG;gBACX,KAAK,EAAE,GAAG;gBACV,IAAI,EAAE,SAAS;aAChB;SACF,CAAA;QACD,MAAM,MAAM,GAAG,oBAAoB,CAAC,OAAO,CAAC,CAAA;QAE5C,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;QAC3C,MAAM,CAAC,MAAM,CAAC,IAAI,EAA
E,OAAO,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;IAC9C,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,4BAA4B,EAAE,GAAG,EAAE;QACtC,MAAM,OAAO,GAAgB;YAC3B;gBACE,MAAM,EAAE,MAAM;gBACd,MAAM,EAAE,MAAM;gBACd,IAAI,EAAE,eAAe;gBACrB,KAAK,EAAE,EAAE;gBACT,GAAG,EAAE,EAAE;gBACP,KAAK,EAAE,CAAC;gBACR,MAAM,EAAE,GAAG;gBACX,KAAK,EAAE,GAAG;gBACV,IAAI,EAAE,SAAS;aAChB;YACD;gBACE,MAAM,EAAE,MAAM;gBACd,MAAM,EAAE,MAAM;gBACd,IAAI,EAAE,eAAe;gBACrB,KAAK,EAAE,CAAC;gBACR,GAAG,EAAE,EAAE;gBACP,KAAK,EAAE,CAAC;gBACR,MAAM,EAAE,GAAG;gBACX,KAAK,EAAE,GAAG;gBACV,IAAI,EAAE,SAAS;aAChB;SACF,CAAA;QACD,MAAM,MAAM,GAAG,oBAAoB,CAAC,OAAO,CAAC,CAAA;QAE5C,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;QAC3C,MAAM,CAAC,MAAM,CAAC,CAAC,cAAc,CAAC,MAAM,CAAC,CAAA;QACrC,MAAM,CAAC,MAAM,CAAC,CAAC,cAAc,CAAC,MAAM,CAAC,CAAA;IACvC,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,uCAAuC,EAAE,GAAG,EAAE;QACjD,MAAM,OAAO,GAAgB;YAC3B;gBACE,MAAM,EAAE,MAAM;gBACd,MAAM,EAAE,MAAM;gBACd,IAAI,EAAE,eAAe;gBACrB,KAAK,EAAE,EAAE;gBACT,GAAG,EAAE,EAAE;gBACP,KAAK,EAAE,CAAC;gBACR,MAAM,EAAE,GAAG;gBACX,KAAK,EAAE,GAAG;gBACV,IAAI,EAAE,SAAS;aAChB;YACD;gBACE,MAAM,EAAE,MAAM;gBACd,MAAM,EAAE,MAAM;gBACd,IAAI,EAAE,eAAe;gBACrB,KAAK,EAAE,GAAG;gBACV,GAAG,EAAE,GAAG;gBACR,KAAK,EAAE,CAAC;gBACR,MAAM,EAAE,GAAG;gBACX,KAAK,EAAE,GAAG;gBACV,IAAI,EAAE,SAAS;aAChB;SACF,CAAA;QACD,MAAM,MAAM,GAAG,oBAAoB,CAAC,OAAO,CAAC,CAAA;QAE5C,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;QAC5C,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;QAC1D,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC,OAAO,CAAC;YACjD,EAAE,KAAK,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE;YACtB,EAAE,KAAK,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE;SACzB,CAAC,CAAA;IACJ,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,8BAA8B,EAAE,GAAG,EAAE;QACxC,MAAM,OAAO,GAAgB;YAC3B;gBACE,MAAM,EAAE,MAAM;gBACd,MAAM,EAAE,QAAQ;gBAChB,IAAI,EAAE,eAAe;gBACrB,KAAK,EAAE,EAAE;gBACT,GAAG,EAAE,EAAE;gBACP,KAAK,EAAE,CAAC;gBACR,MAAM,EAAE,GAAG;gBACX,KAAK,EAAE,GAAG;gBACV,EAAE,EAAE,YAAY;aACjB;SACF,CAAA;QACD,MAAM,MA
AM,GAAG,oBAAoB,CAAC,OAAO,CAAC,CAAA;QAE5C,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,CAAC,EAAE,SAAS,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC,IAAI,CAC9D,YAAY,CACb,CAAA;IACH,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,wDAAwD,EAAE,GAAG,EAAE;QAClE,MAAM,OAAO,GAAgB;YAC3B;gBACE,MAAM,EAAE,MAAM;gBACd,MAAM,EAAE,cAAc;gBACtB,IAAI,EAAE,eAAe;gBACrB,KAAK,EAAE,EAAE;gBACT,GAAG,EAAE,EAAE;gBACP,KAAK,EAAE,CAAC;gBACR,MAAM,EAAE,GAAG;gBACX,KAAK,EAAE,GAAG;aACX;SACF,CAAA;QACD,MAAM,MAAM,GAAG,oBAAoB,CAAC,OAAO,CAAC,CAAA;QAE5C,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,CAAC,EAAE,SAAS,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC,IAAI,CAC9D,oBAAoB,CACrB,CAAA;IACH,CAAC,CAAC,CAAA;AACJ,CAAC,CAAC,CAAA;AAEF,QAAQ,CAAC,uBAAuB,EAAE,GAAG,EAAE;IACrC,IAAI,CAAC,kCAAkC,EAAE,GAAG,EAAE;QAC5C,MAAM,OAAO,GAAgB;YAC3B;gBACE,MAAM,EAAE,MAAM;gBACd,MAAM,EAAE,MAAM;gBACd,IAAI,EAAE,eAAe;gBACrB,KAAK,EAAE,EAAE;gBACT,GAAG,EAAE,EAAE;gBACP,KAAK,EAAE,CAAC;gBACR,MAAM,EAAE,GAAG;gBACX,KAAK,EAAE,GAAG;gBACV,IAAI,EAAE,SAAS;aAChB;SACF,CAAA;QACD,MAAM,QAAQ,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAA;QAE/C,MAAM,CAAC,QAAQ,CAAC,CAAC,cAAc,CAAC,SAAS,CAAC,CAAA;QAC1C,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;QACxC,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;IACvD,CAAC,CAAC,CAAA;AACJ,CAAC,CAAC,CAAA"}
@@ -0,0 +1,3 @@
1
+ export { parseGFF } from './parseGFF';
2
+ export { gffToInterProResponse, gffToInterProResults } from './gffToInterPro';
3
+ export { interProResponseToGFF, interProToGFF } from './interProToGFF';
@@ -0,0 +1,4 @@
1
+ export { parseGFF } from './parseGFF';
2
+ export { gffToInterProResponse, gffToInterProResults } from './gffToInterPro';
3
+ export { interProResponseToGFF, interProToGFF } from './interProToGFF';
4
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/gff/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAA;AACrC,OAAO,EAAE,qBAAqB,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAA;AAC7E,OAAO,EAAE,qBAAqB,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAA"}
@@ -0,0 +1,9 @@
1
+ import type { InterProScanResults } from '../types';
2
+ /**
3
+ * Convert InterProScan results to GFF3 format
4
+ */
5
+ export declare function interProToGFF(results: Record<string, InterProScanResults>): string;
6
+ /**
7
+ * Convert InterProScan JSON response to GFF3 format
8
+ */
9
+ export declare function interProResponseToGFF(results: InterProScanResults[]): string;
@@ -0,0 +1,48 @@
1
/**
 * Convert InterProScan results to GFF3 format.
 *
 * Output starts with the "##gff-version 3" header, followed by one
 * tab-separated feature line per match location:
 *
 *   seqId, "InterProScan", "protein_match", start, end, ".", ".", ".", attributes
 *
 * The attributes column carries `Name` (accession), `signature_desc` (name)
 * and `description`, each value escaped with encodeURIComponent and the
 * pairs joined with ";". An attribute whose value is null or undefined is
 * omitted rather than written as the literal text "null"/"undefined".
 * Matches without a `signature.entry` are skipped.
 *
 * @param results object mapping sequence id to its InterProScan results
 * @returns GFF3 text, lines joined with "\n" (no trailing newline)
 */
export function interProToGFF(results) {
    const lines = ['##gff-version 3'];
    for (const [seqId, data] of Object.entries(results)) {
        for (const match of data.matches) {
            const entry = match.signature?.entry;
            if (!entry) {
                continue;
            }
            // The attributes depend only on the entry, so build them once per
            // match instead of once per location.
            const attributes = [
                ['Name', entry.accession],
                ['signature_desc', entry.name],
                ['description', entry.description],
            ]
                .filter(([, value]) => value != null)
                .map(([tag, value]) => `${tag}=${encodeURIComponent(value)}`)
                .join(';');
            for (const location of match.locations) {
                lines.push([
                    seqId,
                    'InterProScan',
                    'protein_match',
                    location.start,
                    location.end,
                    '.',
                    '.',
                    '.',
                    attributes,
                ].join('\t'));
            }
        }
    }
    return lines.join('\n');
}
35
/**
 * Convert an array of InterProScan results (the `results` array of a JSON
 * response) to GFF3 format.
 *
 * Each result is keyed by the id of its first xref. Results with no xref
 * array, an empty xref array, or an empty id are skipped. When two results
 * share an id, the later one replaces the earlier one.
 *
 * @param results array of InterProScan results
 * @returns GFF3 text produced by interProToGFF
 */
export function interProResponseToGFF(results) {
    const entries = [];
    for (const result of results) {
        // Optional chaining on xref: a result lacking the array altogether is
        // skipped instead of throwing a TypeError.
        const seqId = result.xref?.[0]?.id;
        if (seqId) {
            entries.push([seqId, result]);
        }
    }
    // Object.fromEntries creates own properties, so an id of "__proto__" is
    // kept as an ordinary key rather than being swallowed by the prototype
    // setter that plain assignment would trigger.
    return interProToGFF(Object.fromEntries(entries));
}
48
+ //# sourceMappingURL=interProToGFF.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"interProToGFF.js","sourceRoot":"","sources":["../../src/gff/interProToGFF.ts"],"names":[],"mappings":"AAEA;;GAEG;AACH,MAAM,UAAU,aAAa,CAC3B,OAA4C;IAE5C,MAAM,KAAK,GAAa,CAAC,iBAAiB,CAAC,CAAA;IAE3C,KAAK,MAAM,CAAC,KAAK,EAAE,IAAI,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC;QACpD,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACjC,MAAM,KAAK,GAAG,KAAK,CAAC,SAAS,CAAC,KAAK,CAAA;YACnC,IAAI,CAAC,KAAK,EAAE,CAAC;gBACX,SAAQ;YACV,CAAC;YAED,KAAK,MAAM,QAAQ,IAAI,KAAK,CAAC,SAAS,EAAE,CAAC;gBACvC,MAAM,UAAU,GAAG;oBACjB,QAAQ,kBAAkB,CAAC,KAAK,CAAC,SAAS,CAAC,EAAE;oBAC7C,kBAAkB,kBAAkB,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE;oBAClD,eAAe,kBAAkB,CAAC,KAAK,CAAC,WAAW,CAAC,EAAE;iBACvD,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;gBAEX,MAAM,IAAI,GAAG;oBACX,KAAK;oBACL,cAAc;oBACd,eAAe;oBACf,QAAQ,CAAC,KAAK;oBACd,QAAQ,CAAC,GAAG;oBACZ,GAAG;oBACH,GAAG;oBACH,GAAG;oBACH,UAAU;iBACX,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;gBAEZ,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;YAClB,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;AACzB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,qBAAqB,CAAC,OAA8B;IAClE,MAAM,UAAU,GAAwC,EAAE,CAAA;IAE1D,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC7B,MAAM,KAAK,GAAG,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAA;QAChC,IAAI,KAAK,EAAE,CAAC;YACV,UAAU,CAAC,KAAK,CAAC,GAAG,MAAM,CAAA;QAC5B,CAAC;IACH,CAAC;IAED,OAAO,aAAa,CAAC,UAAU,CAAC,CAAA;AAClC,CAAC"}
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,189 @@
1
+ import { describe, expect, test } from 'vitest';
2
+ import { interProResponseToGFF, interProToGFF } from './interProToGFF';
3
+ describe('interProToGFF', () => {
4
+ test('converts empty results', () => {
5
+ const gff = interProToGFF({});
6
+ expect(gff).toBe('##gff-version 3');
7
+ });
8
+ test('converts single result', () => {
9
+ const results = {
10
+ seq1: {
11
+ matches: [
12
+ {
13
+ signature: {
14
+ entry: {
15
+ accession: 'PF00001',
16
+ name: '7tm_1',
17
+ description: 'GPCR family',
18
+ },
19
+ },
20
+ locations: [{ start: 10, end: 50 }],
21
+ },
22
+ ],
23
+ xref: [{ id: 'seq1' }],
24
+ },
25
+ };
26
+ const gff = interProToGFF(results);
27
+ const lines = gff.split('\n');
28
+ expect(lines[0]).toBe('##gff-version 3');
29
+ expect(lines[1]).toContain('seq1');
30
+ expect(lines[1]).toContain('InterProScan');
31
+ expect(lines[1]).toContain('protein_match');
32
+ expect(lines[1]).toContain('10');
33
+ expect(lines[1]).toContain('50');
34
+ expect(lines[1]).toContain('Name=PF00001');
35
+ expect(lines[1]).toContain('signature_desc=7tm_1');
36
+ expect(lines[1]).toContain('description=GPCR%20family');
37
+ });
38
+ test('handles multiple locations', () => {
39
+ const results = {
40
+ seq1: {
41
+ matches: [
42
+ {
43
+ signature: {
44
+ entry: {
45
+ accession: 'PF00001',
46
+ name: 'domain',
47
+ description: 'test',
48
+ },
49
+ },
50
+ locations: [
51
+ { start: 10, end: 50 },
52
+ { start: 100, end: 150 },
53
+ ],
54
+ },
55
+ ],
56
+ xref: [{ id: 'seq1' }],
57
+ },
58
+ };
59
+ const gff = interProToGFF(results);
60
+ const lines = gff.split('\n');
61
+ expect(lines).toHaveLength(3);
62
+ expect(lines[1]).toContain('10\t50');
63
+ expect(lines[2]).toContain('100\t150');
64
+ });
65
+ test('handles multiple sequences', () => {
66
+ const results = {
67
+ seq1: {
68
+ matches: [
69
+ {
70
+ signature: {
71
+ entry: {
72
+ accession: 'PF00001',
73
+ name: 'domain1',
74
+ description: 'test1',
75
+ },
76
+ },
77
+ locations: [{ start: 10, end: 50 }],
78
+ },
79
+ ],
80
+ xref: [{ id: 'seq1' }],
81
+ },
82
+ seq2: {
83
+ matches: [
84
+ {
85
+ signature: {
86
+ entry: {
87
+ accession: 'PF00002',
88
+ name: 'domain2',
89
+ description: 'test2',
90
+ },
91
+ },
92
+ locations: [{ start: 5, end: 40 }],
93
+ },
94
+ ],
95
+ xref: [{ id: 'seq2' }],
96
+ },
97
+ };
98
+ const gff = interProToGFF(results);
99
+ const lines = gff.split('\n');
100
+ expect(lines).toHaveLength(3);
101
+ expect(lines.some(l => l.includes('seq1'))).toBe(true);
102
+ expect(lines.some(l => l.includes('seq2'))).toBe(true);
103
+ });
104
+ test('skips matches without entry', () => {
105
+ const results = {
106
+ seq1: {
107
+ matches: [
108
+ {
109
+ signature: {},
110
+ locations: [{ start: 10, end: 50 }],
111
+ },
112
+ ],
113
+ xref: [{ id: 'seq1' }],
114
+ },
115
+ };
116
+ const gff = interProToGFF(results);
117
+ const lines = gff.split('\n');
118
+ expect(lines).toHaveLength(1);
119
+ expect(lines[0]).toBe('##gff-version 3');
120
+ });
121
+ test('URL-encodes special characters in attributes', () => {
122
+ const results = {
123
+ seq1: {
124
+ matches: [
125
+ {
126
+ signature: {
127
+ entry: {
128
+ accession: 'PF00001',
129
+ name: 'test;name=value',
130
+ description: 'description with spaces',
131
+ },
132
+ },
133
+ locations: [{ start: 10, end: 50 }],
134
+ },
135
+ ],
136
+ xref: [{ id: 'seq1' }],
137
+ },
138
+ };
139
+ const gff = interProToGFF(results);
140
+ expect(gff).toContain('signature_desc=test%3Bname%3Dvalue');
141
+ expect(gff).toContain('description=description%20with%20spaces');
142
+ });
143
+ });
144
+ describe('interProResponseToGFF', () => {
145
+ test('converts array of results', () => {
146
+ const results = [
147
+ {
148
+ matches: [
149
+ {
150
+ signature: {
151
+ entry: {
152
+ accession: 'PF00001',
153
+ name: 'domain',
154
+ description: 'test',
155
+ },
156
+ },
157
+ locations: [{ start: 10, end: 50 }],
158
+ },
159
+ ],
160
+ xref: [{ id: 'seq1' }],
161
+ },
162
+ ];
163
+ const gff = interProResponseToGFF(results);
164
+ expect(gff).toContain('##gff-version 3');
165
+ expect(gff).toContain('seq1');
166
+ });
167
+ test('handles results without xref', () => {
168
+ const results = [
169
+ {
170
+ matches: [
171
+ {
172
+ signature: {
173
+ entry: {
174
+ accession: 'PF00001',
175
+ name: 'domain',
176
+ description: 'test',
177
+ },
178
+ },
179
+ locations: [{ start: 10, end: 50 }],
180
+ },
181
+ ],
182
+ xref: [],
183
+ },
184
+ ];
185
+ const gff = interProResponseToGFF(results);
186
+ expect(gff).toBe('##gff-version 3');
187
+ });
188
+ });
189
+ //# sourceMappingURL=interProToGFF.test.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"interProToGFF.test.js","sourceRoot":"","sources":["../../src/gff/interProToGFF.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,QAAQ,CAAA;AAE/C,OAAO,EAAE,qBAAqB,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAA;AAItE,QAAQ,CAAC,eAAe,EAAE,GAAG,EAAE;IAC7B,IAAI,CAAC,wBAAwB,EAAE,GAAG,EAAE;QAClC,MAAM,GAAG,GAAG,aAAa,CAAC,EAAE,CAAC,CAAA;QAC7B,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAA;IACrC,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,wBAAwB,EAAE,GAAG,EAAE;QAClC,MAAM,OAAO,GAAwC;YACnD,IAAI,EAAE;gBACJ,OAAO,EAAE;oBACP;wBACE,SAAS,EAAE;4BACT,KAAK,EAAE;gCACL,SAAS,EAAE,SAAS;gCACpB,IAAI,EAAE,OAAO;gCACb,WAAW,EAAE,aAAa;6BAC3B;yBACF;wBACD,SAAS,EAAE,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE,CAAC;qBACpC;iBACF;gBACD,IAAI,EAAE,CAAC,EAAE,EAAE,EAAE,MAAM,EAAE,CAAC;aACvB;SACF,CAAA;QACD,MAAM,GAAG,GAAG,aAAa,CAAC,OAAO,CAAC,CAAA;QAClC,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAA;QAE7B,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAA;QACxC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,MAAM,CAAC,CAAA;QAClC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,cAAc,CAAC,CAAA;QAC1C,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,eAAe,CAAC,CAAA;QAC3C,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,IAAI,CAAC,CAAA;QAChC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,IAAI,CAAC,CAAA;QAChC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,cAAc,CAAC,CAAA;QAC1C,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,sBAAsB,CAAC,CAAA;QAClD,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,2BAA2B,CAAC,CAAA;IACzD,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,4BAA4B,EAAE,GAAG,EAAE;QACtC,MAAM,OAAO,GAAwC;YACnD,IAAI,EAAE;gBACJ,OAAO,EAAE;oBACP;wBACE,SAAS,EAAE;4BACT,KAAK,EAAE;gCACL,SAAS,EAAE,SAAS;gCACpB,IAAI,EAAE,QAAQ;gCACd,WAAW,EAAE,MAAM;6BACpB;yBACF;wBACD,SAAS,EAAE;4BACT,EAAE,KAAK,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE;4BACtB,EAAE,KAAK,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE;yBACzB;qBACF;iBACF;gBACD,IAAI,EAAE,CAAC,EAAE,EAAE,EAAE,MAAM,EAAE,CAAC;aACvB;SACF,CAAA;QACD,MAAM,GAAG,G
AAG,aAAa,CAAC,OAAO,CAAC,CAAA;QAClC,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAA;QAE7B,MAAM,CAAC,KAAK,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;QAC7B,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAA;QACpC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,UAAU,CAAC,CAAA;IACxC,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,4BAA4B,EAAE,GAAG,EAAE;QACtC,MAAM,OAAO,GAAwC;YACnD,IAAI,EAAE;gBACJ,OAAO,EAAE;oBACP;wBACE,SAAS,EAAE;4BACT,KAAK,EAAE;gCACL,SAAS,EAAE,SAAS;gCACpB,IAAI,EAAE,SAAS;gCACf,WAAW,EAAE,OAAO;6BACrB;yBACF;wBACD,SAAS,EAAE,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE,CAAC;qBACpC;iBACF;gBACD,IAAI,EAAE,CAAC,EAAE,EAAE,EAAE,MAAM,EAAE,CAAC;aACvB;YACD,IAAI,EAAE;gBACJ,OAAO,EAAE;oBACP;wBACE,SAAS,EAAE;4BACT,KAAK,EAAE;gCACL,SAAS,EAAE,SAAS;gCACpB,IAAI,EAAE,SAAS;gCACf,WAAW,EAAE,OAAO;6BACrB;yBACF;wBACD,SAAS,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,GAAG,EAAE,EAAE,EAAE,CAAC;qBACnC;iBACF;gBACD,IAAI,EAAE,CAAC,EAAE,EAAE,EAAE,MAAM,EAAE,CAAC;aACvB;SACF,CAAA;QACD,MAAM,GAAG,GAAG,aAAa,CAAC,OAAO,CAAC,CAAA;QAClC,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAA;QAE7B,MAAM,CAAC,KAAK,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;QAC7B,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QACtD,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;IACxD,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,6BAA6B,EAAE,GAAG,EAAE;QACvC,MAAM,OAAO,GAAwC;YACnD,IAAI,EAAE;gBACJ,OAAO,EAAE;oBACP;wBACE,SAAS,EAAE,EAAE;wBACb,SAAS,EAAE,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE,CAAC;qBACpC;iBACF;gBACD,IAAI,EAAE,CAAC,EAAE,EAAE,EAAE,MAAM,EAAE,CAAC;aACvB;SACF,CAAA;QACD,MAAM,GAAG,GAAG,aAAa,CAAC,OAAO,CAAC,CAAA;QAClC,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAA;QAE7B,MAAM,CAAC,KAAK,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;QAC7B,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAA;IAC1C,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,8CAA8C,EAAE,GAAG,EAAE;QACxD,MAAM,OAAO,GAAwC;YACnD,IAAI,EAAE;gBACJ,OAAO,EAAE;oBACP;wBACE
,SAAS,EAAE;4BACT,KAAK,EAAE;gCACL,SAAS,EAAE,SAAS;gCACpB,IAAI,EAAE,iBAAiB;gCACvB,WAAW,EAAE,yBAAyB;6BACvC;yBACF;wBACD,SAAS,EAAE,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE,CAAC;qBACpC;iBACF;gBACD,IAAI,EAAE,CAAC,EAAE,EAAE,EAAE,MAAM,EAAE,CAAC;aACvB;SACF,CAAA;QACD,MAAM,GAAG,GAAG,aAAa,CAAC,OAAO,CAAC,CAAA;QAElC,MAAM,CAAC,GAAG,CAAC,CAAC,SAAS,CAAC,oCAAoC,CAAC,CAAA;QAC3D,MAAM,CAAC,GAAG,CAAC,CAAC,SAAS,CAAC,yCAAyC,CAAC,CAAA;IAClE,CAAC,CAAC,CAAA;AACJ,CAAC,CAAC,CAAA;AAEF,QAAQ,CAAC,uBAAuB,EAAE,GAAG,EAAE;IACrC,IAAI,CAAC,2BAA2B,EAAE,GAAG,EAAE;QACrC,MAAM,OAAO,GAA0B;YACrC;gBACE,OAAO,EAAE;oBACP;wBACE,SAAS,EAAE;4BACT,KAAK,EAAE;gCACL,SAAS,EAAE,SAAS;gCACpB,IAAI,EAAE,QAAQ;gCACd,WAAW,EAAE,MAAM;6BACpB;yBACF;wBACD,SAAS,EAAE,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE,CAAC;qBACpC;iBACF;gBACD,IAAI,EAAE,CAAC,EAAE,EAAE,EAAE,MAAM,EAAE,CAAC;aACvB;SACF,CAAA;QACD,MAAM,GAAG,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAA;QAE1C,MAAM,CAAC,GAAG,CAAC,CAAC,SAAS,CAAC,iBAAiB,CAAC,CAAA;QACxC,MAAM,CAAC,GAAG,CAAC,CAAC,SAAS,CAAC,MAAM,CAAC,CAAA;IAC/B,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,8BAA8B,EAAE,GAAG,EAAE;QACxC,MAAM,OAAO,GAA0B;YACrC;gBACE,OAAO,EAAE;oBACP;wBACE,SAAS,EAAE;4BACT,KAAK,EAAE;gCACL,SAAS,EAAE,SAAS;gCACpB,IAAI,EAAE,QAAQ;gCACd,WAAW,EAAE,MAAM;6BACpB;yBACF;wBACD,SAAS,EAAE,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE,CAAC;qBACpC;iBACF;gBACD,IAAI,EAAE,EAAE;aACT;SACF,CAAA;QACD,MAAM,GAAG,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAA;QAE1C,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAA;IACrC,CAAC,CAAC,CAAA;AACJ,CAAC,CAAC,CAAA"}
@@ -0,0 +1,2 @@
1
+ import type { GFFRecord } from '../types';
2
/**
 * Parses GFF text into one record per feature line.
 *
 * @param str - Raw GFF text. When omitted or empty, an empty array is returned.
 * @returns One `GFFRecord` per non-blank line that does not start with `#`.
 */
+ export declare function parseGFF(str?: string): GFFRecord[];
@@ -0,0 +1,41 @@
1
/**
 * Percent-decodes a single attribute value piece.
 *
 * `decodeURIComponent` throws a `URIError` on malformed escapes (for example a
 * literal `%` that is not followed by two hex digits). Hand-edited GFF files
 * contain these often enough that one bad value must not abort the whole
 * parse, so the raw text is returned unchanged in that case.
 */
function decodeAttributeValue(raw) {
    try {
        return decodeURIComponent(raw);
    }
    catch {
        return raw;
    }
}
/**
 * Parses the ninth GFF column (`key=value;key=value`) into a plain object.
 *
 * - Fields are separated by `;`; blank fields and fields with an empty key are
 *   dropped.
 * - Only the FIRST `=` separates key from value, so values that themselves
 *   contain `=` are kept intact.
 * - A value is split on `,` (the multi-value separator) BEFORE
 *   percent-decoding, so an escaped comma (`%2C`) stays a literal comma in the
 *   result. The pieces are then joined with a single space.
 * - A key with no value (or an empty value) maps to `undefined`.
 *
 * @param col9 - Raw attribute column; may be undefined or empty.
 * @returns Mapping of attribute name to its decoded value. When a key is
 *   repeated, the last occurrence wins.
 */
function parseAttributes(col9) {
    if (!col9) {
        return {};
    }
    const entries = [];
    for (const rawField of col9.split(';')) {
        const field = rawField.trim();
        if (!field) {
            continue;
        }
        const eq = field.indexOf('=');
        const key = (eq === -1 ? field : field.slice(0, eq)).trim();
        if (key === '') {
            continue;
        }
        const rawValue = eq === -1 ? '' : field.slice(eq + 1);
        const value = rawValue
            ? rawValue.split(',').map(decodeAttributeValue).join(' ').trim()
            : undefined;
        entries.push([key, value]);
    }
    return Object.fromEntries(entries);
}
16
/**
 * Parses GFF text into one record per feature line.
 *
 * Blank lines and lines starting with `#` (comments and directives) are
 * skipped. Parsing stops at the `##FASTA` directive, or at the first line
 * starting with `>`, because everything after that point is embedded sequence
 * data rather than features; without this check every FASTA header and
 * sequence line would be returned as a bogus record.
 *
 * Columns are tab-separated. Missing text columns default to `''` (`'.'` for
 * strand and phase). `start`, `end` and `score` are converted with `Number`,
 * and anything that is not a non-zero number (including the `.` placeholder)
 * becomes `0`. Attributes from the ninth column are spread onto the record
 * after the fixed columns.
 *
 * @param str - Raw GFF text. When omitted or empty, an empty array is returned.
 * @returns The parsed feature records in file order.
 */
export function parseGFF(str) {
    if (!str) {
        return [];
    }
    const records = [];
    for (const rawLine of str.split('\n')) {
        // trim() also removes the '\r' left behind by CRLF line endings.
        const line = rawLine.trim();
        if (line.startsWith('##FASTA') || line.startsWith('>')) {
            // Start of the embedded FASTA section: no more features follow.
            break;
        }
        if (!line || line.startsWith('#')) {
            continue;
        }
        const parts = line.split('\t');
        const [seq_id, source, type, start, end, score, strand, phase] = parts;
        const col9 = parts[8];
        records.push({
            seq_id: seq_id ?? '',
            source: source ?? '',
            type: type ?? '',
            start: Number(start) || 0,
            end: Number(end) || 0,
            score: Number(score) || 0,
            strand: strand ?? '.',
            phase: phase ?? '.',
            ...parseAttributes(col9),
        });
    }
    return records;
}
41
+ //# sourceMappingURL=parseGFF.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"parseGFF.js","sourceRoot":"","sources":["../../src/gff/parseGFF.ts"],"names":[],"mappings":"AAEA,SAAS,eAAe,CAAC,IAAa;IACpC,IAAI,CAAC,IAAI,EAAE,CAAC;QACV,OAAO,EAAE,CAAA;IACX,CAAC;IACD,OAAO,MAAM,CAAC,WAAW,CACvB,IAAI;SACD,KAAK,CAAC,GAAG,CAAC;SACV,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;SAClB,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;SAChB,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;SACtB,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,GAAG,CAAC,EAAE,EAAE,CAAC;QACnB,GAAG,EAAE,IAAI,EAAE,IAAI,EAAE;QACjB,GAAG,CAAC,CAAC,CAAC,kBAAkB,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS;KACtE,CAAC;SACD,MAAM,CAAC,CAAC,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC,GAAG,KAAK,EAAE,CAAC,CACjC,CAAA;AACH,CAAC;AAED,MAAM,UAAU,QAAQ,CAAC,GAAY;IACnC,IAAI,CAAC,GAAG,EAAE,CAAC;QACT,OAAO,EAAE,CAAA;IACX,CAAC;IACD,OAAO,GAAG;SACP,KAAK,CAAC,IAAI,CAAC;SACX,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;SAClB,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC;SACtC,GAAG,CAAC,CAAC,CAAC,EAAE;QACP,MAAM,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAA;QAC3B,MAAM,CAAC,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,KAAK,EAAE,GAAG,EAAE,KAAK,EAAE,MAAM,EAAE,KAAK,CAAC,GAAG,KAAK,CAAA;QACtE,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAA;QAErB,OAAO;YACL,MAAM,EAAE,MAAM,IAAI,EAAE;YACpB,MAAM,EAAE,MAAM,IAAI,EAAE;YACpB,IAAI,EAAE,IAAI,IAAI,EAAE;YAChB,KAAK,EAAE,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC;YACzB,GAAG,EAAE,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC;YACrB,KAAK,EAAE,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC;YACzB,MAAM,EAAE,MAAM,IAAI,GAAG;YACrB,KAAK,EAAE,KAAK,IAAI,GAAG;YACnB,GAAG,eAAe,CAAC,IAAI,CAAC;SACzB,CAAA;IACH,CAAC,CAAC,CAAA;AACN,CAAC"}
@@ -0,0 +1 @@
1
+ export {};