msa-parsers 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/gff/gffToInterPro.d.ts +20 -0
- package/dist/gff/gffToInterPro.js +83 -0
- package/dist/gff/gffToInterPro.js.map +1 -0
- package/dist/gff/gffToInterPro.test.d.ts +1 -0
- package/dist/gff/gffToInterPro.test.js +181 -0
- package/dist/gff/gffToInterPro.test.js.map +1 -0
- package/dist/gff/index.d.ts +3 -0
- package/dist/gff/index.js +4 -0
- package/dist/gff/index.js.map +1 -0
- package/dist/gff/interProToGFF.d.ts +9 -0
- package/dist/gff/interProToGFF.js +48 -0
- package/dist/gff/interProToGFF.js.map +1 -0
- package/dist/gff/interProToGFF.test.d.ts +1 -0
- package/dist/gff/interProToGFF.test.js +189 -0
- package/dist/gff/interProToGFF.test.js.map +1 -0
- package/dist/gff/parseGFF.d.ts +2 -0
- package/dist/gff/parseGFF.js +41 -0
- package/dist/gff/parseGFF.js.map +1 -0
- package/dist/gff/parseGFF.test.d.ts +1 -0
- package/dist/gff/parseGFF.test.js +92 -0
- package/dist/gff/parseGFF.test.js.map +1 -0
- package/dist/index.d.ts +5 -0
- package/dist/index.js +9 -0
- package/dist/index.js.map +1 -0
- package/dist/msa/A3mMSA.d.ts +33 -0
- package/dist/msa/A3mMSA.js +280 -0
- package/dist/msa/A3mMSA.js.map +1 -0
- package/dist/msa/A3mMSA.test.d.ts +1 -0
- package/dist/msa/A3mMSA.test.js +155 -0
- package/dist/msa/A3mMSA.test.js.map +1 -0
- package/dist/msa/ClustalMSA.d.ts +30 -0
- package/dist/msa/ClustalMSA.js +53 -0
- package/dist/msa/ClustalMSA.js.map +1 -0
- package/dist/msa/EmfMSA.d.ts +27 -0
- package/dist/msa/EmfMSA.js +53 -0
- package/dist/msa/EmfMSA.js.map +1 -0
- package/dist/msa/FastaMSA.d.ts +19 -0
- package/dist/msa/FastaMSA.js +69 -0
- package/dist/msa/FastaMSA.js.map +1 -0
- package/dist/msa/StockholmMSA.d.ts +54 -0
- package/dist/msa/StockholmMSA.js +113 -0
- package/dist/msa/StockholmMSA.js.map +1 -0
- package/dist/msa/index.d.ts +18 -0
- package/dist/msa/index.js +34 -0
- package/dist/msa/index.js.map +1 -0
- package/dist/msa/index.test.d.ts +1 -0
- package/dist/msa/index.test.js +60 -0
- package/dist/msa/index.test.js.map +1 -0
- package/dist/msa/parseNewick.d.ts +60 -0
- package/dist/msa/parseNewick.js +95 -0
- package/dist/msa/parseNewick.js.map +1 -0
- package/dist/msa/stockholmParser.d.ts +22 -0
- package/dist/msa/stockholmParser.js +141 -0
- package/dist/msa/stockholmParser.js.map +1 -0
- package/dist/msa/stockholmParser.test.d.ts +1 -0
- package/dist/msa/stockholmParser.test.js +111 -0
- package/dist/msa/stockholmParser.test.js.map +1 -0
- package/dist/types.d.ts +66 -0
- package/dist/types.js +2 -0
- package/dist/types.js.map +1 -0
- package/dist/util.d.ts +2 -0
- package/dist/util.js +10 -0
- package/dist/util.js.map +1 -0
- package/package.json +25 -0
- package/src/gff/gffToInterPro.test.ts +202 -0
- package/src/gff/gffToInterPro.ts +113 -0
- package/src/gff/index.ts +3 -0
- package/src/gff/interProToGFF.test.ts +206 -0
- package/src/gff/interProToGFF.ts +59 -0
- package/src/gff/parseGFF.test.ts +106 -0
- package/src/gff/parseGFF.ts +46 -0
- package/src/index.ts +29 -0
- package/src/msa/A3mMSA.test.ts +192 -0
- package/src/msa/A3mMSA.ts +320 -0
- package/src/msa/ClustalMSA.ts +67 -0
- package/src/msa/EmfMSA.ts +67 -0
- package/src/msa/FastaMSA.ts +82 -0
- package/src/msa/StockholmMSA.ts +141 -0
- package/src/msa/index.test.ts +74 -0
- package/src/msa/index.ts +44 -0
- package/src/msa/parseNewick.ts +94 -0
- package/src/msa/stockholmParser.test.ts +123 -0
- package/src/msa/stockholmParser.ts +157 -0
- package/src/types.ts +68 -0
- package/src/util.ts +19 -0
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import type { GFFRecord, InterProScanResponse, InterProScanResults } from '../types';
|
|
2
|
+
/**
 * Convert GFF records to InterProScan format
 *
 * InterProScan GFF3 output format:
 * - seq_id: sequence identifier
 * - source: database/signature (e.g., "Pfam", "SMART")
 * - type: usually "protein_match"
 * - start/end: domain positions (1-based)
 * - Attributes: Name (accession), signature_desc (name), Dbxref, etc.
 *
 * Records are grouped by `seq_id`; within one sequence, records that resolve
 * to the same accession are merged into a single match with one location
 * per record.
 *
 * @param gffRecords - Parsed GFF records to convert.
 * @returns An object mapping each `seq_id` to its matches and an
 *   `xref` entry holding that id.
 */
export declare function gffToInterProResults(gffRecords: GFFRecord[]): Record<string, InterProScanResults>;
|
|
13
|
+
/**
 * Convert GFF string directly to InterProScan format
 *
 * @param gffStr - Raw GFF text, handed unchanged to `parseGFFfn`.
 * @param parseGFFfn - Parser that turns the GFF text into records.
 * @returns The parsed records converted to results keyed by sequence id.
 */
export declare function parseGFFToInterPro(gffStr: string, parseGFFfn: (str: string) => GFFRecord[]): Record<string, InterProScanResults>;
|
|
17
|
+
/**
 * Create a full InterProScanResponse from GFF records
 *
 * @param gffRecords - Parsed GFF records to convert.
 * @returns A response whose `results` array holds one entry per distinct
 *   `seq_id` found in the records.
 */
export declare function gffToInterProResponse(gffRecords: GFFRecord[]): InterProScanResponse;
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Convert GFF records to InterProScan format
|
|
3
|
+
*
|
|
4
|
+
* InterProScan GFF3 output format:
|
|
5
|
+
* - seq_id: sequence identifier
|
|
6
|
+
* - source: database/signature (e.g., "Pfam", "SMART")
|
|
7
|
+
* - type: usually "protein_match"
|
|
8
|
+
* - start/end: domain positions (1-based)
|
|
9
|
+
* - Attributes: Name (accession), signature_desc (name), Dbxref, etc.
|
|
10
|
+
*/
|
|
11
|
+
export function gffToInterProResults(gffRecords) {
|
|
12
|
+
const bySequence = new Map();
|
|
13
|
+
for (const record of gffRecords) {
|
|
14
|
+
const existing = bySequence.get(record.seq_id);
|
|
15
|
+
if (existing) {
|
|
16
|
+
existing.push(record);
|
|
17
|
+
}
|
|
18
|
+
else {
|
|
19
|
+
bySequence.set(record.seq_id, [record]);
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
const results = {};
|
|
23
|
+
for (const [seqId, records] of bySequence) {
|
|
24
|
+
const matchesByAccession = new Map();
|
|
25
|
+
const matchInfo = new Map();
|
|
26
|
+
for (const record of records) {
|
|
27
|
+
const accession = record.Name ||
|
|
28
|
+
record.ID ||
|
|
29
|
+
`${record.source}_${record.start}_${record.end}`;
|
|
30
|
+
const name = record.signature_desc ||
|
|
31
|
+
record.Name ||
|
|
32
|
+
accession;
|
|
33
|
+
const description = record.Ontology_term ||
|
|
34
|
+
record.description ||
|
|
35
|
+
record.Note ||
|
|
36
|
+
name;
|
|
37
|
+
if (!matchInfo.has(accession)) {
|
|
38
|
+
matchInfo.set(accession, { name, description, accession });
|
|
39
|
+
}
|
|
40
|
+
const locations = matchesByAccession.get(accession);
|
|
41
|
+
if (locations) {
|
|
42
|
+
locations.push({ start: record.start, end: record.end });
|
|
43
|
+
}
|
|
44
|
+
else {
|
|
45
|
+
matchesByAccession.set(accession, [
|
|
46
|
+
{ start: record.start, end: record.end },
|
|
47
|
+
]);
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
const matches = [];
|
|
51
|
+
for (const [accession, locations] of matchesByAccession) {
|
|
52
|
+
const info = matchInfo.get(accession);
|
|
53
|
+
matches.push({
|
|
54
|
+
signature: {
|
|
55
|
+
entry: info,
|
|
56
|
+
},
|
|
57
|
+
locations,
|
|
58
|
+
});
|
|
59
|
+
}
|
|
60
|
+
results[seqId] = {
|
|
61
|
+
matches,
|
|
62
|
+
xref: [{ id: seqId }],
|
|
63
|
+
};
|
|
64
|
+
}
|
|
65
|
+
return results;
|
|
66
|
+
}
|
|
67
|
+
/**
 * Parse GFF text with the supplied parser and convert the resulting records
 * to InterProScan-style results in one step.
 *
 * @param gffStr - Raw GFF text, passed unchanged to `parseGFFfn`.
 * @param parseGFFfn - Parser that turns GFF text into records.
 * @returns Whatever `gffToInterProResults` produces for the parsed records.
 */
export function parseGFFToInterPro(gffStr, parseGFFfn) {
    return gffToInterProResults(parseGFFfn(gffStr));
}
|
|
74
|
+
/**
 * Build an InterProScan response object from GFF records.
 *
 * @param gffRecords - Parsed GFF records to convert.
 * @returns `{ results }`, where `results` holds the per-sequence values of
 *   `gffToInterProResults(gffRecords)`.
 */
export function gffToInterProResponse(gffRecords) {
    const results = Object.values(gffToInterProResults(gffRecords));
    return { results };
}
|
|
83
|
+
//# sourceMappingURL=gffToInterPro.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"gffToInterPro.js","sourceRoot":"","sources":["../../src/gff/gffToInterPro.ts"],"names":[],"mappings":"AAMA;;;;;;;;;GASG;AACH,MAAM,UAAU,oBAAoB,CAClC,UAAuB;IAEvB,MAAM,UAAU,GAAG,IAAI,GAAG,EAAuB,CAAA;IAEjD,KAAK,MAAM,MAAM,IAAI,UAAU,EAAE,CAAC;QAChC,MAAM,QAAQ,GAAG,UAAU,CAAC,GAAG,CAAC,MAAM,CAAC,MAAM,CAAC,CAAA;QAC9C,IAAI,QAAQ,EAAE,CAAC;YACb,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;QACvB,CAAC;aAAM,CAAC;YACN,UAAU,CAAC,GAAG,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC,MAAM,CAAC,CAAC,CAAA;QACzC,CAAC;IACH,CAAC;IAED,MAAM,OAAO,GAAwC,EAAE,CAAA;IAEvD,KAAK,MAAM,CAAC,KAAK,EAAE,OAAO,CAAC,IAAI,UAAU,EAAE,CAAC;QAC1C,MAAM,kBAAkB,GAAG,IAAI,GAAG,EAG/B,CAAA;QACH,MAAM,SAAS,GAAG,IAAI,GAAG,EAGtB,CAAA;QAEH,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;YAC7B,MAAM,SAAS,GACZ,MAAM,CAAC,IAAe;gBACtB,MAAM,CAAC,EAAa;gBACrB,GAAG,MAAM,CAAC,MAAM,IAAI,MAAM,CAAC,KAAK,IAAI,MAAM,CAAC,GAAG,EAAE,CAAA;YAClD,MAAM,IAAI,GACP,MAAM,CAAC,cAAyB;gBAChC,MAAM,CAAC,IAAe;gBACvB,SAAS,CAAA;YACX,MAAM,WAAW,GACd,MAAM,CAAC,aAAwB;gBAC/B,MAAM,CAAC,WAAsB;gBAC7B,MAAM,CAAC,IAAe;gBACvB,IAAI,CAAA;YAEN,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,SAAS,CAAC,EAAE,CAAC;gBAC9B,SAAS,CAAC,GAAG,CAAC,SAAS,EAAE,EAAE,IAAI,EAAE,WAAW,EAAE,SAAS,EAAE,CAAC,CAAA;YAC5D,CAAC;YAED,MAAM,SAAS,GAAG,kBAAkB,CAAC,GAAG,CAAC,SAAS,CAAC,CAAA;YACnD,IAAI,SAAS,EAAE,CAAC;gBACd,SAAS,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,MAAM,CAAC,KAAK,EAAE,GAAG,EAAE,MAAM,CAAC,GAAG,EAAE,CAAC,CAAA;YAC1D,CAAC;iBAAM,CAAC;gBACN,kBAAkB,CAAC,GAAG,CAAC,SAAS,EAAE;oBAChC,EAAE,KAAK,EAAE,MAAM,CAAC,KAAK,EAAE,GAAG,EAAE,MAAM,CAAC,GAAG,EAAE;iBACzC,CAAC,CAAA;YACJ,CAAC;QACH,CAAC;QAED,MAAM,OAAO,GAAG,EAAE,CAAA;QAClB,KAAK,MAAM,CAAC,SAAS,EAAE,SAAS,CAAC,IAAI,kBAAkB,EAAE,CAAC;YACxD,MAAM,IAAI,GAAG,SAAS,CAAC,GAAG,CAAC,SAAS,CAAE,CAAA;YACtC,OAAO,CAAC,IAAI,CAAC;gBACX,SAAS,EAAE;oBACT,KAAK,EAAE,IAAI;iBACZ;gBACD,SAAS;aACV,CAAC,CAAA;QACJ,CAAC;QAED,OAAO,CAAC,KAAK,CAAC,GAAG;YACf,OAAO;YACP,IAAI,EAAE,CAAC,EAAE,EAAE,EAAE,KAAK,EAAE,CAAC;SACtB,CAAA;IACH,CAAC;IAED,OAAO,OAAO,CAAA;AAChB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,kBAAkB,CAChC,MAAc,EACd,UAAwC;IAExC,MAAM,OAAO,GAAG
,UAAU,CAAC,MAAM,CAAC,CAAA;IAClC,OAAO,oBAAoB,CAAC,OAAO,CAAC,CAAA;AACtC,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,qBAAqB,CACnC,UAAuB;IAEvB,MAAM,UAAU,GAAG,oBAAoB,CAAC,UAAU,CAAC,CAAA;IACnD,OAAO;QACL,OAAO,EAAE,MAAM,CAAC,MAAM,CAAC,UAAU,CAAC;KACnC,CAAA;AACH,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
import { describe, expect, test } from 'vitest';
|
|
2
|
+
import { gffToInterProResponse, gffToInterProResults } from './gffToInterPro';
|
|
3
|
+
describe('gffToInterProResults', () => {
    // Baseline GFF record; each test overrides only the fields it exercises.
    const gffRecord = (overrides = {}) => ({
        seq_id: 'seq1',
        source: 'Pfam',
        type: 'protein_match',
        start: 10,
        end: 50,
        score: 0,
        strand: '.',
        phase: '.',
        ...overrides,
    });
    // First match reported for seq1, or undefined when seq1 is absent.
    const firstSeq1Match = (result) => result.seq1?.matches[0];

    test('converts empty array', () => {
        expect(gffToInterProResults([])).toEqual({});
    });

    test('converts single GFF record', () => {
        const result = gffToInterProResults([
            gffRecord({
                Name: 'PF00001',
                signature_desc: '7tm_1',
                description: 'GPCR family',
            }),
        ]);
        expect(result).toHaveProperty('seq1');
        expect(result.seq1?.matches).toHaveLength(1);
        expect(firstSeq1Match(result)?.signature.entry).toEqual({
            accession: 'PF00001',
            name: '7tm_1',
            description: 'GPCR family',
        });
        expect(firstSeq1Match(result)?.locations).toEqual([{ start: 10, end: 50 }]);
        expect(result.seq1?.xref).toEqual([{ id: 'seq1' }]);
    });

    test('groups multiple records for same sequence', () => {
        const result = gffToInterProResults([
            gffRecord({ Name: 'PF00001' }),
            gffRecord({ source: 'SMART', start: 60, end: 100, Name: 'SM00001' }),
        ]);
        expect(Object.keys(result)).toHaveLength(1);
        expect(result.seq1?.matches).toHaveLength(2);
    });

    test('handles multiple sequences', () => {
        const result = gffToInterProResults([
            gffRecord({ Name: 'PF00001' }),
            gffRecord({ seq_id: 'seq2', start: 5, end: 40, Name: 'PF00002' }),
        ]);
        expect(Object.keys(result)).toHaveLength(2);
        expect(result).toHaveProperty('seq1');
        expect(result).toHaveProperty('seq2');
    });

    test('combines locations for same accession', () => {
        const result = gffToInterProResults([
            gffRecord({ Name: 'PF00001' }),
            gffRecord({ start: 100, end: 150, Name: 'PF00001' }),
        ]);
        expect(result.seq1?.matches).toHaveLength(1);
        expect(firstSeq1Match(result)?.locations).toHaveLength(2);
        expect(firstSeq1Match(result)?.locations).toEqual([
            { start: 10, end: 50 },
            { start: 100, end: 150 },
        ]);
    });

    test('uses ID as fallback for Name', () => {
        const result = gffToInterProResults([
            gffRecord({ source: 'Source', ID: 'domain_123' }),
        ]);
        expect(firstSeq1Match(result)?.signature.entry?.accession).toBe('domain_123');
    });

    test('generates fallback accession from source and positions', () => {
        const result = gffToInterProResults([
            gffRecord({ source: 'CustomSource' }),
        ]);
        expect(firstSeq1Match(result)?.signature.entry?.accession).toBe('CustomSource_10_50');
    });
});
|
|
160
|
+
describe('gffToInterProResponse', () => {
    test('wraps results in response format', () => {
        // A single Pfam hit on seq1 is enough to check the response envelope.
        const pfamHit = {
            seq_id: 'seq1',
            source: 'Pfam',
            type: 'protein_match',
            start: 10,
            end: 50,
            score: 0,
            strand: '.',
            phase: '.',
            Name: 'PF00001',
        };
        const response = gffToInterProResponse([pfamHit]);
        expect(response).toHaveProperty('results');
        expect(response.results).toHaveLength(1);
        expect(response.results[0]?.xref[0]?.id).toBe('seq1');
    });
});
|
|
181
|
+
//# sourceMappingURL=gffToInterPro.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"gffToInterPro.test.js","sourceRoot":"","sources":["../../src/gff/gffToInterPro.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,QAAQ,CAAA;AAE/C,OAAO,EAAE,qBAAqB,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAA;AAI7E,QAAQ,CAAC,sBAAsB,EAAE,GAAG,EAAE;IACpC,IAAI,CAAC,sBAAsB,EAAE,GAAG,EAAE;QAChC,MAAM,CAAC,oBAAoB,CAAC,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAA;IAC9C,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,4BAA4B,EAAE,GAAG,EAAE;QACtC,MAAM,OAAO,GAAgB;YAC3B;gBACE,MAAM,EAAE,MAAM;gBACd,MAAM,EAAE,MAAM;gBACd,IAAI,EAAE,eAAe;gBACrB,KAAK,EAAE,EAAE;gBACT,GAAG,EAAE,EAAE;gBACP,KAAK,EAAE,CAAC;gBACR,MAAM,EAAE,GAAG;gBACX,KAAK,EAAE,GAAG;gBACV,IAAI,EAAE,SAAS;gBACf,cAAc,EAAE,OAAO;gBACvB,WAAW,EAAE,aAAa;aAC3B;SACF,CAAA;QACD,MAAM,MAAM,GAAG,oBAAoB,CAAC,OAAO,CAAC,CAAA;QAE5C,MAAM,CAAC,MAAM,CAAC,CAAC,cAAc,CAAC,MAAM,CAAC,CAAA;QACrC,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;QAC5C,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,CAAC,EAAE,SAAS,CAAC,KAAK,CAAC,CAAC,OAAO,CAAC;YACvD,SAAS,EAAE,SAAS;YACpB,IAAI,EAAE,OAAO;YACb,WAAW,EAAE,aAAa;SAC3B,CAAC,CAAA;QACF,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC,OAAO,CAAC,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE,CAAC,CAAC,CAAA;QAC5E,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,EAAE,EAAE,EAAE,MAAM,EAAE,CAAC,CAAC,CAAA;IACrD,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,2CAA2C,EAAE,GAAG,EAAE;QACrD,MAAM,OAAO,GAAgB;YAC3B;gBACE,MAAM,EAAE,MAAM;gBACd,MAAM,EAAE,MAAM;gBACd,IAAI,EAAE,eAAe;gBACrB,KAAK,EAAE,EAAE;gBACT,GAAG,EAAE,EAAE;gBACP,KAAK,EAAE,CAAC;gBACR,MAAM,EAAE,GAAG;gBACX,KAAK,EAAE,GAAG;gBACV,IAAI,EAAE,SAAS;aAChB;YACD;gBACE,MAAM,EAAE,MAAM;gBACd,MAAM,EAAE,OAAO;gBACf,IAAI,EAAE,eAAe;gBACrB,KAAK,EAAE,EAAE;gBACT,GAAG,EAAE,GAAG;gBACR,KAAK,EAAE,CAAC;gBACR,MAAM,EAAE,GAAG;gBACX,KAAK,EAAE,GAAG;gBACV,IAAI,EAAE,SAAS;aAChB;SACF,CAAA;QACD,MAAM,MAAM,GAAG,oBAAoB,CAAC,OAAO,CAAC,CAAA;QAE5C,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;QAC3C,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,
OAAO,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;IAC9C,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,4BAA4B,EAAE,GAAG,EAAE;QACtC,MAAM,OAAO,GAAgB;YAC3B;gBACE,MAAM,EAAE,MAAM;gBACd,MAAM,EAAE,MAAM;gBACd,IAAI,EAAE,eAAe;gBACrB,KAAK,EAAE,EAAE;gBACT,GAAG,EAAE,EAAE;gBACP,KAAK,EAAE,CAAC;gBACR,MAAM,EAAE,GAAG;gBACX,KAAK,EAAE,GAAG;gBACV,IAAI,EAAE,SAAS;aAChB;YACD;gBACE,MAAM,EAAE,MAAM;gBACd,MAAM,EAAE,MAAM;gBACd,IAAI,EAAE,eAAe;gBACrB,KAAK,EAAE,CAAC;gBACR,GAAG,EAAE,EAAE;gBACP,KAAK,EAAE,CAAC;gBACR,MAAM,EAAE,GAAG;gBACX,KAAK,EAAE,GAAG;gBACV,IAAI,EAAE,SAAS;aAChB;SACF,CAAA;QACD,MAAM,MAAM,GAAG,oBAAoB,CAAC,OAAO,CAAC,CAAA;QAE5C,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;QAC3C,MAAM,CAAC,MAAM,CAAC,CAAC,cAAc,CAAC,MAAM,CAAC,CAAA;QACrC,MAAM,CAAC,MAAM,CAAC,CAAC,cAAc,CAAC,MAAM,CAAC,CAAA;IACvC,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,uCAAuC,EAAE,GAAG,EAAE;QACjD,MAAM,OAAO,GAAgB;YAC3B;gBACE,MAAM,EAAE,MAAM;gBACd,MAAM,EAAE,MAAM;gBACd,IAAI,EAAE,eAAe;gBACrB,KAAK,EAAE,EAAE;gBACT,GAAG,EAAE,EAAE;gBACP,KAAK,EAAE,CAAC;gBACR,MAAM,EAAE,GAAG;gBACX,KAAK,EAAE,GAAG;gBACV,IAAI,EAAE,SAAS;aAChB;YACD;gBACE,MAAM,EAAE,MAAM;gBACd,MAAM,EAAE,MAAM;gBACd,IAAI,EAAE,eAAe;gBACrB,KAAK,EAAE,GAAG;gBACV,GAAG,EAAE,GAAG;gBACR,KAAK,EAAE,CAAC;gBACR,MAAM,EAAE,GAAG;gBACX,KAAK,EAAE,GAAG;gBACV,IAAI,EAAE,SAAS;aAChB;SACF,CAAA;QACD,MAAM,MAAM,GAAG,oBAAoB,CAAC,OAAO,CAAC,CAAA;QAE5C,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;QAC5C,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;QAC1D,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC,OAAO,CAAC;YACjD,EAAE,KAAK,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE;YACtB,EAAE,KAAK,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE;SACzB,CAAC,CAAA;IACJ,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,8BAA8B,EAAE,GAAG,EAAE;QACxC,MAAM,OAAO,GAAgB;YAC3B;gBACE,MAAM,EAAE,MAAM;gBACd,MAAM,EAAE,QAAQ;gBAChB,IAAI,EAAE,eAAe;gBACrB,KAAK,EAAE,EAAE;gBACT,GAAG,EAAE,EAAE;gBACP,KAAK,EAAE,CAAC;gBACR,MAAM,EAAE,GAAG;gBACX,KAAK,EAAE,GAAG;gBACV,EAAE,EAAE,YAAY;aACjB;SACF,CAAA;QACD,MAAM,MAAM
,GAAG,oBAAoB,CAAC,OAAO,CAAC,CAAA;QAE5C,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,CAAC,EAAE,SAAS,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC,IAAI,CAC9D,YAAY,CACb,CAAA;IACH,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,wDAAwD,EAAE,GAAG,EAAE;QAClE,MAAM,OAAO,GAAgB;YAC3B;gBACE,MAAM,EAAE,MAAM;gBACd,MAAM,EAAE,cAAc;gBACtB,IAAI,EAAE,eAAe;gBACrB,KAAK,EAAE,EAAE;gBACT,GAAG,EAAE,EAAE;gBACP,KAAK,EAAE,CAAC;gBACR,MAAM,EAAE,GAAG;gBACX,KAAK,EAAE,GAAG;aACX;SACF,CAAA;QACD,MAAM,MAAM,GAAG,oBAAoB,CAAC,OAAO,CAAC,CAAA;QAE5C,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC,CAAC,EAAE,SAAS,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC,IAAI,CAC9D,oBAAoB,CACrB,CAAA;IACH,CAAC,CAAC,CAAA;AACJ,CAAC,CAAC,CAAA;AAEF,QAAQ,CAAC,uBAAuB,EAAE,GAAG,EAAE;IACrC,IAAI,CAAC,kCAAkC,EAAE,GAAG,EAAE;QAC5C,MAAM,OAAO,GAAgB;YAC3B;gBACE,MAAM,EAAE,MAAM;gBACd,MAAM,EAAE,MAAM;gBACd,IAAI,EAAE,eAAe;gBACrB,KAAK,EAAE,EAAE;gBACT,GAAG,EAAE,EAAE;gBACP,KAAK,EAAE,CAAC;gBACR,MAAM,EAAE,GAAG;gBACX,KAAK,EAAE,GAAG;gBACV,IAAI,EAAE,SAAS;aAChB;SACF,CAAA;QACD,MAAM,QAAQ,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAA;QAE/C,MAAM,CAAC,QAAQ,CAAC,CAAC,cAAc,CAAC,SAAS,CAAC,CAAA;QAC1C,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;QACxC,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;IACvD,CAAC,CAAC,CAAA;AACJ,CAAC,CAAC,CAAA"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/gff/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAA;AACrC,OAAO,EAAE,qBAAqB,EAAE,oBAAoB,EAAE,MAAM,iBAAiB,CAAA;AAC7E,OAAO,EAAE,qBAAqB,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAA"}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import type { InterProScanResults } from '../types';
|
|
2
|
+
/**
 * Convert InterProScan results to GFF3 format
 *
 * Emits a `##gff-version 3` header followed by one tab-separated
 * `protein_match` line per match location. Matches without a
 * `signature.entry` are skipped.
 *
 * @param results - InterProScan results keyed by sequence id; the key is
 *   written as the first GFF column.
 * @returns The GFF3 text, lines joined with `\n` and no trailing newline.
 */
export declare function interProToGFF(results: Record<string, InterProScanResults>): string;
|
|
6
|
+
/**
 * Convert InterProScan JSON response to GFF3 format
 *
 * Each result is keyed by the `id` of its first `xref` entry; results with
 * no such id are left out, and a later result with the same id replaces an
 * earlier one.
 *
 * @param results - The `results` array of an InterProScan response.
 * @returns GFF3 text produced by `interProToGFF` for the keyed results.
 */
export declare function interProResponseToGFF(results: InterProScanResults[]): string;
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
/**
 * Serialize InterProScan results as GFF3 text.
 *
 * The output starts with a `##gff-version 3` header. Every location of every
 * match that has a `signature.entry` becomes one tab-separated line with
 * source `InterProScan`, type `protein_match`, and `.` for score, strand and
 * phase. Attribute values are escaped with `encodeURIComponent`.
 *
 * @param results - InterProScan results keyed by sequence id.
 * @returns GFF3 text, lines joined with `\n` and no trailing newline.
 */
export function interProToGFF(results) {
    // Render the nine GFF columns for a single match location.
    const toGffRow = (seqId, entry, span) => {
        const attributeColumn = [
            `Name=${encodeURIComponent(entry.accession)}`,
            `signature_desc=${encodeURIComponent(entry.name)}`,
            `description=${encodeURIComponent(entry.description)}`,
        ].join(';');
        const columns = [
            seqId,
            'InterProScan',
            'protein_match',
            span.start,
            span.end,
            '.',
            '.',
            '.',
            attributeColumn,
        ];
        return columns.join('\t');
    };
    const gffLines = ['##gff-version 3'];
    for (const [seqId, sequenceResult] of Object.entries(results)) {
        for (const hit of sequenceResult.matches) {
            const entry = hit.signature.entry;
            if (entry) {
                for (const span of hit.locations) {
                    gffLines.push(toGffRow(seqId, entry, span));
                }
            }
        }
    }
    return gffLines.join('\n');
}
|
|
35
|
+
/**
 * Convert InterProScan JSON response to GFF3 format
 *
 * Each result is keyed by the `id` of its first `xref` entry and the keyed
 * results are passed to `interProToGFF`. Results without a truthy first
 * xref id are skipped; when several results share an id, the last one wins.
 *
 * @param results - The `results` array of an InterProScan response.
 * @returns GFF3 text for the keyed results.
 */
export function interProResponseToGFF(results) {
    const keyedResults = [];
    for (const result of results) {
        const seqId = result.xref[0]?.id;
        if (seqId) {
            keyedResults.push([seqId, result]);
        }
    }
    // Object.fromEntries defines own data properties, so an id such as
    // "__proto__" stays a regular key. Assigning it with `map[id] = result`
    // would replace the map's prototype and lose that result from the output.
    return interProToGFF(Object.fromEntries(keyedResults));
}
|
|
48
|
+
//# sourceMappingURL=interProToGFF.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"interProToGFF.js","sourceRoot":"","sources":["../../src/gff/interProToGFF.ts"],"names":[],"mappings":"AAEA;;GAEG;AACH,MAAM,UAAU,aAAa,CAC3B,OAA4C;IAE5C,MAAM,KAAK,GAAa,CAAC,iBAAiB,CAAC,CAAA;IAE3C,KAAK,MAAM,CAAC,KAAK,EAAE,IAAI,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC;QACpD,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACjC,MAAM,KAAK,GAAG,KAAK,CAAC,SAAS,CAAC,KAAK,CAAA;YACnC,IAAI,CAAC,KAAK,EAAE,CAAC;gBACX,SAAQ;YACV,CAAC;YAED,KAAK,MAAM,QAAQ,IAAI,KAAK,CAAC,SAAS,EAAE,CAAC;gBACvC,MAAM,UAAU,GAAG;oBACjB,QAAQ,kBAAkB,CAAC,KAAK,CAAC,SAAS,CAAC,EAAE;oBAC7C,kBAAkB,kBAAkB,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE;oBAClD,eAAe,kBAAkB,CAAC,KAAK,CAAC,WAAW,CAAC,EAAE;iBACvD,CAAC,IAAI,CAAC,GAAG,CAAC,CAAA;gBAEX,MAAM,IAAI,GAAG;oBACX,KAAK;oBACL,cAAc;oBACd,eAAe;oBACf,QAAQ,CAAC,KAAK;oBACd,QAAQ,CAAC,GAAG;oBACZ,GAAG;oBACH,GAAG;oBACH,GAAG;oBACH,UAAU;iBACX,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;gBAEZ,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;YAClB,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;AACzB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,qBAAqB,CAAC,OAA8B;IAClE,MAAM,UAAU,GAAwC,EAAE,CAAA;IAE1D,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC7B,MAAM,KAAK,GAAG,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAA;QAChC,IAAI,KAAK,EAAE,CAAC;YACV,UAAU,CAAC,KAAK,CAAC,GAAG,MAAM,CAAA;QAC5B,CAAC;IACH,CAAC;IAED,OAAO,aAAa,CAAC,UAAU,CAAC,CAAA;AAClC,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
import { describe, expect, test } from 'vitest';
|
|
2
|
+
import { interProResponseToGFF, interProToGFF } from './interProToGFF';
|
|
3
|
+
describe('interProToGFF', () => {
    // Fixture builders for the nested InterProScan result structure.
    const matchFor = (entry, locations) => ({
        signature: { entry },
        locations,
    });
    const resultFor = (seqId, matches) => ({
        matches,
        xref: [{ id: seqId }],
    });

    test('converts empty results', () => {
        expect(interProToGFF({})).toBe('##gff-version 3');
    });

    test('converts single result', () => {
        const results = {
            seq1: resultFor('seq1', [
                matchFor({
                    accession: 'PF00001',
                    name: '7tm_1',
                    description: 'GPCR family',
                }, [{ start: 10, end: 50 }]),
            ]),
        };
        const [header, firstRow] = interProToGFF(results).split('\n');
        expect(header).toBe('##gff-version 3');
        const expectedFragments = [
            'seq1',
            'InterProScan',
            'protein_match',
            '10',
            '50',
            'Name=PF00001',
            'signature_desc=7tm_1',
            'description=GPCR%20family',
        ];
        for (const fragment of expectedFragments) {
            expect(firstRow).toContain(fragment);
        }
    });

    test('handles multiple locations', () => {
        const results = {
            seq1: resultFor('seq1', [
                matchFor({
                    accession: 'PF00001',
                    name: 'domain',
                    description: 'test',
                }, [
                    { start: 10, end: 50 },
                    { start: 100, end: 150 },
                ]),
            ]),
        };
        const lines = interProToGFF(results).split('\n');
        expect(lines).toHaveLength(3);
        expect(lines[1]).toContain('10\t50');
        expect(lines[2]).toContain('100\t150');
    });

    test('handles multiple sequences', () => {
        const results = {
            seq1: resultFor('seq1', [
                matchFor({
                    accession: 'PF00001',
                    name: 'domain1',
                    description: 'test1',
                }, [{ start: 10, end: 50 }]),
            ]),
            seq2: resultFor('seq2', [
                matchFor({
                    accession: 'PF00002',
                    name: 'domain2',
                    description: 'test2',
                }, [{ start: 5, end: 40 }]),
            ]),
        };
        const lines = interProToGFF(results).split('\n');
        expect(lines).toHaveLength(3);
        expect(lines.some(l => l.includes('seq1'))).toBe(true);
        expect(lines.some(l => l.includes('seq2'))).toBe(true);
    });

    test('skips matches without entry', () => {
        const results = {
            seq1: resultFor('seq1', [
                // Signature deliberately has no `entry` key at all.
                { signature: {}, locations: [{ start: 10, end: 50 }] },
            ]),
        };
        const lines = interProToGFF(results).split('\n');
        expect(lines).toHaveLength(1);
        expect(lines[0]).toBe('##gff-version 3');
    });

    test('URL-encodes special characters in attributes', () => {
        const results = {
            seq1: resultFor('seq1', [
                matchFor({
                    accession: 'PF00001',
                    name: 'test;name=value',
                    description: 'description with spaces',
                }, [{ start: 10, end: 50 }]),
            ]),
        };
        const gff = interProToGFF(results);
        expect(gff).toContain('signature_desc=test%3Bname%3Dvalue');
        expect(gff).toContain('description=description%20with%20spaces');
    });
});
|
|
144
|
+
describe('interProResponseToGFF', () => {
    // One PF00001 match at 10-50; only the xref list varies between tests.
    const resultWithXref = (xref) => ({
        matches: [
            {
                signature: {
                    entry: {
                        accession: 'PF00001',
                        name: 'domain',
                        description: 'test',
                    },
                },
                locations: [{ start: 10, end: 50 }],
            },
        ],
        xref,
    });

    test('converts array of results', () => {
        const gff = interProResponseToGFF([resultWithXref([{ id: 'seq1' }])]);
        expect(gff).toContain('##gff-version 3');
        expect(gff).toContain('seq1');
    });

    test('handles results without xref', () => {
        const gff = interProResponseToGFF([resultWithXref([])]);
        expect(gff).toBe('##gff-version 3');
    });
});
|
|
189
|
+
//# sourceMappingURL=interProToGFF.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"interProToGFF.test.js","sourceRoot":"","sources":["../../src/gff/interProToGFF.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,QAAQ,CAAA;AAE/C,OAAO,EAAE,qBAAqB,EAAE,aAAa,EAAE,MAAM,iBAAiB,CAAA;AAItE,QAAQ,CAAC,eAAe,EAAE,GAAG,EAAE;IAC7B,IAAI,CAAC,wBAAwB,EAAE,GAAG,EAAE;QAClC,MAAM,GAAG,GAAG,aAAa,CAAC,EAAE,CAAC,CAAA;QAC7B,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAA;IACrC,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,wBAAwB,EAAE,GAAG,EAAE;QAClC,MAAM,OAAO,GAAwC;YACnD,IAAI,EAAE;gBACJ,OAAO,EAAE;oBACP;wBACE,SAAS,EAAE;4BACT,KAAK,EAAE;gCACL,SAAS,EAAE,SAAS;gCACpB,IAAI,EAAE,OAAO;gCACb,WAAW,EAAE,aAAa;6BAC3B;yBACF;wBACD,SAAS,EAAE,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE,CAAC;qBACpC;iBACF;gBACD,IAAI,EAAE,CAAC,EAAE,EAAE,EAAE,MAAM,EAAE,CAAC;aACvB;SACF,CAAA;QACD,MAAM,GAAG,GAAG,aAAa,CAAC,OAAO,CAAC,CAAA;QAClC,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAA;QAE7B,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAA;QACxC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,MAAM,CAAC,CAAA;QAClC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,cAAc,CAAC,CAAA;QAC1C,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,eAAe,CAAC,CAAA;QAC3C,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,IAAI,CAAC,CAAA;QAChC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,IAAI,CAAC,CAAA;QAChC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,cAAc,CAAC,CAAA;QAC1C,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,sBAAsB,CAAC,CAAA;QAClD,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,2BAA2B,CAAC,CAAA;IACzD,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,4BAA4B,EAAE,GAAG,EAAE;QACtC,MAAM,OAAO,GAAwC;YACnD,IAAI,EAAE;gBACJ,OAAO,EAAE;oBACP;wBACE,SAAS,EAAE;4BACT,KAAK,EAAE;gCACL,SAAS,EAAE,SAAS;gCACpB,IAAI,EAAE,QAAQ;gCACd,WAAW,EAAE,MAAM;6BACpB;yBACF;wBACD,SAAS,EAAE;4BACT,EAAE,KAAK,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE;4BACtB,EAAE,KAAK,EAAE,GAAG,EAAE,GAAG,EAAE,GAAG,EAAE;yBACzB;qBACF;iBACF;gBACD,IAAI,EAAE,CAAC,EAAE,EAAE,EAAE,MAAM,EAAE,CAAC;aACvB;SACF,CAAA;QACD,MAAM,GAAG,GAA
G,aAAa,CAAC,OAAO,CAAC,CAAA;QAClC,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAA;QAE7B,MAAM,CAAC,KAAK,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;QAC7B,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,QAAQ,CAAC,CAAA;QACpC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,UAAU,CAAC,CAAA;IACxC,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,4BAA4B,EAAE,GAAG,EAAE;QACtC,MAAM,OAAO,GAAwC;YACnD,IAAI,EAAE;gBACJ,OAAO,EAAE;oBACP;wBACE,SAAS,EAAE;4BACT,KAAK,EAAE;gCACL,SAAS,EAAE,SAAS;gCACpB,IAAI,EAAE,SAAS;gCACf,WAAW,EAAE,OAAO;6BACrB;yBACF;wBACD,SAAS,EAAE,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE,CAAC;qBACpC;iBACF;gBACD,IAAI,EAAE,CAAC,EAAE,EAAE,EAAE,MAAM,EAAE,CAAC;aACvB;YACD,IAAI,EAAE;gBACJ,OAAO,EAAE;oBACP;wBACE,SAAS,EAAE;4BACT,KAAK,EAAE;gCACL,SAAS,EAAE,SAAS;gCACpB,IAAI,EAAE,SAAS;gCACf,WAAW,EAAE,OAAO;6BACrB;yBACF;wBACD,SAAS,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,GAAG,EAAE,EAAE,EAAE,CAAC;qBACnC;iBACF;gBACD,IAAI,EAAE,CAAC,EAAE,EAAE,EAAE,MAAM,EAAE,CAAC;aACvB;SACF,CAAA;QACD,MAAM,GAAG,GAAG,aAAa,CAAC,OAAO,CAAC,CAAA;QAClC,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAA;QAE7B,MAAM,CAAC,KAAK,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;QAC7B,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;QACtD,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;IACxD,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,6BAA6B,EAAE,GAAG,EAAE;QACvC,MAAM,OAAO,GAAwC;YACnD,IAAI,EAAE;gBACJ,OAAO,EAAE;oBACP;wBACE,SAAS,EAAE,EAAE;wBACb,SAAS,EAAE,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE,CAAC;qBACpC;iBACF;gBACD,IAAI,EAAE,CAAC,EAAE,EAAE,EAAE,MAAM,EAAE,CAAC;aACvB;SACF,CAAA;QACD,MAAM,GAAG,GAAG,aAAa,CAAC,OAAO,CAAC,CAAA;QAClC,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAA;QAE7B,MAAM,CAAC,KAAK,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAA;QAC7B,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAA;IAC1C,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,8CAA8C,EAAE,GAAG,EAAE;QACxD,MAAM,OAAO,GAAwC;YACnD,IAAI,EAAE;gBACJ,OAAO,EAAE;oBACP;wBACE,S
AAS,EAAE;4BACT,KAAK,EAAE;gCACL,SAAS,EAAE,SAAS;gCACpB,IAAI,EAAE,iBAAiB;gCACvB,WAAW,EAAE,yBAAyB;6BACvC;yBACF;wBACD,SAAS,EAAE,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE,CAAC;qBACpC;iBACF;gBACD,IAAI,EAAE,CAAC,EAAE,EAAE,EAAE,MAAM,EAAE,CAAC;aACvB;SACF,CAAA;QACD,MAAM,GAAG,GAAG,aAAa,CAAC,OAAO,CAAC,CAAA;QAElC,MAAM,CAAC,GAAG,CAAC,CAAC,SAAS,CAAC,oCAAoC,CAAC,CAAA;QAC3D,MAAM,CAAC,GAAG,CAAC,CAAC,SAAS,CAAC,yCAAyC,CAAC,CAAA;IAClE,CAAC,CAAC,CAAA;AACJ,CAAC,CAAC,CAAA;AAEF,QAAQ,CAAC,uBAAuB,EAAE,GAAG,EAAE;IACrC,IAAI,CAAC,2BAA2B,EAAE,GAAG,EAAE;QACrC,MAAM,OAAO,GAA0B;YACrC;gBACE,OAAO,EAAE;oBACP;wBACE,SAAS,EAAE;4BACT,KAAK,EAAE;gCACL,SAAS,EAAE,SAAS;gCACpB,IAAI,EAAE,QAAQ;gCACd,WAAW,EAAE,MAAM;6BACpB;yBACF;wBACD,SAAS,EAAE,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE,CAAC;qBACpC;iBACF;gBACD,IAAI,EAAE,CAAC,EAAE,EAAE,EAAE,MAAM,EAAE,CAAC;aACvB;SACF,CAAA;QACD,MAAM,GAAG,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAA;QAE1C,MAAM,CAAC,GAAG,CAAC,CAAC,SAAS,CAAC,iBAAiB,CAAC,CAAA;QACxC,MAAM,CAAC,GAAG,CAAC,CAAC,SAAS,CAAC,MAAM,CAAC,CAAA;IAC/B,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,8BAA8B,EAAE,GAAG,EAAE;QACxC,MAAM,OAAO,GAA0B;YACrC;gBACE,OAAO,EAAE;oBACP;wBACE,SAAS,EAAE;4BACT,KAAK,EAAE;gCACL,SAAS,EAAE,SAAS;gCACpB,IAAI,EAAE,QAAQ;gCACd,WAAW,EAAE,MAAM;6BACpB;yBACF;wBACD,SAAS,EAAE,CAAC,EAAE,KAAK,EAAE,EAAE,EAAE,GAAG,EAAE,EAAE,EAAE,CAAC;qBACpC;iBACF;gBACD,IAAI,EAAE,EAAE;aACT;SACF,CAAA;QACD,MAAM,GAAG,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAA;QAE1C,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAA;IACrC,CAAC,CAAC,CAAA;AACJ,CAAC,CAAC,CAAA"}
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
/**
 * Parse the attributes column (column 9) of a GFF line into a plain object.
 *
 * The column is split on ';' into `key=value` fields. Each field is split at
 * its FIRST '=' only, so a value that itself contains '=' is kept whole.
 * Values are percent-decoded, trimmed, and any commas in the decoded value
 * are replaced by single spaces.
 *
 * @param col9 - raw attributes column; a falsy value yields `{}`
 * @returns object mapping each non-empty key to its decoded value, or to
 *   `undefined` when the field has no value (`flag` or `flag=`). When a key
 *   occurs more than once the last occurrence wins.
 */
function parseAttributes(col9) {
    if (!col9) {
        return {};
    }
    const entries = [];
    for (const rawField of col9.split(';')) {
        const field = rawField.trim();
        if (!field) {
            continue;
        }
        // Split on the first '=' only; later '=' characters belong to the value.
        const eq = field.indexOf('=');
        const key = (eq === -1 ? field : field.slice(0, eq)).trim();
        if (key === '') {
            continue;
        }
        const rawVal = eq === -1 ? '' : field.slice(eq + 1);
        let value;
        if (rawVal) {
            let decoded;
            try {
                decoded = decodeURIComponent(rawVal);
            }
            catch {
                // Malformed percent-escape (e.g. a bare '%'): keep the raw text
                // rather than letting a URIError abort the whole parse.
                decoded = rawVal;
            }
            value = decoded.trim().split(',').join(' ');
        }
        entries.push([key, value]);
    }
    return Object.fromEntries(entries);
}
|
|
16
|
+
/**
 * Parse GFF text into one record per feature line.
 *
 * Each line is trimmed; blank lines and lines starting with '#' are skipped.
 * Parsing stops at a `##FASTA` directive, because everything after it is
 * embedded sequence data rather than tab-separated feature rows.
 *
 * @param str - GFF text; a falsy value yields `[]`
 * @returns array of records with the eight fixed columns (`seq_id`, `source`,
 *   `type`, `start`, `end`, `score`, `strand`, `phase`) plus the parsed
 *   column-9 attributes spread on top. `start`, `end` and `score` are
 *   coerced with `Number`; anything non-numeric (such as '.') becomes 0.
 *   Because the attributes are spread last, an attribute whose key equals a
 *   fixed column name overrides that column.
 */
export function parseGFF(str) {
    if (!str) {
        return [];
    }
    const records = [];
    for (const rawLine of str.split('\n')) {
        const line = rawLine.trim();
        // '##FASTA' ends the annotation section; the remaining lines are
        // sequence headers/residues and must not be read as features.
        if (line.startsWith('##FASTA')) {
            break;
        }
        if (!line || line.startsWith('#')) {
            continue;
        }
        const parts = line.split('\t');
        const [seq_id, source, type, start, end, score, strand, phase] = parts;
        const col9 = parts[8];
        records.push({
            seq_id: seq_id ?? '',
            source: source ?? '',
            type: type ?? '',
            start: Number(start) || 0,
            end: Number(end) || 0,
            score: Number(score) || 0,
            strand: strand ?? '.',
            phase: phase ?? '.',
            ...parseAttributes(col9),
        });
    }
    return records;
}
|
|
41
|
+
//# sourceMappingURL=parseGFF.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"parseGFF.js","sourceRoot":"","sources":["../../src/gff/parseGFF.ts"],"names":[],"mappings":"AAEA,SAAS,eAAe,CAAC,IAAa;IACpC,IAAI,CAAC,IAAI,EAAE,CAAC;QACV,OAAO,EAAE,CAAA;IACX,CAAC;IACD,OAAO,MAAM,CAAC,WAAW,CACvB,IAAI;SACD,KAAK,CAAC,GAAG,CAAC;SACV,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;SAClB,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC;SAChB,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;SACtB,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,GAAG,CAAC,EAAE,EAAE,CAAC;QACnB,GAAG,EAAE,IAAI,EAAE,IAAI,EAAE;QACjB,GAAG,CAAC,CAAC,CAAC,kBAAkB,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS;KACtE,CAAC;SACD,MAAM,CAAC,CAAC,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC,GAAG,KAAK,EAAE,CAAC,CACjC,CAAA;AACH,CAAC;AAED,MAAM,UAAU,QAAQ,CAAC,GAAY;IACnC,IAAI,CAAC,GAAG,EAAE,CAAC;QACT,OAAO,EAAE,CAAA;IACX,CAAC;IACD,OAAO,GAAG;SACP,KAAK,CAAC,IAAI,CAAC;SACX,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;SAClB,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC;SACtC,GAAG,CAAC,CAAC,CAAC,EAAE;QACP,MAAM,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAA;QAC3B,MAAM,CAAC,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,KAAK,EAAE,GAAG,EAAE,KAAK,EAAE,MAAM,EAAE,KAAK,CAAC,GAAG,KAAK,CAAA;QACtE,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAA;QAErB,OAAO;YACL,MAAM,EAAE,MAAM,IAAI,EAAE;YACpB,MAAM,EAAE,MAAM,IAAI,EAAE;YACpB,IAAI,EAAE,IAAI,IAAI,EAAE;YAChB,KAAK,EAAE,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC;YACzB,GAAG,EAAE,MAAM,CAAC,GAAG,CAAC,IAAI,CAAC;YACrB,KAAK,EAAE,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC;YACzB,MAAM,EAAE,MAAM,IAAI,GAAG;YACrB,KAAK,EAAE,KAAK,IAAI,GAAG;YACnB,GAAG,eAAe,CAAC,IAAI,CAAC;SACzB,CAAA;IACH,CAAC,CAAC,CAAA;AACN,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|