react-msaview 4.5.0 → 4.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bundle/index.js +99 -99
- package/bundle/index.js.LICENSE.txt +6 -6
- package/bundle/index.js.map +1 -1
- package/dist/__snapshots__/parseAsn1.test.js.snap +2400 -0
- package/dist/components/header/HeaderInfoArea.js +3 -4
- package/dist/components/header/HeaderInfoArea.js.map +1 -1
- package/dist/components/import/ImportForm.js +6 -2
- package/dist/components/import/ImportForm.js.map +1 -1
- package/dist/components/import/util.d.ts +1 -1
- package/dist/components/import/util.js +4 -1
- package/dist/components/import/util.js.map +1 -1
- package/dist/components/msa/renderBoxFeatureCanvasBlock.js +7 -2
- package/dist/components/msa/renderBoxFeatureCanvasBlock.js.map +1 -1
- package/dist/components/msa/renderMSABlock.js +20 -18
- package/dist/components/msa/renderMSABlock.js.map +1 -1
- package/dist/components/msa/renderMSAMouseover.js +8 -1
- package/dist/components/msa/renderMSAMouseover.js.map +1 -1
- package/dist/components/tree/renderTreeCanvas.d.ts +0 -1
- package/dist/components/tree/renderTreeCanvas.js +32 -31
- package/dist/components/tree/renderTreeCanvas.js.map +1 -1
- package/dist/model.d.ts +168 -16
- package/dist/model.js +116 -29
- package/dist/model.js.map +1 -1
- package/dist/rowCoordinateCalculations.d.ts +69 -9
- package/dist/rowCoordinateCalculations.js +118 -46
- package/dist/rowCoordinateCalculations.js.map +1 -1
- package/dist/rowCoordinateCalculations.test.js +152 -52
- package/dist/rowCoordinateCalculations.test.js.map +1 -1
- package/dist/seqPosToGlobalCol.d.ts +19 -0
- package/dist/seqPosToGlobalCol.js +34 -0
- package/dist/seqPosToGlobalCol.js.map +1 -0
- package/dist/seqPosToGlobalCol.test.js +60 -0
- package/dist/seqPosToGlobalCol.test.js.map +1 -0
- package/dist/util.d.ts +1 -2
- package/dist/util.js +0 -9
- package/dist/util.js.map +1 -1
- package/dist/version.d.ts +1 -1
- package/dist/version.js +1 -1
- package/package.json +7 -9
- package/src/components/header/HeaderInfoArea.tsx +2 -5
- package/src/components/import/ImportForm.tsx +6 -1
- package/src/components/import/util.ts +4 -0
- package/src/components/msa/renderBoxFeatureCanvasBlock.ts +7 -2
- package/src/components/msa/renderMSABlock.ts +26 -19
- package/src/components/msa/renderMSAMouseover.ts +9 -0
- package/src/components/tree/renderTreeCanvas.ts +35 -42
- package/src/declare.d.ts +0 -1
- package/src/model.ts +143 -42
- package/src/rowCoordinateCalculations.test.ts +167 -74
- package/src/rowCoordinateCalculations.ts +138 -63
- package/src/seqPosToGlobalCol.test.ts +71 -0
- package/src/seqPosToGlobalCol.ts +40 -0
- package/src/util.ts +1 -19
- package/src/version.ts +1 -1
- package/dist/parseGFF.d.ts +0 -10
- package/dist/parseGFF.js +0 -31
- package/dist/parseGFF.js.map +0 -1
- package/dist/parseNewick.d.ts +0 -60
- package/dist/parseNewick.js +0 -95
- package/dist/parseNewick.js.map +0 -1
- package/dist/parsers/A3mMSA.d.ts +0 -43
- package/dist/parsers/A3mMSA.js +0 -277
- package/dist/parsers/A3mMSA.js.map +0 -1
- package/dist/parsers/A3mMSA.test.js +0 -138
- package/dist/parsers/A3mMSA.test.js.map +0 -1
- package/dist/parsers/ClustalMSA.d.ts +0 -30
- package/dist/parsers/ClustalMSA.js +0 -55
- package/dist/parsers/ClustalMSA.js.map +0 -1
- package/dist/parsers/EmfMSA.d.ts +0 -27
- package/dist/parsers/EmfMSA.js +0 -53
- package/dist/parsers/EmfMSA.js.map +0 -1
- package/dist/parsers/EmfTree.d.ts +0 -5
- package/dist/parsers/EmfTree.js +0 -8
- package/dist/parsers/EmfTree.js.map +0 -1
- package/dist/parsers/FastaMSA.d.ts +0 -19
- package/dist/parsers/FastaMSA.js +0 -69
- package/dist/parsers/FastaMSA.js.map +0 -1
- package/dist/parsers/StockholmMSA.d.ts +0 -68
- package/dist/parsers/StockholmMSA.js +0 -107
- package/dist/parsers/StockholmMSA.js.map +0 -1
- package/dist/seqCoordToRowSpecificGlobalCoord.d.ts +0 -4
- package/dist/seqCoordToRowSpecificGlobalCoord.js +0 -19
- package/dist/seqCoordToRowSpecificGlobalCoord.js.map +0 -1
- package/dist/seqCoordToRowSpecificGlobalCoord.test.d.ts +0 -1
- package/dist/seqCoordToRowSpecificGlobalCoord.test.js +0 -42
- package/dist/seqCoordToRowSpecificGlobalCoord.test.js.map +0 -1
- package/src/parseGFF.ts +0 -34
- package/src/parseNewick.ts +0 -94
- package/src/parsers/A3mMSA.test.ts +0 -164
- package/src/parsers/A3mMSA.ts +0 -321
- package/src/parsers/ClustalMSA.ts +0 -69
- package/src/parsers/EmfMSA.ts +0 -67
- package/src/parsers/EmfTree.ts +0 -9
- package/src/parsers/FastaMSA.ts +0 -82
- package/src/parsers/StockholmMSA.ts +0 -140
- package/src/seqCoordToRowSpecificGlobalCoord.test.ts +0 -53
- package/src/seqCoordToRowSpecificGlobalCoord.ts +0 -25
- /package/dist/{parsers/A3mMSA.test.d.ts → seqPosToGlobalCol.test.d.ts} +0 -0
|
@@ -1,42 +0,0 @@
|
|
|
1
|
-
import { describe, expect, test } from 'vitest';
|
|
2
|
-
import { seqCoordToRowSpecificGlobalCoord } from './seqCoordToRowSpecificGlobalCoord';
|
|
3
|
-
describe('seqCoordToRowSpecificGlobalCoord', () => {
|
|
4
|
-
test('converts sequence coordinate to global coordinate with no gaps', () => {
|
|
5
|
-
const row = 'ATGCATGC';
|
|
6
|
-
expect(seqCoordToRowSpecificGlobalCoord({ row, position: 3 })).toBe(3);
|
|
7
|
-
expect(seqCoordToRowSpecificGlobalCoord({ row, position: 0 })).toBe(0);
|
|
8
|
-
expect(seqCoordToRowSpecificGlobalCoord({ row, position: 8 })).toBe(8);
|
|
9
|
-
});
|
|
10
|
-
test('converts sequence coordinate to global coordinate with gaps', () => {
|
|
11
|
-
const row = 'A-TG-CA-TGC';
|
|
12
|
-
// A(0) -(1) T(2) G(3) -(4) C(5) A(6) -(7) T(8) G(9) C(10)
|
|
13
|
-
// Sequence positions: A(0) T(1) G(2) C(3) A(4) T(5) G(6) C(7)
|
|
14
|
-
// Position 0 (first A) -> Global index 0
|
|
15
|
-
expect(seqCoordToRowSpecificGlobalCoord({ row, position: 0 })).toBe(0);
|
|
16
|
-
// Position 1 (T after first gap) -> Global index 2
|
|
17
|
-
expect(seqCoordToRowSpecificGlobalCoord({ row, position: 1 })).toBe(2);
|
|
18
|
-
// Position 3 (C after second gap) -> Global index 5
|
|
19
|
-
expect(seqCoordToRowSpecificGlobalCoord({ row, position: 3 })).toBe(5);
|
|
20
|
-
// Position 5 (T after third gap) -> Global index 8
|
|
21
|
-
expect(seqCoordToRowSpecificGlobalCoord({ row, position: 5 })).toBe(8);
|
|
22
|
-
// Position 8 (end of sequence) -> Global index 11
|
|
23
|
-
expect(seqCoordToRowSpecificGlobalCoord({ row, position: 8 })).toBe(11);
|
|
24
|
-
});
|
|
25
|
-
test('handles empty row', () => {
|
|
26
|
-
expect(seqCoordToRowSpecificGlobalCoord({ row: '', position: 0 })).toBe(0);
|
|
27
|
-
});
|
|
28
|
-
test('handles row with only gaps', () => {
|
|
29
|
-
const row = '---..--';
|
|
30
|
-
expect(seqCoordToRowSpecificGlobalCoord({ row, position: 0 })).toBe(0);
|
|
31
|
-
});
|
|
32
|
-
test('handles mixed gap characters', () => {
|
|
33
|
-
const row = 'A-.G-C.';
|
|
34
|
-
// A(0) -(1) .(2) G(3) -(4) C(5) .(6)
|
|
35
|
-
// Sequence positions: A(0) G(1) C(2)
|
|
36
|
-
expect(seqCoordToRowSpecificGlobalCoord({ row, position: 0 })).toBe(0);
|
|
37
|
-
expect(seqCoordToRowSpecificGlobalCoord({ row, position: 1 })).toBe(3);
|
|
38
|
-
expect(seqCoordToRowSpecificGlobalCoord({ row, position: 2 })).toBe(5);
|
|
39
|
-
expect(seqCoordToRowSpecificGlobalCoord({ row, position: 3 })).toBe(7);
|
|
40
|
-
});
|
|
41
|
-
});
|
|
42
|
-
//# sourceMappingURL=seqCoordToRowSpecificGlobalCoord.test.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"seqCoordToRowSpecificGlobalCoord.test.js","sourceRoot":"","sources":["../src/seqCoordToRowSpecificGlobalCoord.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,QAAQ,CAAA;AAE/C,OAAO,EAAE,gCAAgC,EAAE,MAAM,oCAAoC,CAAA;AAErF,QAAQ,CAAC,kCAAkC,EAAE,GAAG,EAAE;IAChD,IAAI,CAAC,gEAAgE,EAAE,GAAG,EAAE;QAC1E,MAAM,GAAG,GAAG,UAAU,CAAA;QACtB,MAAM,CAAC,gCAAgC,CAAC,EAAE,GAAG,EAAE,QAAQ,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QACtE,MAAM,CAAC,gCAAgC,CAAC,EAAE,GAAG,EAAE,QAAQ,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QACtE,MAAM,CAAC,gCAAgC,CAAC,EAAE,GAAG,EAAE,QAAQ,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACxE,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,6DAA6D,EAAE,GAAG,EAAE;QACvE,MAAM,GAAG,GAAG,aAAa,CAAA;QACzB,0DAA0D;QAC1D,8DAA8D;QAE9D,yCAAyC;QACzC,MAAM,CAAC,gCAAgC,CAAC,EAAE,GAAG,EAAE,QAAQ,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QAEtE,mDAAmD;QACnD,MAAM,CAAC,gCAAgC,CAAC,EAAE,GAAG,EAAE,QAAQ,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QAEtE,oDAAoD;QACpD,MAAM,CAAC,gCAAgC,CAAC,EAAE,GAAG,EAAE,QAAQ,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QAEtE,mDAAmD;QACnD,MAAM,CAAC,gCAAgC,CAAC,EAAE,GAAG,EAAE,QAAQ,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QAEtE,kDAAkD;QAClD,MAAM,CAAC,gCAAgC,CAAC,EAAE,GAAG,EAAE,QAAQ,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;IACzE,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,mBAAmB,EAAE,GAAG,EAAE;QAC7B,MAAM,CAAC,gCAAgC,CAAC,EAAE,GAAG,EAAE,EAAE,EAAE,QAAQ,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IAC5E,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,4BAA4B,EAAE,GAAG,EAAE;QACtC,MAAM,GAAG,GAAG,SAAS,CAAA;QACrB,MAAM,CAAC,gCAAgC,CAAC,EAAE,GAAG,EAAE,QAAQ,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACxE,CAAC,CAAC,CAAA;IAEF,IAAI,CAAC,8BAA8B,EAAE,GAAG,EAAE;QACxC,MAAM,GAAG,GAAG,SAAS,CAAA;QACrB,qCAAqC;QACrC,qCAAqC;QAErC,MAAM,CAAC,gCAAgC,CAAC,EAAE,GAAG,EAAE,QAAQ,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QACtE,MAAM,CAAC,gCAAgC,CAAC,EAAE,GAAG,EAAE,QAAQ,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QACtE,MAAM,CAAC,gCAAgC,CAAC,EAAE,GAAG,EAAE,QAAQ,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;QACtE,MAAM,CAAC,gCAAgC,CAAC,EAAE,GAAG,EAAE,QAAQ,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;IACxE,CAAC,CAAC,CAAA;AACJ,CAAC,CAAC,CAAA"}
|
package/src/parseGFF.ts
DELETED
|
@@ -1,34 +0,0 @@
|
|
|
1
|
-
export function parseGFF(str?: string) {
|
|
2
|
-
return str
|
|
3
|
-
?.split('\n')
|
|
4
|
-
.map(f => f.trim())
|
|
5
|
-
.filter(f => !!f && !f.startsWith('#'))
|
|
6
|
-
.map(f => {
|
|
7
|
-
const [seq_id, source, type, start, end, score, strand, phase, col9] =
|
|
8
|
-
f.split('\t')
|
|
9
|
-
|
|
10
|
-
return {
|
|
11
|
-
seq_id: seq_id!,
|
|
12
|
-
source: source!,
|
|
13
|
-
type: type!,
|
|
14
|
-
start: +start!,
|
|
15
|
-
end: +end!,
|
|
16
|
-
score: +score!,
|
|
17
|
-
strand: strand!,
|
|
18
|
-
phase: phase!,
|
|
19
|
-
...Object.fromEntries(
|
|
20
|
-
col9!
|
|
21
|
-
.split(';')
|
|
22
|
-
.map(f => f.trim())
|
|
23
|
-
.filter(f => !!f)
|
|
24
|
-
.map(f => f.split('='))
|
|
25
|
-
.map(([key, val]) => [
|
|
26
|
-
key!.trim(),
|
|
27
|
-
val
|
|
28
|
-
? decodeURIComponent(val).trim().split(',').join(' ')
|
|
29
|
-
: undefined,
|
|
30
|
-
]),
|
|
31
|
-
),
|
|
32
|
-
}
|
|
33
|
-
})
|
|
34
|
-
}
|
package/src/parseNewick.ts
DELETED
|
@@ -1,94 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Newick format parser in JavaScript.
|
|
3
|
-
*
|
|
4
|
-
* Copyright (c) Jason Davies 2010.
|
|
5
|
-
*
|
|
6
|
-
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
7
|
-
* of this software and associated documentation files (the "Software"), to deal
|
|
8
|
-
* in the Software without restriction, including without limitation the rights
|
|
9
|
-
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
10
|
-
* copies of the Software, and to permit persons to whom the Software is
|
|
11
|
-
* furnished to do so, subject to the following conditions:
|
|
12
|
-
*
|
|
13
|
-
* The above copyright notice and this permission notice shall be included in
|
|
14
|
-
* all copies or substantial portions of the Software.
|
|
15
|
-
*
|
|
16
|
-
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
17
|
-
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
18
|
-
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
19
|
-
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
20
|
-
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
21
|
-
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
22
|
-
* THE SOFTWARE.
|
|
23
|
-
*
|
|
24
|
-
* Example tree (from http://en.wikipedia.org/wiki/Newick_format):
|
|
25
|
-
*
|
|
26
|
-
* +--0.1--A
|
|
27
|
-
* F-----0.2-----B +-------0.3----C
|
|
28
|
-
* +------------------0.5-----E
|
|
29
|
-
* +---------0.4------D
|
|
30
|
-
*
|
|
31
|
-
* Newick format:
|
|
32
|
-
* (A:0.1,B:0.2,(C:0.3,D:0.4)E:0.5)F;
|
|
33
|
-
*
|
|
34
|
-
* Converted to JSON:
|
|
35
|
-
* {
|
|
36
|
-
* name: "F",
|
|
37
|
-
* children: [
|
|
38
|
-
* {name: "A", length: 0.1},
|
|
39
|
-
* {name: "B", length: 0.2},
|
|
40
|
-
* {
|
|
41
|
-
* name: "E",
|
|
42
|
-
* length: 0.5,
|
|
43
|
-
* children: [
|
|
44
|
-
* {name: "C", length: 0.3},
|
|
45
|
-
* {name: "D", length: 0.4}
|
|
46
|
-
* ]
|
|
47
|
-
* }
|
|
48
|
-
* ]
|
|
49
|
-
* }
|
|
50
|
-
*
|
|
51
|
-
* Converted to JSON, but with no names or lengths:
|
|
52
|
-
* {
|
|
53
|
-
* children: [
|
|
54
|
-
* {}, {}, {
|
|
55
|
-
* children: [{}, {}]
|
|
56
|
-
* }
|
|
57
|
-
* ]
|
|
58
|
-
* }
|
|
59
|
-
*/
|
|
60
|
-
export default function parse(s: string) {
|
|
61
|
-
const ancestors = []
|
|
62
|
-
|
|
63
|
-
let tree = {} as Record<string, any>
|
|
64
|
-
const tokens = s.split(/\s*(;|\(|\)|,|:)\s*/)
|
|
65
|
-
for (let i = 0; i < tokens.length; i++) {
|
|
66
|
-
const token = tokens[i]!
|
|
67
|
-
const subtree = {}
|
|
68
|
-
switch (token) {
|
|
69
|
-
case '(': // new children
|
|
70
|
-
tree.children = [subtree]
|
|
71
|
-
ancestors.push(tree)
|
|
72
|
-
tree = subtree
|
|
73
|
-
break
|
|
74
|
-
case ',': // another branch
|
|
75
|
-
ancestors.at(-1)?.children.push(subtree)
|
|
76
|
-
tree = subtree
|
|
77
|
-
break
|
|
78
|
-
case ')': // optional name next
|
|
79
|
-
tree = ancestors.pop()!
|
|
80
|
-
break
|
|
81
|
-
case ':': // optional length next
|
|
82
|
-
break
|
|
83
|
-
default: {
|
|
84
|
-
const x = tokens[i - 1]!
|
|
85
|
-
if (x === ')' || x === '(' || x === ',') {
|
|
86
|
-
tree.name = token
|
|
87
|
-
} else if (x === ':') {
|
|
88
|
-
tree.length = Number.parseFloat(token)
|
|
89
|
-
}
|
|
90
|
-
}
|
|
91
|
-
}
|
|
92
|
-
}
|
|
93
|
-
return tree
|
|
94
|
-
}
|
|
@@ -1,164 +0,0 @@
|
|
|
1
|
-
import { describe, expect, test } from 'vitest'
|
|
2
|
-
|
|
3
|
-
import A3mMSA from './A3mMSA'
|
|
4
|
-
|
|
5
|
-
const exampleA3M = `>example
|
|
6
|
-
ETESMKTVRIREKIKKFLGDRPRNTAEILEHINSTMRHGTTSQQLGNVLSKDKDIVKVGYIKRSGILSGGYDICEWATRNWVAEHCPEWTE
|
|
7
|
-
>1
|
|
8
|
-
----MRTTRLRQKIKKFLNERGeANTTEILEHVNSTMRHGTTPQQLGNVLSKDKDILKVATTKRGGALSGRYEICVWTLRP-----------
|
|
9
|
-
>2
|
|
10
|
-
----MDSQNLRDLIRNYLSERPRNTIEISAWLASQMDPNSCPEDVTNILEADESIVRIGTVRKSGMRLTDLPISEWASSSWVRRHE-----
|
|
11
|
-
>3
|
|
12
|
-
----MNSQNLRELIRNYLSERPRNTIEISTWLSSQIDPTNSPVDITSILEADDQIVRIGTVRKSGMRRSESPVSEWASNTWVKHHE-----
|
|
13
|
-
>4
|
|
14
|
-
--RDMDTEKVREIVRNYISERPRNTAEIAAWLNRH-DDGTGGSDVAAILESDGSFVRIGTVRTSGMTGNSPPLSEWATEKWIQHHER----
|
|
15
|
-
>5
|
|
16
|
-
-----RTRRLREAVLVFLEEKGnANTVEVFDYLNERFRWGATMNQVGNILAKDTRFAKVGHQ-RGQFRGSVYTVCVWALS------------
|
|
17
|
-
>6
|
|
18
|
-
-----RTKRLREAVRVYLAENGrSHTVDIFDHLNDRFSWGATMNQVGNILAKDNRFEKVGHVRD-FFRGARYTVCVWDLAS-----------
|
|
19
|
-
`
|
|
20
|
-
|
|
21
|
-
describe('A3mMSA', () => {
|
|
22
|
-
test('sniff detects A3M format', () => {
|
|
23
|
-
expect(A3mMSA.sniff(exampleA3M)).toBe(true)
|
|
24
|
-
})
|
|
25
|
-
|
|
26
|
-
test('sniff returns false for regular FASTA', () => {
|
|
27
|
-
const fasta = `>seq1
|
|
28
|
-
ACDEFGHIKLMNPQRST
|
|
29
|
-
>seq2
|
|
30
|
-
ACDEFGHIKLMNPQRST
|
|
31
|
-
`
|
|
32
|
-
expect(A3mMSA.sniff(fasta)).toBe(false)
|
|
33
|
-
})
|
|
34
|
-
|
|
35
|
-
test('sniff returns false for non-FASTA formats', () => {
|
|
36
|
-
expect(A3mMSA.sniff('# STOCKHOLM 1.0\n')).toBe(false)
|
|
37
|
-
expect(A3mMSA.sniff('CLUSTAL W')).toBe(false)
|
|
38
|
-
})
|
|
39
|
-
|
|
40
|
-
test('parses A3M and expands insertions', () => {
|
|
41
|
-
const parser = new A3mMSA(exampleA3M)
|
|
42
|
-
const names = parser.getNames()
|
|
43
|
-
|
|
44
|
-
expect(names).toEqual(['example', '1', '2', '3', '4', '5', '6'])
|
|
45
|
-
|
|
46
|
-
// All sequences should have the same length after expansion
|
|
47
|
-
const widths = names.map(name => parser.getRow(name).length)
|
|
48
|
-
expect(widths.every(w => w === widths[0])).toBe(true)
|
|
49
|
-
|
|
50
|
-
// The width should be greater than the original due to expanded inserts
|
|
51
|
-
expect(parser.getWidth()).toBeGreaterThan(90)
|
|
52
|
-
})
|
|
53
|
-
|
|
54
|
-
test('lowercase inserts become uppercase after expansion', () => {
|
|
55
|
-
const parser = new A3mMSA(exampleA3M)
|
|
56
|
-
|
|
57
|
-
// The expanded sequences should not contain lowercase letters
|
|
58
|
-
for (const name of parser.getNames()) {
|
|
59
|
-
const row = parser.getRow(name)
|
|
60
|
-
expect(/[a-z]/.test(row)).toBe(false)
|
|
61
|
-
}
|
|
62
|
-
})
|
|
63
|
-
|
|
64
|
-
test('handles simple A3M with single insert', () => {
|
|
65
|
-
// In valid A3M, match columns (uppercase + - + .) must be consistent
|
|
66
|
-
// seq1 has 5 match columns + 1 insert after D
|
|
67
|
-
// seq2 has 5 match columns, no inserts
|
|
68
|
-
const simple = `>seq1
|
|
69
|
-
ACDaEF
|
|
70
|
-
>seq2
|
|
71
|
-
ACDEF
|
|
72
|
-
`
|
|
73
|
-
const parser = new A3mMSA(simple)
|
|
74
|
-
|
|
75
|
-
// seq1 has an 'a' insert after 'D', seq2 doesn't
|
|
76
|
-
// After expansion, both should be same length
|
|
77
|
-
const seq1 = parser.getRow('seq1')
|
|
78
|
-
const seq2 = parser.getRow('seq2')
|
|
79
|
-
|
|
80
|
-
expect(seq1.length).toBe(seq2.length)
|
|
81
|
-
expect(seq1).toBe('ACDAEF')
|
|
82
|
-
expect(seq2).toBe('ACD.EF')
|
|
83
|
-
})
|
|
84
|
-
|
|
85
|
-
test('handles multiple inserts at different positions', () => {
|
|
86
|
-
// seq1: 6 match columns (A,C,D,E,F,I) + inserts (ab after D, gh after F)
|
|
87
|
-
// seq2: 6 match columns, no inserts
|
|
88
|
-
const multi = `>seq1
|
|
89
|
-
ACDabEFghI
|
|
90
|
-
>seq2
|
|
91
|
-
ACDEFI
|
|
92
|
-
`
|
|
93
|
-
const parser = new A3mMSA(multi)
|
|
94
|
-
|
|
95
|
-
const seq1 = parser.getRow('seq1')
|
|
96
|
-
const seq2 = parser.getRow('seq2')
|
|
97
|
-
|
|
98
|
-
expect(seq1.length).toBe(seq2.length)
|
|
99
|
-
// seq1: ACD + ab (inserts) + EF + gh (inserts) + I
|
|
100
|
-
// seq2: ACD + EF + I -> needs . padding at insert positions
|
|
101
|
-
expect(seq1).toBe('ACDABEFGHI')
|
|
102
|
-
expect(seq2).toBe('ACD..EF..I')
|
|
103
|
-
})
|
|
104
|
-
|
|
105
|
-
test('handles varying insert lengths', () => {
|
|
106
|
-
const varying = `>seq1
|
|
107
|
-
ACDabcEF
|
|
108
|
-
>seq2
|
|
109
|
-
ACDaEF
|
|
110
|
-
>seq3
|
|
111
|
-
ACDEF
|
|
112
|
-
`
|
|
113
|
-
const parser = new A3mMSA(varying)
|
|
114
|
-
|
|
115
|
-
const seq1 = parser.getRow('seq1')
|
|
116
|
-
const seq2 = parser.getRow('seq2')
|
|
117
|
-
const seq3 = parser.getRow('seq3')
|
|
118
|
-
|
|
119
|
-
// All should have same length
|
|
120
|
-
expect(seq1.length).toBe(seq2.length)
|
|
121
|
-
expect(seq2.length).toBe(seq3.length)
|
|
122
|
-
|
|
123
|
-
// seq1 has 3 inserts, seq2 has 1, seq3 has 0
|
|
124
|
-
// After expansion with max 3 insert slots:
|
|
125
|
-
expect(seq1).toBe('ACDABCEF')
|
|
126
|
-
expect(seq2).toBe('ACDA..EF')
|
|
127
|
-
expect(seq3).toBe('ACD...EF')
|
|
128
|
-
})
|
|
129
|
-
|
|
130
|
-
test('getTree returns flat tree structure', () => {
|
|
131
|
-
const parser = new A3mMSA(exampleA3M)
|
|
132
|
-
const tree = parser.getTree()
|
|
133
|
-
|
|
134
|
-
expect(tree.id).toBe('root')
|
|
135
|
-
expect(tree.noTree).toBe(true)
|
|
136
|
-
expect(tree.children.length).toBe(7)
|
|
137
|
-
expect(tree.children.map(c => c.name)).toEqual([
|
|
138
|
-
'example',
|
|
139
|
-
'1',
|
|
140
|
-
'2',
|
|
141
|
-
'3',
|
|
142
|
-
'4',
|
|
143
|
-
'5',
|
|
144
|
-
'6',
|
|
145
|
-
])
|
|
146
|
-
})
|
|
147
|
-
|
|
148
|
-
test('getWidth returns consistent width', () => {
|
|
149
|
-
const parser = new A3mMSA(exampleA3M)
|
|
150
|
-
const width = parser.getWidth()
|
|
151
|
-
|
|
152
|
-
for (const name of parser.getNames()) {
|
|
153
|
-
expect(parser.getRow(name).length).toBe(width)
|
|
154
|
-
}
|
|
155
|
-
})
|
|
156
|
-
|
|
157
|
-
test('getMSA returns parsed data', () => {
|
|
158
|
-
const parser = new A3mMSA(exampleA3M)
|
|
159
|
-
const msa = parser.getMSA()
|
|
160
|
-
|
|
161
|
-
expect(msa.seqdata).toBeDefined()
|
|
162
|
-
expect(Object.keys(msa.seqdata).length).toBe(7)
|
|
163
|
-
})
|
|
164
|
-
})
|
package/src/parsers/A3mMSA.ts
DELETED
|
@@ -1,321 +0,0 @@
|
|
|
1
|
-
import type { NodeWithIds } from '../types'
|
|
2
|
-
|
|
3
|
-
// Char code helpers for fast character classification
|
|
4
|
-
const CODE_A = 65 // 'A'
|
|
5
|
-
const CODE_Z = 90 // 'Z'
|
|
6
|
-
const CODE_a = 97 // 'a'
|
|
7
|
-
const CODE_z = 122 // 'z'
|
|
8
|
-
const CODE_DASH = 45 // '-'
|
|
9
|
-
const CODE_DOT = 46 // '.'
|
|
10
|
-
|
|
11
|
-
function isUpperOrGap(code: number): boolean {
|
|
12
|
-
return (
|
|
13
|
-
(code >= CODE_A && code <= CODE_Z) ||
|
|
14
|
-
code === CODE_DASH ||
|
|
15
|
-
code === CODE_DOT
|
|
16
|
-
)
|
|
17
|
-
}
|
|
18
|
-
|
|
19
|
-
function isLower(code: number): boolean {
|
|
20
|
-
return code >= CODE_a && code <= CODE_z
|
|
21
|
-
}
|
|
22
|
-
|
|
23
|
-
/**
|
|
24
|
-
* A3M format parser
|
|
25
|
-
*
|
|
26
|
-
* The A3M format consists of aligned fasta, in which:
|
|
27
|
-
* - Insertions are shown as lowercase characters
|
|
28
|
-
* - Matches are shown as uppercase characters
|
|
29
|
-
* - Deletions are shown as '-'
|
|
30
|
-
* - Gaps aligned to inserts are shown as '.'
|
|
31
|
-
*
|
|
32
|
-
* The key property is that lowercase letters (inserts) implicitly introduce
|
|
33
|
-
* gaps in all other sequences that don't have an insert at that position.
|
|
34
|
-
*/
|
|
35
|
-
export default class A3mMSA {
|
|
36
|
-
private MSA: { seqdata: Record<string, string> }
|
|
37
|
-
private orderedNames: string[]
|
|
38
|
-
|
|
39
|
-
constructor(text: string) {
|
|
40
|
-
const rawSeqs: string[] = []
|
|
41
|
-
const names: string[] = []
|
|
42
|
-
|
|
43
|
-
// First pass: parse sequences (like FASTA), preserving order
|
|
44
|
-
for (const entry of text.split('>')) {
|
|
45
|
-
if (!/\S/.test(entry)) {
|
|
46
|
-
continue
|
|
47
|
-
}
|
|
48
|
-
const newlineIdx = entry.indexOf('\n')
|
|
49
|
-
if (newlineIdx === -1) {
|
|
50
|
-
continue
|
|
51
|
-
}
|
|
52
|
-
const defLine = entry.slice(0, newlineIdx)
|
|
53
|
-
const spaceIdx = defLine.indexOf(' ')
|
|
54
|
-
const id = spaceIdx === -1 ? defLine : defLine.slice(0, spaceIdx)
|
|
55
|
-
if (id) {
|
|
56
|
-
rawSeqs.push(entry.slice(newlineIdx + 1).replaceAll(/\s/g, ''))
|
|
57
|
-
names.push(id)
|
|
58
|
-
}
|
|
59
|
-
}
|
|
60
|
-
|
|
61
|
-
this.orderedNames = names
|
|
62
|
-
this.MSA = { seqdata: this.expandA3M(rawSeqs, names) }
|
|
63
|
-
}
|
|
64
|
-
|
|
65
|
-
/**
|
|
66
|
-
* Detect if text is likely A3M format
|
|
67
|
-
*/
|
|
68
|
-
static sniff(text: string): boolean {
|
|
69
|
-
if (!text.startsWith('>')) {
|
|
70
|
-
return false
|
|
71
|
-
}
|
|
72
|
-
|
|
73
|
-
const seqs: string[] = []
|
|
74
|
-
for (const entry of text.split('>')) {
|
|
75
|
-
if (!/\S/.test(entry)) {
|
|
76
|
-
continue
|
|
77
|
-
}
|
|
78
|
-
const newlineIdx = entry.indexOf('\n')
|
|
79
|
-
if (newlineIdx === -1) {
|
|
80
|
-
continue
|
|
81
|
-
}
|
|
82
|
-
const seq = entry.slice(newlineIdx + 1).replaceAll(/\s/g, '')
|
|
83
|
-
if (seq) {
|
|
84
|
-
seqs.push(seq)
|
|
85
|
-
}
|
|
86
|
-
}
|
|
87
|
-
|
|
88
|
-
if (seqs.length < 2) {
|
|
89
|
-
return false
|
|
90
|
-
}
|
|
91
|
-
|
|
92
|
-
// Check for lowercase and compute lengths in single pass per sequence
|
|
93
|
-
let hasLowercase = false
|
|
94
|
-
let firstMatchLen = -1
|
|
95
|
-
let firstRawLen = -1
|
|
96
|
-
let sameMatchLength = true
|
|
97
|
-
let differentRawLengths = false
|
|
98
|
-
|
|
99
|
-
for (const seq of seqs) {
|
|
100
|
-
let matchLen = 0
|
|
101
|
-
for (let i = 0; i < seq.length; i++) {
|
|
102
|
-
const code = seq.charCodeAt(i)
|
|
103
|
-
if (isLower(code)) {
|
|
104
|
-
hasLowercase = true
|
|
105
|
-
} else {
|
|
106
|
-
matchLen++
|
|
107
|
-
}
|
|
108
|
-
}
|
|
109
|
-
|
|
110
|
-
if (firstMatchLen === -1) {
|
|
111
|
-
firstMatchLen = matchLen
|
|
112
|
-
firstRawLen = seq.length
|
|
113
|
-
} else {
|
|
114
|
-
if (matchLen !== firstMatchLen) {
|
|
115
|
-
sameMatchLength = false
|
|
116
|
-
}
|
|
117
|
-
if (seq.length !== firstRawLen) {
|
|
118
|
-
differentRawLengths = true
|
|
119
|
-
}
|
|
120
|
-
}
|
|
121
|
-
}
|
|
122
|
-
|
|
123
|
-
return hasLowercase && sameMatchLength && differentRawLengths
|
|
124
|
-
}
|
|
125
|
-
|
|
126
|
-
/**
|
|
127
|
-
* Expand A3M format to standard aligned format.
|
|
128
|
-
*
|
|
129
|
-
* In A3M, lowercase characters are insertions that implicitly introduce
|
|
130
|
-
* gaps in sequences that don't have an insert at that position.
|
|
131
|
-
*/
|
|
132
|
-
private expandA3M(
|
|
133
|
-
rawSeqs: string[],
|
|
134
|
-
names: string[],
|
|
135
|
-
): Record<string, string> {
|
|
136
|
-
const numSeqs = names.length
|
|
137
|
-
if (numSeqs === 0) {
|
|
138
|
-
return {}
|
|
139
|
-
}
|
|
140
|
-
|
|
141
|
-
// Parse sequences into parallel arrays: matchChars and insertLengths
|
|
142
|
-
// matchChars[seqIdx] = string of match characters for that sequence
|
|
143
|
-
// insertLengths[seqIdx] = array of insert lengths after each match position
|
|
144
|
-
const matchChars: string[] = []
|
|
145
|
-
const insertLengths: number[][] = []
|
|
146
|
-
|
|
147
|
-
for (let seqIdx = 0; seqIdx < numSeqs; seqIdx++) {
|
|
148
|
-
const seq = rawSeqs[seqIdx]!
|
|
149
|
-
const matches: string[] = []
|
|
150
|
-
const insLens: number[] = []
|
|
151
|
-
let i = 0
|
|
152
|
-
|
|
153
|
-
while (i < seq.length) {
|
|
154
|
-
const code = seq.charCodeAt(i)
|
|
155
|
-
|
|
156
|
-
if (isUpperOrGap(code)) {
|
|
157
|
-
matches.push(seq[i]!)
|
|
158
|
-
// Count following lowercase inserts
|
|
159
|
-
let insLen = 0
|
|
160
|
-
let j = i + 1
|
|
161
|
-
while (j < seq.length && isLower(seq.charCodeAt(j))) {
|
|
162
|
-
insLen++
|
|
163
|
-
j++
|
|
164
|
-
}
|
|
165
|
-
insLens.push(insLen)
|
|
166
|
-
i = j
|
|
167
|
-
} else if (isLower(code)) {
|
|
168
|
-
// Leading insert before first match
|
|
169
|
-
matches.push('')
|
|
170
|
-
let insLen = 0
|
|
171
|
-
let j = i
|
|
172
|
-
while (j < seq.length && isLower(seq.charCodeAt(j))) {
|
|
173
|
-
insLen++
|
|
174
|
-
j++
|
|
175
|
-
}
|
|
176
|
-
insLens.push(insLen)
|
|
177
|
-
i = j
|
|
178
|
-
} else {
|
|
179
|
-
i++
|
|
180
|
-
}
|
|
181
|
-
}
|
|
182
|
-
|
|
183
|
-
matchChars.push(matches.join(''))
|
|
184
|
-
insertLengths.push(insLens)
|
|
185
|
-
}
|
|
186
|
-
|
|
187
|
-
// Find number of match positions and max inserts at each position
|
|
188
|
-
let numPositions = 0
|
|
189
|
-
for (let seqIdx = 0; seqIdx < numSeqs; seqIdx++) {
|
|
190
|
-
const len = insertLengths[seqIdx]!.length
|
|
191
|
-
if (len > numPositions) {
|
|
192
|
-
numPositions = len
|
|
193
|
-
}
|
|
194
|
-
}
|
|
195
|
-
|
|
196
|
-
const maxInserts = new Array<number>(numPositions).fill(0)
|
|
197
|
-
for (let seqIdx = 0; seqIdx < numSeqs; seqIdx++) {
|
|
198
|
-
const insLens = insertLengths[seqIdx]!
|
|
199
|
-
for (let pos = 0; pos < insLens.length; pos++) {
|
|
200
|
-
const len = insLens[pos]!
|
|
201
|
-
if (len > maxInserts[pos]!) {
|
|
202
|
-
maxInserts[pos] = len
|
|
203
|
-
}
|
|
204
|
-
}
|
|
205
|
-
}
|
|
206
|
-
|
|
207
|
-
// Pre-compute gap strings for common lengths (avoid repeated .repeat())
|
|
208
|
-
const gapCache: string[] = ['']
|
|
209
|
-
const maxGap = Math.max(...maxInserts, 0)
|
|
210
|
-
for (let i = 1; i <= maxGap; i++) {
|
|
211
|
-
gapCache.push('.'.repeat(i))
|
|
212
|
-
}
|
|
213
|
-
|
|
214
|
-
// Build expanded sequences
|
|
215
|
-
const expanded: Record<string, string> = {}
|
|
216
|
-
|
|
217
|
-
for (let seqIdx = 0; seqIdx < numSeqs; seqIdx++) {
|
|
218
|
-
const seq = rawSeqs[seqIdx]!
|
|
219
|
-
const matches = matchChars[seqIdx]!
|
|
220
|
-
const insLens = insertLengths[seqIdx]!
|
|
221
|
-
const result: string[] = []
|
|
222
|
-
|
|
223
|
-
// Track position in original sequence for extracting inserts
|
|
224
|
-
let seqPos = 0
|
|
225
|
-
|
|
226
|
-
for (let pos = 0; pos < numPositions; pos++) {
|
|
227
|
-
const maxIns = maxInserts[pos]!
|
|
228
|
-
|
|
229
|
-
if (pos < insLens.length) {
|
|
230
|
-
const matchChar = matches[pos]
|
|
231
|
-
const insLen = insLens[pos]!
|
|
232
|
-
|
|
233
|
-
// Add match character
|
|
234
|
-
if (matchChar) {
|
|
235
|
-
result.push(matchChar)
|
|
236
|
-
seqPos++
|
|
237
|
-
} else {
|
|
238
|
-
result.push('.')
|
|
239
|
-
}
|
|
240
|
-
|
|
241
|
-
// Extract and uppercase inserts from original sequence
|
|
242
|
-
if (insLen > 0) {
|
|
243
|
-
result.push(seq.slice(seqPos, seqPos + insLen).toUpperCase())
|
|
244
|
-
seqPos += insLen
|
|
245
|
-
}
|
|
246
|
-
|
|
247
|
-
// Pad with gaps
|
|
248
|
-
const padding = maxIns - insLen
|
|
249
|
-
if (padding > 0) {
|
|
250
|
-
result.push(gapCache[padding]!)
|
|
251
|
-
}
|
|
252
|
-
} else {
|
|
253
|
-
// This sequence is shorter - add gaps
|
|
254
|
-
result.push(gapCache[1 + maxIns]!)
|
|
255
|
-
}
|
|
256
|
-
}
|
|
257
|
-
|
|
258
|
-
expanded[names[seqIdx]!] = result.join('')
|
|
259
|
-
}
|
|
260
|
-
|
|
261
|
-
return expanded
|
|
262
|
-
}
|
|
263
|
-
|
|
264
|
-
getMSA() {
|
|
265
|
-
return this.MSA
|
|
266
|
-
}
|
|
267
|
-
|
|
268
|
-
getRowData() {
|
|
269
|
-
return undefined
|
|
270
|
-
}
|
|
271
|
-
|
|
272
|
-
getNames() {
|
|
273
|
-
return this.orderedNames
|
|
274
|
-
}
|
|
275
|
-
|
|
276
|
-
getRow(name: string) {
|
|
277
|
-
return this.MSA.seqdata[name] || ''
|
|
278
|
-
}
|
|
279
|
-
|
|
280
|
-
getWidth() {
|
|
281
|
-
const name = Object.keys(this.MSA.seqdata)[0]
|
|
282
|
-
return name ? this.getRow(name).length : 0
|
|
283
|
-
}
|
|
284
|
-
|
|
285
|
-
getStructures() {
|
|
286
|
-
return {}
|
|
287
|
-
}
|
|
288
|
-
|
|
289
|
-
get alignmentNames() {
|
|
290
|
-
return []
|
|
291
|
-
}
|
|
292
|
-
|
|
293
|
-
getHeader() {
|
|
294
|
-
return {}
|
|
295
|
-
}
|
|
296
|
-
|
|
297
|
-
getTree(): NodeWithIds {
|
|
298
|
-
return {
|
|
299
|
-
id: 'root',
|
|
300
|
-
name: 'root',
|
|
301
|
-
noTree: true,
|
|
302
|
-
children: this.getNames().map(name => ({
|
|
303
|
-
id: name,
|
|
304
|
-
children: [],
|
|
305
|
-
name,
|
|
306
|
-
})),
|
|
307
|
-
}
|
|
308
|
-
}
|
|
309
|
-
|
|
310
|
-
get seqConsensus() {
|
|
311
|
-
return undefined
|
|
312
|
-
}
|
|
313
|
-
|
|
314
|
-
get secondaryStructureConsensus() {
|
|
315
|
-
return undefined
|
|
316
|
-
}
|
|
317
|
-
|
|
318
|
-
get tracks() {
|
|
319
|
-
return []
|
|
320
|
-
}
|
|
321
|
-
}
|