react-msaview 4.4.6 → 4.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bundle/index.js +9 -9
- package/bundle/index.js.LICENSE.txt +8 -8
- package/bundle/index.js.map +1 -1
- package/dist/colorSchemes.d.ts +0 -6
- package/dist/colorSchemes.js +1 -119
- package/dist/colorSchemes.js.map +1 -1
- package/dist/components/ConservationTrack.d.ts +8 -0
- package/dist/components/ConservationTrack.js +54 -0
- package/dist/components/ConservationTrack.js.map +1 -0
- package/dist/components/Loading.js +14 -2
- package/dist/components/Loading.js.map +1 -1
- package/dist/components/MSAView.js +36 -0
- package/dist/components/MSAView.js.map +1 -1
- package/dist/components/SequenceTextArea.js +3 -2
- package/dist/components/SequenceTextArea.js.map +1 -1
- package/dist/components/TextTrack.d.ts +3 -3
- package/dist/components/TextTrack.js +4 -1
- package/dist/components/TextTrack.js.map +1 -1
- package/dist/components/Track.js +21 -8
- package/dist/components/Track.js.map +1 -1
- package/dist/components/dialogs/ExportSVGDialog.js +19 -3
- package/dist/components/dialogs/ExportSVGDialog.js.map +1 -1
- package/dist/components/header/GappynessSlider.d.ts +6 -0
- package/dist/components/header/GappynessSlider.js +19 -0
- package/dist/components/header/GappynessSlider.js.map +1 -0
- package/dist/components/header/Header.js +3 -1
- package/dist/components/header/Header.js.map +1 -1
- package/dist/components/header/HeaderMenu.js +30 -14
- package/dist/components/header/HeaderMenu.js.map +1 -1
- package/dist/components/minimap/MinimapSVG.js +4 -3
- package/dist/components/minimap/MinimapSVG.js.map +1 -1
- package/dist/components/msa/MSACanvasBlock.js +56 -42
- package/dist/components/msa/MSACanvasBlock.js.map +1 -1
- package/dist/components/msa/renderMSABlock.js +71 -26
- package/dist/components/msa/renderMSABlock.js.map +1 -1
- package/dist/components/msa/renderMSAMouseover.js +8 -1
- package/dist/components/msa/renderMSAMouseover.js.map +1 -1
- package/dist/components/tracks/renderTracksSvg.d.ts +29 -0
- package/dist/components/tracks/renderTracksSvg.js +83 -0
- package/dist/components/tracks/renderTracksSvg.js.map +1 -0
- package/dist/components/tree/TreeNodeMenu.js +2 -2
- package/dist/components/tree/TreeNodeMenu.js.map +1 -1
- package/dist/components/tree/renderTreeCanvas.d.ts +0 -1
- package/dist/components/tree/renderTreeCanvas.js +23 -24
- package/dist/components/tree/renderTreeCanvas.js.map +1 -1
- package/dist/constants.d.ts +22 -0
- package/dist/constants.js +26 -0
- package/dist/constants.js.map +1 -0
- package/dist/layout.js.map +1 -1
- package/dist/model/msaModel.js +3 -2
- package/dist/model/msaModel.js.map +1 -1
- package/dist/model/treeModel.js +9 -8
- package/dist/model/treeModel.js.map +1 -1
- package/dist/model.d.ts +271 -15
- package/dist/model.js +427 -128
- package/dist/model.js.map +1 -1
- package/dist/neighborJoining.d.ts +1 -0
- package/dist/neighborJoining.js +839 -0
- package/dist/neighborJoining.js.map +1 -0
- package/dist/neighborJoining.test.d.ts +1 -0
- package/dist/neighborJoining.test.js +110 -0
- package/dist/neighborJoining.test.js.map +1 -0
- package/dist/parsers/A3mMSA.d.ts +43 -0
- package/dist/parsers/A3mMSA.js +277 -0
- package/dist/parsers/A3mMSA.js.map +1 -0
- package/dist/parsers/A3mMSA.test.d.ts +1 -0
- package/dist/parsers/A3mMSA.test.js +138 -0
- package/dist/parsers/A3mMSA.test.js.map +1 -0
- package/dist/parsers/ClustalMSA.d.ts +4 -4
- package/dist/parsers/ClustalMSA.js +3 -1
- package/dist/parsers/ClustalMSA.js.map +1 -1
- package/dist/parsers/FastaMSA.js +17 -16
- package/dist/parsers/FastaMSA.js.map +1 -1
- package/dist/renderToSvg.d.ts +1 -0
- package/dist/renderToSvg.js +48 -18
- package/dist/renderToSvg.js.map +1 -1
- package/dist/rowCoordinateCalculations.js +2 -0
- package/dist/rowCoordinateCalculations.js.map +1 -1
- package/dist/types.d.ts +2 -3
- package/dist/util.js +17 -9
- package/dist/util.js.map +1 -1
- package/dist/version.d.ts +1 -1
- package/dist/version.js +1 -1
- package/package.json +6 -6
- package/src/colorSchemes.ts +1 -179
- package/src/components/ConservationTrack.tsx +104 -0
- package/src/components/Loading.tsx +44 -2
- package/src/components/MSAView.tsx +68 -0
- package/src/components/SequenceTextArea.tsx +3 -2
- package/src/components/TextTrack.tsx +7 -4
- package/src/components/Track.tsx +25 -9
- package/src/components/dialogs/ExportSVGDialog.tsx +25 -1
- package/src/components/header/GappynessSlider.tsx +35 -0
- package/src/components/header/Header.tsx +3 -1
- package/src/components/header/HeaderMenu.tsx +36 -15
- package/src/components/minimap/MinimapSVG.tsx +6 -3
- package/src/components/msa/MSACanvasBlock.tsx +66 -48
- package/src/components/msa/renderMSABlock.ts +103 -40
- package/src/components/msa/renderMSAMouseover.ts +9 -0
- package/src/components/tracks/renderTracksSvg.ts +157 -0
- package/src/components/tree/TreeNodeMenu.tsx +2 -2
- package/src/components/tree/renderTreeCanvas.ts +25 -34
- package/src/constants.ts +27 -0
- package/src/layout.ts +1 -6
- package/src/model/msaModel.ts +4 -2
- package/src/model/treeModel.ts +19 -8
- package/src/model.ts +517 -140
- package/src/neighborJoining.test.ts +129 -0
- package/src/neighborJoining.ts +885 -0
- package/src/parsers/A3mMSA.test.ts +164 -0
- package/src/parsers/A3mMSA.ts +321 -0
- package/src/parsers/ClustalMSA.ts +7 -5
- package/src/parsers/FastaMSA.ts +17 -17
- package/src/renderToSvg.tsx +105 -26
- package/src/rowCoordinateCalculations.ts +2 -0
- package/src/types.ts +2 -4
- package/src/util.ts +21 -8
- package/src/version.ts +1 -1
- package/dist/components/dialogs/TracklistDialog.d.ts +0 -7
- package/dist/components/dialogs/TracklistDialog.js +0 -23
- package/dist/components/dialogs/TracklistDialog.js.map +0 -1
- package/src/components/dialogs/TracklistDialog.tsx +0 -73
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
import { describe, expect, test } from 'vitest'
|
|
2
|
+
|
|
3
|
+
import A3mMSA from './A3mMSA'
|
|
4
|
+
|
|
5
|
+
// Shared A3M fixture: a record named `example` followed by six records named
// `1`..`6`. Records 1, 5 and 6 each contain one lowercase residue (an A3M
// insertion), so their raw lengths differ from the other records.
const exampleA3M = `>example
ETESMKTVRIREKIKKFLGDRPRNTAEILEHINSTMRHGTTSQQLGNVLSKDKDIVKVGYIKRSGILSGGYDICEWATRNWVAEHCPEWTE
>1
----MRTTRLRQKIKKFLNERGeANTTEILEHVNSTMRHGTTPQQLGNVLSKDKDILKVATTKRGGALSGRYEICVWTLRP-----------
>2
----MDSQNLRDLIRNYLSERPRNTIEISAWLASQMDPNSCPEDVTNILEADESIVRIGTVRKSGMRLTDLPISEWASSSWVRRHE-----
>3
----MNSQNLRELIRNYLSERPRNTIEISTWLSSQIDPTNSPVDITSILEADDQIVRIGTVRKSGMRRSESPVSEWASNTWVKHHE-----
>4
--RDMDTEKVREIVRNYISERPRNTAEIAAWLNRH-DDGTGGSDVAAILESDGSFVRIGTVRTSGMTGNSPPLSEWATEKWIQHHER----
>5
-----RTRRLREAVLVFLEEKGnANTVEVFDYLNERFRWGATMNQVGNILAKDTRFAKVGHQ-RGQFRGSVYTVCVWALS------------
>6
-----RTKRLREAVRVYLAENGrSHTVDIFDHLNDRFSWGATMNQVGNILAKDNRFEKVGHVRD-FFRGARYTVCVWDLAS-----------
`
|
|
20
|
+
|
|
21
|
+
// Test suite for the A3M parser: format sniffing, insert expansion and the
// accessor methods (names, rows, width, tree, raw MSA data).
describe('A3mMSA', () => {
  test('sniff detects A3M format', () => {
    const detected = A3mMSA.sniff(exampleA3M)
    expect(detected).toBe(true)
  })

  test('sniff returns false for regular FASTA', () => {
    // Plain aligned FASTA: no lowercase residues, identical raw lengths
    const plainFasta = '>seq1\nACDEFGHIKLMNPQRST\n>seq2\nACDEFGHIKLMNPQRST\n'
    expect(A3mMSA.sniff(plainFasta)).toBe(false)
  })

  test('sniff returns false for non-FASTA formats', () => {
    expect(A3mMSA.sniff('# STOCKHOLM 1.0\n')).toBe(false)
    expect(A3mMSA.sniff('CLUSTAL W')).toBe(false)
  })

  test('parses A3M and expands insertions', () => {
    const msa = new A3mMSA(exampleA3M)
    const rowNames = msa.getNames()

    expect(rowNames).toEqual(['example', '1', '2', '3', '4', '5', '6'])

    // Every expanded row must be exactly as wide as the first one
    const rowWidths = rowNames.map(rowName => msa.getRow(rowName).length)
    const allSameWidth = rowWidths.every(width => width === rowWidths[0])
    expect(allSameWidth).toBe(true)

    // Expanded inserts make the alignment wider than the raw match columns
    expect(msa.getWidth()).toBeGreaterThan(90)
  })

  test('lowercase inserts become uppercase after expansion', () => {
    const msa = new A3mMSA(exampleA3M)

    // No expanded row may still contain a lowercase letter
    for (const rowName of msa.getNames()) {
      const hasLowercase = /[a-z]/.test(msa.getRow(rowName))
      expect(hasLowercase).toBe(false)
    }
  })

  test('handles simple A3M with single insert', () => {
    // Valid A3M keeps the number of match columns (uppercase, '-', '.')
    // consistent: seq1 has 5 match columns plus one insert after D, seq2 has
    // the same 5 match columns and no insert.
    const input = '>seq1\nACDaEF\n>seq2\nACDEF\n'
    const msa = new A3mMSA(input)

    // The insert in seq1 forces a padding column into seq2
    const withInsert = msa.getRow('seq1')
    const withoutInsert = msa.getRow('seq2')

    expect(withInsert.length).toBe(withoutInsert.length)
    expect(withInsert).toBe('ACDAEF')
    expect(withoutInsert).toBe('ACD.EF')
  })

  test('handles multiple inserts at different positions', () => {
    // seq1: 6 match columns (A,C,D,E,F,I) with inserts "ab" after D and "gh"
    // after F; seq2: the same 6 match columns without inserts.
    const input = '>seq1\nACDabEFghI\n>seq2\nACDEFI\n'
    const msa = new A3mMSA(input)

    const withInserts = msa.getRow('seq1')
    const withoutInserts = msa.getRow('seq2')

    expect(withInserts.length).toBe(withoutInserts.length)
    // seq1 keeps its residues; seq2 gets '.' at both insert positions
    expect(withInserts).toBe('ACDABEFGHI')
    expect(withoutInserts).toBe('ACD..EF..I')
  })

  test('handles varying insert lengths', () => {
    const input = '>seq1\nACDabcEF\n>seq2\nACDaEF\n>seq3\nACDEF\n'
    const msa = new A3mMSA(input)

    const threeInserts = msa.getRow('seq1')
    const oneInsert = msa.getRow('seq2')
    const noInsert = msa.getRow('seq3')

    // All rows share one width
    expect(threeInserts.length).toBe(oneInsert.length)
    expect(oneInsert.length).toBe(noInsert.length)

    // The widest insert (3 residues) defines the number of insert columns
    expect(threeInserts).toBe('ACDABCEF')
    expect(oneInsert).toBe('ACDA..EF')
    expect(noInsert).toBe('ACD...EF')
  })

  test('getTree returns flat tree structure', () => {
    const root = new A3mMSA(exampleA3M).getTree()

    expect(root.id).toBe('root')
    expect(root.noTree).toBe(true)
    expect(root.children.length).toBe(7)

    const childNames = root.children.map(child => child.name)
    expect(childNames).toEqual(['example', '1', '2', '3', '4', '5', '6'])
  })

  test('getWidth returns consistent width', () => {
    const msa = new A3mMSA(exampleA3M)
    const expectedWidth = msa.getWidth()

    for (const rowName of msa.getNames()) {
      expect(msa.getRow(rowName).length).toBe(expectedWidth)
    }
  })

  test('getMSA returns parsed data', () => {
    const { seqdata } = new A3mMSA(exampleA3M).getMSA()

    expect(seqdata).toBeDefined()
    expect(Object.keys(seqdata).length).toBe(7)
  })
})
|
|
@@ -0,0 +1,321 @@
|
|
|
1
|
+
import type { NodeWithIds } from '../types'
|
|
2
|
+
|
|
3
|
+
// Char code helpers for fast character classification
const CODE_A = 65 // 'A'
const CODE_Z = 90 // 'Z'
const CODE_a = 97 // 'a'
const CODE_z = 122 // 'z'
const CODE_DASH = 45 // '-'
const CODE_DOT = 46 // '.'

/**
 * True when the char code is an uppercase ASCII letter, '-' or '.', i.e. a
 * character that occupies a match column.
 */
function isUpperOrGap(code: number): boolean {
  return (
    (code >= CODE_A && code <= CODE_Z) ||
    code === CODE_DASH ||
    code === CODE_DOT
  )
}

/** True when the char code is a lowercase ASCII letter (an A3M insert). */
function isLower(code: number): boolean {
  return code >= CODE_a && code <= CODE_z
}

/** One '>'-delimited record: its identifier and whitespace-free sequence. */
interface FastaEntry {
  id: string
  sequence: string
}

/**
 * Split FASTA-like text into records, preserving input order.
 *
 * Chunks that are blank or that have no newline after the definition line
 * are skipped. The id is the definition line up to the first space; a
 * trailing '\r' (CRLF input) is removed so it does not end up in the id.
 */
function parseEntries(text: string): FastaEntry[] {
  const entries: FastaEntry[] = []
  for (const chunk of text.split('>')) {
    if (!/\S/.test(chunk)) {
      continue
    }
    const newlineIdx = chunk.indexOf('\n')
    if (newlineIdx === -1) {
      continue
    }
    const defLine = chunk.slice(0, newlineIdx).replace(/\r$/, '')
    const spaceIdx = defLine.indexOf(' ')
    entries.push({
      id: spaceIdx === -1 ? defLine : defLine.slice(0, spaceIdx),
      sequence: chunk.slice(newlineIdx + 1).replace(/\s/g, ''),
    })
  }
  return entries
}

/** A raw A3M row split into match columns and per-slot insert text. */
interface SplitRow {
  /** Match-column characters (uppercase letters, '-' and '.') in order */
  matches: string
  /**
   * Uppercased insert text per slot; always `matches.length + 1` entries.
   * Slot k holds the inserts that precede match column k, so slot 0 is a
   * leading insert and the last slot is a trailing insert.
   */
  inserts: string[]
}

/**
 * Separate a raw A3M sequence into match columns and insert slots.
 *
 * Characters that are neither ASCII letters nor '-' / '.' are ignored.
 */
function splitRow(seq: string): SplitRow {
  let matches = ''
  const inserts: string[] = []
  let pending = ''
  let i = 0

  while (i < seq.length) {
    const code = seq.charCodeAt(i)
    if (isUpperOrGap(code)) {
      // A match column closes the insert slot that precedes it
      inserts.push(pending)
      pending = ''
      matches += seq.charAt(i)
      i++
    } else if (isLower(code)) {
      // Consume the whole run of lowercase residues at once
      let end = i + 1
      while (end < seq.length && isLower(seq.charCodeAt(end))) {
        end++
      }
      pending += seq.slice(i, end).toUpperCase()
      i = end
    } else {
      i++
    }
  }

  // Inserts after the last match column (possibly empty)
  inserts.push(pending)
  return { matches, inserts }
}

/**
 * A3M format parser
 *
 * The A3M format consists of aligned fasta, in which:
 * - Insertions are shown as lowercase characters
 * - Matches are shown as uppercase characters
 * - Deletions are shown as '-'
 * - Gaps aligned to inserts are shown as '.'
 *
 * The key property is that lowercase letters (inserts) implicitly introduce
 * gaps in all other sequences that don't have an insert at that position.
 * This class expands those implicit gaps so every row has the same width.
 */
export default class A3mMSA {
  private MSA: { seqdata: Record<string, string> }
  private orderedNames: string[]

  /**
   * @param text - A3M text; records without a newline after the definition
   *   line or with an empty id are ignored
   */
  constructor(text: string) {
    const rawSeqs: string[] = []
    const names: string[] = []

    // First pass: parse sequences (like FASTA), preserving order
    for (const { id, sequence } of parseEntries(text)) {
      if (id) {
        rawSeqs.push(sequence)
        names.push(id)
      }
    }

    this.orderedNames = names
    this.MSA = { seqdata: this.expandA3M(rawSeqs, names) }
  }

  /**
   * Detect if text is likely A3M format.
   *
   * Requires text starting with '>', at least two non-empty sequences, at
   * least one lowercase letter, the same number of non-lowercase characters
   * in every sequence, and at least one sequence whose raw length differs
   * from the first.
   */
  static sniff(text: string): boolean {
    if (!text.startsWith('>')) {
      return false
    }

    const seqs = parseEntries(text)
      .map(entry => entry.sequence)
      .filter(sequence => sequence.length > 0)
    if (seqs.length < 2) {
      return false
    }

    let hasLowercase = false
    let sameMatchLength = true
    let differentRawLengths = false
    let firstMatchLen = -1
    let firstRawLen = -1

    seqs.forEach((seq, idx) => {
      // Everything that is not a lowercase letter counts as a match column
      let matchLen = 0
      for (let i = 0; i < seq.length; i++) {
        if (isLower(seq.charCodeAt(i))) {
          hasLowercase = true
        } else {
          matchLen++
        }
      }

      if (idx === 0) {
        firstMatchLen = matchLen
        firstRawLen = seq.length
        return
      }
      if (matchLen !== firstMatchLen) {
        sameMatchLength = false
      }
      if (seq.length !== firstRawLen) {
        differentRawLengths = true
      }
    })

    return hasLowercase && sameMatchLength && differentRawLengths
  }

  /**
   * Expand A3M format to standard aligned format.
   *
   * In A3M, lowercase characters are insertions that implicitly introduce
   * gaps in sequences that don't have an insert at that position. Each
   * insert slot is widened to the longest insert any sequence has there;
   * shorter inserts are right-padded with '.', inserts are uppercased, and
   * sequences with fewer match columns than the longest one are filled with
   * '.' so that all returned rows have equal length.
   *
   * @param rawSeqs - raw sequences, parallel to `names`
   * @param names - sequence ids; a repeated id keeps only its last sequence
   * @returns expanded rows keyed by id
   */
  private expandA3M(
    rawSeqs: string[],
    names: string[],
  ): Record<string, string> {
    if (names.length === 0) {
      return {}
    }

    const rows = names.map((_, seqIdx) => splitRow(rawSeqs[seqIdx] ?? ''))

    // Number of match columns of the longest sequence
    let numMatchCols = 0
    for (const row of rows) {
      if (row.matches.length > numMatchCols) {
        numMatchCols = row.matches.length
      }
    }

    // Width of every insert slot = longest insert seen in that slot
    const slotWidths = new Array<number>(numMatchCols + 1).fill(0)
    for (const row of rows) {
      row.inserts.forEach((insert, slot) => {
        if (insert.length > (slotWidths[slot] ?? 0)) {
          slotWidths[slot] = insert.length
        }
      })
    }

    const expanded: Record<string, string> = {}
    names.forEach((name, seqIdx) => {
      const row = rows[seqIdx]
      if (!row) {
        return
      }
      const parts: string[] = []
      for (let slot = 0; slot <= numMatchCols; slot++) {
        // Insert slot first (padded to the shared width) ...
        parts.push((row.inserts[slot] ?? '').padEnd(slotWidths[slot] ?? 0, '.'))
        // ... then the match column that follows it, '.' if this row is short
        if (slot < numMatchCols) {
          parts.push(row.matches.charAt(slot) || '.')
        }
      }
      expanded[name] = parts.join('')
    })

    return expanded
  }

  /** Expanded alignment data, keyed by sequence id. */
  getMSA() {
    return this.MSA
  }

  /** Always `undefined` for this parser. */
  getRowData() {
    return undefined
  }

  /** Sequence ids in input order. */
  getNames() {
    return this.orderedNames
  }

  /** Expanded row for `name`, or '' when there is none. */
  getRow(name: string) {
    return this.MSA.seqdata[name] || ''
  }

  /** Width of the expanded alignment (0 when there are no sequences). */
  getWidth() {
    const name = Object.keys(this.MSA.seqdata)[0]
    return name ? this.getRow(name).length : 0
  }

  /** Always an empty object for this parser. */
  getStructures() {
    return {}
  }

  /** Always an empty list for this parser. */
  get alignmentNames() {
    return []
  }

  /** Always an empty object for this parser. */
  getHeader() {
    return {}
  }

  /** Flat placeholder tree: a `root` node with one leaf per sequence id. */
  getTree(): NodeWithIds {
    return {
      id: 'root',
      name: 'root',
      noTree: true,
      children: this.getNames().map(name => ({
        id: name,
        children: [],
        name,
      })),
    }
  }

  /** Always `undefined` for this parser. */
  get seqConsensus() {
    return undefined
  }

  /** Always `undefined` for this parser. */
  get secondaryStructureConsensus() {
    return undefined
  }

  /** Always an empty list for this parser. */
  get tracks() {
    return []
  }
}
|
|
@@ -46,11 +46,13 @@ export default class ClustalMSA {
|
|
|
46
46
|
id: 'root',
|
|
47
47
|
name: 'root',
|
|
48
48
|
noTree: true,
|
|
49
|
-
children: this.getNames()
|
|
50
|
-
|
|
51
|
-
name
|
|
52
|
-
|
|
53
|
-
|
|
49
|
+
children: this.getNames()
|
|
50
|
+
.filter((name): name is string => name !== undefined)
|
|
51
|
+
.map(name => ({
|
|
52
|
+
id: name,
|
|
53
|
+
name,
|
|
54
|
+
children: [],
|
|
55
|
+
})),
|
|
54
56
|
}
|
|
55
57
|
}
|
|
56
58
|
|
package/src/parsers/FastaMSA.ts
CHANGED
|
@@ -1,26 +1,26 @@
|
|
|
1
1
|
import type { NodeWithIds } from '../types'
|
|
2
2
|
|
|
3
|
-
function parseSmallFasta(text: string) {
|
|
4
|
-
return text
|
|
5
|
-
.split('>')
|
|
6
|
-
.filter(t => /\S/.test(t))
|
|
7
|
-
.map(entryText => {
|
|
8
|
-
const [defLine, ...seqLines] = entryText.split('\n')
|
|
9
|
-
const [id, ...description] = defLine!.split(' ')
|
|
10
|
-
const descriptionStr = description.join(' ')
|
|
11
|
-
const seqLinesStr = seqLines.join('')
|
|
12
|
-
const sequence = seqLinesStr.replaceAll(/\s/g, '')
|
|
13
|
-
return { id, description: descriptionStr, sequence }
|
|
14
|
-
})
|
|
15
|
-
}
|
|
16
3
|
export default class FastaMSA {
|
|
17
4
|
private MSA: { seqdata: Record<string, string> }
|
|
5
|
+
|
|
18
6
|
constructor(text: string) {
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
7
|
+
const seqdata: Record<string, string> = {}
|
|
8
|
+
for (const entry of text.split('>')) {
|
|
9
|
+
if (!/\S/.test(entry)) {
|
|
10
|
+
continue
|
|
11
|
+
}
|
|
12
|
+
const newlineIdx = entry.indexOf('\n')
|
|
13
|
+
if (newlineIdx === -1) {
|
|
14
|
+
continue
|
|
15
|
+
}
|
|
16
|
+
const defLine = entry.slice(0, newlineIdx)
|
|
17
|
+
const spaceIdx = defLine.indexOf(' ')
|
|
18
|
+
const id = spaceIdx === -1 ? defLine : defLine.slice(0, spaceIdx)
|
|
19
|
+
if (id) {
|
|
20
|
+
seqdata[id] = entry.slice(newlineIdx + 1).replaceAll(/\s/g, '')
|
|
21
|
+
}
|
|
23
22
|
}
|
|
23
|
+
this.MSA = { seqdata }
|
|
24
24
|
}
|
|
25
25
|
|
|
26
26
|
getMSA() {
|