jbrowse-plugin-mafviewer 1.4.5 → 1.4.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/BigMafAdapter/BigMafAdapter.js +4 -5
- package/dist/BigMafAdapter/BigMafAdapter.js.map +1 -1
- package/dist/BigMafAdapter/configSchema.d.ts +2 -2
- package/dist/LinearMafDisplay/components/LinearMafDisplayComponent.js +38 -108
- package/dist/LinearMafDisplay/components/LinearMafDisplayComponent.js.map +1 -1
- package/dist/LinearMafDisplay/components/MAFTooltip.d.ts +0 -3
- package/dist/LinearMafDisplay/components/MAFTooltip.js.map +1 -1
- package/dist/LinearMafDisplay/components/MsaHighlightOverlay.d.ts +9 -0
- package/dist/LinearMafDisplay/components/MsaHighlightOverlay.js +34 -0
- package/dist/LinearMafDisplay/components/MsaHighlightOverlay.js.map +1 -0
- package/dist/LinearMafDisplay/components/Sidebar/SvgWrapper.js +1 -1
- package/dist/LinearMafDisplay/components/Sidebar/SvgWrapper.js.map +1 -1
- package/dist/LinearMafDisplay/components/useDragSelection.d.ts +25 -0
- package/dist/LinearMafDisplay/components/useDragSelection.js +103 -0
- package/dist/LinearMafDisplay/components/useDragSelection.js.map +1 -0
- package/dist/LinearMafDisplay/configSchema.d.ts +3 -30
- package/dist/LinearMafDisplay/stateModel.d.ts +1043 -121
- package/dist/LinearMafDisplay/stateModel.js +85 -41
- package/dist/LinearMafDisplay/stateModel.js.map +1 -1
- package/dist/LinearMafDisplay/types.d.ts +2 -2
- package/dist/LinearMafDisplay/util.d.ts +5 -0
- package/dist/LinearMafDisplay/util.js +25 -4
- package/dist/LinearMafDisplay/util.js.map +1 -1
- package/dist/LinearMafRenderer/LinearMafRenderer.d.ts +41 -5
- package/dist/LinearMafRenderer/LinearMafRenderer.js +1 -1
- package/dist/LinearMafRenderer/LinearMafRenderer.js.map +1 -1
- package/dist/LinearMafRenderer/components/LinearMafRendering.d.ts +14 -5
- package/dist/LinearMafRenderer/components/LinearMafRendering.js +21 -19
- package/dist/LinearMafRenderer/components/LinearMafRendering.js.map +1 -1
- package/dist/LinearMafRenderer/configSchema.d.ts +1 -6
- package/dist/LinearMafRenderer/configSchema.js +1 -6
- package/dist/LinearMafRenderer/configSchema.js.map +1 -1
- package/dist/LinearMafRenderer/rendering/insertions.d.ts +1 -1
- package/dist/LinearMafRenderer/rendering/insertions.js +2 -2
- package/dist/LinearMafRenderer/rendering/mismatches.d.ts +1 -1
- package/dist/LinearMafRenderer/rendering/mismatches.js +3 -3
- package/dist/LinearMafRenderer/rendering/types.d.ts +1 -1
- package/dist/MafAddTrackWorkflow/AddTrackWorkflow.js +1 -1
- package/dist/MafAddTrackWorkflow/AddTrackWorkflow.js.map +1 -1
- package/dist/MafAddTrackWorkflow/index.js +1 -1
- package/dist/MafAddTrackWorkflow/index.js.map +1 -1
- package/dist/MafGetSequences/MafGetSequences.d.ts +1 -0
- package/dist/MafGetSequences/MafGetSequences.js +2 -1
- package/dist/MafGetSequences/MafGetSequences.js.map +1 -1
- package/dist/MafSequenceWidget/LabelsCanvas.d.ts +8 -0
- package/dist/MafSequenceWidget/LabelsCanvas.js +37 -0
- package/dist/MafSequenceWidget/LabelsCanvas.js.map +1 -0
- package/dist/MafSequenceWidget/MafSequenceHoverHighlight.d.ts +6 -0
- package/dist/MafSequenceWidget/MafSequenceHoverHighlight.js +52 -0
- package/dist/MafSequenceWidget/MafSequenceHoverHighlight.js.map +1 -0
- package/dist/MafSequenceWidget/MafSequenceHoverHighlightExtension.d.ts +2 -0
- package/dist/MafSequenceWidget/MafSequenceHoverHighlightExtension.js +12 -0
- package/dist/MafSequenceWidget/MafSequenceHoverHighlightExtension.js.map +1 -0
- package/dist/MafSequenceWidget/MafSequenceWidget.d.ts +6 -0
- package/dist/MafSequenceWidget/MafSequenceWidget.js +189 -0
- package/dist/MafSequenceWidget/MafSequenceWidget.js.map +1 -0
- package/dist/MafSequenceWidget/SequenceCanvas.d.ts +12 -0
- package/dist/MafSequenceWidget/SequenceCanvas.js +86 -0
- package/dist/MafSequenceWidget/SequenceCanvas.js.map +1 -0
- package/dist/MafSequenceWidget/SequenceDisplay.d.ts +12 -0
- package/dist/MafSequenceWidget/SequenceDisplay.js +117 -0
- package/dist/MafSequenceWidget/SequenceDisplay.js.map +1 -0
- package/dist/MafSequenceWidget/SequenceTooltip.d.ts +11 -0
- package/dist/MafSequenceWidget/SequenceTooltip.js +39 -0
- package/dist/MafSequenceWidget/SequenceTooltip.js.map +1 -0
- package/dist/MafSequenceWidget/baseColors.d.ts +3 -0
- package/dist/MafSequenceWidget/baseColors.js +64 -0
- package/dist/MafSequenceWidget/baseColors.js.map +1 -0
- package/dist/MafSequenceWidget/colToGenomePos.d.ts +13 -0
- package/dist/MafSequenceWidget/colToGenomePos.js +32 -0
- package/dist/MafSequenceWidget/colToGenomePos.js.map +1 -0
- package/dist/MafSequenceWidget/colToGenomePos.test.d.ts +1 -0
- package/dist/MafSequenceWidget/colToGenomePos.test.js +136 -0
- package/dist/MafSequenceWidget/colToGenomePos.test.js.map +1 -0
- package/dist/MafSequenceWidget/configSchema.d.ts +1 -0
- package/dist/MafSequenceWidget/configSchema.js +3 -0
- package/dist/MafSequenceWidget/configSchema.js.map +1 -0
- package/dist/MafSequenceWidget/constants.d.ts +4 -0
- package/dist/MafSequenceWidget/constants.js +5 -0
- package/dist/MafSequenceWidget/constants.js.map +1 -0
- package/dist/MafSequenceWidget/index.d.ts +2 -0
- package/dist/MafSequenceWidget/index.js +16 -0
- package/dist/MafSequenceWidget/index.js.map +1 -0
- package/dist/MafSequenceWidget/stateModelFactory.d.ts +67 -0
- package/dist/MafSequenceWidget/stateModelFactory.js +21 -0
- package/dist/MafSequenceWidget/stateModelFactory.js.map +1 -0
- package/dist/MafTabixAdapter/MafTabixAdapter.js +4 -35
- package/dist/MafTabixAdapter/MafTabixAdapter.js.map +1 -1
- package/dist/MafTabixAdapter/configSchema.d.ts +4 -4
- package/dist/MafTrack/configSchema.d.ts +16 -11
- package/dist/index.js +2 -0
- package/dist/index.js.map +1 -1
- package/dist/jbrowse-plugin-mafviewer.umd.production.min.js +12 -24
- package/dist/jbrowse-plugin-mafviewer.umd.production.min.js.map +4 -4
- package/dist/util/clipboard.d.ts +2 -0
- package/dist/util/clipboard.js +28 -0
- package/dist/util/clipboard.js.map +1 -0
- package/dist/util/fastaUtils.d.ts +2 -1
- package/dist/util/fastaUtils.js +72 -2
- package/dist/util/fastaUtils.js.map +1 -1
- package/dist/util/fastaUtils.test.js +190 -0
- package/dist/util/fastaUtils.test.js.map +1 -1
- package/dist/util/parseAssemblyName.d.ts +32 -0
- package/dist/util/parseAssemblyName.js +87 -0
- package/dist/util/parseAssemblyName.js.map +1 -0
- package/dist/util/parseAssemblyName.test.d.ts +1 -0
- package/dist/util/parseAssemblyName.test.js +269 -0
- package/dist/util/parseAssemblyName.test.js.map +1 -0
- package/package.json +7 -7
- package/src/BigMafAdapter/BigMafAdapter.ts +5 -5
- package/src/LinearMafDisplay/components/LinearMafDisplayComponent.tsx +62 -144
- package/src/LinearMafDisplay/components/MAFTooltip.tsx +0 -3
- package/src/LinearMafDisplay/components/MsaHighlightOverlay.tsx +62 -0
- package/src/LinearMafDisplay/components/Sidebar/SvgWrapper.tsx +1 -1
- package/src/LinearMafDisplay/components/useDragSelection.ts +159 -0
- package/src/LinearMafDisplay/stateModel.ts +135 -48
- package/src/LinearMafDisplay/types.ts +2 -2
- package/src/LinearMafDisplay/util.ts +31 -5
- package/src/LinearMafRenderer/LinearMafRenderer.ts +1 -1
- package/src/LinearMafRenderer/components/LinearMafRendering.tsx +38 -24
- package/src/LinearMafRenderer/configSchema.ts +1 -6
- package/src/LinearMafRenderer/rendering/insertions.ts +2 -2
- package/src/LinearMafRenderer/rendering/mismatches.ts +3 -3
- package/src/LinearMafRenderer/rendering/types.ts +1 -1
- package/src/MafAddTrackWorkflow/AddTrackWorkflow.tsx +1 -1
- package/src/MafAddTrackWorkflow/index.ts +1 -1
- package/src/MafGetSequences/MafGetSequences.ts +10 -2
- package/src/MafSequenceWidget/LabelsCanvas.tsx +58 -0
- package/src/MafSequenceWidget/MafSequenceHoverHighlight.tsx +83 -0
- package/src/MafSequenceWidget/MafSequenceHoverHighlightExtension.tsx +24 -0
- package/src/MafSequenceWidget/MafSequenceWidget.tsx +294 -0
- package/src/MafSequenceWidget/SequenceCanvas.tsx +136 -0
- package/src/MafSequenceWidget/SequenceDisplay.tsx +188 -0
- package/src/MafSequenceWidget/SequenceTooltip.tsx +70 -0
- package/src/MafSequenceWidget/baseColors.ts +76 -0
- package/src/MafSequenceWidget/colToGenomePos.test.ts +166 -0
- package/src/MafSequenceWidget/colToGenomePos.ts +40 -0
- package/src/MafSequenceWidget/configSchema.ts +3 -0
- package/src/MafSequenceWidget/constants.ts +4 -0
- package/src/MafSequenceWidget/index.ts +24 -0
- package/src/MafSequenceWidget/stateModelFactory.ts +43 -0
- package/src/MafTabixAdapter/MafTabixAdapter.ts +12 -51
- package/src/index.ts +2 -0
- package/src/util/__snapshots__/fastaUtils.test.ts.snap +35 -0
- package/src/util/clipboard.ts +35 -0
- package/src/util/fastaUtils.test.ts +199 -0
- package/src/util/fastaUtils.ts +94 -1
- package/src/util/parseAssemblyName.test.ts +350 -0
- package/src/util/parseAssemblyName.ts +106 -0
- package/dist/LinearMafDisplay/components/GetSequenceDialog/GetSequenceDialog.d.ts +0 -11
- package/dist/LinearMafDisplay/components/GetSequenceDialog/GetSequenceDialog.js +0 -97
- package/dist/LinearMafDisplay/components/GetSequenceDialog/GetSequenceDialog.js.map +0 -1
- package/dist/LinearMafDisplay/components/InsertionSequenceDialog/InsertionSequenceDialog.d.ts +0 -14
- package/dist/LinearMafDisplay/components/InsertionSequenceDialog/InsertionSequenceDialog.js +0 -69
- package/dist/LinearMafDisplay/components/InsertionSequenceDialog/InsertionSequenceDialog.js.map +0 -1
- package/dist/LinearMafDisplay/components/util.d.ts +0 -1
- package/dist/LinearMafDisplay/components/util.js +0 -8
- package/dist/LinearMafDisplay/components/util.js.map +0 -1
- package/dist/util/fetchSequences.d.ts +0 -18
- package/dist/util/fetchSequences.js +0 -39
- package/dist/util/fetchSequences.js.map +0 -1
- package/dist/util/useSequences.d.ts +0 -21
- package/dist/util/useSequences.js +0 -64
- package/dist/util/useSequences.js.map +0 -1
- package/src/LinearMafDisplay/components/GetSequenceDialog/GetSequenceDialog.tsx +0 -175
- package/src/LinearMafDisplay/components/InsertionSequenceDialog/InsertionSequenceDialog.tsx +0 -105
- package/src/LinearMafDisplay/components/util.ts +0 -7
- package/src/util/fetchSequences.ts +0 -57
- package/src/util/useSequences.ts +0 -90
|
@@ -97,3 +97,202 @@ test('gap in assembly1', () => {
|
|
|
97
97
|
})
|
|
98
98
|
expect(result).toMatchSnapshot()
|
|
99
99
|
})
|
|
100
|
+
|
|
101
|
+
test('includeInsertions - single insertion in one sample', () => {
  // Alignment columns for this block (the reference row carries a 2-column gap):
  //   reference: AC--GTAC
  //   assembly1: AC--GTAC  (nothing inserted; its gaps line up with the reference gap)
  //   assembly2: ACTTGTAC  (TT inserted relative to the reference)
  const feature = new SimpleFeature({
    uniqueId: '123',
    refName: 'abc',
    start: 100,
    end: 106, // six reference bases (ACGTAC) once the gap columns are dropped
    seq: 'AC--GTAC',
    alignments: {
      assembly1: { chr: 'chr1', start: 100, seq: 'AC--GTAC', strand: 1 },
      assembly2: { chr: 'chr2', start: 200, seq: 'ACTTGTAC', strand: 1 },
    },
  })
  const regions = [
    { refName: 'chr1', start: 100, end: 106, assemblyName: 'assembly1' },
  ]

  // Expected: assembly1 shows gap columns at the insertion site while
  // assembly2 shows the inserted TT; the snapshot pins the exact rows
  expect(
    processFeaturesToFasta({
      features: makeMap([feature]),
      samples: [{ id: 'assembly1' }, { id: 'assembly2' }],
      includeInsertions: true,
      showAllLetters: true,
      regions,
    }),
  ).toMatchSnapshot()
})
|
|
146
|
+
|
|
147
|
+
test('includeInsertions - insertions in multiple samples with different lengths', () => {
  // Alignment columns (the reference row has a 3-column gap):
  //   reference: AC---GTAC
  //   assembly1: AC-T-GTAC  (a single inserted base, T, in the middle gap column)
  //   assembly2: ACTTTGTAC  (three inserted bases, TTT)
  const feature = new SimpleFeature({
    uniqueId: '123',
    refName: 'abc',
    start: 100,
    end: 106,
    seq: 'AC---GTAC',
    alignments: {
      assembly1: { chr: 'chr1', start: 100, seq: 'AC-T-GTAC', strand: 1 },
      assembly2: { chr: 'chr2', start: 200, seq: 'ACTTTGTAC', strand: 1 },
    },
  })
  const regions = [
    { refName: 'chr1', start: 100, end: 106, assemblyName: 'assembly1' },
  ]

  // Gap columns inside an insertion run are collected verbatim as '-', so
  // assembly1 contributes '-T-' (already 3 columns wide, no extra padding)
  // and assembly2 contributes 'TTT'; the snapshot pins the resulting rows
  expect(
    processFeaturesToFasta({
      features: makeMap([feature]),
      samples: [{ id: 'assembly1' }, { id: 'assembly2' }],
      includeInsertions: true,
      showAllLetters: true,
      regions,
    }),
  ).toMatchSnapshot()
})
|
|
192
|
+
|
|
193
|
+
test('includeInsertions - insertions at multiple positions', () => {
  // The reference row is gapped in two separate places:
  //   reference: A-CG-TAC
  //   assembly1: ATCGGTAC  (T inserted at alignment column 1, G at column 4)
  //   assembly2: A-CG-TAC  (no inserted bases)
  const feature = new SimpleFeature({
    uniqueId: '123',
    refName: 'abc',
    start: 100,
    end: 106,
    seq: 'A-CG-TAC',
    alignments: {
      assembly1: { chr: 'chr1', start: 100, seq: 'ATCGGTAC', strand: 1 },
      assembly2: { chr: 'chr2', start: 200, seq: 'A-CG-TAC', strand: 1 },
    },
  })
  const regions = [
    { refName: 'chr1', start: 100, end: 106, assemblyName: 'assembly1' },
  ]

  // Snapshot pins the rows after both insertion sites have been expanded
  expect(
    processFeaturesToFasta({
      features: makeMap([feature]),
      samples: [{ id: 'assembly1' }, { id: 'assembly2' }],
      includeInsertions: true,
      showAllLetters: true,
      regions,
    }),
  ).toMatchSnapshot()
})
|
|
236
|
+
|
|
237
|
+
test('includeInsertions=false ignores insertions', () => {
  // Same fixture as the single-insertion case: the reference row has a
  // 2-column gap and assembly2 carries TT in those columns
  const feature = new SimpleFeature({
    uniqueId: '123',
    refName: 'abc',
    start: 100,
    end: 106,
    seq: 'AC--GTAC',
    alignments: {
      assembly1: { chr: 'chr1', start: 100, seq: 'AC--GTAC', strand: 1 },
      assembly2: { chr: 'chr2', start: 200, seq: 'ACTTGTAC', strand: 1 },
    },
  })
  const regions = [
    { refName: 'chr1', start: 100, end: 106, assemblyName: 'assembly1' },
  ]

  const rows = processFeaturesToFasta({
    features: makeMap([feature]),
    samples: [{ id: 'assembly1' }, { id: 'assembly2' }],
    includeInsertions: false,
    showAllLetters: true,
    regions,
  })

  // With insertions disabled neither row is widened: each stays at the
  // 6 reference columns of the requested region
  expect(rows[0]).toHaveLength(6)
  expect(rows[1]).toHaveLength(6)
  expect(rows).toMatchSnapshot()
})
|
|
279
|
+
|
|
280
|
+
test('includeInsertions with no insertions present', () => {
  // `mockFeature` comes from the enclosing scope of this test file;
  // presumably its reference row has no gaps, hence no insertions to expand
  const regions = [
    { refName: 'chr1', start: 100, end: 105, assemblyName: 'assembly1' },
  ]

  // The output is expected to match what the same call produces without
  // includeInsertions, since there is nothing to insert
  expect(
    processFeaturesToFasta({
      features: makeMap([mockFeature]),
      samples: [{ id: 'assembly1' }, { id: 'assembly2' }],
      includeInsertions: true,
      showAllLetters: true,
      regions,
    }),
  ).toMatchSnapshot()
})
|
package/src/util/fastaUtils.ts
CHANGED
|
@@ -3,6 +3,11 @@ import { Sample } from '../LinearMafDisplay/types'
|
|
|
3
3
|
import type { AlignmentRecord } from '../LinearMafRenderer/rendering'
|
|
4
4
|
import type { Feature, Region } from '@jbrowse/core/util'
|
|
5
5
|
|
|
6
|
+
// One run of insertion columns collected for a single sample at one position.
interface InsertionInfo {
|
|
7
|
+
// Characters gathered for the run; '-' and ' ' in the alignment are stored as '-'
sequence: string
|
|
8
|
+
// Row index of the sample this run belongs to (index into the per-sample output rows)
sampleIndex: number
|
|
9
|
+
}
|
|
10
|
+
|
|
6
11
|
/**
|
|
7
12
|
* Process features into FASTA format
|
|
8
13
|
* @param features - The features to process
|
|
@@ -14,12 +19,14 @@ export function processFeaturesToFasta({
|
|
|
14
19
|
showAllLetters,
|
|
15
20
|
samples,
|
|
16
21
|
features,
|
|
22
|
+
includeInsertions,
|
|
17
23
|
}: {
|
|
18
24
|
regions: Region[]
|
|
19
25
|
samples: Sample[]
|
|
20
26
|
showAsUpperCase?: boolean
|
|
21
27
|
mismatchRendering?: boolean
|
|
22
28
|
showAllLetters?: boolean
|
|
29
|
+
includeInsertions?: boolean
|
|
23
30
|
features: Map<string, Feature>
|
|
24
31
|
}) {
|
|
25
32
|
const region = regions[0]!
|
|
@@ -29,6 +36,10 @@ export function processFeaturesToFasta({
|
|
|
29
36
|
// Use character arrays instead of strings for O(1) mutations
|
|
30
37
|
const outputRowsArrays = samples.map(() => new Array(rlen).fill('-'))
|
|
31
38
|
|
|
39
|
+
// Track insertions at each position if includeInsertions is enabled
|
|
40
|
+
// Key is the reference position (0-based relative to region), value is array of insertions
|
|
41
|
+
const insertionsAtPosition = new Map<number, InsertionInfo[]>()
|
|
42
|
+
|
|
32
43
|
for (const feature of features.values()) {
|
|
33
44
|
const leftCoord = feature.get('start')
|
|
34
45
|
const vals = feature.get('alignments') as Record<string, AlignmentRecord>
|
|
@@ -43,7 +54,7 @@ export function processFeaturesToFasta({
|
|
|
43
54
|
|
|
44
55
|
const rowArray = outputRowsArrays[row]!
|
|
45
56
|
|
|
46
|
-
// Single-pass processing: handle gaps, matches, and
|
|
57
|
+
// Single-pass processing: handle gaps, matches, mismatches, and collect insertions
|
|
47
58
|
for (let i = 0, o = 0, l = alignment.length; i < l; i++) {
|
|
48
59
|
if (seq[i] !== '-') {
|
|
49
60
|
const c = alignment[i]
|
|
@@ -67,11 +78,93 @@ export function processFeaturesToFasta({
|
|
|
67
78
|
}
|
|
68
79
|
}
|
|
69
80
|
o++
|
|
81
|
+
} else if (includeInsertions) {
|
|
82
|
+
// This is an insertion (reference has gap)
|
|
83
|
+
// Collect all consecutive insertion characters
|
|
84
|
+
let insertionSequence = ''
|
|
85
|
+
while (i < alignment.length && seq[i] === '-') {
|
|
86
|
+
const c = alignment[i]
|
|
87
|
+
insertionSequence += c !== '-' && c !== ' ' ? c : '-'
|
|
88
|
+
i++
|
|
89
|
+
}
|
|
90
|
+
i-- // Back up one since the outer loop will increment
|
|
91
|
+
|
|
92
|
+
if (insertionSequence.length > 0) {
|
|
93
|
+
// Position is relative to region start, insertions come after position o-1
|
|
94
|
+
// (or before position 0 if o is 0)
|
|
95
|
+
const insertPos = leftCoord + o - region.start
|
|
96
|
+
if (insertPos >= 0 && insertPos <= rlen) {
|
|
97
|
+
const existing = insertionsAtPosition.get(insertPos) || []
|
|
98
|
+
existing.push({ sequence: insertionSequence, sampleIndex: row })
|
|
99
|
+
insertionsAtPosition.set(insertPos, existing)
|
|
100
|
+
}
|
|
101
|
+
}
|
|
70
102
|
}
|
|
71
103
|
}
|
|
72
104
|
}
|
|
73
105
|
}
|
|
74
106
|
|
|
107
|
+
if (includeInsertions && insertionsAtPosition.size > 0) {
|
|
108
|
+
return expandWithInsertions(
|
|
109
|
+
outputRowsArrays,
|
|
110
|
+
insertionsAtPosition,
|
|
111
|
+
samples.length,
|
|
112
|
+
)
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
// Convert character arrays back to strings
|
|
116
|
+
return outputRowsArrays.map(arr => arr.join(''))
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
/**
 * Expand already-rendered rows so that they also contain insertion columns.
 *
 * For every insertion position, the widest insertion across all samples
 * decides how many columns are added. Samples with a shorter insertion are
 * right-padded with '-' and samples without one receive only '-'.
 *
 * When a single sample has several insertion records at the same position
 * (for example two features that both report an insertion at the coordinate
 * where they meet), the records are concatenated in collection order instead
 * of the last record silently replacing the earlier ones.
 *
 * @param outputRowsArrays - One character array per sample; mutated in place
 * @param insertionsAtPosition - Insertion records keyed by the 0-based column
 *   index in the unexpanded rows; new columns are placed immediately before
 *   that index (an index equal to the row length appends at the end)
 * @param numSamples - Number of rows in `outputRowsArrays` to expand
 * @returns One string per sample, every row widened by the same column count
 */
function expandWithInsertions(
  outputRowsArrays: string[][],
  insertionsAtPosition: Map<number, InsertionInfo[]>,
  numSamples: number,
) {
  // Work right-to-left so a splice never shifts an index that is still pending
  const sortedPositions = [...insertionsAtPosition.keys()].sort((a, b) => b - a)

  for (const pos of sortedPositions) {
    const insertions = insertionsAtPosition.get(pos)!

    // Merge all records of the same sample first; overwriting here would drop
    // inserted bases while still counting their length towards the width
    const sampleInsertions = new Map<number, string>()
    for (const { sampleIndex, sequence } of insertions) {
      const previous = sampleInsertions.get(sampleIndex) ?? ''
      sampleInsertions.set(sampleIndex, previous + sequence)
    }

    // Column width at this position = longest merged insertion of any sample
    let maxLen = 0
    for (const merged of sampleInsertions.values()) {
      if (merged.length > maxLen) {
        maxLen = merged.length
      }
    }

    for (let sampleIdx = 0; sampleIdx < numSamples; sampleIdx++) {
      const rowArray = outputRowsArrays[sampleIdx]!
      // A sample without an insertion starts from '' and becomes all gaps
      const paddedInsertion = (sampleInsertions.get(sampleIdx) ?? '').padEnd(
        maxLen,
        '-',
      )
      // splice(pos, 0, …) places the new columns before index `pos`
      rowArray.splice(pos, 0, ...paddedInsertion.split(''))
    }
  }

  // Convert character arrays back to strings
  return outputRowsArrays.map(arr => arr.join(''))
}
|
|
@@ -0,0 +1,350 @@
|
|
|
1
|
+
import { describe, expect, test } from 'vitest'
|
|
2
|
+
|
|
3
|
+
import {
|
|
4
|
+
parseAssemblyAndChr,
|
|
5
|
+
parseAssemblyAndChrSimple,
|
|
6
|
+
selectReferenceSequence,
|
|
7
|
+
} from './parseAssemblyName'
|
|
8
|
+
|
|
9
|
+
// Covers parseAssemblyAndChr: splitting a MAF source name into assembly name
// and chromosome/refName, including the optional numeric version component.
describe('parseAssemblyAndChr (MafTabix format)', () => {
  test('no dot - entire string is assembly name', () => {
    expect(parseAssemblyAndChr('hg38')).toEqual({
      assemblyName: 'hg38',
      chr: '',
    })
  })

  test('single dot - simple assembly.chr format', () => {
    expect(parseAssemblyAndChr('hg38.chr1')).toEqual({
      assemblyName: 'hg38',
      chr: 'chr1',
    })
  })

  test('single dot - assembly.refName format with non-chr name', () => {
    expect(parseAssemblyAndChr('mm10.scaffold_1')).toEqual({
      assemblyName: 'mm10',
      chr: 'scaffold_1',
    })
  })

  // A purely numeric second component is treated as part of the assembly name
  test('two dots with numeric version - assembly.version.chr format', () => {
    expect(parseAssemblyAndChr('hg38.1.chr1')).toEqual({
      assemblyName: 'hg38.1',
      chr: 'chr1',
    })
  })

  test('two dots with multi-digit numeric version', () => {
    expect(parseAssemblyAndChr('GRCh38.123.chrX')).toEqual({
      assemblyName: 'GRCh38.123',
      chr: 'chrX',
    })
  })

  // A non-numeric second component already belongs to the chromosome name
  test('two dots with non-numeric middle - assembly.chr.more format', () => {
    expect(parseAssemblyAndChr('mm10.chr1.random')).toEqual({
      assemblyName: 'mm10',
      chr: 'chr1.random',
    })
  })

  // The input has exactly one dot; the underscore is part of the chr name
  test('single dot - chrUn style unplaced contig naming', () => {
    expect(parseAssemblyAndChr('hg38.chrUn_gl000220')).toEqual({
      assemblyName: 'hg38',
      chr: 'chrUn_gl000220',
    })
  })

  test('three dots with numeric version - assembly.version.chr.more format', () => {
    expect(parseAssemblyAndChr('GRCh38.1.chr1.random')).toEqual({
      assemblyName: 'GRCh38.1',
      chr: 'chr1.random',
    })
  })

  test('empty string', () => {
    expect(parseAssemblyAndChr('')).toEqual({
      assemblyName: '',
      chr: '',
    })
  })

  test('just a dot', () => {
    expect(parseAssemblyAndChr('.')).toEqual({
      assemblyName: '',
      chr: '',
    })
  })

  test('leading dot', () => {
    expect(parseAssemblyAndChr('.chr1')).toEqual({
      assemblyName: '',
      chr: 'chr1',
    })
  })

  test('trailing dot', () => {
    expect(parseAssemblyAndChr('hg38.')).toEqual({
      assemblyName: 'hg38',
      chr: '',
    })
  })

  test('real world example - UCSC style', () => {
    expect(parseAssemblyAndChr('hg19.chr6_ssto_hap7')).toEqual({
      assemblyName: 'hg19',
      chr: 'chr6_ssto_hap7',
    })
  })

  test('real world example - Ensembl style with numeric', () => {
    expect(parseAssemblyAndChr('GRCh37.1.1')).toEqual({
      assemblyName: 'GRCh37.1',
      chr: '1',
    })
  })
})
|
|
122
|
+
|
|
123
|
+
// Covers parseAssemblyAndChrSimple: everything before the first dot is the
// assembly name, everything after it is the chromosome name.
describe('parseAssemblyAndChrSimple (BigMaf format)', () => {
  test('no dot - entire string is assembly name', () => {
    expect(parseAssemblyAndChrSimple('hg38')).toEqual({
      assemblyName: 'hg38',
      chr: '',
    })
  })

  test('single dot - simple org.chr format', () => {
    expect(parseAssemblyAndChrSimple('hg38.chr1')).toEqual({
      assemblyName: 'hg38',
      chr: 'chr1',
    })
  })

  test('multiple dots - only splits on first dot', () => {
    expect(parseAssemblyAndChrSimple('mm10.chr1.random')).toEqual({
      assemblyName: 'mm10',
      chr: 'chr1.random',
    })
  })

  test('empty string', () => {
    expect(parseAssemblyAndChrSimple('')).toEqual({
      assemblyName: '',
      chr: '',
    })
  })
})
|
|
156
|
+
|
|
157
|
+
// Covers selectReferenceSequence's lookup order: refAssemblyName first, then
// queryAssemblyName, then firstAssemblyNameFound, otherwise undefined.
describe('selectReferenceSequence', () => {
  // Shared fixture: three assemblies with easily distinguishable sequences
  const byAssembly = {
    hg38: { seq: 'ACGTACGT' },
    mm10: { seq: 'TGCATGCA' },
    panTro6: { seq: 'GGGGGGGG' },
  }

  test('uses refAssemblyName when provided and exists', () => {
    expect(
      selectReferenceSequence(byAssembly, 'mm10', 'hg38', 'panTro6'),
    ).toBe('TGCATGCA')
  })

  test('falls back to queryAssemblyName when refAssemblyName is empty', () => {
    expect(selectReferenceSequence(byAssembly, '', 'hg38', 'panTro6')).toBe(
      'ACGTACGT',
    )
  })

  test('falls back to queryAssemblyName when refAssemblyName is undefined', () => {
    expect(
      selectReferenceSequence(byAssembly, undefined, 'hg38', 'panTro6'),
    ).toBe('ACGTACGT')
  })

  test('falls back to firstAssemblyNameFound when queryAssemblyName does not match', () => {
    // 'galGal6' is not a key of the fixture
    expect(
      selectReferenceSequence(byAssembly, undefined, 'galGal6', 'hg38'),
    ).toBe('ACGTACGT')
  })

  test('falls back to firstAssemblyNameFound when both config values are empty', () => {
    expect(selectReferenceSequence(byAssembly, '', '', 'panTro6')).toBe(
      'GGGGGGGG',
    )
  })

  test('returns undefined when refAssemblyName does not exist in alignments', () => {
    expect(
      selectReferenceSequence(byAssembly, 'nonexistent', undefined, undefined),
    ).toBeUndefined()
  })

  test('returns undefined when no matches and all params undefined', () => {
    expect(
      selectReferenceSequence(byAssembly, undefined, undefined, undefined),
    ).toBeUndefined()
  })

  test('returns undefined for empty alignments object', () => {
    expect(
      selectReferenceSequence({}, 'hg38', 'mm10', 'panTro6'),
    ).toBeUndefined()
  })

  test('skips refAssemblyName when it does not exist and uses queryAssemblyName', () => {
    // 'galGal6' is not a key of the fixture
    expect(
      selectReferenceSequence(byAssembly, 'galGal6', 'hg38', 'panTro6'),
    ).toBe('ACGTACGT')
  })

  test('skips both refAssemblyName and queryAssemblyName when neither exists', () => {
    // neither 'galGal6' nor 'rn6' is a key of the fixture
    expect(
      selectReferenceSequence(byAssembly, 'galGal6', 'rn6', 'mm10'),
    ).toBe('TGCATGCA')
  })
})
|
|
249
|
+
|
|
250
|
+
// Scenario-style checks of the precedence between the configured reference
// assembly, the query assembly, and the first assembly found.
describe('assembly name lookup integration scenarios', () => {
  test('refAssemblyName config takes precedence over query.assemblyName', () => {
    const byAssembly = {
      hg38: { seq: 'REFERENCE_SEQ' },
      mm10: { seq: 'QUERY_SEQ' },
    }
    expect(selectReferenceSequence(byAssembly, 'hg38', 'mm10', 'mm10')).toBe(
      'REFERENCE_SEQ',
    )
  })

  test('query.assemblyName works when refAssemblyName not configured', () => {
    const byAssembly = {
      hg38: { seq: 'QUERY_SEQ' },
      mm10: { seq: 'OTHER_SEQ' },
    }
    expect(selectReferenceSequence(byAssembly, '', 'hg38', 'mm10')).toBe(
      'QUERY_SEQ',
    )
  })

  test('firstAssemblyNameFound is used as last resort fallback', () => {
    const byAssembly = {
      panTro6: { seq: 'FIRST_FOUND' },
      mm10: { seq: 'OTHER_SEQ' },
    }
    // The query assembly ('hg38') is not a key of the alignments here
    expect(selectReferenceSequence(byAssembly, '', 'hg38', 'panTro6')).toBe(
      'FIRST_FOUND',
    )
  })
})
|
|
279
|
+
|
|
280
|
+
// Source names in the style of real MAF files (UCSC-style worm assemblies).
describe('real-world MAF format parsing', () => {
  test('ce10.chrI from UCSC 7-way alignment', () => {
    expect(parseAssemblyAndChr('ce10.chrI')).toEqual({
      assemblyName: 'ce10',
      chr: 'chrI',
    })
  })

  test('caePb3.Scfld02_18 scaffold format', () => {
    expect(parseAssemblyAndChr('caePb3.Scfld02_18')).toEqual({
      assemblyName: 'caePb3',
      chr: 'Scfld02_18',
    })
  })

  test('caeRem4.Crem_Contig16 contig format', () => {
    expect(parseAssemblyAndChr('caeRem4.Crem_Contig16')).toEqual({
      assemblyName: 'caeRem4',
      chr: 'Crem_Contig16',
    })
  })

  test('cb4.chrI C. briggsae format', () => {
    expect(parseAssemblyAndChr('cb4.chrI')).toEqual({
      assemblyName: 'cb4',
      chr: 'chrI',
    })
  })

  test('multiple assemblies from same MAF block produce correct lookup', () => {
    const byAssembly = {
      ce10: { seq: 'TCTTTTAGTATTTGTAA' },
      caePb3: { seq: 'tcTTTTCGC-TTTATAA' },
    }

    // Querying with the ce10 assembly and no configured reference
    const viaQuery = selectReferenceSequence(byAssembly, '', 'ce10', 'ce10')
    expect(viaQuery).toBe('TCTTTTAGTATTTGTAA')

    // A configured refAssemblyName overrides the query assembly
    const viaConfig = selectReferenceSequence(
      byAssembly,
      'caePb3',
      'ce10',
      'ce10',
    )
    expect(viaConfig).toBe('tcTTTTCGC-TTTATAA')
  })
})
|
|
330
|
+
|
|
331
|
+
// Checks that the two halves of a parsed source name are cleanly separated.
describe('refName renaming compatibility', () => {
  test('parseAssemblyAndChr extracts chr correctly for refName alias matching', () => {
    // Scenario from the original author: the file says "chrI" while the
    // assembly defines an alias "I" -> "chrI"; the chr part extracted here is
    // what refName renaming (renameRegionsIfNeeded) is expected to receive
    expect(parseAssemblyAndChr('ce10.chrI').chr).toBe('chrI')
  })

  test('parseAssemblyAndChrSimple extracts chr correctly for refName alias matching', () => {
    expect(parseAssemblyAndChrSimple('ce10.chrI').chr).toBe('chrI')
  })

  test('assembly name is isolated from chr for assembly-based lookups', () => {
    // The assembly part (e.g. "ce10") must not carry any of the chr portion
    expect(parseAssemblyAndChr('ce10.chrI').assemblyName).toBe('ce10')
  })
})
|