@gmod/cram 7.0.3 → 8.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cram-bundle.js +1 -1
- package/dist/cramFile/codecs/beta.js +27 -3
- package/dist/cramFile/codecs/beta.js.map +1 -1
- package/dist/cramFile/codecs/external.d.ts +1 -0
- package/dist/cramFile/codecs/external.js +15 -0
- package/dist/cramFile/codecs/external.js.map +1 -1
- package/dist/cramFile/codecs/gamma.js +44 -8
- package/dist/cramFile/codecs/gamma.js.map +1 -1
- package/dist/cramFile/codecs/getBits.js +18 -2
- package/dist/cramFile/codecs/getBits.js.map +1 -1
- package/dist/cramFile/codecs/huffman.js +37 -3
- package/dist/cramFile/codecs/huffman.js.map +1 -1
- package/dist/cramFile/codecs/subexp.js +37 -15
- package/dist/cramFile/codecs/subexp.js.map +1 -1
- package/dist/cramFile/file.d.ts +1 -0
- package/dist/cramFile/file.js +44 -1
- package/dist/cramFile/file.js.map +1 -1
- package/dist/cramFile/record.d.ts +12 -1
- package/dist/cramFile/record.js +18 -5
- package/dist/cramFile/record.js.map +1 -1
- package/dist/cramFile/slice/decodeRecord.d.ts +4 -3
- package/dist/cramFile/slice/decodeRecord.js +95 -53
- package/dist/cramFile/slice/decodeRecord.js.map +1 -1
- package/dist/cramFile/slice/index.d.ts +3 -3
- package/dist/cramFile/slice/index.js +82 -9
- package/dist/cramFile/slice/index.js.map +1 -1
- package/dist/indexedCramFile.d.ts +3 -3
- package/dist/indexedCramFile.js +12 -9
- package/dist/indexedCramFile.js.map +1 -1
- package/dist/wasm/htscodecs.js +0 -0
- package/dist/wasm/htscodecs.js.map +1 -1
- package/dist/wasm/noodles-cram/noodles_cram_wasm.d.ts +1 -0
- package/dist/wasm/noodles-cram/noodles_cram_wasm.js +44 -0
- package/dist/wasm/noodles-cram/noodles_cram_wasm.js.map +1 -0
- package/dist/wasm/noodles-cram/noodles_cram_wasm_bg.d.ts +94 -0
- package/dist/wasm/noodles-cram/noodles_cram_wasm_bg.js +578 -0
- package/dist/wasm/noodles-cram/noodles_cram_wasm_bg.js.map +1 -0
- package/esm/cramFile/codecs/beta.js +27 -3
- package/esm/cramFile/codecs/beta.js.map +1 -1
- package/esm/cramFile/codecs/external.d.ts +1 -0
- package/esm/cramFile/codecs/external.js +15 -0
- package/esm/cramFile/codecs/external.js.map +1 -1
- package/esm/cramFile/codecs/gamma.js +43 -7
- package/esm/cramFile/codecs/gamma.js.map +1 -1
- package/esm/cramFile/codecs/getBits.js +18 -2
- package/esm/cramFile/codecs/getBits.js.map +1 -1
- package/esm/cramFile/codecs/huffman.js +37 -3
- package/esm/cramFile/codecs/huffman.js.map +1 -1
- package/esm/cramFile/codecs/subexp.js +36 -14
- package/esm/cramFile/codecs/subexp.js.map +1 -1
- package/esm/cramFile/file.d.ts +1 -0
- package/esm/cramFile/file.js +44 -1
- package/esm/cramFile/file.js.map +1 -1
- package/esm/cramFile/record.d.ts +12 -1
- package/esm/cramFile/record.js +17 -4
- package/esm/cramFile/record.js.map +1 -1
- package/esm/cramFile/slice/decodeRecord.d.ts +4 -3
- package/esm/cramFile/slice/decodeRecord.js +95 -53
- package/esm/cramFile/slice/decodeRecord.js.map +1 -1
- package/esm/cramFile/slice/index.d.ts +3 -3
- package/esm/cramFile/slice/index.js +49 -9
- package/esm/cramFile/slice/index.js.map +1 -1
- package/esm/indexedCramFile.d.ts +3 -3
- package/esm/indexedCramFile.js +12 -9
- package/esm/indexedCramFile.js.map +1 -1
- package/esm/wasm/htscodecs.js +0 -0
- package/esm/wasm/htscodecs.js.map +1 -1
- package/esm/wasm/noodles-cram/noodles_cram_wasm.d.ts +1 -0
- package/esm/wasm/noodles-cram/noodles_cram_wasm.js +6 -0
- package/esm/wasm/noodles-cram/noodles_cram_wasm.js.map +1 -0
- package/esm/wasm/noodles-cram/noodles_cram_wasm_bg.d.ts +94 -0
- package/esm/wasm/noodles-cram/noodles_cram_wasm_bg.js +529 -0
- package/esm/wasm/noodles-cram/noodles_cram_wasm_bg.js.map +1 -0
- package/package.json +8 -6
- package/src/cramFile/codecs/beta.ts +38 -4
- package/src/cramFile/codecs/external.ts +25 -0
- package/src/cramFile/codecs/gamma.ts +54 -12
- package/src/cramFile/codecs/getBits.ts +21 -2
- package/src/cramFile/codecs/huffman.ts +45 -3
- package/src/cramFile/codecs/subexp.ts +53 -16
- package/src/cramFile/file.ts +83 -1
- package/src/cramFile/record.ts +25 -5
- package/src/cramFile/slice/decodeRecord.ts +107 -55
- package/src/cramFile/slice/index.ts +82 -11
- package/src/indexedCramFile.ts +35 -27
- package/src/wasm/htscodecs.js +0 -0
- package/src/wasm/noodles-cram/.gitignore +1 -0
- package/src/wasm/noodles-cram/noodles_cram_wasm.d.ts +42 -0
- package/src/wasm/noodles-cram/noodles_cram_wasm.js +5 -0
- package/src/wasm/noodles-cram/noodles_cram_wasm_bg.js +541 -0
- package/src/wasm/noodles-cram/noodles_cram_wasm_bg.wasm +0 -0
- package/src/wasm/noodles-cram/noodles_cram_wasm_bg.wasm.d.ts +18 -0
- package/src/wasm/noodles-cram/package.json +17 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@gmod/cram",
|
|
3
|
-
"version": "7.0.3",
|
|
3
|
+
"version": "8.0.1",
|
|
4
4
|
"description": "read CRAM files with pure Javascript",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"repository": "GMOD/cram-js",
|
|
@@ -29,6 +29,8 @@
|
|
|
29
29
|
],
|
|
30
30
|
"scripts": {
|
|
31
31
|
"test": "vitest",
|
|
32
|
+
"benchonly": "vitest bench",
|
|
33
|
+
"bench": "./scripts/build-both-branches.sh \"$BRANCH1\" \"$BRANCH2\" && vitest bench",
|
|
32
34
|
"lint": "eslint --report-unused-disable-directives --max-warnings 0",
|
|
33
35
|
"format": "prettier --write .",
|
|
34
36
|
"docs": "documentation readme --shallow src/indexedCramFile.ts --section=IndexedCramFile; documentation readme --shallow src/cramFile/file.ts --section=CramFile; documentation readme --shallow src/craiIndex.ts --section=CraiIndex; documentation readme --shallow src/cramFile/file.ts --section=CramFile; documentation readme --shallow src/cramFile/record.ts --section=CramRecord",
|
|
@@ -56,20 +58,20 @@
|
|
|
56
58
|
"md5": "^2.2.1"
|
|
57
59
|
},
|
|
58
60
|
"devDependencies": {
|
|
59
|
-
"@gmod/indexedfasta": "^5.0.
|
|
61
|
+
"@gmod/indexedfasta": "^5.0.2",
|
|
60
62
|
"@types/md5": "^2.3.2",
|
|
61
|
-
"@vitest/coverage-v8": "^4.0.
|
|
63
|
+
"@vitest/coverage-v8": "^4.0.18",
|
|
62
64
|
"buffer": "^6.0.3",
|
|
63
65
|
"documentation": "^14.0.3",
|
|
64
66
|
"eslint": "^9.29.0",
|
|
65
67
|
"eslint-plugin-import": "^2.31.0",
|
|
66
68
|
"eslint-plugin-unicorn": "^62.0.0",
|
|
67
69
|
"mock-fs": "^5.2.0",
|
|
68
|
-
"prettier": "^3.
|
|
70
|
+
"prettier": "^3.8.1",
|
|
69
71
|
"rimraf": "^6.0.1",
|
|
70
72
|
"typescript": "^5.7.0",
|
|
71
|
-
"typescript-eslint": "^8.
|
|
72
|
-
"vitest": "^4.0.
|
|
73
|
+
"typescript-eslint": "^8.54.0",
|
|
74
|
+
"vitest": "^4.0.18",
|
|
73
75
|
"webpack": "^5.99.9",
|
|
74
76
|
"webpack-cli": "^6.0.1"
|
|
75
77
|
},
|
|
package/src/cramFile/codecs/beta.ts
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
|
-
import CramCodec, { Cursors } from './_base.ts'
|
|
2
|
-
import { getBits } from './getBits.ts'
|
|
1
|
+
import CramCodec, { Cursor, Cursors } from './_base.ts'
|
|
3
2
|
import { CramUnimplementedError } from '../../errors.ts'
|
|
4
3
|
import { BetaEncoding } from '../encoding.ts'
|
|
5
4
|
import { CramFileBlock } from '../file.ts'
|
|
@@ -24,11 +23,46 @@ export default class BetaCodec extends CramCodec<
|
|
|
24
23
|
_blocksByContentId: Record<number, CramFileBlock>,
|
|
25
24
|
cursors: Cursors,
|
|
26
25
|
) {
|
|
27
|
-
|
|
26
|
+
return decodeBetaInline(
|
|
28
27
|
coreDataBlock.content,
|
|
29
28
|
cursors.coreBlock,
|
|
30
29
|
this.parameters.length,
|
|
30
|
+
this.parameters.offset,
|
|
31
31
|
)
|
|
32
|
-
return fromBits - this.parameters.offset
|
|
33
32
|
}
|
|
34
33
|
}
|
|
34
|
+
|
|
35
|
+
/**
|
|
36
|
+
* Optimized beta decoder with inlined bit reading.
|
|
37
|
+
*/
|
|
38
|
+
function decodeBetaInline(
|
|
39
|
+
data: Uint8Array,
|
|
40
|
+
cursor: Cursor,
|
|
41
|
+
numBits: number,
|
|
42
|
+
offset: number,
|
|
43
|
+
): number {
|
|
44
|
+
let { bytePosition, bitPosition } = cursor
|
|
45
|
+
|
|
46
|
+
// Fast path: reading exactly 8 bits when byte-aligned
|
|
47
|
+
if (numBits === 8 && bitPosition === 7) {
|
|
48
|
+
const val = data[bytePosition]!
|
|
49
|
+
cursor.bytePosition = bytePosition + 1
|
|
50
|
+
return val - offset
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
// General case
|
|
54
|
+
let val = 0
|
|
55
|
+
for (let i = 0; i < numBits; i++) {
|
|
56
|
+
val <<= 1
|
|
57
|
+
val |= (data[bytePosition]! >> bitPosition) & 1
|
|
58
|
+
bitPosition -= 1
|
|
59
|
+
if (bitPosition < 0) {
|
|
60
|
+
bytePosition += 1
|
|
61
|
+
bitPosition = 7
|
|
62
|
+
}
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
cursor.bytePosition = bytePosition
|
|
66
|
+
cursor.bitPosition = bitPosition
|
|
67
|
+
return val - offset
|
|
68
|
+
}
|
|
package/src/cramFile/codecs/external.ts
CHANGED
|
@@ -52,4 +52,29 @@ export default class ExternalCodec extends CramCodec<
|
|
|
52
52
|
return contentBlock.content[cursor.bytePosition++]!
|
|
53
53
|
}
|
|
54
54
|
}
|
|
55
|
+
|
|
56
|
+
getBytesSubarray(
|
|
57
|
+
blocksByContentId: Record<number, CramFileBlock>,
|
|
58
|
+
cursors: Cursors,
|
|
59
|
+
length: number,
|
|
60
|
+
): Uint8Array | undefined {
|
|
61
|
+
const { blockContentId } = this.parameters
|
|
62
|
+
const contentBlock = blocksByContentId[blockContentId]
|
|
63
|
+
if (!contentBlock) {
|
|
64
|
+
return undefined
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
const cursor = cursors.externalBlocks.getCursor(blockContentId)
|
|
68
|
+
const start = cursor.bytePosition
|
|
69
|
+
const end = start + length
|
|
70
|
+
|
|
71
|
+
if (end > contentBlock.content.length) {
|
|
72
|
+
throw new CramBufferOverrunError(
|
|
73
|
+
'attempted to read beyond end of block. this file seems truncated.',
|
|
74
|
+
)
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
cursor.bytePosition = end
|
|
78
|
+
return contentBlock.content.subarray(start, end)
|
|
79
|
+
}
|
|
55
80
|
}
|
|
package/src/cramFile/codecs/gamma.ts
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
|
-
import CramCodec, { Cursors } from './_base.ts'
|
|
2
|
-
import { getBits } from './getBits.ts'
|
|
1
|
+
import CramCodec, { Cursor, Cursors } from './_base.ts'
|
|
3
2
|
import { CramUnimplementedError } from '../../errors.ts'
|
|
4
3
|
import { GammaEncoding } from '../encoding.ts'
|
|
5
4
|
import { CramFileBlock } from '../file.ts'
|
|
@@ -24,19 +23,62 @@ export default class GammaCodec extends CramCodec<
|
|
|
24
23
|
_blocksByContentId: Record<number, CramFileBlock>,
|
|
25
24
|
cursors: Cursors,
|
|
26
25
|
) {
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
while (getBits(coreDataBlock.content, cursors.coreBlock, 1) === 0) {
|
|
30
|
-
length = length + 1
|
|
31
|
-
}
|
|
32
|
-
|
|
33
|
-
const readBits = getBits(
|
|
26
|
+
return decodeGammaInline(
|
|
34
27
|
coreDataBlock.content,
|
|
35
28
|
cursors.coreBlock,
|
|
36
|
-
|
|
29
|
+
this.parameters.offset,
|
|
37
30
|
)
|
|
31
|
+
}
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
/**
|
|
35
|
+
* Optimized gamma decoder with inlined bit reading.
|
|
36
|
+
* Avoids function call overhead by inlining the getBits logic.
|
|
37
|
+
*/
|
|
38
|
+
function decodeGammaInline(
|
|
39
|
+
data: Uint8Array,
|
|
40
|
+
cursor: Cursor,
|
|
41
|
+
offset: number,
|
|
42
|
+
): number {
|
|
43
|
+
let { bytePosition, bitPosition } = cursor
|
|
44
|
+
let length = 1
|
|
38
45
|
|
|
39
|
-
|
|
40
|
-
|
|
46
|
+
// Count leading zeros (each 0 bit increases length)
|
|
47
|
+
// Inline single-bit reads for the while loop
|
|
48
|
+
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
|
|
49
|
+
while (true) {
|
|
50
|
+
const bit = (data[bytePosition]! >> bitPosition) & 1
|
|
51
|
+
bitPosition -= 1
|
|
52
|
+
if (bitPosition < 0) {
|
|
53
|
+
bytePosition += 1
|
|
54
|
+
bitPosition = 7
|
|
55
|
+
}
|
|
56
|
+
if (bit === 1) {
|
|
57
|
+
break
|
|
58
|
+
}
|
|
59
|
+
length += 1
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
// Now read (length - 1) more bits for the value
|
|
63
|
+
let readBits = 0
|
|
64
|
+
const bitsToRead = length - 1
|
|
65
|
+
if (bitsToRead > 0) {
|
|
66
|
+
// Optimized multi-bit read
|
|
67
|
+
for (let i = 0; i < bitsToRead; i++) {
|
|
68
|
+
readBits <<= 1
|
|
69
|
+
readBits |= (data[bytePosition]! >> bitPosition) & 1
|
|
70
|
+
bitPosition -= 1
|
|
71
|
+
if (bitPosition < 0) {
|
|
72
|
+
bytePosition += 1
|
|
73
|
+
bitPosition = 7
|
|
74
|
+
}
|
|
75
|
+
}
|
|
41
76
|
}
|
|
77
|
+
|
|
78
|
+
// Update cursor
|
|
79
|
+
cursor.bytePosition = bytePosition
|
|
80
|
+
cursor.bitPosition = bitPosition as Cursor['bitPosition']
|
|
81
|
+
|
|
82
|
+
const value = readBits | (1 << (length - 1))
|
|
83
|
+
return value - offset
|
|
42
84
|
}
|
|
package/src/cramFile/codecs/getBits.ts
CHANGED
|
@@ -5,7 +5,6 @@ export function getBits(
|
|
|
5
5
|
cursor: { bytePosition: number; bitPosition: number },
|
|
6
6
|
numBits: number,
|
|
7
7
|
) {
|
|
8
|
-
let val = 0
|
|
9
8
|
if (
|
|
10
9
|
cursor.bytePosition + (7 - cursor.bitPosition + numBits) / 8 >
|
|
11
10
|
data.length
|
|
@@ -14,8 +13,28 @@ export function getBits(
|
|
|
14
13
|
'read error during decoding. the file seems to be truncated.',
|
|
15
14
|
)
|
|
16
15
|
}
|
|
16
|
+
|
|
17
|
+
// Fast path: reading exactly 8 bits when byte-aligned
|
|
18
|
+
if (numBits === 8 && cursor.bitPosition === 7) {
|
|
19
|
+
const val = data[cursor.bytePosition]!
|
|
20
|
+
cursor.bytePosition += 1
|
|
21
|
+
return val
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
// Fast path: reading exactly 1 bit
|
|
25
|
+
if (numBits === 1) {
|
|
26
|
+
const val = (data[cursor.bytePosition]! >> cursor.bitPosition) & 1
|
|
27
|
+
cursor.bitPosition -= 1
|
|
28
|
+
if (cursor.bitPosition < 0) {
|
|
29
|
+
cursor.bytePosition += 1
|
|
30
|
+
cursor.bitPosition = 7
|
|
31
|
+
}
|
|
32
|
+
return val
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
// General case: bit-by-bit loop
|
|
36
|
+
let val = 0
|
|
17
37
|
for (let dlen = numBits; dlen; dlen--) {
|
|
18
|
-
// get the next `dlen` bits in the input, put them in val
|
|
19
38
|
val <<= 1
|
|
20
39
|
val |= (data[cursor.bytePosition]! >> cursor.bitPosition) & 1
|
|
21
40
|
cursor.bitPosition -= 1
|
|
package/src/cramFile/codecs/huffman.ts
CHANGED
|
@@ -1,10 +1,49 @@
|
|
|
1
1
|
import CramCodec, { Cursor, Cursors } from './_base.ts'
|
|
2
|
-
import { getBits } from './getBits.ts'
|
|
3
2
|
import { CramMalformedError } from '../../errors.ts'
|
|
4
3
|
import { HuffmanEncoding } from '../encoding.ts'
|
|
5
4
|
import { CramFileBlock } from '../file.ts'
|
|
6
5
|
import CramSlice from '../slice/index.ts'
|
|
7
6
|
|
|
7
|
+
/**
|
|
8
|
+
* Inlined getBits for huffman decoding - avoids function call overhead
|
|
9
|
+
*/
|
|
10
|
+
function getBitsInline(
|
|
11
|
+
data: Uint8Array,
|
|
12
|
+
cursor: Cursor,
|
|
13
|
+
numBits: number,
|
|
14
|
+
): number {
|
|
15
|
+
let { bytePosition, bitPosition } = cursor
|
|
16
|
+
|
|
17
|
+
// Fast path for single bit (common in huffman)
|
|
18
|
+
if (numBits === 1) {
|
|
19
|
+
const val = (data[bytePosition]! >> bitPosition) & 1
|
|
20
|
+
bitPosition -= 1
|
|
21
|
+
if (bitPosition < 0) {
|
|
22
|
+
bytePosition += 1
|
|
23
|
+
bitPosition = 7
|
|
24
|
+
}
|
|
25
|
+
cursor.bytePosition = bytePosition
|
|
26
|
+
cursor.bitPosition = bitPosition as Cursor['bitPosition']
|
|
27
|
+
return val
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
// General case
|
|
31
|
+
let val = 0
|
|
32
|
+
for (let i = 0; i < numBits; i++) {
|
|
33
|
+
val <<= 1
|
|
34
|
+
val |= (data[bytePosition]! >> bitPosition) & 1
|
|
35
|
+
bitPosition -= 1
|
|
36
|
+
if (bitPosition < 0) {
|
|
37
|
+
bytePosition += 1
|
|
38
|
+
bitPosition = 7
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
cursor.bytePosition = bytePosition
|
|
43
|
+
cursor.bitPosition = bitPosition
|
|
44
|
+
return val
|
|
45
|
+
}
|
|
46
|
+
|
|
8
47
|
function numberOfSetBits(ii: number) {
|
|
9
48
|
let i = (ii - (ii >> 1)) & 0x55555555
|
|
10
49
|
i = (i & 0x33333333) + ((i >> 2) & 0x33333333)
|
|
@@ -144,8 +183,11 @@ export default class HuffmanIntCodec extends CramCodec<
|
|
|
144
183
|
let bits = 0
|
|
145
184
|
for (let i = 0; i < this.sortedCodes.length; i += 1) {
|
|
146
185
|
const length = this.sortedCodes[i]!.bitLength
|
|
147
|
-
|
|
148
|
-
|
|
186
|
+
const bitsToRead = length - prevLen
|
|
187
|
+
if (bitsToRead > 0) {
|
|
188
|
+
bits <<= bitsToRead
|
|
189
|
+
bits |= getBitsInline(input, coreCursor, bitsToRead)
|
|
190
|
+
}
|
|
149
191
|
prevLen = length
|
|
150
192
|
{
|
|
151
193
|
const index = this.bitCodeToValue[bits]!
|
|
package/src/cramFile/codecs/subexp.ts
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
|
-
import CramCodec, { Cursors } from './_base.ts'
|
|
2
|
-
import { getBits } from './getBits.ts'
|
|
1
|
+
import CramCodec, { Cursor, Cursors } from './_base.ts'
|
|
3
2
|
import { CramUnimplementedError } from '../../errors.ts'
|
|
4
3
|
import { SubexpEncoding } from '../encoding.ts'
|
|
5
4
|
import { CramFileBlock } from '../file.ts'
|
|
@@ -24,22 +23,60 @@ export default class SubexpCodec extends CramCodec<
|
|
|
24
23
|
_blocksByContentId: Record<number, CramFileBlock>,
|
|
25
24
|
cursors: Cursors,
|
|
26
25
|
) {
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
26
|
+
return decodeSubexpInline(
|
|
27
|
+
coreDataBlock.content,
|
|
28
|
+
cursors.coreBlock,
|
|
29
|
+
this.parameters.K,
|
|
30
|
+
this.parameters.offset,
|
|
31
|
+
)
|
|
32
|
+
}
|
|
33
|
+
}
|
|
31
34
|
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
35
|
+
/**
|
|
36
|
+
* Optimized subexp decoder with inlined bit reading.
|
|
37
|
+
*/
|
|
38
|
+
function decodeSubexpInline(
|
|
39
|
+
data: Uint8Array,
|
|
40
|
+
cursor: Cursor,
|
|
41
|
+
K: number,
|
|
42
|
+
offset: number,
|
|
43
|
+
): number {
|
|
44
|
+
let { bytePosition, bitPosition } = cursor
|
|
45
|
+
|
|
46
|
+
// Count leading ones (inline single-bit reads)
|
|
47
|
+
let numLeadingOnes = 0
|
|
48
|
+
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
|
|
49
|
+
while (true) {
|
|
50
|
+
const bit = (data[bytePosition]! >> bitPosition) & 1
|
|
51
|
+
bitPosition -= 1
|
|
52
|
+
if (bitPosition < 0) {
|
|
53
|
+
bytePosition += 1
|
|
54
|
+
bitPosition = 7
|
|
55
|
+
}
|
|
56
|
+
if (bit === 0) {
|
|
57
|
+
break
|
|
41
58
|
}
|
|
59
|
+
numLeadingOnes += 1
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
// Determine how many bits to read for the value
|
|
63
|
+
const b = numLeadingOnes === 0 ? K : numLeadingOnes + K - 1
|
|
42
64
|
|
|
43
|
-
|
|
65
|
+
// Read b bits
|
|
66
|
+
let bits = 0
|
|
67
|
+
for (let i = 0; i < b; i++) {
|
|
68
|
+
bits <<= 1
|
|
69
|
+
bits |= (data[bytePosition]! >> bitPosition) & 1
|
|
70
|
+
bitPosition -= 1
|
|
71
|
+
if (bitPosition < 0) {
|
|
72
|
+
bytePosition += 1
|
|
73
|
+
bitPosition = 7
|
|
74
|
+
}
|
|
44
75
|
}
|
|
76
|
+
|
|
77
|
+
cursor.bytePosition = bytePosition
|
|
78
|
+
cursor.bitPosition = bitPosition as Cursor['bitPosition']
|
|
79
|
+
|
|
80
|
+
const n = numLeadingOnes === 0 ? bits : (1 << b) | bits
|
|
81
|
+
return n - offset
|
|
45
82
|
}
|
package/src/cramFile/file.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import crc32 from 'crc/calculators/crc32'
|
|
2
1
|
import QuickLRU from '@jbrowse/quick-lru'
|
|
2
|
+
import crc32 from 'crc/calculators/crc32'
|
|
3
3
|
|
|
4
4
|
import { CramMalformedError, CramUnimplementedError } from '../errors.ts'
|
|
5
5
|
import * as htscodecs from '../htscodecs/index.ts'
|
|
@@ -380,6 +380,88 @@ export default class CramFile {
|
|
|
380
380
|
|
|
381
381
|
return block
|
|
382
382
|
}
|
|
383
|
+
|
|
384
|
+
async readBlockFromBuffer(
|
|
385
|
+
buffer: Uint8Array,
|
|
386
|
+
bufferOffset: number,
|
|
387
|
+
filePosition: number,
|
|
388
|
+
) {
|
|
389
|
+
const { majorVersion } = await this.getDefinition()
|
|
390
|
+
if (!this._sectionParsers) {
|
|
391
|
+
this._sectionParsers = getSectionParsers(majorVersion)
|
|
392
|
+
}
|
|
393
|
+
const sectionParsers = this._sectionParsers
|
|
394
|
+
const { cramBlockHeader } = sectionParsers
|
|
395
|
+
|
|
396
|
+
const headerBytes = buffer.subarray(
|
|
397
|
+
bufferOffset,
|
|
398
|
+
bufferOffset + cramBlockHeader.maxLength,
|
|
399
|
+
)
|
|
400
|
+
const blockHeader = parseItem(
|
|
401
|
+
headerBytes,
|
|
402
|
+
cramBlockHeader.parser,
|
|
403
|
+
0,
|
|
404
|
+
filePosition,
|
|
405
|
+
)
|
|
406
|
+
const blockContentPosition = blockHeader._endPosition
|
|
407
|
+
const contentOffset = bufferOffset + blockHeader._size
|
|
408
|
+
|
|
409
|
+
const d = buffer.subarray(
|
|
410
|
+
contentOffset,
|
|
411
|
+
contentOffset + blockHeader.compressedSize,
|
|
412
|
+
)
|
|
413
|
+
const uncompressedData =
|
|
414
|
+
blockHeader.compressionMethod !== 'raw'
|
|
415
|
+
? await this._uncompress(
|
|
416
|
+
blockHeader.compressionMethod,
|
|
417
|
+
d,
|
|
418
|
+
blockHeader.uncompressedSize,
|
|
419
|
+
)
|
|
420
|
+
: d
|
|
421
|
+
|
|
422
|
+
const block: CramFileBlock = {
|
|
423
|
+
...blockHeader,
|
|
424
|
+
_endPosition: blockContentPosition,
|
|
425
|
+
contentPosition: blockContentPosition,
|
|
426
|
+
content: uncompressedData,
|
|
427
|
+
}
|
|
428
|
+
if (majorVersion >= 3) {
|
|
429
|
+
const crcOffset = contentOffset + blockHeader.compressedSize
|
|
430
|
+
const crcBytes = buffer.subarray(
|
|
431
|
+
crcOffset,
|
|
432
|
+
crcOffset + sectionParsers.cramBlockCrc32.maxLength,
|
|
433
|
+
)
|
|
434
|
+
const crc = parseItem(
|
|
435
|
+
crcBytes,
|
|
436
|
+
sectionParsers.cramBlockCrc32.parser,
|
|
437
|
+
0,
|
|
438
|
+
blockContentPosition + blockHeader.compressedSize,
|
|
439
|
+
)
|
|
440
|
+
block.crc32 = crc.crc32
|
|
441
|
+
|
|
442
|
+
if (this.validateChecksums) {
|
|
443
|
+
const blockData = buffer.subarray(
|
|
444
|
+
bufferOffset,
|
|
445
|
+
bufferOffset + blockHeader._size + blockHeader.compressedSize,
|
|
446
|
+
)
|
|
447
|
+
const calculatedCrc32 = crc32(blockData) >>> 0
|
|
448
|
+
if (calculatedCrc32 !== crc.crc32) {
|
|
449
|
+
throw new CramMalformedError(
|
|
450
|
+
`crc mismatch in block data: recorded CRC32 = ${crc.crc32}, but calculated CRC32 = ${calculatedCrc32}`,
|
|
451
|
+
)
|
|
452
|
+
}
|
|
453
|
+
}
|
|
454
|
+
|
|
455
|
+
block._endPosition = crc._endPosition
|
|
456
|
+
block._size =
|
|
457
|
+
block.compressedSize + sectionParsers.cramBlockCrc32.maxLength
|
|
458
|
+
} else {
|
|
459
|
+
block._endPosition = blockContentPosition + block.compressedSize
|
|
460
|
+
block._size = block.compressedSize
|
|
461
|
+
}
|
|
462
|
+
|
|
463
|
+
return block
|
|
464
|
+
}
|
|
383
465
|
}
|
|
384
466
|
|
|
385
467
|
'getDefinition getSectionParsers getSamHeader'.split(' ').forEach(method => {
|
package/src/cramFile/record.ts
CHANGED
|
@@ -18,6 +18,15 @@ export interface ReadFeature {
|
|
|
18
18
|
sub?: string
|
|
19
19
|
}
|
|
20
20
|
|
|
21
|
+
export interface DecodeOptions {
|
|
22
|
+
/** Whether to parse tags. If false, raw tag data is stored for lazy parsing. Default true. */
|
|
23
|
+
decodeTags?: boolean
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
export const defaultDecodeOptions: Required<DecodeOptions> = {
|
|
27
|
+
decodeTags: true,
|
|
28
|
+
}
|
|
29
|
+
|
|
21
30
|
function decodeReadSequence(cramRecord: CramRecord, refRegion: RefRegion) {
|
|
22
31
|
// if it has no length, it has no sequence
|
|
23
32
|
if (!cramRecord.lengthOnRef && !cramRecord.readLength) {
|
|
@@ -238,7 +247,7 @@ export default class CramRecord {
|
|
|
238
247
|
public sequenceId: number
|
|
239
248
|
public readGroupId: number
|
|
240
249
|
public mappingQuality: number | undefined
|
|
241
|
-
public qualityScores:
|
|
250
|
+
public qualityScores: Uint8Array | null | undefined
|
|
242
251
|
|
|
243
252
|
constructor({
|
|
244
253
|
flags,
|
|
@@ -294,6 +303,15 @@ export default class CramRecord {
|
|
|
294
303
|
}
|
|
295
304
|
}
|
|
296
305
|
|
|
306
|
+
/**
|
|
307
|
+
* Get a single quality score at the given index.
|
|
308
|
+
* @param index 0-based index into the quality scores
|
|
309
|
+
* @returns the quality score at that index, or undefined if not available
|
|
310
|
+
*/
|
|
311
|
+
qualityScoreAt(index: number): number | undefined {
|
|
312
|
+
return this.qualityScores?.[index]
|
|
313
|
+
}
|
|
314
|
+
|
|
297
315
|
/**
|
|
298
316
|
* @returns {boolean} true if the read is paired, regardless of whether both segments are mapped
|
|
299
317
|
*/
|
|
@@ -401,8 +419,7 @@ export default class CramRecord {
|
|
|
401
419
|
!this.isSegmentUnmapped() &&
|
|
402
420
|
this.isPaired() &&
|
|
403
421
|
!this.isMateUnmapped() &&
|
|
404
|
-
this.mate &&
|
|
405
|
-
this.sequenceId === this.mate.sequenceId
|
|
422
|
+
this.sequenceId === this.mate?.sequenceId
|
|
406
423
|
) {
|
|
407
424
|
const s1 = this.isReverseComplemented() ? 'R' : 'F'
|
|
408
425
|
const s2 = this.isMateReverseComplemented() ? 'R' : 'F'
|
|
@@ -460,7 +477,7 @@ export default class CramRecord {
|
|
|
460
477
|
if (this.readFeatures) {
|
|
461
478
|
// use the reference bases to decode the bases substituted in each base
|
|
462
479
|
// substitution
|
|
463
|
-
this.readFeatures
|
|
480
|
+
for (const readFeature of this.readFeatures) {
|
|
464
481
|
if (readFeature.code === 'X') {
|
|
465
482
|
decodeBaseSubstitution(
|
|
466
483
|
this,
|
|
@@ -469,7 +486,7 @@ export default class CramRecord {
|
|
|
469
486
|
readFeature,
|
|
470
487
|
)
|
|
471
488
|
}
|
|
472
|
-
}
|
|
489
|
+
}
|
|
473
490
|
}
|
|
474
491
|
|
|
475
492
|
// if this region completely covers this read,
|
|
@@ -494,6 +511,9 @@ export default class CramRecord {
|
|
|
494
511
|
})
|
|
495
512
|
|
|
496
513
|
data.readBases = this.getReadBases()
|
|
514
|
+
data.qualityScores = this.qualityScores
|
|
515
|
+
? Array.from(this.qualityScores)
|
|
516
|
+
: this.qualityScores
|
|
497
517
|
|
|
498
518
|
return data
|
|
499
519
|
}
|