@gmod/cram 3.0.4 → 3.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/craiIndex.d.ts +3 -5
- package/dist/craiIndex.js +28 -30
- package/dist/craiIndex.js.map +1 -1
- package/dist/cram-bundle.js +1 -1
- package/dist/cramFile/codecs/byteArrayLength.js +3 -6
- package/dist/cramFile/codecs/byteArrayLength.js.map +1 -1
- package/dist/cramFile/codecs/byteArrayStop.d.ts +0 -1
- package/dist/cramFile/codecs/byteArrayStop.js +0 -6
- package/dist/cramFile/codecs/byteArrayStop.js.map +1 -1
- package/dist/cramFile/codecs/external.js +0 -1
- package/dist/cramFile/codecs/external.js.map +1 -1
- package/dist/cramFile/codecs/getBits.js.map +1 -1
- package/dist/cramFile/codecs/huffman.js +1 -2
- package/dist/cramFile/codecs/huffman.js.map +1 -1
- package/dist/cramFile/container/compressionScheme.d.ts +1 -1
- package/dist/cramFile/container/compressionScheme.js +12 -4
- package/dist/cramFile/container/compressionScheme.js.map +1 -1
- package/dist/cramFile/container/index.js +3 -1
- package/dist/cramFile/container/index.js.map +1 -1
- package/dist/cramFile/file.d.ts +1 -1
- package/dist/cramFile/file.js +9 -10
- package/dist/cramFile/file.js.map +1 -1
- package/dist/cramFile/record.js +1 -7
- package/dist/cramFile/record.js.map +1 -1
- package/dist/cramFile/sectionParsers.js.map +1 -1
- package/dist/cramFile/slice/decodeRecord.js +6 -11
- package/dist/cramFile/slice/decodeRecord.js.map +1 -1
- package/dist/cramFile/slice/index.d.ts +3 -2
- package/dist/cramFile/slice/index.js +10 -16
- package/dist/cramFile/slice/index.js.map +1 -1
- package/dist/cramFile/util.js.map +1 -1
- package/dist/indexedCramFile.js +0 -3
- package/dist/indexedCramFile.js.map +1 -1
- package/dist/rans/d04.js +1 -1
- package/dist/rans/d04.js.map +1 -1
- package/dist/rans/d14.js +1 -1
- package/dist/rans/d14.js.map +1 -1
- package/dist/rans/decoding.js +1 -1
- package/dist/rans/decoding.js.map +1 -1
- package/dist/rans/frequencies.js +1 -1
- package/dist/rans/frequencies.js.map +1 -1
- package/dist/rans/index.js +1 -1
- package/dist/rans/index.js.map +1 -1
- package/esm/craiIndex.d.ts +3 -5
- package/esm/craiIndex.js +63 -64
- package/esm/craiIndex.js.map +1 -1
- package/esm/cramFile/codecs/byteArrayLength.js +3 -6
- package/esm/cramFile/codecs/byteArrayLength.js.map +1 -1
- package/esm/cramFile/codecs/byteArrayStop.d.ts +0 -1
- package/esm/cramFile/codecs/byteArrayStop.js +0 -6
- package/esm/cramFile/codecs/byteArrayStop.js.map +1 -1
- package/esm/cramFile/codecs/external.js +0 -1
- package/esm/cramFile/codecs/external.js.map +1 -1
- package/esm/cramFile/codecs/getBits.js.map +1 -1
- package/esm/cramFile/codecs/huffman.js +1 -2
- package/esm/cramFile/codecs/huffman.js.map +1 -1
- package/esm/cramFile/container/compressionScheme.d.ts +1 -1
- package/esm/cramFile/container/compressionScheme.js +12 -4
- package/esm/cramFile/container/compressionScheme.js.map +1 -1
- package/esm/cramFile/container/index.js +3 -1
- package/esm/cramFile/container/index.js.map +1 -1
- package/esm/cramFile/file.d.ts +1 -1
- package/esm/cramFile/file.js +9 -10
- package/esm/cramFile/file.js.map +1 -1
- package/esm/cramFile/record.js +1 -7
- package/esm/cramFile/record.js.map +1 -1
- package/esm/cramFile/sectionParsers.js.map +1 -1
- package/esm/cramFile/slice/decodeRecord.js +6 -8
- package/esm/cramFile/slice/decodeRecord.js.map +1 -1
- package/esm/cramFile/slice/index.d.ts +3 -2
- package/esm/cramFile/slice/index.js +10 -16
- package/esm/cramFile/slice/index.js.map +1 -1
- package/esm/cramFile/util.js.map +1 -1
- package/esm/indexedCramFile.js +0 -3
- package/esm/indexedCramFile.js.map +1 -1
- package/esm/rans/d04.js +1 -1
- package/esm/rans/d04.js.map +1 -1
- package/esm/rans/d14.js +1 -1
- package/esm/rans/d14.js.map +1 -1
- package/esm/rans/decoding.js +1 -1
- package/esm/rans/decoding.js.map +1 -1
- package/esm/rans/frequencies.js +1 -1
- package/esm/rans/frequencies.js.map +1 -1
- package/esm/rans/index.js +1 -1
- package/esm/rans/index.js.map +1 -1
- package/package.json +9 -9
- package/src/craiIndex.ts +82 -81
- package/src/cramFile/codecs/byteArrayLength.ts +3 -8
- package/src/cramFile/codecs/byteArrayStop.ts +0 -12
- package/src/cramFile/codecs/external.ts +1 -2
- package/src/cramFile/codecs/getBits.ts +1 -1
- package/src/cramFile/codecs/huffman.ts +11 -12
- package/src/cramFile/container/compressionScheme.ts +38 -31
- package/src/cramFile/container/index.ts +3 -1
- package/src/cramFile/file.ts +9 -10
- package/src/cramFile/record.ts +3 -10
- package/src/cramFile/sectionParsers.ts +4 -4
- package/src/cramFile/slice/decodeRecord.ts +22 -32
- package/src/cramFile/slice/index.ts +16 -25
- package/src/cramFile/util.ts +36 -35
- package/src/indexedCramFile.ts +1 -4
- package/src/rans/d04.ts +1 -1
- package/src/rans/d14.ts +1 -1
- package/src/rans/decoding.ts +1 -1
- package/src/rans/frequencies.ts +1 -1
- package/src/rans/index.ts +1 -1
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@gmod/cram",
|
|
3
|
-
"version": "3.0.4",
|
|
3
|
+
"version": "3.0.5",
|
|
4
4
|
"description": "read CRAM files with pure Javascript",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"repository": "GMOD/cram-js",
|
|
@@ -21,8 +21,9 @@
|
|
|
21
21
|
"esm"
|
|
22
22
|
],
|
|
23
23
|
"scripts": {
|
|
24
|
-
"test": "
|
|
25
|
-
"lint": "eslint --report-unused-disable-directives --max-warnings 0
|
|
24
|
+
"test": "vitest",
|
|
25
|
+
"lint": "eslint --report-unused-disable-directives --max-warnings 0",
|
|
26
|
+
"format": "prettier --write .",
|
|
26
27
|
"docs": "documentation readme --shallow src/indexedCramFile.ts --section=IndexedCramFile; documentation readme --shallow src/cramFile/file.ts --section=CramFile; documentation readme --shallow src/craiIndex.ts --section=CraiIndex; documentation readme --shallow errors.ts '--section=Exception Classes'; documentation readme --shallow src/cramFile/file.ts --section=CramFile; documentation readme --shallow src/cramFile/record.ts --section=CramRecord",
|
|
27
28
|
"prebuild": "npm run clean",
|
|
28
29
|
"clean": "rimraf dist esm",
|
|
@@ -30,7 +31,7 @@
|
|
|
30
31
|
"build:es5": "tsc --target es2015 --module commonjs --outDir dist",
|
|
31
32
|
"build": "npm run build:esm && npm run build:es5",
|
|
32
33
|
"postbuild": "webpack",
|
|
33
|
-
"prepack": "npm test && npm run build && cp dist/errors.js errors.js",
|
|
34
|
+
"prepack": "npm test run && npm run build && cp dist/errors.js errors.js",
|
|
34
35
|
"postpublish": "rm errors.js",
|
|
35
36
|
"postversion": "git push --follow-tags"
|
|
36
37
|
},
|
|
@@ -41,7 +42,6 @@
|
|
|
41
42
|
"biojs"
|
|
42
43
|
],
|
|
43
44
|
"dependencies": {
|
|
44
|
-
"@gmod/abortable-promise-cache": "^2.0.0",
|
|
45
45
|
"@jkbonfield/htscodecs": "^0.5.1",
|
|
46
46
|
"bzip2": "^0.1.1",
|
|
47
47
|
"crc": "^4.3.2",
|
|
@@ -53,24 +53,24 @@
|
|
|
53
53
|
},
|
|
54
54
|
"devDependencies": {
|
|
55
55
|
"@gmod/indexedfasta": "^2.1.0",
|
|
56
|
-
"@types/jest": "^29.5.12",
|
|
57
56
|
"@types/long": "^4.0.0",
|
|
58
57
|
"@types/md5": "^2.3.2",
|
|
59
58
|
"@types/pako": "^1.0.3",
|
|
60
59
|
"@typescript-eslint/eslint-plugin": "^8.0.0",
|
|
61
60
|
"@typescript-eslint/parser": "^8.0.0",
|
|
61
|
+
"@vitest/coverage-v8": "^2.0.5",
|
|
62
62
|
"buffer": "^6.0.3",
|
|
63
63
|
"documentation": "^14.0.3",
|
|
64
|
-
"eslint": "^9.
|
|
64
|
+
"eslint": "^9.9.0",
|
|
65
65
|
"eslint-config-prettier": "^9.0.0",
|
|
66
66
|
"eslint-plugin-prettier": "^5.1.3",
|
|
67
67
|
"eslint-plugin-unicorn": "^55.0.0",
|
|
68
|
-
"jest": "^29.3.1",
|
|
69
68
|
"mock-fs": "^5.2.0",
|
|
70
69
|
"prettier": "^3.2.5",
|
|
71
70
|
"rimraf": "^6.0.1",
|
|
72
|
-
"ts-jest": "^29.1.2",
|
|
73
71
|
"typescript": "^5.0.3",
|
|
72
|
+
"typescript-eslint": "^8.0.1",
|
|
73
|
+
"vitest": "^2.0.5",
|
|
74
74
|
"webpack": "^5.90.3",
|
|
75
75
|
"webpack-cli": "^5.0.1"
|
|
76
76
|
},
|
package/src/craiIndex.ts
CHANGED
|
@@ -1,12 +1,10 @@
|
|
|
1
|
-
import AbortablePromiseCache from '@gmod/abortable-promise-cache'
|
|
2
|
-
import QuickLRU from 'quick-lru'
|
|
3
1
|
import { unzip } from './unzip'
|
|
4
2
|
import { open } from './io'
|
|
5
3
|
import { CramMalformedError } from './errors'
|
|
6
4
|
import { CramFileSource } from './cramFile/file'
|
|
7
5
|
import { Filehandle } from './cramFile/filehandle'
|
|
8
6
|
|
|
9
|
-
const BAI_MAGIC =
|
|
7
|
+
const BAI_MAGIC = 21_578_050 // BAI\1
|
|
10
8
|
|
|
11
9
|
export interface Slice {
|
|
12
10
|
start: number
|
|
@@ -16,26 +14,36 @@ export interface Slice {
|
|
|
16
14
|
sliceBytes: number
|
|
17
15
|
}
|
|
18
16
|
|
|
19
|
-
type ParsedIndex = Record<string, Slice[]>
|
|
17
|
+
type ParsedIndex = Record<string, Slice[] | undefined>
|
|
20
18
|
|
|
21
19
|
function addRecordToIndex(index: ParsedIndex, record: number[]) {
|
|
22
20
|
const [seqId, start, span, containerStart, sliceStart, sliceBytes] = record
|
|
23
21
|
|
|
24
|
-
|
|
25
|
-
|
|
22
|
+
const s = seqId!
|
|
23
|
+
if (!index[s]) {
|
|
24
|
+
index[s] = []
|
|
26
25
|
}
|
|
27
26
|
|
|
28
|
-
index[
|
|
29
|
-
start
|
|
30
|
-
span
|
|
31
|
-
containerStart
|
|
32
|
-
sliceStart
|
|
33
|
-
sliceBytes
|
|
27
|
+
index[s].push({
|
|
28
|
+
start: start!,
|
|
29
|
+
span: span!,
|
|
30
|
+
containerStart: containerStart!,
|
|
31
|
+
sliceStart: sliceStart!,
|
|
32
|
+
sliceBytes: sliceBytes!,
|
|
34
33
|
})
|
|
35
34
|
}
|
|
36
35
|
|
|
36
|
+
function maybeUnzip(data: Buffer) {
|
|
37
|
+
if (data[0] === 31 && data[1] === 139) {
|
|
38
|
+
return unzip(data)
|
|
39
|
+
}
|
|
40
|
+
return data
|
|
41
|
+
}
|
|
42
|
+
|
|
37
43
|
export default class CraiIndex {
|
|
38
|
-
// A CRAM index (.crai) is a gzipped tab delimited file containing the
|
|
44
|
+
// A CRAM index (.crai) is a gzipped tab delimited file containing the
|
|
45
|
+
// following columns:
|
|
46
|
+
//
|
|
39
47
|
// 1. Sequence id
|
|
40
48
|
// 2. Alignment start
|
|
41
49
|
// 3. Alignment span
|
|
@@ -43,7 +51,8 @@ export default class CraiIndex {
|
|
|
43
51
|
// 5. Slice start byte position in the container data (‘blocks’)
|
|
44
52
|
// 6. Slice size in bytes
|
|
45
53
|
// Each line represents a slice in the CRAM file. Please note that all slices must be listed in index file.
|
|
46
|
-
private
|
|
54
|
+
private parseIndexP?: Promise<ParsedIndex>
|
|
55
|
+
|
|
47
56
|
private filehandle: Filehandle
|
|
48
57
|
|
|
49
58
|
/**
|
|
@@ -55,78 +64,70 @@ export default class CraiIndex {
|
|
|
55
64
|
*/
|
|
56
65
|
constructor(args: CramFileSource) {
|
|
57
66
|
this.filehandle = open(args.url, args.path, args.filehandle)
|
|
58
|
-
this._parseCache = new AbortablePromiseCache<unknown, ParsedIndex>({
|
|
59
|
-
cache: new QuickLRU({ maxSize: 1 }),
|
|
60
|
-
fill: (_data, _signal) => this.parseIndex(),
|
|
61
|
-
})
|
|
62
67
|
}
|
|
63
68
|
|
|
64
|
-
parseIndex() {
|
|
69
|
+
async parseIndex() {
|
|
65
70
|
const index: ParsedIndex = {}
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
.
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
}
|
|
117
|
-
|
|
118
|
-
// sort each of them by start
|
|
119
|
-
Object.entries(index).forEach(([seqId, ent]) => {
|
|
120
|
-
index[seqId] = ent.sort(
|
|
121
|
-
(a, b) => a.start - b.start || a.span - b.span,
|
|
122
|
-
)
|
|
123
|
-
})
|
|
124
|
-
return index
|
|
125
|
-
})
|
|
71
|
+
const uncompressedBuffer = maybeUnzip(await this.filehandle.readFile())
|
|
72
|
+
if (
|
|
73
|
+
uncompressedBuffer.length > 4 &&
|
|
74
|
+
uncompressedBuffer.readUInt32LE(0) === BAI_MAGIC
|
|
75
|
+
) {
|
|
76
|
+
throw new CramMalformedError(
|
|
77
|
+
'invalid .crai index file. note: file appears to be a .bai index. this is technically legal but please open a github issue if you need support',
|
|
78
|
+
)
|
|
79
|
+
}
|
|
80
|
+
// interpret the text as regular ascii, since it is
|
|
81
|
+
// supposed to be only digits and whitespace characters
|
|
82
|
+
// this is written in a deliberately low-level fashion for performance,
|
|
83
|
+
// because some .crai files can be pretty large.
|
|
84
|
+
let currentRecord: number[] = []
|
|
85
|
+
let currentString = ''
|
|
86
|
+
for (const charCode of uncompressedBuffer) {
|
|
87
|
+
if (
|
|
88
|
+
(charCode >= 48 && charCode <= 57) /* 0-9 */ ||
|
|
89
|
+
(!currentString && charCode === 45) /* leading - */
|
|
90
|
+
) {
|
|
91
|
+
currentString += String.fromCharCode(charCode)
|
|
92
|
+
} else if (charCode === 9 /* \t */) {
|
|
93
|
+
currentRecord.push(Number.parseInt(currentString, 10))
|
|
94
|
+
currentString = ''
|
|
95
|
+
} else if (charCode === 10 /* \n */) {
|
|
96
|
+
currentRecord.push(Number.parseInt(currentString, 10))
|
|
97
|
+
currentString = ''
|
|
98
|
+
addRecordToIndex(index, currentRecord)
|
|
99
|
+
currentRecord = []
|
|
100
|
+
} else if (charCode !== 13 /* \r */ && charCode !== 32 /* space */) {
|
|
101
|
+
// if there are other characters in the file besides
|
|
102
|
+
// space and \r, something is wrong.
|
|
103
|
+
throw new CramMalformedError('invalid .crai index file')
|
|
104
|
+
}
|
|
105
|
+
}
|
|
106
|
+
|
|
107
|
+
// if the file ends without a \n, we need to flush our buffers
|
|
108
|
+
if (currentString) {
|
|
109
|
+
currentRecord.push(Number.parseInt(currentString, 10))
|
|
110
|
+
}
|
|
111
|
+
if (currentRecord.length === 6) {
|
|
112
|
+
addRecordToIndex(index, currentRecord)
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
// sort each of them by start
|
|
116
|
+
Object.entries(index).forEach(([seqId, ent]) => {
|
|
117
|
+
const e2 = ent!
|
|
118
|
+
index[seqId] = e2.sort((a, b) => a.start - b.start || a.span - b.span)
|
|
119
|
+
})
|
|
120
|
+
return index
|
|
126
121
|
}
|
|
127
122
|
|
|
128
|
-
getIndex(
|
|
129
|
-
|
|
123
|
+
getIndex() {
|
|
124
|
+
if (!this.parseIndexP) {
|
|
125
|
+
this.parseIndexP = this.parseIndex().catch((e: unknown) => {
|
|
126
|
+
this.parseIndexP = undefined
|
|
127
|
+
throw e
|
|
128
|
+
})
|
|
129
|
+
}
|
|
130
|
+
return this.parseIndexP
|
|
130
131
|
}
|
|
131
132
|
|
|
132
133
|
/**
|
|
package/src/cramFile/codecs/byteArrayLength.ts
CHANGED
|
@@ -23,11 +23,6 @@ export default class ByteArrayStopCodec extends CramCodec<
|
|
|
23
23
|
) {
|
|
24
24
|
super(parameters, dataType)
|
|
25
25
|
this.instantiateCodec = instantiateCodec
|
|
26
|
-
if (dataType !== 'byteArray') {
|
|
27
|
-
throw new TypeError(
|
|
28
|
-
`byteArrayLength does not support data type ${dataType}`,
|
|
29
|
-
)
|
|
30
|
-
}
|
|
31
26
|
}
|
|
32
27
|
|
|
33
28
|
decode(
|
|
@@ -71,6 +66,6 @@ export default class ByteArrayStopCodec extends CramCodec<
|
|
|
71
66
|
}
|
|
72
67
|
}
|
|
73
68
|
|
|
74
|
-
'_getLengthCodec _getDataCodec'
|
|
75
|
-
|
|
76
|
-
|
|
69
|
+
'_getLengthCodec _getDataCodec'.split(' ').forEach(method => {
|
|
70
|
+
tinyMemoize(ByteArrayStopCodec, method)
|
|
71
|
+
})
|
|
package/src/cramFile/codecs/byteArrayStop.ts
CHANGED
|
@@ -10,18 +10,6 @@ export default class ByteArrayStopCodec extends CramCodec<
|
|
|
10
10
|
'byteArray',
|
|
11
11
|
ByteArrayStopCramEncoding['parameters']
|
|
12
12
|
> {
|
|
13
|
-
constructor(
|
|
14
|
-
parameters: ByteArrayStopCramEncoding['parameters'],
|
|
15
|
-
dataType: 'byteArray',
|
|
16
|
-
) {
|
|
17
|
-
super(parameters, dataType)
|
|
18
|
-
if (dataType !== 'byteArray') {
|
|
19
|
-
throw new TypeError(
|
|
20
|
-
`byteArrayStop codec does not support data type ${dataType}`,
|
|
21
|
-
)
|
|
22
|
-
}
|
|
23
|
-
}
|
|
24
|
-
|
|
25
13
|
decode(
|
|
26
14
|
slice: CramSlice,
|
|
27
15
|
coreDataBlock: CramFileBlock,
|
|
package/src/cramFile/codecs/external.ts
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
/* eslint-disable @typescript-eslint/unbound-method */
|
|
2
1
|
import { CramMalformedError, CramUnimplementedError } from '../../errors'
|
|
3
2
|
import CramCodec, { Cursor, Cursors } from './_base'
|
|
4
3
|
import { parseItf8 } from '../util'
|
|
@@ -64,6 +63,6 @@ export default class ExternalCodec extends CramCodec<
|
|
|
64
63
|
'attempted to read beyond end of block. this file seems truncated.',
|
|
65
64
|
)
|
|
66
65
|
}
|
|
67
|
-
return contentBlock.content[cursor.bytePosition++]
|
|
66
|
+
return contentBlock.content[cursor.bytePosition++]!
|
|
68
67
|
}
|
|
69
68
|
}
|
|
package/src/cramFile/codecs/getBits.ts
CHANGED
|
@@ -17,7 +17,7 @@ export function getBits(
|
|
|
17
17
|
for (let dlen = numBits; dlen; dlen--) {
|
|
18
18
|
// get the next `dlen` bits in the input, put them in val
|
|
19
19
|
val <<= 1
|
|
20
|
-
val |= (data[cursor.bytePosition] >> cursor.bitPosition) & 1
|
|
20
|
+
val |= (data[cursor.bytePosition]! >> cursor.bitPosition) & 1
|
|
21
21
|
cursor.bitPosition -= 1
|
|
22
22
|
if (cursor.bitPosition < 0) {
|
|
23
23
|
cursor.bytePosition += 1
|
|
package/src/cramFile/codecs/huffman.ts
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
/* eslint-disable @typescript-eslint/unbound-method */
|
|
2
1
|
import { CramMalformedError } from '../../errors'
|
|
3
2
|
import CramCodec, { Cursor, Cursors } from './_base'
|
|
4
3
|
import { getBits } from './getBits'
|
|
@@ -48,7 +47,7 @@ export default class HuffmanIntCodec extends CramCodec<
|
|
|
48
47
|
|
|
49
48
|
// if this is a degenerate zero-length huffman code, special-case the
|
|
50
49
|
// decoding
|
|
51
|
-
if (this.sortedCodes[0]
|
|
50
|
+
if (this.sortedCodes[0]!.bitLength === 0) {
|
|
52
51
|
this._decode = this._decodeZeroLengthCode
|
|
53
52
|
}
|
|
54
53
|
}
|
|
@@ -58,10 +57,10 @@ export default class HuffmanIntCodec extends CramCodec<
|
|
|
58
57
|
let codes = new Array<{ symbol: number; bitLength: number }>(
|
|
59
58
|
this.parameters.numCodes,
|
|
60
59
|
)
|
|
61
|
-
for (let i = 0; i < this.parameters.numCodes; i
|
|
60
|
+
for (let i = 0; i < this.parameters.numCodes; i++) {
|
|
62
61
|
codes[i] = {
|
|
63
|
-
symbol: this.parameters.symbols[i]
|
|
64
|
-
bitLength: this.parameters.bitLengths[i]
|
|
62
|
+
symbol: this.parameters.symbols[i]!,
|
|
63
|
+
bitLength: this.parameters.bitLengths[i]!,
|
|
65
64
|
}
|
|
66
65
|
}
|
|
67
66
|
// sort the codes by bit length and symbol value
|
|
@@ -74,7 +73,7 @@ export default class HuffmanIntCodec extends CramCodec<
|
|
|
74
73
|
if (!this.codeBook[code.bitLength]) {
|
|
75
74
|
this.codeBook[code.bitLength] = []
|
|
76
75
|
}
|
|
77
|
-
this.codeBook[code.bitLength]
|
|
76
|
+
this.codeBook[code.bitLength]!.push(code.symbol)
|
|
78
77
|
})
|
|
79
78
|
}
|
|
80
79
|
|
|
@@ -117,7 +116,7 @@ export default class HuffmanIntCodec extends CramCodec<
|
|
|
117
116
|
|
|
118
117
|
this.bitCodeToValue = new Array(maxBitCode + 1).fill(-1)
|
|
119
118
|
for (let i = 0; i < this.sortedBitCodes.length; i += 1) {
|
|
120
|
-
this.bitCodeToValue[this.sortedCodes[i]
|
|
119
|
+
this.bitCodeToValue[this.sortedCodes[i]!.bitCode] = i
|
|
121
120
|
}
|
|
122
121
|
}
|
|
123
122
|
|
|
@@ -136,7 +135,7 @@ export default class HuffmanIntCodec extends CramCodec<
|
|
|
136
135
|
|
|
137
136
|
// the special case for zero-length codes
|
|
138
137
|
_decodeZeroLengthCode() {
|
|
139
|
-
return this.sortedCodes[0]
|
|
138
|
+
return this.sortedCodes[0]!.value
|
|
140
139
|
}
|
|
141
140
|
|
|
142
141
|
_decode(slice: CramSlice, coreDataBlock: CramFileBlock, coreCursor: Cursor) {
|
|
@@ -145,19 +144,19 @@ export default class HuffmanIntCodec extends CramCodec<
|
|
|
145
144
|
let prevLen = 0
|
|
146
145
|
let bits = 0
|
|
147
146
|
for (let i = 0; i < this.sortedCodes.length; i += 1) {
|
|
148
|
-
const length = this.sortedCodes[i]
|
|
147
|
+
const length = this.sortedCodes[i]!.bitLength
|
|
149
148
|
bits <<= length - prevLen
|
|
150
149
|
bits |= getBits(input, coreCursor, length - prevLen)
|
|
151
150
|
prevLen = length
|
|
152
151
|
{
|
|
153
|
-
const index = this.bitCodeToValue[bits]
|
|
152
|
+
const index = this.bitCodeToValue[bits]!
|
|
154
153
|
if (index > -1 && this.sortedBitLengthsByBitCode[index] === length) {
|
|
155
|
-
return this.sortedValuesByBitCode[index]
|
|
154
|
+
return this.sortedValuesByBitCode[index]!
|
|
156
155
|
}
|
|
157
156
|
|
|
158
157
|
for (
|
|
159
158
|
let j = i;
|
|
160
|
-
this.sortedCodes[j + 1]
|
|
159
|
+
this.sortedCodes[j + 1]!.bitLength === length &&
|
|
161
160
|
j < this.sortedCodes.length;
|
|
162
161
|
j += 1
|
|
163
162
|
) {
|
|
package/src/cramFile/container/compressionScheme.ts
CHANGED
|
@@ -53,30 +53,30 @@ function parseSubstitutionMatrix(byteArray: number[]) {
|
|
|
53
53
|
matrix[i] = new Array(4)
|
|
54
54
|
}
|
|
55
55
|
|
|
56
|
-
matrix[0][(byteArray[0] >> 6) & 3] = 'C'
|
|
57
|
-
matrix[0][(byteArray[0] >> 4) & 3] = 'G'
|
|
58
|
-
matrix[0][(byteArray[0] >> 2) & 3] = 'T'
|
|
59
|
-
matrix[0][(byteArray[0] >> 0) & 3] = 'N'
|
|
60
|
-
|
|
61
|
-
matrix[1][(byteArray[1] >> 6) & 3] = 'A'
|
|
62
|
-
matrix[1][(byteArray[1] >> 4) & 3] = 'G'
|
|
63
|
-
matrix[1][(byteArray[1] >> 2) & 3] = 'T'
|
|
64
|
-
matrix[1][(byteArray[1] >> 0) & 3] = 'N'
|
|
65
|
-
|
|
66
|
-
matrix[2][(byteArray[2] >> 6) & 3] = 'A'
|
|
67
|
-
matrix[2][(byteArray[2] >> 4) & 3] = 'C'
|
|
68
|
-
matrix[2][(byteArray[2] >> 2) & 3] = 'T'
|
|
69
|
-
matrix[2][(byteArray[2] >> 0) & 3] = 'N'
|
|
70
|
-
|
|
71
|
-
matrix[3][(byteArray[3] >> 6) & 3] = 'A'
|
|
72
|
-
matrix[3][(byteArray[3] >> 4) & 3] = 'C'
|
|
73
|
-
matrix[3][(byteArray[3] >> 2) & 3] = 'G'
|
|
74
|
-
matrix[3][(byteArray[3] >> 0) & 3] = 'N'
|
|
75
|
-
|
|
76
|
-
matrix[4][(byteArray[4] >> 6) & 3] = 'A'
|
|
77
|
-
matrix[4][(byteArray[4] >> 4) & 3] = 'C'
|
|
78
|
-
matrix[4][(byteArray[4] >> 2) & 3] = 'G'
|
|
79
|
-
matrix[4][(byteArray[4] >> 0) & 3] = 'T'
|
|
56
|
+
matrix[0]![(byteArray[0]! >> 6) & 3] = 'C'
|
|
57
|
+
matrix[0]![(byteArray[0]! >> 4) & 3] = 'G'
|
|
58
|
+
matrix[0]![(byteArray[0]! >> 2) & 3] = 'T'
|
|
59
|
+
matrix[0]![(byteArray[0]! >> 0) & 3] = 'N'
|
|
60
|
+
|
|
61
|
+
matrix[1]![(byteArray[1]! >> 6) & 3] = 'A'
|
|
62
|
+
matrix[1]![(byteArray[1]! >> 4) & 3] = 'G'
|
|
63
|
+
matrix[1]![(byteArray[1]! >> 2) & 3] = 'T'
|
|
64
|
+
matrix[1]![(byteArray[1]! >> 0) & 3] = 'N'
|
|
65
|
+
|
|
66
|
+
matrix[2]![(byteArray[2]! >> 6) & 3] = 'A'
|
|
67
|
+
matrix[2]![(byteArray[2]! >> 4) & 3] = 'C'
|
|
68
|
+
matrix[2]![(byteArray[2]! >> 2) & 3] = 'T'
|
|
69
|
+
matrix[2]![(byteArray[2]! >> 0) & 3] = 'N'
|
|
70
|
+
|
|
71
|
+
matrix[3]![(byteArray[3]! >> 6) & 3] = 'A'
|
|
72
|
+
matrix[3]![(byteArray[3]! >> 4) & 3] = 'C'
|
|
73
|
+
matrix[3]![(byteArray[3]! >> 2) & 3] = 'G'
|
|
74
|
+
matrix[3]![(byteArray[3]! >> 0) & 3] = 'N'
|
|
75
|
+
|
|
76
|
+
matrix[4]![(byteArray[4]! >> 6) & 3] = 'A'
|
|
77
|
+
matrix[4]![(byteArray[4]! >> 4) & 3] = 'C'
|
|
78
|
+
matrix[4]![(byteArray[4]! >> 2) & 3] = 'G'
|
|
79
|
+
matrix[4]![(byteArray[4]! >> 0) & 3] = 'T'
|
|
80
80
|
|
|
81
81
|
return matrix
|
|
82
82
|
}
|
|
@@ -112,16 +112,21 @@ export default class CramContainerCompressionScheme {
|
|
|
112
112
|
* @private
|
|
113
113
|
*/
|
|
114
114
|
getCodecForTag(tagName: string): CramCodec {
|
|
115
|
-
|
|
115
|
+
const test = this.tagCodecCache[tagName]
|
|
116
|
+
if (!test) {
|
|
116
117
|
const encodingData = this.tagEncoding[tagName]
|
|
117
|
-
if (encodingData) {
|
|
118
|
-
|
|
119
|
-
encodingData,
|
|
120
|
-
'byteArray', // all tags are byte array data
|
|
121
|
-
)
|
|
118
|
+
if (!encodingData) {
|
|
119
|
+
throw new Error('Error, no tag encoding')
|
|
122
120
|
}
|
|
121
|
+
const ret = instantiateCodec(
|
|
122
|
+
encodingData,
|
|
123
|
+
'byteArray', // all tags are byte array data
|
|
124
|
+
)
|
|
125
|
+
this.tagCodecCache[tagName] = ret
|
|
126
|
+
return ret
|
|
127
|
+
} else {
|
|
128
|
+
return test
|
|
123
129
|
}
|
|
124
|
-
return this.tagCodecCache[tagName]
|
|
125
130
|
}
|
|
126
131
|
|
|
127
132
|
/**
|
|
@@ -140,8 +145,10 @@ export default class CramContainerCompressionScheme {
|
|
|
140
145
|
this.dataSeriesCodecCache[dataSeriesName]
|
|
141
146
|
if (r === undefined) {
|
|
142
147
|
const encodingData = this.dataSeriesEncoding[dataSeriesName]
|
|
148
|
+
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
|
|
143
149
|
if (encodingData) {
|
|
144
150
|
const dataType = dataSeriesTypes[dataSeriesName]
|
|
151
|
+
// eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
|
|
145
152
|
if (!dataType) {
|
|
146
153
|
throw new CramMalformedError(
|
|
147
154
|
`data series name ${dataSeriesName} not defined in file compression header`,
|
package/src/cramFile/file.ts
CHANGED
|
@@ -90,10 +90,9 @@ export default class CramFile {
|
|
|
90
90
|
cacheSize: args.cacheSize ?? 20000,
|
|
91
91
|
}
|
|
92
92
|
|
|
93
|
-
// cache of features in a slice, keyed by the
|
|
94
|
-
//
|
|
95
|
-
//
|
|
96
|
-
// kept here at the level of the file
|
|
93
|
+
// cache of features in a slice, keyed by the slice offset. caches all of
|
|
94
|
+
// the features in a slice, or none. the cache is actually used by the
|
|
95
|
+
// slice object, it's just kept here at the level of the file
|
|
97
96
|
this.featureCache = new QuickLRU({
|
|
98
97
|
maxSize: this.options.cacheSize,
|
|
99
98
|
})
|
|
@@ -279,7 +278,7 @@ export default class CramFile {
|
|
|
279
278
|
},
|
|
280
279
|
position: number,
|
|
281
280
|
size = section.maxLength,
|
|
282
|
-
preReadBuffer
|
|
281
|
+
preReadBuffer?: Buffer,
|
|
283
282
|
) {
|
|
284
283
|
let buffer: Buffer
|
|
285
284
|
if (preReadBuffer) {
|
|
@@ -330,8 +329,8 @@ export default class CramFile {
|
|
|
330
329
|
ret.copy(outputBuffer)
|
|
331
330
|
} else if (compressionMethod === 'rans') {
|
|
332
331
|
ransuncompress(inputBuffer, outputBuffer)
|
|
333
|
-
//htscodecs r4x8 is slower, but compatible.
|
|
334
|
-
//htscodecs.r4x8_uncompress(inputBuffer, outputBuffer);
|
|
332
|
+
// htscodecs r4x8 is slower, but compatible.
|
|
333
|
+
// htscodecs.r4x8_uncompress(inputBuffer, outputBuffer);
|
|
335
334
|
} else if (compressionMethod === 'rans4x16') {
|
|
336
335
|
htscodecs.r4x16_uncompress(inputBuffer, outputBuffer)
|
|
337
336
|
} else if (compressionMethod === 'arith') {
|
|
@@ -422,6 +421,6 @@ export default class CramFile {
|
|
|
422
421
|
}
|
|
423
422
|
}
|
|
424
423
|
|
|
425
|
-
'getDefinition getSectionParsers getSamHeader'
|
|
426
|
-
|
|
427
|
-
|
|
424
|
+
'getDefinition getSectionParsers getSamHeader'.split(' ').forEach(method => {
|
|
425
|
+
tinyMemoize(CramFile, method)
|
|
426
|
+
})
|
package/src/cramFile/record.ts
CHANGED
|
@@ -42,7 +42,7 @@ function decodeReadSequence(cramRecord: CramRecord, refRegion: RefRegion) {
|
|
|
42
42
|
let currentReadFeature = 0
|
|
43
43
|
while (bases.length < cramRecord.readLength) {
|
|
44
44
|
if (currentReadFeature < cramRecord.readFeatures.length) {
|
|
45
|
-
const feature = cramRecord.readFeatures[currentReadFeature]
|
|
45
|
+
const feature = cramRecord.readFeatures[currentReadFeature]!
|
|
46
46
|
if (feature.code === 'Q' || feature.code === 'q') {
|
|
47
47
|
currentReadFeature += 1
|
|
48
48
|
} else if (feature.pos === bases.length + 1) {
|
|
@@ -90,10 +90,7 @@ function decodeReadSequence(cramRecord: CramRecord, refRegion: RefRegion) {
|
|
|
90
90
|
// put down a chunk of sequence up to the next read feature
|
|
91
91
|
const chunk = refRegion.seq.slice(
|
|
92
92
|
regionPos,
|
|
93
|
-
regionPos +
|
|
94
|
-
cramRecord.readFeatures[currentReadFeature].pos -
|
|
95
|
-
bases.length -
|
|
96
|
-
1,
|
|
93
|
+
regionPos + feature.pos - bases.length - 1,
|
|
97
94
|
)
|
|
98
95
|
bases += chunk
|
|
99
96
|
regionPos += chunk.length
|
|
@@ -131,10 +128,6 @@ function decodeBaseSubstitution(
|
|
|
131
128
|
compressionScheme: CramContainerCompressionScheme,
|
|
132
129
|
readFeature: ReadFeature,
|
|
133
130
|
) {
|
|
134
|
-
if (!refRegion) {
|
|
135
|
-
return
|
|
136
|
-
}
|
|
137
|
-
|
|
138
131
|
// decode base substitution code using the substitution matrix
|
|
139
132
|
const refCoord = readFeature.refPos - refRegion.start
|
|
140
133
|
const refBase = refRegion.seq.charAt(refCoord)
|
|
@@ -145,7 +138,7 @@ function decodeBaseSubstitution(
|
|
|
145
138
|
if (baseNumber === undefined) {
|
|
146
139
|
baseNumber = 4
|
|
147
140
|
}
|
|
148
|
-
const substitutionScheme = compressionScheme.substitutionMatrix[baseNumber]
|
|
141
|
+
const substitutionScheme = compressionScheme.substitutionMatrix[baseNumber]!
|
|
149
142
|
const base = substitutionScheme[readFeature.data]
|
|
150
143
|
if (base) {
|
|
151
144
|
readFeature.sub = base
|
|
package/src/cramFile/sectionParsers.ts
CHANGED
|
@@ -179,8 +179,8 @@ export function cramPreservationMap() {
|
|
|
179
179
|
const ents = []
|
|
180
180
|
for (let i = 0; i < mapCount; i++) {
|
|
181
181
|
const key =
|
|
182
|
-
String.fromCharCode(buffer[offset]) +
|
|
183
|
-
String.fromCharCode(buffer[offset + 1])
|
|
182
|
+
String.fromCharCode(buffer[offset]!) +
|
|
183
|
+
String.fromCharCode(buffer[offset + 1]!)
|
|
184
184
|
offset += 2
|
|
185
185
|
|
|
186
186
|
if (
|
|
@@ -550,8 +550,8 @@ function cramDataSeriesEncodingMap() {
|
|
|
550
550
|
const ents = []
|
|
551
551
|
for (let i = 0; i < mapCount; i++) {
|
|
552
552
|
const key =
|
|
553
|
-
String.fromCharCode(buffer[offset]) +
|
|
554
|
-
String.fromCharCode(buffer[offset + 1])
|
|
553
|
+
String.fromCharCode(buffer[offset]!) +
|
|
554
|
+
String.fromCharCode(buffer[offset + 1]!)
|
|
555
555
|
offset += 2
|
|
556
556
|
|
|
557
557
|
const { value, offset: newOffset4 } = cramEncodingSub(buffer, offset)
|