@gmod/cram 1.6.1 → 1.6.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +4 -0
- package/package.json +2 -1
- package/src/craiIndex.js +180 -0
- package/src/cramFile/codecs/_base.js +49 -0
- package/src/cramFile/codecs/beta.js +23 -0
- package/src/cramFile/codecs/byteArrayLength.js +55 -0
- package/src/cramFile/codecs/byteArrayStop.js +50 -0
- package/src/cramFile/codecs/external.js +54 -0
- package/src/cramFile/codecs/gamma.js +30 -0
- package/src/cramFile/codecs/huffman.js +137 -0
- package/src/cramFile/codecs/index.js +38 -0
- package/src/cramFile/codecs/subexp.js +32 -0
- package/src/cramFile/constants.js +55 -0
- package/src/cramFile/container/compressionScheme.js +144 -0
- package/src/cramFile/container/index.js +119 -0
- package/src/cramFile/file.js +347 -0
- package/src/cramFile/index.js +3 -0
- package/src/cramFile/record.js +337 -0
- package/src/cramFile/sectionParsers.js +379 -0
- package/src/cramFile/slice/decodeRecord.js +362 -0
- package/src/cramFile/slice/index.js +497 -0
- package/src/cramFile/util.js +169 -0
- package/src/errors.js +22 -0
- package/src/index.js +5 -0
- package/src/indexedCramFile.js +191 -0
- package/src/io/bufferCache.js +66 -0
- package/src/io/index.js +26 -0
- package/src/io/localFile.js +35 -0
- package/src/io/remoteFile.js +71 -0
- package/src/rans/README.md +1 -0
- package/src/rans/constants.js +5 -0
- package/src/rans/d04.js +83 -0
- package/src/rans/d14.js +59 -0
- package/src/rans/decoding.js +141 -0
- package/src/rans/frequencies.js +121 -0
- package/src/rans/index.js +249 -0
- package/src/sam.js +15 -0
- package/src/unzip-pako.ts +5 -0
- package/src/unzip.ts +2 -0
package/CHANGELOG.md
CHANGED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@gmod/cram",
|
|
3
|
-
"version": "1.6.1",
|
|
3
|
+
"version": "1.6.2",
|
|
4
4
|
"description": "read CRAM files with pure Javascript",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"repository": "GMOD/cram-js",
|
|
@@ -16,6 +16,7 @@
|
|
|
16
16
|
},
|
|
17
17
|
"files": [
|
|
18
18
|
"errors.js",
|
|
19
|
+
"src",
|
|
19
20
|
"dist",
|
|
20
21
|
"esm"
|
|
21
22
|
],
|
package/src/craiIndex.js
ADDED
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
import AbortablePromiseCache from 'abortable-promise-cache'
|
|
2
|
+
import QuickLRU from 'quick-lru'
|
|
3
|
+
import { unzip } from './unzip'
|
|
4
|
+
import { open } from './io'
|
|
5
|
+
import { CramMalformedError } from './errors'
|
|
6
|
+
|
|
7
|
+
// First four bytes of a .bai file ("BAI\1") read as a little-endian uint32:
// 0x42 'B', 0x41 'A', 0x49 'I', 0x01. Used to recognise a BAM index that was
// handed to the .crai parser.
const BAI_MAGIC = 0x01494142
|
|
8
|
+
|
|
9
|
+
/**
 * Plain record describing one indexed slice. Every own enumerable property
 * of the object passed to the constructor is copied onto the instance.
 */
class Slice {
  constructor(args) {
    Object.assign(this, args)
  }

  /**
   * @returns {string} the five slice coordinates joined with colons, in the
   * order start, span, containerStart, sliceStart, sliceBytes
   */
  toString() {
    const { start, span, containerStart, sliceStart, sliceBytes } = this
    return `${start}:${span}:${containerStart}:${sliceStart}:${sliceBytes}`
  }
}
|
|
18
|
+
|
|
19
|
+
/**
 * Append one parsed .crai line to the in-memory index.
 *
 * @param {object} index map of sequence id -> array of Slice; mutated in place
 * @param {number[]} record the columns of one line, in file order:
 *   seqId, start, span, containerStart, sliceStart, sliceBytes
 * @throws {CramMalformedError} if the record has fewer than six columns, or
 *   if any column is undefined or NaN
 */
function addRecordToIndex(index, record) {
  // Number.parseInt never yields undefined: an empty column comes back as NaN
  // and a short line just produces a shorter array. Both cases have to be
  // tested explicitly, otherwise slices with undefined/NaN fields slip through.
  const isTruncated = record.length < 6
  const hasUnparsedColumn = record.some(
    el => el === undefined || Number.isNaN(el),
  )
  if (isTruncated || hasUnparsedColumn) {
    throw new CramMalformedError('invalid .crai index file')
  }

  const [seqId, start, span, containerStart, sliceStart, sliceBytes] = record

  if (!index[seqId]) {
    index[seqId] = []
  }

  index[seqId].push(
    new Slice({
      start,
      span,
      containerStart,
      sliceStart,
      sliceBytes,
    }),
  )
}
|
|
40
|
+
|
|
41
|
+
export default class CraiIndex {
  // A CRAM index (.crai) is a gzipped, tab-delimited text file with one line
  // per slice of the CRAM file. The columns of each line are:
  //   1. Sequence id
  //   2. Alignment start
  //   3. Alignment span
  //   4. Container start byte position in the file
  //   5. Slice start byte position in the container data ('blocks')
  //   6. Slice size in bytes
  // All slices must be listed in the index file.

  /**
   * Exactly how the three optional sources are resolved is up to `open`
   * from ./io; they are simply forwarded in the order url, path, filehandle.
   *
   * @param {object} args
   * @param {string} [args.path]
   * @param {string} [args.url]
   * @param {FileHandle} [args.filehandle]
   */
  constructor(args) {
    const { url, path, filehandle: providedHandle } = args
    const filehandle = open(url, path, providedHandle)
    // single-entry cache: the parsed index is stored under one fixed key
    this._parseCache = new AbortablePromiseCache({
      cache: new QuickLRU({ maxSize: 1 }),
      fill: (data, signal) => this.parseIndex({ signal }),
    })
    this.readFile = filehandle.readFile.bind(filehandle)
  }

  /**
   * Read the whole index file, gunzip it if it starts with the gzip magic
   * bytes, and parse it.
   *
   * @returns {Promise} promise for an object mapping sequence id to an array
   * of slice entries sorted by start, then span
   * @throws {CramMalformedError} (as a rejection) if the data starts with the
   * .bai magic number or contains a byte other than a digit, a leading '-',
   * tab, newline, carriage return or space
   */
  parseIndex() {
    const index = {}

    const gunzipIfNeeded = data => {
      const hasGzipMagic = data[0] === 31 && data[1] === 139
      return hasGzipMagic ? unzip(data) : data
    }

    const parseText = uncompressedBuffer => {
      if (
        uncompressedBuffer.length > 4 &&
        uncompressedBuffer.readUInt32LE(0) === BAI_MAGIC
      ) {
        throw new CramMalformedError(
          'invalid .crai index file. note: file appears to be a .bai index. this is technically legal but please open a github issue if you need support',
        )
      }

      // The text is only supposed to hold digits and whitespace, so it is
      // walked byte by byte as plain ASCII. This is deliberately low-level
      // for performance, because some .crai files can be pretty large.
      let fields = []
      let digits = ''
      const endField = () => {
        fields.push(Number.parseInt(digits, 10))
        digits = ''
      }

      for (let i = 0; i < uncompressedBuffer.length; i += 1) {
        const charCode = uncompressedBuffer[i]
        const isDigit = charCode >= 48 && charCode <= 57 // 0-9
        const isLeadingMinus = charCode === 45 && !digits // '-' opening a number
        if (isDigit || isLeadingMinus) {
          digits += String.fromCharCode(charCode)
          continue
        }

        switch (charCode) {
          case 9: // \t ends a column
            endField()
            break
          case 10: // \n ends a column and the record
            endField()
            addRecordToIndex(index, fields)
            fields = []
            break
          case 13: // \r
          case 32: // space
            break
          default:
            // anything besides digits and whitespace means something is wrong
            throw new CramMalformedError('invalid .crai index file')
        }
      }

      // a file that does not end with \n still has a pending column/record
      if (digits) {
        fields.push(Number.parseInt(digits, 10))
      }
      if (fields.length === 6) {
        addRecordToIndex(index, fields)
      }

      // order every sequence's entries by start, ties broken by span
      const byStartThenSpan = (a, b) => a.start - b.start || a.span - b.span
      for (const entries of Object.values(index)) {
        entries.sort(byStartThenSpan)
      }
      return index
    }

    return this.readFile().then(gunzipIfNeeded).then(parseText)
  }

  /**
   * @param {object} [opts]
   * @param {AbortSignal} [opts.signal] forwarded to the parse cache
   * @returns {Promise} promise for the parsed index, served from the cache
   */
  getIndex(opts = {}) {
    return this._parseCache.get('index', null, opts.signal)
  }

  /**
   * @param {number} seqId
   * @returns {Promise} true if the index contains entries for
   * the given reference sequence ID, false otherwise
   */
  async hasDataForReferenceSequence(seqId) {
    const index = await this.getIndex()
    return Boolean(index[seqId])
  }

  /**
   * fetch index entries for the given range
   *
   * @param {number} seqId
   * @param {number} queryStart
   * @param {number} queryEnd
   *
   * @returns {Promise} promise for
   * an array of objects of the form
   * `{start, span, containerStart, sliceStart, sliceBytes }`
   */
  async getEntriesForRange(seqId, queryStart, queryEnd) {
    const index = await this.getIndex()
    const seqEntries = index[seqId]
    if (!seqEntries) {
      return []
    }

    // keep an entry unless it lies entirely after or entirely before the query
    return seqEntries.filter(({ start, span }) => {
      const startsAtOrAfterQueryEnd = start >= queryEnd
      const endsAtOrBeforeQueryStart = start + span <= queryStart
      return !startsAtOrAfterQueryEnd && !endsAtOrBeforeQueryStart
    })
  }
}
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import { CramBufferOverrunError } from '../../errors'
|
|
2
|
+
|
|
3
|
+
// Data types a codec may be constructed for.
const validDataTypes = {
  int: true,
  byte: true,
  long: true,
  byteArray: true,
  byteArrayBlock: true,
}

/**
 * Codec base class. Stores the codec parameters and data type and provides
 * the shared bit reader used by the bit-oriented codecs. Subclasses supply
 * `decode(slice, coreDataBlock, blocksByContentId, cursors)`.
 */
export default class CramCodec {
  /**
   * @param {object} [parameters] codec-specific parameters, stored as-is
   * @param {string} dataType one of the keys of `validDataTypes`
   * @throws {TypeError} if `dataType` is missing or is not a known data type
   */
  constructor(parameters = {}, dataType) {
    this.parameters = parameters
    this.dataType = dataType
    if (!dataType) {
      throw new TypeError('must provide a data type to codec constructor')
    }
    // own-property test so inherited keys such as 'toString' are rejected
    if (!Object.prototype.hasOwnProperty.call(validDataTypes, dataType)) {
      throw new TypeError(`invalid data type ${dataType}`)
    }
  }

  /**
   * Read `numBits` bits from `data` at the cursor and advance the cursor.
   * Within a byte, bit 7 is consumed first and `bitPosition` counts down to
   * 0 before moving on to the next byte. Bits are accumulated most
   * significant first with 32-bit integer operators.
   *
   * @param {Uint8Array|Buffer|number[]} data bytes to read from
   * @param {{bytePosition: number, bitPosition: number}} cursor mutated in place
   * @param {number} numBits number of bits to read; zero or less reads nothing
   * @returns {number} the bits read, or 0 when nothing was read
   * @throws {CramBufferOverrunError} if the read would run past the end of `data`
   */
  _getBits(data, cursor, numBits) {
    let val = 0
    // bits already consumed in the current byte plus the bits requested,
    // expressed in bytes, must not reach past the end of the data
    const bytesSpanned = (7 - cursor.bitPosition + numBits) / 8
    if (cursor.bytePosition + bytesSpanned > data.length) {
      throw new CramBufferOverrunError(
        'read error during decoding. the file seems to be truncated.',
      )
    }
    // `> 0` rather than a truthiness test: a negative or fractional count
    // (possible with malformed codec parameters) must not loop forever
    for (let remaining = numBits; remaining > 0; remaining -= 1) {
      val <<= 1
      val |= (data[cursor.bytePosition] >> cursor.bitPosition) & 1
      cursor.bitPosition -= 1
      if (cursor.bitPosition < 0) {
        cursor.bytePosition += 1
      }
      cursor.bitPosition &= 7 // wraps -1 back around to 7
    }
    return val
  }
}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import { CramUnimplementedError } from '../../errors'
|
|
2
|
+
import CramCodec from './_base'
|
|
3
|
+
|
|
4
|
+
/**
 * BETA codec: reads a fixed number of bits (`parameters.length`) from the
 * core data block and subtracts `parameters.offset`. Only the 'int' data
 * type is implemented.
 */
export default class BetaCodec extends CramCodec {
  /**
   * @param {object} [parameters] expected to carry `length` and `offset`
   * @param {string} dataType must be 'int'
   * @throws {CramUnimplementedError} for any other data type
   */
  constructor(parameters = {}, dataType) {
    super(parameters, dataType)
    const isInt = this.dataType === 'int'
    if (!isInt) {
      throw new CramUnimplementedError(
        `${this.dataType} decoding not yet implemented by BETA codec`,
      )
    }
  }

  /**
   * @returns {number} the next `length` bits of the core block minus `offset`
   */
  decode(slice, coreDataBlock, blocksByContentId, cursors) {
    const { length, offset } = this.parameters
    const rawValue = this._getBits(
      coreDataBlock.content,
      cursors.coreBlock,
      length,
    )
    return rawValue - offset
  }
}
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import { tinyMemoize } from '../util'
|
|
2
|
+
|
|
3
|
+
import CramCodec from './_base'
|
|
4
|
+
|
|
5
|
+
/**
 * Byte-array codec that decodes a length with one sub-codec and then that
 * many values with a second sub-codec.
 *
 * NOTE(review): the class is named ByteArrayStopCodec even though its error
 * message refers to "byteArrayLength"; the name looks like a copy/paste slip.
 * It is kept because the memoization statement after the class refers to it.
 */
export default class ByteArrayStopCodec extends CramCodec {
  /**
   * @param {object} [parameters] expected to carry `lengthsEncoding` and
   *   `valuesEncoding`
   * @param {string} dataType must be 'byteArray'
   * @param {Function} instantiateCodec factory `(encoding, dataType) => codec`
   *   used to build the two sub-codecs
   * @throws {TypeError} for any other data type
   */
  constructor(parameters = {}, dataType, instantiateCodec) {
    super(parameters, dataType)
    this.instantiateCodec = instantiateCodec
    if (dataType !== 'byteArray') {
      throw new TypeError(
        `byteArrayLength does not support data type ${dataType}`,
      )
    }
  }

  /**
   * @returns {Array} the decoded values, one per element
   */
  decode(slice, coreDataBlock, blocksByContentId, cursors) {
    // both sub-codecs are driven with the same four arguments
    const decodeArgs = [slice, coreDataBlock, blocksByContentId, cursors]

    const arrayLength = this._getLengthCodec().decode(...decodeArgs)

    const valueCodec = this._getDataCodec()
    const values = new Array(arrayLength)
    for (let position = 0; position < arrayLength; position += 1) {
      values[position] = valueCodec.decode(...decodeArgs)
    }
    return values
  }

  // memoize
  /** @returns {object} an 'int' codec built from `parameters.lengthsEncoding` */
  _getLengthCodec() {
    return this.instantiateCodec(this.parameters.lengthsEncoding, 'int')
  }

  // memoize
  /** @returns {object} a 'byte' codec built from `parameters.valuesEncoding` */
  _getDataCodec() {
    return this.instantiateCodec(this.parameters.valuesEncoding, 'byte')
  }
}
|
|
52
|
+
|
|
53
|
+
// Pass each sub-codec getter of the class above through tinyMemoize
// (imported from ../util).
for (const method of ['_getLengthCodec', '_getDataCodec']) {
  tinyMemoize(ByteArrayStopCodec, method)
}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
import { CramBufferOverrunError, CramMalformedError } from '../../errors'
|
|
2
|
+
|
|
3
|
+
import CramCodec from './_base'
|
|
4
|
+
|
|
5
|
+
/**
 * BYTE_ARRAY_STOP codec: reads bytes from an external block up to, but not
 * including, a stop byte. Only the 'byteArray' data type is supported.
 */
export default class ByteArrayStopCodec extends CramCodec {
  /**
   * @param {object} [parameters] expected to carry `blockContentId` and
   *   `stopByte`
   * @param {string} dataType must be 'byteArray'
   * @throws {TypeError} for any other data type
   */
  constructor(parameters = {}, dataType) {
    super(parameters, dataType)
    if (dataType === 'byteArray') {
      this._decode = this._decodeByteArray
    } else {
      throw new TypeError(
        `byteArrayStop codec does not support data type ${dataType}`,
      )
    }
  }

  /**
   * @returns the bytes between the block's cursor and the next stop byte
   * @throws {CramMalformedError} if no block has the configured content ID
   * @throws {CramBufferOverrunError} if the block ends before a stop byte
   */
  decode(slice, coreDataBlock, blocksByContentId, cursors) {
    const { blockContentId } = this.parameters
    const contentBlock = blocksByContentId[blockContentId]
    if (!contentBlock) {
      throw new CramMalformedError(
        `no block found with content ID ${blockContentId}`,
      )
    }
    const cursor = cursors.externalBlocks.getCursor(blockContentId)
    return this._decode(contentBlock, cursor)
  }

  /**
   * Scan forward from `cursor.bytePosition` to the next stop byte, return the
   * bytes before it and leave the cursor just past the stop byte.
   *
   * @param {{content: Uint8Array|Buffer}} contentBlock
   * @param {{bytePosition: number}} cursor mutated in place on success
   * @throws {CramBufferOverrunError} if the end of the buffer is reached
   *   without finding the stop byte; the cursor is left unchanged
   */
  _decodeByteArray(contentBlock, cursor) {
    const dataBuffer = contentBlock.content
    const { stopByte } = this.parameters
    // scan to the next stop byte
    const startPosition = cursor.bytePosition
    let stopPosition = startPosition
    while (
      stopPosition < dataBuffer.length &&
      dataBuffer[stopPosition] !== stopByte
    ) {
      stopPosition += 1
    }
    // The bounds test has to happen after the scan: when it was nested
    // inside a loop that already required stopPosition < length it could
    // never fire, and a missing stop byte went unnoticed.
    if (stopPosition >= dataBuffer.length) {
      throw new CramBufferOverrunError(
        `byteArrayStop reading beyond length of data buffer?`,
      )
    }
    cursor.bytePosition = stopPosition + 1
    return dataBuffer.slice(startPosition, stopPosition)
  }
}
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
import {
|
|
2
|
+
CramUnimplementedError,
|
|
3
|
+
CramMalformedError,
|
|
4
|
+
CramBufferOverrunError,
|
|
5
|
+
} from '../../errors'
|
|
6
|
+
import CramCodec from './_base'
|
|
7
|
+
import { parseItf8 } from '../util'
|
|
8
|
+
|
|
9
|
+
/**
 * EXTERNAL codec: reads values from the external block selected by
 * `parameters.blockContentId`. 'int' values are read with parseItf8,
 * 'byte' values are single raw bytes.
 */
export default class ExternalCodec extends CramCodec {
  /**
   * @param {object} [parameters] expected to carry `blockContentId`
   * @param {string} dataType 'int' or 'byte'
   * @throws {CramUnimplementedError} for any other data type
   */
  constructor(parameters = {}, dataType) {
    super(parameters, dataType)
    switch (this.dataType) {
      case 'int':
        this._decodeData = this._decodeInt
        break
      case 'byte':
        this._decodeData = this._decodeByte
        break
      default:
        throw new CramUnimplementedError(
          `${this.dataType} decoding not yet implemented by EXTERNAL codec`,
        )
    }
  }

  /**
   * @returns {number} the next value of the configured external block
   * @throws {CramMalformedError} if no block has the configured content ID
   */
  decode(slice, coreDataBlock, blocksByContentId, cursors) {
    const { blockContentId } = this.parameters
    const contentBlock = blocksByContentId[blockContentId]
    if (contentBlock) {
      const cursor = cursors.externalBlocks.getCursor(blockContentId)
      return this._decodeData(contentBlock, cursor)
    }
    throw new CramMalformedError(
      `no block found with content ID ${blockContentId}`,
    )
  }

  /** Parse one value with parseItf8 and advance the cursor by the bytes it reports. */
  _decodeInt(contentBlock, cursor) {
    const [value, consumed] = parseItf8(
      contentBlock.content,
      cursor.bytePosition,
    )
    cursor.bytePosition += consumed
    return value
  }

  /**
   * Read one raw byte and advance the cursor by one.
   *
   * @throws {CramBufferOverrunError} if the cursor is at or past the end
   */
  _decodeByte(contentBlock, cursor) {
    const { content } = contentBlock
    if (cursor.bytePosition >= content.length) {
      throw new CramBufferOverrunError(
        'attempted to read beyond end of block. this file seems truncated.',
      )
    }
    const value = content[cursor.bytePosition]
    cursor.bytePosition += 1
    return value
  }
}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import { CramUnimplementedError } from '../../errors'
|
|
2
|
+
import CramCodec from './_base'
|
|
3
|
+
|
|
4
|
+
/**
 * GAMMA codec: reads a value from the core data block as a run of zero bits,
 * a one bit, and then as many further bits as there were zeros. Only the
 * 'int' data type is implemented.
 */
export default class GammaCodec extends CramCodec {
  /**
   * @param {object} [parameters] expected to carry `offset`
   * @param {string} dataType must be 'int'
   * @throws {CramUnimplementedError} for any other data type
   */
  constructor(parameters = {}, dataType) {
    super(parameters, dataType)
    const isInt = this.dataType === 'int'
    if (!isInt) {
      throw new CramUnimplementedError(
        `${this.dataType} decoding not yet implemented by GAMMA codec`,
      )
    }
  }

  /**
   * @returns {number} the decoded value minus `parameters.offset`
   */
  decode(slice, coreDataBlock, blocksByContentId, cursors) {
    const readBits = count =>
      this._getBits(coreDataBlock.content, cursors.coreBlock, count)

    // one more than the number of zero bits seen before the first one bit
    let length = 1
    while (readBits(1) === 0) {
      length += 1
    }

    // the terminating one bit is the top bit; the low bits follow it
    const lowBits = readBits(length - 1)
    const value = lowBits | (1 << (length - 1))
    return value - this.parameters.offset
  }
}
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
import { CramMalformedError } from '../../errors'
|
|
2
|
+
import CramCodec from './_base'
|
|
3
|
+
|
|
4
|
+
/**
 * Count the set bits (population count) of the low 32 bits of `ii`, using
 * the parallel bit-summing method.
 *
 * @param {number} ii integer to inspect; treated as a 32-bit value
 * @returns {number} number of bits set, 0..32
 */
function numberOfSetBits(ii) {
  // Sum adjacent bit pairs. The mask must be applied to the shifted value
  // before the subtraction; masking the difference instead gives wrong
  // counts (0 for 3, 1 for 7).
  let i = ii - ((ii >>> 1) & 0x55555555)
  // sum adjacent 2-bit fields into 4-bit fields
  i = (i & 0x33333333) + ((i >>> 2) & 0x33333333)
  // sum into bytes, then add the four bytes together via the multiply
  return (((i + (i >>> 4)) & 0x0f0f0f0f) * 0x01010101) >>> 24
}
|
|
9
|
+
|
|
10
|
+
/**
 * Canonical Huffman codec for 'byte' and 'int' values stored in the core
 * data block. The code table is derived from `parameters.symbols` and
 * `parameters.bitLengths` (both of length `parameters.numCodes`).
 */
export default class HuffmanIntCodec extends CramCodec {
  /**
   * @param {object} [parameters] expected to carry `numCodes`, `symbols`
   *   and `bitLengths`
   * @param {string} dataType 'byte' or 'int'
   * @throws {TypeError} for any other data type
   * @throws {CramMalformedError} if a generated code has more set bits than
   *   its bit length allows
   */
  constructor(parameters = {}, dataType) {
    super(parameters, dataType)
    if (!['byte', 'int'].includes(this.dataType)) {
      throw new TypeError(
        `${this.dataType} decoding not yet implemented by HUFFMAN_INT codec`,
      )
    }

    this.buildCodeBook()
    this.buildCodes()
    this.buildCaches()

    // if this is a degenerate zero-length huffman code, special-case the decoding
    if (this.sortedCodes[0].bitLength === 0) {
      this._decode = this._decodeZeroLengthCode
    }
  }

  /** Group the symbols by bit length into `this.codeBook` ({ bitLength: symbols[] }). */
  buildCodeBook() {
    // parse the parameters together into a `codes` data structure
    const { numCodes, symbols, bitLengths } = this.parameters
    const codes = new Array(numCodes)
    for (let i = 0; i < numCodes; i += 1) {
      codes[i] = { symbol: symbols[i], bitLength: bitLengths[i] }
    }
    // sort the codes by bit length and symbol value
    codes.sort((a, b) => a.bitLength - b.bitLength || a.symbol - b.symbol)

    this.codeBook = {}
    codes.forEach(({ bitLength, symbol }) => {
      if (!this.codeBook[bitLength]) {
        this.codeBook[bitLength] = []
      }
      this.codeBook[bitLength].push(symbol)
    })
  }

  /**
   * Assign a bit code to every symbol, filling `this.codes`
   * ({ symbol: { bitLength, value, bitCode } }).
   *
   * @throws {CramMalformedError} if a code has more set bits than its length
   */
  buildCodes() {
    this.codes = {}
    let codeLength = 0
    let codeValue = -1
    // integer-like object keys are enumerated in ascending numeric order, so
    // shorter codes are assigned before longer ones
    Object.entries(this.codeBook).forEach(([bitLengthKey, symbols]) => {
      const bitLength = Number.parseInt(bitLengthKey, 10)
      symbols.forEach(symbol => {
        const code = { bitLength, value: symbol }
        codeValue += 1
        const delta = bitLength - codeLength // new length?
        codeValue <<= delta // pad with 0's
        code.bitCode = codeValue // calculated: huffman code
        codeLength += delta // adjust current code length

        if (numberOfSetBits(codeValue) > bitLength) {
          throw new CramMalformedError('Symbol out of range')
        }

        this.codes[symbol] = code
      })
    })
  }

  /** Precompute the sorted views and the bit-code lookup table used by `_decode`. */
  buildCaches() {
    this.sortedCodes = Object.values(this.codes).sort(
      (a, b) => a.bitLength - b.bitLength || a.bitCode - b.bitCode,
    )

    this.sortedByValue = Object.values(this.codes).sort(
      (a, b) => a.value - b.value,
    )

    this.sortedValuesByBitCode = this.sortedCodes.map(c => c.value)
    this.sortedBitCodes = this.sortedCodes.map(c => c.bitCode)
    this.sortedBitLengthsByBitCode = this.sortedCodes.map(c => c.bitLength)
    const maxBitCode = Math.max(...this.sortedBitCodes)

    // bitCodeToValue[bitCode] is the position of that code in sortedCodes,
    // or -1 when no code has that bit pattern
    this.bitCodeToValue = new Array(maxBitCode + 1).fill(-1)
    for (let i = 0; i < this.sortedBitCodes.length; i += 1) {
      this.bitCodeToValue[this.sortedCodes[i].bitCode] = i
    }
  }

  /**
   * @returns {number} the next symbol decoded from the core data block
   */
  decode(slice, coreDataBlock, blocksByContentId, cursors) {
    return this._decode(slice, coreDataBlock, cursors.coreBlock)
  }

  // the special case for zero-length codes: no bits are read at all
  _decodeZeroLengthCode() {
    return this.sortedCodes[0].value
  }

  /**
   * Read bits one code length at a time until they match a known code.
   *
   * @throws {CramMalformedError} if the bits read match no code
   */
  _decode(slice, coreDataBlock, coreCursor) {
    const input = coreDataBlock.content
    const numCodes = this.sortedCodes.length

    let prevLen = 0
    let bits = 0
    for (let i = 0; i < numCodes; i += 1) {
      // extend the bits read so far up to the length of the current code
      const length = this.sortedCodes[i].bitLength
      bits <<= length - prevLen
      bits |= this._getBits(input, coreCursor, length - prevLen)
      prevLen = length

      const index = this.bitCodeToValue[bits]
      if (index > -1 && this.sortedBitLengthsByBitCode[index] === length) {
        return this.sortedValuesByBitCode[index]
      }

      // Skip the remaining codes of this same length. The bounds test comes
      // first so that running off the end of the table falls through to the
      // CramMalformedError below instead of a TypeError on `undefined`.
      while (i + 1 < numCodes && this.sortedCodes[i + 1].bitLength === length) {
        i += 1
      }
    }
    throw new CramMalformedError('Huffman symbol not found.')
  }
}
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import { CramUnimplementedError } from '../../errors'
|
|
2
|
+
|
|
3
|
+
import HuffmanIntCodec from './huffman'
|
|
4
|
+
import ExternalCodec from './external'
|
|
5
|
+
import ByteArrayStopCodec from './byteArrayStop'
|
|
6
|
+
import ByteArrayLengthCodec from './byteArrayLength'
|
|
7
|
+
import BetaCodec from './beta'
|
|
8
|
+
import GammaCodec from './gamma'
|
|
9
|
+
import SubexpCodec from './subexp'
|
|
10
|
+
|
|
11
|
+
// Codec implementations keyed by numeric codec ID. IDs 2 (Golomb) and
// 8 (Golomb-Rice) are intentionally absent: no implementation exists here.
const codecClasses = {
  1: ExternalCodec,
  3: HuffmanIntCodec,
  4: ByteArrayLengthCodec,
  5: ByteArrayStopCodec,
  6: BetaCodec,
  7: SubexpCodec,
  9: GammaCodec,
}

/**
 * Look up the codec class registered for a codec ID.
 *
 * @param {number} id numeric codec ID
 * @returns {Function|undefined} the codec class, or undefined when no codec
 *   is registered under that ID
 */
export function getCodecClassWithId(id) {
  const CodecClass = codecClasses[id]
  return CodecClass
}
|
|
26
|
+
|
|
27
|
+
/**
 * Build a codec instance for an encoding description.
 *
 * @param {object} encodingData carries `codecId` and `parameters`
 * @param {string} dataType data type handed to the codec constructor; the
 *   special value 'ignore' looks up codec ID 0 instead of `codecId`
 * @returns {object} a new codec; this function itself is passed as the third
 *   constructor argument so the codec can build nested codecs
 * @throws {CramUnimplementedError} if no codec class is registered for the ID
 */
export function instantiateCodec(encodingData, dataType) {
  const lookupId = dataType === 'ignore' ? 0 : encodingData.codecId
  const CodecClass = getCodecClassWithId(lookupId)
  if (CodecClass) {
    return new CodecClass(encodingData.parameters, dataType, instantiateCodec)
  }
  throw new CramUnimplementedError(
    `no codec implemented for codec ID ${encodingData.codecId}`,
  )
}
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
import { CramUnimplementedError } from '../../errors'
|
|
2
|
+
import CramCodec from './_base'
|
|
3
|
+
|
|
4
|
+
/**
 * SUBEXP codec: reads a run of one bits terminated by a zero bit from the
 * core data block, then a number of value bits that depends on the run
 * length and on `parameters.K`. Only the 'int' data type is implemented.
 */
export default class SubexpCodec extends CramCodec {
  /**
   * @param {object} [parameters] expected to carry `K` and `offset`
   * @param {string} dataType must be 'int'
   * @throws {CramUnimplementedError} for any other data type
   */
  constructor(parameters = {}, dataType) {
    super(parameters, dataType)
    const isInt = this.dataType === 'int'
    if (!isInt) {
      throw new CramUnimplementedError(
        `${this.dataType} decoding not yet implemented by SUBEXP codec`,
      )
    }
  }

  /**
   * @returns {number} the decoded value minus `parameters.offset`
   */
  decode(slice, coreDataBlock, blocksByContentId, cursors) {
    const readBits = count =>
      this._getBits(coreDataBlock.content, cursors.coreBlock, count)

    // count the one bits; the zero bit that ends the run is consumed too
    let numLeadingOnes = 0
    while (readBits(1)) {
      numLeadingOnes += 1
    }

    const { K } = this.parameters
    let n
    if (numLeadingOnes === 0) {
      // no prefix: the value is just the next K bits
      n = readBits(K)
    } else {
      // the prefix length picks the width b; bit b is implied and always set
      const b = numLeadingOnes + K - 1
      n = (1 << b) | readBits(b)
    }

    return n - this.parameters.offset
  }
}
|