@gmod/cram 1.6.1 → 1.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. package/CHANGELOG.md +4 -0
  2. package/package.json +2 -1
  3. package/src/craiIndex.js +180 -0
  4. package/src/cramFile/codecs/_base.js +49 -0
  5. package/src/cramFile/codecs/beta.js +23 -0
  6. package/src/cramFile/codecs/byteArrayLength.js +55 -0
  7. package/src/cramFile/codecs/byteArrayStop.js +50 -0
  8. package/src/cramFile/codecs/external.js +54 -0
  9. package/src/cramFile/codecs/gamma.js +30 -0
  10. package/src/cramFile/codecs/huffman.js +137 -0
  11. package/src/cramFile/codecs/index.js +38 -0
  12. package/src/cramFile/codecs/subexp.js +32 -0
  13. package/src/cramFile/constants.js +55 -0
  14. package/src/cramFile/container/compressionScheme.js +144 -0
  15. package/src/cramFile/container/index.js +119 -0
  16. package/src/cramFile/file.js +347 -0
  17. package/src/cramFile/index.js +3 -0
  18. package/src/cramFile/record.js +337 -0
  19. package/src/cramFile/sectionParsers.js +379 -0
  20. package/src/cramFile/slice/decodeRecord.js +362 -0
  21. package/src/cramFile/slice/index.js +497 -0
  22. package/src/cramFile/util.js +169 -0
  23. package/src/errors.js +22 -0
  24. package/src/index.js +5 -0
  25. package/src/indexedCramFile.js +191 -0
  26. package/src/io/bufferCache.js +66 -0
  27. package/src/io/index.js +26 -0
  28. package/src/io/localFile.js +35 -0
  29. package/src/io/remoteFile.js +71 -0
  30. package/src/rans/README.md +1 -0
  31. package/src/rans/constants.js +5 -0
  32. package/src/rans/d04.js +83 -0
  33. package/src/rans/d14.js +59 -0
  34. package/src/rans/decoding.js +141 -0
  35. package/src/rans/frequencies.js +121 -0
  36. package/src/rans/index.js +249 -0
  37. package/src/sam.js +15 -0
  38. package/src/unzip-pako.ts +5 -0
  39. package/src/unzip.ts +2 -0
@@ -0,0 +1,55 @@
1
+ const Constants = {
2
+ CRAM_FLAG_PRESERVE_QUAL_SCORES: 1 << 0,
3
+ CRAM_FLAG_DETACHED: 1 << 1,
4
+ CRAM_FLAG_MATE_DOWNSTREAM: 1 << 2,
5
+ CRAM_FLAG_NO_SEQ: 1 << 3,
6
+ CRAM_FLAG_MASK: (1 << 4) - 1,
7
+
8
+ // mate read is reversed
9
+ CRAM_M_REVERSE: 1,
10
+ // mated read is unmapped
11
+ CRAM_M_UNMAP: 2,
12
+
13
+ // the read is paired in sequencing, no matter whether it is mapped in a pair
14
+ BAM_FPAIRED: 1,
15
+ // the read is mapped in a proper pair
16
+ BAM_FPROPER_PAIR: 2,
17
+ // the read itself is unmapped; conflictive with BAM_FPROPER_PAIR
18
+ BAM_FUNMAP: 4,
19
+ // the mate is unmapped
20
+ BAM_FMUNMAP: 8,
21
+ // the read is mapped to the reverse strand
22
+ BAM_FREVERSE: 16,
23
+ // the mate is mapped to the reverse strand
24
+ BAM_FMREVERSE: 32,
25
+ // this is read1
26
+ BAM_FREAD1: 64,
27
+ // this is read2
28
+ BAM_FREAD2: 128,
29
+ // not primary alignment
30
+ BAM_FSECONDARY: 256,
31
+ // QC failure
32
+ BAM_FQCFAIL: 512,
33
+ // optical or PCR duplicate
34
+ BAM_FDUP: 1024,
35
+ // supplementary alignment
36
+ BAM_FSUPPLEMENTARY: 2048,
37
+
38
+ BAM_CMATCH: 0,
39
+ BAM_CINS: 1,
40
+ BAM_CDEL: 2,
41
+ BAM_CREF_SKIP: 3,
42
+ BAM_CSOFT_CLIP: 4,
43
+ BAM_CHARD_CLIP: 5,
44
+ BAM_CPAD: 6,
45
+ BAM_CEQUAL: 7,
46
+ BAM_CDIFF: 8,
47
+ BAM_CBACK: 9,
48
+
49
+ BAM_CIGAR_STR: 'MIDNSHP:XB',
50
+ BAM_CIGAR_SHIFT: 4,
51
+ BAM_CIGAR_MASK: 0xf,
52
+ BAM_CIGAR_TYPE: 0x3c1a7,
53
+ }
54
+
55
+ export default Constants
@@ -0,0 +1,144 @@
1
+ import { CramMalformedError } from '../../errors'
2
+ import { instantiateCodec } from '../codecs'
3
+
4
+ // the hardcoded data type to be decoded for each core
5
+ // data field
6
+ const dataSeriesTypes = {
7
+ BF: 'int',
8
+ CF: 'int',
9
+ RI: 'int',
10
+ RL: 'int',
11
+ AP: 'int',
12
+ RG: 'int',
13
+ MF: 'int',
14
+ NS: 'int',
15
+ NP: 'int',
16
+ TS: 'int',
17
+ NF: 'int',
18
+ TC: 'byte',
19
+ TN: 'int',
20
+ FN: 'int',
21
+ FC: 'byte',
22
+ FP: 'int',
23
+ BS: 'byte',
24
+ IN: 'byteArray',
25
+ SC: 'byteArray',
26
+ DL: 'int',
27
+ BA: 'byte',
28
+ BB: 'byteArray',
29
+ RS: 'int',
30
+ PD: 'int',
31
+ HC: 'int',
32
+ MQ: 'int',
33
+ RN: 'byteArray',
34
+ QS: 'byte',
35
+ QQ: 'byteArray',
36
+ TL: 'int',
37
+ TM: 'ignore',
38
+ TV: 'ignore',
39
+ }
40
+
41
+ function parseSubstitutionMatrix(byteArray) {
42
+ const matrix = new Array(5)
43
+ for (let i = 0; i < 5; i += 1) {
44
+ matrix[i] = new Array(4)
45
+ }
46
+
47
+ matrix[0][(byteArray[0] >> 6) & 3] = 'C'
48
+ matrix[0][(byteArray[0] >> 4) & 3] = 'G'
49
+ matrix[0][(byteArray[0] >> 2) & 3] = 'T'
50
+ matrix[0][(byteArray[0] >> 0) & 3] = 'N'
51
+
52
+ matrix[1][(byteArray[1] >> 6) & 3] = 'A'
53
+ matrix[1][(byteArray[1] >> 4) & 3] = 'G'
54
+ matrix[1][(byteArray[1] >> 2) & 3] = 'T'
55
+ matrix[1][(byteArray[1] >> 0) & 3] = 'N'
56
+
57
+ matrix[2][(byteArray[2] >> 6) & 3] = 'A'
58
+ matrix[2][(byteArray[2] >> 4) & 3] = 'C'
59
+ matrix[2][(byteArray[2] >> 2) & 3] = 'T'
60
+ matrix[2][(byteArray[2] >> 0) & 3] = 'N'
61
+
62
+ matrix[3][(byteArray[3] >> 6) & 3] = 'A'
63
+ matrix[3][(byteArray[3] >> 4) & 3] = 'C'
64
+ matrix[3][(byteArray[3] >> 2) & 3] = 'G'
65
+ matrix[3][(byteArray[3] >> 0) & 3] = 'N'
66
+
67
+ matrix[4][(byteArray[4] >> 6) & 3] = 'A'
68
+ matrix[4][(byteArray[4] >> 4) & 3] = 'C'
69
+ matrix[4][(byteArray[4] >> 2) & 3] = 'G'
70
+ matrix[4][(byteArray[4] >> 0) & 3] = 'T'
71
+
72
+ return matrix
73
+ }
74
+
75
+ export default class CramContainerCompressionScheme {
76
+ constructor(content) {
77
+ Object.assign(this, content)
78
+ // interpret some of the preservation map tags for convenient use
79
+ this.readNamesIncluded = content.preservation.RN
80
+ this.APdelta = content.preservation.AP
81
+ this.referenceRequired = !!content.preservation.RR
82
+ this.tagIdsDictionary = content.preservation.TD
83
+ this.substitutionMatrix = parseSubstitutionMatrix(content.preservation.SM)
84
+
85
+ this.dataSeriesCodecCache = {}
86
+ this.tagCodecCache = {}
87
+ }
88
+
89
+ /**
90
+ * @param {string} tagName three-character tag name
91
+ * @private
92
+ */
93
+ getCodecForTag(tagName) {
94
+ if (!this.tagCodecCache[tagName]) {
95
+ const encodingData = this.tagEncoding[tagName]
96
+ if (encodingData) {
97
+ this.tagCodecCache[tagName] = instantiateCodec(
98
+ encodingData,
99
+ 'byteArray', // all tags are byte array data
100
+ )
101
+ }
102
+ }
103
+ return this.tagCodecCache[tagName]
104
+ }
105
+
106
+ /**
107
+ *
108
+ * @param {number} tagListId ID of the tag list to fetch from the tag dictionary
109
+ * @private
110
+ */
111
+ getTagNames(tagListId) {
112
+ return this.tagIdsDictionary[tagListId]
113
+ }
114
+
115
+ getCodecForDataSeries(dataSeriesName) {
116
+ if (!this.dataSeriesCodecCache[dataSeriesName]) {
117
+ const encodingData = this.dataSeriesEncoding[dataSeriesName]
118
+ if (encodingData) {
119
+ const dataType = dataSeriesTypes[dataSeriesName]
120
+ if (!dataType) {
121
+ throw new CramMalformedError(
122
+ `data series name ${dataSeriesName} not defined in file compression header`,
123
+ )
124
+ }
125
+ this.dataSeriesCodecCache[dataSeriesName] = instantiateCodec(
126
+ encodingData,
127
+ dataType,
128
+ )
129
+ }
130
+ }
131
+ return this.dataSeriesCodecCache[dataSeriesName]
132
+ }
133
+
134
+ toJSON() {
135
+ const data = {}
136
+ Object.keys(this).forEach(k => {
137
+ if (/Cache$/.test(k)) {
138
+ return
139
+ }
140
+ data[k] = this[k]
141
+ })
142
+ return data
143
+ }
144
+ }
@@ -0,0 +1,119 @@
1
+ import { CramMalformedError } from '../../errors'
2
+
3
+ import { itf8Size, parseItem, tinyMemoize } from '../util'
4
+ import CramSlice from '../slice'
5
+ import CramContainerCompressionScheme from './compressionScheme'
6
+
7
+ export default class CramContainer {
8
+ constructor(cramFile, position) {
9
+ // cram file this container comes from
10
+ this.file = cramFile
11
+ // position of this container in the file
12
+ this.filePosition = position
13
+ // console.log(`container: ${this.filePosition}`)
14
+ }
15
+
16
+ // memoize
17
+ getHeader() {
18
+ return this._readContainerHeader(this.filePosition)
19
+ }
20
+
21
+ // memoize
22
+ async getCompressionHeaderBlock() {
23
+ const containerHeader = await this.getHeader()
24
+
25
+ // if there are no records in the container, there will be no compression header
26
+ if (!containerHeader.numRecords) {
27
+ return null
28
+ }
29
+ const sectionParsers = await this.file.getSectionParsers()
30
+ const block = await this.getFirstBlock()
31
+ if (block.contentType !== 'COMPRESSION_HEADER') {
32
+ throw new CramMalformedError(
33
+ `invalid content type ${block.contentType} in what is supposed to be the compression header block`,
34
+ )
35
+ }
36
+ const content = parseItem(
37
+ block.content,
38
+ sectionParsers.cramCompressionHeader.parser,
39
+ 0,
40
+ block.contentPosition,
41
+ )
42
+ block.content = content
43
+ return block
44
+ }
45
+
46
+ async getFirstBlock() {
47
+ const containerHeader = await this.getHeader()
48
+ return this.file.readBlock(containerHeader._endPosition)
49
+ }
50
+
51
+ // parses the compression header data into a CramContainerCompressionScheme object
52
+ // memoize
53
+ async getCompressionScheme() {
54
+ const header = await this.getCompressionHeaderBlock()
55
+ if (!header) {
56
+ return undefined
57
+ }
58
+ return new CramContainerCompressionScheme(header.content)
59
+ }
60
+
61
+ getSlice(slicePosition, sliceSize) {
62
+ // note: slicePosition is relative to the end of the container header
63
+ // TODO: perhaps we should cache slices?
64
+ return new CramSlice(this, slicePosition, sliceSize)
65
+ }
66
+
67
+ async _readContainerHeader(position) {
68
+ const sectionParsers = await this.file.getSectionParsers()
69
+ const { cramContainerHeader1, cramContainerHeader2 } = sectionParsers
70
+ const { size: fileSize } = await this.file.stat()
71
+
72
+ if (position >= fileSize) {
73
+ return undefined
74
+ }
75
+
76
+ // parse the container header. do it in 2 pieces because you cannot tell
77
+ // how much to buffer until you read numLandmarks
78
+ const bytes1 = Buffer.allocUnsafe(cramContainerHeader1.maxLength)
79
+ await this.file.read(bytes1, 0, cramContainerHeader1.maxLength, position)
80
+ const header1 = parseItem(bytes1, cramContainerHeader1.parser)
81
+ const numLandmarksSize = itf8Size(header1.numLandmarks)
82
+ if (position + header1.length >= fileSize) {
83
+ console.warn(
84
+ `${this.file}: container header at ${position} indicates that the container has length ${header1.length}, which extends beyond the length of the file. Skipping this container.`,
85
+ )
86
+ return undefined
87
+ }
88
+ const bytes2 = Buffer.allocUnsafe(
89
+ cramContainerHeader2.maxLength(header1.numLandmarks),
90
+ )
91
+ await this.file.read(
92
+ bytes2,
93
+ 0,
94
+ cramContainerHeader2.maxLength(header1.numLandmarks),
95
+ position + header1._size - numLandmarksSize,
96
+ )
97
+ const header2 = parseItem(bytes2, cramContainerHeader2.parser)
98
+
99
+ if (this.file.validateChecksums && header2.crc32 !== undefined) {
100
+ await this.file.checkCrc32(
101
+ position,
102
+ header1._size + header2._size - numLandmarksSize - 4,
103
+ header2.crc32,
104
+ `container header beginning at position ${position}`,
105
+ )
106
+ }
107
+
108
+ const completeHeader = Object.assign(header1, header2, {
109
+ _size: header1._size + header2._size - numLandmarksSize,
110
+ _endPosition: header1._size + header2._size - numLandmarksSize + position,
111
+ })
112
+
113
+ return completeHeader
114
+ }
115
+ }
116
+
117
+ 'getHeader getCompressionHeaderBlock getCompressionScheme'
118
+ .split(' ')
119
+ .forEach(method => tinyMemoize(CramContainer, method))
@@ -0,0 +1,347 @@
1
+ import { unzip } from '../unzip'
2
+ import crc32 from 'buffer-crc32'
3
+ import LRU from 'quick-lru'
4
+
5
+ import { CramUnimplementedError, CramMalformedError } from '../errors'
6
+ import ransuncompress from '../rans'
7
+ import {
8
+ cramFileDefinition as cramFileDefinitionParser,
9
+ getSectionParsers,
10
+ } from './sectionParsers'
11
+ import htscodecs from '@jkbonfield/htscodecs'
12
+ import CramContainer from './container'
13
+
14
+ import { open } from '../io'
15
+ import { parseItem, tinyMemoize } from './util'
16
+ import { parseHeaderText } from '../sam'
17
+
18
+ export default class CramFile {
19
+ /**
20
+ * @param {object} args
21
+ * @param {object} [args.filehandle] - a filehandle that implements the stat() and
22
+ * read() methods of the Node filehandle API https://nodejs.org/api/fs.html#fs_class_filehandle
23
+ * @param {object} [args.path] - path to the cram file
24
+ * @param {object} [args.url] - url for the cram file. also supports file:// urls for local files
25
+ * @param {function} [args.seqFetch] - a function with signature
26
+ * `(seqId, startCoordinate, endCoordinate)` that returns a promise for a string of sequence bases
27
+ * @param {number} [args.cacheSize] optional maximum number of CRAM records to cache. default 20,000
28
+ * @param {boolean} [args.checkSequenceMD5] - default true. if false, disables verifying the MD5
29
+ * checksum of the reference sequence underlying a slice. In some applications, this check can cause an inconvenient amount (many megabases) of sequences to be fetched.
30
+ */
31
+ constructor(args) {
32
+ this.file = open(args.url, args.path, args.filehandle)
33
+ this.validateChecksums = true
34
+ this.fetchReferenceSequenceCallback = args.seqFetch
35
+ this.options = {
36
+ checkSequenceMD5: args.checkSequenceMD5 !== false,
37
+ cacheSize: args.cacheSize !== undefined ? args.cacheSize : 20000,
38
+ }
39
+
40
+ // cache of features in a slice, keyed by the
41
+ // slice offset. caches all of the features in a slice, or none.
42
+ // the cache is actually used by the slice object, it's just
43
+ // kept here at the level of the file
44
+ this.featureCache = new LRU({
45
+ maxSize: this.options.cacheSize,
46
+ })
47
+ }
48
+
49
+ toString() {
50
+ if (this.file.filename) {
51
+ return this.file.filename
52
+ }
53
+ if (this.file.url) {
54
+ return this.file.url
55
+ }
56
+
57
+ return '(cram file)'
58
+ }
59
+
60
+ // can just read this object like a filehandle
61
+ read(buffer, offset, length, position) {
62
+ return this.file.read(buffer, offset, length, position)
63
+ }
64
+
65
+ // can just stat this object like a filehandle
66
+ stat() {
67
+ return this.file.stat()
68
+ }
69
+
70
+ // memoized
71
+ async getDefinition() {
72
+ const headbytes = Buffer.allocUnsafe(cramFileDefinitionParser.maxLength)
73
+ await this.file.read(headbytes, 0, cramFileDefinitionParser.maxLength, 0)
74
+ const definition = cramFileDefinitionParser.parser.parse(headbytes).result
75
+ if (definition.majorVersion !== 2 && definition.majorVersion !== 3) {
76
+ throw new CramUnimplementedError(
77
+ `CRAM version ${definition.majorVersion} not supported`,
78
+ )
79
+ }
80
+ return definition
81
+ }
82
+
83
+ // memoize
84
+ async getSamHeader() {
85
+ const firstContainer = await this.getContainerById(0)
86
+ if (!firstContainer) {
87
+ throw new CramMalformedError('file contains no containers')
88
+ }
89
+
90
+ const { content } = await firstContainer.getFirstBlock()
91
+ // find the end of the trailing zeros in the header text
92
+ const headerLength = content.readInt32LE(0)
93
+ const textStart = 4
94
+ // let textEnd = content.length - 1
95
+ // while (textEnd >= textStart && !content[textEnd]) textEnd -= 1
96
+ // trim off the trailing zeros
97
+ const text = content.toString('utf8', textStart, textStart + headerLength)
98
+ this.header = text
99
+ return parseHeaderText(text)
100
+ }
101
+
102
+ async getHeaderText() {
103
+ await this.getSamHeader()
104
+ return this.header
105
+ }
106
+
107
+ // memoize
108
+ async getSectionParsers() {
109
+ const { majorVersion } = await this.getDefinition()
110
+ return getSectionParsers(majorVersion)
111
+ }
112
+
113
+ async getContainerById(containerNumber) {
114
+ const sectionParsers = await this.getSectionParsers()
115
+ let position = sectionParsers.cramFileDefinition.maxLength
116
+ const { size: fileSize } = await this.file.stat()
117
+ const { cramContainerHeader1 } = sectionParsers
118
+
119
+ // skip with a series of reads to the proper container
120
+ let currentContainer
121
+ for (let i = 0; i <= containerNumber; i += 1) {
122
+ // if we are about to go off the end of the file
123
+ // and have not found that container, it does not exist
124
+ if (position + cramContainerHeader1.maxLength + 8 >= fileSize) {
125
+ return undefined
126
+ }
127
+
128
+ currentContainer = this.getContainerAtPosition(position)
129
+ const currentHeader = await currentContainer.getHeader()
130
+ if (!currentHeader) {
131
+ throw new CramMalformedError(
132
+ `container ${containerNumber} not found in file`,
133
+ )
134
+ }
135
+ // if this is the first container, read all the blocks in the
136
+ // container to determine its length, because we cannot trust
137
+ // the container header's given length due to a bug somewhere
138
+ // in htslib
139
+ if (i === 0) {
140
+ position = currentHeader._endPosition
141
+ for (let j = 0; j < currentHeader.numBlocks; j += 1) {
142
+ const block = await this.readBlock(position)
143
+ position = block._endPosition
144
+ }
145
+ } else {
146
+ // otherwise, just traverse to the next container using the container's length
147
+ position += currentHeader._size + currentHeader.length
148
+ }
149
+ }
150
+
151
+ return currentContainer
152
+ }
153
+
154
+ async checkCrc32(position, length, recordedCrc32, description) {
155
+ const b = Buffer.allocUnsafe(length)
156
+ await this.file.read(b, 0, length, position)
157
+ const calculatedCrc32 = crc32.unsigned(b)
158
+ if (calculatedCrc32 !== recordedCrc32) {
159
+ throw new CramMalformedError(
160
+ `crc mismatch in ${description}: recorded CRC32 = ${recordedCrc32}, but calculated CRC32 = ${calculatedCrc32}`,
161
+ )
162
+ }
163
+ }
164
+
165
+ /**
166
+ * @returns {Promise[number]} the number of containers in the file
167
+ */
168
+ async containerCount() {
169
+ const sectionParsers = await this.getSectionParsers()
170
+ const { size: fileSize } = await this.file.stat()
171
+ const { cramContainerHeader1 } = sectionParsers
172
+
173
+ let containerCount = 0
174
+ let position = sectionParsers.cramFileDefinition.maxLength
175
+ while (position + cramContainerHeader1.maxLength + 8 < fileSize) {
176
+ const currentHeader = await this.getContainerAtPosition(
177
+ position,
178
+ ).getHeader()
179
+ if (!currentHeader) {
180
+ break
181
+ }
182
+ // if this is the first container, read all the blocks in the
183
+ // container, because we cannot trust the container
184
+ // header's given length due to a bug somewhere in htslib
185
+ if (containerCount === 0) {
186
+ position = currentHeader._endPosition
187
+ for (let j = 0; j < currentHeader.numBlocks; j += 1) {
188
+ const block = await this.readBlock(position)
189
+ position = block._endPosition
190
+ }
191
+ } else {
192
+ // otherwise, just traverse to the next container using the container's length
193
+ position += currentHeader._size + currentHeader.length
194
+ }
195
+ containerCount += 1
196
+ }
197
+
198
+ return containerCount
199
+ }
200
+
201
+ getContainerAtPosition(position) {
202
+ return new CramContainer(this, position)
203
+ }
204
+
205
+ async readBlockHeader(position) {
206
+ const sectionParsers = await this.getSectionParsers()
207
+ const { cramBlockHeader } = sectionParsers
208
+ const { size: fileSize } = await this.file.stat()
209
+
210
+ if (position + cramBlockHeader.maxLength >= fileSize) {
211
+ return undefined
212
+ }
213
+
214
+ const buffer = Buffer.allocUnsafe(cramBlockHeader.maxLength)
215
+ await this.file.read(buffer, 0, cramBlockHeader.maxLength, position)
216
+ return parseItem(buffer, cramBlockHeader.parser, 0, position)
217
+ }
218
+
219
+ async _parseSection(
220
+ section,
221
+ position,
222
+ size = section.maxLength,
223
+ preReadBuffer,
224
+ ) {
225
+ let buffer
226
+ if (preReadBuffer) {
227
+ buffer = preReadBuffer
228
+ } else {
229
+ const { size: fileSize } = await this.file.stat()
230
+ if (position + size >= fileSize) {
231
+ return undefined
232
+ }
233
+ buffer = Buffer.allocUnsafe(size)
234
+ await this.file.read(buffer, 0, size, position)
235
+ }
236
+ const data = parseItem(buffer, section.parser, 0, position)
237
+ if (data._size !== size) {
238
+ throw new CramMalformedError(
239
+ `section read error: requested size ${size} does not equal parsed size ${data._size}`,
240
+ )
241
+ }
242
+ return data
243
+ }
244
+
245
+ _uncompress(compressionMethod, inputBuffer, outputBuffer) {
246
+ if (compressionMethod === 'gzip') {
247
+ const result = unzip(inputBuffer)
248
+ result.copy(outputBuffer)
249
+ } else if (compressionMethod === 'bzip2') {
250
+ var bits = bzip2.array(inputBuffer)
251
+ var size = bzip2.header(bits)
252
+ var j = 0
253
+ do {
254
+ var chunk = bzip2.decompress(bits, size)
255
+ if (chunk != -1) {
256
+ Buffer.from(chunk).copy(outputBuffer, j)
257
+ j += chunk.length
258
+ size -= chunk.length
259
+ }
260
+ } while (chunk != -1)
261
+ } else if (compressionMethod === 'rans') {
262
+ ransuncompress(inputBuffer, outputBuffer)
263
+ //htscodecs r4x8 is slower, but compatible.
264
+ //htscodecs.r4x8_uncompress(inputBuffer, outputBuffer);
265
+ } else if (compressionMethod === 'rans4x16') {
266
+ htscodecs.r4x16_uncompress(inputBuffer, outputBuffer)
267
+ } else if (compressionMethod === 'arith') {
268
+ htscodecs.arith_uncompress(inputBuffer, outputBuffer)
269
+ } else if (compressionMethod === 'fqzcomp') {
270
+ htscodecs.fqzcomp_uncompress(inputBuffer, outputBuffer)
271
+ } else if (compressionMethod === 'tok3') {
272
+ htscodecs.tok3_uncompress(inputBuffer, outputBuffer)
273
+ } else {
274
+ throw new CramUnimplementedError(
275
+ `${compressionMethod} decompression not yet implemented`,
276
+ )
277
+ }
278
+ }
279
+
280
+ async readBlock(position) {
281
+ const { majorVersion } = await this.getDefinition()
282
+ const sectionParsers = await this.getSectionParsers()
283
+ const block = await this.readBlockHeader(position)
284
+ const blockContentPosition = block._endPosition
285
+ block.contentPosition = block._endPosition
286
+
287
+ const uncompressedData = Buffer.allocUnsafe(block.uncompressedSize)
288
+
289
+ if (block.compressionMethod !== 'raw') {
290
+ const compressedData = Buffer.allocUnsafe(block.compressedSize)
291
+ await this.read(
292
+ compressedData,
293
+ 0,
294
+ block.compressedSize,
295
+ blockContentPosition,
296
+ )
297
+
298
+ this._uncompress(
299
+ block.compressionMethod,
300
+ compressedData,
301
+ uncompressedData,
302
+ )
303
+ } else {
304
+ await this.read(
305
+ uncompressedData,
306
+ 0,
307
+ block.uncompressedSize,
308
+ blockContentPosition,
309
+ )
310
+ }
311
+
312
+ block.content = uncompressedData
313
+
314
+ if (majorVersion >= 3) {
315
+ // parse the crc32
316
+ const crc = await this._parseSection(
317
+ sectionParsers.cramBlockCrc32,
318
+ blockContentPosition + block.compressedSize,
319
+ )
320
+ block.crc32 = crc.crc32
321
+
322
+ // check the block data crc32
323
+ if (this.validateChecksums) {
324
+ await this.checkCrc32(
325
+ position,
326
+ block._size + block.compressedSize,
327
+ block.crc32,
328
+ 'block data',
329
+ )
330
+ }
331
+
332
+ // make the endposition and size reflect the whole block
333
+ block._endPosition = crc._endPosition
334
+ block._size =
335
+ block.compressedSize + sectionParsers.cramBlockCrc32.maxLength
336
+ } else {
337
+ block._endPosition = blockContentPosition + block.compressedSize
338
+ block._size = block.compressedSize
339
+ }
340
+
341
+ return block
342
+ }
343
+ }
344
+
345
+ 'getDefinition getSectionParsers getSamHeader'
346
+ .split(' ')
347
+ .forEach(method => tinyMemoize(CramFile, method))
@@ -0,0 +1,3 @@
1
+ import CramFile from './file'
2
+
3
+ export default CramFile