@gmod/cram 1.6.1 → 1.6.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. package/CHANGELOG.md +13 -0
  2. package/dist/craiIndex.js +1 -1
  3. package/dist/craiIndex.js.map +1 -1
  4. package/dist/cram-bundle.js +3 -3
  5. package/dist/cramFile/codecs/byteArrayLength.d.ts +1 -1
  6. package/dist/cramFile/codecs/byteArrayLength.js +1 -1
  7. package/dist/cramFile/codecs/byteArrayLength.js.map +1 -1
  8. package/dist/cramFile/codecs/byteArrayStop.js +1 -2
  9. package/dist/cramFile/codecs/byteArrayStop.js.map +1 -1
  10. package/dist/cramFile/codecs/external.js +1 -3
  11. package/dist/cramFile/codecs/external.js.map +1 -1
  12. package/dist/cramFile/container/compressionScheme.d.ts +1 -1
  13. package/dist/cramFile/container/compressionScheme.js +6 -4
  14. package/dist/cramFile/container/compressionScheme.js.map +1 -1
  15. package/dist/cramFile/file.js +17 -0
  16. package/dist/cramFile/file.js.map +1 -1
  17. package/dist/cramFile/slice/decodeRecord.js +62 -58
  18. package/dist/cramFile/slice/decodeRecord.js.map +1 -1
  19. package/dist/cramFile/slice/index.js +8 -5
  20. package/dist/cramFile/slice/index.js.map +1 -1
  21. package/esm/craiIndex.js +1 -1
  22. package/esm/craiIndex.js.map +1 -1
  23. package/esm/cramFile/codecs/byteArrayLength.d.ts +1 -1
  24. package/esm/cramFile/codecs/byteArrayLength.js +1 -1
  25. package/esm/cramFile/codecs/byteArrayLength.js.map +1 -1
  26. package/esm/cramFile/codecs/byteArrayStop.js +1 -2
  27. package/esm/cramFile/codecs/byteArrayStop.js.map +1 -1
  28. package/esm/cramFile/codecs/external.js +1 -3
  29. package/esm/cramFile/codecs/external.js.map +1 -1
  30. package/esm/cramFile/container/compressionScheme.d.ts +1 -1
  31. package/esm/cramFile/container/compressionScheme.js +6 -4
  32. package/esm/cramFile/container/compressionScheme.js.map +1 -1
  33. package/esm/cramFile/file.js +17 -0
  34. package/esm/cramFile/file.js.map +1 -1
  35. package/esm/cramFile/slice/decodeRecord.js +63 -59
  36. package/esm/cramFile/slice/decodeRecord.js.map +1 -1
  37. package/esm/cramFile/slice/index.js +11 -7
  38. package/esm/cramFile/slice/index.js.map +1 -1
  39. package/package.json +2 -1
  40. package/src/craiIndex.js +180 -0
  41. package/src/cramFile/codecs/_base.js +49 -0
  42. package/src/cramFile/codecs/beta.js +23 -0
  43. package/src/cramFile/codecs/byteArrayLength.js +55 -0
  44. package/src/cramFile/codecs/byteArrayStop.js +49 -0
  45. package/src/cramFile/codecs/external.js +52 -0
  46. package/src/cramFile/codecs/gamma.js +30 -0
  47. package/src/cramFile/codecs/huffman.js +137 -0
  48. package/src/cramFile/codecs/index.js +38 -0
  49. package/src/cramFile/codecs/subexp.js +32 -0
  50. package/src/cramFile/constants.js +55 -0
  51. package/src/cramFile/container/compressionScheme.js +143 -0
  52. package/src/cramFile/container/index.js +119 -0
  53. package/src/cramFile/file.js +363 -0
  54. package/src/cramFile/index.js +3 -0
  55. package/src/cramFile/record.js +337 -0
  56. package/src/cramFile/sectionParsers.js +379 -0
  57. package/src/cramFile/slice/decodeRecord.js +359 -0
  58. package/src/cramFile/slice/index.js +501 -0
  59. package/src/cramFile/util.js +169 -0
  60. package/src/errors.js +22 -0
  61. package/src/index.js +5 -0
  62. package/src/indexedCramFile.js +191 -0
  63. package/src/io/bufferCache.js +66 -0
  64. package/src/io/index.js +26 -0
  65. package/src/io/localFile.js +35 -0
  66. package/src/io/remoteFile.js +71 -0
  67. package/src/rans/README.md +1 -0
  68. package/src/rans/constants.js +5 -0
  69. package/src/rans/d04.js +83 -0
  70. package/src/rans/d14.js +59 -0
  71. package/src/rans/decoding.js +141 -0
  72. package/src/rans/frequencies.js +121 -0
  73. package/src/rans/index.js +249 -0
  74. package/src/sam.js +15 -0
  75. package/src/unzip-pako.ts +5 -0
  76. package/src/unzip.ts +2 -0
@@ -0,0 +1,55 @@ package/src/cramFile/constants.js
1
+ const Constants = {
2
+ CRAM_FLAG_PRESERVE_QUAL_SCORES: 1 << 0,
3
+ CRAM_FLAG_DETACHED: 1 << 1,
4
+ CRAM_FLAG_MATE_DOWNSTREAM: 1 << 2,
5
+ CRAM_FLAG_NO_SEQ: 1 << 3,
6
+ CRAM_FLAG_MASK: (1 << 4) - 1,
7
+
8
+ // mate read is reversed
9
+ CRAM_M_REVERSE: 1,
10
+ // mated read is unmapped
11
+ CRAM_M_UNMAP: 2,
12
+
13
+ // the read is paired in sequencing, no matter whether it is mapped in a pair
14
+ BAM_FPAIRED: 1,
15
+ // the read is mapped in a proper pair
16
+ BAM_FPROPER_PAIR: 2,
17
+ // the read itself is unmapped; conflictive with BAM_FPROPER_PAIR
18
+ BAM_FUNMAP: 4,
19
+ // the mate is unmapped
20
+ BAM_FMUNMAP: 8,
21
+ // the read is mapped to the reverse strand
22
+ BAM_FREVERSE: 16,
23
+ // the mate is mapped to the reverse strand
24
+ BAM_FMREVERSE: 32,
25
+ // this is read1
26
+ BAM_FREAD1: 64,
27
+ // this is read2
28
+ BAM_FREAD2: 128,
29
+ // not primary alignment
30
+ BAM_FSECONDARY: 256,
31
+ // QC failure
32
+ BAM_FQCFAIL: 512,
33
+ // optical or PCR duplicate
34
+ BAM_FDUP: 1024,
35
+ // supplementary alignment
36
+ BAM_FSUPPLEMENTARY: 2048,
37
+
38
+ BAM_CMATCH: 0,
39
+ BAM_CINS: 1,
40
+ BAM_CDEL: 2,
41
+ BAM_CREF_SKIP: 3,
42
+ BAM_CSOFT_CLIP: 4,
43
+ BAM_CHARD_CLIP: 5,
44
+ BAM_CPAD: 6,
45
+ BAM_CEQUAL: 7,
46
+ BAM_CDIFF: 8,
47
+ BAM_CBACK: 9,
48
+
49
+ BAM_CIGAR_STR: 'MIDNSHP:XB',
50
+ BAM_CIGAR_SHIFT: 4,
51
+ BAM_CIGAR_MASK: 0xf,
52
+ BAM_CIGAR_TYPE: 0x3c1a7,
53
+ }
54
+
55
+ export default Constants
@@ -0,0 +1,143 @@ package/src/cramFile/container/compressionScheme.js
1
+ import { CramMalformedError } from '../../errors'
2
+ import { instantiateCodec } from '../codecs'
3
+
4
+ // the hardcoded data type to be decoded for each core
5
+ // data field
6
+ const dataSeriesTypes = {
7
+ BF: 'int',
8
+ CF: 'int',
9
+ RI: 'int',
10
+ RL: 'int',
11
+ AP: 'int',
12
+ RG: 'int',
13
+ MF: 'int',
14
+ NS: 'int',
15
+ NP: 'int',
16
+ TS: 'int',
17
+ NF: 'int',
18
+ TC: 'byte',
19
+ TN: 'int',
20
+ FN: 'int',
21
+ FC: 'byte',
22
+ FP: 'int',
23
+ BS: 'byte',
24
+ IN: 'byteArray',
25
+ SC: 'byteArray',
26
+ DL: 'int',
27
+ BA: 'byte',
28
+ BB: 'byteArray',
29
+ RS: 'int',
30
+ PD: 'int',
31
+ HC: 'int',
32
+ MQ: 'int',
33
+ RN: 'byteArray',
34
+ QS: 'byte',
35
+ QQ: 'byteArray',
36
+ TL: 'int',
37
+ TM: 'ignore',
38
+ TV: 'ignore',
39
+ }
40
+
41
+ function parseSubstitutionMatrix(byteArray) {
42
+ const matrix = new Array(5)
43
+ for (let i = 0; i < 5; i += 1) {
44
+ matrix[i] = new Array(4)
45
+ }
46
+
47
+ matrix[0][(byteArray[0] >> 6) & 3] = 'C'
48
+ matrix[0][(byteArray[0] >> 4) & 3] = 'G'
49
+ matrix[0][(byteArray[0] >> 2) & 3] = 'T'
50
+ matrix[0][(byteArray[0] >> 0) & 3] = 'N'
51
+
52
+ matrix[1][(byteArray[1] >> 6) & 3] = 'A'
53
+ matrix[1][(byteArray[1] >> 4) & 3] = 'G'
54
+ matrix[1][(byteArray[1] >> 2) & 3] = 'T'
55
+ matrix[1][(byteArray[1] >> 0) & 3] = 'N'
56
+
57
+ matrix[2][(byteArray[2] >> 6) & 3] = 'A'
58
+ matrix[2][(byteArray[2] >> 4) & 3] = 'C'
59
+ matrix[2][(byteArray[2] >> 2) & 3] = 'T'
60
+ matrix[2][(byteArray[2] >> 0) & 3] = 'N'
61
+
62
+ matrix[3][(byteArray[3] >> 6) & 3] = 'A'
63
+ matrix[3][(byteArray[3] >> 4) & 3] = 'C'
64
+ matrix[3][(byteArray[3] >> 2) & 3] = 'G'
65
+ matrix[3][(byteArray[3] >> 0) & 3] = 'N'
66
+
67
+ matrix[4][(byteArray[4] >> 6) & 3] = 'A'
68
+ matrix[4][(byteArray[4] >> 4) & 3] = 'C'
69
+ matrix[4][(byteArray[4] >> 2) & 3] = 'G'
70
+ matrix[4][(byteArray[4] >> 0) & 3] = 'T'
71
+
72
+ return matrix
73
+ }
74
+
75
+ export default class CramContainerCompressionScheme {
76
+ constructor(content) {
77
+ Object.assign(this, content)
78
+ // interpret some of the preservation map tags for convenient use
79
+ this.readNamesIncluded = content.preservation.RN
80
+ this.APdelta = content.preservation.AP
81
+ this.referenceRequired = !!content.preservation.RR
82
+ this.tagIdsDictionary = content.preservation.TD
83
+ this.substitutionMatrix = parseSubstitutionMatrix(content.preservation.SM)
84
+
85
+ this.dataSeriesCodecCache = new Map()
86
+ this.tagCodecCache = {}
87
+ }
88
+
89
+ /**
90
+ * @param {string} tagName three-character tag name
91
+ * @private
92
+ */
93
+ getCodecForTag(tagName) {
94
+ if (!this.tagCodecCache[tagName]) {
95
+ const encodingData = this.tagEncoding[tagName]
96
+ if (encodingData) {
97
+ this.tagCodecCache[tagName] = instantiateCodec(
98
+ encodingData,
99
+ 'byteArray', // all tags are byte array data
100
+ )
101
+ }
102
+ }
103
+ return this.tagCodecCache[tagName]
104
+ }
105
+
106
+ /**
107
+ *
108
+ * @param {number} tagListId ID of the tag list to fetch from the tag dictionary
109
+ * @private
110
+ */
111
+ getTagNames(tagListId) {
112
+ return this.tagIdsDictionary[tagListId]
113
+ }
114
+
115
+ getCodecForDataSeries(dataSeriesName) {
116
+ let r = this.dataSeriesCodecCache.get(dataSeriesName)
117
+ if (r === undefined) {
118
+ const encodingData = this.dataSeriesEncoding[dataSeriesName]
119
+ if (encodingData) {
120
+ const dataType = dataSeriesTypes[dataSeriesName]
121
+ if (!dataType) {
122
+ throw new CramMalformedError(
123
+ `data series name ${dataSeriesName} not defined in file compression header`,
124
+ )
125
+ }
126
+ r = instantiateCodec(encodingData, dataType)
127
+ this.dataSeriesCodecCache.set(dataSeriesName, r)
128
+ }
129
+ }
130
+ return r
131
+ }
132
+
133
+ toJSON() {
134
+ const data = {}
135
+ Object.keys(this).forEach(k => {
136
+ if (/Cache$/.test(k)) {
137
+ return
138
+ }
139
+ data[k] = this[k]
140
+ })
141
+ return data
142
+ }
143
+ }
@@ -0,0 +1,119 @@ package/src/cramFile/container/index.js
1
+ import { CramMalformedError } from '../../errors'
2
+
3
+ import { itf8Size, parseItem, tinyMemoize } from '../util'
4
+ import CramSlice from '../slice'
5
+ import CramContainerCompressionScheme from './compressionScheme'
6
+
7
+ export default class CramContainer {
8
+ constructor(cramFile, position) {
9
+ // cram file this container comes from
10
+ this.file = cramFile
11
+ // position of this container in the file
12
+ this.filePosition = position
13
+ // console.log(`container: ${this.filePosition}`)
14
+ }
15
+
16
+ // memoize
17
+ getHeader() {
18
+ return this._readContainerHeader(this.filePosition)
19
+ }
20
+
21
+ // memoize
22
+ async getCompressionHeaderBlock() {
23
+ const containerHeader = await this.getHeader()
24
+
25
+ // if there are no records in the container, there will be no compression header
26
+ if (!containerHeader.numRecords) {
27
+ return null
28
+ }
29
+ const sectionParsers = await this.file.getSectionParsers()
30
+ const block = await this.getFirstBlock()
31
+ if (block.contentType !== 'COMPRESSION_HEADER') {
32
+ throw new CramMalformedError(
33
+ `invalid content type ${block.contentType} in what is supposed to be the compression header block`,
34
+ )
35
+ }
36
+ const content = parseItem(
37
+ block.content,
38
+ sectionParsers.cramCompressionHeader.parser,
39
+ 0,
40
+ block.contentPosition,
41
+ )
42
+ block.content = content
43
+ return block
44
+ }
45
+
46
+ async getFirstBlock() {
47
+ const containerHeader = await this.getHeader()
48
+ return this.file.readBlock(containerHeader._endPosition)
49
+ }
50
+
51
+ // parses the compression header data into a CramContainerCompressionScheme object
52
+ // memoize
53
+ async getCompressionScheme() {
54
+ const header = await this.getCompressionHeaderBlock()
55
+ if (!header) {
56
+ return undefined
57
+ }
58
+ return new CramContainerCompressionScheme(header.content)
59
+ }
60
+
61
+ getSlice(slicePosition, sliceSize) {
62
+ // note: slicePosition is relative to the end of the container header
63
+ // TODO: perhaps we should cache slices?
64
+ return new CramSlice(this, slicePosition, sliceSize)
65
+ }
66
+
67
+ async _readContainerHeader(position) {
68
+ const sectionParsers = await this.file.getSectionParsers()
69
+ const { cramContainerHeader1, cramContainerHeader2 } = sectionParsers
70
+ const { size: fileSize } = await this.file.stat()
71
+
72
+ if (position >= fileSize) {
73
+ return undefined
74
+ }
75
+
76
+ // parse the container header. do it in 2 pieces because you cannot tell
77
+ // how much to buffer until you read numLandmarks
78
+ const bytes1 = Buffer.allocUnsafe(cramContainerHeader1.maxLength)
79
+ await this.file.read(bytes1, 0, cramContainerHeader1.maxLength, position)
80
+ const header1 = parseItem(bytes1, cramContainerHeader1.parser)
81
+ const numLandmarksSize = itf8Size(header1.numLandmarks)
82
+ if (position + header1.length >= fileSize) {
83
+ console.warn(
84
+ `${this.file}: container header at ${position} indicates that the container has length ${header1.length}, which extends beyond the length of the file. Skipping this container.`,
85
+ )
86
+ return undefined
87
+ }
88
+ const bytes2 = Buffer.allocUnsafe(
89
+ cramContainerHeader2.maxLength(header1.numLandmarks),
90
+ )
91
+ await this.file.read(
92
+ bytes2,
93
+ 0,
94
+ cramContainerHeader2.maxLength(header1.numLandmarks),
95
+ position + header1._size - numLandmarksSize,
96
+ )
97
+ const header2 = parseItem(bytes2, cramContainerHeader2.parser)
98
+
99
+ if (this.file.validateChecksums && header2.crc32 !== undefined) {
100
+ await this.file.checkCrc32(
101
+ position,
102
+ header1._size + header2._size - numLandmarksSize - 4,
103
+ header2.crc32,
104
+ `container header beginning at position ${position}`,
105
+ )
106
+ }
107
+
108
+ const completeHeader = Object.assign(header1, header2, {
109
+ _size: header1._size + header2._size - numLandmarksSize,
110
+ _endPosition: header1._size + header2._size - numLandmarksSize + position,
111
+ })
112
+
113
+ return completeHeader
114
+ }
115
+ }
116
+
117
+ 'getHeader getCompressionHeaderBlock getCompressionScheme'
118
+ .split(' ')
119
+ .forEach(method => tinyMemoize(CramContainer, method))
@@ -0,0 +1,363 @@ package/src/cramFile/file.js
1
+ import { unzip } from '../unzip'
2
+ import crc32 from 'buffer-crc32'
3
+ import LRU from 'quick-lru'
4
+
5
+ import { CramUnimplementedError, CramMalformedError } from '../errors'
6
+ import ransuncompress from '../rans'
7
+ import {
8
+ cramFileDefinition as cramFileDefinitionParser,
9
+ getSectionParsers,
10
+ } from './sectionParsers'
11
+ import htscodecs from '@jkbonfield/htscodecs'
12
+ import CramContainer from './container'
13
+
14
+ import { open } from '../io'
15
+ import { parseItem, tinyMemoize } from './util'
16
+ import { parseHeaderText } from '../sam'
17
+ //source:https://abdulapopoola.com/2019/01/20/check-endianness-with-javascript/
18
+ function getEndianness() {
19
+ let uInt32 = new Uint32Array([0x11223344])
20
+ let uInt8 = new Uint8Array(uInt32.buffer)
21
+
22
+ if (uInt8[0] === 0x44) {
23
+ return 0 //little-endian
24
+ } else if (uInt8[0] === 0x11) {
25
+ return 1 //big-endian
26
+ } else {
27
+ return 2 //mixed-endian?
28
+ }
29
+ }
30
+
31
+ export default class CramFile {
32
+ /**
33
+ * @param {object} args
34
+ * @param {object} [args.filehandle] - a filehandle that implements the stat() and
35
+ * read() methods of the Node filehandle API https://nodejs.org/api/fs.html#fs_class_filehandle
36
+ * @param {object} [args.path] - path to the cram file
37
+ * @param {object} [args.url] - url for the cram file. also supports file:// urls for local files
38
+ * @param {function} [args.seqFetch] - a function with signature
39
+ * `(seqId, startCoordinate, endCoordinate)` that returns a promise for a string of sequence bases
40
+ * @param {number} [args.cacheSize] optional maximum number of CRAM records to cache. default 20,000
41
+ * @param {boolean} [args.checkSequenceMD5] - default true. if false, disables verifying the MD5
42
+ * checksum of the reference sequence underlying a slice. In some applications, this check can cause an inconvenient amount (many megabases) of sequences to be fetched.
43
+ */
44
+ constructor(args) {
45
+ this.file = open(args.url, args.path, args.filehandle)
46
+ this.validateChecksums = true
47
+ this.fetchReferenceSequenceCallback = args.seqFetch
48
+ this.options = {
49
+ checkSequenceMD5: args.checkSequenceMD5 !== false,
50
+ cacheSize: args.cacheSize !== undefined ? args.cacheSize : 20000,
51
+ }
52
+
53
+ // cache of features in a slice, keyed by the
54
+ // slice offset. caches all of the features in a slice, or none.
55
+ // the cache is actually used by the slice object, it's just
56
+ // kept here at the level of the file
57
+ this.featureCache = new LRU({
58
+ maxSize: this.options.cacheSize,
59
+ })
60
+ if (getEndianness() > 0) {
61
+ throw new Error('Detected big-endian machine, may be unable to run')
62
+ }
63
+ }
64
+
65
+ toString() {
66
+ if (this.file.filename) {
67
+ return this.file.filename
68
+ }
69
+ if (this.file.url) {
70
+ return this.file.url
71
+ }
72
+
73
+ return '(cram file)'
74
+ }
75
+
76
+ // can just read this object like a filehandle
77
+ read(buffer, offset, length, position) {
78
+ return this.file.read(buffer, offset, length, position)
79
+ }
80
+
81
+ // can just stat this object like a filehandle
82
+ stat() {
83
+ return this.file.stat()
84
+ }
85
+
86
+ // memoized
87
+ async getDefinition() {
88
+ const headbytes = Buffer.allocUnsafe(cramFileDefinitionParser.maxLength)
89
+ await this.file.read(headbytes, 0, cramFileDefinitionParser.maxLength, 0)
90
+ const definition = cramFileDefinitionParser.parser.parse(headbytes).result
91
+ if (definition.majorVersion !== 2 && definition.majorVersion !== 3) {
92
+ throw new CramUnimplementedError(
93
+ `CRAM version ${definition.majorVersion} not supported`,
94
+ )
95
+ }
96
+ return definition
97
+ }
98
+
99
+ // memoize
100
+ async getSamHeader() {
101
+ const firstContainer = await this.getContainerById(0)
102
+ if (!firstContainer) {
103
+ throw new CramMalformedError('file contains no containers')
104
+ }
105
+
106
+ const { content } = await firstContainer.getFirstBlock()
107
+ // find the end of the trailing zeros in the header text
108
+ const headerLength = content.readInt32LE(0)
109
+ const textStart = 4
110
+ // let textEnd = content.length - 1
111
+ // while (textEnd >= textStart && !content[textEnd]) textEnd -= 1
112
+ // trim off the trailing zeros
113
+ const text = content.toString('utf8', textStart, textStart + headerLength)
114
+ this.header = text
115
+ return parseHeaderText(text)
116
+ }
117
+
118
+ async getHeaderText() {
119
+ await this.getSamHeader()
120
+ return this.header
121
+ }
122
+
123
+ // memoize
124
+ async getSectionParsers() {
125
+ const { majorVersion } = await this.getDefinition()
126
+ return getSectionParsers(majorVersion)
127
+ }
128
+
129
+ async getContainerById(containerNumber) {
130
+ const sectionParsers = await this.getSectionParsers()
131
+ let position = sectionParsers.cramFileDefinition.maxLength
132
+ const { size: fileSize } = await this.file.stat()
133
+ const { cramContainerHeader1 } = sectionParsers
134
+
135
+ // skip with a series of reads to the proper container
136
+ let currentContainer
137
+ for (let i = 0; i <= containerNumber; i += 1) {
138
+ // if we are about to go off the end of the file
139
+ // and have not found that container, it does not exist
140
+ if (position + cramContainerHeader1.maxLength + 8 >= fileSize) {
141
+ return undefined
142
+ }
143
+
144
+ currentContainer = this.getContainerAtPosition(position)
145
+ const currentHeader = await currentContainer.getHeader()
146
+ if (!currentHeader) {
147
+ throw new CramMalformedError(
148
+ `container ${containerNumber} not found in file`,
149
+ )
150
+ }
151
+ // if this is the first container, read all the blocks in the
152
+ // container to determine its length, because we cannot trust
153
+ // the container header's given length due to a bug somewhere
154
+ // in htslib
155
+ if (i === 0) {
156
+ position = currentHeader._endPosition
157
+ for (let j = 0; j < currentHeader.numBlocks; j += 1) {
158
+ const block = await this.readBlock(position)
159
+ position = block._endPosition
160
+ }
161
+ } else {
162
+ // otherwise, just traverse to the next container using the container's length
163
+ position += currentHeader._size + currentHeader.length
164
+ }
165
+ }
166
+
167
+ return currentContainer
168
+ }
169
+
170
+ async checkCrc32(position, length, recordedCrc32, description) {
171
+ const b = Buffer.allocUnsafe(length)
172
+ await this.file.read(b, 0, length, position)
173
+ const calculatedCrc32 = crc32.unsigned(b)
174
+ if (calculatedCrc32 !== recordedCrc32) {
175
+ throw new CramMalformedError(
176
+ `crc mismatch in ${description}: recorded CRC32 = ${recordedCrc32}, but calculated CRC32 = ${calculatedCrc32}`,
177
+ )
178
+ }
179
+ }
180
+
181
+ /**
182
+ * @returns {Promise[number]} the number of containers in the file
183
+ */
184
+ async containerCount() {
185
+ const sectionParsers = await this.getSectionParsers()
186
+ const { size: fileSize } = await this.file.stat()
187
+ const { cramContainerHeader1 } = sectionParsers
188
+
189
+ let containerCount = 0
190
+ let position = sectionParsers.cramFileDefinition.maxLength
191
+ while (position + cramContainerHeader1.maxLength + 8 < fileSize) {
192
+ const currentHeader = await this.getContainerAtPosition(
193
+ position,
194
+ ).getHeader()
195
+ if (!currentHeader) {
196
+ break
197
+ }
198
+ // if this is the first container, read all the blocks in the
199
+ // container, because we cannot trust the container
200
+ // header's given length due to a bug somewhere in htslib
201
+ if (containerCount === 0) {
202
+ position = currentHeader._endPosition
203
+ for (let j = 0; j < currentHeader.numBlocks; j += 1) {
204
+ const block = await this.readBlock(position)
205
+ position = block._endPosition
206
+ }
207
+ } else {
208
+ // otherwise, just traverse to the next container using the container's length
209
+ position += currentHeader._size + currentHeader.length
210
+ }
211
+ containerCount += 1
212
+ }
213
+
214
+ return containerCount
215
+ }
216
+
217
+ getContainerAtPosition(position) {
218
+ return new CramContainer(this, position)
219
+ }
220
+
221
+ async readBlockHeader(position) {
222
+ const sectionParsers = await this.getSectionParsers()
223
+ const { cramBlockHeader } = sectionParsers
224
+ const { size: fileSize } = await this.file.stat()
225
+
226
+ if (position + cramBlockHeader.maxLength >= fileSize) {
227
+ return undefined
228
+ }
229
+
230
+ const buffer = Buffer.allocUnsafe(cramBlockHeader.maxLength)
231
+ await this.file.read(buffer, 0, cramBlockHeader.maxLength, position)
232
+ return parseItem(buffer, cramBlockHeader.parser, 0, position)
233
+ }
234
+
235
+ async _parseSection(
236
+ section,
237
+ position,
238
+ size = section.maxLength,
239
+ preReadBuffer,
240
+ ) {
241
+ let buffer
242
+ if (preReadBuffer) {
243
+ buffer = preReadBuffer
244
+ } else {
245
+ const { size: fileSize } = await this.file.stat()
246
+ if (position + size >= fileSize) {
247
+ return undefined
248
+ }
249
+ buffer = Buffer.allocUnsafe(size)
250
+ await this.file.read(buffer, 0, size, position)
251
+ }
252
+ const data = parseItem(buffer, section.parser, 0, position)
253
+ if (data._size !== size) {
254
+ throw new CramMalformedError(
255
+ `section read error: requested size ${size} does not equal parsed size ${data._size}`,
256
+ )
257
+ }
258
+ return data
259
+ }
260
+
261
+ _uncompress(compressionMethod, inputBuffer, outputBuffer) {
262
+ if (compressionMethod === 'gzip') {
263
+ const result = unzip(inputBuffer)
264
+ result.copy(outputBuffer)
265
+ } else if (compressionMethod === 'bzip2') {
266
+ var bits = bzip2.array(inputBuffer)
267
+ var size = bzip2.header(bits)
268
+ var j = 0
269
+ do {
270
+ var chunk = bzip2.decompress(bits, size)
271
+ if (chunk != -1) {
272
+ Buffer.from(chunk).copy(outputBuffer, j)
273
+ j += chunk.length
274
+ size -= chunk.length
275
+ }
276
+ } while (chunk != -1)
277
+ } else if (compressionMethod === 'rans') {
278
+ ransuncompress(inputBuffer, outputBuffer)
279
+ //htscodecs r4x8 is slower, but compatible.
280
+ //htscodecs.r4x8_uncompress(inputBuffer, outputBuffer);
281
+ } else if (compressionMethod === 'rans4x16') {
282
+ htscodecs.r4x16_uncompress(inputBuffer, outputBuffer)
283
+ } else if (compressionMethod === 'arith') {
284
+ htscodecs.arith_uncompress(inputBuffer, outputBuffer)
285
+ } else if (compressionMethod === 'fqzcomp') {
286
+ htscodecs.fqzcomp_uncompress(inputBuffer, outputBuffer)
287
+ } else if (compressionMethod === 'tok3') {
288
+ htscodecs.tok3_uncompress(inputBuffer, outputBuffer)
289
+ } else {
290
+ throw new CramUnimplementedError(
291
+ `${compressionMethod} decompression not yet implemented`,
292
+ )
293
+ }
294
+ }
295
+
296
+ async readBlock(position) {
297
+ const { majorVersion } = await this.getDefinition()
298
+ const sectionParsers = await this.getSectionParsers()
299
+ const block = await this.readBlockHeader(position)
300
+ const blockContentPosition = block._endPosition
301
+ block.contentPosition = block._endPosition
302
+
303
+ const uncompressedData = Buffer.allocUnsafe(block.uncompressedSize)
304
+
305
+ if (block.compressionMethod !== 'raw') {
306
+ const compressedData = Buffer.allocUnsafe(block.compressedSize)
307
+ await this.read(
308
+ compressedData,
309
+ 0,
310
+ block.compressedSize,
311
+ blockContentPosition,
312
+ )
313
+
314
+ this._uncompress(
315
+ block.compressionMethod,
316
+ compressedData,
317
+ uncompressedData,
318
+ )
319
+ } else {
320
+ await this.read(
321
+ uncompressedData,
322
+ 0,
323
+ block.uncompressedSize,
324
+ blockContentPosition,
325
+ )
326
+ }
327
+
328
+ block.content = uncompressedData
329
+
330
+ if (majorVersion >= 3) {
331
+ // parse the crc32
332
+ const crc = await this._parseSection(
333
+ sectionParsers.cramBlockCrc32,
334
+ blockContentPosition + block.compressedSize,
335
+ )
336
+ block.crc32 = crc.crc32
337
+
338
+ // check the block data crc32
339
+ if (this.validateChecksums) {
340
+ await this.checkCrc32(
341
+ position,
342
+ block._size + block.compressedSize,
343
+ block.crc32,
344
+ 'block data',
345
+ )
346
+ }
347
+
348
+ // make the endposition and size reflect the whole block
349
+ block._endPosition = crc._endPosition
350
+ block._size =
351
+ block.compressedSize + sectionParsers.cramBlockCrc32.maxLength
352
+ } else {
353
+ block._endPosition = blockContentPosition + block.compressedSize
354
+ block._size = block.compressedSize
355
+ }
356
+
357
+ return block
358
+ }
359
+ }
360
+
361
+ 'getDefinition getSectionParsers getSamHeader'
362
+ .split(' ')
363
+ .forEach(method => tinyMemoize(CramFile, method))
@@ -0,0 +1,3 @@ package/src/cramFile/index.js
1
+ import CramFile from './file'
2
+
3
+ export default CramFile