@gmod/cram 1.6.3 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +9 -0
- package/dist/craiIndex.d.ts +19 -12
- package/dist/craiIndex.js +63 -123
- package/dist/craiIndex.js.map +1 -1
- package/dist/cram-bundle.js +2 -17
- package/dist/cram-bundle.js.LICENSE.txt +17 -0
- package/dist/cramFile/codecs/_base.d.ts +26 -5
- package/dist/cramFile/codecs/_base.js +3 -39
- package/dist/cramFile/codecs/_base.js.map +1 -1
- package/dist/cramFile/codecs/beta.d.ts +7 -3
- package/dist/cramFile/codecs/beta.js +13 -31
- package/dist/cramFile/codecs/beta.js.map +1 -1
- package/dist/cramFile/codecs/byteArrayLength.d.ts +13 -7
- package/dist/cramFile/codecs/byteArrayLength.js +22 -41
- package/dist/cramFile/codecs/byteArrayLength.js.map +1 -1
- package/dist/cramFile/codecs/byteArrayStop.d.ts +9 -5
- package/dist/cramFile/codecs/byteArrayStop.js +25 -46
- package/dist/cramFile/codecs/byteArrayStop.js.map +1 -1
- package/dist/cramFile/codecs/dataSeriesTypes.d.ts +4 -0
- package/dist/cramFile/codecs/dataSeriesTypes.js +3 -0
- package/dist/cramFile/codecs/dataSeriesTypes.js.map +1 -0
- package/dist/cramFile/codecs/external.d.ts +10 -6
- package/dist/cramFile/codecs/external.js +26 -44
- package/dist/cramFile/codecs/external.js.map +1 -1
- package/dist/cramFile/codecs/gamma.d.ts +7 -3
- package/dist/cramFile/codecs/gamma.js +16 -34
- package/dist/cramFile/codecs/gamma.js.map +1 -1
- package/dist/cramFile/codecs/getBits.d.ts +7 -0
- package/dist/cramFile/codecs/getBits.js +26 -0
- package/dist/cramFile/codecs/getBits.js.map +1 -0
- package/dist/cramFile/codecs/huffman.d.ts +17 -13
- package/dist/cramFile/codecs/huffman.js +76 -85
- package/dist/cramFile/codecs/huffman.js.map +1 -1
- package/dist/cramFile/codecs/index.d.ts +4 -2
- package/dist/cramFile/codecs/index.js +12 -13
- package/dist/cramFile/codecs/index.js.map +1 -1
- package/dist/cramFile/codecs/subexp.d.ts +7 -3
- package/dist/cramFile/codecs/subexp.js +19 -36
- package/dist/cramFile/codecs/subexp.js.map +1 -1
- package/dist/cramFile/constants.d.ts +35 -35
- package/dist/cramFile/constants.js +1 -1
- package/dist/cramFile/constants.js.map +1 -1
- package/dist/cramFile/container/compressionScheme.d.ts +57 -11
- package/dist/cramFile/container/compressionScheme.js +37 -32
- package/dist/cramFile/container/compressionScheme.js.map +1 -1
- package/dist/cramFile/container/index.d.ts +23 -9
- package/dist/cramFile/container/index.js +74 -144
- package/dist/cramFile/container/index.js.map +1 -1
- package/dist/cramFile/encoding.d.ts +78 -0
- package/dist/cramFile/encoding.js +3 -0
- package/dist/cramFile/encoding.js.map +1 -0
- package/dist/cramFile/file.d.ts +91 -41
- package/dist/cramFile/file.js +234 -368
- package/dist/cramFile/file.js.map +1 -1
- package/dist/cramFile/filehandle.d.ts +2 -0
- package/dist/cramFile/filehandle.js +3 -0
- package/dist/cramFile/filehandle.js.map +1 -0
- package/dist/cramFile/index.d.ts +1 -1
- package/dist/cramFile/index.js +1 -1
- package/dist/cramFile/index.js.map +1 -1
- package/dist/cramFile/record.d.ts +61 -17
- package/dist/cramFile/record.js +153 -77
- package/dist/cramFile/record.js.map +1 -1
- package/dist/cramFile/sectionParsers.d.ts +99 -8
- package/dist/cramFile/sectionParsers.js +70 -80
- package/dist/cramFile/sectionParsers.js.map +1 -1
- package/dist/cramFile/slice/decodeRecord.d.ts +30 -2
- package/dist/cramFile/slice/decodeRecord.js +148 -118
- package/dist/cramFile/slice/decodeRecord.js.map +1 -1
- package/dist/cramFile/slice/index.d.ts +21 -14
- package/dist/cramFile/slice/index.js +286 -381
- package/dist/cramFile/slice/index.js.map +1 -1
- package/dist/cramFile/util.d.ts +11 -5
- package/dist/cramFile/util.js +19 -97
- package/dist/cramFile/util.js.map +1 -1
- package/dist/errors.d.ts +5 -10
- package/dist/errors.js +11 -62
- package/dist/errors.js.map +1 -1
- package/dist/index.d.ts +3 -3
- package/dist/index.js +3 -3
- package/dist/index.js.map +1 -1
- package/dist/indexedCramFile.d.ts +37 -12
- package/dist/indexedCramFile.js +114 -154
- package/dist/indexedCramFile.js.map +1 -1
- package/dist/io/index.d.ts +5 -5
- package/dist/io/index.js +9 -9
- package/dist/io/index.js.map +1 -1
- package/dist/rans/constants.js +3 -3
- package/dist/rans/constants.js.map +1 -1
- package/dist/rans/d04.js +15 -15
- package/dist/rans/d04.js.map +1 -1
- package/dist/rans/d14.js +21 -21
- package/dist/rans/d14.js.map +1 -1
- package/dist/rans/decoding.js +27 -30
- package/dist/rans/decoding.js.map +1 -1
- package/dist/rans/frequencies.js +11 -11
- package/dist/rans/frequencies.js.map +1 -1
- package/dist/rans/index.js +46 -49
- package/dist/rans/index.js.map +1 -1
- package/dist/sam.d.ts +8 -1
- package/dist/sam.js +7 -7
- package/dist/sam.js.map +1 -1
- package/dist/typescript.d.ts +3 -0
- package/dist/typescript.js +11 -0
- package/dist/typescript.js.map +1 -0
- package/dist/unzip-pako.js +1 -1
- package/dist/unzip-pako.js.map +1 -1
- package/dist/unzip.js +1 -1
- package/dist/unzip.js.map +1 -1
- package/errors.js +11 -62
- package/esm/craiIndex.d.ts +19 -12
- package/esm/craiIndex.js +8 -24
- package/esm/craiIndex.js.map +1 -1
- package/esm/cramFile/codecs/_base.d.ts +26 -5
- package/esm/cramFile/codecs/_base.js +1 -35
- package/esm/cramFile/codecs/_base.js.map +1 -1
- package/esm/cramFile/codecs/beta.d.ts +7 -3
- package/esm/cramFile/codecs/beta.js +4 -3
- package/esm/cramFile/codecs/beta.js.map +1 -1
- package/esm/cramFile/codecs/byteArrayLength.d.ts +13 -7
- package/esm/cramFile/codecs/byteArrayLength.js +1 -1
- package/esm/cramFile/codecs/byteArrayLength.js.map +1 -1
- package/esm/cramFile/codecs/byteArrayStop.d.ts +9 -5
- package/esm/cramFile/codecs/byteArrayStop.js +7 -9
- package/esm/cramFile/codecs/byteArrayStop.js.map +1 -1
- package/esm/cramFile/codecs/dataSeriesTypes.d.ts +4 -0
- package/esm/cramFile/codecs/dataSeriesTypes.js +2 -0
- package/esm/cramFile/codecs/dataSeriesTypes.js.map +1 -0
- package/esm/cramFile/codecs/external.d.ts +10 -6
- package/esm/cramFile/codecs/external.js +4 -3
- package/esm/cramFile/codecs/external.js.map +1 -1
- package/esm/cramFile/codecs/gamma.d.ts +7 -3
- package/esm/cramFile/codecs/gamma.js +5 -4
- package/esm/cramFile/codecs/gamma.js.map +1 -1
- package/esm/cramFile/codecs/getBits.d.ts +7 -0
- package/esm/cramFile/codecs/getBits.js +21 -0
- package/esm/cramFile/codecs/getBits.js.map +1 -0
- package/esm/cramFile/codecs/huffman.d.ts +17 -13
- package/esm/cramFile/codecs/huffman.js +22 -9
- package/esm/cramFile/codecs/huffman.js.map +1 -1
- package/esm/cramFile/codecs/index.d.ts +4 -2
- package/esm/cramFile/codecs/index.js +1 -1
- package/esm/cramFile/codecs/index.js.map +1 -1
- package/esm/cramFile/codecs/subexp.d.ts +7 -3
- package/esm/cramFile/codecs/subexp.js +7 -5
- package/esm/cramFile/codecs/subexp.js.map +1 -1
- package/esm/cramFile/constants.d.ts +35 -35
- package/esm/cramFile/constants.js.map +1 -1
- package/esm/cramFile/container/compressionScheme.d.ts +57 -11
- package/esm/cramFile/container/compressionScheme.js +15 -8
- package/esm/cramFile/container/compressionScheme.js.map +1 -1
- package/esm/cramFile/container/index.d.ts +23 -9
- package/esm/cramFile/container/index.js +11 -9
- package/esm/cramFile/container/index.js.map +1 -1
- package/esm/cramFile/encoding.d.ts +78 -0
- package/esm/cramFile/encoding.js +2 -0
- package/esm/cramFile/encoding.js.map +1 -0
- package/esm/cramFile/file.d.ts +91 -41
- package/esm/cramFile/file.js +59 -47
- package/esm/cramFile/file.js.map +1 -1
- package/esm/cramFile/filehandle.d.ts +2 -0
- package/esm/cramFile/filehandle.js +2 -0
- package/esm/cramFile/filehandle.js.map +1 -0
- package/esm/cramFile/index.d.ts +1 -1
- package/esm/cramFile/index.js.map +1 -1
- package/esm/cramFile/record.d.ts +61 -17
- package/esm/cramFile/record.js +83 -5
- package/esm/cramFile/record.js.map +1 -1
- package/esm/cramFile/sectionParsers.d.ts +99 -8
- package/esm/cramFile/sectionParsers.js +7 -17
- package/esm/cramFile/sectionParsers.js.map +1 -1
- package/esm/cramFile/slice/decodeRecord.d.ts +30 -2
- package/esm/cramFile/slice/decodeRecord.js +102 -70
- package/esm/cramFile/slice/decodeRecord.js.map +1 -1
- package/esm/cramFile/slice/index.d.ts +21 -14
- package/esm/cramFile/slice/index.js +77 -38
- package/esm/cramFile/slice/index.js.map +1 -1
- package/esm/cramFile/util.d.ts +11 -5
- package/esm/cramFile/util.js +11 -82
- package/esm/cramFile/util.js.map +1 -1
- package/esm/errors.d.ts +5 -10
- package/esm/errors.js +0 -5
- package/esm/errors.js.map +1 -1
- package/esm/index.d.ts +3 -3
- package/esm/index.js.map +1 -1
- package/esm/indexedCramFile.d.ts +37 -12
- package/esm/indexedCramFile.js +19 -8
- package/esm/indexedCramFile.js.map +1 -1
- package/esm/io/index.d.ts +5 -5
- package/esm/io/index.js +3 -3
- package/esm/io/index.js.map +1 -1
- package/esm/sam.d.ts +8 -1
- package/esm/sam.js.map +1 -1
- package/esm/typescript.d.ts +3 -0
- package/esm/typescript.js +7 -0
- package/esm/typescript.js.map +1 -0
- package/package.json +18 -11
- package/src/{craiIndex.js → craiIndex.ts} +37 -31
- package/src/cramFile/codecs/_base.ts +45 -0
- package/src/cramFile/codecs/beta.ts +34 -0
- package/src/cramFile/codecs/{byteArrayLength.js → byteArrayLength.ts} +27 -5
- package/src/cramFile/codecs/{byteArrayStop.js → byteArrayStop.ts} +25 -12
- package/src/cramFile/codecs/dataSeriesTypes.ts +39 -0
- package/src/cramFile/codecs/{external.js → external.ts} +28 -12
- package/src/cramFile/codecs/gamma.ts +42 -0
- package/src/cramFile/codecs/getBits.ts +28 -0
- package/src/cramFile/codecs/{huffman.js → huffman.ts} +48 -15
- package/src/cramFile/codecs/{index.js → index.ts} +9 -3
- package/src/cramFile/codecs/subexp.ts +45 -0
- package/src/cramFile/{constants.js → constants.ts} +0 -0
- package/src/cramFile/container/{compressionScheme.js → compressionScheme.ts} +50 -18
- package/src/cramFile/container/{index.js → index.ts} +13 -13
- package/src/cramFile/encoding.ts +98 -0
- package/src/cramFile/{file.js → file.ts} +136 -62
- package/src/cramFile/filehandle.ts +3 -0
- package/src/cramFile/{index.js → index.ts} +0 -0
- package/src/cramFile/{record.js → record.ts} +185 -14
- package/src/cramFile/{sectionParsers.js → sectionParsers.ts} +148 -20
- package/src/cramFile/slice/{decodeRecord.js → decodeRecord.ts} +158 -105
- package/src/cramFile/slice/{index.js → index.ts} +138 -63
- package/src/cramFile/{util.js → util.ts} +28 -17
- package/src/{errors.js → errors.ts} +0 -5
- package/src/{index.js → index.ts} +0 -0
- package/src/{indexedCramFile.js → indexedCramFile.ts} +79 -19
- package/src/io/{index.js → index.ts} +10 -5
- package/src/{sam.js → sam.ts} +7 -2
- package/src/typescript.ts +17 -0
- package/src/typings/binary-parser.d.ts +44 -0
- package/src/typings/bzip2.d.ts +7 -0
- package/src/typings/htscodecs.d.ts +6 -0
- package/dist/io/bufferCache.d.ts +0 -12
- package/dist/io/bufferCache.js +0 -112
- package/dist/io/bufferCache.js.map +0 -1
- package/dist/io/localFile.d.ts +0 -10
- package/dist/io/localFile.js +0 -108
- package/dist/io/localFile.js.map +0 -1
- package/dist/io/remoteFile.d.ts +0 -16
- package/dist/io/remoteFile.js +0 -143
- package/dist/io/remoteFile.js.map +0 -1
- package/esm/io/bufferCache.d.ts +0 -12
- package/esm/io/bufferCache.js +0 -54
- package/esm/io/bufferCache.js.map +0 -1
- package/esm/io/localFile.d.ts +0 -10
- package/esm/io/localFile.js +0 -31
- package/esm/io/localFile.js.map +0 -1
- package/esm/io/remoteFile.d.ts +0 -16
- package/esm/io/remoteFile.js +0 -64
- package/esm/io/remoteFile.js.map +0 -1
- package/src/cramFile/codecs/_base.js +0 -49
- package/src/cramFile/codecs/beta.js +0 -23
- package/src/cramFile/codecs/gamma.js +0 -30
- package/src/cramFile/codecs/subexp.js +0 -32
- package/src/io/bufferCache.js +0 -66
- package/src/io/localFile.js +0 -35
- package/src/io/remoteFile.js +0 -71
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
import { unzip } from '../unzip'
|
|
2
2
|
import crc32 from 'buffer-crc32'
|
|
3
|
-
import
|
|
3
|
+
import QuickLRU from 'quick-lru'
|
|
4
4
|
|
|
5
|
-
import {
|
|
5
|
+
import { CramMalformedError, CramUnimplementedError } from '../errors'
|
|
6
6
|
import ransuncompress from '../rans'
|
|
7
7
|
import {
|
|
8
|
+
BlockHeader,
|
|
9
|
+
CompressionMethod,
|
|
8
10
|
cramFileDefinition as cramFileDefinitionParser,
|
|
9
11
|
getSectionParsers,
|
|
10
12
|
} from './sectionParsers'
|
|
@@ -14,10 +16,14 @@ import CramContainer from './container'
|
|
|
14
16
|
import { open } from '../io'
|
|
15
17
|
import { parseItem, tinyMemoize } from './util'
|
|
16
18
|
import { parseHeaderText } from '../sam'
|
|
19
|
+
import { Parser } from '@gmod/binary-parser'
|
|
20
|
+
import CramRecord from './record'
|
|
21
|
+
import { Filehandle } from './filehandle'
|
|
22
|
+
|
|
17
23
|
//source:https://abdulapopoola.com/2019/01/20/check-endianness-with-javascript/
|
|
18
24
|
function getEndianness() {
|
|
19
|
-
|
|
20
|
-
|
|
25
|
+
const uInt32 = new Uint32Array([0x11223344])
|
|
26
|
+
const uInt8 = new Uint8Array(uInt32.buffer)
|
|
21
27
|
|
|
22
28
|
if (uInt8[0] === 0x44) {
|
|
23
29
|
return 0 //little-endian
|
|
@@ -28,33 +34,62 @@ function getEndianness() {
|
|
|
28
34
|
}
|
|
29
35
|
}
|
|
30
36
|
|
|
37
|
+
// export type CramFileSource =
|
|
38
|
+
// | { url: string; path?: undefined; filehandle?: undefined }
|
|
39
|
+
// | { path: string; url?: undefined; filehandle?: undefined }
|
|
40
|
+
// | { filehandle: Filehandle; url?: undefined; path?: undefined }
|
|
41
|
+
|
|
42
|
+
export type CramFileSource = {
|
|
43
|
+
filehandle?: Filehandle
|
|
44
|
+
url?: string
|
|
45
|
+
path?: string
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
export type SeqFetch = (
|
|
49
|
+
seqId: number,
|
|
50
|
+
start: number,
|
|
51
|
+
end: number,
|
|
52
|
+
) => Promise<string>
|
|
53
|
+
|
|
54
|
+
export type CramFileArgs = CramFileSource & {
|
|
55
|
+
checkSequenceMD5: boolean
|
|
56
|
+
cacheSize?: number
|
|
57
|
+
seqFetch: SeqFetch
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
export type CramFileBlock = BlockHeader & {
|
|
61
|
+
_endPosition: number
|
|
62
|
+
contentPosition: number
|
|
63
|
+
_size: number
|
|
64
|
+
content: Buffer
|
|
65
|
+
crc32?: number
|
|
66
|
+
}
|
|
67
|
+
|
|
31
68
|
export default class CramFile {
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
*/
|
|
44
|
-
constructor(args) {
|
|
69
|
+
private file: Filehandle
|
|
70
|
+
public validateChecksums: boolean
|
|
71
|
+
public fetchReferenceSequenceCallback: SeqFetch
|
|
72
|
+
public options: {
|
|
73
|
+
checkSequenceMD5: boolean
|
|
74
|
+
cacheSize: number
|
|
75
|
+
}
|
|
76
|
+
public featureCache: QuickLRU<string, Promise<CramRecord[]>>
|
|
77
|
+
private header: string | undefined
|
|
78
|
+
|
|
79
|
+
constructor(args: CramFileArgs) {
|
|
45
80
|
this.file = open(args.url, args.path, args.filehandle)
|
|
46
81
|
this.validateChecksums = true
|
|
47
82
|
this.fetchReferenceSequenceCallback = args.seqFetch
|
|
48
83
|
this.options = {
|
|
49
|
-
checkSequenceMD5: args.checkSequenceMD5
|
|
50
|
-
cacheSize: args.cacheSize
|
|
84
|
+
checkSequenceMD5: args.checkSequenceMD5,
|
|
85
|
+
cacheSize: args.cacheSize ?? 20000,
|
|
51
86
|
}
|
|
52
87
|
|
|
53
88
|
// cache of features in a slice, keyed by the
|
|
54
89
|
// slice offset. caches all of the features in a slice, or none.
|
|
55
90
|
// the cache is actually used by the slice object, it's just
|
|
56
91
|
// kept here at the level of the file
|
|
57
|
-
this.featureCache = new
|
|
92
|
+
this.featureCache = new QuickLRU({
|
|
58
93
|
maxSize: this.options.cacheSize,
|
|
59
94
|
})
|
|
60
95
|
if (getEndianness() > 0) {
|
|
@@ -62,19 +97,27 @@ export default class CramFile {
|
|
|
62
97
|
}
|
|
63
98
|
}
|
|
64
99
|
|
|
65
|
-
toString() {
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
}
|
|
100
|
+
// toString() {
|
|
101
|
+
// if (this.file.filename) {
|
|
102
|
+
// return this.file.filename
|
|
103
|
+
// }
|
|
104
|
+
// if (this.file.url) {
|
|
105
|
+
// return this.file.url
|
|
106
|
+
// }
|
|
107
|
+
//
|
|
108
|
+
// return '(cram file)'
|
|
109
|
+
// }
|
|
75
110
|
|
|
76
111
|
// can just read this object like a filehandle
|
|
77
|
-
read(
|
|
112
|
+
read(
|
|
113
|
+
buffer: Buffer,
|
|
114
|
+
offset: number,
|
|
115
|
+
length: number,
|
|
116
|
+
position: number,
|
|
117
|
+
): Promise<{
|
|
118
|
+
bytesRead: number
|
|
119
|
+
buffer: Buffer
|
|
120
|
+
}> {
|
|
78
121
|
return this.file.read(buffer, offset, length, position)
|
|
79
122
|
}
|
|
80
123
|
|
|
@@ -87,7 +130,8 @@ export default class CramFile {
|
|
|
87
130
|
async getDefinition() {
|
|
88
131
|
const headbytes = Buffer.allocUnsafe(cramFileDefinitionParser.maxLength)
|
|
89
132
|
await this.file.read(headbytes, 0, cramFileDefinitionParser.maxLength, 0)
|
|
90
|
-
const definition = cramFileDefinitionParser.parser.parse(headbytes)
|
|
133
|
+
const definition = cramFileDefinitionParser.parser.parse(headbytes)
|
|
134
|
+
.result as any
|
|
91
135
|
if (definition.majorVersion !== 2 && definition.majorVersion !== 3) {
|
|
92
136
|
throw new CramUnimplementedError(
|
|
93
137
|
`CRAM version ${definition.majorVersion} not supported`,
|
|
@@ -103,7 +147,11 @@ export default class CramFile {
|
|
|
103
147
|
throw new CramMalformedError('file contains no containers')
|
|
104
148
|
}
|
|
105
149
|
|
|
106
|
-
const
|
|
150
|
+
const firstBlock = await firstContainer.getFirstBlock()
|
|
151
|
+
if (firstBlock === undefined) {
|
|
152
|
+
return parseHeaderText('')
|
|
153
|
+
}
|
|
154
|
+
const content = firstBlock.content
|
|
107
155
|
// find the end of the trailing zeros in the header text
|
|
108
156
|
const headerLength = content.readInt32LE(0)
|
|
109
157
|
const textStart = 4
|
|
@@ -126,7 +174,7 @@ export default class CramFile {
|
|
|
126
174
|
return getSectionParsers(majorVersion)
|
|
127
175
|
}
|
|
128
176
|
|
|
129
|
-
async getContainerById(containerNumber) {
|
|
177
|
+
async getContainerById(containerNumber: number) {
|
|
130
178
|
const sectionParsers = await this.getSectionParsers()
|
|
131
179
|
let position = sectionParsers.cramFileDefinition.maxLength
|
|
132
180
|
const { size: fileSize } = await this.file.stat()
|
|
@@ -156,6 +204,9 @@ export default class CramFile {
|
|
|
156
204
|
position = currentHeader._endPosition
|
|
157
205
|
for (let j = 0; j < currentHeader.numBlocks; j += 1) {
|
|
158
206
|
const block = await this.readBlock(position)
|
|
207
|
+
if (block === undefined) {
|
|
208
|
+
return undefined
|
|
209
|
+
}
|
|
159
210
|
position = block._endPosition
|
|
160
211
|
}
|
|
161
212
|
} else {
|
|
@@ -167,7 +218,12 @@ export default class CramFile {
|
|
|
167
218
|
return currentContainer
|
|
168
219
|
}
|
|
169
220
|
|
|
170
|
-
async checkCrc32(
|
|
221
|
+
async checkCrc32(
|
|
222
|
+
position: number,
|
|
223
|
+
length: number,
|
|
224
|
+
recordedCrc32: number,
|
|
225
|
+
description: string,
|
|
226
|
+
) {
|
|
171
227
|
const b = Buffer.allocUnsafe(length)
|
|
172
228
|
await this.file.read(b, 0, length, position)
|
|
173
229
|
const calculatedCrc32 = crc32.unsigned(b)
|
|
@@ -181,7 +237,7 @@ export default class CramFile {
|
|
|
181
237
|
/**
|
|
182
238
|
* @returns {Promise[number]} the number of containers in the file
|
|
183
239
|
*/
|
|
184
|
-
async containerCount() {
|
|
240
|
+
async containerCount(): Promise<number | undefined> {
|
|
185
241
|
const sectionParsers = await this.getSectionParsers()
|
|
186
242
|
const { size: fileSize } = await this.file.stat()
|
|
187
243
|
const { cramContainerHeader1 } = sectionParsers
|
|
@@ -202,6 +258,9 @@ export default class CramFile {
|
|
|
202
258
|
position = currentHeader._endPosition
|
|
203
259
|
for (let j = 0; j < currentHeader.numBlocks; j += 1) {
|
|
204
260
|
const block = await this.readBlock(position)
|
|
261
|
+
if (block === undefined) {
|
|
262
|
+
return undefined
|
|
263
|
+
}
|
|
205
264
|
position = block._endPosition
|
|
206
265
|
}
|
|
207
266
|
} else {
|
|
@@ -214,11 +273,11 @@ export default class CramFile {
|
|
|
214
273
|
return containerCount
|
|
215
274
|
}
|
|
216
275
|
|
|
217
|
-
getContainerAtPosition(position) {
|
|
276
|
+
getContainerAtPosition(position: number) {
|
|
218
277
|
return new CramContainer(this, position)
|
|
219
278
|
}
|
|
220
279
|
|
|
221
|
-
async readBlockHeader(position) {
|
|
280
|
+
async readBlockHeader(position: number) {
|
|
222
281
|
const sectionParsers = await this.getSectionParsers()
|
|
223
282
|
const { cramBlockHeader } = sectionParsers
|
|
224
283
|
const { size: fileSize } = await this.file.stat()
|
|
@@ -232,11 +291,11 @@ export default class CramFile {
|
|
|
232
291
|
return parseItem(buffer, cramBlockHeader.parser, 0, position)
|
|
233
292
|
}
|
|
234
293
|
|
|
235
|
-
async _parseSection(
|
|
236
|
-
section,
|
|
237
|
-
position,
|
|
294
|
+
async _parseSection<T>(
|
|
295
|
+
section: { parser: Parser<T>; maxLength: number },
|
|
296
|
+
position: number,
|
|
238
297
|
size = section.maxLength,
|
|
239
|
-
preReadBuffer,
|
|
298
|
+
preReadBuffer = undefined,
|
|
240
299
|
) {
|
|
241
300
|
let buffer
|
|
242
301
|
if (preReadBuffer) {
|
|
@@ -258,16 +317,21 @@ export default class CramFile {
|
|
|
258
317
|
return data
|
|
259
318
|
}
|
|
260
319
|
|
|
261
|
-
_uncompress(
|
|
320
|
+
_uncompress(
|
|
321
|
+
compressionMethod: CompressionMethod,
|
|
322
|
+
inputBuffer: Buffer,
|
|
323
|
+
outputBuffer: Buffer,
|
|
324
|
+
) {
|
|
262
325
|
if (compressionMethod === 'gzip') {
|
|
263
326
|
const result = unzip(inputBuffer)
|
|
264
327
|
result.copy(outputBuffer)
|
|
265
328
|
} else if (compressionMethod === 'bzip2') {
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
329
|
+
const bits = bzip2.array(inputBuffer)
|
|
330
|
+
let size = bzip2.header(bits)
|
|
331
|
+
let j = 0
|
|
332
|
+
let chunk
|
|
269
333
|
do {
|
|
270
|
-
|
|
334
|
+
chunk = bzip2.decompress(bits, size)
|
|
271
335
|
if (chunk != -1) {
|
|
272
336
|
Buffer.from(chunk).copy(outputBuffer, j)
|
|
273
337
|
j += chunk.length
|
|
@@ -293,26 +357,35 @@ export default class CramFile {
|
|
|
293
357
|
}
|
|
294
358
|
}
|
|
295
359
|
|
|
296
|
-
async readBlock(position) {
|
|
360
|
+
async readBlock(position: number): Promise<CramFileBlock | undefined> {
|
|
297
361
|
const { majorVersion } = await this.getDefinition()
|
|
298
362
|
const sectionParsers = await this.getSectionParsers()
|
|
299
|
-
const
|
|
300
|
-
|
|
301
|
-
|
|
363
|
+
const blockHeader = await this.readBlockHeader(position)
|
|
364
|
+
if (blockHeader === undefined) {
|
|
365
|
+
return undefined
|
|
366
|
+
}
|
|
367
|
+
const blockContentPosition = blockHeader._endPosition
|
|
368
|
+
|
|
369
|
+
const uncompressedData = Buffer.allocUnsafe(blockHeader.uncompressedSize)
|
|
302
370
|
|
|
303
|
-
const
|
|
371
|
+
const block: CramFileBlock = {
|
|
372
|
+
...blockHeader,
|
|
373
|
+
_endPosition: blockContentPosition,
|
|
374
|
+
contentPosition: blockContentPosition,
|
|
375
|
+
content: uncompressedData,
|
|
376
|
+
}
|
|
304
377
|
|
|
305
|
-
if (
|
|
306
|
-
const compressedData = Buffer.allocUnsafe(
|
|
378
|
+
if (blockHeader.compressionMethod !== 'raw') {
|
|
379
|
+
const compressedData = Buffer.allocUnsafe(blockHeader.compressedSize)
|
|
307
380
|
await this.read(
|
|
308
381
|
compressedData,
|
|
309
382
|
0,
|
|
310
|
-
|
|
383
|
+
blockHeader.compressedSize,
|
|
311
384
|
blockContentPosition,
|
|
312
385
|
)
|
|
313
386
|
|
|
314
387
|
this._uncompress(
|
|
315
|
-
|
|
388
|
+
blockHeader.compressionMethod,
|
|
316
389
|
compressedData,
|
|
317
390
|
uncompressedData,
|
|
318
391
|
)
|
|
@@ -320,27 +393,28 @@ export default class CramFile {
|
|
|
320
393
|
await this.read(
|
|
321
394
|
uncompressedData,
|
|
322
395
|
0,
|
|
323
|
-
|
|
396
|
+
blockHeader.uncompressedSize,
|
|
324
397
|
blockContentPosition,
|
|
325
398
|
)
|
|
326
399
|
}
|
|
327
400
|
|
|
328
|
-
block.content = uncompressedData
|
|
329
|
-
|
|
330
401
|
if (majorVersion >= 3) {
|
|
331
402
|
// parse the crc32
|
|
332
403
|
const crc = await this._parseSection(
|
|
333
404
|
sectionParsers.cramBlockCrc32,
|
|
334
|
-
blockContentPosition +
|
|
405
|
+
blockContentPosition + blockHeader.compressedSize,
|
|
335
406
|
)
|
|
407
|
+
if (crc === undefined) {
|
|
408
|
+
return undefined
|
|
409
|
+
}
|
|
336
410
|
block.crc32 = crc.crc32
|
|
337
411
|
|
|
338
412
|
// check the block data crc32
|
|
339
413
|
if (this.validateChecksums) {
|
|
340
414
|
await this.checkCrc32(
|
|
341
415
|
position,
|
|
342
|
-
|
|
343
|
-
|
|
416
|
+
blockHeader._size + blockHeader.compressedSize,
|
|
417
|
+
crc.crc32,
|
|
344
418
|
'block data',
|
|
345
419
|
)
|
|
346
420
|
}
|
|
File without changes
|
|
@@ -1,13 +1,34 @@
|
|
|
1
1
|
import Constants from './constants'
|
|
2
|
+
import CramContainerCompressionScheme from './container/compressionScheme'
|
|
3
|
+
import decodeRecord from './slice/decodeRecord'
|
|
2
4
|
|
|
3
|
-
|
|
5
|
+
export type RefRegion = {
|
|
6
|
+
start: number
|
|
7
|
+
end: number
|
|
8
|
+
seq: string
|
|
9
|
+
}
|
|
10
|
+
|
|
11
|
+
export type ReadFeature = {
|
|
12
|
+
code: string
|
|
13
|
+
pos: number
|
|
14
|
+
refPos: number
|
|
15
|
+
data: any
|
|
16
|
+
|
|
17
|
+
ref?: string
|
|
18
|
+
sub?: string
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
function decodeReadSequence(
|
|
22
|
+
cramRecord: CramRecord,
|
|
23
|
+
refRegion: RefRegion,
|
|
24
|
+
): string | null {
|
|
4
25
|
// if it has no length, it has no sequence
|
|
5
26
|
if (!cramRecord.lengthOnRef && !cramRecord.readLength) {
|
|
6
|
-
return
|
|
27
|
+
return null
|
|
7
28
|
}
|
|
8
29
|
|
|
9
30
|
if (cramRecord.isUnknownBases()) {
|
|
10
|
-
return
|
|
31
|
+
return null
|
|
11
32
|
}
|
|
12
33
|
|
|
13
34
|
// remember: all coordinates are 1-based closed
|
|
@@ -106,10 +127,10 @@ const baseNumbers = {
|
|
|
106
127
|
}
|
|
107
128
|
|
|
108
129
|
function decodeBaseSubstitution(
|
|
109
|
-
cramRecord,
|
|
110
|
-
refRegion,
|
|
111
|
-
compressionScheme,
|
|
112
|
-
readFeature,
|
|
130
|
+
cramRecord: CramRecord,
|
|
131
|
+
refRegion: RefRegion,
|
|
132
|
+
compressionScheme: CramContainerCompressionScheme,
|
|
133
|
+
readFeature: ReadFeature,
|
|
113
134
|
) {
|
|
114
135
|
if (!refRegion) {
|
|
115
136
|
return
|
|
@@ -121,7 +142,7 @@ function decodeBaseSubstitution(
|
|
|
121
142
|
if (refBase) {
|
|
122
143
|
readFeature.ref = refBase
|
|
123
144
|
}
|
|
124
|
-
let baseNumber = baseNumbers[refBase]
|
|
145
|
+
let baseNumber = (baseNumbers as any)[refBase]
|
|
125
146
|
if (baseNumber === undefined) {
|
|
126
147
|
baseNumber = 4
|
|
127
148
|
}
|
|
@@ -132,12 +153,153 @@ function decodeBaseSubstitution(
|
|
|
132
153
|
}
|
|
133
154
|
}
|
|
134
155
|
|
|
156
|
+
export type MateRecord = {
|
|
157
|
+
readName?: string
|
|
158
|
+
sequenceId: number
|
|
159
|
+
alignmentStart: number
|
|
160
|
+
flags?: number
|
|
161
|
+
|
|
162
|
+
uniqueId?: number
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
export const BamFlags = [
|
|
166
|
+
[0x1, 'Paired'],
|
|
167
|
+
[0x2, 'ProperlyPaired'],
|
|
168
|
+
[0x4, 'SegmentUnmapped'],
|
|
169
|
+
[0x8, 'MateUnmapped'],
|
|
170
|
+
[0x10, 'ReverseComplemented'],
|
|
171
|
+
// the mate is mapped to the reverse strand
|
|
172
|
+
[0x20, 'MateReverseComplemented'],
|
|
173
|
+
// this is read1
|
|
174
|
+
[0x40, 'Read1'],
|
|
175
|
+
// this is read2
|
|
176
|
+
[0x80, 'Read2'],
|
|
177
|
+
// not primary alignment
|
|
178
|
+
[0x100, 'Secondary'],
|
|
179
|
+
// QC failure
|
|
180
|
+
[0x200, 'FailedQc'],
|
|
181
|
+
// optical or PCR duplicate
|
|
182
|
+
[0x400, 'Duplicate'],
|
|
183
|
+
// supplementary alignment
|
|
184
|
+
[0x800, 'Supplementary'],
|
|
185
|
+
] as const
|
|
186
|
+
|
|
187
|
+
export const CramFlags = [
|
|
188
|
+
[0x1, 'PreservingQualityScores'],
|
|
189
|
+
[0x2, 'Detached'],
|
|
190
|
+
[0x4, 'WithMateDownstream'],
|
|
191
|
+
[0x8, 'DecodeSequenceAsStar'],
|
|
192
|
+
] as const
|
|
193
|
+
|
|
194
|
+
export const MateFlags = [
|
|
195
|
+
[0x1, 'OnNegativeStrand'],
|
|
196
|
+
[0x2, 'Unmapped'],
|
|
197
|
+
] as const
|
|
198
|
+
|
|
199
|
+
type FlagsDecoder<Type> = {
|
|
200
|
+
[Property in Type as `is${Capitalize<string & Property>}`]: (
|
|
201
|
+
flags: number,
|
|
202
|
+
) => boolean
|
|
203
|
+
}
|
|
204
|
+
|
|
205
|
+
type FlagsEncoder<Type> = {
|
|
206
|
+
[Property in Type as `set${Capitalize<string & Property>}`]: (
|
|
207
|
+
flags: number,
|
|
208
|
+
) => number
|
|
209
|
+
}
|
|
210
|
+
|
|
211
|
+
function makeFlagsHelper<T>(
|
|
212
|
+
x: ReadonlyArray<readonly [number, T]>,
|
|
213
|
+
): FlagsDecoder<T> & FlagsEncoder<T> {
|
|
214
|
+
const r: any = {}
|
|
215
|
+
for (const [code, name] of x) {
|
|
216
|
+
r['is' + name] = (flags: number) => !!(flags & code)
|
|
217
|
+
r['set' + name] = (flags: number) => flags | code
|
|
218
|
+
}
|
|
219
|
+
|
|
220
|
+
return r
|
|
221
|
+
}
|
|
222
|
+
|
|
223
|
+
export const BamFlagsDecoder = makeFlagsHelper(BamFlags)
|
|
224
|
+
export const CramFlagsDecoder = makeFlagsHelper(CramFlags)
|
|
225
|
+
export const MateFlagsDecoder = makeFlagsHelper(MateFlags)
|
|
226
|
+
|
|
135
227
|
/**
|
|
136
228
|
* Class of each CRAM record returned by this API.
|
|
137
229
|
*/
|
|
138
230
|
export default class CramRecord {
|
|
139
|
-
|
|
140
|
-
|
|
231
|
+
public tags: Record<string, string>
|
|
232
|
+
public flags: number
|
|
233
|
+
public cramFlags: number
|
|
234
|
+
public readBases?: string | null
|
|
235
|
+
public _refRegion?: RefRegion
|
|
236
|
+
public readFeatures?: ReadFeature[]
|
|
237
|
+
public alignmentStart: number
|
|
238
|
+
public lengthOnRef: number | undefined
|
|
239
|
+
public readLength: number
|
|
240
|
+
public templateLength?: number
|
|
241
|
+
public templateSize?: number
|
|
242
|
+
public readName?: string
|
|
243
|
+
public mateRecordNumber?: number
|
|
244
|
+
public mate?: MateRecord
|
|
245
|
+
public uniqueId: number
|
|
246
|
+
public sequenceId: number
|
|
247
|
+
public readGroupId: number
|
|
248
|
+
public mappingQuality: number | undefined
|
|
249
|
+
public qualityScores: number[] | null | undefined
|
|
250
|
+
|
|
251
|
+
constructor({
|
|
252
|
+
flags,
|
|
253
|
+
cramFlags,
|
|
254
|
+
readLength,
|
|
255
|
+
mappingQuality,
|
|
256
|
+
lengthOnRef,
|
|
257
|
+
qualityScores,
|
|
258
|
+
mateRecordNumber,
|
|
259
|
+
readBases,
|
|
260
|
+
readFeatures,
|
|
261
|
+
mateToUse,
|
|
262
|
+
readGroupId,
|
|
263
|
+
readName,
|
|
264
|
+
sequenceId,
|
|
265
|
+
uniqueId,
|
|
266
|
+
templateSize,
|
|
267
|
+
alignmentStart,
|
|
268
|
+
tags,
|
|
269
|
+
}: ReturnType<typeof decodeRecord> & { uniqueId: number }) {
|
|
270
|
+
this.flags = flags
|
|
271
|
+
this.cramFlags = cramFlags
|
|
272
|
+
this.readLength = readLength
|
|
273
|
+
this.mappingQuality = mappingQuality
|
|
274
|
+
this.lengthOnRef = lengthOnRef
|
|
275
|
+
this.qualityScores = qualityScores
|
|
276
|
+
if (readBases) {
|
|
277
|
+
this.readBases = readBases
|
|
278
|
+
}
|
|
279
|
+
|
|
280
|
+
this.readGroupId = readGroupId
|
|
281
|
+
this.readName = readName
|
|
282
|
+
this.sequenceId = sequenceId
|
|
283
|
+
this.uniqueId = uniqueId
|
|
284
|
+
this.templateSize = templateSize
|
|
285
|
+
this.alignmentStart = alignmentStart
|
|
286
|
+
this.tags = tags
|
|
287
|
+
|
|
288
|
+
// backwards compatibility
|
|
289
|
+
if (readFeatures) {
|
|
290
|
+
this.readFeatures = readFeatures
|
|
291
|
+
}
|
|
292
|
+
if (mateToUse) {
|
|
293
|
+
this.mate = {
|
|
294
|
+
flags: mateToUse.mateFlags,
|
|
295
|
+
readName: mateToUse.mateReadName,
|
|
296
|
+
sequenceId: mateToUse.mateSequenceId,
|
|
297
|
+
alignmentStart: mateToUse.mateAlignmentStart,
|
|
298
|
+
}
|
|
299
|
+
}
|
|
300
|
+
if (mateRecordNumber) {
|
|
301
|
+
this.mateRecordNumber = mateRecordNumber
|
|
302
|
+
}
|
|
141
303
|
}
|
|
142
304
|
|
|
143
305
|
/**
|
|
@@ -230,7 +392,10 @@ export default class CramRecord {
|
|
|
230
392
|
*/
|
|
231
393
|
getReadBases() {
|
|
232
394
|
if (!this.readBases && this._refRegion) {
|
|
233
|
-
|
|
395
|
+
const decoded = decodeReadSequence(this, this._refRegion)
|
|
396
|
+
if (decoded) {
|
|
397
|
+
this.readBases = decoded
|
|
398
|
+
}
|
|
234
399
|
}
|
|
235
400
|
return this.readBases
|
|
236
401
|
}
|
|
@@ -261,6 +426,9 @@ export default class CramRecord {
|
|
|
261
426
|
|
|
262
427
|
const tmp = []
|
|
263
428
|
let isize = this.templateLength || this.templateSize
|
|
429
|
+
if (isize === undefined) {
|
|
430
|
+
throw new Error('One of templateSize and templateLength must be set')
|
|
431
|
+
}
|
|
264
432
|
if (this.alignmentStart > this.mate.alignmentStart && isize > 0) {
|
|
265
433
|
isize = -isize
|
|
266
434
|
}
|
|
@@ -293,7 +461,10 @@ export default class CramRecord {
|
|
|
293
461
|
* @param {CramContainerCompressionScheme} compressionScheme
|
|
294
462
|
* @returns {undefined} nothing
|
|
295
463
|
*/
|
|
296
|
-
addReferenceSequence(
|
|
464
|
+
addReferenceSequence(
|
|
465
|
+
refRegion: RefRegion,
|
|
466
|
+
compressionScheme: CramContainerCompressionScheme,
|
|
467
|
+
) {
|
|
297
468
|
if (this.readFeatures) {
|
|
298
469
|
// use the reference bases to decode the bases
|
|
299
470
|
// substituted in each base substitution
|
|
@@ -322,12 +493,12 @@ export default class CramRecord {
|
|
|
322
493
|
}
|
|
323
494
|
|
|
324
495
|
toJSON() {
|
|
325
|
-
const data = {}
|
|
496
|
+
const data: any = {}
|
|
326
497
|
Object.keys(this).forEach(k => {
|
|
327
498
|
if (k.charAt(0) === '_') {
|
|
328
499
|
return
|
|
329
500
|
}
|
|
330
|
-
data[k] = this[k]
|
|
501
|
+
data[k] = (this as any)[k]
|
|
331
502
|
})
|
|
332
503
|
|
|
333
504
|
data.readBases = this.getReadBases()
|