@gmod/cram 1.5.9 → 1.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (258) hide show
  1. package/CHANGELOG.md +90 -0
  2. package/README.md +182 -172
  3. package/dist/craiIndex.d.ts +37 -0
  4. package/dist/craiIndex.js +196 -301
  5. package/dist/craiIndex.js.map +1 -0
  6. package/dist/cram-bundle.js +6 -15
  7. package/dist/cramFile/codecs/_base.d.ts +6 -0
  8. package/dist/cramFile/codecs/_base.js +44 -53
  9. package/dist/cramFile/codecs/_base.js.map +1 -0
  10. package/dist/cramFile/codecs/beta.d.ts +4 -0
  11. package/dist/cramFile/codecs/beta.js +38 -48
  12. package/dist/cramFile/codecs/beta.js.map +1 -0
  13. package/dist/cramFile/codecs/byteArrayLength.d.ts +8 -0
  14. package/dist/cramFile/codecs/byteArrayLength.js +58 -78
  15. package/dist/cramFile/codecs/byteArrayLength.js.map +1 -0
  16. package/dist/cramFile/codecs/byteArrayStop.d.ts +6 -0
  17. package/dist/cramFile/codecs/byteArrayStop.js +62 -76
  18. package/dist/cramFile/codecs/byteArrayStop.js.map +1 -0
  19. package/dist/cramFile/codecs/external.d.ts +7 -0
  20. package/dist/cramFile/codecs/external.js +63 -81
  21. package/dist/cramFile/codecs/external.js.map +1 -0
  22. package/dist/cramFile/codecs/gamma.d.ts +4 -0
  23. package/dist/cramFile/codecs/gamma.js +43 -56
  24. package/dist/cramFile/codecs/gamma.js.map +1 -0
  25. package/dist/cramFile/codecs/huffman.d.ts +17 -0
  26. package/dist/cramFile/codecs/huffman.js +126 -199
  27. package/dist/cramFile/codecs/huffman.js.map +1 -0
  28. package/dist/cramFile/codecs/index.d.ts +2 -0
  29. package/dist/cramFile/codecs/index.js +31 -38
  30. package/dist/cramFile/codecs/index.js.map +1 -0
  31. package/dist/cramFile/codecs/subexp.d.ts +4 -0
  32. package/dist/cramFile/codecs/subexp.js +51 -64
  33. package/dist/cramFile/codecs/subexp.js.map +1 -0
  34. package/dist/cramFile/constants.d.ts +36 -0
  35. package/dist/cramFile/constants.js +52 -50
  36. package/dist/cramFile/constants.js.map +1 -0
  37. package/dist/cramFile/container/compressionScheme.d.ts +23 -0
  38. package/dist/cramFile/container/compressionScheme.js +115 -153
  39. package/dist/cramFile/container/compressionScheme.js.map +1 -0
  40. package/dist/cramFile/container/index.d.ts +13 -0
  41. package/dist/cramFile/container/index.js +169 -283
  42. package/dist/cramFile/container/index.js.map +1 -0
  43. package/dist/cramFile/file.d.ts +63 -0
  44. package/dist/cramFile/file.js +440 -766
  45. package/dist/cramFile/file.js.map +1 -0
  46. package/dist/cramFile/index.d.ts +2 -0
  47. package/dist/cramFile/index.js +7 -4
  48. package/dist/cramFile/index.js.map +1 -0
  49. package/dist/cramFile/record.d.ts +79 -0
  50. package/dist/cramFile/record.js +253 -308
  51. package/dist/cramFile/record.js.map +1 -0
  52. package/dist/cramFile/sectionParsers.d.ts +18 -0
  53. package/dist/cramFile/sectionParsers.js +324 -362
  54. package/dist/cramFile/sectionParsers.js.map +1 -0
  55. package/dist/cramFile/slice/decodeRecord.d.ts +2 -0
  56. package/dist/cramFile/slice/decodeRecord.js +278 -298
  57. package/dist/cramFile/slice/decodeRecord.js.map +1 -0
  58. package/dist/cramFile/slice/index.d.ts +20 -0
  59. package/dist/cramFile/slice/index.js +488 -789
  60. package/dist/cramFile/slice/index.js.map +1 -0
  61. package/dist/cramFile/util.d.ts +5 -0
  62. package/dist/cramFile/util.js +158 -144
  63. package/dist/cramFile/util.js.map +1 -0
  64. package/dist/errors.d.ts +23 -0
  65. package/dist/errors.js +66 -103
  66. package/dist/errors.js.map +1 -0
  67. package/dist/index.d.ts +4 -0
  68. package/dist/index.js +12 -12
  69. package/dist/index.js.map +1 -0
  70. package/dist/indexedCramFile.d.ts +39 -0
  71. package/dist/indexedCramFile.js +213 -315
  72. package/dist/indexedCramFile.js.map +1 -0
  73. package/dist/io/bufferCache.d.ts +12 -0
  74. package/dist/io/bufferCache.js +108 -128
  75. package/dist/io/bufferCache.js.map +1 -0
  76. package/dist/io/index.d.ts +5 -0
  77. package/dist/io/index.js +29 -27
  78. package/dist/io/index.js.map +1 -0
  79. package/dist/io/localFile.d.ts +10 -0
  80. package/dist/io/localFile.js +105 -162
  81. package/dist/io/localFile.js.map +1 -0
  82. package/dist/io/remoteFile.d.ts +16 -0
  83. package/dist/io/remoteFile.js +137 -206
  84. package/dist/io/remoteFile.js.map +1 -0
  85. package/dist/rans/constants.d.ts +3 -0
  86. package/dist/rans/constants.js +6 -6
  87. package/dist/rans/constants.js.map +1 -0
  88. package/dist/rans/d04.d.ts +1 -0
  89. package/dist/rans/d04.js +70 -99
  90. package/dist/rans/d04.js.map +1 -0
  91. package/dist/rans/d14.d.ts +1 -0
  92. package/dist/rans/d14.js +55 -93
  93. package/dist/rans/d14.js.map +1 -0
  94. package/dist/rans/decoding.d.ts +30 -0
  95. package/dist/rans/decoding.js +112 -159
  96. package/dist/rans/decoding.js.map +1 -0
  97. package/dist/rans/frequencies.d.ts +2 -0
  98. package/dist/rans/frequencies.js +110 -119
  99. package/dist/rans/frequencies.js.map +1 -0
  100. package/dist/rans/index.d.ts +1 -0
  101. package/dist/rans/index.js +111 -174
  102. package/dist/rans/index.js.map +1 -0
  103. package/dist/sam.d.ts +1 -0
  104. package/dist/sam.js +16 -41
  105. package/dist/sam.js.map +1 -0
  106. package/dist/unzip-pako.d.ts +2 -0
  107. package/dist/unzip-pako.js +9 -0
  108. package/dist/unzip-pako.js.map +1 -0
  109. package/dist/unzip.d.ts +2 -0
  110. package/dist/unzip.js +6 -0
  111. package/dist/unzip.js.map +1 -0
  112. package/errors.js +66 -103
  113. package/esm/craiIndex.d.ts +37 -0
  114. package/esm/craiIndex.js +158 -0
  115. package/esm/craiIndex.js.map +1 -0
  116. package/esm/cramFile/codecs/_base.d.ts +6 -0
  117. package/esm/cramFile/codecs/_base.js +42 -0
  118. package/esm/cramFile/codecs/_base.js.map +1 -0
  119. package/esm/cramFile/codecs/beta.d.ts +4 -0
  120. package/esm/cramFile/codecs/beta.js +15 -0
  121. package/esm/cramFile/codecs/beta.js.map +1 -0
  122. package/esm/cramFile/codecs/byteArrayLength.d.ts +8 -0
  123. package/esm/cramFile/codecs/byteArrayLength.js +35 -0
  124. package/esm/cramFile/codecs/byteArrayLength.js.map +1 -0
  125. package/esm/cramFile/codecs/byteArrayStop.d.ts +6 -0
  126. package/esm/cramFile/codecs/byteArrayStop.js +40 -0
  127. package/esm/cramFile/codecs/byteArrayStop.js.map +1 -0
  128. package/esm/cramFile/codecs/external.d.ts +7 -0
  129. package/esm/cramFile/codecs/external.js +40 -0
  130. package/esm/cramFile/codecs/external.js.map +1 -0
  131. package/esm/cramFile/codecs/gamma.d.ts +4 -0
  132. package/esm/cramFile/codecs/gamma.js +20 -0
  133. package/esm/cramFile/codecs/gamma.js.map +1 -0
  134. package/esm/cramFile/codecs/huffman.d.ts +17 -0
  135. package/esm/cramFile/codecs/huffman.js +107 -0
  136. package/esm/cramFile/codecs/huffman.js.map +1 -0
  137. package/esm/cramFile/codecs/index.d.ts +2 -0
  138. package/esm/cramFile/codecs/index.js +30 -0
  139. package/esm/cramFile/codecs/index.js.map +1 -0
  140. package/esm/cramFile/codecs/subexp.d.ts +4 -0
  141. package/esm/cramFile/codecs/subexp.js +28 -0
  142. package/esm/cramFile/codecs/subexp.js.map +1 -0
  143. package/esm/cramFile/constants.d.ts +36 -0
  144. package/esm/cramFile/constants.js +51 -0
  145. package/esm/cramFile/constants.js.map +1 -0
  146. package/esm/cramFile/container/compressionScheme.d.ts +23 -0
  147. package/esm/cramFile/container/compressionScheme.js +123 -0
  148. package/esm/cramFile/container/compressionScheme.js.map +1 -0
  149. package/esm/cramFile/container/index.d.ts +13 -0
  150. package/esm/cramFile/container/index.js +84 -0
  151. package/esm/cramFile/container/index.js.map +1 -0
  152. package/esm/cramFile/file.d.ts +63 -0
  153. package/esm/cramFile/file.js +281 -0
  154. package/esm/cramFile/file.js.map +1 -0
  155. package/esm/cramFile/index.d.ts +2 -0
  156. package/esm/cramFile/index.js +3 -0
  157. package/esm/cramFile/index.js.map +1 -0
  158. package/esm/cramFile/record.d.ts +79 -0
  159. package/esm/cramFile/record.js +297 -0
  160. package/esm/cramFile/record.js.map +1 -0
  161. package/esm/cramFile/sectionParsers.d.ts +18 -0
  162. package/esm/cramFile/sectionParsers.js +347 -0
  163. package/esm/cramFile/sectionParsers.js.map +1 -0
  164. package/esm/cramFile/slice/decodeRecord.d.ts +2 -0
  165. package/esm/cramFile/slice/decodeRecord.js +299 -0
  166. package/esm/cramFile/slice/decodeRecord.js.map +1 -0
  167. package/esm/cramFile/slice/index.d.ts +20 -0
  168. package/esm/cramFile/slice/index.js +364 -0
  169. package/esm/cramFile/slice/index.js.map +1 -0
  170. package/esm/cramFile/util.d.ts +5 -0
  171. package/esm/cramFile/util.js +161 -0
  172. package/esm/cramFile/util.js.map +1 -0
  173. package/esm/errors.d.ts +23 -0
  174. package/esm/errors.js +24 -0
  175. package/esm/errors.js.map +1 -0
  176. package/esm/index.d.ts +4 -0
  177. package/esm/index.js +5 -0
  178. package/esm/index.js.map +1 -0
  179. package/esm/indexedCramFile.d.ts +39 -0
  180. package/esm/indexedCramFile.js +155 -0
  181. package/esm/indexedCramFile.js.map +1 -0
  182. package/esm/io/bufferCache.d.ts +12 -0
  183. package/esm/io/bufferCache.js +54 -0
  184. package/esm/io/bufferCache.js.map +1 -0
  185. package/esm/io/index.d.ts +5 -0
  186. package/esm/io/index.js +24 -0
  187. package/esm/io/index.js.map +1 -0
  188. package/esm/io/localFile.d.ts +10 -0
  189. package/esm/io/localFile.js +31 -0
  190. package/esm/io/localFile.js.map +1 -0
  191. package/esm/io/remoteFile.d.ts +16 -0
  192. package/esm/io/remoteFile.js +64 -0
  193. package/esm/io/remoteFile.js.map +1 -0
  194. package/esm/rans/constants.d.ts +3 -0
  195. package/esm/rans/constants.js +5 -0
  196. package/esm/rans/constants.js.map +1 -0
  197. package/esm/rans/d04.d.ts +1 -0
  198. package/esm/rans/d04.js +67 -0
  199. package/esm/rans/d04.js.map +1 -0
  200. package/esm/rans/d14.d.ts +1 -0
  201. package/esm/rans/d14.js +52 -0
  202. package/esm/rans/d14.js.map +1 -0
  203. package/esm/rans/decoding.d.ts +30 -0
  204. package/esm/rans/decoding.js +118 -0
  205. package/esm/rans/decoding.js.map +1 -0
  206. package/esm/rans/frequencies.d.ts +2 -0
  207. package/esm/rans/frequencies.js +110 -0
  208. package/esm/rans/frequencies.js.map +1 -0
  209. package/esm/rans/index.d.ts +1 -0
  210. package/esm/rans/index.js +195 -0
  211. package/esm/rans/index.js.map +1 -0
  212. package/esm/sam.d.ts +1 -0
  213. package/esm/sam.js +16 -0
  214. package/esm/sam.js.map +1 -0
  215. package/esm/unzip-pako.d.ts +2 -0
  216. package/esm/unzip-pako.js +5 -0
  217. package/esm/unzip-pako.js.map +1 -0
  218. package/esm/unzip.d.ts +2 -0
  219. package/esm/unzip.js +3 -0
  220. package/esm/unzip.js.map +1 -0
  221. package/package.json +38 -35
  222. package/src/craiIndex.js +180 -0
  223. package/src/cramFile/codecs/_base.js +49 -0
  224. package/src/cramFile/codecs/beta.js +23 -0
  225. package/src/cramFile/codecs/byteArrayLength.js +55 -0
  226. package/src/cramFile/codecs/byteArrayStop.js +50 -0
  227. package/src/cramFile/codecs/external.js +54 -0
  228. package/src/cramFile/codecs/gamma.js +30 -0
  229. package/src/cramFile/codecs/huffman.js +137 -0
  230. package/src/cramFile/codecs/index.js +38 -0
  231. package/src/cramFile/codecs/subexp.js +32 -0
  232. package/src/cramFile/constants.js +55 -0
  233. package/src/cramFile/container/compressionScheme.js +144 -0
  234. package/src/cramFile/container/index.js +119 -0
  235. package/src/cramFile/file.js +347 -0
  236. package/src/cramFile/index.js +3 -0
  237. package/src/cramFile/record.js +337 -0
  238. package/src/cramFile/sectionParsers.js +379 -0
  239. package/src/cramFile/slice/decodeRecord.js +362 -0
  240. package/src/cramFile/slice/index.js +497 -0
  241. package/src/cramFile/util.js +169 -0
  242. package/src/errors.js +22 -0
  243. package/src/index.js +5 -0
  244. package/src/indexedCramFile.js +191 -0
  245. package/src/io/bufferCache.js +66 -0
  246. package/src/io/index.js +26 -0
  247. package/src/io/localFile.js +35 -0
  248. package/src/io/remoteFile.js +71 -0
  249. package/src/rans/README.md +1 -0
  250. package/src/rans/constants.js +5 -0
  251. package/src/rans/d04.js +83 -0
  252. package/src/rans/d14.js +59 -0
  253. package/src/rans/decoding.js +141 -0
  254. package/src/rans/frequencies.js +121 -0
  255. package/src/rans/index.js +249 -0
  256. package/src/sam.js +15 -0
  257. package/src/unzip-pako.ts +5 -0
  258. package/src/unzip.ts +2 -0
@@ -0,0 +1,337 @@
1
+ import Constants from './constants'
2
+
3
/**
 * Rebuild the bases of a read from the reference sequence and the record's
 * read features.
 *
 * @param {object} cramRecord record being decoded; this function reads its
 *   `lengthOnRef`, `readLength`, `alignmentStart` and `readFeatures`, and
 *   calls its `isUnknownBases()` method
 * @param {object} refRegion
 * @param {number} refRegion.start coordinate of the first base of `seq`
 * @param {string} refRegion.seq reference bases
 * @returns {string|undefined} upper-cased bases, or undefined when the record
 *   has no length or is flagged as having unknown bases
 * @throws {Error} when the read features and the reference cannot yield
 *   `readLength` bases (this situation used to loop forever)
 */
function decodeReadSequence(cramRecord, refRegion) {
  // if it has no length, it has no sequence
  if (!cramRecord.lengthOnRef && !cramRecord.readLength) {
    return undefined
  }

  if (cramRecord.isUnknownBases()) {
    return undefined
  }

  // remember: all coordinates are 1-based closed
  const regionSeqOffset = cramRecord.alignmentStart - refRegion.start

  if (!cramRecord.readFeatures) {
    return refRegion.seq
      .substr(regionSeqOffset, cramRecord.lengthOnRef)
      .toUpperCase()
  }

  let bases = ''
  let regionPos = regionSeqOffset
  let currentReadFeature = 0

  // Copy up to `length` reference bases starting at the current region
  // position. An empty chunk leaves `bases`, `regionPos` and
  // `currentReadFeature` untouched, so the next loop pass would be identical:
  // fail loudly instead of spinning forever.
  const appendReference = length => {
    const chunk = refRegion.seq.substr(regionPos, length)
    if (!chunk) {
      throw new Error(
        `cannot decode read sequence at ${cramRecord.alignmentStart}: read features or reference sequence are inconsistent with read length ${cramRecord.readLength} (decoded ${bases.length} bases)`,
      )
    }
    bases += chunk
    regionPos += chunk.length
  }

  while (bases.length < cramRecord.readLength) {
    if (currentReadFeature >= cramRecord.readFeatures.length) {
      // no features left: put down reference up to the full read length
      appendReference(cramRecord.readLength - bases.length)
    } else {
      const feature = cramRecord.readFeatures[currentReadFeature]
      if (feature.code === 'Q' || feature.code === 'q') {
        // quality-only features contribute no bases
        currentReadFeature += 1
      } else if (feature.pos === bases.length + 1) {
        // the feature applies at the next base to be written: process it
        currentReadFeature += 1

        switch (feature.code) {
          case 'b': {
            // data is split on commas and each piece is used as a char code
            const added = String.fromCharCode(...feature.data.split(','))
            bases += added
            regionPos += added.length
            break
          }
          case 'B':
            // base pair and associated quality; only the base is used here
            // TODO: do we need to set the quality in the qual scores?
            bases += feature.data[0]
            regionPos += 1
            break
          case 'X':
            // base substitution
            bases += feature.sub
            regionPos += 1
            break
          case 'I': // insertion
          case 'i': // insert single base
          case 'S': // soft clipped bases that are present in the read seq
            bases += feature.data
            break
          case 'D': // deletion
          case 'N': // reference skip
            regionPos += feature.data
            break
          default:
            // 'P' (padding), 'H' (hard clip) and anything else: do nothing
            break
        }
      } else {
        // put down a chunk of reference up to the next read feature
        appendReference(feature.pos - bases.length - 1)
      }
    }
  }

  return bases.toUpperCase()
}
94
+
95
// maps a base letter (either case) to its number: a=0, c=1, g=2, t=3, n=4;
// decodeBaseSubstitution uses the number to pick a substitution-matrix entry
const baseNumbers = {}
'acgtn'.split('').forEach((base, number) => {
  baseNumbers[base] = number
  baseNumbers[base.toUpperCase()] = number
})
107
+
108
/**
 * Fill in the `ref` and `sub` properties of a base-substitution read feature
 * using the reference sequence and the substitution matrix. Does nothing
 * when no reference region is given.
 *
 * @param {object} cramRecord unused by this function
 * @param {object} refRegion region with `start` and `seq`
 * @param {object} compressionScheme provides `substitutionMatrix`
 * @param {object} readFeature feature to annotate; its `refPos` and `data`
 *   are read, `ref` and `sub` are written
 * @returns {undefined} nothing
 */
function decodeBaseSubstitution(
  cramRecord,
  refRegion,
  compressionScheme,
  readFeature,
) {
  if (refRegion) {
    // look up the reference base under this feature
    const referenceBase = refRegion.seq.charAt(
      readFeature.refPos - refRegion.start,
    )
    if (referenceBase) {
      readFeature.ref = referenceBase
    }

    // bases missing from the table are handled like N (number 4)
    const tableNumber = baseNumbers[referenceBase]
    const matrixRow =
      compressionScheme.substitutionMatrix[
        tableNumber === undefined ? 4 : tableNumber
      ]

    // decode the substitution code through the matrix row of the ref base
    const substituted = matrixRow[readFeature.data]
    if (substituted) {
      readFeature.sub = substituted
    }
  }
}
134
+
135
/**
 * Class of each CRAM record returned by this API.
 *
 * The flag predicates test bits of `this.flags` and `this.cramFlags` against
 * the masks in `Constants`.
 */
export default class CramRecord {
  constructor() {
    this.tags = {}
  }

  /**
   * @returns {boolean} true if the read is paired, regardless of whether both segments are mapped
   */
  isPaired() {
    return !!(this.flags & Constants.BAM_FPAIRED)
  }

  /** @returns {boolean} true if the read is paired, and both segments are mapped */
  isProperlyPaired() {
    return !!(this.flags & Constants.BAM_FPROPER_PAIR)
  }

  /** @returns {boolean} true if the read itself is unmapped; conflicts with isProperlyPaired */
  isSegmentUnmapped() {
    return !!(this.flags & Constants.BAM_FUNMAP)
  }

  /** @returns {boolean} true if the mate of this read is unmapped; conflicts with isProperlyPaired */
  isMateUnmapped() {
    return !!(this.flags & Constants.BAM_FMUNMAP)
  }

  /** @returns {boolean} true if the read is mapped to the reverse strand */
  isReverseComplemented() {
    return !!(this.flags & Constants.BAM_FREVERSE)
  }

  /** @returns {boolean} true if the mate is mapped to the reverse strand */
  isMateReverseComplemented() {
    return !!(this.flags & Constants.BAM_FMREVERSE)
  }

  /** @returns {boolean} true if this is read number 1 in a pair */
  isRead1() {
    return !!(this.flags & Constants.BAM_FREAD1)
  }

  /** @returns {boolean} true if this is read number 2 in a pair */
  isRead2() {
    return !!(this.flags & Constants.BAM_FREAD2)
  }

  /** @returns {boolean} true if this is a secondary alignment */
  isSecondary() {
    return !!(this.flags & Constants.BAM_FSECONDARY)
  }

  /** @returns {boolean} true if this read has failed QC checks */
  isFailedQc() {
    return !!(this.flags & Constants.BAM_FQCFAIL)
  }

  /** @returns {boolean} true if the read is an optical or PCR duplicate */
  isDuplicate() {
    return !!(this.flags & Constants.BAM_FDUP)
  }

  /** @returns {boolean} true if this is a supplementary alignment */
  isSupplementary() {
    return !!(this.flags & Constants.BAM_FSUPPLEMENTARY)
  }

  /**
   * @returns {boolean} true if the read is detached
   */
  isDetached() {
    return !!(this.cramFlags & Constants.CRAM_FLAG_DETACHED)
  }

  /** @returns {boolean} true if the read has a mate in this same CRAM segment */
  hasMateDownStream() {
    return !!(this.cramFlags & Constants.CRAM_FLAG_MATE_DOWNSTREAM)
  }

  /** @returns {boolean} true if the read contains qual scores */
  isPreservingQualityScores() {
    return !!(this.cramFlags & Constants.CRAM_FLAG_PRESERVE_QUAL_SCORES)
  }

  /** @returns {boolean} true if the read has no sequence bases */
  isUnknownBases() {
    return !!(this.cramFlags & Constants.CRAM_FLAG_NO_SEQ)
  }

  /**
   * Get the original sequence of this read. The sequence is decoded on first
   * use and cached in `readBases`; decoding needs a reference region stored
   * by `addReferenceSequence()`.
   * @returns {String|undefined} sequence basepairs, or undefined if they are
   *   not available
   */
  getReadBases() {
    if (!this.readBases && this._refRegion) {
      this.readBases = decodeReadSequence(this, this._refRegion)
    }
    return this.readBases
  }

  /**
   * Get the pair orientation of a paired read. Adapted from igv.js
   * @returns {String|null} the pair orientation (for example `F1R2`), or
   *   null unless this read and its mate are both mapped, paired and on the
   *   same sequence
   */
  getPairOrientation() {
    if (
      !this.isSegmentUnmapped() &&
      this.isPaired() &&
      !this.isMateUnmapped() &&
      this.mate &&
      this.sequenceId === this.mate.sequenceId
    ) {
      const s1 = this.isReverseComplemented() ? 'R' : 'F'
      const s2 = this.isMateReverseComplemented() ? 'R' : 'F'
      let o1 = ' '
      let o2 = ' '
      if (this.isRead1()) {
        o1 = '1'
        o2 = '2'
      } else if (this.isRead2()) {
        o1 = '2'
        o2 = '1'
      }

      let isize = this.templateLength || this.templateSize
      if (this.alignmentStart > this.mate.alignmentStart && isize > 0) {
        isize = -isize
      }

      // this read is listed first when the signed insert size is positive,
      // otherwise its mate is
      return isize > 0 ? `${s1}${o1}${s2}${o2}` : `${s2}${o2}${s1}${o1}`
    }
    return null
  }

  /**
   * Annotates this feature with the given reference sequence basepair
   * information. This will add a `sub` and a `ref` item to base
   * substitution read features given the actual substituted and reference
   * base pairs, and will make the `getReadBases()` method work.
   *
   * A missing `refRegion` is tolerated: nothing is annotated or stored.
   *
   * @param {object} refRegion
   * @param {number} refRegion.start
   * @param {number} refRegion.end
   * @param {string} refRegion.seq
   * @param {CramContainerCompressionScheme} compressionScheme
   * @returns {undefined} nothing
   */
  addReferenceSequence(refRegion, compressionScheme) {
    if (this.readFeatures) {
      // use the reference bases to decode the bases
      // substituted in each base substitution
      this.readFeatures.forEach(readFeature => {
        if (readFeature.code === 'X') {
          decodeBaseSubstitution(
            this,
            refRegion,
            compressionScheme,
            readFeature,
          )
        }
      })
    }

    // if this region completely covers this read, keep a reference to it.
    // decodeBaseSubstitution accepts a missing region, so guard here too
    // instead of dereferencing undefined.
    if (
      refRegion &&
      !this.readBases &&
      refRegion.start <= this.alignmentStart &&
      refRegion.end >=
        this.alignmentStart + (this.lengthOnRef || this.readLength) - 1
    ) {
      this._refRegion = refRegion
    }
  }

  /**
   * Plain-object form of this record: every own property whose name does not
   * start with an underscore, plus `readBases` as returned by
   * `getReadBases()`.
   * @returns {object} serializable copy of the record
   */
  toJSON() {
    const data = {}
    Object.keys(this).forEach(k => {
      if (k.charAt(0) === '_') {
        return
      }
      data[k] = this[k]
    })

    data.readBases = this.getReadBases()

    return data
  }
}
@@ -0,0 +1,379 @@
1
+ import { Parser } from '@gmod/binary-parser'
2
+
3
// parser that reads one itf8 value; reused as the element type of the
// integer arrays declared by the section parsers below
const singleItf8 = new Parser().itf8()

// layout of the file definition: 4-character magic, major and minor version
// bytes, then a 20-character file id with nulls stripped
const cramFileDefinitionParser = new Parser()
  .string('magic', { length: 4 })
  .uint8('majorVersion')
  .uint8('minorVersion')
  .string('fileId', { length: 20, stripNull: true })

const cramFileDefinition = {
  parser: cramFileDefinitionParser,
  // 4 + 1 + 1 + 20 bytes
  maxLength: 26,
}
13
+
14
// Block header: compression method and content type bytes (each translated
// to a name, throwing on ids with no name), followed by the content id and
// the compressed / uncompressed sizes as itf8.
// NOTE(review): the formatter functions are kept free of references to
// module-level variables, as in the original — presumably the parser library
// requires that; confirm before hoisting the name tables.
const cramBlockHeader = {
  parser: new Parser()
    .uint8('compressionMethod', {
      formatter: /* istanbul ignore next */ methodId => {
        const methodNames = [
          'raw',
          'gzip',
          'bzip2',
          'lzma',
          'rans',
          'rans4x16',
          'arith',
          'fqzcomp',
          'tok3',
        ]
        const methodName = methodNames[methodId]
        if (methodName) {
          return methodName
        }
        throw new Error(
          `compression method number ${methodId} not implemented`,
        )
      },
    })
    .uint8('contentType', {
      formatter: /* istanbul ignore next */ typeId => {
        const typeNames = [
          'FILE_HEADER',
          'COMPRESSION_HEADER',
          'MAPPED_SLICE_HEADER',
          'UNMAPPED_SLICE_HEADER', // < only used in cram v1
          'EXTERNAL_DATA',
          'CORE_DATA',
        ]
        const typeName = typeNames[typeId]
        if (typeName) {
          return typeName
        }
        throw new Error(`invalid block content type id ${typeId}`)
      },
    })
    .itf8('contentId')
    .itf8('compressedSize')
    .itf8('uncompressedSize'),
  // two single bytes plus three itf8 values budgeted at 5 bytes each
  maxLength: 17,
}

// the 4-byte crc32 checksum of a block
const cramBlockCrc32Parser = new Parser().uint32('crc32')

const cramBlockCrc32 = {
  parser: cramBlockCrc32Parser,
  maxLength: 4,
}
61
+
62
+ // const ENCODING_NAMES = [
63
+ // 'NULL', // 0
64
+ // 'EXTERNAL', // 1
65
+ // 'GOLOMB', // 2
66
+ // 'HUFFMAN_INT', // 3
67
+ // 'BYTE_ARRAY_LEN', // 4
68
+ // 'BYTE_ARRAY_STOP', // 5
69
+ // 'BETA', // 6
70
+ // 'SUBEXP', // 7
71
+ // 'GOLOMB_RICE', // 8
72
+ // 'GAMMA', // 9
73
+ // ]
74
+
75
// Tag dictionary: an itf8 byte count followed by that many bytes. The bytes
// are split on zero bytes; each piece is decoded as utf8 and cut into
// 3-character tag strings, giving an array of tag sets.
const cramTagDictionary = new Parser().itf8('size').buffer('ents', {
  length: 'size',
  formatter: /* istanbul ignore next */ buffer => {
    // decode bytes [from, to) and cut the text into 3-character pieces
    function sliceIntoTags(from, to) {
      const text = buffer.toString('utf8', from, to)
      const pieces = []
      let offset = 0
      while (offset < text.length) {
        pieces.push(text.slice(offset, offset + 3))
        offset += 3
      }
      return pieces
    }
    // NOTE(review): `var` and the eslint markers are kept from the original;
    // presumably deliberate — confirm before modernizing
    /* eslint-disable */
    var sets = []
    var segmentStart = 0
    var pos
    /* eslint-enable */
    for (pos = 0; pos < buffer.length; pos += 1) {
      if (!buffer[pos]) {
        // a zero byte terminates the current tag set
        sets.push(sliceIntoTags(segmentStart, pos))
        segmentStart = pos + 1
      }
    }
    // trailing bytes without a terminating zero form a final tag set
    if (pos > segmentStart) {
      sets.push(sliceIntoTags(segmentStart, pos))
    }
    return sets
  },
})
103
+
104
+ // const cramPreservationMapKeys = 'XX RN AP RR SM TD'.split(' ')
105
// reads one byte and converts it to a boolean
const parseByteAsBool = new Parser().uint8(null, {
  formatter: /* istanbul ignore next */ byteValue => !!byteValue,
})

// value parser for each two-letter preservation map key
const preservationValueChoices = {
  MI: parseByteAsBool,
  UI: parseByteAsBool,
  PI: parseByteAsBool,
  RN: parseByteAsBool,
  AP: parseByteAsBool,
  RR: parseByteAsBool,
  // five raw bytes
  SM: new Parser().array(null, { type: 'uint8', length: 5 }),
  // nested tag dictionary, reduced to its array of tag sets
  TD: new Parser().nest(null, {
    type: cramTagDictionary,
    formatter: /* istanbul ignore next */ dictionary => dictionary.ents,
  }),
}

// one map entry: a 2-character key, then a value whose layout depends on it
const preservationMapEntry = new Parser()
  .string('key', {
    length: 2,
    stripNull: false,
  })
  .choice('value', {
    tag: 'key',
    choices: preservationValueChoices,
  })

// preservation map: byte size, entry count, then `mapCount` entries
const cramPreservationMap = new Parser()
  .itf8('mapSize')
  .itf8('mapCount')
  .array('ents', {
    length: 'mapCount',
    type: preservationMapEntry,
  })
137
+
138
/**
 * Collapse a parsed map section into a plain object keyed by entry key.
 * When a key occurs more than once, a warning is logged and the last value
 * wins.
 *
 * NOTE(review): this function is passed as a parser `formatter`; it is kept
 * free of references to module-level variables, as in the original —
 * presumably the parser library requires that; confirm before changing.
 *
 * @param {object} data parsed section with an `ents` array of
 *   `{ key, value }` entries
 * @returns {object} object mapping each key to its value
 */
/* istanbul ignore next */
function formatMap(data) {
  const map = {}
  for (let i = 0; i < data.ents.length; i += 1) {
    const { key, value } = data.ents[i]
    // test for key presence, not truthiness, so that a duplicate is reported
    // even when the earlier value was falsy (e.g. a `false` flag)
    if (Object.prototype.hasOwnProperty.call(map, key)) {
      console.warn(`duplicate key ${key} in map`)
    }
    map[key] = value
  }
  return map
}
150
+
151
// section parsers whose layout does not depend on the CRAM version
const unversionedParsers = {
  cramFileDefinition,
  cramBlockHeader,
  cramBlockCrc32,
}

// Each of these is a function of the major version and returns the parser
// for one section. Where a `maxLength` is returned, itf8 fields are budgeted
// 5 bytes and ltf8 fields 9 bytes.
const versionedParsers = {
  /**
   * Assemble a section parser for the unmapped slice header, with slight
   * variations depending on the major version of the cram file.
   * @param {number} majorVersion
   * @returns {object} `{ parser, maxLength }` where `maxLength` is a
   *   function of the number of content ids
   */
  cramUnmappedSliceHeader(majorVersion) {
    let maxLength = 0
    let parser = new Parser().itf8('numRecords')
    maxLength += 5

    // recordCounter is ltf8 in CRAM v3 and later, itf8 in CRAM v2,
    // absent in CRAM v1
    if (majorVersion >= 3) {
      parser = parser.ltf8('recordCounter')
      maxLength += 9
    } else if (majorVersion === 2) {
      parser = parser.itf8('recordCounter')
      maxLength += 5
    }

    parser = parser
      .itf8('numBlocks')
      .itf8('numContentIds')
      .array('contentIds', {
        type: singleItf8,
        length: 'numContentIds',
      })
    maxLength += 5 * 2 // the contentIds array is added by maxLengthFunc

    // the md5 sum is missing in cram v1
    if (majorVersion >= 2) {
      parser = parser.array('md5', { type: 'uint8', length: 16 })
      maxLength += 16
    }

    const maxLengthFunc = numContentIds => maxLength + numContentIds * 5

    return { parser, maxLength: maxLengthFunc }
  },

  /**
   * Assemble a section parser for the mapped slice header, with slight
   * variations depending on the major version of the cram file.
   * @param {number} majorVersion
   * @returns {object} `{ parser, maxLength }` where `maxLength` is a
   *   function of the number of content ids
   */
  cramMappedSliceHeader(majorVersion) {
    let parser = new Parser()
      .itf8('refSeqId')
      .itf8('refSeqStart')
      .itf8('refSeqSpan')
      .itf8('numRecords')
    let maxLength = 5 * 4

    // recordCounter is ltf8 in CRAM v3 and later, itf8 in CRAM v2,
    // absent in CRAM v1
    if (majorVersion >= 3) {
      parser = parser.ltf8('recordCounter')
      maxLength += 9
    } else if (majorVersion === 2) {
      parser = parser.itf8('recordCounter')
      maxLength += 5
    }

    parser = parser
      .itf8('numBlocks')
      .itf8('numContentIds')
      .array('contentIds', {
        type: singleItf8,
        length: 'numContentIds',
      })
      .itf8('refBaseBlockId')
    maxLength += 5 * 3 // the contentIds array is added by maxLengthFunc

    // the md5 sum is missing in cram v1
    if (majorVersion >= 2) {
      parser = parser.array('md5', { type: 'uint8', length: 16 })
      maxLength += 16
    }

    const maxLengthFunc = numContentIds => maxLength + numContentIds * 5

    return { parser, maxLength: maxLengthFunc }
  },

  /**
   * Assemble the parser for one encoding description: a codec id, the byte
   * length of its parameters, and the parameters themselves, whose layout
   * depends on the codec id. The parser is registered under the name
   * `cramEncoding` so that BYTE_ARRAY_LEN can nest it.
   * @param {number} majorVersion
   * @returns {object} `{ parser }`
   */
  cramEncoding(majorVersion) {
    const parser = new Parser()
      .namely('cramEncoding')
      .itf8('codecId')
      .itf8('parametersBytes')
      .choice('parameters', {
        tag: 'codecId',
        choices: {
          0: new Parser(), // NULL
          1: new Parser().itf8('blockContentId'), // EXTERNAL
          2: new Parser().itf8('offset').itf8('M'), // GOLOMB,
          // HUFFMAN_INT
          3: Parser.start()
            .itf8('numCodes')
            .array('symbols', { length: 'numCodes', type: singleItf8 })
            .itf8('numLengths')
            .array('bitLengths', { length: 'numLengths', type: singleItf8 }),
          4: Parser.start() // BYTE_ARRAY_LEN
            .nest('lengthsEncoding', { type: 'cramEncoding' })
            .nest('valuesEncoding', { type: 'cramEncoding' }),
          // BYTE_ARRAY_STOP is a little different for CRAM v1: the block
          // content id is a fixed-width integer there instead of itf8.
          // NOTE(review): this used to call `int`, which does not appear to
          // be a Parser method (the rest of this file uses int32/uint32) and
          // would throw for v1 — confirm against @gmod/binary-parser.
          5: new Parser()
            .uint8('stopByte')
            [majorVersion > 1 ? 'itf8' : 'int32']('blockContentId'),
          6: new Parser().itf8('offset').itf8('length'), // BETA
          7: new Parser().itf8('offset').itf8('K'), // SUBEXP
          8: new Parser().itf8('offset').itf8('log2m'), // GOLOMB_RICE
          9: new Parser().itf8('offset'), // GAMMA
        },
      })

    return { parser }
  },

  /**
   * Parser for the data series encoding map: byte size, entry count, then
   * entries made of a 2-character key and an encoding description.
   * @param {number} majorVersion
   * @returns {Parser} the map parser
   */
  cramDataSeriesEncodingMap(majorVersion) {
    return new Parser()
      .itf8('mapSize')
      .itf8('mapCount')
      .array('ents', {
        length: 'mapCount',
        type: new Parser()
          .string('key', { length: 2, stripNull: false })
          .nest('value', { type: this.cramEncoding(majorVersion).parser }),
      })
  },

  /**
   * Parser for the tag encoding map: byte size, entry count, then entries
   * made of an itf8 key and an encoding description. The key integer is
   * turned into a 3-character string, one character per byte from its low
   * 24 bits.
   * @param {number} majorVersion
   * @returns {Parser} the map parser
   */
  cramTagEncodingMap(majorVersion) {
    return new Parser()
      .itf8('mapSize')
      .itf8('mapCount')
      .array('ents', {
        length: 'mapCount',
        type: new Parser()
          .itf8('key', {
            formatter: /* istanbul ignore next */ integerRepresentation =>
              /* istanbul ignore next */
              String.fromCharCode((integerRepresentation >> 16) & 0xff) +
              String.fromCharCode((integerRepresentation >> 8) & 0xff) +
              String.fromCharCode(integerRepresentation & 0xff),
          })
          .nest('value', { type: this.cramEncoding(majorVersion).parser }),
      })
  },

  /**
   * Parser for the compression header: preservation map, data series
   * encoding map and tag encoding map, each collapsed into a plain object by
   * `formatMap`.
   * @param {number} majorVersion
   * @returns {object} `{ parser }`
   */
  cramCompressionHeader(majorVersion) {
    let parser = new Parser()
    // TODO: if we want to support CRAM v1, we will need to refactor
    // compression header into 2 parts to parse the landmarks,
    // like the container header
    parser = parser
      .nest('preservation', {
        type: cramPreservationMap,
        formatter: formatMap,
      })
      .nest('dataSeriesEncoding', {
        type: this.cramDataSeriesEncodingMap(majorVersion),
        formatter: formatMap,
      })
      .nest('tagEncoding', {
        type: this.cramTagEncodingMap(majorVersion),
        formatter: formatMap,
      })
    return { parser }
  },

  /**
   * Parser for the first part of the container header, up to and including
   * the number of landmarks.
   * @param {number} majorVersion
   * @returns {object} `{ parser, maxLength }` with a numeric `maxLength`
   */
  cramContainerHeader1(majorVersion) {
    let parser = new Parser()
      .int32('length') // byte size of the container data (blocks)
      .itf8('refSeqId') // reference sequence identifier, -1 for unmapped reads, -2 for multiple reference sequences
      .itf8('refSeqStart') // the alignment start position or 0 for unmapped reads
      .itf8('alignmentSpan') // the length of the alignment or 0 for unmapped reads
      .itf8('numRecords') // number of records in the container
    let maxLength = 4 + 5 * 4

    if (majorVersion >= 3) {
      parser = parser.ltf8('recordCounter') // 1-based sequential index of records in the file/stream.
      maxLength += 9
    } else if (majorVersion === 2) {
      parser = parser.itf8('recordCounter')
      maxLength += 5
    }

    if (majorVersion > 1) {
      parser = parser.ltf8('numBases') // number of read bases
      maxLength += 9
    }
    parser = parser
      .itf8('numBlocks') // the number of blocks
      .itf8('numLandmarks') // the number of landmarks
    maxLength += 5 + 5

    return { parser, maxLength }
  },

  /**
   * Parser for the second part of the container header: the landmarks and,
   * for CRAM v3 and later, a crc32.
   * @param {number} majorVersion
   * @returns {object} `{ parser, maxLength }` where `maxLength` is a
   *   function of the number of landmarks
   */
  cramContainerHeader2(majorVersion) {
    let parser = new Parser()
      .itf8('numLandmarks') // the number of landmarks
      // Each integer value of this array is a byte offset
      // into the blocks byte array. Landmarks are used for
      // random access indexing.
      .array('landmarks', {
        type: new Parser().itf8(),
        length: 'numLandmarks',
      })

    let crcLength = 0
    if (majorVersion >= 3) {
      parser = parser.uint32('crc32')
      crcLength = 4
    }
    return {
      parser,
      maxLength: numLandmarks => 5 + numLandmarks * 5 + crcLength,
    }
  },
}
370
+
371
/**
 * Collect every section parser for one CRAM major version: a copy of the
 * version-independent parsers, extended with the result of calling each
 * versioned parser factory with `majorVersion`.
 *
 * @param {number} majorVersion major version of the CRAM file
 * @returns {object} section parsers keyed by parser name
 */
function getSectionParsers(majorVersion) {
  return Object.keys(versionedParsers).reduce((parsers, parserName) => {
    // invoke through the object so `this` inside the factory stays bound to
    // versionedParsers (some factories call their siblings via `this`)
    parsers[parserName] = versionedParsers[parserName](majorVersion)
    return parsers
  }, Object.assign({}, unversionedParsers))
}
378
+
379
+ export { cramFileDefinition, getSectionParsers }