@gmod/cram 1.5.9 → 1.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (258) hide show
  1. package/CHANGELOG.md +90 -0
  2. package/README.md +182 -172
  3. package/dist/craiIndex.d.ts +37 -0
  4. package/dist/craiIndex.js +196 -301
  5. package/dist/craiIndex.js.map +1 -0
  6. package/dist/cram-bundle.js +6 -15
  7. package/dist/cramFile/codecs/_base.d.ts +6 -0
  8. package/dist/cramFile/codecs/_base.js +44 -53
  9. package/dist/cramFile/codecs/_base.js.map +1 -0
  10. package/dist/cramFile/codecs/beta.d.ts +4 -0
  11. package/dist/cramFile/codecs/beta.js +38 -48
  12. package/dist/cramFile/codecs/beta.js.map +1 -0
  13. package/dist/cramFile/codecs/byteArrayLength.d.ts +8 -0
  14. package/dist/cramFile/codecs/byteArrayLength.js +58 -78
  15. package/dist/cramFile/codecs/byteArrayLength.js.map +1 -0
  16. package/dist/cramFile/codecs/byteArrayStop.d.ts +6 -0
  17. package/dist/cramFile/codecs/byteArrayStop.js +62 -76
  18. package/dist/cramFile/codecs/byteArrayStop.js.map +1 -0
  19. package/dist/cramFile/codecs/external.d.ts +7 -0
  20. package/dist/cramFile/codecs/external.js +63 -81
  21. package/dist/cramFile/codecs/external.js.map +1 -0
  22. package/dist/cramFile/codecs/gamma.d.ts +4 -0
  23. package/dist/cramFile/codecs/gamma.js +43 -56
  24. package/dist/cramFile/codecs/gamma.js.map +1 -0
  25. package/dist/cramFile/codecs/huffman.d.ts +17 -0
  26. package/dist/cramFile/codecs/huffman.js +126 -199
  27. package/dist/cramFile/codecs/huffman.js.map +1 -0
  28. package/dist/cramFile/codecs/index.d.ts +2 -0
  29. package/dist/cramFile/codecs/index.js +31 -38
  30. package/dist/cramFile/codecs/index.js.map +1 -0
  31. package/dist/cramFile/codecs/subexp.d.ts +4 -0
  32. package/dist/cramFile/codecs/subexp.js +51 -64
  33. package/dist/cramFile/codecs/subexp.js.map +1 -0
  34. package/dist/cramFile/constants.d.ts +36 -0
  35. package/dist/cramFile/constants.js +52 -50
  36. package/dist/cramFile/constants.js.map +1 -0
  37. package/dist/cramFile/container/compressionScheme.d.ts +23 -0
  38. package/dist/cramFile/container/compressionScheme.js +115 -153
  39. package/dist/cramFile/container/compressionScheme.js.map +1 -0
  40. package/dist/cramFile/container/index.d.ts +13 -0
  41. package/dist/cramFile/container/index.js +169 -283
  42. package/dist/cramFile/container/index.js.map +1 -0
  43. package/dist/cramFile/file.d.ts +63 -0
  44. package/dist/cramFile/file.js +440 -766
  45. package/dist/cramFile/file.js.map +1 -0
  46. package/dist/cramFile/index.d.ts +2 -0
  47. package/dist/cramFile/index.js +7 -4
  48. package/dist/cramFile/index.js.map +1 -0
  49. package/dist/cramFile/record.d.ts +79 -0
  50. package/dist/cramFile/record.js +253 -308
  51. package/dist/cramFile/record.js.map +1 -0
  52. package/dist/cramFile/sectionParsers.d.ts +18 -0
  53. package/dist/cramFile/sectionParsers.js +324 -362
  54. package/dist/cramFile/sectionParsers.js.map +1 -0
  55. package/dist/cramFile/slice/decodeRecord.d.ts +2 -0
  56. package/dist/cramFile/slice/decodeRecord.js +278 -298
  57. package/dist/cramFile/slice/decodeRecord.js.map +1 -0
  58. package/dist/cramFile/slice/index.d.ts +20 -0
  59. package/dist/cramFile/slice/index.js +488 -789
  60. package/dist/cramFile/slice/index.js.map +1 -0
  61. package/dist/cramFile/util.d.ts +5 -0
  62. package/dist/cramFile/util.js +158 -144
  63. package/dist/cramFile/util.js.map +1 -0
  64. package/dist/errors.d.ts +23 -0
  65. package/dist/errors.js +66 -103
  66. package/dist/errors.js.map +1 -0
  67. package/dist/index.d.ts +4 -0
  68. package/dist/index.js +12 -12
  69. package/dist/index.js.map +1 -0
  70. package/dist/indexedCramFile.d.ts +39 -0
  71. package/dist/indexedCramFile.js +213 -315
  72. package/dist/indexedCramFile.js.map +1 -0
  73. package/dist/io/bufferCache.d.ts +12 -0
  74. package/dist/io/bufferCache.js +108 -128
  75. package/dist/io/bufferCache.js.map +1 -0
  76. package/dist/io/index.d.ts +5 -0
  77. package/dist/io/index.js +29 -27
  78. package/dist/io/index.js.map +1 -0
  79. package/dist/io/localFile.d.ts +10 -0
  80. package/dist/io/localFile.js +105 -162
  81. package/dist/io/localFile.js.map +1 -0
  82. package/dist/io/remoteFile.d.ts +16 -0
  83. package/dist/io/remoteFile.js +137 -206
  84. package/dist/io/remoteFile.js.map +1 -0
  85. package/dist/rans/constants.d.ts +3 -0
  86. package/dist/rans/constants.js +6 -6
  87. package/dist/rans/constants.js.map +1 -0
  88. package/dist/rans/d04.d.ts +1 -0
  89. package/dist/rans/d04.js +70 -99
  90. package/dist/rans/d04.js.map +1 -0
  91. package/dist/rans/d14.d.ts +1 -0
  92. package/dist/rans/d14.js +55 -93
  93. package/dist/rans/d14.js.map +1 -0
  94. package/dist/rans/decoding.d.ts +30 -0
  95. package/dist/rans/decoding.js +112 -159
  96. package/dist/rans/decoding.js.map +1 -0
  97. package/dist/rans/frequencies.d.ts +2 -0
  98. package/dist/rans/frequencies.js +110 -119
  99. package/dist/rans/frequencies.js.map +1 -0
  100. package/dist/rans/index.d.ts +1 -0
  101. package/dist/rans/index.js +111 -174
  102. package/dist/rans/index.js.map +1 -0
  103. package/dist/sam.d.ts +1 -0
  104. package/dist/sam.js +16 -41
  105. package/dist/sam.js.map +1 -0
  106. package/dist/unzip-pako.d.ts +2 -0
  107. package/dist/unzip-pako.js +9 -0
  108. package/dist/unzip-pako.js.map +1 -0
  109. package/dist/unzip.d.ts +2 -0
  110. package/dist/unzip.js +6 -0
  111. package/dist/unzip.js.map +1 -0
  112. package/errors.js +66 -103
  113. package/esm/craiIndex.d.ts +37 -0
  114. package/esm/craiIndex.js +158 -0
  115. package/esm/craiIndex.js.map +1 -0
  116. package/esm/cramFile/codecs/_base.d.ts +6 -0
  117. package/esm/cramFile/codecs/_base.js +42 -0
  118. package/esm/cramFile/codecs/_base.js.map +1 -0
  119. package/esm/cramFile/codecs/beta.d.ts +4 -0
  120. package/esm/cramFile/codecs/beta.js +15 -0
  121. package/esm/cramFile/codecs/beta.js.map +1 -0
  122. package/esm/cramFile/codecs/byteArrayLength.d.ts +8 -0
  123. package/esm/cramFile/codecs/byteArrayLength.js +35 -0
  124. package/esm/cramFile/codecs/byteArrayLength.js.map +1 -0
  125. package/esm/cramFile/codecs/byteArrayStop.d.ts +6 -0
  126. package/esm/cramFile/codecs/byteArrayStop.js +40 -0
  127. package/esm/cramFile/codecs/byteArrayStop.js.map +1 -0
  128. package/esm/cramFile/codecs/external.d.ts +7 -0
  129. package/esm/cramFile/codecs/external.js +40 -0
  130. package/esm/cramFile/codecs/external.js.map +1 -0
  131. package/esm/cramFile/codecs/gamma.d.ts +4 -0
  132. package/esm/cramFile/codecs/gamma.js +20 -0
  133. package/esm/cramFile/codecs/gamma.js.map +1 -0
  134. package/esm/cramFile/codecs/huffman.d.ts +17 -0
  135. package/esm/cramFile/codecs/huffman.js +107 -0
  136. package/esm/cramFile/codecs/huffman.js.map +1 -0
  137. package/esm/cramFile/codecs/index.d.ts +2 -0
  138. package/esm/cramFile/codecs/index.js +30 -0
  139. package/esm/cramFile/codecs/index.js.map +1 -0
  140. package/esm/cramFile/codecs/subexp.d.ts +4 -0
  141. package/esm/cramFile/codecs/subexp.js +28 -0
  142. package/esm/cramFile/codecs/subexp.js.map +1 -0
  143. package/esm/cramFile/constants.d.ts +36 -0
  144. package/esm/cramFile/constants.js +51 -0
  145. package/esm/cramFile/constants.js.map +1 -0
  146. package/esm/cramFile/container/compressionScheme.d.ts +23 -0
  147. package/esm/cramFile/container/compressionScheme.js +123 -0
  148. package/esm/cramFile/container/compressionScheme.js.map +1 -0
  149. package/esm/cramFile/container/index.d.ts +13 -0
  150. package/esm/cramFile/container/index.js +84 -0
  151. package/esm/cramFile/container/index.js.map +1 -0
  152. package/esm/cramFile/file.d.ts +63 -0
  153. package/esm/cramFile/file.js +281 -0
  154. package/esm/cramFile/file.js.map +1 -0
  155. package/esm/cramFile/index.d.ts +2 -0
  156. package/esm/cramFile/index.js +3 -0
  157. package/esm/cramFile/index.js.map +1 -0
  158. package/esm/cramFile/record.d.ts +79 -0
  159. package/esm/cramFile/record.js +297 -0
  160. package/esm/cramFile/record.js.map +1 -0
  161. package/esm/cramFile/sectionParsers.d.ts +18 -0
  162. package/esm/cramFile/sectionParsers.js +347 -0
  163. package/esm/cramFile/sectionParsers.js.map +1 -0
  164. package/esm/cramFile/slice/decodeRecord.d.ts +2 -0
  165. package/esm/cramFile/slice/decodeRecord.js +299 -0
  166. package/esm/cramFile/slice/decodeRecord.js.map +1 -0
  167. package/esm/cramFile/slice/index.d.ts +20 -0
  168. package/esm/cramFile/slice/index.js +364 -0
  169. package/esm/cramFile/slice/index.js.map +1 -0
  170. package/esm/cramFile/util.d.ts +5 -0
  171. package/esm/cramFile/util.js +161 -0
  172. package/esm/cramFile/util.js.map +1 -0
  173. package/esm/errors.d.ts +23 -0
  174. package/esm/errors.js +24 -0
  175. package/esm/errors.js.map +1 -0
  176. package/esm/index.d.ts +4 -0
  177. package/esm/index.js +5 -0
  178. package/esm/index.js.map +1 -0
  179. package/esm/indexedCramFile.d.ts +39 -0
  180. package/esm/indexedCramFile.js +155 -0
  181. package/esm/indexedCramFile.js.map +1 -0
  182. package/esm/io/bufferCache.d.ts +12 -0
  183. package/esm/io/bufferCache.js +54 -0
  184. package/esm/io/bufferCache.js.map +1 -0
  185. package/esm/io/index.d.ts +5 -0
  186. package/esm/io/index.js +24 -0
  187. package/esm/io/index.js.map +1 -0
  188. package/esm/io/localFile.d.ts +10 -0
  189. package/esm/io/localFile.js +31 -0
  190. package/esm/io/localFile.js.map +1 -0
  191. package/esm/io/remoteFile.d.ts +16 -0
  192. package/esm/io/remoteFile.js +64 -0
  193. package/esm/io/remoteFile.js.map +1 -0
  194. package/esm/rans/constants.d.ts +3 -0
  195. package/esm/rans/constants.js +5 -0
  196. package/esm/rans/constants.js.map +1 -0
  197. package/esm/rans/d04.d.ts +1 -0
  198. package/esm/rans/d04.js +67 -0
  199. package/esm/rans/d04.js.map +1 -0
  200. package/esm/rans/d14.d.ts +1 -0
  201. package/esm/rans/d14.js +52 -0
  202. package/esm/rans/d14.js.map +1 -0
  203. package/esm/rans/decoding.d.ts +30 -0
  204. package/esm/rans/decoding.js +118 -0
  205. package/esm/rans/decoding.js.map +1 -0
  206. package/esm/rans/frequencies.d.ts +2 -0
  207. package/esm/rans/frequencies.js +110 -0
  208. package/esm/rans/frequencies.js.map +1 -0
  209. package/esm/rans/index.d.ts +1 -0
  210. package/esm/rans/index.js +195 -0
  211. package/esm/rans/index.js.map +1 -0
  212. package/esm/sam.d.ts +1 -0
  213. package/esm/sam.js +16 -0
  214. package/esm/sam.js.map +1 -0
  215. package/esm/unzip-pako.d.ts +2 -0
  216. package/esm/unzip-pako.js +5 -0
  217. package/esm/unzip-pako.js.map +1 -0
  218. package/esm/unzip.d.ts +2 -0
  219. package/esm/unzip.js +3 -0
  220. package/esm/unzip.js.map +1 -0
  221. package/package.json +38 -35
  222. package/src/craiIndex.js +180 -0
  223. package/src/cramFile/codecs/_base.js +49 -0
  224. package/src/cramFile/codecs/beta.js +23 -0
  225. package/src/cramFile/codecs/byteArrayLength.js +55 -0
  226. package/src/cramFile/codecs/byteArrayStop.js +50 -0
  227. package/src/cramFile/codecs/external.js +54 -0
  228. package/src/cramFile/codecs/gamma.js +30 -0
  229. package/src/cramFile/codecs/huffman.js +137 -0
  230. package/src/cramFile/codecs/index.js +38 -0
  231. package/src/cramFile/codecs/subexp.js +32 -0
  232. package/src/cramFile/constants.js +55 -0
  233. package/src/cramFile/container/compressionScheme.js +144 -0
  234. package/src/cramFile/container/index.js +119 -0
  235. package/src/cramFile/file.js +347 -0
  236. package/src/cramFile/index.js +3 -0
  237. package/src/cramFile/record.js +337 -0
  238. package/src/cramFile/sectionParsers.js +379 -0
  239. package/src/cramFile/slice/decodeRecord.js +362 -0
  240. package/src/cramFile/slice/index.js +497 -0
  241. package/src/cramFile/util.js +169 -0
  242. package/src/errors.js +22 -0
  243. package/src/index.js +5 -0
  244. package/src/indexedCramFile.js +191 -0
  245. package/src/io/bufferCache.js +66 -0
  246. package/src/io/index.js +26 -0
  247. package/src/io/localFile.js +35 -0
  248. package/src/io/remoteFile.js +71 -0
  249. package/src/rans/README.md +1 -0
  250. package/src/rans/constants.js +5 -0
  251. package/src/rans/d04.js +83 -0
  252. package/src/rans/d14.js +59 -0
  253. package/src/rans/decoding.js +141 -0
  254. package/src/rans/frequencies.js +121 -0
  255. package/src/rans/index.js +249 -0
  256. package/src/sam.js +15 -0
  257. package/src/unzip-pako.ts +5 -0
  258. package/src/unzip.ts +2 -0
@@ -0,0 +1,297 @@
1
+ import Constants from './constants';
2
/**
 * Rebuild the bases of a read from the reference sequence and the record's
 * read features.
 *
 * Stretches of the read that no feature covers are copied from
 * `refRegion.seq`; each feature then substitutes, inserts or skips bases at
 * its 1-based position in the read. 'Q' and 'q' features are ignored here.
 *
 * The loop is guaranteed to terminate:
 *  - if the reference region runs out before `readLength` bases have been
 *    produced, decoding stops and the bases produced so far are returned;
 *  - a feature whose `pos` lies before the current end of the read (or is
 *    not a usable number) is skipped.
 * Without these guards an empty reference chunk would leave the loop
 * spinning forever without adding any base.
 *
 * @param {object} cramRecord record to decode; reads `lengthOnRef`,
 *   `readLength`, `alignmentStart`, `readFeatures` and calls
 *   `isUnknownBases()`
 * @param {object} refRegion
 * @param {number} refRegion.start 1-based coordinate of the first base of `seq`
 * @param {string} refRegion.seq reference bases of the region
 * @returns {string|undefined} upper-cased read bases, or undefined when the
 *   record has no length or reports unknown bases
 */
function decodeReadSequence(cramRecord, refRegion) {
  // if it has no length, it has no sequence
  if (!cramRecord.lengthOnRef && !cramRecord.readLength) {
    return undefined;
  }
  if (cramRecord.isUnknownBases()) {
    return undefined;
  }

  // remember: all coordinates are 1-based closed
  const regionSeqOffset = cramRecord.alignmentStart - refRegion.start;
  const { readFeatures, readLength } = cramRecord;

  if (!readFeatures) {
    // no features at all: the read is identical to the reference
    return refRegion.seq
      .substr(regionSeqOffset, cramRecord.lengthOnRef)
      .toUpperCase();
  }

  let bases = '';
  let regionPos = regionSeqOffset;
  let currentReadFeature = 0;

  while (bases.length < readLength) {
    if (currentReadFeature >= readFeatures.length) {
      // put down a chunk of reference up to the full read length
      const chunk = refRegion.seq.substr(regionPos, readLength - bases.length);
      if (chunk.length === 0) {
        // reference exhausted: nothing more can be added
        break;
      }
      bases += chunk;
      regionPos += chunk.length;
      continue;
    }

    const feature = readFeatures[currentReadFeature];
    if (feature.code === 'Q' || feature.code === 'q') {
      currentReadFeature += 1;
      continue;
    }

    // number of reference bases between the current read end and the feature
    const gap = feature.pos - bases.length - 1;
    if (gap > 0) {
      // put down a chunk of sequence up to the next read feature
      const chunk = refRegion.seq.substr(regionPos, gap);
      if (chunk.length === 0) {
        // reference exhausted before reaching the feature
        break;
      }
      bases += chunk;
      regionPos += chunk.length;
      continue;
    }

    currentReadFeature += 1;
    if (gap !== 0) {
      // feature lies behind the bases already emitted (or has an unusable
      // position); skipping it is the only way to keep making progress
      continue;
    }

    // process the read feature
    switch (feature.code) {
      case 'b': {
        // run of bases given as comma-separated character codes
        const codes = feature.data.split(',');
        const added = String.fromCharCode(...codes);
        bases += added;
        regionPos += added.length;
        break;
      }
      case 'B':
        // base pair and associated quality
        // TODO: do we need to set the quality in the qual scores?
        bases += feature.data[0];
        regionPos += 1;
        break;
      case 'X':
        // base substitution
        bases += feature.sub;
        regionPos += 1;
        break;
      case 'I': // insertion
      case 'i': // insert single base
      case 'S': // soft clipped bases that should be present in the read seq
        bases += feature.data;
        break;
      case 'D': // deletion
      case 'N': // reference skip
        regionPos += feature.data;
        break;
      default:
        // 'P' (padding), 'H' (hard clip) and unrecognised codes add nothing
        break;
    }
  }

  return bases.toUpperCase();
}
93
/**
 * Row index into the substitution matrix for each reference base, in both
 * lower and upper case: a/A=0, c/C=1, g/G=2, t/T=3, n/N=4.
 */
const baseNumbers = {};
['a', 'c', 'g', 't', 'n'].forEach((base, index) => {
  baseNumbers[base] = index;
  baseNumbers[base.toUpperCase()] = index;
});

/**
 * Decode one base-substitution read feature in place using the compression
 * scheme's substitution matrix.
 *
 * Sets `readFeature.ref` to the reference base at the feature position (when
 * the region has one) and `readFeature.sub` to the substituted base (when
 * the matrix yields one). Reference bases missing from `baseNumbers` use
 * row 4. Does nothing when no reference region is given.
 *
 * @param {object} cramRecord unused
 * @param {object} refRegion region with `start` and `seq`
 * @param {object} compressionScheme object exposing `substitutionMatrix`
 * @param {object} readFeature feature with `refPos` and `data`; mutated
 * @returns {undefined} nothing
 */
function decodeBaseSubstitution(cramRecord, refRegion, compressionScheme, readFeature) {
  if (refRegion) {
    const offsetInRegion = readFeature.refPos - refRegion.start;
    const referenceBase = refRegion.seq.charAt(offsetInRegion);
    if (referenceBase) {
      readFeature.ref = referenceBase;
    }

    const knownRow = baseNumbers[referenceBase];
    const matrixRow =
      compressionScheme.substitutionMatrix[knownRow === undefined ? 4 : knownRow];

    const substituted = matrixRow[readFeature.data];
    if (substituted) {
      readFeature.sub = substituted;
    }
  }
}
125
/**
 * Class of each CRAM record returned by this API.
 *
 * The flag predicates test bits of `flags` (BAM flags) or `cramFlags`
 * (CRAM-specific flags) against the masks exported by the constants module.
 */
export default class CramRecord {
  constructor() {
    this.tags = {};
  }

  /**
   * @returns {boolean} true if the read is paired, regardless of whether both segments are mapped
   */
  isPaired() {
    return !!(this.flags & Constants.BAM_FPAIRED);
  }

  /** @returns {boolean} true if the read is flagged as part of a properly aligned pair (BAM_FPROPER_PAIR) */
  isProperlyPaired() {
    return !!(this.flags & Constants.BAM_FPROPER_PAIR);
  }

  /** @returns {boolean} true if the read itself is unmapped; not expected together with isProperlyPaired */
  isSegmentUnmapped() {
    return !!(this.flags & Constants.BAM_FUNMAP);
  }

  /** @returns {boolean} true if the mate of this read is unmapped; not expected together with isProperlyPaired */
  isMateUnmapped() {
    return !!(this.flags & Constants.BAM_FMUNMAP);
  }

  /** @returns {boolean} true if the read is mapped to the reverse strand */
  isReverseComplemented() {
    return !!(this.flags & Constants.BAM_FREVERSE);
  }

  /** @returns {boolean} true if the mate is mapped to the reverse strand */
  isMateReverseComplemented() {
    return !!(this.flags & Constants.BAM_FMREVERSE);
  }

  /** @returns {boolean} true if this is read number 1 in a pair */
  isRead1() {
    return !!(this.flags & Constants.BAM_FREAD1);
  }

  /** @returns {boolean} true if this is read number 2 in a pair */
  isRead2() {
    return !!(this.flags & Constants.BAM_FREAD2);
  }

  /** @returns {boolean} true if this is a secondary alignment */
  isSecondary() {
    return !!(this.flags & Constants.BAM_FSECONDARY);
  }

  /** @returns {boolean} true if this read has failed QC checks */
  isFailedQc() {
    return !!(this.flags & Constants.BAM_FQCFAIL);
  }

  /** @returns {boolean} true if the read is an optical or PCR duplicate */
  isDuplicate() {
    return !!(this.flags & Constants.BAM_FDUP);
  }

  /** @returns {boolean} true if this is a supplementary alignment */
  isSupplementary() {
    return !!(this.flags & Constants.BAM_FSUPPLEMENTARY);
  }

  /**
   * @returns {boolean} true if the read is detached
   */
  isDetached() {
    return !!(this.cramFlags & Constants.CRAM_FLAG_DETACHED);
  }

  /** @returns {boolean} true if the read has a mate in this same CRAM segment */
  hasMateDownStream() {
    return !!(this.cramFlags & Constants.CRAM_FLAG_MATE_DOWNSTREAM);
  }

  /** @returns {boolean} true if the read contains qual scores */
  isPreservingQualityScores() {
    return !!(this.cramFlags & Constants.CRAM_FLAG_PRESERVE_QUAL_SCORES);
  }

  /** @returns {boolean} true if the read has no sequence bases */
  isUnknownBases() {
    return !!(this.cramFlags & Constants.CRAM_FLAG_NO_SEQ);
  }

  /**
   * Get the original sequence of this read.
   *
   * When `readBases` is not set yet and a reference region was stored by
   * `addReferenceSequence()`, the bases are decoded from that region and
   * stored in `readBases` before being returned.
   *
   * @returns {String} sequence basepairs (undefined when they are not
   *   available)
   */
  getReadBases() {
    if (!this.readBases && this._refRegion) {
      this.readBases = decodeReadSequence(this, this._refRegion);
    }
    return this.readBases;
  }

  /**
   * Get the pair orientation of a paired read. Adapted from igv.js
   *
   * The result is four characters: strand ('F' or 'R') and read number
   * ('1', '2', or ' ' when neither read flag is set) for the two segments,
   * leftmost segment first; for example 'F1R2'.
   *
   * @returns {String} the pair orientation, or null unless this read is
   *   mapped and paired, and its mate is mapped, known, and on the same
   *   sequence
   */
  getPairOrientation() {
    if (
      this.isSegmentUnmapped() ||
      !this.isPaired() ||
      this.isMateUnmapped() ||
      !this.mate ||
      this.sequenceId !== this.mate.sequenceId
    ) {
      return null;
    }

    const s1 = this.isReverseComplemented() ? 'R' : 'F';
    const s2 = this.isMateReverseComplemented() ? 'R' : 'F';
    let o1 = ' ';
    let o2 = ' ';
    if (this.isRead1()) {
      o1 = '1';
      o2 = '2';
    } else if (this.isRead2()) {
      o1 = '2';
      o2 = '1';
    }

    let isize = this.templateLength || this.templateSize;
    // a read that starts after its mate is the rightmost segment
    if (this.alignmentStart > this.mate.alignmentStart && isize > 0) {
      isize = -isize;
    }

    // positive insert size: this read first; otherwise the mate first
    return isize > 0 ? `${s1}${o1}${s2}${o2}` : `${s2}${o2}${s1}${o1}`;
  }

  /**
   * Annotates this feature with the given reference sequence basepair
   * information. This will add a `sub` and a `ref` item to base
   * substitution read features given the actual substituted and reference
   * base pairs, and will make the `getReadBases()` method work.
   *
   * The region is only kept for `getReadBases()` when it completely covers
   * this read. Calling this without a region is a no-op.
   *
   * @param {object} refRegion
   * @param {number} refRegion.start
   * @param {number} refRegion.end
   * @param {string} refRegion.seq
   * @param {CramContainerCompressionScheme} compressionScheme
   * @returns {undefined} nothing
   */
  addReferenceSequence(refRegion, compressionScheme) {
    // the substitution decoder already ignores a missing region; bail out
    // here too instead of failing on `refRegion.start` below
    if (!refRegion) {
      return;
    }

    if (this.readFeatures) {
      // use the reference bases to decode the bases
      // substituted in each base substitution
      this.readFeatures.forEach(readFeature => {
        if (readFeature.code === 'X') {
          decodeBaseSubstitution(this, refRegion, compressionScheme, readFeature);
        }
      });
    }

    // if this region completely covers this read,
    // keep a reference to it
    const lastBaseOnRef =
      this.alignmentStart + (this.lengthOnRef || this.readLength) - 1;
    if (
      !this.readBases &&
      refRegion.start <= this.alignmentStart &&
      refRegion.end >= lastBaseOnRef
    ) {
      this._refRegion = refRegion;
    }
  }

  /**
   * Plain-object view of this record: every own property whose name does
   * not start with an underscore, plus `readBases` as returned by
   * `getReadBases()`.
   *
   * @returns {object} serializable copy of the record's public fields
   */
  toJSON() {
    const data = {};
    Object.keys(this).forEach(k => {
      if (k.charAt(0) === '_') {
        return;
      }
      data[k] = this[k];
    });
    data.readBases = this.getReadBases();
    return data;
  }
}
297
+ //# sourceMappingURL=record.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"record.js","sourceRoot":"","sources":["../../src/cramFile/record.js"],"names":[],"mappings":"AAAA,OAAO,SAAS,MAAM,aAAa,CAAA;AAEnC,SAAS,kBAAkB,CAAC,UAAU,EAAE,SAAS;IAC/C,0CAA0C;IAC1C,IAAI,CAAC,UAAU,CAAC,WAAW,IAAI,CAAC,UAAU,CAAC,UAAU,EAAE;QACrD,OAAO,SAAS,CAAA;KACjB;IAED,IAAI,UAAU,CAAC,cAAc,EAAE,EAAE;QAC/B,OAAO,SAAS,CAAA;KACjB;IAED,+CAA+C;IAC/C,MAAM,eAAe,GAAG,UAAU,CAAC,cAAc,GAAG,SAAS,CAAC,KAAK,CAAA;IAEnE,IAAI,CAAC,UAAU,CAAC,YAAY,EAAE;QAC5B,OAAO,SAAS,CAAC,GAAG;aACjB,MAAM,CAAC,eAAe,EAAE,UAAU,CAAC,WAAW,CAAC;aAC/C,WAAW,EAAE,CAAA;KACjB;IAED,IAAI,KAAK,GAAG,EAAE,CAAA;IACd,IAAI,SAAS,GAAG,eAAe,CAAA;IAC/B,IAAI,kBAAkB,GAAG,CAAC,CAAA;IAC1B,OAAO,KAAK,CAAC,MAAM,GAAG,UAAU,CAAC,UAAU,EAAE;QAC3C,IAAI,kBAAkB,GAAG,UAAU,CAAC,YAAY,CAAC,MAAM,EAAE;YACvD,MAAM,OAAO,GAAG,UAAU,CAAC,YAAY,CAAC,kBAAkB,CAAC,CAAA;YAC3D,IAAI,OAAO,CAAC,IAAI,KAAK,GAAG,IAAI,OAAO,CAAC,IAAI,KAAK,GAAG,EAAE;gBAChD,kBAAkB,IAAI,CAAC,CAAA;aACxB;iBAAM,IAAI,OAAO,CAAC,GAAG,KAAK,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE;gBAC3C,2BAA2B;gBAC3B,kBAAkB,IAAI,CAAC,CAAA;gBAEvB,IAAI,OAAO,CAAC,IAAI,KAAK,GAAG,EAAE;oBACxB,sCAAsC;oBACtC,MAAM,GAAG,GAAG,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAA;oBACnC,MAAM,KAAK,GAAG,MAAM,CAAC,YAAY,CAAC,GAAG,GAAG,CAAC,CAAA;oBACzC,KAAK,IAAI,KAAK,CAAA;oBACd,SAAS,IAAI,KAAK,CAAC,MAAM,CAAA;iBAC1B;qBAAM,IAAI,OAAO,CAAC,IAAI,KAAK,GAAG,EAAE;oBAC/B,mCAAmC;oBACnC,0DAA0D;oBAC1D,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAA;oBACxB,SAAS,IAAI,CAAC,CAAA;iBACf;qBAAM,IAAI,OAAO,CAAC,IAAI,KAAK,GAAG,EAAE;oBAC/B,oBAAoB;oBACpB,KAAK,IAAI,OAAO,CAAC,GAAG,CAAA;oBACpB,SAAS,IAAI,CAAC,CAAA;iBACf;qBAAM,IAAI,OAAO,CAAC,IAAI,KAAK,GAAG,EAAE;oBAC/B,YAAY;oBACZ,KAAK,IAAI,OAAO,CAAC,IAAI,CAAA;iBACtB;qBAAM,IAAI,OAAO,CAAC,IAAI,KAAK,GAAG,EAAE;oBAC/B,WAAW;oBACX,SAAS,IAAI,OAAO,CAAC,IAAI,CAAA;iBAC1B;qBAAM,IAAI,OAAO,CAAC,IAAI,KAAK,GAAG,EAAE;oBAC/B,qBAAqB;oBACrB,KAAK,IAAI,OAAO,CAAC,IAAI,CAAA;iBACtB;qBAAM,IAAI,OAAO,CAAC,IAAI,KAAK,GAAG,EAAE;oBAC/B,oCAAoC;oBACpC,aAAa;oBACb,iDAAiD;oBACjD,SAAS,IAAI,OAAO,CAAC,IAAI,CAAA;iBAC1B;qBAAM,IAAI,OAAO,CAAC,IAAI,KAAK,
GAAG,EAAE;oBAC/B,4DAA4D;oBAC5D,iEAAiE;oBACjE,KAAK,IAAI,OAAO,CAAC,IAAI,CAAA;iBACtB;qBAAM,IAAI,OAAO,CAAC,IAAI,KAAK,GAAG,EAAE;oBAC/B,sBAAsB;iBACvB;qBAAM,IAAI,OAAO,CAAC,IAAI,KAAK,GAAG,EAAE;oBAC/B,wBAAwB;iBACzB;aACF;iBAAM,IAAI,kBAAkB,GAAG,UAAU,CAAC,YAAY,CAAC,MAAM,EAAE;gBAC9D,2DAA2D;gBAC3D,MAAM,KAAK,GAAG,SAAS,CAAC,GAAG,CAAC,MAAM,CAChC,SAAS,EACT,UAAU,CAAC,YAAY,CAAC,kBAAkB,CAAC,CAAC,GAAG,GAAG,KAAK,CAAC,MAAM,GAAG,CAAC,CACnE,CAAA;gBACD,KAAK,IAAI,KAAK,CAAA;gBACd,SAAS,IAAI,KAAK,CAAC,MAAM,CAAA;aAC1B;SACF;aAAM;YACL,2DAA2D;YAC3D,MAAM,KAAK,GAAG,SAAS,CAAC,GAAG,CAAC,MAAM,CAChC,SAAS,EACT,UAAU,CAAC,UAAU,GAAG,KAAK,CAAC,MAAM,CACrC,CAAA;YACD,KAAK,IAAI,KAAK,CAAA;YACd,SAAS,IAAI,KAAK,CAAC,MAAM,CAAA;SAC1B;KACF;IAED,OAAO,KAAK,CAAC,WAAW,EAAE,CAAA;AAC5B,CAAC;AAED,MAAM,WAAW,GAAG;IAClB,CAAC,EAAE,CAAC;IACJ,CAAC,EAAE,CAAC;IACJ,CAAC,EAAE,CAAC;IACJ,CAAC,EAAE,CAAC;IACJ,CAAC,EAAE,CAAC;IACJ,CAAC,EAAE,CAAC;IACJ,CAAC,EAAE,CAAC;IACJ,CAAC,EAAE,CAAC;IACJ,CAAC,EAAE,CAAC;IACJ,CAAC,EAAE,CAAC;CACL,CAAA;AAED,SAAS,sBAAsB,CAC7B,UAAU,EACV,SAAS,EACT,iBAAiB,EACjB,WAAW;IAEX,IAAI,CAAC,SAAS,EAAE;QACd,OAAM;KACP;IAED,8DAA8D;IAC9D,MAAM,QAAQ,GAAG,WAAW,CAAC,MAAM,GAAG,SAAS,CAAC,KAAK,CAAA;IACrD,MAAM,OAAO,GAAG,SAAS,CAAC,GAAG,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAA;IAC9C,IAAI,OAAO,EAAE;QACX,WAAW,CAAC,GAAG,GAAG,OAAO,CAAA;KAC1B;IACD,IAAI,UAAU,GAAG,WAAW,CAAC,OAAO,CAAC,CAAA;IACrC,IAAI,UAAU,KAAK,SAAS,EAAE;QAC5B,UAAU,GAAG,CAAC,CAAA;KACf;IACD,MAAM,kBAAkB,GAAG,iBAAiB,CAAC,kBAAkB,CAAC,UAAU,CAAC,CAAA;IAC3E,MAAM,IAAI,GAAG,kBAAkB,CAAC,WAAW,CAAC,IAAI,CAAC,CAAA;IACjD,IAAI,IAAI,EAAE;QACR,WAAW,CAAC,GAAG,GAAG,IAAI,CAAA;KACvB;AACH,CAAC;AAED;;GAEG;AACH,MAAM,CAAC,OAAO,OAAO,UAAU;IAC7B;QACE,IAAI,CAAC,IAAI,GAAG,EAAE,CAAA;IAChB,CAAC;IAED;;OAEG;IACH,QAAQ;QACN,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,GAAG,SAAS,CAAC,WAAW,CAAC,CAAA;IAC/C,CAAC;IAED,kFAAkF;IAClF,gBAAgB;QACd,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,GAAG,SAAS,CAAC,gBAAgB,CAAC,CAAA;IACpD,CAAC;IAED,gGAAgG;IAChG,iBAAiB;QACf,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,GAAG,SAAS,CAAC,UAAU,CAAC,CAAA;IAC9C,CAAC;IAED,gGAAgG;IAChG,cAA
c;QACZ,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,GAAG,SAAS,CAAC,WAAW,CAAC,CAAA;IAC/C,CAAC;IAED,0EAA0E;IAC1E,qBAAqB;QACnB,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,GAAG,SAAS,CAAC,YAAY,CAAC,CAAA;IAChD,CAAC;IAED,0EAA0E;IAC1E,yBAAyB;QACvB,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,GAAG,SAAS,CAAC,aAAa,CAAC,CAAA;IACjD,CAAC;IAED,iEAAiE;IACjE,OAAO;QACL,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,GAAG,SAAS,CAAC,UAAU,CAAC,CAAA;IAC9C,CAAC;IAED,iEAAiE;IACjE,OAAO;QACL,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,GAAG,SAAS,CAAC,UAAU,CAAC,CAAA;IAC9C,CAAC;IAED,+DAA+D;IAC/D,WAAW;QACT,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,GAAG,SAAS,CAAC,cAAc,CAAC,CAAA;IAClD,CAAC;IAED,gEAAgE;IAChE,UAAU;QACR,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,GAAG,SAAS,CAAC,WAAW,CAAC,CAAA;IAC/C,CAAC;IAED,yEAAyE;IACzE,WAAW;QACT,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,GAAG,SAAS,CAAC,QAAQ,CAAC,CAAA;IAC5C,CAAC;IAED,mEAAmE;IACnE,eAAe;QACb,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,GAAG,SAAS,CAAC,kBAAkB,CAAC,CAAA;IACtD,CAAC;IAED;;OAEG;IACH,UAAU;QACR,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC,kBAAkB,CAAC,CAAA;IAC1D,CAAC;IAED,+EAA+E;IAC/E,iBAAiB;QACf,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC,yBAAyB,CAAC,CAAA;IACjE,CAAC;IAED,+DAA+D;IAC/D,yBAAyB;QACvB,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC,8BAA8B,CAAC,CAAA;IACtE,CAAC;IAED,gEAAgE;IAChE,cAAc;QACZ,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,GAAG,SAAS,CAAC,gBAAgB,CAAC,CAAA;IACxD,CAAC;IAED;;;OAGG;IACH,YAAY;QACV,IAAI,CAAC,IAAI,CAAC,SAAS,IAAI,IAAI,CAAC,UAAU,EAAE;YACtC,IAAI,CAAC,SAAS,GAAG,kBAAkB,CAAC,IAAI,EAAE,IAAI,CAAC,UAAU,CAAC,CAAA;SAC3D;QACD,OAAO,IAAI,CAAC,SAAS,CAAA;IACvB,CAAC;IAED;;;OAGG;IACH,kBAAkB;QAChB,IACE,CAAC,IAAI,CAAC,iBAAiB,EAAE;YACzB,IAAI,CAAC,QAAQ,EAAE;YACf,CAAC,IAAI,CAAC,cAAc,EAAE;YACtB,IAAI,CAAC,IAAI;YACT,IAAI,CAAC,UAAU,KAAK,IAAI,CAAC,IAAI,CAAC,UAAU,EACxC;YACA,MAAM,EAAE,GAAG,IAAI,CAAC,qBAAqB,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAA;YACnD,MAAM,EAAE,GAAG,IAAI,CAAC,yBAAyB,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAA;YACvD,IAAI,EAAE,GAAG,GAAG,CAAA;YACZ,IAAI,EAAE,GAAG,GAAG,CAAA;YACZ,IAAI,IAAI,CAAC,OA
AO,EAAE,EAAE;gBAClB,EAAE,GAAG,GAAG,CAAA;gBACR,EAAE,GAAG,GAAG,CAAA;aACT;iBAAM,IAAI,IAAI,CAAC,OAAO,EAAE,EAAE;gBACzB,EAAE,GAAG,GAAG,CAAA;gBACR,EAAE,GAAG,GAAG,CAAA;aACT;YAED,MAAM,GAAG,GAAG,EAAE,CAAA;YACd,IAAI,KAAK,GAAG,IAAI,CAAC,cAAc,IAAI,IAAI,CAAC,YAAY,CAAA;YACpD,IAAI,IAAI,CAAC,cAAc,GAAG,IAAI,CAAC,IAAI,CAAC,cAAc,IAAI,KAAK,GAAG,CAAC,EAAE;gBAC/D,KAAK,GAAG,CAAC,KAAK,CAAA;aACf;YACD,IAAI,KAAK,GAAG,CAAC,EAAE;gBACb,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,CAAA;gBACX,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,CAAA;gBACX,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,CAAA;gBACX,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,CAAA;aACZ;iBAAM;gBACL,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,CAAA;gBACX,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,CAAA;gBACX,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,CAAA;gBACX,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,CAAA;aACZ;YACD,OAAO,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,CAAA;SACpB;QACD,OAAO,IAAI,CAAA;IACb,CAAC;IAED;;;;;;;;;;;;OAYG;IACH,oBAAoB,CAAC,SAAS,EAAE,iBAAiB;QAC/C,IAAI,IAAI,CAAC,YAAY,EAAE;YACrB,8CAA8C;YAC9C,wCAAwC;YACxC,IAAI,CAAC,YAAY,CAAC,OAAO,CAAC,WAAW,CAAC,EAAE;gBACtC,IAAI,WAAW,CAAC,IAAI,KAAK,GAAG,EAAE;oBAC5B,sBAAsB,CACpB,IAAI,EACJ,SAAS,EACT,iBAAiB,EACjB,WAAW,CACZ,CAAA;iBACF;YACH,CAAC,CAAC,CAAA;SACH;QAED,8CAA8C;QAC9C,yBAAyB;QACzB,IACE,CAAC,IAAI,CAAC,SAAS;YACf,SAAS,CAAC,KAAK,IAAI,IAAI,CAAC,cAAc;YACtC,SAAS,CAAC,GAAG;gBACX,IAAI,CAAC,cAAc,GAAG,CAAC,IAAI,CAAC,WAAW,IAAI,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,EACjE;YACA,IAAI,CAAC,UAAU,GAAG,SAAS,CAAA;SAC5B;IACH,CAAC;IAED,MAAM;QACJ,MAAM,IAAI,GAAG,EAAE,CAAA;QACf,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE;YAC5B,IAAI,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK,GAAG,EAAE;gBACvB,OAAM;aACP;YACD,IAAI,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,CAAC,CAAA;QACnB,CAAC,CAAC,CAAA;QAEF,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,YAAY,EAAE,CAAA;QAEpC,OAAO,IAAI,CAAA;IACb,CAAC;CACF"}
@@ -0,0 +1,18 @@
1
+ export namespace cramFileDefinition { // type declarations for the exported `cramFileDefinition` value
2
+ const parser: any; // parser object; left untyped (`any`) in these declarations
3
+ const maxLength: number; // presumably the maximum byte length of this section — TODO confirm
4
+ }
5
+ export function getSectionParsers(majorVersion: any): { // declared result: three section descriptors, each a { parser, maxLength } pair
6
+ cramFileDefinition: {
7
+ parser: any;
8
+ maxLength: number;
9
+ };
10
+ cramBlockHeader: {
11
+ parser: any;
12
+ maxLength: number;
13
+ };
14
+ cramBlockCrc32: {
15
+ parser: any;
16
+ maxLength: number;
17
+ };
18
+ }; // NOTE(review): `majorVersion` is untyped here; presumably the CRAM major version number — confirm against the implementation
@@ -0,0 +1,347 @@
1
+ import { Parser } from '@gmod/binary-parser';
// Parser that reads a single unnamed ITF8 value. It is used below as the
// element type of the ITF8 arrays (content ids, Huffman symbols and lengths).
const singleItf8 = new Parser().itf8();

// Layout of the CRAM file definition: a 4-character magic string, one byte
// each for the major and minor version, and a 20-character file id with
// NUL characters stripped.
const fileDefinitionParser = new Parser()
  .string('magic', { length: 4 })
  .uint8('majorVersion')
  .uint8('minorVersion')
  .string('fileId', { length: 20, stripNull: true });

// Section parser for the file definition. maxLength is the fixed size of the
// fields above: 4 + 1 + 1 + 20 = 26.
const cramFileDefinition = {
  parser: fileDefinitionParser,
  maxLength: 26,
};
// Parser for a CRAM block header: compression method and content type (one
// byte each, translated to names by the formatters), followed by three ITF8
// values (content id, compressed size, uncompressed size).
//
// NOTE(review): the formatters below are written to be fully self-contained
// (no references to outer variables). binary-parser appears to embed the
// formatter source in generated code, so keep them that way - confirm before
// hoisting the lookup tables.
const blockHeaderParser = new Parser()
  .uint8('compressionMethod', {
    formatter: /* istanbul ignore next */ methodCode => {
      const methodNames = [
        'raw',
        'gzip',
        'bzip2',
        'lzma',
        'rans',
        'rans4x16',
        'arith',
        'fqzcomp',
        'tok3',
      ];
      const methodName = methodNames[methodCode];
      if (!methodName) {
        throw new Error(
          `compression method number ${methodCode} not implemented`,
        );
      }
      return methodName;
    },
  })
  .uint8('contentType', {
    formatter: /* istanbul ignore next */ typeCode => {
      const typeNames = [
        'FILE_HEADER',
        'COMPRESSION_HEADER',
        'MAPPED_SLICE_HEADER',
        'UNMAPPED_SLICE_HEADER',
        'EXTERNAL_DATA',
        'CORE_DATA',
      ];
      const typeName = typeNames[typeCode];
      if (!typeName) {
        throw new Error(`invalid block content type id ${typeCode}`);
      }
      return typeName;
    },
  })
  .itf8('contentId')
  .itf8('compressedSize')
  .itf8('uncompressedSize');

// Section parser for the block header. maxLength is 1 + 1 + 3 * 5 = 17,
// using the same 5-byte bound per ITF8 value that the rest of this file uses.
const cramBlockHeader = {
  parser: blockHeaderParser,
  maxLength: 17,
};

// Section parser for the 4-byte CRC32 value, read as an unsigned 32-bit
// integer named 'crc32'.
const blockCrc32Parser = new Parser().uint32('crc32');
const cramBlockCrc32 = {
  parser: blockCrc32Parser,
  maxLength: 4,
};
57
+ // const ENCODING_NAMES = [
58
+ // 'NULL', // 0
59
+ // 'EXTERNAL', // 1
60
+ // 'GOLOMB', // 2
61
+ // 'HUFFMAN_INT', // 3
62
+ // 'BYTE_ARRAY_LEN', // 4
63
+ // 'BYTE_ARRAY_STOP', // 5
64
+ // 'BETA', // 6
65
+ // 'SUBEXP', // 7
66
+ // 'GOLOMB_RICE', // 8
67
+ // 'GAMMA', // 9
68
+ // ]
// Parser for the tag dictionary: an ITF8 byte count ('size') followed by that
// many bytes ('ents'). The formatter turns the raw bytes into an array of tag
// sets: the buffer is split at zero bytes, and each segment is decoded as
// UTF-8 and cut into consecutive 3-character strings.
//
// NOTE(review): the formatter is kept self-contained (it only uses its own
// argument and locals). binary-parser appears to embed the formatter source
// in generated code - confirm before referencing outer variables from it.
const cramTagDictionary = new Parser().itf8('size').buffer('ents', {
  length: 'size',
  formatter: /* istanbul ignore next */ buffer => {
    // Decode buffer[from, to) and chop it into 3-character tags.
    const sliceIntoTags = (from, to) => {
      const text = buffer.toString('utf8', from, to);
      const tags = [];
      let offset = 0;
      while (offset < text.length) {
        tags.push(text.slice(offset, offset + 3));
        offset += 3;
      }
      return tags;
    };

    const tagSets = [];
    let segmentStart = 0;
    for (let pos = 0; pos < buffer.length; pos += 1) {
      // a zero byte terminates the current tag set
      if (!buffer[pos]) {
        tagSets.push(sliceIntoTags(segmentStart, pos));
        segmentStart = pos + 1;
      }
    }
    // trailing segment that was not terminated by a zero byte
    if (buffer.length > segmentStart) {
      tagSets.push(sliceIntoTags(segmentStart, buffer.length));
    }
    return tagSets;
  },
});
97
+ // const cramPreservationMapKeys = 'XX RN AP RR SM TD'.split(' ')
// Parser that reads one unnamed byte and converts it to a boolean
// (any non-zero byte becomes true).
const parseByteAsBool = new Parser().uint8(null, {
  formatter: /* istanbul ignore next */ byte => Boolean(byte),
});

// Parser for the preservation map: ITF8 'mapSize' and 'mapCount', then
// 'mapCount' entries. Each entry is a 2-character key followed by a value
// whose layout is selected by that key.
const cramPreservationMap = (() => {
  const mapHeader = new Parser().itf8('mapSize').itf8('mapCount');

  const entryKey = new Parser().string('key', {
    length: 2,
    stripNull: false,
    // formatter: val => cramPreservationMapKeys[val] || 0,
  });

  // These keys all carry a single byte interpreted as a boolean.
  const valueParsers = {};
  for (const flagKey of ['MI', 'UI', 'PI', 'RN', 'AP', 'RR']) {
    valueParsers[flagKey] = parseByteAsBool;
  }
  // SM: five raw bytes.
  valueParsers.SM = new Parser().array(null, { type: 'uint8', length: 5 });
  // TD: the tag dictionary; only its formatted 'ents' value is kept.
  valueParsers.TD = new Parser().nest(null, {
    type: cramTagDictionary,
    formatter: /* istanbul ignore next */ ({ ents }) => ents,
  });

  const entry = entryKey.choice('value', {
    tag: 'key',
    choices: valueParsers,
  });

  return mapHeader.array('ents', {
    length: 'mapCount',
    type: entry,
  });
})();
/**
 * Collapse a parsed map section into a plain object keyed by entry key.
 *
 * Used as the `formatter` for the preservation, data-series-encoding and
 * tag-encoding maps. When a key occurs more than once, a warning is logged
 * and the later value replaces the earlier one.
 *
 * NOTE(review): keep this function self-contained (globals only);
 * binary-parser appears to embed formatter source in generated code - confirm.
 *
 * @param {{ents: Array<{key: string, value: *}>}} data parsed map section
 * @returns {Object} object mapping each key to its (last) value
 */
/* istanbul ignore next */
function formatMap(data) {
  const map = {};
  for (const { key, value } of data.ents) {
    // Check for the key itself rather than the truthiness of the stored
    // value: values such as `false` or `0` are legitimate, and a truthiness
    // test would let their duplicates go unreported.
    if (Object.prototype.hasOwnProperty.call(map, key)) {
      console.warn(`duplicate key ${key} in map`);
    }
    map[key] = value;
  }
  return map;
}
// Section parsers whose layout does not depend on the CRAM version.
const unversionedParsers = {
  cramFileDefinition: cramFileDefinition,
  cramBlockHeader: cramBlockHeader,
  cramBlockCrc32: cramBlockCrc32,
};

// Appends the 'recordCounter' field appropriate for the major version:
// LTF8 (up to 9 bytes) for version >= 3, ITF8 (up to 5 bytes) for version 2,
// and nothing otherwise. Returns the resulting parser and the bytes added to
// the section's maximum length.
function appendRecordCounter(parser, majorVersion) {
  if (majorVersion >= 3) {
    return { parser: parser.ltf8('recordCounter'), addedBytes: 9 };
  }
  if (majorVersion === 2) {
    return { parser: parser.itf8('recordCounter'), addedBytes: 5 };
  }
  return { parser, addedBytes: 0 };
}

// Appends the 16-byte 'md5' array for major version >= 2; the md5 sum is
// missing in CRAM v1. Returns the resulting parser and the bytes added.
function appendMd5(parser, majorVersion) {
  if (majorVersion >= 2) {
    return {
      parser: parser.array('md5', { type: 'uint8', length: 16 }),
      addedBytes: 16,
    };
  }
  return { parser, addedBytes: 0 };
}

// Factories for the version-dependent section parsers. Each property is
// called with the major version; do not add non-factory properties here,
// because getSectionParsers invokes every key of this object.
const versionedParsers = {
  // Section parser for the unmapped slice header. maxLength is a function of
  // the number of content ids (each counted as up to 5 bytes).
  cramUnmappedSliceHeader(majorVersion) {
    const counted = appendRecordCounter(
      new Parser().itf8('numRecords'),
      majorVersion,
    );
    const withContentIds = counted.parser
      .itf8('numBlocks')
      .itf8('numContentIds')
      .array('contentIds', {
        type: singleItf8,
        length: 'numContentIds',
      });
    const summed = appendMd5(withContentIds, majorVersion);
    // numRecords + recordCounter + (numBlocks, numContentIds) + md5
    const fixedBytes = 5 + counted.addedBytes + 5 * 2 + summed.addedBytes;
    return {
      parser: summed.parser,
      maxLength: numContentIds => fixedBytes + numContentIds * 5,
    };
  },

  // Section parser for the mapped slice header. Compared with the unmapped
  // variant it starts with refSeqId/refSeqStart/refSeqSpan and carries a
  // refBaseBlockId after the content ids.
  cramMappedSliceHeader(majorVersion) {
    const counted = appendRecordCounter(
      new Parser()
        .itf8('refSeqId')
        .itf8('refSeqStart')
        .itf8('refSeqSpan')
        .itf8('numRecords'),
      majorVersion,
    );
    const withContentIds = counted.parser
      .itf8('numBlocks')
      .itf8('numContentIds')
      .array('contentIds', {
        type: singleItf8,
        length: 'numContentIds',
      })
      .itf8('refBaseBlockId');
    const summed = appendMd5(withContentIds, majorVersion);
    // four leading ITF8s + recordCounter + (numBlocks, numContentIds,
    // refBaseBlockId) + md5
    const fixedBytes = 5 * 4 + counted.addedBytes + 5 * 3 + summed.addedBytes;
    return {
      parser: summed.parser,
      maxLength: numContentIds => fixedBytes + numContentIds * 5,
    };
  },

  // Parser for one encoding description: codec id, parameter byte count, and
  // codec-specific parameters selected by the codec id. The parser registers
  // itself under the name 'cramEncoding' so that codec 4 can nest it
  // recursively; that registration must happen before the choices are built.
  cramEncoding(majorVersion) {
    const encodingHeader = new Parser()
      .namely('cramEncoding')
      .itf8('codecId')
      .itf8('parametersBytes');
    // BYTE_ARRAY_STOP is a little different for CRAM v1
    const stopContentIdType = majorVersion > 1 ? 'itf8' : 'int';
    const parameterParsers = {
      0: new Parser(),
      1: new Parser().itf8('blockContentId'),
      2: new Parser().itf8('offset').itf8('M'),
      // HUFFMAN_INT
      3: Parser.start()
        .itf8('numCodes')
        .array('symbols', { length: 'numCodes', type: singleItf8 })
        .itf8('numLengths')
        .array('bitLengths', { length: 'numLengths', type: singleItf8 }),
      // BYTE_ARRAY_LEN
      4: Parser.start()
        .nest('lengthsEncoding', { type: 'cramEncoding' })
        .nest('valuesEncoding', { type: 'cramEncoding' }),
      // BYTE_ARRAY_STOP
      5: new Parser().uint8('stopByte')[stopContentIdType]('blockContentId'),
      6: new Parser().itf8('offset').itf8('length'),
      7: new Parser().itf8('offset').itf8('K'),
      8: new Parser().itf8('offset').itf8('log2m'),
      // GAMMA
      9: new Parser().itf8('offset'),
    };
    const parser = encodingHeader.choice('parameters', {
      tag: 'codecId',
      choices: parameterParsers,
    });
    return { parser };
  },

  // Parser for the data series encoding map: 'mapSize', 'mapCount', then
  // 'mapCount' entries of a 2-character key and an encoding description.
  // Note that this returns the parser itself, not a { parser } wrapper.
  cramDataSeriesEncodingMap(majorVersion) {
    const mapHeader = new Parser().itf8('mapSize').itf8('mapCount');
    const entryKey = new Parser().string('key', {
      length: 2,
      stripNull: false,
    });
    const entry = entryKey.nest('value', {
      type: this.cramEncoding(majorVersion).parser,
    });
    return mapHeader.array('ents', {
      length: 'mapCount',
      type: entry,
    });
  },

  // Parser for the tag encoding map: like the data series map, but the key is
  // an ITF8 integer whose three low bytes are turned into a 3-character
  // string. Returns the parser itself, not a { parser } wrapper.
  cramTagEncodingMap(majorVersion) {
    const mapHeader = new Parser().itf8('mapSize').itf8('mapCount');
    const entryKey = new Parser().itf8('key', {
      formatter: /* istanbul ignore next */ tagId =>
        /* istanbul ignore next */
        String.fromCharCode(
          (tagId >> 16) & 0xff,
          (tagId >> 8) & 0xff,
          tagId & 0xff,
        ),
    });
    const entry = entryKey.nest('value', {
      type: this.cramEncoding(majorVersion).parser,
    });
    return mapHeader.array('ents', {
      length: 'mapCount',
      type: entry,
    });
  },

  // Parser for the compression header: the preservation map, the data series
  // encoding map and the tag encoding map, each flattened to a plain object
  // by formatMap.
  cramCompressionHeader(majorVersion) {
    // TODO: if we want to support CRAM v1, we will need to refactor
    // compression header into 2 parts to parse the landmarks,
    // like the container header
    const withPreservation = new Parser().nest('preservation', {
      type: cramPreservationMap,
      formatter: formatMap,
    });
    const withDataSeries = withPreservation.nest('dataSeriesEncoding', {
      type: this.cramDataSeriesEncodingMap(majorVersion),
      formatter: formatMap,
    });
    const parser = withDataSeries.nest('tagEncoding', {
      type: this.cramTagEncodingMap(majorVersion),
      formatter: formatMap,
    });
    return { parser };
  },

  // Section parser for the first part of the container header, up to and
  // including the landmark count. maxLength here is a plain number.
  cramContainerHeader1(majorVersion) {
    const counted = appendRecordCounter(
      new Parser()
        .int32('length') // byte size of the container data (blocks)
        .itf8('refSeqId') // reference sequence identifier, -1 for unmapped reads, -2 for multiple reference sequences
        .itf8('refSeqStart') // the alignment start position or 0 for unmapped reads
        .itf8('alignmentSpan') // the length of the alignment or 0 for unmapped reads
        .itf8('numRecords'), // number of records in the container
      majorVersion, // recordCounter: 1-based sequential index of records in the file/stream
    );
    let parser = counted.parser;
    let maxLength = 4 + 5 * 4 + counted.addedBytes;
    if (majorVersion > 1) {
      parser = parser.ltf8('numBases'); // number of read bases
      maxLength += 9;
    }
    parser = parser
      .itf8('numBlocks') // the number of blocks
      .itf8('numLandmarks'); // the number of landmarks
    maxLength += 5 + 5;
    return { parser, maxLength };
  },

  // Section parser for the second part of the container header: the landmark
  // count, the landmarks themselves and, for major version >= 3, a CRC32.
  // maxLength is a function of the number of landmarks.
  cramContainerHeader2(majorVersion) {
    const landmarkParser = new Parser()
      .itf8('numLandmarks') // the number of landmarks
      // Each integer value of this array is a byte offset
      // into the blocks byte array. Landmarks are used for
      // random access indexing.
      .array('landmarks', {
        type: new Parser().itf8(),
        length: 'numLandmarks',
      });
    const hasCrc = majorVersion >= 3;
    const parser = hasCrc ? landmarkParser.uint32('crc32') : landmarkParser;
    const crcLength = hasCrc ? 4 : 0;
    return {
      parser,
      maxLength: numLandmarks => 5 + numLandmarks * 5 + crcLength,
    };
  },
};
/**
 * Build the full set of section parsers for one CRAM major version.
 *
 * Starts from a copy of the version-independent parsers and adds, under the
 * same name, the result of calling every factory in `versionedParsers` with
 * `majorVersion`.
 *
 * @param {number} majorVersion major version passed to each versioned factory
 * @returns {Object} object mapping section names to their parsers
 */
function getSectionParsers(majorVersion) {
  const parsers = Object.assign({}, unversionedParsers);
  for (const sectionName of Object.keys(versionedParsers)) {
    // call through versionedParsers so `this` inside each factory still
    // refers to it (some factories call sibling factories via `this`)
    parsers[sectionName] = versionedParsers[sectionName](majorVersion);
  }
  return parsers;
}
346
+ export { cramFileDefinition, getSectionParsers };
347
+ //# sourceMappingURL=sectionParsers.js.map