@gmod/cram 1.5.9 → 1.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (258)
  1. package/CHANGELOG.md +90 -0
  2. package/README.md +182 -172
  3. package/dist/craiIndex.d.ts +37 -0
  4. package/dist/craiIndex.js +196 -301
  5. package/dist/craiIndex.js.map +1 -0
  6. package/dist/cram-bundle.js +6 -15
  7. package/dist/cramFile/codecs/_base.d.ts +6 -0
  8. package/dist/cramFile/codecs/_base.js +44 -53
  9. package/dist/cramFile/codecs/_base.js.map +1 -0
  10. package/dist/cramFile/codecs/beta.d.ts +4 -0
  11. package/dist/cramFile/codecs/beta.js +38 -48
  12. package/dist/cramFile/codecs/beta.js.map +1 -0
  13. package/dist/cramFile/codecs/byteArrayLength.d.ts +8 -0
  14. package/dist/cramFile/codecs/byteArrayLength.js +58 -78
  15. package/dist/cramFile/codecs/byteArrayLength.js.map +1 -0
  16. package/dist/cramFile/codecs/byteArrayStop.d.ts +6 -0
  17. package/dist/cramFile/codecs/byteArrayStop.js +62 -76
  18. package/dist/cramFile/codecs/byteArrayStop.js.map +1 -0
  19. package/dist/cramFile/codecs/external.d.ts +7 -0
  20. package/dist/cramFile/codecs/external.js +63 -81
  21. package/dist/cramFile/codecs/external.js.map +1 -0
  22. package/dist/cramFile/codecs/gamma.d.ts +4 -0
  23. package/dist/cramFile/codecs/gamma.js +43 -56
  24. package/dist/cramFile/codecs/gamma.js.map +1 -0
  25. package/dist/cramFile/codecs/huffman.d.ts +17 -0
  26. package/dist/cramFile/codecs/huffman.js +126 -199
  27. package/dist/cramFile/codecs/huffman.js.map +1 -0
  28. package/dist/cramFile/codecs/index.d.ts +2 -0
  29. package/dist/cramFile/codecs/index.js +31 -38
  30. package/dist/cramFile/codecs/index.js.map +1 -0
  31. package/dist/cramFile/codecs/subexp.d.ts +4 -0
  32. package/dist/cramFile/codecs/subexp.js +51 -64
  33. package/dist/cramFile/codecs/subexp.js.map +1 -0
  34. package/dist/cramFile/constants.d.ts +36 -0
  35. package/dist/cramFile/constants.js +52 -50
  36. package/dist/cramFile/constants.js.map +1 -0
  37. package/dist/cramFile/container/compressionScheme.d.ts +23 -0
  38. package/dist/cramFile/container/compressionScheme.js +115 -153
  39. package/dist/cramFile/container/compressionScheme.js.map +1 -0
  40. package/dist/cramFile/container/index.d.ts +13 -0
  41. package/dist/cramFile/container/index.js +169 -283
  42. package/dist/cramFile/container/index.js.map +1 -0
  43. package/dist/cramFile/file.d.ts +63 -0
  44. package/dist/cramFile/file.js +440 -766
  45. package/dist/cramFile/file.js.map +1 -0
  46. package/dist/cramFile/index.d.ts +2 -0
  47. package/dist/cramFile/index.js +7 -4
  48. package/dist/cramFile/index.js.map +1 -0
  49. package/dist/cramFile/record.d.ts +79 -0
  50. package/dist/cramFile/record.js +253 -308
  51. package/dist/cramFile/record.js.map +1 -0
  52. package/dist/cramFile/sectionParsers.d.ts +18 -0
  53. package/dist/cramFile/sectionParsers.js +324 -362
  54. package/dist/cramFile/sectionParsers.js.map +1 -0
  55. package/dist/cramFile/slice/decodeRecord.d.ts +2 -0
  56. package/dist/cramFile/slice/decodeRecord.js +278 -298
  57. package/dist/cramFile/slice/decodeRecord.js.map +1 -0
  58. package/dist/cramFile/slice/index.d.ts +20 -0
  59. package/dist/cramFile/slice/index.js +488 -789
  60. package/dist/cramFile/slice/index.js.map +1 -0
  61. package/dist/cramFile/util.d.ts +5 -0
  62. package/dist/cramFile/util.js +158 -144
  63. package/dist/cramFile/util.js.map +1 -0
  64. package/dist/errors.d.ts +23 -0
  65. package/dist/errors.js +66 -103
  66. package/dist/errors.js.map +1 -0
  67. package/dist/index.d.ts +4 -0
  68. package/dist/index.js +12 -12
  69. package/dist/index.js.map +1 -0
  70. package/dist/indexedCramFile.d.ts +39 -0
  71. package/dist/indexedCramFile.js +213 -315
  72. package/dist/indexedCramFile.js.map +1 -0
  73. package/dist/io/bufferCache.d.ts +12 -0
  74. package/dist/io/bufferCache.js +108 -128
  75. package/dist/io/bufferCache.js.map +1 -0
  76. package/dist/io/index.d.ts +5 -0
  77. package/dist/io/index.js +29 -27
  78. package/dist/io/index.js.map +1 -0
  79. package/dist/io/localFile.d.ts +10 -0
  80. package/dist/io/localFile.js +105 -162
  81. package/dist/io/localFile.js.map +1 -0
  82. package/dist/io/remoteFile.d.ts +16 -0
  83. package/dist/io/remoteFile.js +137 -206
  84. package/dist/io/remoteFile.js.map +1 -0
  85. package/dist/rans/constants.d.ts +3 -0
  86. package/dist/rans/constants.js +6 -6
  87. package/dist/rans/constants.js.map +1 -0
  88. package/dist/rans/d04.d.ts +1 -0
  89. package/dist/rans/d04.js +70 -99
  90. package/dist/rans/d04.js.map +1 -0
  91. package/dist/rans/d14.d.ts +1 -0
  92. package/dist/rans/d14.js +55 -93
  93. package/dist/rans/d14.js.map +1 -0
  94. package/dist/rans/decoding.d.ts +30 -0
  95. package/dist/rans/decoding.js +112 -159
  96. package/dist/rans/decoding.js.map +1 -0
  97. package/dist/rans/frequencies.d.ts +2 -0
  98. package/dist/rans/frequencies.js +110 -119
  99. package/dist/rans/frequencies.js.map +1 -0
  100. package/dist/rans/index.d.ts +1 -0
  101. package/dist/rans/index.js +111 -174
  102. package/dist/rans/index.js.map +1 -0
  103. package/dist/sam.d.ts +1 -0
  104. package/dist/sam.js +16 -41
  105. package/dist/sam.js.map +1 -0
  106. package/dist/unzip-pako.d.ts +2 -0
  107. package/dist/unzip-pako.js +9 -0
  108. package/dist/unzip-pako.js.map +1 -0
  109. package/dist/unzip.d.ts +2 -0
  110. package/dist/unzip.js +6 -0
  111. package/dist/unzip.js.map +1 -0
  112. package/errors.js +66 -103
  113. package/esm/craiIndex.d.ts +37 -0
  114. package/esm/craiIndex.js +158 -0
  115. package/esm/craiIndex.js.map +1 -0
  116. package/esm/cramFile/codecs/_base.d.ts +6 -0
  117. package/esm/cramFile/codecs/_base.js +42 -0
  118. package/esm/cramFile/codecs/_base.js.map +1 -0
  119. package/esm/cramFile/codecs/beta.d.ts +4 -0
  120. package/esm/cramFile/codecs/beta.js +15 -0
  121. package/esm/cramFile/codecs/beta.js.map +1 -0
  122. package/esm/cramFile/codecs/byteArrayLength.d.ts +8 -0
  123. package/esm/cramFile/codecs/byteArrayLength.js +35 -0
  124. package/esm/cramFile/codecs/byteArrayLength.js.map +1 -0
  125. package/esm/cramFile/codecs/byteArrayStop.d.ts +6 -0
  126. package/esm/cramFile/codecs/byteArrayStop.js +40 -0
  127. package/esm/cramFile/codecs/byteArrayStop.js.map +1 -0
  128. package/esm/cramFile/codecs/external.d.ts +7 -0
  129. package/esm/cramFile/codecs/external.js +40 -0
  130. package/esm/cramFile/codecs/external.js.map +1 -0
  131. package/esm/cramFile/codecs/gamma.d.ts +4 -0
  132. package/esm/cramFile/codecs/gamma.js +20 -0
  133. package/esm/cramFile/codecs/gamma.js.map +1 -0
  134. package/esm/cramFile/codecs/huffman.d.ts +17 -0
  135. package/esm/cramFile/codecs/huffman.js +107 -0
  136. package/esm/cramFile/codecs/huffman.js.map +1 -0
  137. package/esm/cramFile/codecs/index.d.ts +2 -0
  138. package/esm/cramFile/codecs/index.js +30 -0
  139. package/esm/cramFile/codecs/index.js.map +1 -0
  140. package/esm/cramFile/codecs/subexp.d.ts +4 -0
  141. package/esm/cramFile/codecs/subexp.js +28 -0
  142. package/esm/cramFile/codecs/subexp.js.map +1 -0
  143. package/esm/cramFile/constants.d.ts +36 -0
  144. package/esm/cramFile/constants.js +51 -0
  145. package/esm/cramFile/constants.js.map +1 -0
  146. package/esm/cramFile/container/compressionScheme.d.ts +23 -0
  147. package/esm/cramFile/container/compressionScheme.js +123 -0
  148. package/esm/cramFile/container/compressionScheme.js.map +1 -0
  149. package/esm/cramFile/container/index.d.ts +13 -0
  150. package/esm/cramFile/container/index.js +84 -0
  151. package/esm/cramFile/container/index.js.map +1 -0
  152. package/esm/cramFile/file.d.ts +63 -0
  153. package/esm/cramFile/file.js +281 -0
  154. package/esm/cramFile/file.js.map +1 -0
  155. package/esm/cramFile/index.d.ts +2 -0
  156. package/esm/cramFile/index.js +3 -0
  157. package/esm/cramFile/index.js.map +1 -0
  158. package/esm/cramFile/record.d.ts +79 -0
  159. package/esm/cramFile/record.js +297 -0
  160. package/esm/cramFile/record.js.map +1 -0
  161. package/esm/cramFile/sectionParsers.d.ts +18 -0
  162. package/esm/cramFile/sectionParsers.js +347 -0
  163. package/esm/cramFile/sectionParsers.js.map +1 -0
  164. package/esm/cramFile/slice/decodeRecord.d.ts +2 -0
  165. package/esm/cramFile/slice/decodeRecord.js +299 -0
  166. package/esm/cramFile/slice/decodeRecord.js.map +1 -0
  167. package/esm/cramFile/slice/index.d.ts +20 -0
  168. package/esm/cramFile/slice/index.js +364 -0
  169. package/esm/cramFile/slice/index.js.map +1 -0
  170. package/esm/cramFile/util.d.ts +5 -0
  171. package/esm/cramFile/util.js +161 -0
  172. package/esm/cramFile/util.js.map +1 -0
  173. package/esm/errors.d.ts +23 -0
  174. package/esm/errors.js +24 -0
  175. package/esm/errors.js.map +1 -0
  176. package/esm/index.d.ts +4 -0
  177. package/esm/index.js +5 -0
  178. package/esm/index.js.map +1 -0
  179. package/esm/indexedCramFile.d.ts +39 -0
  180. package/esm/indexedCramFile.js +155 -0
  181. package/esm/indexedCramFile.js.map +1 -0
  182. package/esm/io/bufferCache.d.ts +12 -0
  183. package/esm/io/bufferCache.js +54 -0
  184. package/esm/io/bufferCache.js.map +1 -0
  185. package/esm/io/index.d.ts +5 -0
  186. package/esm/io/index.js +24 -0
  187. package/esm/io/index.js.map +1 -0
  188. package/esm/io/localFile.d.ts +10 -0
  189. package/esm/io/localFile.js +31 -0
  190. package/esm/io/localFile.js.map +1 -0
  191. package/esm/io/remoteFile.d.ts +16 -0
  192. package/esm/io/remoteFile.js +64 -0
  193. package/esm/io/remoteFile.js.map +1 -0
  194. package/esm/rans/constants.d.ts +3 -0
  195. package/esm/rans/constants.js +5 -0
  196. package/esm/rans/constants.js.map +1 -0
  197. package/esm/rans/d04.d.ts +1 -0
  198. package/esm/rans/d04.js +67 -0
  199. package/esm/rans/d04.js.map +1 -0
  200. package/esm/rans/d14.d.ts +1 -0
  201. package/esm/rans/d14.js +52 -0
  202. package/esm/rans/d14.js.map +1 -0
  203. package/esm/rans/decoding.d.ts +30 -0
  204. package/esm/rans/decoding.js +118 -0
  205. package/esm/rans/decoding.js.map +1 -0
  206. package/esm/rans/frequencies.d.ts +2 -0
  207. package/esm/rans/frequencies.js +110 -0
  208. package/esm/rans/frequencies.js.map +1 -0
  209. package/esm/rans/index.d.ts +1 -0
  210. package/esm/rans/index.js +195 -0
  211. package/esm/rans/index.js.map +1 -0
  212. package/esm/sam.d.ts +1 -0
  213. package/esm/sam.js +16 -0
  214. package/esm/sam.js.map +1 -0
  215. package/esm/unzip-pako.d.ts +2 -0
  216. package/esm/unzip-pako.js +5 -0
  217. package/esm/unzip-pako.js.map +1 -0
  218. package/esm/unzip.d.ts +2 -0
  219. package/esm/unzip.js +3 -0
  220. package/esm/unzip.js.map +1 -0
  221. package/package.json +38 -35
  222. package/src/craiIndex.js +180 -0
  223. package/src/cramFile/codecs/_base.js +49 -0
  224. package/src/cramFile/codecs/beta.js +23 -0
  225. package/src/cramFile/codecs/byteArrayLength.js +55 -0
  226. package/src/cramFile/codecs/byteArrayStop.js +50 -0
  227. package/src/cramFile/codecs/external.js +54 -0
  228. package/src/cramFile/codecs/gamma.js +30 -0
  229. package/src/cramFile/codecs/huffman.js +137 -0
  230. package/src/cramFile/codecs/index.js +38 -0
  231. package/src/cramFile/codecs/subexp.js +32 -0
  232. package/src/cramFile/constants.js +55 -0
  233. package/src/cramFile/container/compressionScheme.js +144 -0
  234. package/src/cramFile/container/index.js +119 -0
  235. package/src/cramFile/file.js +347 -0
  236. package/src/cramFile/index.js +3 -0
  237. package/src/cramFile/record.js +337 -0
  238. package/src/cramFile/sectionParsers.js +379 -0
  239. package/src/cramFile/slice/decodeRecord.js +362 -0
  240. package/src/cramFile/slice/index.js +497 -0
  241. package/src/cramFile/util.js +169 -0
  242. package/src/errors.js +22 -0
  243. package/src/index.js +5 -0
  244. package/src/indexedCramFile.js +191 -0
  245. package/src/io/bufferCache.js +66 -0
  246. package/src/io/index.js +26 -0
  247. package/src/io/localFile.js +35 -0
  248. package/src/io/remoteFile.js +71 -0
  249. package/src/rans/README.md +1 -0
  250. package/src/rans/constants.js +5 -0
  251. package/src/rans/d04.js +83 -0
  252. package/src/rans/d14.js +59 -0
  253. package/src/rans/decoding.js +141 -0
  254. package/src/rans/frequencies.js +121 -0
  255. package/src/rans/index.js +249 -0
  256. package/src/sam.js +15 -0
  257. package/src/unzip-pako.ts +5 -0
  258. package/src/unzip.ts +2 -0
@@ -0,0 +1,497 @@
1
+ import {
2
+ CramMalformedError,
3
+ CramBufferOverrunError,
4
+ CramArgumentError,
5
+ } from '../../errors'
6
+ import { parseItem, tinyMemoize, sequenceMD5 } from '../util'
7
+
8
+ import Constants from '../constants'
9
+ import decodeRecord from './decodeRecord'
10
+
11
/**
 * @private
 * Estimate one template length for a chain of intra-slice mated records
 * (a template made of more than two reads) and assign it to every record in
 * the chain.
 *
 * The chain starts at `thisRecord` and follows each record's
 * `mateRecordNumber` index into `allRecords` until a record without a
 * non-negative `mateRecordNumber` is reached. The estimate is the distance
 * from the smallest `alignmentStart` to the largest
 * `alignmentStart + readLength - 1` in the chain, inclusive.
 *
 * @param {Array<CramRecord>} allRecords all records of the slice
 * @param {number} currentRecordNumber index of `thisRecord`; not used by the
 *   calculation, kept so existing callers do not change
 * @param {CramRecord} thisRecord record the chain starts from
 * @throws {CramMalformedError} if a mate index does not resolve to a record,
 *   if the mate links loop back onto a record already in the chain, or if a
 *   record in the chain already has a `templateLength`
 */
function calculateMultiSegmentMatedTemplateLength(
  allRecords,
  currentRecordNumber,
  thisRecord,
) {
  // Walk the chain iteratively and remember what was visited: a malformed
  // file can contain mate links that form a cycle, which would otherwise
  // never terminate.
  const matedRecords = []
  const visited = new Set()
  let record = thisRecord
  while (true) {
    if (visited.has(record)) {
      throw new CramMalformedError(
        'intra-slice mate records form a cycle, this file seems malformed',
      )
    }
    visited.add(record)
    matedRecords.push(record)
    if (!(record.mateRecordNumber >= 0)) {
      break
    }
    const mateRecord = allRecords[record.mateRecordNumber]
    if (!mateRecord) {
      throw new CramMalformedError(
        'intra-slice mate record not found, this file seems malformed',
      )
    }
    record = mateRecord
  }

  // simple (non-gapped) extent of the whole chain
  let minStart = Infinity
  let maxEnd = -Infinity
  matedRecords.forEach(r => {
    minStart = Math.min(minStart, r.alignmentStart)
    maxEnd = Math.max(maxEnd, r.alignmentStart + r.readLength - 1)
  })
  const estimatedTemplateLength = maxEnd - minStart + 1

  if (estimatedTemplateLength >= 0) {
    // validate the whole chain before touching any record, so a failure
    // does not leave the chain half-updated
    if (matedRecords.some(r => r.templateLength !== undefined)) {
      throw new CramMalformedError(
        'mate pair group has some members that have template lengths already, this file seems malformed',
      )
    }
    matedRecords.forEach(r => {
      r.templateLength = estimatedTemplateLength
    })
  }
}
52
+
53
/**
 * @private
 * Estimate the `templateLength` of two intra-slice paired reads and store the
 * same value on both records. Ported from htslib; the algorithm is imperfect.
 *
 * Only the simple (non-gapped) extent of each read is used, i.e.
 * `alignmentStart` through `alignmentStart + readLength - 1`, because gaps in
 * the alignment do not make the template longer or shorter.
 *
 * @param {CramRecord} thisRecord
 * @param {CramRecord} mateRecord
 */
function calculateIntraSliceMatePairTemplateLength(thisRecord, mateRecord) {
  const leftmost = Math.min(
    thisRecord.alignmentStart,
    mateRecord.alignmentStart,
  )
  const thisLastBase = thisRecord.alignmentStart + thisRecord.readLength - 1
  const mateLastBase = mateRecord.alignmentStart + mateRecord.readLength - 1
  const rightmost = Math.max(thisLastBase, mateLastBase)

  const span = rightmost - leftmost + 1
  thisRecord.templateLength = span
  mateRecord.templateLength = span
}
72
+
73
/**
 * @private
 * Establish the mate relationship between two records of the same slice.
 * CRAM compresses intra-slice mate relationships down to a single record
 * carrying the in-slice index of its mate; this expands that into `mate`
 * objects, mate-related flag bits and an estimated template length.
 *
 * @param {Array<CramRecord>} allRecords all records of the slice
 * @param {number} currentRecordNumber index of `thisRecord` in `allRecords`
 * @param {CramRecord} thisRecord record that carries `mateRecordNumber`
 * @param {CramRecord} mateRecord record that `mateRecordNumber` points at
 * @throws {CramMalformedError} if `mateRecord` is missing
 */
function associateIntraSliceMate(
  allRecords,
  currentRecordNumber,
  thisRecord,
  mateRecord,
) {
  if (!mateRecord) {
    throw new CramMalformedError(
      'could not resolve intra-slice mate pairs, file seems truncated or malformed',
    )
  }

  // must be decided before any `mate` object is attached below
  const isMultiSegment = mateRecord.mate
    ? true
    : mateRecord.mateRecordNumber !== undefined &&
      mateRecord.mateRecordNumber !== currentRecordNumber

  // Deal with lossy read names
  if (!thisRecord.readName) {
    const syntheticName = String(thisRecord.uniqueId)
    thisRecord.readName = syntheticName
    mateRecord.readName = thisRecord.readName
  }

  // short description of a record, as stored in the other record's `mate`
  const describe = record => {
    const summary = {
      sequenceId: record.sequenceId,
      alignmentStart: record.alignmentStart,
      uniqueId: record.uniqueId,
    }
    if (record.readName) {
      summary.readName = record.readName
    }
    return summary
  }

  thisRecord.mate = describe(mateRecord)

  // in a multi-segment (more than paired) scheme the mate may have its own
  // mate pointer, so only point it back here when it has no other relationship
  if (!mateRecord.mate && mateRecord.mateRecordNumber === undefined) {
    mateRecord.mate = describe(thisRecord)
  }

  const {
    BAM_FPAIRED,
    BAM_FUNMAP,
    BAM_FMUNMAP,
    BAM_FREVERSE,
    BAM_FMREVERSE,
  } = Constants

  // paired
  thisRecord.flags |= BAM_FPAIRED

  // mate-unmapped bits, each record reflecting the other's mapping state
  if (mateRecord.flags & BAM_FUNMAP) {
    thisRecord.flags |= BAM_FMUNMAP
  }
  if (thisRecord.flags & BAM_FUNMAP) {
    mateRecord.flags |= BAM_FMUNMAP
  }

  // mate-reversed bits, each record reflecting the other's strand
  if (mateRecord.flags & BAM_FREVERSE) {
    thisRecord.flags |= BAM_FMREVERSE
  }
  if (thisRecord.flags & BAM_FREVERSE) {
    mateRecord.flags |= BAM_FMREVERSE
  }

  const needsTemplateLength = thisRecord.templateLength === undefined
  if (needsTemplateLength && isMultiSegment) {
    calculateMultiSegmentMatedTemplateLength(
      allRecords,
      currentRecordNumber,
      thisRecord,
    )
  } else if (needsTemplateLength) {
    calculateIntraSliceMatePairTemplateLength(thisRecord, mateRecord)
  }

  // removed last: the multi-segment template length estimation follows it
  delete thisRecord.mateRecordNumber
}
163
+
164
/**
 * One slice of a CRAM container. Gives access to the slice header, the
 * slice's blocks, the reference region the slice covers, and the records
 * decoded from the slice.
 */
export default class CramSlice {
  /**
   * @param {object} container container this slice belongs to; its `file`
   *   is kept as `this.file`
   * @param {number} position position of the slice, added to the end
   *   position of the container header to locate the slice header block
   */
  constructor(container, position) {
    this.container = container
    this.file = container.file
    this.containerPosition = position
  }

  // memoize
  /**
   * Read the slice header block and replace its `content` with the parsed
   * header fields.
   * @returns {Promise<object>} the header block
   * @throws {CramMalformedError} if the block is neither a mapped nor an
   *   unmapped slice header
   */
  async getHeader() {
    const sectionParsers = await this.file.getSectionParsers()
    const containerHeader = await this.container.getHeader()
    const header = await this.file.readBlock(
      containerHeader._endPosition + this.containerPosition,
    )

    let headerParser
    if (header.contentType === 'MAPPED_SLICE_HEADER') {
      headerParser = sectionParsers.cramMappedSliceHeader.parser
    } else if (header.contentType === 'UNMAPPED_SLICE_HEADER') {
      headerParser = sectionParsers.cramUnmappedSliceHeader.parser
    } else {
      // report the same property that was just tested
      throw new CramMalformedError(
        `error reading slice header block, invalid content type ${header.contentType}`,
      )
    }

    header.content = parseItem(
      header.content,
      headerParser,
      0,
      containerHeader._endPosition,
    )
    return header
  }

  // memoize
  /**
   * Read all of the slice's blocks into memory, one after another, starting
   * at the end of the slice header block.
   * @returns {Promise<Array<object>>} `numBlocks` blocks, in file order
   */
  async getBlocks() {
    const header = await this.getHeader()
    let blockPosition = header._endPosition
    const blocks = new Array(header.content.numBlocks)
    // sequential on purpose: each block starts where the previous one ends
    for (let i = 0; i < blocks.length; i += 1) {
      blocks[i] = await this.file.readBlock(blockPosition)
      blockPosition = blocks[i]._endPosition
    }

    return blocks
  }

  // no memoize
  /**
   * @returns {Promise<object>} the core data block, which is always the first
   *   block in the slice
   */
  async getCoreDataBlock() {
    const blocks = await this.getBlocks()
    return blocks[0]
  }

  // memoize
  /**
   * Index the slice's `EXTERNAL_DATA` blocks by their `contentId`.
   * @returns {Promise<object>} map of contentId => block
   */
  async _getBlocksContentIdIndex() {
    const blocks = await this.getBlocks()
    const blocksByContentId = {}
    blocks.forEach(block => {
      if (block.contentType === 'EXTERNAL_DATA') {
        blocksByContentId[block.contentId] = block
      }
    })
    return blocksByContentId
  }

  /**
   * @param {number} id content id of an external data block
   * @returns {Promise<object|undefined>} the block, or undefined if the slice
   *   has no external data block with that id
   */
  async getBlockByContentId(id) {
    const blocksByContentId = await this._getBlocksContentIdIndex()
    return blocksByContentId[id]
  }

  /**
   * Get the reference sequence region covered by this slice, either from the
   * reference embedded in the slice or through the file's
   * `fetchReferenceSequenceCallback`.
   *
   * @returns {Promise<{seq: string, start: number, end: number, span: number}|undefined>}
   *   undefined when the slice header has a negative `refSeqId`, or when no
   *   reference is embedded, none is required and no callback is set
   * @throws {CramMalformedError} if an embedded reference is announced but
   *   its block is missing or smaller than the slice's reference span
   * @throws {Error} if a reference is required but no callback was provided
   * @throws {CramArgumentError} if the callback returns a sequence whose
   *   length differs from the slice's reference span
   */
  async getReferenceRegion() {
    const sliceHeader = (await this.getHeader()).content

    if (sliceHeader.refSeqId < 0) {
      return undefined
    }

    const compressionScheme = await this.container.getCompressionScheme()

    if (sliceHeader.refBaseBlockId >= 0) {
      // getBlockByContentId is async: without `await` the checks below would
      // run against a Promise instead of the block
      const refBlock = await this.getBlockByContentId(
        sliceHeader.refBaseBlockId,
      )
      if (!refBlock) {
        throw new CramMalformedError(
          'embedded reference specified, but reference block does not exist',
        )
      }

      if (sliceHeader.refSeqSpan > refBlock.uncompressedSize) {
        throw new CramMalformedError('Embedded reference is too small')
      }

      // NOTE(review): getHeader() reads a block's payload from `.content`;
      // confirm that external data blocks really expose it as `.data`.
      return {
        seq: refBlock.data.toString('utf8'),
        start: sliceHeader.refSeqStart,
        end: sliceHeader.refSeqStart + sliceHeader.refSeqSpan - 1,
        span: sliceHeader.refSeqSpan,
      }
    }
    if (
      compressionScheme.referenceRequired ||
      this.file.fetchReferenceSequenceCallback
    ) {
      if (!this.file.fetchReferenceSequenceCallback) {
        throw new Error(
          'reference sequence not embedded, and seqFetch callback not provided, cannot fetch reference sequence',
        )
      }

      const seq = await this.file.fetchReferenceSequenceCallback(
        sliceHeader.refSeqId,
        sliceHeader.refSeqStart,
        sliceHeader.refSeqStart + sliceHeader.refSeqSpan - 1,
      )

      if (seq.length !== sliceHeader.refSeqSpan) {
        throw new CramArgumentError(
          'seqFetch callback returned a reference sequence of the wrong length',
        )
      }

      return {
        seq,
        start: sliceHeader.refSeqStart,
        end: sliceHeader.refSeqStart + sliceHeader.refSeqSpan - 1,
        span: sliceHeader.refSeqSpan,
      }
    }

    return undefined
  }

  /**
   * @returns {Promise<Array<CramRecord>>} every record in the slice
   */
  getAllRecords() {
    return this.getRecords(() => true)
  }

  /**
   * Decode all records of the slice and resolve intra-slice mate pairs.
   *
   * If decoding runs past the end of a buffer, a warning is logged and only
   * the records decoded so far are kept.
   *
   * @returns {Promise<Array<CramRecord>>}
   * @throws {CramMalformedError} on a reference MD5 mismatch (only checked
   *   when `file.options.checkSequenceMD5` is set), a data series without a
   *   codec, or mate pairs that cannot be resolved
   */
  async _fetchRecords() {
    const { majorVersion } = await this.file.getDefinition()

    const compressionScheme = await this.container.getCompressionScheme()

    const sliceHeader = await this.getHeader()

    const blocksByContentId = await this._getBlocksContentIdIndex()

    // check MD5 of reference if available; an all-zero MD5 is skipped
    if (
      majorVersion > 1 &&
      this.file.options.checkSequenceMD5 &&
      sliceHeader.content.refSeqId >= 0 &&
      sliceHeader.content.md5.join('') !== '0000000000000000'
    ) {
      const refRegion = await this.getReferenceRegion()
      if (refRegion) {
        const { seq, start, end } = refRegion
        const seqMd5 = sequenceMD5(seq)
        // render the stored bytes as two hex digits each
        const storedMd5 = sliceHeader.content.md5
          .map(byte => (byte < 16 ? '0' : '') + byte.toString(16))
          .join('')
        if (seqMd5 !== storedMd5) {
          throw new CramMalformedError(
            `MD5 checksum reference mismatch for ref ${sliceHeader.content.refSeqId} pos ${start}..${end}. recorded MD5: ${storedMd5}, calculated MD5: ${seqMd5}`,
          )
        }
      }
    }

    // tracks the read position within the block. codec.decode() methods
    // advance the byte and bit positions in the cursor as they decode data
    // note that we are only decoding a single block here, the core data block
    const coreDataBlock = await this.getCoreDataBlock()
    const cursors = {
      lastAlignmentStart: sliceHeader.content.refSeqStart || 0,
      coreBlock: { bitPosition: 7, bytePosition: 0 },
      externalBlocks: {
        // cursors for external blocks are created lazily, one per content id
        getCursor(contentId) {
          if (!this[contentId]) {
            this[contentId] = { bitPosition: 7, bytePosition: 0 }
          }
          return this[contentId]
        },
      },
    }

    const decodeDataSeries = dataSeriesName => {
      const codec = compressionScheme.getCodecForDataSeries(dataSeriesName)
      if (!codec) {
        throw new CramMalformedError(
          `no codec defined for ${dataSeriesName} data series`,
        )
      }
      return codec.decode(this, coreDataBlock, blocksByContentId, cursors)
    }

    let records = new Array(sliceHeader.content.numRecords)
    for (let i = 0; i < records.length; i += 1) {
      try {
        records[i] = decodeRecord(
          this,
          decodeDataSeries,
          compressionScheme,
          sliceHeader,
          coreDataBlock,
          blocksByContentId,
          cursors,
          majorVersion,
          i,
        )
        records[i].uniqueId =
          sliceHeader.contentPosition +
          sliceHeader.content.recordCounter +
          i +
          1
      } catch (e) {
        if (e instanceof CramBufferOverrunError) {
          // best effort on truncated data: keep what was decoded so far
          console.warn(
            'read attempted beyond end of buffer, file seems truncated.',
          )
          records = records.filter(r => !!r)
          break
        } else {
          throw e
        }
      }
    }

    // interpret `recordsToNextFragment` attributes to make standard `mate` objects
    // Resolve mate pair cross-references between records in this slice
    for (let i = 0; i < records.length; i += 1) {
      const { mateRecordNumber } = records[i]
      if (mateRecordNumber >= 0) {
        associateIntraSliceMate(
          records,
          i,
          records[i],
          records[mateRecordNumber],
        )
      }
    }

    return records
  }

  /**
   * Get the records of the slice that pass `filterFunction`. Decoded records
   * are cached in the file-level feature cache. If the file has a
   * `fetchReferenceSequenceCallback`, the matching reference sequence is
   * fetched and added to each returned record.
   *
   * @param {function(CramRecord): boolean} filterFunction
   * @returns {Promise<Array<CramRecord>>}
   */
  async getRecords(filterFunction) {
    // fetch the features if necessary, using the file-level feature cache
    const cacheKey = this.container.filePosition + this.containerPosition
    let recordsPromise = this.file.featureCache.get(cacheKey)
    if (!recordsPromise) {
      recordsPromise = this._fetchRecords()
      this.file.featureCache.set(cacheKey, recordsPromise)
    }

    const records = (await recordsPromise).filter(filterFunction)

    // if we can fetch reference sequence, add the reference sequence to the records
    if (records.length && this.file.fetchReferenceSequenceCallback) {
      const sliceHeader = await this.getHeader()
      if (
        sliceHeader.content.refSeqId >= 0 || // single-ref slice
        sliceHeader.content.refSeqId === -2 // multi-ref slice
      ) {
        const singleRefId =
          sliceHeader.content.refSeqId >= 0
            ? sliceHeader.content.refSeqId
            : undefined
        const compressionScheme = await this.container.getCompressionScheme()
        const refRegions = {} // seqId => { start, end, seq }

        // iterate over the records to find the spans of the reference sequences we need to fetch
        for (let i = 0; i < records.length; i += 1) {
          const seqId =
            singleRefId !== undefined ? singleRefId : records[i].sequenceId
          let refRegion = refRegions[seqId]
          if (!refRegion) {
            refRegion = {
              id: seqId,
              start: records[i].alignmentStart,
              end: -Infinity,
            }
            refRegions[seqId] = refRegion
          }

          const end =
            records[i].alignmentStart +
            (records[i].lengthOnRef || records[i].readLength) -
            1
          if (end > refRegion.end) {
            refRegion.end = end
          }
          if (records[i].alignmentStart < refRegion.start) {
            refRegion.start = records[i].alignmentStart
          }
        }

        // fetch the `seq` for all of the ref regions, in parallel;
        // regions with id -1 or an empty span are left without `seq`
        await Promise.all(
          Object.values(refRegions).map(async refRegion => {
            if (refRegion.id !== -1 && refRegion.start <= refRegion.end) {
              refRegion.seq = await this.file.fetchReferenceSequenceCallback(
                refRegion.id,
                refRegion.start,
                refRegion.end,
              )
            }
          }),
        )

        // now decorate all the records with them
        for (let i = 0; i < records.length; i += 1) {
          const seqId =
            singleRefId !== undefined ? singleRefId : records[i].sequenceId
          const refRegion = refRegions[seqId]
          if (refRegion && refRegion.seq) {
            records[i].addReferenceSequence(refRegion, compressionScheme)
          }
        }
      }
    }

    return records
  }
}
493
+
494
// Wrap the argument-less, frequently called CramSlice methods with
// tinyMemoize so repeated calls on the same slice reuse the first result.
const memoizedMethodNames = 'getHeader getBlocks _getBlocksContentIdIndex'.split(
  ' ',
)
for (const methodName of memoizedMethodNames) {
  tinyMemoize(CramSlice, methodName)
}
@@ -0,0 +1,169 @@
1
+ import md5 from 'md5'
2
+ import { CramBufferOverrunError } from '../errors'
3
+
4
/**
 * Number of bytes the ITF8 encoding of `v` occupies.
 * @param {number} v integer value (treated as a 32-bit integer by the
 *   bitwise tests)
 * @returns {number} 1 to 5
 */
export function itf8Size(v) {
  // largest payload that fits in 1, 2, 3 and 4 bytes respectively
  const payloadMasks = [0x7f, 0x3fff, 0x1fffff, 0xfffffff]
  for (let byteCount = 1; byteCount <= payloadMasks.length; byteCount += 1) {
    const overflowBits = v & ~payloadMasks[byteCount - 1]
    if (overflowBits === 0) {
      return byteCount
    }
  }
  return 5
}
19
+
20
/**
 * Decode one ITF8-encoded integer from `buffer`.
 *
 * The first byte selects the encoded width (1 to 5 bytes). The value is
 * assembled with 32-bit bitwise operators, so a 5-byte encoding can produce
 * a negative number.
 *
 * @param {Buffer|Uint8Array|Array<number>} buffer bytes to read from
 * @param {number} initialOffset index of the first byte of the integer
 * @returns {[number, number]} the decoded value and the number of bytes read
 * @throws {CramBufferOverrunError} if the encoding extends past the end of
 *   `buffer`
 */
export function parseItf8(buffer, initialOffset) {
  const lead = buffer[initialOffset]
  let value
  let width
  if (lead < 0x80) {
    value = lead
    width = 1
  } else if (lead < 0xc0) {
    value = ((lead << 8) | buffer[initialOffset + 1]) & 0x3fff
    width = 2
  } else if (lead < 0xe0) {
    value =
      ((lead << 16) |
        (buffer[initialOffset + 1] << 8) |
        buffer[initialOffset + 2]) &
      0x1fffff
    width = 3
  } else if (lead < 0xf0) {
    value =
      ((lead << 24) |
        (buffer[initialOffset + 1] << 16) |
        (buffer[initialOffset + 2] << 8) |
        buffer[initialOffset + 3]) &
      0x0fffffff
    width = 4
  } else {
    // only the low nibble of the first and of the last byte carry payload
    value =
      ((lead & 0x0f) << 28) |
      (buffer[initialOffset + 1] << 20) |
      (buffer[initialOffset + 2] << 12) |
      (buffer[initialOffset + 3] << 4) |
      (buffer[initialOffset + 4] & 0x0f)
    // x=((0xff & 0x0f)<<28) | (0xff<<20) | (0xff<<12) | (0xff<<4) | (0x0f & 0x0f);
    // TODO *val_p = uv < 0x80000000UL ? uv : -((int32_t) (0xffffffffUL - uv)) - 1;
    width = 5
  }

  // the bounds check runs after the reads: out-of-range reads yield
  // undefined rather than throwing, and the value is discarded here
  const endOffset = initialOffset + width
  if (endOffset > buffer.length) {
    throw new CramBufferOverrunError(
      'Attempted to read beyond end of buffer; this file seems truncated.',
    )
  }
  return [value, endOffset - initialOffset]
}
61
+
62
+ // parseLtf8(buffer, initialOffset) {
63
+ // let offset = initialOffset
64
+ // const countFlags = buffer[offset]
65
+ // let result
66
+ // if (countFlags < 0x80) {
67
+ // result = countFlags
68
+ // offset += 1
69
+ // } else if (countFlags < 0xc0) {
70
+ // result = ((buffer[offset] << 8) | buffer[offset + 1]) & 0x3fff
71
+ // offset += 2
72
+ // } else if (countFlags < 0xe0) {
73
+ // result =
74
+ // ((buffer[offset] << 16) |
75
+ // (buffer[offset + 1] << 8) |
76
+ // buffer[offset + 2]) &
77
+ // 0x1fffff
78
+ // offset += 3
79
+ // } else if (countFlags < 0xf0) {
80
+ // result =
81
+ // ((buffer[offset] << 24) |
82
+ // (buffer[offset + 1] << 16) |
83
+ // (buffer[offset + 2] << 8) |
84
+ // buffer[offset + 3]) &
85
+ // 0x0fffffff
86
+ // offset += 4
87
+ // } else if (countFlags < 0xf8) {
88
+ // result =
89
+ // ((buffer[offset] & 15) * Math.pow(2,32) + (buffer[offset + 1] << 24)) |
90
+ // ((buffer[offset + 2] << 16) |
91
+ // (buffer[offset + 3] << 8) |
92
+ // buffer[offset + 4])
93
+ // // TODO *val_p = uv < 0x80000000UL ? uv : -((int32_t) (0xffffffffUL - uv)) - 1;
94
+ // offset += 5
95
+ // } else if (countFlags < 0xfc) {
96
+ // result =
97
+ // ((((buffer[offset] & 7) << 8) | buffer[offset + 1]) * Math.pow(2,32) +
98
+ // (buffer[offset + 2] << 24)) |
99
+ // ((buffer[offset + 3] << 16) |
100
+ // (buffer[offset + 4] << 8) |
101
+ // buffer[offset + 5])
102
+ // offset += 6
103
+ // } else if (countFlags < 0xfe) {
104
+ // result =
105
+ // ((((buffer[offset] & 3) << 16) |
106
+ // (buffer[offset + 1] << 8) |
107
+ // buffer[offset + 2]) *
108
+ // Math.pow(2,32) +
109
+ // (buffer[offset + 3] << 24)) |
110
+ // ((buffer[offset + 4] << 16) |
111
+ // (buffer[offset + 5] << 8) |
112
+ // buffer[offset + 6])
113
+ // offset += 7
114
+ // } else if (countFlags < 0xff) {
115
+ // result = Long.fromBytesBE(buffer.slice(offset + 1, offset + 8))
116
+ // if (
117
+ // result.greaterThan(Number.MAX_SAFE_INTEGER) ||
118
+ // result.lessThan(Number.MIN_SAFE_INTEGER)
119
+ // )
120
+ // throw new CramUnimplementedError('integer overflow')
121
+ // result = result.toNumber()
122
+ // offset += 8
123
+ // } else {
124
+ // result = Long.fromBytesBE(buffer.slice(offset + 1, offset + 9))
125
+ // if (
126
+ // result.greaterThan(Number.MAX_SAFE_INTEGER) ||
127
+ // result.lessThan(Number.MIN_SAFE_INTEGER)
128
+ // )
129
+ // throw new CramUnimplementedError('integer overflow')
130
+ // result = result.toNumber()
131
+ // offset += 9
132
+ // }
133
+ // return [result, offset - initialOffset]
134
+ // },
135
+
136
/**
 * Run `parser` over `buffer` and annotate the parsed item with where it ends
 * and how large it is.
 *
 * @param {Buffer} buffer data handed to `parser.parse`
 * @param {{parse: function}} parser object whose `parse(buffer)` returns
 *   `{ offset, result }`
 * @param {number} [startBufferPosition=0] subtracted from the parser's
 *   `offset` to give `_size`
 * @param {number} [startFilePosition=0] added to the parser's `offset` to
 *   give `_endPosition`
 * @returns {object} the parser's `result`, with `_endPosition` and `_size`
 *   set on it
 */
export function parseItem(
  buffer,
  parser,
  startBufferPosition = 0,
  startFilePosition = 0,
) {
  const parsed = parser.parse(buffer)
  const consumed = parsed.offset
  const item = parsed.result
  item._endPosition = consumed + startFilePosition
  item._size = consumed - startBufferPosition
  return item
}
147
+
148
/**
 * Replace `_class.prototype[methodName]` with a version that runs the
 * original once per instance and returns the stored result afterwards.
 *
 * Arguments are ignored entirely, so this only works on methods that take no
 * arguments. The result is stored on the instance under `_memo_<methodName>`.
 * If the result is a promise that rejects, the stored value is removed so a
 * later call runs the method again.
 *
 * (This would be nice as a decorator, but babel support for decorators may go
 * away or change.)
 *
 * @param {Function} _class class whose prototype is patched
 * @param {string} methodName name of the method to memoize
 */
export function tinyMemoize(_class, methodName) {
  const originalMethod = _class.prototype[methodName]
  const cacheKey = `_memo_${methodName}`

  function _tinyMemoized() {
    if (cacheKey in this) {
      return this[cacheKey]
    }
    const outcome = originalMethod.call(this)
    this[cacheKey] = outcome
    // do not keep failures around: forget the entry if the promise rejects
    Promise.resolve(outcome).catch(() => {
      delete this[cacheKey]
    })
    return this[cacheKey]
  }

  _class.prototype[methodName] = _tinyMemoized
}
166
+
167
/**
 * MD5 of a sequence after normalising it: the sequence is upper-cased and
 * every character outside the range \x21-\x7e is removed before hashing.
 * @param {string} seq
 * @returns {string} whatever the imported `md5` function returns for the
 *   normalised sequence
 */
export function sequenceMD5(seq) {
  const upperCased = seq.toUpperCase()
  const normalized = upperCased.replace(/[^\x21-\x7e]/g, '')
  return md5(normalized)
}
package/src/errors.js ADDED
@@ -0,0 +1,22 @@
1
// Each class sets an explicit `name`, so `err.name`, `String(err)` and the
// stack header identify the error type instead of reporting a generic
// "Error". The names are string literals rather than `constructor.name` so
// they survive minification.

/** Base class of the errors defined in this module. */
export class CramError extends Error {
  constructor(...args) {
    super(...args)
    this.name = 'CramError'
  }
}

/**
 * Error caused by encountering a part of the CRAM spec that has not yet been
 * implemented.
 * NOTE(review): unlike the other errors here this extends Error directly, not
 * CramError; left as-is so existing `instanceof CramError` checks keep their
 * behavior. Confirm whether that is intentional.
 */
export class CramUnimplementedError extends Error {
  constructor(...args) {
    super(...args)
    this.name = 'CramUnimplementedError'
  }
}

/** An error caused by malformed data. */
export class CramMalformedError extends CramError {
  constructor(...args) {
    super(...args)
    this.name = 'CramMalformedError'
  }
}

/**
 * An error caused by attempting to read beyond the end of the defined data.
 */
export class CramBufferOverrunError extends CramMalformedError {
  constructor(...args) {
    super(...args)
    this.name = 'CramBufferOverrunError'
  }
}

/**
 * An error caused by data being too big, exceeding a size limit.
 */
export class CramSizeLimitError extends CramError {
  constructor(...args) {
    super(...args)
    this.name = 'CramSizeLimitError'
  }
}

/**
 * An invalid argument was supplied to a cram-js method or object.
 */
export class CramArgumentError extends CramError {
  constructor(...args) {
    super(...args)
    this.name = 'CramArgumentError'
  }
}
package/src/index.js ADDED
@@ -0,0 +1,5 @@
1
+ import CramFile from './cramFile'
2
+ import IndexedCramFile from './indexedCramFile'
3
+ import CraiIndex from './craiIndex'
4
+
5
+ export { CramFile, IndexedCramFile, CraiIndex }