@gmod/cram 3.0.6 → 3.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. package/dist/cram-bundle.js +1 -1
  2. package/dist/cramFile/codecs/byteArrayStop.d.ts +2 -2
  3. package/dist/cramFile/file.js +8 -10
  4. package/dist/cramFile/file.js.map +1 -1
  5. package/dist/cramFile/util.js.map +1 -1
  6. package/dist/htscodecs/arith_gen.d.ts +26 -0
  7. package/dist/htscodecs/arith_gen.js +558 -0
  8. package/dist/htscodecs/arith_gen.js.map +1 -0
  9. package/dist/htscodecs/arith_sh.d.ts +16 -0
  10. package/dist/htscodecs/arith_sh.js +128 -0
  11. package/dist/htscodecs/arith_sh.js.map +1 -0
  12. package/dist/htscodecs/byte_model.d.ts +11 -0
  13. package/dist/htscodecs/byte_model.js +113 -0
  14. package/dist/htscodecs/byte_model.js.map +1 -0
  15. package/dist/htscodecs/fqzcomp.d.ts +2 -0
  16. package/dist/htscodecs/fqzcomp.js +744 -0
  17. package/dist/htscodecs/fqzcomp.js.map +1 -0
  18. package/dist/htscodecs/index.d.ts +5 -0
  19. package/dist/htscodecs/index.js +70 -0
  20. package/dist/htscodecs/index.js.map +1 -0
  21. package/dist/htscodecs/iostream.d.ts +26 -0
  22. package/dist/htscodecs/iostream.js +242 -0
  23. package/dist/htscodecs/iostream.js.map +1 -0
  24. package/dist/htscodecs/main_arith_gen.d.ts +1 -0
  25. package/dist/htscodecs/main_arith_gen.js +86 -0
  26. package/dist/htscodecs/main_arith_gen.js.map +1 -0
  27. package/dist/htscodecs/main_fqzcomp.d.ts +1 -0
  28. package/dist/htscodecs/main_fqzcomp.js +112 -0
  29. package/dist/htscodecs/main_fqzcomp.js.map +1 -0
  30. package/dist/htscodecs/main_rans.d.ts +1 -0
  31. package/dist/htscodecs/main_rans.js +83 -0
  32. package/dist/htscodecs/main_rans.js.map +1 -0
  33. package/dist/htscodecs/main_rans4x16.d.ts +1 -0
  34. package/dist/htscodecs/main_rans4x16.js +82 -0
  35. package/dist/htscodecs/main_rans4x16.js.map +1 -0
  36. package/dist/htscodecs/main_tok3.d.ts +1 -0
  37. package/dist/htscodecs/main_tok3.js +84 -0
  38. package/dist/htscodecs/main_tok3.js.map +1 -0
  39. package/dist/htscodecs/rans.d.ts +2 -0
  40. package/dist/htscodecs/rans.js +480 -0
  41. package/dist/htscodecs/rans.js.map +1 -0
  42. package/dist/htscodecs/rans4x16.d.ts +2 -0
  43. package/dist/htscodecs/rans4x16.js +896 -0
  44. package/dist/htscodecs/rans4x16.js.map +1 -0
  45. package/dist/htscodecs/tok3.d.ts +2 -0
  46. package/dist/htscodecs/tok3.js +347 -0
  47. package/dist/htscodecs/tok3.js.map +1 -0
  48. package/esm/cramFile/codecs/byteArrayStop.d.ts +2 -2
  49. package/esm/cramFile/file.js +8 -10
  50. package/esm/cramFile/file.js.map +1 -1
  51. package/esm/cramFile/util.js.map +1 -1
  52. package/esm/htscodecs/arith_gen.d.ts +26 -0
  53. package/esm/htscodecs/arith_gen.js +558 -0
  54. package/esm/htscodecs/arith_gen.js.map +1 -0
  55. package/esm/htscodecs/arith_sh.d.ts +16 -0
  56. package/esm/htscodecs/arith_sh.js +128 -0
  57. package/esm/htscodecs/arith_sh.js.map +1 -0
  58. package/esm/htscodecs/byte_model.d.ts +11 -0
  59. package/esm/htscodecs/byte_model.js +113 -0
  60. package/esm/htscodecs/byte_model.js.map +1 -0
  61. package/esm/htscodecs/fqzcomp.d.ts +2 -0
  62. package/esm/htscodecs/fqzcomp.js +744 -0
  63. package/esm/htscodecs/fqzcomp.js.map +1 -0
  64. package/esm/htscodecs/index.d.ts +5 -0
  65. package/esm/htscodecs/index.js +70 -0
  66. package/esm/htscodecs/index.js.map +1 -0
  67. package/esm/htscodecs/iostream.d.ts +26 -0
  68. package/esm/htscodecs/iostream.js +242 -0
  69. package/esm/htscodecs/iostream.js.map +1 -0
  70. package/esm/htscodecs/main_arith_gen.d.ts +1 -0
  71. package/esm/htscodecs/main_arith_gen.js +86 -0
  72. package/esm/htscodecs/main_arith_gen.js.map +1 -0
  73. package/esm/htscodecs/main_fqzcomp.d.ts +1 -0
  74. package/esm/htscodecs/main_fqzcomp.js +112 -0
  75. package/esm/htscodecs/main_fqzcomp.js.map +1 -0
  76. package/esm/htscodecs/main_rans.d.ts +1 -0
  77. package/esm/htscodecs/main_rans.js +83 -0
  78. package/esm/htscodecs/main_rans.js.map +1 -0
  79. package/esm/htscodecs/main_rans4x16.d.ts +1 -0
  80. package/esm/htscodecs/main_rans4x16.js +82 -0
  81. package/esm/htscodecs/main_rans4x16.js.map +1 -0
  82. package/esm/htscodecs/main_tok3.d.ts +1 -0
  83. package/esm/htscodecs/main_tok3.js +84 -0
  84. package/esm/htscodecs/main_tok3.js.map +1 -0
  85. package/esm/htscodecs/rans.d.ts +2 -0
  86. package/esm/htscodecs/rans.js +480 -0
  87. package/esm/htscodecs/rans.js.map +1 -0
  88. package/esm/htscodecs/rans4x16.d.ts +2 -0
  89. package/esm/htscodecs/rans4x16.js +896 -0
  90. package/esm/htscodecs/rans4x16.js.map +1 -0
  91. package/esm/htscodecs/tok3.d.ts +2 -0
  92. package/esm/htscodecs/tok3.js +347 -0
  93. package/esm/htscodecs/tok3.js.map +1 -0
  94. package/package.json +1 -2
  95. package/src/cramFile/file.ts +8 -10
  96. package/src/cramFile/util.ts +0 -1
  97. package/src/htscodecs/Makefile +142 -0
  98. package/src/htscodecs/README.md +64 -0
  99. package/src/htscodecs/arith_gen.js +607 -0
  100. package/src/htscodecs/arith_sh.js +138 -0
  101. package/src/htscodecs/byte_model.js +126 -0
  102. package/src/htscodecs/fqzcomp.js +834 -0
  103. package/src/htscodecs/index.js +79 -0
  104. package/src/htscodecs/iostream.js +256 -0
  105. package/src/htscodecs/main_arith_gen.js +96 -0
  106. package/src/htscodecs/main_fqzcomp.js +113 -0
  107. package/src/htscodecs/main_rans.js +88 -0
  108. package/src/htscodecs/main_rans4x16.js +87 -0
  109. package/src/htscodecs/main_tok3.js +86 -0
  110. package/src/htscodecs/rans.js +545 -0
  111. package/src/htscodecs/rans4x16.js +1003 -0
  112. package/src/htscodecs/tok3.js +396 -0
  113. package/src/typings/htscodecs.d.ts +0 -6
@@ -0,0 +1,396 @@
1
+ /*
2
+ * Copyright (c) 2019 Genome Research Ltd.
3
+ * Author(s): James Bonfield
4
+ *
5
+ * Redistribution and use in source and binary forms, with or without
6
+ * modification, are permitted provided that the following conditions are met:
7
+ *
8
+ * 1. Redistributions of source code must retain the above copyright notice,
9
+ * this list of conditions and the following disclaimer.
10
+ *
11
+ * 2. Redistributions in binary form must reproduce the above
12
+ * copyright notice, this list of conditions and the following
13
+ * disclaimer in the documentation and/or other materials provided
14
+ * with the distribution.
15
+ *
16
+ * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
17
+ * Institute nor the names of its contributors may be used to endorse
18
+ * or promote products derived from this software without specific
19
+ * prior written permission.
20
+ *
21
+ * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS
22
+ * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
24
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH
25
+ * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32
+ */
33
+
34
+ // Name tokeniser
35
+ //
36
+ // This is a reference implementation designed to match the
37
+ // written specification as closely as possible. It is *NOT*
38
+ // an efficient implementation, but see comments below.
39
+
40
+ const IOStream = require('./iostream')
41
+ const rans = require('./rans4x16')
42
+ const arith_gen = require('./arith_gen')
43
+
44
+ var arith = new arith_gen()
45
+
46
+ const TOK_TYPE = 0 // per-position stream of token-type bytes (read first for every token)
47
+ const TOK_STRING = 1 // string token, stored with WriteString / ReadString
48
+ const TOK_CHAR = 2 // single-character token, stored with WriteChar / ReadChar
49
+ const TOK_DIGITS0 = 3 // digits with leading zero(s): Uint32 value, width kept in TOK_DZLEN
50
+ const TOK_DZLEN = 4 // one byte per TOK_DIGITS0 token: the zero-padded width
51
+ const TOK_DUP = 5 // token 0 only: name equals an earlier name; Uint32 distance back to it
52
+ const TOK_DIFF = 6 // token 0 only: name is tokenised; Uint32 distance back to the reference name
53
+ const TOK_DIGITS = 7 // digits without leading zeros, stored as Uint32
54
+ const TOK_DELTA = 8 // one byte added to the reference name's numeric token
55
+ const TOK_DELTA0 = 9 // as TOK_DELTA, result left-padded to the reference token's length
56
+ const TOK_MATCH = 10 // token copied unchanged from the reference name
57
+ const TOK_NOP = 11 // not emitted by this encoder; the decoder's default branch yields ''
58
+ const TOK_END = 12 // terminates a name's token list (decodes to '')
59
+
60
+ //----------------------------------------------------------------------
61
+ // Token byte streams
62
/**
 * Read every serialised token byte stream from `src` until it is exhausted.
 *
 * Each stream starts with a header byte: bit 128 opens the next token
 * position, bit 64 marks the stream as a duplicate of an earlier one, and the
 * low 6 bits give the stream type (a TOK_* value).
 *
 * @param {IOStream} src - input positioned just after the tokeniser header
 * @param {number} in_size - unused; kept for interface compatibility
 * @param {number} use_arith - truthy to decode with the arithmetic coder, else rANS
 * @param {number} nnames - number of names, used to synthesise a missing type stream
 * @returns {Array} B where B[position][type] is an IOStream of decoded bytes
 */
function DecodeTokenByteStreams(src, in_size, use_arith, nnames) {
  const streams = new Array(256)
  let pos = -1

  while (!src.EOF()) {
    const header = src.ReadByte()
    const opensPosition = header & 128
    const isDuplicate = header & 64
    const type = header & 63

    if (opensPosition) {
      pos++
      streams[pos] = new Array(13)

      // A position whose first stream is not the type stream has an implicit
      // one: `type` for the first name, TOK_MATCH for each of the others.
      if (type != TOK_TYPE) {
        const matches = new Array(nnames - 1).fill(TOK_MATCH)
        streams[pos][TOK_TYPE] = new IOStream(Buffer.from([type].concat(matches)))
      }
    }

    if (isDuplicate) {
      // Two bytes name the (position, type) stream whose buffer is re-read.
      const fromPos = src.ReadByte()
      const fromType = src.ReadByte()
      streams[pos][type] = new IOStream(streams[fromPos][fromType].buf)
      continue
    }

    // Otherwise: a 7-bit-encoded length followed by that many compressed bytes.
    const compressedLength = src.ReadUint7()
    const compressed = src.ReadData(compressedLength)
    const codec = use_arith ? arith : rans
    streams[pos][type] = new IOStream(codec.decode(compressed))
  }

  return streams
}
99
+
100
+ //----------------------------------------------------------------------
101
+ // Token decode
102
/**
 * Render `val` as a string, left-padded with '0' to at least `len` characters.
 *
 * @param {number|string} val - value to render
 * @param {number} len - minimum width; values already that wide are returned unpadded
 * @returns {string} the padded string
 */
function LeftPadNumber(val, len) {
  const digits = String(val)
  const missing = len - digits.length

  // A missing or non-numeric len gives NaN here, which means "no padding".
  if (!(missing > 0)) return digits

  // Math.ceil keeps the old loop's result for a fractional len.
  return '0'.repeat(Math.ceil(missing)) + digits
}
108
+
109
/**
 * Decode name number `n` from the token byte streams.
 *
 * Token position 0 holds TOK_DUP or TOK_DIFF plus a Uint32 distance that
 * selects the earlier reference name. A TOK_DUP name is copied whole from the
 * reference; otherwise tokens are read position by position until TOK_END.
 *
 * @param {Array} B - B[position][type] streams from DecodeTokenByteStreams
 * @param {Array} N - decoded names so far; N[n] is written
 * @param {Array} T - decoded token values per name; T[n] is written
 * @param {number} n - index of the name to decode
 * @returns {string} the decoded name
 */
function DecodeSingleName(B, N, T, n) {
  const head = B[0]
  let type = head[TOK_TYPE].ReadByte()
  const m = n - head[type].ReadUint32()

  if (type == TOK_DUP) {
    N[n] = N[m]
    T[n] = T[m]
    return N[n]
  }

  N[n] = ''
  const tokens = new Array(256)
  T[n] = tokens
  // Looked up only after T[n] is installed: when m equals n (distance 0) the
  // reference is this name's own, still mostly empty, token array.
  const ref = T[m]

  let t = 1
  do {
    const streams = B[t]
    type = streams[TOK_TYPE].ReadByte()

    let value = ''
    if (type === TOK_CHAR) {
      value = streams[TOK_CHAR].ReadChar()
    } else if (type === TOK_STRING) {
      value = streams[TOK_STRING].ReadString()
    } else if (type === TOK_DIGITS) {
      value = streams[TOK_DIGITS].ReadUint32()
    } else if (type === TOK_DIGITS0) {
      const number = streams[TOK_DIGITS0].ReadUint32()
      const width = streams[TOK_DZLEN].ReadByte()
      value = LeftPadNumber(number, width)
    } else if (type === TOK_DELTA) {
      value = (ref[t] >> 0) + streams[TOK_DELTA].ReadByte()
    } else if (type === TOK_DELTA0) {
      const sum = (ref[t] >> 0) + streams[TOK_DELTA0].ReadByte()
      value = LeftPadNumber(sum, ref[t].length)
    } else if (type === TOK_MATCH) {
      value = ref[t]
    }
    // Any other type (including TOK_END) contributes an empty token.

    tokens[t] = value
    N[n] += value
    t++
  } while (type != TOK_END)

  return N[n]
}
169
+
170
+ //----------------------------------------------------------------------
171
+ // Main tokeniser decode entry function: decodes a compressed src and
172
+ // returns the uncompressed buffer.
173
/**
 * Tokeniser decode entry point.
 *
 * The header is two Uint32 values (the first is read and ignored, the second
 * is the number of names) followed by a byte selecting the entropy coder.
 *
 * @param {Buffer} src - compressed name block
 * @param {number} len - passed through to DecodeTokenByteStreams
 * @param {string} [separator] - appended after every name; '\n' when undefined
 * @returns {string} all names, each followed by the separator
 */
function decode(src, len, separator) {
  const input = new IOStream(src)
  input.ReadUint32() // first header field is not needed for decoding
  const nnames = input.ReadUint32()
  const use_arith = input.ReadByte()

  const B = DecodeTokenByteStreams(input, len, use_arith, nnames)
  const N = new Array(nnames)
  const T = new Array(nnames)

  const sep = typeof separator === 'undefined' ? '\n' : separator
  let text = ''
  for (let i = 0; i < nnames; i++) {
    text += DecodeSingleName(B, N, T, i) + sep
  }

  return text
}
190
+
191
+ //----------------------------------------------------------------------
192
+ // Main tokeniser encode function
193
+
194
+ // Encoder is trickier than decode as we have a lot of decisions to make.
195
+ // However here we just make a simple guess without anything complex,
196
+ // to demonstrate the basic idea. See the C implementation for further
197
+ // expansion on this.
198
/**
 * Tokeniser encode entry point.
 *
 * Splits `src` into newline-separated names, tokenises each against its
 * predecessor, then serialises the streams of every token position in turn.
 *
 * @param {Buffer} src - names separated by '\n' (one trailing '\n' is ignored)
 * @param {number} use_arith - written to the header; truthy selects the arithmetic coder
 * @returns {Buffer} the used portion of the output buffer
 */
function encode(src, use_arith) {
  // Convert buffer to array of names
  let text = src.toString()
  if (text.endsWith('\n')) text = text.slice(0, -1)
  const names = text.split('\n')

  const out = new IOStream('', 0, text.length * 2 + 10000) // guess max size
  out.WriteUint32(text.length)
  out.WriteUint32(names.length)
  out.WriteByte(use_arith)

  // Tokenise names, tracking the largest token count and token length seen
  const T = new Array(names.length)
  const H = {}
  const F = new Array(256).fill(0) // DELTA vs DIGIT frequency
  let max_tok = 0
  let max_len = 0
  names.forEach((name, index) => {
    const [ntok, len] = TokeniseName(T, H, F, name, index)
    if (max_tok < ntok) max_tok = ntok
    if (max_len < len) max_len = len
  })

  // Convert tokens to byte streams and serialise, one token position at a time
  for (let tnum = 0; tnum < max_tok; tnum++) {
    const B = Array.from(
      { length: TOK_END + 1 },
      () => new IOStream('', 0, names.length * max_len),
    )

    FillByteStreams(B, T, tnum, names, max_tok, max_len)
    SerialiseByteStreams(B, tnum, use_arith, out)
  }

  return out.buf.slice(0, out.pos)
}
233
+
234
/**
 * Append token position `tnum` of every name to the per-type streams in B.
 *
 * @param {Array} B - one IOStream per TOK_* type; written to
 * @param {Array} T - T[n][position] token records ({ type, val, ... })
 * @param {number} tnum - token position being serialised
 * @param {Array} names - the names; only its length is used
 * @param {number} max_tok - unused
 * @param {number} max_len - unused
 */
function FillByteStreams(B, T, tnum, names, max_tok, max_len) {
  for (let n = 0; n < names.length; n++) {
    const row = T[n]

    // A duplicated name contributes only its token 0.
    if (tnum > 0 && row[0].type == TOK_DUP) continue

    const token = row[tnum]
    if (!token) continue

    const { type, val } = token
    B[TOK_TYPE].WriteByte(type)

    switch (type) {
      case TOK_DIFF:
      case TOK_DUP:
      case TOK_DIGITS:
        B[type].WriteUint32(val)
        break

      case TOK_DIGITS0:
        // The value goes to the digits stream, its padded width to TOK_DZLEN.
        B[TOK_DIGITS0].WriteUint32(val)
        B[TOK_DZLEN].WriteByte(val.length)
        break

      case TOK_STRING:
        B[TOK_STRING].WriteString(val)
        break

      case TOK_CHAR:
        B[TOK_CHAR].WriteChar(val)
        break

      case TOK_DELTA:
      case TOK_DELTA0:
        B[type].WriteByte(val)
        break
    }
  }
}
279
+
280
/**
 * Compress every non-empty stream in B and append it to `out`.
 *
 * Each stream is written as a header byte (the type, with bit 128 set on
 * type 0), the compressed length as a Uint7, then the compressed bytes.
 * B[type] is replaced by the sized buffer that was compressed.
 *
 * @param {Array} B - one IOStream per TOK_* type for this token position
 * @param {number} tnum - token position (not used in the body)
 * @param {number} use_arith - passed to try_compress
 * @param {IOStream} out - destination stream
 */
function SerialiseByteStreams(B, tnum, use_arith, out) {
  for (let type = 0; type <= TOK_END; type++) {
    const stream = B[type]
    if (stream.pos <= 0) continue

    const newTokenFlag = type == 0 ? 128 : 0
    out.WriteByte(type + newTokenFlag)

    // IOStream to sized buffer
    const raw = stream.buf.slice(0, stream.pos)
    B[type] = raw
    const packed = try_compress(raw, use_arith)

    out.WriteUint7(packed.length)
    out.WriteData(packed, packed.length)
  }
}
295
+
296
/**
 * Compress `src` with each candidate method and keep the smallest result.
 *
 * @param {Buffer} src - bytes to compress
 * @param {number} use_arith - truthy to use the arithmetic coder, else rANS
 * @returns {Buffer|undefined} the smallest encoding (the earlier method wins
 *   ties), or undefined when no method produced a result
 */
function try_compress(src, use_arith) {
  // Candidate method bytes passed to the codec's encode().
  // NOTE(review): bit 1 presumably selects order-1 and bit 8 striping; confirm against the codecs.
  const methods = [0, 1, 64, 65, 128, 129, 193 + 8]
  const codec = use_arith ? arith : rans

  let best = 1 << 30
  let comp

  // for...of visits the values in array order; the previous for...in walked
  // string keys and would also pick up any enumerable Array.prototype property.
  for (const lvl of methods) {
    // Odd methods are not tried on inputs shorter than 100 bytes.
    if (lvl & 1 && src.length < 100) continue

    // Methods with bit 8 set need a length that is a multiple of 4.
    if (lvl & 8 && src.length % 4 != 0) continue

    let tmp
    try {
      tmp = codec.encode(src, lvl)
    } catch (e) {
      // Deliberate best effort: a method that cannot encode this input is
      // skipped, not treated as fatal.
      tmp = 0
    }

    if (tmp && best > tmp.length) {
      best = tmp.length
      comp = tmp
    }
  }

  return comp
}
320
+
321
/**
 * Tokenise one name against the previous name and store the result in T[n].
 *
 * Token 0 is TOK_DUP (the name was seen before; val is the distance back to
 * its last occurrence) or TOK_DIFF (val is 0 for the first name, else 1).
 * Tokens 1.. describe runs of up to 9 alphanumerics or runs of punctuation,
 * and the list ends with a TOK_END record.
 *
 * @param {Array} T - per-name token records; T[n] is created, T[n-1] is read
 * @param {Object} H - maps a name to the index where it was last seen; updated
 * @param {Array<number>} F - per-position count of delta candidates; updated
 * @param {string} name - the name to tokenise
 * @param {number} n - index of this name
 * @returns {[number, number]} token count including token 0 and TOK_END, and
 *   the longest token value length plus 3
 */
function TokeniseName(T, H, F, name, n) {
  let max_len = 0

  // Always compare against last name only
  const p = n - 1
  T[n] = new Array(256)

  // Own-property check, not truthiness: index 0 is falsy, and names such as
  // "constructor" or "__proto__" would otherwise hit Object.prototype.
  if (Object.prototype.hasOwnProperty.call(H, name)) {
    T[n][0] = {
      type: TOK_DUP,
      val: n - H[name],
    }
  } else {
    T[n][0] = {
      type: TOK_DIFF,
      val: n == 0 ? 0 : 1,
    }
  }

  H[name] = n

  // Splits on alphanumerics, punctuation. An empty name matches nothing and
  // yields no tokens between token 0 and TOK_END.
  const tok = name.match(/([a-zA-Z0-9]{1,9})|([^a-zA-Z0-9]+)/g) || []
  let t = 0
  for (let i = 0; i < tok.length; i++) {
    t = i + 1 // token 0 = DIFF vs DUP
    let type = TOK_STRING
    let val = tok[i]
    if (/^0+[0-9]*$/.test(tok[i])) type = TOK_DIGITS0
    else if (/^[0-9]+$/.test(tok[i])) type = TOK_DIGITS
    else if (tok[i].length == 1) type = TOK_CHAR

    // Deltas are only valid between digit tokens. Without these checks
    // "007" after "5" would decode as "7", and tokens such as "1e2" or
    // "0x10" would be coerced to numbers.
    const plainDigits = type == TOK_DIGITS
    const anyDigits = plainDigits || type == TOK_DIGITS0

    const prev = p >= 0 ? T[p][t] : undefined
    if (prev) {
      if (prev.str == tok[i]) {
        type = TOK_MATCH
        val = ''
      } else if (prev.type == TOK_DIGITS || prev.type == TOK_DELTA) {
        const d = val - prev.str
        F[t]++
        if (plainDigits && d >= 0 && d < 256 && F[t] > n / 2) {
          type = TOK_DELTA
          val = d
        }
      } else if (
        (prev.type == TOK_DIGITS0 || prev.type == TOK_DELTA0) &&
        prev.str.length == val.length
      ) {
        const d = val - prev.str
        F[t]++
        if (anyDigits && d >= 0 && d < 256 && F[t] > n / 2) {
          type = TOK_DELTA0
          val = d
        }
      }
    }

    T[n][t] = {
      str: tok[i],
      val: val,
      type: type,
    }

    // +3 for integers; 5 -> (Uint32)5. A numeric delta has no length, so the
    // comparison is false and max_len is left alone.
    const needed = val.length + 3
    if (max_len < needed) max_len = needed
  }

  T[n][++t] = {
    type: TOK_END,
  }

  return [t + 1, max_len]
}
395
+
396
+ module.exports = { encode, decode }
@@ -1,6 +0,0 @@
1
- declare module '@jkbonfield/htscodecs' {
2
- function r4x16_uncompress(input: Buffer, output: Buffer): void
3
- function arith_uncompress(input: Buffer, output: Buffer): void
4
- function fqzcomp_uncompress(input: Buffer, output: Buffer): void
5
- function tok3_uncompress(input: Buffer, output: Buffer): void
6
- }