@gmod/cram 4.0.3 → 4.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91)
  1. package/dist/cram-bundle.js +1 -1
  2. package/dist/cramFile/file.js +43 -9
  3. package/dist/cramFile/file.js.map +1 -1
  4. package/dist/cramFile/slice/decodeRecord.js +3 -3
  5. package/dist/cramFile/slice/decodeRecord.js.map +1 -1
  6. package/dist/cramFile/util.d.ts +0 -2
  7. package/dist/cramFile/util.js +3 -31
  8. package/dist/cramFile/util.js.map +1 -1
  9. package/dist/htscodecs/arith_gen.d.ts +5 -7
  10. package/dist/htscodecs/arith_gen.js +122 -105
  11. package/dist/htscodecs/arith_gen.js.map +1 -1
  12. package/dist/htscodecs/arith_sh.d.ts +1 -8
  13. package/dist/htscodecs/arith_sh.js +16 -10
  14. package/dist/htscodecs/arith_sh.js.map +1 -1
  15. package/dist/htscodecs/byte_model.d.ts +1 -6
  16. package/dist/htscodecs/byte_model.js +25 -17
  17. package/dist/htscodecs/byte_model.js.map +1 -1
  18. package/dist/htscodecs/fqzcomp.d.ts +1 -1
  19. package/dist/htscodecs/fqzcomp.js +98 -77
  20. package/dist/htscodecs/fqzcomp.js.map +1 -1
  21. package/dist/htscodecs/index.d.ts +5 -5
  22. package/dist/htscodecs/index.js +53 -16
  23. package/dist/htscodecs/index.js.map +1 -1
  24. package/dist/htscodecs/iostream.d.ts +9 -20
  25. package/dist/htscodecs/iostream.js +21 -116
  26. package/dist/htscodecs/iostream.js.map +1 -1
  27. package/dist/htscodecs/rans.d.ts +1 -1
  28. package/dist/htscodecs/rans.js +65 -54
  29. package/dist/htscodecs/rans.js.map +1 -1
  30. package/dist/htscodecs/rans4x16.d.ts +1 -1
  31. package/dist/htscodecs/rans4x16.js +151 -111
  32. package/dist/htscodecs/rans4x16.js.map +1 -1
  33. package/dist/htscodecs/tok3.d.ts +1 -2
  34. package/dist/htscodecs/tok3.js +82 -239
  35. package/dist/htscodecs/tok3.js.map +1 -1
  36. package/dist/util.d.ts +1 -0
  37. package/dist/util.js +20 -0
  38. package/dist/util.js.map +1 -0
  39. package/esm/cramFile/file.js +3 -2
  40. package/esm/cramFile/file.js.map +1 -1
  41. package/esm/cramFile/slice/decodeRecord.js +3 -3
  42. package/esm/cramFile/slice/decodeRecord.js.map +1 -1
  43. package/esm/cramFile/util.d.ts +0 -2
  44. package/esm/cramFile/util.js +3 -29
  45. package/esm/cramFile/util.js.map +1 -1
  46. package/esm/htscodecs/arith_gen.d.ts +5 -7
  47. package/esm/htscodecs/arith_gen.js +108 -97
  48. package/esm/htscodecs/arith_gen.js.map +1 -1
  49. package/esm/htscodecs/arith_sh.d.ts +1 -8
  50. package/esm/htscodecs/arith_sh.js +14 -11
  51. package/esm/htscodecs/arith_sh.js.map +1 -1
  52. package/esm/htscodecs/byte_model.d.ts +1 -6
  53. package/esm/htscodecs/byte_model.js +23 -18
  54. package/esm/htscodecs/byte_model.js.map +1 -1
  55. package/esm/htscodecs/fqzcomp.d.ts +1 -1
  56. package/esm/htscodecs/fqzcomp.js +91 -76
  57. package/esm/htscodecs/fqzcomp.js.map +1 -1
  58. package/esm/htscodecs/index.d.ts +5 -5
  59. package/esm/htscodecs/index.js +14 -20
  60. package/esm/htscodecs/index.js.map +1 -1
  61. package/esm/htscodecs/iostream.d.ts +9 -20
  62. package/esm/htscodecs/iostream.js +19 -117
  63. package/esm/htscodecs/iostream.js.map +1 -1
  64. package/esm/htscodecs/rans.d.ts +1 -1
  65. package/esm/htscodecs/rans.js +61 -56
  66. package/esm/htscodecs/rans.js.map +1 -1
  67. package/esm/htscodecs/rans4x16.d.ts +1 -1
  68. package/esm/htscodecs/rans4x16.js +143 -109
  69. package/esm/htscodecs/rans4x16.js.map +1 -1
  70. package/esm/htscodecs/tok3.d.ts +1 -2
  71. package/esm/htscodecs/tok3.js +41 -237
  72. package/esm/htscodecs/tok3.js.map +1 -1
  73. package/esm/util.d.ts +1 -0
  74. package/esm/util.js +17 -0
  75. package/esm/util.js.map +1 -0
  76. package/package.json +2 -3
  77. package/src/cramFile/file.ts +3 -2
  78. package/src/cramFile/slice/decodeRecord.ts +3 -5
  79. package/src/cramFile/util.ts +6 -39
  80. package/src/htscodecs/{arith_gen.js → arith_gen.ts} +133 -95
  81. package/src/htscodecs/{arith_sh.js → arith_sh.ts} +17 -9
  82. package/src/htscodecs/{byte_model.js → byte_model.ts} +26 -16
  83. package/src/htscodecs/{fqzcomp.js → fqzcomp.ts} +108 -74
  84. package/src/htscodecs/{index.js → index.ts} +14 -20
  85. package/src/htscodecs/iostream.ts +159 -0
  86. package/src/htscodecs/{rans.js → rans.ts} +73 -56
  87. package/src/htscodecs/{rans4x16.js → rans4x16.ts} +180 -111
  88. package/src/htscodecs/tok3.ts +197 -0
  89. package/src/util.ts +16 -0
  90. package/src/htscodecs/iostream.js +0 -257
  91. package/src/htscodecs/tok3.js +0 -413
@@ -1,413 +0,0 @@
1
- /*
2
- * Copyright (c) 2019 Genome Research Ltd.
3
- * Author(s): James Bonfield
4
- *
5
- * Redistribution and use in source and binary forms, with or without
6
- * modification, are permitted provided that the following conditions are met:
7
- *
8
- * 1. Redistributions of source code must retain the above copyright notice,
9
- * this list of conditions and the following disclaimer.
10
- *
11
- * 2. Redistributions in binary form must reproduce the above
12
- * copyright notice, this list of conditions and the following
13
- * disclaimer in the documentation and/or other materials provided
14
- * with the distribution.
15
- *
16
- * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
17
- * Institute nor the names of its contributors may be used to endorse
18
- * or promote products derived from this software without specific
19
- * prior written permission.
20
- *
21
- * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS
22
- * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23
- * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
24
- * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH
25
- * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32
- */
33
-
34
- // Name tokeniser
35
- //
36
- // This is a reference implementation designed to match the
37
- // written specification as closely as possible. It is *NOT*
38
- // an efficient implementation, but see comments below.
39
-
40
- const IOStream = require('./iostream')
41
- const rans = require('./rans4x16')
42
- const arith_gen = require('./arith_gen')
43
-
44
/**
 * Adds up the `length` property of every entry in `array`.
 *
 * @param {Iterable<{length: number}>} array - entries exposing a numeric `length`
 * @returns {number} the combined length (0 when `array` is empty)
 */
function sum(array) {
  // The accumulator is deliberately not called `sum`, so it no longer
  // shadows this function inside its own body.
  let total = 0
  for (const entry of array) {
    total += entry.length
  }
  return total
}
51
/**
 * Copies several byte sequences, back to back, into one new Uint8Array.
 *
 * @param {Array<ArrayLike<number>>} args - the pieces to join, in output order
 * @returns {Uint8Array} a fresh array whose size is the sum of the piece lengths
 */
function concatUint8Array(args) {
  // Allocate the destination once, sized for every piece.
  const mergedArray = new Uint8Array(sum(args))

  // `writeAt` is the index in the destination where the next piece starts.
  let writeAt = 0
  for (const piece of args) {
    mergedArray.set(piece, writeAt)
    writeAt = writeAt + piece.length
  }

  return mergedArray
}
60
-
61
// Arithmetic coder instance shared by the decode and encode paths in this
// file. Declared with `const` like the other module-level bindings.
const arith = new arith_gen()

// Token type identifiers. Each value doubles as the index of the byte stream
// that carries that token type's payload for a given token position.
const TOK_TYPE = 0 // stream of token types, one byte per name
const TOK_STRING = 1 // payload read with ReadString()
const TOK_CHAR = 2 // payload read with ReadChar()
const TOK_DIGITS0 = 3 // Uint32 value, zero-padded to the width held in TOK_DZLEN
const TOK_DZLEN = 4 // one byte: padded width of the matching TOK_DIGITS0 value
const TOK_DUP = 5 // whole name repeats an earlier name (Uint32 distance back)
const TOK_DIFF = 6 // name is tokenised against an earlier name (Uint32 distance back)
const TOK_DIGITS = 7 // Uint32 value
const TOK_DELTA = 8 // one byte added to the reference name's token at this position
const TOK_DELTA0 = 9 // as TOK_DELTA, zero-padded to the reference token's length
const TOK_MATCH = 10 // token is copied from the reference name
const TOK_NOP = 11 // not referenced elsewhere in this file; decodes to an empty token
const TOK_END = 12 // terminates a name's token list
76
-
77
- //----------------------------------------------------------------------
78
- // Token byte streams
79
/**
 * Reads every serialised token byte stream from `src` and returns them as a
 * table indexed by token position and token type.
 *
 * Each stream starts with a header byte: bit 128 opens a new token position,
 * bit 64 marks the stream as a duplicate of an earlier one, and the low six
 * bits are the token type.
 *
 * @param {object} src - input stream providing EOF(), ReadByte(), ReadUint7()
 *   and ReadData()
 * @param {number} in_size - unused; kept so existing callers keep working
 * @param {*} use_arith - truthy to decode payloads with `arith`, otherwise `rans`
 * @param {number} nnames - number of names encoded in the block
 * @returns {Array} `B`, where `B[t][type]` is an IOStream over the decoded bytes
 */
function DecodeTokenByteStreams(src, in_size, use_arith, nnames) {
  let t = -1

  const B = new Array(256)

  while (!src.EOF()) {
    const ttype = src.ReadByte()
    const tok_new = ttype & 128
    const tok_dup = ttype & 64
    const type = ttype & 63

    if (tok_new) {
      t++
      B[t] = new Array(13)
    }

    if (type !== TOK_TYPE && tok_new) {
      // No explicit TOK_TYPE stream for this position: synthesise one made of
      // `type` for the first name followed by TOK_MATCH for all the others.
      // Uint8Array.of(type) is a single byte holding `type`; the previous
      // `new Uint8Array(type)` allocated `type` zero bytes instead.
      // Math.max guards against a negative array length when nnames is 0.
      const M = new Array(Math.max(nnames - 1, 0)).fill(TOK_MATCH)
      B[t][TOK_TYPE] = new IOStream(concatUint8Array([Uint8Array.of(type), M]))
    }

    if (tok_dup) {
      // Duplicate stream: wrap the bytes of the referenced stream in a new
      // IOStream rather than decoding a payload.
      const dup_pos = src.ReadByte()
      const dup_type = src.ReadByte()
      B[t][type] = new IOStream(B[dup_pos][dup_type].buf)
    } else {
      const clen = src.ReadUint7()
      const data = src.ReadData(clen)
      const decoded = use_arith ? arith.decode(data) : rans.decode(data)
      B[t][type] = new IOStream(decoded)
    }
  }

  return B
}
116
-
117
- //----------------------------------------------------------------------
118
- // Token decode
119
/**
 * Renders `val` as a string, left-padded with '0' up to `len` characters.
 * Values that are already `len` characters or longer come back unchanged.
 *
 * @param {number|string} val - value to render
 * @param {number} len - minimum width of the result
 * @returns {string} the zero-padded text
 */
function LeftPadNumber(val, len) {
  return String(val).padStart(len, '0')
}
125
-
126
/**
 * Decodes name number `n` from the token byte streams.
 *
 * Token position 0 says whether the name duplicates an earlier name
 * (TOK_DUP) or is tokenised against one (any other type), and gives the
 * distance back to that reference name. The remaining positions are read
 * until a TOK_END token is seen.
 *
 * @param {Array} B - token streams, indexed as B[position][type]
 * @param {Array} N - decoded names so far; N[n] is filled in
 * @param {Array} T - decoded tokens per name; T[n] is filled in
 * @param {number} n - index of the name to decode
 * @returns {string} the decoded name
 */
function DecodeSingleName(B, N, T, n) {
  let type = B[0][TOK_TYPE].ReadByte()
  const dist = B[0][type].ReadUint32()
  const m = n - dist // index of the reference name

  if (type === TOK_DUP) {
    // Exact duplicate: share both the text and the token list.
    N[n] = N[m]
    T[n] = T[m]
    return N[n]
  }

  let t = 1
  N[n] = ''
  T[n] = new Array(256)
  do {
    type = B[t][TOK_TYPE].ReadByte()

    switch (type) {
      case TOK_CHAR:
        T[n][t] = B[t][TOK_CHAR].ReadChar()
        break

      case TOK_STRING:
        T[n][t] = B[t][TOK_STRING].ReadString()
        break

      case TOK_DIGITS:
        T[n][t] = B[t][TOK_DIGITS].ReadUint32()
        break

      case TOK_DIGITS0: {
        const value = B[t][TOK_DIGITS0].ReadUint32()
        const width = B[t][TOK_DZLEN].ReadByte()
        T[n][t] = LeftPadNumber(value, width)
        break
      }

      case TOK_DELTA:
        // `>> 0` coerces the reference token to an integer before adding.
        T[n][t] = (T[m][t] >> 0) + B[t][TOK_DELTA].ReadByte()
        break

      case TOK_DELTA0: {
        const value = (T[m][t] >> 0) + B[t][TOK_DELTA0].ReadByte()
        // Keep the zero-padded width of the reference token.
        const width = T[m][t].length
        T[n][t] = LeftPadNumber(value, width)
        break
      }

      case TOK_MATCH:
        T[n][t] = T[m][t]
        break

      default:
        // TOK_END and any other type contribute no text.
        T[n][t] = ''
        break
    }

    N[n] += T[n][t++]
  } while (type !== TOK_END)

  return N[n]
}
186
-
187
- //----------------------------------------------------------------------
188
- // Main tokeniser decode entry function: decodes a compressed src and
189
- // returns the uncompressed buffer.
190
/**
 * Decodes a tokenised-names buffer into a single string in which every name
 * is followed by `separator`.
 *
 * @param {*} src - compressed input, passed straight to the IOStream constructor
 * @param {number} len - forwarded to DecodeTokenByteStreams as its size argument
 * @param {string} [separator='\n'] - text appended after each decoded name
 * @returns {string} all decoded names, each followed by `separator`
 */
function decode(src, len, separator = '\n') {
  // Use a separate binding for the stream instead of redeclaring `src`.
  const stream = new IOStream(src)

  // The header starts with the uncompressed length. It is not needed here,
  // but it must be read so the stream is positioned at the name count.
  stream.ReadUint32()
  const nnames = stream.ReadUint32()
  const use_arith = stream.ReadByte()

  const B = DecodeTokenByteStreams(stream, len, use_arith, nnames)
  const N = new Array(nnames)
  const T = new Array(nnames)

  let str = ''
  for (let i = 0; i < nnames; i++) {
    str += DecodeSingleName(B, N, T, i) + separator
  }

  return str
}
207
-
208
- //----------------------------------------------------------------------
209
- // Main tokeniser encode function
210
-
211
- // Encoder is trickier than decode as we have a lot of decisions to make.
212
- // However here we just make a simple guess without anything complex,
213
- // to demonstrate the basic idea. See the C implementation for further
214
- // expansion on this.
215
/**
 * Tokenises newline-separated names and serialises them into a compressed
 * buffer: a header (text length, name count, codec flag) followed by the
 * compressed byte streams of every token position.
 *
 * @param {*} src - input whose toString() gives the newline-separated names
 *   NOTE(review): a plain Uint8Array stringifies to comma-separated numbers,
 *   so this presumably expects a Buffer or a string; confirm against callers.
 * @param {*} use_arith - written into the header and passed to the serialiser
 *   to choose between the arithmetic and rANS coders
 * @returns {*} the bytes written to the output stream, cut at its write position
 */
function encode(src, use_arith) {
  // Turn the input into a list of names, ignoring one trailing newline.
  let text = src.toString()
  if (text.endsWith('\n')) {
    text = text.slice(0, -1)
  }
  const names = text.split('\n')

  // Output buffer size is a guess: twice the text plus generous headroom.
  const out = new IOStream('', 0, text.length * 2 + 10000)
  out.WriteUint32(text.length)
  out.WriteUint32(names.length)
  out.WriteByte(use_arith)

  // Tokenise every name, tracking the largest token count and token length.
  const T = new Array(names.length)
  const H = {}
  const F = new Array(256).fill(0) // DELTA vs DIGIT frequency
  let max_tok = 0
  let max_len = 0
  for (const [index, name] of names.entries()) {
    const [ntok, len] = TokeniseName(T, H, F, name, index)
    if (max_tok < ntok) {
      max_tok = ntok
    }
    if (max_len < len) {
      max_len = len
    }
  }

  // For each token position, build one stream per token type and serialise.
  for (let tnum = 0; tnum < max_tok; tnum++) {
    const B = new Array(TOK_END + 1)
    for (let type = 0; type <= TOK_END; type++) {
      B[type] = new IOStream('', 0, names.length * max_len)
    }

    FillByteStreams(B, T, tnum, names, max_tok, max_len)
    SerialiseByteStreams(B, tnum, use_arith, out)
  }

  return out.buf.slice(0, out.pos)
}
250
-
251
/**
 * Writes the tokens found at position `tnum` of every name into the per-type
 * byte streams `B`.
 *
 * Every token contributes its type to B[TOK_TYPE]. Types that carry a value
 * also write that value to the stream for their own type. Names that are
 * whole-name duplicates only contribute at position 0.
 *
 * @param {Array} B - one writable stream per token type, indexed by type
 * @param {Array} T - tokens per name, as built by TokeniseName
 * @param {number} tnum - token position to serialise
 * @param {Array} names - the names; only its length is used
 * @param {number} max_tok - unused; kept so existing callers keep working
 * @param {number} max_len - unused; kept so existing callers keep working
 */
function FillByteStreams(B, T, tnum, names, max_tok, max_len) {
  for (let n = 0; n < names.length; n++) {
    // A duplicated name is fully described by its position-0 token.
    if (tnum > 0 && T[n][0].type === TOK_DUP) continue

    const token = T[n][tnum]
    if (!token) continue

    B[TOK_TYPE].WriteByte(token.type)

    switch (token.type) {
      case TOK_DIFF:
      case TOK_DUP:
      case TOK_DIGITS:
        B[token.type].WriteUint32(token.val)
        break

      case TOK_STRING:
        B[TOK_STRING].WriteString(token.val)
        break

      case TOK_CHAR:
        B[TOK_CHAR].WriteChar(token.val)
        break

      case TOK_DIGITS0:
        // The value goes to one stream and its padded width to another.
        B[TOK_DIGITS0].WriteUint32(token.val)
        B[TOK_DZLEN].WriteByte(token.val.length)
        break

      case TOK_DELTA:
      case TOK_DELTA0:
        B[token.type].WriteByte(token.val)
        break

      // Remaining types (TOK_MATCH, TOK_END, ...) carry no value.
    }
  }
}
296
-
297
/**
 * Compresses every non-empty stream in `B` and appends it to `out` as a
 * header byte, a Uint7 length, and the compressed bytes.
 *
 * Side effect: each serialised `B[type]` is replaced by the plain buffer
 * holding the bytes that were written to it.
 *
 * @param {Array} B - one stream per token type for a single token position
 * @param {number} tnum - token position being serialised (not used here)
 * @param {*} use_arith - forwarded to try_compress to pick the coder
 * @param {object} out - destination stream
 */
function SerialiseByteStreams(B, tnum, use_arith, out) {
  for (let type = 0; type <= TOK_END; type++) {
    const stream = B[type]
    if (stream.pos <= 0) continue

    // Stream type 0 additionally carries bit 128 in its header byte.
    const header = type === 0 ? 128 : type
    out.WriteByte(header)

    // Keep only the bytes actually written, then compress them.
    const written = stream.buf.slice(0, stream.pos)
    B[type] = written
    const comp = try_compress(written, use_arith)

    out.WriteUint7(comp.length)
    out.WriteData(comp, comp.length)
  }
}
312
-
313
/**
 * Compresses `src` with each candidate method and returns the smallest result.
 *
 * @param {ArrayLike<number>} src - bytes to compress
 * @param {*} use_arith - truthy to use `arith.encode`, otherwise `rans.encode`
 * @returns {*} the shortest encoding produced, or undefined when every
 *   candidate was skipped, threw, or returned a falsy value
 */
function try_compress(src, use_arith) {
  let best = 1 << 30
  let comp

  // Candidate method flags handed to the encoder.
  // NOTE(review): the meaning of the individual bits is defined by the
  // rans/arith encoders, not in this file. Here, bit 1 is only tried on
  // inputs of at least 100 bytes and bit 8 only when the length is a
  // multiple of 4.
  const methods = [0, 1, 64, 65, 128, 129, 193 + 8]

  // for...of visits the values in order; for...in would visit string keys
  // and any enumerable property added to Array.prototype.
  for (const lvl of methods) {
    if (lvl & 1 && src.length < 100) continue

    if (lvl & 8 && src.length % 4 !== 0) continue

    let tmp
    try {
      tmp = use_arith ? arith.encode(src, lvl) : rans.encode(src, lvl)
    } catch (e) {
      // Best effort: a method that fails on this input is simply not a
      // candidate, and the remaining methods are still tried.
      tmp = undefined
    }

    if (tmp && best > tmp.length) {
      best = tmp.length
      comp = tmp
    }
  }

  return comp
}
337
-
338
/**
 * Splits `name` into tokens, compares each one with the token at the same
 * position of the previous name, and stores the result in `T[n]`.
 *
 * `T[n][0]` records whether the whole name repeats an earlier one (TOK_DUP
 * with the distance back) or not (TOK_DIFF). `T[n][1..]` hold the tokens,
 * followed by a TOK_END marker.
 *
 * @param {Array} T - token table; T[n] is created, T[n - 1] is read
 * @param {object} H - maps each name seen so far to its latest index; updated
 * @param {Array<number>} F - per-position count of numeric comparisons, used
 *   to decide when delta coding is worthwhile; updated
 * @param {string} name - the name to tokenise
 * @param {number} n - index of this name
 * @returns {[number, number]} the number of token slots used (including the
 *   position-0 and TOK_END slots) and the largest `val.length + 3` among the
 *   tokens (0 when no token has a string value)
 */
function TokeniseName(T, H, F, name, n) {
  let max_len = 0

  // Always compare against last name only
  const p = n - 1
  T[n] = new Array(256)

  // Own-property test instead of truthiness: index 0 is a valid earlier
  // position, and names such as "constructor" must not match properties
  // inherited from Object.prototype.
  if (Object.prototype.hasOwnProperty.call(H, name)) {
    T[n][0] = {
      type: TOK_DUP,
      val: n - H[name],
    }
  } else {
    T[n][0] = {
      type: TOK_DIFF,
      val: n === 0 ? 0 : 1,
    }
  }

  H[name] = n

  // Splits on alphanumerics, punctuation. match() returns null for an empty
  // name, which is treated as having no tokens.
  const tok = name.match(/([a-zA-Z0-9]{1,9})|([^a-zA-Z0-9]+)/g) || []
  for (let i = 0; i < tok.length; i++) {
    const t = i + 1 // token 0 = DIFF vs DUP
    const str = tok[i]
    let type = TOK_STRING
    let val = str
    if (/^0+[0-9]*$/.test(str)) type = TOK_DIGITS0
    else if (/^[0-9]+$/.test(str)) type = TOK_DIGITS
    else if (str.length === 1) type = TOK_CHAR

    const prev = p >= 0 ? T[p][t] : undefined
    if (prev) {
      if (prev.str === str) {
        type = TOK_MATCH
        val = ''
      } else if (prev.type === TOK_DIGITS || prev.type === TOK_DELTA) {
        const d = val - prev.str
        F[t]++
        // Only a plain-digits token may become a delta. Text such as "1e2"
        // or "007" also subtracts to a number but would not decode back to
        // the same characters.
        if (type === TOK_DIGITS && d >= 0 && d < 256 && F[t] > n / 2) {
          type = TOK_DELTA
          val = d
        }
      } else if (
        (prev.type === TOK_DIGITS0 || prev.type === TOK_DELTA0) &&
        prev.str.length === val.length
      ) {
        const d = val - prev.str
        F[t]++
        // Same restriction for the zero-padded form, e.g. "0x2" after "001".
        if (type === TOK_DIGITS0 && d >= 0 && d < 256 && F[t] > n / 2) {
          type = TOK_DELTA0
          val = d
        }
      }
    }

    T[n][t] = {
      str,
      val,
      type,
    }

    // +3 for integers; 5 -> (Uint32)5. Numeric delta values have no length,
    // so the comparison is false and they never raise max_len.
    const needed = val.length + 3
    if (max_len < needed) max_len = needed
  }

  // The END marker goes in the slot after the last token. It is computed
  // from the token count so it is also defined when there are no tokens.
  const endIndex = tok.length + 1
  T[n][endIndex] = {
    type: TOK_END,
  }

  return [endIndex + 1, max_len]
}
412
-
413
- module.exports = { encode, decode }