oodle-kraken-ruby 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,33 @@
1
+ /*
2
+ Copyright (C) 2022, Kerilk
3
+
4
+ This program is free software: you can redistribute it and/or modify
5
+ it under the terms of the GNU General Public License as published by
6
+ the Free Software Foundation, either version 3 of the License, or
7
+ (at your option) any later version.
8
+
9
+ This program is distributed in the hope that it will be useful,
10
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
+ GNU General Public License for more details.
13
+
14
+ You should have received a copy of the GNU General Public License
15
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
16
+ */
17
#ifndef KRAKEN_H
#define KRAKEN_H

#include <stdlib.h>
#include <stdint.h>
#include <sys/types.h> /* ssize_t is a POSIX type; <stdlib.h>/<stdint.h> do not declare it */

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Kraken_Decompress - decompress a buffer.
 *
 * src, src_len: input buffer and its size in bytes.
 * dst, dst_len: output buffer and its size in bytes.
 *
 * Returns a signed size.
 * NOTE(review): presumably the number of bytes produced, negative on
 * failure - confirm against the implementation, which is not part of
 * this header.
 */
extern ssize_t Kraken_Decompress(const uint8_t *src, size_t src_len, uint8_t *dst, size_t dst_len);

#ifdef __cplusplus
}
#endif

#endif /* KRAKEN_H */
@@ -0,0 +1,617 @@
1
+ /*
2
+ Copyright (C) 2016, Powzix
3
+ Copyright (C) 2019, rarten
4
+
5
+ This program is free software: you can redistribute it and/or modify
6
+ it under the terms of the GNU General Public License as published by
7
+ the Free Software Foundation, either version 3 of the License, or
8
+ (at your option) any later version.
9
+
10
+ This program is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ GNU General Public License for more details.
14
+
15
+ You should have received a copy of the GNU General Public License
16
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
17
+ */
18
+
19
+ #include "stdafx.h"
20
+
21
+
22
+ typedef uint16 LznaBitModel;
23
+
24
+ // State for a 4-bit value RANS model
25
+ struct LznaNibbleModel {
26
+ uint16 prob[17];
27
+ };
28
+
29
+ // State for a 3-bit value RANS model
30
+ struct Lzna3bitModel {
31
+ uint16 prob[9];
32
+ };
33
+
34
+ // State for the literal model
35
+ struct LznaLiteralModel {
36
+ LznaNibbleModel upper[16];
37
+ LznaNibbleModel lower[16];
38
+ LznaNibbleModel nomatch[16];
39
+ };
40
+
41
+ // State for a model representing a far distance
42
+ struct LznaFarDistModel {
43
+ LznaNibbleModel first_lo;
44
+ LznaNibbleModel first_hi;
45
+ LznaBitModel second[31];
46
+ LznaBitModel third[2][31];
47
+ };
48
+
49
+ // State for a model representing a near distance
50
+ struct LznaNearDistModel {
51
+ LznaNibbleModel first;
52
+ LznaBitModel second[16];
53
+ LznaBitModel third[2][16];
54
+ };
55
+
56
+ // State for model representing the low bits of a distance
57
+ struct LznaLowBitsDistanceModel {
58
+ LznaNibbleModel d[2];
59
+ LznaBitModel v;
60
+ };
61
+
62
+ // State for model used for the short lengths for recent matches
63
+ struct LznaShortLengthRecentModel {
64
+ Lzna3bitModel a[4];
65
+ };
66
+
67
+ // State for model for long lengths
68
+ struct LznaLongLengthModel {
69
+ LznaNibbleModel first[4];
70
+ LznaNibbleModel second;
71
+ LznaNibbleModel third;
72
+ };
73
+
74
+ // Complete LZNA state
75
+ struct LznaState {
76
+ uint32 match_history[8];
77
+ LznaLiteralModel literal[4];
78
+ LznaBitModel is_literal[12 * 8];
79
+ LznaNibbleModel type[12 * 8];
80
+ LznaShortLengthRecentModel short_length_recent[4];
81
+ LznaLongLengthModel long_length_recent;
82
+ LznaLowBitsDistanceModel low_bits_of_distance[2];
83
+ LznaBitModel short_length[12][4];
84
+ LznaNearDistModel near_dist[2];
85
+ Lzna3bitModel medium_length;
86
+ LznaLongLengthModel long_length;
87
+ LznaFarDistModel far_distance;
88
+ };
89
+
90
+ static LznaNibbleModel lzna_initializer_4bit = {
91
+ 0x0, 0x800, 0x1000, 0x1800, 0x2000, 0x2800, 0x3000, 0x3800, 0x4000, 0x4800, 0x5000, 0x5800, 0x6000, 0x6800, 0x7000, 0x7800, 0x8000,
92
+ };
93
+
94
+ static Lzna3bitModel lzna_initializer_3bit = {
95
+ 0x0, 0x1000, 0x2000, 0x3000, 0x4000, 0x5000, 0x6000, 0x7000, 0x8000
96
+ };
97
+
98
+ static void LznaNibbleModel_Init(LznaNibbleModel *d) {
99
+ *d = lzna_initializer_4bit;
100
+ }
101
+
102
+ static void Lzna3bitModel_Init(Lzna3bitModel *d) {
103
+ *d = lzna_initializer_3bit;
104
+ }
105
+
106
+ static void LznaNibbleModel_InitN(LznaNibbleModel *d, int n) {
107
+ do LznaNibbleModel_Init(d++); while (--n);
108
+ }
109
+
110
+ static void LznaLiteralModel_InitN(LznaLiteralModel *d, int n) {
111
+ do {
112
+ LznaNibbleModel_InitN(d->upper, 16);
113
+ LznaNibbleModel_InitN(d->lower, 16);
114
+ LznaNibbleModel_InitN(d->nomatch, 16);
115
+ } while (d++, --n);
116
+ }
117
+
118
+ static void LznaShortLengthRecentModel_InitN(LznaShortLengthRecentModel *d, int n) {
119
+ do {
120
+ Lzna3bitModel_Init(&d->a[0]);
121
+ Lzna3bitModel_Init(&d->a[1]);
122
+ Lzna3bitModel_Init(&d->a[2]);
123
+ Lzna3bitModel_Init(&d->a[3]);
124
+ } while (d++, --n);
125
+ }
126
+
127
+ static void LznaNearDistModel_Init(LznaNearDistModel *d, int n) {
128
+ int i;
129
+ do {
130
+ LznaNibbleModel_Init(&d->first);
131
+
132
+ for (i = 0; i < 16; i++) {
133
+ d->second[i] = 0x2000;
134
+ d->third[0][i] = 0x2000;
135
+ d->third[1][i] = 0x2000;
136
+ }
137
+
138
+ } while (d++, --n);
139
+ }
140
+
141
+ static void LznaLowBitsDistanceModel_Init(LznaLowBitsDistanceModel *d, int n) {
142
+ do {
143
+ d->v = 0x2000;
144
+ LznaNibbleModel_InitN(d->d, 2);
145
+ } while (d++, --n);
146
+ }
147
+
148
+ static void LznaFarDistModel_Init(LznaFarDistModel *d) {
149
+ int i;
150
+ LznaNibbleModel_Init(&d->first_lo);
151
+ LznaNibbleModel_Init(&d->first_hi);
152
+ for (i = 0; i < 31; i++) {
153
+ d->second[i] = 0x2000;
154
+ d->third[0][i] = 0x2000;
155
+ d->third[1][i] = 0x2000;
156
+ }
157
+ }
158
+
159
+ void LZNA_InitLookup(LznaState *lut) {
160
+ int i, j;
161
+
162
+ for (i = 0; i < 4; i++)
163
+ lut->match_history[i + 4] = 1;
164
+
165
+ for (i = 0; i < 96; i++)
166
+ lut->is_literal[i] = 0x1000;
167
+
168
+ LznaNibbleModel_InitN(lut->type, 96);
169
+ LznaLiteralModel_InitN(lut->literal, 4);
170
+ LznaShortLengthRecentModel_InitN(lut->short_length_recent, 4);
171
+
172
+ LznaNibbleModel_InitN(lut->long_length_recent.first, 4);
173
+ LznaNibbleModel_Init(&lut->long_length_recent.second);
174
+ LznaNibbleModel_InitN(&lut->long_length_recent.third, 1);
175
+
176
+ for (i = 0; i < 12; i++)
177
+ for(j = 0; j < 4; j++)
178
+ lut->short_length[i][j] = 0x2000;
179
+
180
+ LznaNearDistModel_Init(lut->near_dist, 2);
181
+ LznaLowBitsDistanceModel_Init(lut->low_bits_of_distance, 2);
182
+
183
+ Lzna3bitModel_Init(&lut->medium_length);
184
+
185
+ LznaNibbleModel_InitN(lut->long_length.first, 4);
186
+ LznaNibbleModel_Init(&lut->long_length.second);
187
+ LznaNibbleModel_InitN(&lut->long_length.third, 1);
188
+ LznaFarDistModel_Init(&lut->far_distance);
189
+ }
190
+
191
+ struct LznaBitReader {
192
+ uint64 bits_a, bits_b;
193
+ const uint32 *src, *src_start;
194
+ };
195
+
196
+ // Initialize bit reader with 2 parallel streams. Every decode operation
197
+ // swaps the two streams.
198
+ static void LznaBitReader_Init(LznaBitReader *tab, const byte *src) {
199
+ int d, n, i;
200
+ uint64 v;
201
+
202
+ tab->src_start = (uint32*)src;
203
+
204
+ d = *src++;
205
+ n = d >> 4;
206
+ assert(n <= 8);
207
+ for (i = 0, v = 0; i < n; i++)
208
+ v = (v << 8) | *src++;
209
+ tab->bits_a = (v << 4) | (d & 0xF);
210
+
211
+ d = *src++;
212
+ n = d >> 4;
213
+ assert(n <= 8);
214
+ for (i = 0, v = 0; i < n; i++)
215
+ v = (v << 8) | *src++;
216
+ tab->bits_b = (v << 4) | (d & 0xF);
217
+ tab->src = (uint32*)src;
218
+ }
219
+
220
+ // Renormalize by filling up the RANS state and swapping the two streams
221
+ static void __forceinline LznaRenormalize(LznaBitReader *tab) {
222
+ uint64 x = tab->bits_a;
223
+ if (x < 0x80000000)
224
+ x = (x << 32) | *tab->src++;
225
+ tab->bits_a = tab->bits_b;
226
+ tab->bits_b = x;
227
+ }
228
+
229
+ // Read a single bit with a uniform distribution.
230
+ static uint32 __forceinline LznaReadBit(LznaBitReader *tab) {
231
+ int r = tab->bits_a & 1;
232
+ tab->bits_a >>= 1;
233
+ LznaRenormalize(tab);
234
+ return r;
235
+ }
236
+
237
+ // Read a number of bits with a uniform distribution.
238
+ static uint32 __forceinline LznaReadNBits(LznaBitReader *tab, int bits) {
239
+ uint32 rv = tab->bits_a & ((1 << bits) - 1);
240
+ tab->bits_a >>= bits;
241
+ LznaRenormalize(tab);
242
+ return rv;
243
+ }
244
+
245
+
246
+ // Read a 4-bit value using an adaptive RANS model
247
+ static uint32 __forceinline LznaReadNibble(LznaBitReader *tab, LznaNibbleModel *model) {
248
+ __m128i t, t0, t1, c0, c1;
249
+ unsigned long bitindex;
250
+ unsigned int start, end;
251
+ uint64 x = tab->bits_a;
252
+
253
+ t0 = _mm_loadu_si128((const __m128i *)&model->prob[0]);
254
+ t1 = _mm_loadu_si128((const __m128i *)&model->prob[8]);
255
+
256
+ t = _mm_cvtsi32_si128((int16)x);
257
+ t = _mm_and_si128(_mm_shuffle_epi32(_mm_unpacklo_epi16(t, t), 0), _mm_set1_epi16(0x7FFF));
258
+
259
+ c0 = _mm_cmpgt_epi16(t0, t);
260
+ c1 = _mm_cmpgt_epi16(t1, t);
261
+
262
+ _BitScanForward(&bitindex, _mm_movemask_epi8(_mm_packs_epi16(c0, c1)) | 0x10000);
263
+ start = model->prob[bitindex - 1];
264
+ end = model->prob[bitindex];
265
+
266
+ c0 = _mm_and_si128(_mm_set1_epi16(0x7FD9), c0);
267
+ c1 = _mm_and_si128(_mm_set1_epi16(0x7FD9), c1);
268
+
269
+ c0 = _mm_add_epi16(c0, _mm_set_epi16(56, 48, 40, 32, 24, 16, 8, 0));
270
+ c1 = _mm_add_epi16(c1, _mm_set_epi16(120, 112, 104, 96, 88, 80, 72, 64));
271
+
272
+ t0 = _mm_add_epi16(_mm_srai_epi16(_mm_sub_epi16(c0, t0), 7), t0);
273
+ t1 = _mm_add_epi16(_mm_srai_epi16(_mm_sub_epi16(c1, t1), 7), t1);
274
+
275
+ _mm_storeu_si128((__m128i *)&model->prob[0], t0);
276
+ _mm_storeu_si128((__m128i *)&model->prob[8], t1);
277
+
278
+ tab->bits_a = (end - start) * (x >> 15) + (x & 0x7FFF) - start;
279
+ LznaRenormalize(tab);
280
+ return (int)bitindex - 1;
281
+ }
282
+
283
+ // Read a 3-bit value using an adaptive RANS model
284
+ static uint32 __forceinline LznaRead3bit(LznaBitReader *tab, Lzna3bitModel *model) {
285
+ __m128i t, t0, c0;
286
+ unsigned long bitindex;
287
+ unsigned int start, end;
288
+ uint64 x = tab->bits_a;
289
+
290
+ t0 = _mm_loadu_si128((const __m128i *)&model->prob[0]);
291
+ t = _mm_cvtsi32_si128(x & 0x7FFF);
292
+ t = _mm_shuffle_epi32(_mm_unpacklo_epi16(t, t), 0);
293
+ c0 = _mm_cmpgt_epi16(t0, t);
294
+
295
+ _BitScanForward(&bitindex, _mm_movemask_epi8(c0) | 0x10000);
296
+ bitindex >>= 1;
297
+ start = model->prob[bitindex - 1];
298
+ end = model->prob[bitindex];
299
+
300
+ c0 = _mm_and_si128(_mm_set1_epi16(0x7FE5), c0);
301
+ c0 = _mm_add_epi16(c0, _mm_set_epi16(56, 48, 40, 32, 24, 16, 8, 0));
302
+ t0 = _mm_add_epi16(_mm_srai_epi16(_mm_sub_epi16(c0, t0), 7), t0);
303
+ _mm_storeu_si128((__m128i *)&model->prob[0], t0);
304
+
305
+ tab->bits_a = (end - start) * (x >> 15) + (x & 0x7FFF) - start;
306
+ LznaRenormalize(tab);
307
+ return bitindex - 1;
308
+ }
309
+
310
+ // Read a 1-bit value using an adaptive RANS model
311
+ static uint32 __forceinline LznaRead1Bit(LznaBitReader *tab, LznaBitModel *model, int nbits, int shift) {
312
+ uint64 q;
313
+ int magn = 1 << nbits;
314
+ q = *model * (tab->bits_a >> nbits);
315
+ if ((tab->bits_a & (magn - 1)) >= *model) {
316
+ tab->bits_a -= q + *model;
317
+ *model = *model - (*model >> shift);
318
+ LznaRenormalize(tab);
319
+ return 1;
320
+ } else {
321
+ tab->bits_a = (tab->bits_a & (magn - 1)) + q;
322
+ *model = *model + ((magn - *model) >> shift);
323
+ LznaRenormalize(tab);
324
+ return 0;
325
+ }
326
+ }
327
+
328
+ // Read a far distance using the far distance model
329
+ static uint32 __forceinline LznaReadFarDistance(LznaBitReader *tab, LznaState *lut) {
330
+ uint32 n = LznaReadNibble(tab, &lut->far_distance.first_lo);
331
+ uint32 hi;
332
+ if (n >= 15)
333
+ n = 15 + LznaReadNibble(tab, &lut->far_distance.first_hi);
334
+ hi = 0;
335
+ if (n != 0) {
336
+ hi = LznaRead1Bit(tab, &lut->far_distance.second[n - 1], 14, 6) + 2;
337
+ if (n != 1) {
338
+ hi = (hi << 1) + LznaRead1Bit(tab, &lut->far_distance.third[hi - 2][n - 1], 14, 6);
339
+ if (n != 2)
340
+ hi = (hi << (n - 2)) + LznaReadNBits(tab, n - 2);
341
+ }
342
+ hi -= 1;
343
+ }
344
+ LznaLowBitsDistanceModel *lutd = &lut->low_bits_of_distance[hi == 0];
345
+ uint32 low_bit = LznaRead1Bit(tab, &lutd->v, 14, 6);
346
+ uint32 low_nibble = LznaReadNibble(tab, &lutd->d[low_bit]);
347
+ return low_bit + (2 * low_nibble) + (32 * hi) + 1;
348
+ }
349
+
350
+ // Read a near distance using a near distance model
351
+ static uint32 __forceinline LznaReadNearDistance(LznaBitReader *tab, LznaState *lut, LznaNearDistModel *model) {
352
+ uint32 nb = LznaReadNibble(tab, &model->first);
353
+ uint32 hi = 0;
354
+ if (nb != 0) {
355
+ hi = LznaRead1Bit(tab, &model->second[nb - 1], 14, 6) + 2;
356
+ if (nb != 1) {
357
+ hi = (hi << 1) + LznaRead1Bit(tab, &model->third[hi - 2][nb - 1], 14, 6);
358
+ if (nb != 2)
359
+ hi = (hi << (nb - 2)) + LznaReadNBits(tab, nb - 2);
360
+ }
361
+ hi -= 1;
362
+ }
363
+ LznaLowBitsDistanceModel *lutd = &lut->low_bits_of_distance[hi == 0];
364
+ uint32 low_bit = LznaRead1Bit(tab, &lutd->v, 14, 6);
365
+ uint32 low_nibble = LznaReadNibble(tab, &lutd->d[low_bit]);
366
+ return low_bit + (2 * low_nibble) + (32 * hi) + 1;
367
+ }
368
+
369
+ // Read a length using the length model.
370
+ static uint32 __forceinline LznaReadLength(LznaBitReader *tab, LznaLongLengthModel *model, int64 dst_offs) {
371
+ uint32 length = LznaReadNibble(tab, &model->first[dst_offs & 3]);
372
+ if (length >= 12) {
373
+ uint32 b = LznaReadNibble(tab, &model->second);
374
+ if (b >= 15)
375
+ b = 15 + LznaReadNibble(tab, &model->third);
376
+ uint32 n = 0;
377
+ uint32 base = 0;
378
+ if (b) {
379
+ n = (b - 1) >> 1;
380
+ base = ((((b - 1) & 1) + 2) << n) - 1;
381
+ }
382
+ length += (LznaReadNBits(tab, n) + base) * 4;
383
+ }
384
+ return length;
385
+ }
386
+
387
+ static const uint8 next_state_lit[12] = {
388
+ 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5
389
+ };
390
+
391
+ static void LznaCopyLongDist(byte *dst, size_t dist, size_t length) {
392
+ const byte *src = dst - dist;
393
+ ((uint64*)dst)[0] = ((uint64*)src)[0];
394
+ ((uint64*)dst)[1] = ((uint64*)src)[1];
395
+ if (length > 16) {
396
+ do {
397
+ ((uint64*)dst)[2] = ((uint64*)src)[2];
398
+ dst += 8;
399
+ src += 8;
400
+ length -= 8;
401
+ } while (length > 16);
402
+ }
403
+ }
404
+
405
+ static void LznaCopyShortDist(byte *dst, size_t dist, size_t length) {
406
+ const byte *src = dst - dist;
407
+ if (dist >= 4) {
408
+ ((uint32*)dst)[0] = ((uint32*)src)[0];
409
+ ((uint32*)dst)[1] = ((uint32*)src)[1];
410
+ ((uint32*)dst)[2] = ((uint32*)src)[2];
411
+ if (length > 12) {
412
+ ((uint32*)dst)[3] = ((uint32*)src)[3];
413
+ if (length > 16) {
414
+ do {
415
+ ((uint32*)dst)[4] = ((uint32*)src)[4];
416
+ length -= 4;
417
+ dst += 4;
418
+ src += 4;
419
+ } while (length > 16);
420
+ }
421
+ }
422
+ } else if (dist == 1) {
423
+ memset(dst, *src, length);
424
+ } else {
425
+ ((byte*)dst)[0] = ((byte*)src)[0];
426
+ ((byte*)dst)[1] = ((byte*)src)[1];
427
+ ((byte*)dst)[2] = ((byte*)src)[2];
428
+ ((byte*)dst)[3] = ((byte*)src)[3];
429
+ ((byte*)dst)[4] = ((byte*)src)[4];
430
+ ((byte*)dst)[5] = ((byte*)src)[5];
431
+ ((byte*)dst)[6] = ((byte*)src)[6];
432
+ ((byte*)dst)[7] = ((byte*)src)[7];
433
+ ((byte*)dst)[8] = ((byte*)src)[8];
434
+ while (length > 9) {
435
+ ((byte*)dst)[9] = ((byte*)src)[9];
436
+ dst += 1;
437
+ src += 1;
438
+ length -= 1;
439
+ }
440
+ }
441
+ }
442
+
443
+ static void LznaCopy4to12(byte *dst, size_t dist, size_t length) {
444
+ const byte *src = dst - dist;
445
+ dst[0] = src[0];
446
+ dst[1] = src[1];
447
+ dst[2] = src[2];
448
+ dst[3] = src[3];
449
+ if (length > 4) {
450
+ dst[4] = src[4];
451
+ dst[5] = src[5];
452
+ dst[6] = src[6];
453
+ dst[7] = src[7];
454
+ if (length > 8) {
455
+ dst[8] = src[8];
456
+ dst[9] = src[9];
457
+ dst[10] = src[10];
458
+ dst[11] = src[11];
459
+ }
460
+ }
461
+ }
462
+
463
+ static void LznaPreprocessMatchHistory(LznaState *lut) {
464
+ if (lut->match_history[4] >= 0xc000) {
465
+ size_t i = 0;
466
+ while (lut->match_history[4 + i] >= 0xC000) {
467
+ ++i;
468
+ if (i >= 4) {
469
+ lut->match_history[7] = lut->match_history[6];
470
+ lut->match_history[6] = lut->match_history[5];
471
+ lut->match_history[5] = lut->match_history[4];
472
+ lut->match_history[4] = 4;
473
+ return;
474
+ }
475
+ }
476
+ uint32 t = lut->match_history[i + 4];
477
+ lut->match_history[i + 4] = lut->match_history[i + 3];
478
+ lut->match_history[i + 3] = lut->match_history[i + 2];
479
+ lut->match_history[i + 2] = lut->match_history[i + 1];
480
+ lut->match_history[4] = t;
481
+ }
482
+ }
483
+
484
+ int LZNA_DecodeQuantum(byte *dst, byte *dst_end, byte *dst_start,
485
+ const byte *src_in, const byte *src_end,
486
+ LznaState *lut) {
487
+ LznaBitReader tab;
488
+ uint32 x;
489
+ uint32 dst_offs = dst - dst_start;
490
+ uint32 match_val;
491
+ uint32 state;
492
+ uint32 length;
493
+ uint32 dist;
494
+
495
+ LznaPreprocessMatchHistory(lut);
496
+ LznaBitReader_Init(&tab, src_in);
497
+ dist = lut->match_history[4];
498
+
499
+ state = 5;
500
+ dst_end -= 8;
501
+
502
+ if (dst_offs == 0) {
503
+ if (LznaReadBit(&tab)) {
504
+ x = 0;
505
+ } else {
506
+ LznaLiteralModel *model = &lut->literal[0];
507
+ x = LznaReadNibble(&tab, &model->upper[0]);
508
+ x = (x << 4) + LznaReadNibble(&tab, (x != 0) ? &model->nomatch[x] : &model->lower[0]);
509
+ }
510
+ *dst++ = x;
511
+ dst_offs += 1;
512
+ }
513
+ while (dst < dst_end) {
514
+ match_val = *(dst - dist);
515
+
516
+ if (LznaRead1Bit(&tab, &lut->is_literal[(dst_offs & 7) + 8 * state], 13, 5)) {
517
+ x = LznaReadNibble(&tab, &lut->type[(dst_offs & 7) + 8 * state]);
518
+ if (x == 0) {
519
+ // Copy 1 byte from most recent distance
520
+ *dst++ = match_val;
521
+ dst_offs += 1;
522
+ state = (state >= 7) ? 11 : 9;
523
+ } else if (x < 4) {
524
+ if (x == 1) {
525
+ // Copy count 3-4
526
+ length = 3 + LznaRead1Bit(&tab, &lut->short_length[state][dst_offs & 3], 14, 4);
527
+ dist = LznaReadNearDistance(&tab, lut, &lut->near_dist[length - 3]);
528
+ dst[0] = (dst - dist)[0];
529
+ dst[1] = (dst - dist)[1];
530
+ dst[2] = (dst - dist)[2];
531
+ dst[3] = (dst - dist)[3];
532
+ } else if (x == 2) {
533
+ // Copy count 5-12
534
+ length = 5 + LznaRead3bit(&tab, &lut->medium_length);
535
+ dist = LznaReadFarDistance(&tab, lut);
536
+ if (dist >= 8) {
537
+ ((uint64*)dst)[0] = ((uint64*)(dst - dist))[0];
538
+ ((uint64*)dst)[1] = ((uint64*)(dst - dist))[1];
539
+ } else {
540
+ LznaCopy4to12(dst, dist, length);
541
+ }
542
+ } else {
543
+ // Copy count 13-
544
+ length = LznaReadLength(&tab, &lut->long_length, dst_offs) + 13;
545
+ dist = LznaReadFarDistance(&tab, lut);
546
+ if (dist >= 8)
547
+ LznaCopyLongDist(dst, dist, length);
548
+ else
549
+ LznaCopyShortDist(dst, dist, length);
550
+ }
551
+ state = (state >= 7) ? 10 : 7;
552
+ lut->match_history[7] = lut->match_history[6];
553
+ lut->match_history[6] = lut->match_history[5];
554
+ lut->match_history[5] = lut->match_history[4];
555
+ lut->match_history[4] = dist;
556
+ dst += length;
557
+ dst_offs += length;
558
+ } else if (x >= 12) {
559
+ // Copy 2 bytes from a recent distance
560
+ size_t idx = x - 12;
561
+ dist = lut->match_history[4 + idx];
562
+ lut->match_history[4 + idx] = lut->match_history[3 + idx];
563
+ lut->match_history[3 + idx] = lut->match_history[2 + idx];
564
+ lut->match_history[2 + idx] = lut->match_history[1 + idx];
565
+ lut->match_history[4] = dist;
566
+ dst[0] = *(dst - dist + 0);
567
+ dst[1] = *(dst - dist + 1);
568
+ state = (state >= 7) ? 11 : 8;
569
+ dst_offs += 2;
570
+ dst += 2;
571
+ } else {
572
+ size_t idx = (x - 4) >> 1;
573
+ dist = lut->match_history[4 + idx];
574
+ lut->match_history[4 + idx] = lut->match_history[3 + idx];
575
+ lut->match_history[3 + idx] = lut->match_history[2 + idx];
576
+ lut->match_history[2 + idx] = lut->match_history[1 + idx];
577
+ lut->match_history[4] = dist;
578
+ if (x & 1) {
579
+ // Copy 11- bytes from recent distance
580
+ length = 11 + LznaReadLength(&tab, &lut->long_length_recent, dst_offs);
581
+ if (dist >= 8) {
582
+ LznaCopyLongDist(dst, dist, length);
583
+ } else {
584
+ LznaCopyShortDist(dst, dist, length);
585
+ }
586
+ } else {
587
+ // Copy 3-10 bytes from recent distance
588
+ length = 3 + LznaRead3bit(&tab, &lut->short_length_recent[idx].a[dst_offs & 3]);
589
+ if (dist >= 8) {
590
+ ((uint64*)dst)[0] = ((uint64*)(dst - dist))[0];
591
+ ((uint64*)dst)[1] = ((uint64*)(dst - dist))[1];
592
+ } else {
593
+ LznaCopy4to12(dst, dist, length);
594
+ }
595
+ }
596
+ state = (state >= 7) ? 11 : 8;
597
+ dst_offs += length;
598
+ dst += length;
599
+ }
600
+ } else {
601
+ // Output a literal
602
+ LznaLiteralModel *model = &lut->literal[dst_offs & 3];
603
+ x = LznaReadNibble(&tab, &model->upper[match_val >> 4]);
604
+ x = (x << 4) + LznaReadNibble(&tab, ((match_val >> 4) != x) ? &model->nomatch[x] : &model->lower[match_val & 0xF]);
605
+ *dst++ = x;
606
+ dst_offs += 1;
607
+ state = next_state_lit[state];
608
+ }
609
+ }
610
+
611
+ if (dst != dst_end)
612
+ return -1;
613
+
614
+ *(uint64*)dst = (uint32)tab.bits_a | (tab.bits_b << 32);
615
+
616
+ return (byte*)tab.src - src_in;
617
+ }