oodle-kraken-ruby 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,33 @@
1
+ /*
2
+ Copyright (C) 2022, Kerilk
3
+
4
+ This program is free software: you can redistribute it and/or modify
5
+ it under the terms of the GNU General Public License as published by
6
+ the Free Software Foundation, either version 3 of the License, or
7
+ (at your option) any later version.
8
+
9
+ This program is distributed in the hope that it will be useful,
10
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
11
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12
+ GNU General Public License for more details.
13
+
14
+ You should have received a copy of the GNU General Public License
15
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
16
+ */
17
#ifndef KRAKEN_H
#define KRAKEN_H

#include <stdlib.h>
#include <stdint.h>
#include <sys/types.h> /* ssize_t is not provided by <stdlib.h>/<stdint.h> in strict ISO C mode */

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Decompress the src_len bytes at src into the dst_len-byte buffer at dst.
 *
 * Only the declaration lives here; the implementation is in another
 * translation unit.
 * NOTE(review): the meaning of the return value (presumably the number of
 * bytes produced, negative on failure) is not visible from this header --
 * confirm against the implementation.
 */
extern ssize_t Kraken_Decompress(const uint8_t *src, size_t src_len, uint8_t *dst, size_t dst_len);

#ifdef __cplusplus
}
#endif

#endif
@@ -0,0 +1,617 @@
1
+ /*
2
+ Copyright (C) 2016, Powzix
3
+ Copyright (C) 2019, rarten
4
+
5
+ This program is free software: you can redistribute it and/or modify
6
+ it under the terms of the GNU General Public License as published by
7
+ the Free Software Foundation, either version 3 of the License, or
8
+ (at your option) any later version.
9
+
10
+ This program is distributed in the hope that it will be useful,
11
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ GNU General Public License for more details.
14
+
15
+ You should have received a copy of the GNU General Public License
16
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
17
+ */
18
+
19
+ #include "stdafx.h"
20
+
21
+
22
// 16-bit adaptive state for a single binary decision; read and updated in
// place by LznaRead1Bit.
typedef uint16 LznaBitModel;
23
+
24
// State for a 4-bit value RANS model.
// prob[] holds 17 ascending boundaries: symbol s covers [prob[s], prob[s + 1]).
// LznaReadNibble adapts the boundaries after every decoded symbol.
struct LznaNibbleModel {
  uint16 prob[17];
};
28
+
29
// State for a 3-bit value RANS model.
// prob[] holds 9 ascending boundaries: symbol s covers [prob[s], prob[s + 1]).
// LznaRead3bit adapts the first 8 entries after every decoded symbol.
struct Lzna3bitModel {
  uint16 prob[9];
};
33
+
34
// State for the literal model. A literal is decoded as two nibbles, using the
// byte at the most recent match distance as context.
struct LznaLiteralModel {
  // High nibble of the literal; indexed by the high nibble of the context byte.
  LznaNibbleModel upper[16];
  // Low nibble when the decoded high nibble equals the context's high nibble;
  // indexed by the low nibble of the context byte.
  LznaNibbleModel lower[16];
  // Low nibble otherwise; indexed by the decoded high nibble.
  LznaNibbleModel nomatch[16];
};
40
+
41
// State for a model representing a far distance (see LznaReadFarDistance).
struct LznaFarDistModel {
  // First nibble of the magnitude class n.
  LznaNibbleModel first_lo;
  // Read only when first_lo yields 15 or more; its value is added to 15.
  LznaNibbleModel first_hi;
  // One bit per class, indexed by n - 1.
  LznaBitModel second[31];
  // Next bit, indexed by [previous bit][n - 1].
  LznaBitModel third[2][31];
};
48
+
49
// State for a model representing a near distance (see LznaReadNearDistance).
struct LznaNearDistModel {
  // Magnitude class nb (0..15).
  LznaNibbleModel first;
  // One bit per class, indexed by nb - 1.
  LznaBitModel second[16];
  // Next bit, indexed by [previous bit][nb - 1].
  LznaBitModel third[2][16];
};
55
+
56
// State for model representing the low bits of a distance.
struct LznaLowBitsDistanceModel {
  // Nibble models for the next four bits, selected by the bit decoded with v.
  LznaNibbleModel d[2];
  // Bit model for the lowest distance bit.
  LznaBitModel v;
};
61
+
62
// State for model used for the short lengths for recent matches.
struct LznaShortLengthRecentModel {
  // One 3-bit model per value of (output offset & 3).
  Lzna3bitModel a[4];
};
66
+
67
// State for model for long lengths (see LznaReadLength).
struct LznaLongLengthModel {
  // First nibble; indexed by (output offset & 3).
  LznaNibbleModel first[4];
  // Read only when the first nibble is 12 or more.
  LznaNibbleModel second;
  // Read only when `second` yields 15 or more; its value is added to 15.
  LznaNibbleModel third;
};
73
+
74
// Complete LZNA state
struct LznaState {
  // Slots 4..7 hold the four most recent match distances (slot 4 is the newest).
  // Slots below 4 are padding that the fixed three-step rotate sequences in the
  // decoder shift values through.
  uint32 match_history[8];
  // Literal models, indexed by (output offset & 3).
  LznaLiteralModel literal[4];
  // Indexed by (output offset & 7) + 8 * state. A decoded 1 selects the
  // match/copy path, a decoded 0 selects the literal path.
  LznaBitModel is_literal[12 * 8];
  // Packet type nibble, same indexing as is_literal.
  LznaNibbleModel type[12 * 8];
  // Lengths 3..10 for a recent-distance match, indexed by history slot.
  LznaShortLengthRecentModel short_length_recent[4];
  // Lengths 11 and up for a recent-distance match.
  LznaLongLengthModel long_length_recent;
  // Indexed by whether the high part of the distance is zero.
  LznaLowBitsDistanceModel low_bits_of_distance[2];
  // Chooses length 3 vs 4; indexed by [state][output offset & 3].
  LznaBitModel short_length[12][4];
  // Indexed by (length - 3) for the 3..4 byte matches.
  LznaNearDistModel near_dist[2];
  // Lengths 5..12.
  Lzna3bitModel medium_length;
  // Lengths 13 and up.
  LznaLongLengthModel long_length;
  LznaFarDistModel far_distance;
};
89
+
90
// Starting table for every 4-bit model: 17 boundaries in steps of 0x800,
// i.e. 16 equally wide symbols covering 0x0000..0x8000.
static LznaNibbleModel lzna_initializer_4bit = {
  0x0, 0x800, 0x1000, 0x1800, 0x2000, 0x2800, 0x3000, 0x3800, 0x4000, 0x4800, 0x5000, 0x5800, 0x6000, 0x6800, 0x7000, 0x7800, 0x8000,
};
93
+
94
// Starting table for every 3-bit model: 9 boundaries in steps of 0x1000,
// i.e. 8 equally wide symbols covering 0x0000..0x8000.
static Lzna3bitModel lzna_initializer_3bit = {
  0x0, 0x1000, 0x2000, 0x3000, 0x4000, 0x5000, 0x6000, 0x7000, 0x8000
};
97
+
98
+ static void LznaNibbleModel_Init(LznaNibbleModel *d) {
99
+ *d = lzna_initializer_4bit;
100
+ }
101
+
102
+ static void Lzna3bitModel_Init(Lzna3bitModel *d) {
103
+ *d = lzna_initializer_3bit;
104
+ }
105
+
106
+ static void LznaNibbleModel_InitN(LznaNibbleModel *d, int n) {
107
+ do LznaNibbleModel_Init(d++); while (--n);
108
+ }
109
+
110
+ static void LznaLiteralModel_InitN(LznaLiteralModel *d, int n) {
111
+ do {
112
+ LznaNibbleModel_InitN(d->upper, 16);
113
+ LznaNibbleModel_InitN(d->lower, 16);
114
+ LznaNibbleModel_InitN(d->nomatch, 16);
115
+ } while (d++, --n);
116
+ }
117
+
118
+ static void LznaShortLengthRecentModel_InitN(LznaShortLengthRecentModel *d, int n) {
119
+ do {
120
+ Lzna3bitModel_Init(&d->a[0]);
121
+ Lzna3bitModel_Init(&d->a[1]);
122
+ Lzna3bitModel_Init(&d->a[2]);
123
+ Lzna3bitModel_Init(&d->a[3]);
124
+ } while (d++, --n);
125
+ }
126
+
127
+ static void LznaNearDistModel_Init(LznaNearDistModel *d, int n) {
128
+ int i;
129
+ do {
130
+ LznaNibbleModel_Init(&d->first);
131
+
132
+ for (i = 0; i < 16; i++) {
133
+ d->second[i] = 0x2000;
134
+ d->third[0][i] = 0x2000;
135
+ d->third[1][i] = 0x2000;
136
+ }
137
+
138
+ } while (d++, --n);
139
+ }
140
+
141
+ static void LznaLowBitsDistanceModel_Init(LznaLowBitsDistanceModel *d, int n) {
142
+ do {
143
+ d->v = 0x2000;
144
+ LznaNibbleModel_InitN(d->d, 2);
145
+ } while (d++, --n);
146
+ }
147
+
148
+ static void LznaFarDistModel_Init(LznaFarDistModel *d) {
149
+ int i;
150
+ LznaNibbleModel_Init(&d->first_lo);
151
+ LznaNibbleModel_Init(&d->first_hi);
152
+ for (i = 0; i < 31; i++) {
153
+ d->second[i] = 0x2000;
154
+ d->third[0][i] = 0x2000;
155
+ d->third[1][i] = 0x2000;
156
+ }
157
+ }
158
+
159
+ void LZNA_InitLookup(LznaState *lut) {
160
+ int i, j;
161
+
162
+ for (i = 0; i < 4; i++)
163
+ lut->match_history[i + 4] = 1;
164
+
165
+ for (i = 0; i < 96; i++)
166
+ lut->is_literal[i] = 0x1000;
167
+
168
+ LznaNibbleModel_InitN(lut->type, 96);
169
+ LznaLiteralModel_InitN(lut->literal, 4);
170
+ LznaShortLengthRecentModel_InitN(lut->short_length_recent, 4);
171
+
172
+ LznaNibbleModel_InitN(lut->long_length_recent.first, 4);
173
+ LznaNibbleModel_Init(&lut->long_length_recent.second);
174
+ LznaNibbleModel_InitN(&lut->long_length_recent.third, 1);
175
+
176
+ for (i = 0; i < 12; i++)
177
+ for(j = 0; j < 4; j++)
178
+ lut->short_length[i][j] = 0x2000;
179
+
180
+ LznaNearDistModel_Init(lut->near_dist, 2);
181
+ LznaLowBitsDistanceModel_Init(lut->low_bits_of_distance, 2);
182
+
183
+ Lzna3bitModel_Init(&lut->medium_length);
184
+
185
+ LznaNibbleModel_InitN(lut->long_length.first, 4);
186
+ LznaNibbleModel_Init(&lut->long_length.second);
187
+ LznaNibbleModel_InitN(&lut->long_length.third, 1);
188
+ LznaFarDistModel_Init(&lut->far_distance);
189
+ }
190
+
191
// Reader state for the two interleaved RANS streams.
struct LznaBitReader {
  // bits_a is the stream the next decode operates on; bits_b is the other
  // one. LznaRenormalize swaps them after every operation.
  uint64 bits_a, bits_b;
  // src: next 32-bit word to pull during renormalization.
  // src_start: the address the reader was initialized with.
  const uint32 *src, *src_start;
};
195
+
196
+ // Initialize bit reader with 2 parallel streams. Every decode operation
197
+ // swaps the two streams.
198
+ static void LznaBitReader_Init(LznaBitReader *tab, const byte *src) {
199
+ int d, n, i;
200
+ uint64 v;
201
+
202
+ tab->src_start = (uint32*)src;
203
+
204
+ d = *src++;
205
+ n = d >> 4;
206
+ assert(n <= 8);
207
+ for (i = 0, v = 0; i < n; i++)
208
+ v = (v << 8) | *src++;
209
+ tab->bits_a = (v << 4) | (d & 0xF);
210
+
211
+ d = *src++;
212
+ n = d >> 4;
213
+ assert(n <= 8);
214
+ for (i = 0, v = 0; i < n; i++)
215
+ v = (v << 8) | *src++;
216
+ tab->bits_b = (v << 4) | (d & 0xF);
217
+ tab->src = (uint32*)src;
218
+ }
219
+
220
+ // Renormalize by filling up the RANS state and swapping the two streams
221
+ static void __forceinline LznaRenormalize(LznaBitReader *tab) {
222
+ uint64 x = tab->bits_a;
223
+ if (x < 0x80000000)
224
+ x = (x << 32) | *tab->src++;
225
+ tab->bits_a = tab->bits_b;
226
+ tab->bits_b = x;
227
+ }
228
+
229
+ // Read a single bit with a uniform distribution.
230
+ static uint32 __forceinline LznaReadBit(LznaBitReader *tab) {
231
+ int r = tab->bits_a & 1;
232
+ tab->bits_a >>= 1;
233
+ LznaRenormalize(tab);
234
+ return r;
235
+ }
236
+
237
+ // Read a number of bits with a uniform distribution.
238
+ static uint32 __forceinline LznaReadNBits(LznaBitReader *tab, int bits) {
239
+ uint32 rv = tab->bits_a & ((1 << bits) - 1);
240
+ tab->bits_a >>= bits;
241
+ LznaRenormalize(tab);
242
+ return rv;
243
+ }
244
+
245
+
246
+ // Read a 4-bit value using an adaptive RANS model
247
+ static uint32 __forceinline LznaReadNibble(LznaBitReader *tab, LznaNibbleModel *model) {
248
+ __m128i t, t0, t1, c0, c1;
249
+ unsigned long bitindex;
250
+ unsigned int start, end;
251
+ uint64 x = tab->bits_a;
252
+
253
+ t0 = _mm_loadu_si128((const __m128i *)&model->prob[0]);
254
+ t1 = _mm_loadu_si128((const __m128i *)&model->prob[8]);
255
+
256
+ t = _mm_cvtsi32_si128((int16)x);
257
+ t = _mm_and_si128(_mm_shuffle_epi32(_mm_unpacklo_epi16(t, t), 0), _mm_set1_epi16(0x7FFF));
258
+
259
+ c0 = _mm_cmpgt_epi16(t0, t);
260
+ c1 = _mm_cmpgt_epi16(t1, t);
261
+
262
+ _BitScanForward(&bitindex, _mm_movemask_epi8(_mm_packs_epi16(c0, c1)) | 0x10000);
263
+ start = model->prob[bitindex - 1];
264
+ end = model->prob[bitindex];
265
+
266
+ c0 = _mm_and_si128(_mm_set1_epi16(0x7FD9), c0);
267
+ c1 = _mm_and_si128(_mm_set1_epi16(0x7FD9), c1);
268
+
269
+ c0 = _mm_add_epi16(c0, _mm_set_epi16(56, 48, 40, 32, 24, 16, 8, 0));
270
+ c1 = _mm_add_epi16(c1, _mm_set_epi16(120, 112, 104, 96, 88, 80, 72, 64));
271
+
272
+ t0 = _mm_add_epi16(_mm_srai_epi16(_mm_sub_epi16(c0, t0), 7), t0);
273
+ t1 = _mm_add_epi16(_mm_srai_epi16(_mm_sub_epi16(c1, t1), 7), t1);
274
+
275
+ _mm_storeu_si128((__m128i *)&model->prob[0], t0);
276
+ _mm_storeu_si128((__m128i *)&model->prob[8], t1);
277
+
278
+ tab->bits_a = (end - start) * (x >> 15) + (x & 0x7FFF) - start;
279
+ LznaRenormalize(tab);
280
+ return (int)bitindex - 1;
281
+ }
282
+
283
+ // Read a 3-bit value using an adaptive RANS model
284
+ static uint32 __forceinline LznaRead3bit(LznaBitReader *tab, Lzna3bitModel *model) {
285
+ __m128i t, t0, c0;
286
+ unsigned long bitindex;
287
+ unsigned int start, end;
288
+ uint64 x = tab->bits_a;
289
+
290
+ t0 = _mm_loadu_si128((const __m128i *)&model->prob[0]);
291
+ t = _mm_cvtsi32_si128(x & 0x7FFF);
292
+ t = _mm_shuffle_epi32(_mm_unpacklo_epi16(t, t), 0);
293
+ c0 = _mm_cmpgt_epi16(t0, t);
294
+
295
+ _BitScanForward(&bitindex, _mm_movemask_epi8(c0) | 0x10000);
296
+ bitindex >>= 1;
297
+ start = model->prob[bitindex - 1];
298
+ end = model->prob[bitindex];
299
+
300
+ c0 = _mm_and_si128(_mm_set1_epi16(0x7FE5), c0);
301
+ c0 = _mm_add_epi16(c0, _mm_set_epi16(56, 48, 40, 32, 24, 16, 8, 0));
302
+ t0 = _mm_add_epi16(_mm_srai_epi16(_mm_sub_epi16(c0, t0), 7), t0);
303
+ _mm_storeu_si128((__m128i *)&model->prob[0], t0);
304
+
305
+ tab->bits_a = (end - start) * (x >> 15) + (x & 0x7FFF) - start;
306
+ LznaRenormalize(tab);
307
+ return bitindex - 1;
308
+ }
309
+
310
+ // Read a 1-bit value using an adaptive RANS model
311
+ static uint32 __forceinline LznaRead1Bit(LznaBitReader *tab, LznaBitModel *model, int nbits, int shift) {
312
+ uint64 q;
313
+ int magn = 1 << nbits;
314
+ q = *model * (tab->bits_a >> nbits);
315
+ if ((tab->bits_a & (magn - 1)) >= *model) {
316
+ tab->bits_a -= q + *model;
317
+ *model = *model - (*model >> shift);
318
+ LznaRenormalize(tab);
319
+ return 1;
320
+ } else {
321
+ tab->bits_a = (tab->bits_a & (magn - 1)) + q;
322
+ *model = *model + ((magn - *model) >> shift);
323
+ LznaRenormalize(tab);
324
+ return 0;
325
+ }
326
+ }
327
+
328
+ // Read a far distance using the far distance model
329
+ static uint32 __forceinline LznaReadFarDistance(LznaBitReader *tab, LznaState *lut) {
330
+ uint32 n = LznaReadNibble(tab, &lut->far_distance.first_lo);
331
+ uint32 hi;
332
+ if (n >= 15)
333
+ n = 15 + LznaReadNibble(tab, &lut->far_distance.first_hi);
334
+ hi = 0;
335
+ if (n != 0) {
336
+ hi = LznaRead1Bit(tab, &lut->far_distance.second[n - 1], 14, 6) + 2;
337
+ if (n != 1) {
338
+ hi = (hi << 1) + LznaRead1Bit(tab, &lut->far_distance.third[hi - 2][n - 1], 14, 6);
339
+ if (n != 2)
340
+ hi = (hi << (n - 2)) + LznaReadNBits(tab, n - 2);
341
+ }
342
+ hi -= 1;
343
+ }
344
+ LznaLowBitsDistanceModel *lutd = &lut->low_bits_of_distance[hi == 0];
345
+ uint32 low_bit = LznaRead1Bit(tab, &lutd->v, 14, 6);
346
+ uint32 low_nibble = LznaReadNibble(tab, &lutd->d[low_bit]);
347
+ return low_bit + (2 * low_nibble) + (32 * hi) + 1;
348
+ }
349
+
350
+ // Read a near distance using a near distance model
351
+ static uint32 __forceinline LznaReadNearDistance(LznaBitReader *tab, LznaState *lut, LznaNearDistModel *model) {
352
+ uint32 nb = LznaReadNibble(tab, &model->first);
353
+ uint32 hi = 0;
354
+ if (nb != 0) {
355
+ hi = LznaRead1Bit(tab, &model->second[nb - 1], 14, 6) + 2;
356
+ if (nb != 1) {
357
+ hi = (hi << 1) + LznaRead1Bit(tab, &model->third[hi - 2][nb - 1], 14, 6);
358
+ if (nb != 2)
359
+ hi = (hi << (nb - 2)) + LznaReadNBits(tab, nb - 2);
360
+ }
361
+ hi -= 1;
362
+ }
363
+ LznaLowBitsDistanceModel *lutd = &lut->low_bits_of_distance[hi == 0];
364
+ uint32 low_bit = LznaRead1Bit(tab, &lutd->v, 14, 6);
365
+ uint32 low_nibble = LznaReadNibble(tab, &lutd->d[low_bit]);
366
+ return low_bit + (2 * low_nibble) + (32 * hi) + 1;
367
+ }
368
+
369
+ // Read a length using the length model.
370
+ static uint32 __forceinline LznaReadLength(LznaBitReader *tab, LznaLongLengthModel *model, int64 dst_offs) {
371
+ uint32 length = LznaReadNibble(tab, &model->first[dst_offs & 3]);
372
+ if (length >= 12) {
373
+ uint32 b = LznaReadNibble(tab, &model->second);
374
+ if (b >= 15)
375
+ b = 15 + LznaReadNibble(tab, &model->third);
376
+ uint32 n = 0;
377
+ uint32 base = 0;
378
+ if (b) {
379
+ n = (b - 1) >> 1;
380
+ base = ((((b - 1) & 1) + 2) << n) - 1;
381
+ }
382
+ length += (LznaReadNBits(tab, n) + base) * 4;
383
+ }
384
+ return length;
385
+ }
386
+
387
// State transition applied after a literal is decoded: the new state is
// next_state_lit[old state], for the 12 decoder states.
static const uint8 next_state_lit[12] = {
  0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5
};
390
+
391
+ static void LznaCopyLongDist(byte *dst, size_t dist, size_t length) {
392
+ const byte *src = dst - dist;
393
+ ((uint64*)dst)[0] = ((uint64*)src)[0];
394
+ ((uint64*)dst)[1] = ((uint64*)src)[1];
395
+ if (length > 16) {
396
+ do {
397
+ ((uint64*)dst)[2] = ((uint64*)src)[2];
398
+ dst += 8;
399
+ src += 8;
400
+ length -= 8;
401
+ } while (length > 16);
402
+ }
403
+ }
404
+
405
+ static void LznaCopyShortDist(byte *dst, size_t dist, size_t length) {
406
+ const byte *src = dst - dist;
407
+ if (dist >= 4) {
408
+ ((uint32*)dst)[0] = ((uint32*)src)[0];
409
+ ((uint32*)dst)[1] = ((uint32*)src)[1];
410
+ ((uint32*)dst)[2] = ((uint32*)src)[2];
411
+ if (length > 12) {
412
+ ((uint32*)dst)[3] = ((uint32*)src)[3];
413
+ if (length > 16) {
414
+ do {
415
+ ((uint32*)dst)[4] = ((uint32*)src)[4];
416
+ length -= 4;
417
+ dst += 4;
418
+ src += 4;
419
+ } while (length > 16);
420
+ }
421
+ }
422
+ } else if (dist == 1) {
423
+ memset(dst, *src, length);
424
+ } else {
425
+ ((byte*)dst)[0] = ((byte*)src)[0];
426
+ ((byte*)dst)[1] = ((byte*)src)[1];
427
+ ((byte*)dst)[2] = ((byte*)src)[2];
428
+ ((byte*)dst)[3] = ((byte*)src)[3];
429
+ ((byte*)dst)[4] = ((byte*)src)[4];
430
+ ((byte*)dst)[5] = ((byte*)src)[5];
431
+ ((byte*)dst)[6] = ((byte*)src)[6];
432
+ ((byte*)dst)[7] = ((byte*)src)[7];
433
+ ((byte*)dst)[8] = ((byte*)src)[8];
434
+ while (length > 9) {
435
+ ((byte*)dst)[9] = ((byte*)src)[9];
436
+ dst += 1;
437
+ src += 1;
438
+ length -= 1;
439
+ }
440
+ }
441
+ }
442
+
443
+ static void LznaCopy4to12(byte *dst, size_t dist, size_t length) {
444
+ const byte *src = dst - dist;
445
+ dst[0] = src[0];
446
+ dst[1] = src[1];
447
+ dst[2] = src[2];
448
+ dst[3] = src[3];
449
+ if (length > 4) {
450
+ dst[4] = src[4];
451
+ dst[5] = src[5];
452
+ dst[6] = src[6];
453
+ dst[7] = src[7];
454
+ if (length > 8) {
455
+ dst[8] = src[8];
456
+ dst[9] = src[9];
457
+ dst[10] = src[10];
458
+ dst[11] = src[11];
459
+ }
460
+ }
461
+ }
462
+
463
+ static void LznaPreprocessMatchHistory(LznaState *lut) {
464
+ if (lut->match_history[4] >= 0xc000) {
465
+ size_t i = 0;
466
+ while (lut->match_history[4 + i] >= 0xC000) {
467
+ ++i;
468
+ if (i >= 4) {
469
+ lut->match_history[7] = lut->match_history[6];
470
+ lut->match_history[6] = lut->match_history[5];
471
+ lut->match_history[5] = lut->match_history[4];
472
+ lut->match_history[4] = 4;
473
+ return;
474
+ }
475
+ }
476
+ uint32 t = lut->match_history[i + 4];
477
+ lut->match_history[i + 4] = lut->match_history[i + 3];
478
+ lut->match_history[i + 3] = lut->match_history[i + 2];
479
+ lut->match_history[i + 2] = lut->match_history[i + 1];
480
+ lut->match_history[4] = t;
481
+ }
482
+ }
483
+
484
+ int LZNA_DecodeQuantum(byte *dst, byte *dst_end, byte *dst_start,
485
+ const byte *src_in, const byte *src_end,
486
+ LznaState *lut) {
487
+ LznaBitReader tab;
488
+ uint32 x;
489
+ uint32 dst_offs = dst - dst_start;
490
+ uint32 match_val;
491
+ uint32 state;
492
+ uint32 length;
493
+ uint32 dist;
494
+
495
+ LznaPreprocessMatchHistory(lut);
496
+ LznaBitReader_Init(&tab, src_in);
497
+ dist = lut->match_history[4];
498
+
499
+ state = 5;
500
+ dst_end -= 8;
501
+
502
+ if (dst_offs == 0) {
503
+ if (LznaReadBit(&tab)) {
504
+ x = 0;
505
+ } else {
506
+ LznaLiteralModel *model = &lut->literal[0];
507
+ x = LznaReadNibble(&tab, &model->upper[0]);
508
+ x = (x << 4) + LznaReadNibble(&tab, (x != 0) ? &model->nomatch[x] : &model->lower[0]);
509
+ }
510
+ *dst++ = x;
511
+ dst_offs += 1;
512
+ }
513
+ while (dst < dst_end) {
514
+ match_val = *(dst - dist);
515
+
516
+ if (LznaRead1Bit(&tab, &lut->is_literal[(dst_offs & 7) + 8 * state], 13, 5)) {
517
+ x = LznaReadNibble(&tab, &lut->type[(dst_offs & 7) + 8 * state]);
518
+ if (x == 0) {
519
+ // Copy 1 byte from most recent distance
520
+ *dst++ = match_val;
521
+ dst_offs += 1;
522
+ state = (state >= 7) ? 11 : 9;
523
+ } else if (x < 4) {
524
+ if (x == 1) {
525
+ // Copy count 3-4
526
+ length = 3 + LznaRead1Bit(&tab, &lut->short_length[state][dst_offs & 3], 14, 4);
527
+ dist = LznaReadNearDistance(&tab, lut, &lut->near_dist[length - 3]);
528
+ dst[0] = (dst - dist)[0];
529
+ dst[1] = (dst - dist)[1];
530
+ dst[2] = (dst - dist)[2];
531
+ dst[3] = (dst - dist)[3];
532
+ } else if (x == 2) {
533
+ // Copy count 5-12
534
+ length = 5 + LznaRead3bit(&tab, &lut->medium_length);
535
+ dist = LznaReadFarDistance(&tab, lut);
536
+ if (dist >= 8) {
537
+ ((uint64*)dst)[0] = ((uint64*)(dst - dist))[0];
538
+ ((uint64*)dst)[1] = ((uint64*)(dst - dist))[1];
539
+ } else {
540
+ LznaCopy4to12(dst, dist, length);
541
+ }
542
+ } else {
543
+ // Copy count 13-
544
+ length = LznaReadLength(&tab, &lut->long_length, dst_offs) + 13;
545
+ dist = LznaReadFarDistance(&tab, lut);
546
+ if (dist >= 8)
547
+ LznaCopyLongDist(dst, dist, length);
548
+ else
549
+ LznaCopyShortDist(dst, dist, length);
550
+ }
551
+ state = (state >= 7) ? 10 : 7;
552
+ lut->match_history[7] = lut->match_history[6];
553
+ lut->match_history[6] = lut->match_history[5];
554
+ lut->match_history[5] = lut->match_history[4];
555
+ lut->match_history[4] = dist;
556
+ dst += length;
557
+ dst_offs += length;
558
+ } else if (x >= 12) {
559
+ // Copy 2 bytes from a recent distance
560
+ size_t idx = x - 12;
561
+ dist = lut->match_history[4 + idx];
562
+ lut->match_history[4 + idx] = lut->match_history[3 + idx];
563
+ lut->match_history[3 + idx] = lut->match_history[2 + idx];
564
+ lut->match_history[2 + idx] = lut->match_history[1 + idx];
565
+ lut->match_history[4] = dist;
566
+ dst[0] = *(dst - dist + 0);
567
+ dst[1] = *(dst - dist + 1);
568
+ state = (state >= 7) ? 11 : 8;
569
+ dst_offs += 2;
570
+ dst += 2;
571
+ } else {
572
+ size_t idx = (x - 4) >> 1;
573
+ dist = lut->match_history[4 + idx];
574
+ lut->match_history[4 + idx] = lut->match_history[3 + idx];
575
+ lut->match_history[3 + idx] = lut->match_history[2 + idx];
576
+ lut->match_history[2 + idx] = lut->match_history[1 + idx];
577
+ lut->match_history[4] = dist;
578
+ if (x & 1) {
579
+ // Copy 11- bytes from recent distance
580
+ length = 11 + LznaReadLength(&tab, &lut->long_length_recent, dst_offs);
581
+ if (dist >= 8) {
582
+ LznaCopyLongDist(dst, dist, length);
583
+ } else {
584
+ LznaCopyShortDist(dst, dist, length);
585
+ }
586
+ } else {
587
+ // Copy 3-10 bytes from recent distance
588
+ length = 3 + LznaRead3bit(&tab, &lut->short_length_recent[idx].a[dst_offs & 3]);
589
+ if (dist >= 8) {
590
+ ((uint64*)dst)[0] = ((uint64*)(dst - dist))[0];
591
+ ((uint64*)dst)[1] = ((uint64*)(dst - dist))[1];
592
+ } else {
593
+ LznaCopy4to12(dst, dist, length);
594
+ }
595
+ }
596
+ state = (state >= 7) ? 11 : 8;
597
+ dst_offs += length;
598
+ dst += length;
599
+ }
600
+ } else {
601
+ // Output a literal
602
+ LznaLiteralModel *model = &lut->literal[dst_offs & 3];
603
+ x = LznaReadNibble(&tab, &model->upper[match_val >> 4]);
604
+ x = (x << 4) + LznaReadNibble(&tab, ((match_val >> 4) != x) ? &model->nomatch[x] : &model->lower[match_val & 0xF]);
605
+ *dst++ = x;
606
+ dst_offs += 1;
607
+ state = next_state_lit[state];
608
+ }
609
+ }
610
+
611
+ if (dst != dst_end)
612
+ return -1;
613
+
614
+ *(uint64*)dst = (uint32)tab.bits_a | (tab.bits_b << 32);
615
+
616
+ return (byte*)tab.src - src_in;
617
+ }