oodle-kraken-ruby 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +14 -0
- data/ext/oodle-kraken/extconf.rb +11 -0
- data/ext/oodle-kraken/oodle_kraken_c.c +38 -0
- data/ext/oodle-kraken/ooz/LICENSE +15 -0
- data/ext/oodle-kraken/ooz/README.md +23 -0
- data/ext/oodle-kraken/ooz/bitknit.cpp +429 -0
- data/ext/oodle-kraken/ooz/kraken.cpp +4153 -0
- data/ext/oodle-kraken/ooz/kraken.h +33 -0
- data/ext/oodle-kraken/ooz/lzna.cpp +617 -0
- data/ext/oodle-kraken/ooz/ooz.cpp +342 -0
- data/ext/oodle-kraken/ooz/stdafx.cpp +8 -0
- data/ext/oodle-kraken/ooz/stdafx.h +68 -0
- data/ext/oodle-kraken/ooz/targetver.h +8 -0
- data/lib/oodle-kraken-ruby.rb +2 -0
- metadata +58 -0
@@ -0,0 +1,33 @@
|
|
1
|
+
/*
|
2
|
+
Copyright (C) 2022, Kerilk
|
3
|
+
|
4
|
+
This program is free software: you can redistribute it and/or modify
|
5
|
+
it under the terms of the GNU General Public License as published by
|
6
|
+
the Free Software Foundation, either version 3 of the License, or
|
7
|
+
(at your option) any later version.
|
8
|
+
|
9
|
+
This program is distributed in the hope that it will be useful,
|
10
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
11
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
12
|
+
GNU General Public License for more details.
|
13
|
+
|
14
|
+
You should have received a copy of the GNU General Public License
|
15
|
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
16
|
+
*/
|
17
|
+
#ifndef KRAKEN_H
#define KRAKEN_H

#include <stdlib.h>
#include <stdint.h>
/* ssize_t is a POSIX type declared by <sys/types.h>; <stdlib.h> and
 * <stdint.h> are not required to provide it. */
#include <sys/types.h>

#ifdef __cplusplus
extern "C" {
#endif

/*
 * C-linkage entry point of the decompressor.
 *
 * src / src_len: input buffer and its size in bytes.
 * dst / dst_len: output buffer and its size in bytes.
 *
 * NOTE(review): the return convention is not visible from this header;
 * presumably a negative value signals failure -- confirm against the
 * definition in kraken.cpp.
 */
extern ssize_t Kraken_Decompress(const uint8_t *src, size_t src_len, uint8_t *dst, size_t dst_len);

#ifdef __cplusplus
}
#endif

#endif
|
@@ -0,0 +1,617 @@
|
|
1
|
+
/*
|
2
|
+
Copyright (C) 2016, Powzix
|
3
|
+
Copyright (C) 2019, rarten
|
4
|
+
|
5
|
+
This program is free software: you can redistribute it and/or modify
|
6
|
+
it under the terms of the GNU General Public License as published by
|
7
|
+
the Free Software Foundation, either version 3 of the License, or
|
8
|
+
(at your option) any later version.
|
9
|
+
|
10
|
+
This program is distributed in the hope that it will be useful,
|
11
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
13
|
+
GNU General Public License for more details.
|
14
|
+
|
15
|
+
You should have received a copy of the GNU General Public License
|
16
|
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
17
|
+
*/
|
18
|
+
|
19
|
+
#include "stdafx.h"
#include <string.h>
|
20
|
+
|
21
|
+
|
22
|
+
// Adaptive probability of a single binary decision (see LznaRead1Bit).
typedef uint16 LznaBitModel;

// Adaptive model for a 4-bit symbol.
// prob[] is a cumulative table with one boundary per symbol plus a final
// sentinel: symbol s owns the interval [prob[s], prob[s + 1]) and the
// initial table runs from 0x0 to 0x8000.
struct LznaNibbleModel {
  uint16 prob[16 + 1];
};

// Adaptive model for a 3-bit symbol; same layout with 8 symbols
// (8 boundaries plus the 0x8000 sentinel).
struct Lzna3bitModel {
  uint16 prob[8 + 1];
};
|
33
|
+
|
34
|
+
// State for the literal model
|
35
|
+
struct LznaLiteralModel {
|
36
|
+
LznaNibbleModel upper[16];
|
37
|
+
LznaNibbleModel lower[16];
|
38
|
+
LznaNibbleModel nomatch[16];
|
39
|
+
};
|
40
|
+
|
41
|
+
// State for a model representing a far distance
|
42
|
+
struct LznaFarDistModel {
|
43
|
+
LznaNibbleModel first_lo;
|
44
|
+
LznaNibbleModel first_hi;
|
45
|
+
LznaBitModel second[31];
|
46
|
+
LznaBitModel third[2][31];
|
47
|
+
};
|
48
|
+
|
49
|
+
// State for a model representing a near distance
|
50
|
+
struct LznaNearDistModel {
|
51
|
+
LznaNibbleModel first;
|
52
|
+
LznaBitModel second[16];
|
53
|
+
LznaBitModel third[2][16];
|
54
|
+
};
|
55
|
+
|
56
|
+
// State for model representing the low bits of a distance
|
57
|
+
struct LznaLowBitsDistanceModel {
|
58
|
+
LznaNibbleModel d[2];
|
59
|
+
LznaBitModel v;
|
60
|
+
};
|
61
|
+
|
62
|
+
// State for model used for the short lengths for recent matches
|
63
|
+
struct LznaShortLengthRecentModel {
|
64
|
+
Lzna3bitModel a[4];
|
65
|
+
};
|
66
|
+
|
67
|
+
// State for model for long lengths
|
68
|
+
struct LznaLongLengthModel {
|
69
|
+
LznaNibbleModel first[4];
|
70
|
+
LznaNibbleModel second;
|
71
|
+
LznaNibbleModel third;
|
72
|
+
};
|
73
|
+
|
74
|
+
// Complete LZNA state
|
75
|
+
struct LznaState {
|
76
|
+
uint32 match_history[8];
|
77
|
+
LznaLiteralModel literal[4];
|
78
|
+
LznaBitModel is_literal[12 * 8];
|
79
|
+
LznaNibbleModel type[12 * 8];
|
80
|
+
LznaShortLengthRecentModel short_length_recent[4];
|
81
|
+
LznaLongLengthModel long_length_recent;
|
82
|
+
LznaLowBitsDistanceModel low_bits_of_distance[2];
|
83
|
+
LznaBitModel short_length[12][4];
|
84
|
+
LznaNearDistModel near_dist[2];
|
85
|
+
Lzna3bitModel medium_length;
|
86
|
+
LznaLongLengthModel long_length;
|
87
|
+
LznaFarDistModel far_distance;
|
88
|
+
};
|
89
|
+
|
90
|
+
// Template for a freshly reset 4-bit model: 16 equal intervals of 0x800
// ending at the 0x8000 sentinel. Only ever copied from, hence const.
static const LznaNibbleModel lzna_initializer_4bit = {
  {
    0x0000, 0x0800, 0x1000, 0x1800, 0x2000, 0x2800, 0x3000, 0x3800,
    0x4000, 0x4800, 0x5000, 0x5800, 0x6000, 0x6800, 0x7000, 0x7800,
    0x8000,
  }
};
|
93
|
+
|
94
|
+
// Template for a freshly reset 3-bit model: 8 equal intervals of 0x1000
// ending at the 0x8000 sentinel. Only ever copied from, hence const.
static const Lzna3bitModel lzna_initializer_3bit = {
  {
    0x0000, 0x1000, 0x2000, 0x3000, 0x4000, 0x5000, 0x6000, 0x7000,
    0x8000,
  }
};
|
97
|
+
|
98
|
+
// Resets one 4-bit model to the uniform template table.
static void LznaNibbleModel_Init(LznaNibbleModel *d) {
  d[0] = lzna_initializer_4bit;
}
|
101
|
+
|
102
|
+
// Resets one 3-bit model to the uniform template table.
static void Lzna3bitModel_Init(Lzna3bitModel *d) {
  d[0] = lzna_initializer_3bit;
}
|
105
|
+
|
106
|
+
// Resets the n consecutive 4-bit models starting at d.
// A count of zero or less is a no-op (the former do/while form would have
// run off the end of the array in that case).
static void LznaNibbleModel_InitN(LznaNibbleModel *d, int n) {
  for (int i = 0; i < n; i++)
    LznaNibbleModel_Init(&d[i]);
}
|
109
|
+
|
110
|
+
// Resets the n consecutive literal models starting at d.
// A count of zero or less is a no-op.
static void LznaLiteralModel_InitN(LznaLiteralModel *d, int n) {
  for (int i = 0; i < n; i++) {
    LznaNibbleModel_InitN(d[i].upper, 16);
    LznaNibbleModel_InitN(d[i].lower, 16);
    LznaNibbleModel_InitN(d[i].nomatch, 16);
  }
}
|
117
|
+
|
118
|
+
// Resets the n consecutive short-length models starting at d, i.e. all four
// 3-bit tables of each one. A count of zero or less is a no-op.
static void LznaShortLengthRecentModel_InitN(LznaShortLengthRecentModel *d, int n) {
  for (int i = 0; i < n; i++) {
    for (int k = 0; k < 4; k++)
      Lzna3bitModel_Init(&d[i].a[k]);
  }
}
|
126
|
+
|
127
|
+
// Resets the n consecutive near-distance models starting at d.
// The nibble model gets the uniform table; every bit model starts at 0x2000,
// the midpoint of the 14-bit scale the decoder reads them with.
// A count of zero or less is a no-op.
static void LznaNearDistModel_Init(LznaNearDistModel *d, int n) {
  for (int m = 0; m < n; m++) {
    LznaNibbleModel_Init(&d[m].first);
    for (int i = 0; i < 16; i++) {
      d[m].second[i] = 0x2000;
      d[m].third[0][i] = 0x2000;
      d[m].third[1][i] = 0x2000;
    }
  }
}
|
140
|
+
|
141
|
+
// Resets the n consecutive low-bits distance models starting at d.
// A count of zero or less is a no-op.
static void LznaLowBitsDistanceModel_Init(LznaLowBitsDistanceModel *d, int n) {
  for (int i = 0; i < n; i++) {
    d[i].v = 0x2000;
    LznaNibbleModel_InitN(d[i].d, 2);
  }
}
|
147
|
+
|
148
|
+
// Resets the far-distance model: uniform tables for both nibble models and
// 0x2000 for each of the 31 entries of the bit-model arrays.
static void LznaFarDistModel_Init(LznaFarDistModel *d) {
  LznaNibbleModel_Init(&d->first_lo);
  LznaNibbleModel_Init(&d->first_hi);

  for (int k = 0; k != 31; ++k) {
    d->second[k] = 0x2000;
    d->third[0][k] = 0x2000;
    d->third[1][k] = 0x2000;
  }
}
|
158
|
+
|
159
|
+
// Resets every model in *lut to its initial state.
//
// The four recent-distance slots (match_history[4..7]) start at distance 1.
// The lower slots are zeroed as well: the rotate-to-front shuffles in the
// decoder read them, and without this they would be indeterminate.
void LZNA_InitLookup(LznaState *lut) {
  int i, j;

  for (i = 0; i < 4; i++) {
    lut->match_history[i] = 0;
    lut->match_history[i + 4] = 1;
  }

  // is_literal is read with a 13-bit scale, so 0x1000 is its midpoint.
  for (i = 0; i < 96; i++)
    lut->is_literal[i] = 0x1000;

  LznaNibbleModel_InitN(lut->type, 96);
  LznaLiteralModel_InitN(lut->literal, 4);
  LznaShortLengthRecentModel_InitN(lut->short_length_recent, 4);

  LznaNibbleModel_InitN(lut->long_length_recent.first, 4);
  LznaNibbleModel_Init(&lut->long_length_recent.second);
  LznaNibbleModel_Init(&lut->long_length_recent.third);

  // short_length is read with a 14-bit scale, so 0x2000 is its midpoint.
  for (i = 0; i < 12; i++)
    for (j = 0; j < 4; j++)
      lut->short_length[i][j] = 0x2000;

  LznaNearDistModel_Init(lut->near_dist, 2);
  LznaLowBitsDistanceModel_Init(lut->low_bits_of_distance, 2);

  Lzna3bitModel_Init(&lut->medium_length);

  LznaNibbleModel_InitN(lut->long_length.first, 4);
  LznaNibbleModel_Init(&lut->long_length.second);
  LznaNibbleModel_Init(&lut->long_length.third);
  LznaFarDistModel_Init(&lut->far_distance);
}
|
190
|
+
|
191
|
+
struct LznaBitReader {
|
192
|
+
uint64 bits_a, bits_b;
|
193
|
+
const uint32 *src, *src_start;
|
194
|
+
};
|
195
|
+
|
196
|
+
// Initialize bit reader with 2 parallel streams. Every decode operation
|
197
|
+
// swaps the two streams.
|
198
|
+
static void LznaBitReader_Init(LznaBitReader *tab, const byte *src) {
|
199
|
+
int d, n, i;
|
200
|
+
uint64 v;
|
201
|
+
|
202
|
+
tab->src_start = (uint32*)src;
|
203
|
+
|
204
|
+
d = *src++;
|
205
|
+
n = d >> 4;
|
206
|
+
assert(n <= 8);
|
207
|
+
for (i = 0, v = 0; i < n; i++)
|
208
|
+
v = (v << 8) | *src++;
|
209
|
+
tab->bits_a = (v << 4) | (d & 0xF);
|
210
|
+
|
211
|
+
d = *src++;
|
212
|
+
n = d >> 4;
|
213
|
+
assert(n <= 8);
|
214
|
+
for (i = 0, v = 0; i < n; i++)
|
215
|
+
v = (v << 8) | *src++;
|
216
|
+
tab->bits_b = (v << 4) | (d & 0xF);
|
217
|
+
tab->src = (uint32*)src;
|
218
|
+
}
|
219
|
+
|
220
|
+
// Renormalize by filling up the RANS state and swapping the two streams
|
221
|
+
static void __forceinline LznaRenormalize(LznaBitReader *tab) {
|
222
|
+
uint64 x = tab->bits_a;
|
223
|
+
if (x < 0x80000000)
|
224
|
+
x = (x << 32) | *tab->src++;
|
225
|
+
tab->bits_a = tab->bits_b;
|
226
|
+
tab->bits_b = x;
|
227
|
+
}
|
228
|
+
|
229
|
+
// Read a single bit with a uniform distribution.
|
230
|
+
static uint32 __forceinline LznaReadBit(LznaBitReader *tab) {
|
231
|
+
int r = tab->bits_a & 1;
|
232
|
+
tab->bits_a >>= 1;
|
233
|
+
LznaRenormalize(tab);
|
234
|
+
return r;
|
235
|
+
}
|
236
|
+
|
237
|
+
// Read a number of bits with a uniform distribution.
|
238
|
+
static uint32 __forceinline LznaReadNBits(LznaBitReader *tab, int bits) {
|
239
|
+
uint32 rv = tab->bits_a & ((1 << bits) - 1);
|
240
|
+
tab->bits_a >>= bits;
|
241
|
+
LznaRenormalize(tab);
|
242
|
+
return rv;
|
243
|
+
}
|
244
|
+
|
245
|
+
|
246
|
+
// Read a 4-bit value using an adaptive RANS model
|
247
|
+
static uint32 __forceinline LznaReadNibble(LznaBitReader *tab, LznaNibbleModel *model) {
|
248
|
+
__m128i t, t0, t1, c0, c1;
|
249
|
+
unsigned long bitindex;
|
250
|
+
unsigned int start, end;
|
251
|
+
uint64 x = tab->bits_a;
|
252
|
+
|
253
|
+
t0 = _mm_loadu_si128((const __m128i *)&model->prob[0]);
|
254
|
+
t1 = _mm_loadu_si128((const __m128i *)&model->prob[8]);
|
255
|
+
|
256
|
+
t = _mm_cvtsi32_si128((int16)x);
|
257
|
+
t = _mm_and_si128(_mm_shuffle_epi32(_mm_unpacklo_epi16(t, t), 0), _mm_set1_epi16(0x7FFF));
|
258
|
+
|
259
|
+
c0 = _mm_cmpgt_epi16(t0, t);
|
260
|
+
c1 = _mm_cmpgt_epi16(t1, t);
|
261
|
+
|
262
|
+
_BitScanForward(&bitindex, _mm_movemask_epi8(_mm_packs_epi16(c0, c1)) | 0x10000);
|
263
|
+
start = model->prob[bitindex - 1];
|
264
|
+
end = model->prob[bitindex];
|
265
|
+
|
266
|
+
c0 = _mm_and_si128(_mm_set1_epi16(0x7FD9), c0);
|
267
|
+
c1 = _mm_and_si128(_mm_set1_epi16(0x7FD9), c1);
|
268
|
+
|
269
|
+
c0 = _mm_add_epi16(c0, _mm_set_epi16(56, 48, 40, 32, 24, 16, 8, 0));
|
270
|
+
c1 = _mm_add_epi16(c1, _mm_set_epi16(120, 112, 104, 96, 88, 80, 72, 64));
|
271
|
+
|
272
|
+
t0 = _mm_add_epi16(_mm_srai_epi16(_mm_sub_epi16(c0, t0), 7), t0);
|
273
|
+
t1 = _mm_add_epi16(_mm_srai_epi16(_mm_sub_epi16(c1, t1), 7), t1);
|
274
|
+
|
275
|
+
_mm_storeu_si128((__m128i *)&model->prob[0], t0);
|
276
|
+
_mm_storeu_si128((__m128i *)&model->prob[8], t1);
|
277
|
+
|
278
|
+
tab->bits_a = (end - start) * (x >> 15) + (x & 0x7FFF) - start;
|
279
|
+
LznaRenormalize(tab);
|
280
|
+
return (int)bitindex - 1;
|
281
|
+
}
|
282
|
+
|
283
|
+
// Read a 3-bit value using an adaptive RANS model
|
284
|
+
static uint32 __forceinline LznaRead3bit(LznaBitReader *tab, Lzna3bitModel *model) {
|
285
|
+
__m128i t, t0, c0;
|
286
|
+
unsigned long bitindex;
|
287
|
+
unsigned int start, end;
|
288
|
+
uint64 x = tab->bits_a;
|
289
|
+
|
290
|
+
t0 = _mm_loadu_si128((const __m128i *)&model->prob[0]);
|
291
|
+
t = _mm_cvtsi32_si128(x & 0x7FFF);
|
292
|
+
t = _mm_shuffle_epi32(_mm_unpacklo_epi16(t, t), 0);
|
293
|
+
c0 = _mm_cmpgt_epi16(t0, t);
|
294
|
+
|
295
|
+
_BitScanForward(&bitindex, _mm_movemask_epi8(c0) | 0x10000);
|
296
|
+
bitindex >>= 1;
|
297
|
+
start = model->prob[bitindex - 1];
|
298
|
+
end = model->prob[bitindex];
|
299
|
+
|
300
|
+
c0 = _mm_and_si128(_mm_set1_epi16(0x7FE5), c0);
|
301
|
+
c0 = _mm_add_epi16(c0, _mm_set_epi16(56, 48, 40, 32, 24, 16, 8, 0));
|
302
|
+
t0 = _mm_add_epi16(_mm_srai_epi16(_mm_sub_epi16(c0, t0), 7), t0);
|
303
|
+
_mm_storeu_si128((__m128i *)&model->prob[0], t0);
|
304
|
+
|
305
|
+
tab->bits_a = (end - start) * (x >> 15) + (x & 0x7FFF) - start;
|
306
|
+
LznaRenormalize(tab);
|
307
|
+
return bitindex - 1;
|
308
|
+
}
|
309
|
+
|
310
|
+
// Read a 1-bit value using an adaptive RANS model
|
311
|
+
static uint32 __forceinline LznaRead1Bit(LznaBitReader *tab, LznaBitModel *model, int nbits, int shift) {
|
312
|
+
uint64 q;
|
313
|
+
int magn = 1 << nbits;
|
314
|
+
q = *model * (tab->bits_a >> nbits);
|
315
|
+
if ((tab->bits_a & (magn - 1)) >= *model) {
|
316
|
+
tab->bits_a -= q + *model;
|
317
|
+
*model = *model - (*model >> shift);
|
318
|
+
LznaRenormalize(tab);
|
319
|
+
return 1;
|
320
|
+
} else {
|
321
|
+
tab->bits_a = (tab->bits_a & (magn - 1)) + q;
|
322
|
+
*model = *model + ((magn - *model) >> shift);
|
323
|
+
LznaRenormalize(tab);
|
324
|
+
return 0;
|
325
|
+
}
|
326
|
+
}
|
327
|
+
|
328
|
+
// Read a far distance using the far distance model
|
329
|
+
static uint32 __forceinline LznaReadFarDistance(LznaBitReader *tab, LznaState *lut) {
|
330
|
+
uint32 n = LznaReadNibble(tab, &lut->far_distance.first_lo);
|
331
|
+
uint32 hi;
|
332
|
+
if (n >= 15)
|
333
|
+
n = 15 + LznaReadNibble(tab, &lut->far_distance.first_hi);
|
334
|
+
hi = 0;
|
335
|
+
if (n != 0) {
|
336
|
+
hi = LznaRead1Bit(tab, &lut->far_distance.second[n - 1], 14, 6) + 2;
|
337
|
+
if (n != 1) {
|
338
|
+
hi = (hi << 1) + LznaRead1Bit(tab, &lut->far_distance.third[hi - 2][n - 1], 14, 6);
|
339
|
+
if (n != 2)
|
340
|
+
hi = (hi << (n - 2)) + LznaReadNBits(tab, n - 2);
|
341
|
+
}
|
342
|
+
hi -= 1;
|
343
|
+
}
|
344
|
+
LznaLowBitsDistanceModel *lutd = &lut->low_bits_of_distance[hi == 0];
|
345
|
+
uint32 low_bit = LznaRead1Bit(tab, &lutd->v, 14, 6);
|
346
|
+
uint32 low_nibble = LznaReadNibble(tab, &lutd->d[low_bit]);
|
347
|
+
return low_bit + (2 * low_nibble) + (32 * hi) + 1;
|
348
|
+
}
|
349
|
+
|
350
|
+
// Read a near distance using a near distance model
|
351
|
+
static uint32 __forceinline LznaReadNearDistance(LznaBitReader *tab, LznaState *lut, LznaNearDistModel *model) {
|
352
|
+
uint32 nb = LznaReadNibble(tab, &model->first);
|
353
|
+
uint32 hi = 0;
|
354
|
+
if (nb != 0) {
|
355
|
+
hi = LznaRead1Bit(tab, &model->second[nb - 1], 14, 6) + 2;
|
356
|
+
if (nb != 1) {
|
357
|
+
hi = (hi << 1) + LznaRead1Bit(tab, &model->third[hi - 2][nb - 1], 14, 6);
|
358
|
+
if (nb != 2)
|
359
|
+
hi = (hi << (nb - 2)) + LznaReadNBits(tab, nb - 2);
|
360
|
+
}
|
361
|
+
hi -= 1;
|
362
|
+
}
|
363
|
+
LznaLowBitsDistanceModel *lutd = &lut->low_bits_of_distance[hi == 0];
|
364
|
+
uint32 low_bit = LznaRead1Bit(tab, &lutd->v, 14, 6);
|
365
|
+
uint32 low_nibble = LznaReadNibble(tab, &lutd->d[low_bit]);
|
366
|
+
return low_bit + (2 * low_nibble) + (32 * hi) + 1;
|
367
|
+
}
|
368
|
+
|
369
|
+
// Read a length using the length model.
|
370
|
+
static uint32 __forceinline LznaReadLength(LznaBitReader *tab, LznaLongLengthModel *model, int64 dst_offs) {
|
371
|
+
uint32 length = LznaReadNibble(tab, &model->first[dst_offs & 3]);
|
372
|
+
if (length >= 12) {
|
373
|
+
uint32 b = LznaReadNibble(tab, &model->second);
|
374
|
+
if (b >= 15)
|
375
|
+
b = 15 + LznaReadNibble(tab, &model->third);
|
376
|
+
uint32 n = 0;
|
377
|
+
uint32 base = 0;
|
378
|
+
if (b) {
|
379
|
+
n = (b - 1) >> 1;
|
380
|
+
base = ((((b - 1) & 1) + 2) << n) - 1;
|
381
|
+
}
|
382
|
+
length += (LznaReadNBits(tab, n) + base) * 4;
|
383
|
+
}
|
384
|
+
return length;
|
385
|
+
}
|
386
|
+
|
387
|
+
// Decoder state that follows a literal, indexed by the current state.
static const uint8 next_state_lit[12] = {
  0, 0, 0, 0,   // states 0..3
  1, 2, 3,      // states 4..6
  4, 5, 6,      // states 7..9
  4, 5          // states 10..11
};
|
390
|
+
|
391
|
+
// Copies a match of `length` bytes from `dist` bytes behind dst, in 8-byte
// chunks, front to back. Intended for dist >= 8, so that no chunk overlaps
// its own source.
//
// At least 16 bytes are always written, and the total is rounded up to a
// multiple of 8; the caller must leave that much slack after dst.
//
// The chunks are moved with memcpy rather than through uint64 pointers:
// dst and src are arbitrary byte addresses, so casting them to uint64* is a
// misaligned, aliasing-violating access.
static void LznaCopyLongDist(byte *dst, size_t dist, size_t length) {
  const byte *src = dst - dist;
  memcpy(dst, src, 8);
  memcpy(dst + 8, src + 8, 8);
  while (length > 16) {
    memcpy(dst + 16, src + 16, 8);
    dst += 8;
    src += 8;
    length -= 8;
  }
}
|
404
|
+
|
405
|
+
// Copies a match of `length` bytes from `dist` bytes behind dst, for
// distances too short for the 8-byte copier.
//
//  - dist >= 4: 4-byte chunks, front to back; at least 12 bytes are written
//    and the total is rounded up to a multiple of 4.
//  - dist == 1: the previous byte is repeated `length` times.
//  - otherwise: byte by byte, front to back; at least 9 bytes are written.
//
// The 4-byte chunks are moved with memcpy rather than through uint32
// pointers, because dst and src are arbitrary (unaligned) byte addresses.
static void LznaCopyShortDist(byte *dst, size_t dist, size_t length) {
  const byte *src = dst - dist;
  if (dist >= 4) {
    memcpy(dst, src, 4);
    memcpy(dst + 4, src + 4, 4);
    memcpy(dst + 8, src + 8, 4);
    if (length > 12) {
      memcpy(dst + 12, src + 12, 4);
      while (length > 16) {
        memcpy(dst + 16, src + 16, 4);
        length -= 4;
        dst += 4;
        src += 4;
      }
    }
  } else if (dist == 1) {
    memset(dst, *src, length);
  } else {
    // Must stay a forward byte loop: source and destination overlap.
    const size_t total = (length > 9) ? length : 9;
    for (size_t i = 0; i < total; i++)
      dst[i] = src[i];
  }
}
|
442
|
+
|
443
|
+
// Copies a short match from `dist` bytes behind dst, byte by byte and front
// to back (source and destination may overlap). The amount written is
// rounded up to 4, 8 or 12 bytes depending on `length`.
static void LznaCopy4to12(byte *dst, size_t dist, size_t length) {
  const byte *src = dst - dist;
  size_t total = 4;
  if (length > 4)
    total = (length > 8) ? 12 : 8;

  for (size_t i = 0; i < total; i++)
    dst[i] = src[i];
}
|
462
|
+
|
463
|
+
// Ensures that the most recent distance (match_history[4]) is below 0xC000
// before a quantum is decoded.
//
// If it already is, nothing changes. Otherwise the first recent slot with a
// distance below 0xC000 is rotated to the front. If all four recent
// distances are >= 0xC000, the history is shifted down by one slot and the
// distance 4 is inserted at the front.
static void LznaPreprocessMatchHistory(LznaState *lut) {
  uint32 *hist = lut->match_history;

  if (hist[4] < 0xc000)
    return;

  // Find the first of the remaining recent slots holding a small distance.
  size_t i = 1;
  while (i < 4 && hist[4 + i] >= 0xC000)
    ++i;

  if (i >= 4) {
    hist[7] = hist[6];
    hist[6] = hist[5];
    hist[5] = hist[4];
    hist[4] = 4;
    return;
  }

  // Rotate slot i to the front; the three fixed moves spill into the
  // lower scratch slots when i is small.
  const uint32 front = hist[i + 4];
  hist[i + 4] = hist[i + 3];
  hist[i + 3] = hist[i + 2];
  hist[i + 2] = hist[i + 1];
  hist[4] = front;
}
|
483
|
+
|
484
|
+
// Decodes one LZNA quantum.
//
//   dst       first byte to produce
//   dst_end   one past the last byte of the quantum
//   dst_start start of the output window; dst - dst_start is the running
//             output offset used to select models
//   src_in    start of the compressed quantum
//   src_end   not consulted by this function
//   lut       adaptive models and recent-distance history, updated in place
//
// The entropy-coded loop produces everything except the last 8 bytes of the
// quantum; those are stored raw, taken from the two final coder states.
//
// Returns the number of source bytes consumed, or -1 if decoding did not end
// exactly 8 bytes before dst_end.
//
// NOTE(review): match distances and lengths are not validated against
// dst_start / dst_end here; the copy helpers may write a few bytes beyond
// the match length, so callers presumably provide slack -- confirm.
//
// Unaligned 64-bit copies are done with memcpy instead of uint64 pointer
// casts, since dst and dst - dist are arbitrary byte addresses.
int LZNA_DecodeQuantum(byte *dst, byte *dst_end, byte *dst_start,
                       const byte *src_in, const byte *src_end,
                       LznaState *lut) {
  LznaBitReader tab;
  uint32 x;
  uint32 dst_offs = dst - dst_start;
  uint32 match_val;
  uint32 state;
  uint32 length;
  uint32 dist;

  LznaPreprocessMatchHistory(lut);
  LznaBitReader_Init(&tab, src_in);
  dist = lut->match_history[4];

  state = 5;
  // The trailing 8 bytes are filled from the coder states after the loop.
  dst_end -= 8;

  if (dst_offs == 0) {
    // Very first byte of the window: there is no previous byte to match
    // against. A raw 1 bit means the byte is zero.
    if (LznaReadBit(&tab)) {
      x = 0;
    } else {
      LznaLiteralModel *model = &lut->literal[0];
      x = LznaReadNibble(&tab, &model->upper[0]);
      x = (x << 4) + LznaReadNibble(&tab, (x != 0) ? &model->nomatch[x] : &model->lower[0]);
    }
    *dst++ = x;
    dst_offs += 1;
  }
  while (dst < dst_end) {
    // Byte at the most recent distance; context for literals and the source
    // of the 1-byte copy.
    match_val = *(dst - dist);

    if (LznaRead1Bit(&tab, &lut->is_literal[(dst_offs & 7) + 8 * state], 13, 5)) {
      x = LznaReadNibble(&tab, &lut->type[(dst_offs & 7) + 8 * state]);
      if (x == 0) {
        // Copy 1 byte from most recent distance
        *dst++ = match_val;
        dst_offs += 1;
        state = (state >= 7) ? 11 : 9;
      } else if (x < 4) {
        if (x == 1) {
          // Copy count 3-4
          length = 3 + LznaRead1Bit(&tab, &lut->short_length[state][dst_offs & 3], 14, 4);
          dist = LznaReadNearDistance(&tab, lut, &lut->near_dist[length - 3]);
          dst[0] = (dst - dist)[0];
          dst[1] = (dst - dist)[1];
          dst[2] = (dst - dist)[2];
          dst[3] = (dst - dist)[3];
        } else if (x == 2) {
          // Copy count 5-12
          length = 5 + LznaRead3bit(&tab, &lut->medium_length);
          dist = LznaReadFarDistance(&tab, lut);
          if (dist >= 8) {
            memcpy(dst, dst - dist, 8);
            memcpy(dst + 8, dst - dist + 8, 8);
          } else {
            LznaCopy4to12(dst, dist, length);
          }
        } else {
          // Copy count 13-
          length = LznaReadLength(&tab, &lut->long_length, dst_offs) + 13;
          dist = LznaReadFarDistance(&tab, lut);
          if (dist >= 8)
            LznaCopyLongDist(dst, dist, length);
          else
            LznaCopyShortDist(dst, dist, length);
        }
        state = (state >= 7) ? 10 : 7;
        // Push the new distance onto the front of the recent history.
        lut->match_history[7] = lut->match_history[6];
        lut->match_history[6] = lut->match_history[5];
        lut->match_history[5] = lut->match_history[4];
        lut->match_history[4] = dist;
        dst += length;
        dst_offs += length;
      } else if (x >= 12) {
        // Copy 2 bytes from a recent distance
        size_t idx = x - 12;
        dist = lut->match_history[4 + idx];
        // Rotate the chosen slot to the front of the history.
        lut->match_history[4 + idx] = lut->match_history[3 + idx];
        lut->match_history[3 + idx] = lut->match_history[2 + idx];
        lut->match_history[2 + idx] = lut->match_history[1 + idx];
        lut->match_history[4] = dist;
        dst[0] = *(dst - dist + 0);
        dst[1] = *(dst - dist + 1);
        state = (state >= 7) ? 11 : 8;
        dst_offs += 2;
        dst += 2;
      } else {
        // x in 4..11: recent slot (x - 4) / 2; the low bit picks long/short.
        size_t idx = (x - 4) >> 1;
        dist = lut->match_history[4 + idx];
        // Rotate the chosen slot to the front of the history.
        lut->match_history[4 + idx] = lut->match_history[3 + idx];
        lut->match_history[3 + idx] = lut->match_history[2 + idx];
        lut->match_history[2 + idx] = lut->match_history[1 + idx];
        lut->match_history[4] = dist;
        if (x & 1) {
          // Copy 11- bytes from recent distance
          length = 11 + LznaReadLength(&tab, &lut->long_length_recent, dst_offs);
          if (dist >= 8) {
            LznaCopyLongDist(dst, dist, length);
          } else {
            LznaCopyShortDist(dst, dist, length);
          }
        } else {
          // Copy 3-10 bytes from recent distance
          length = 3 + LznaRead3bit(&tab, &lut->short_length_recent[idx].a[dst_offs & 3]);
          if (dist >= 8) {
            memcpy(dst, dst - dist, 8);
            memcpy(dst + 8, dst - dist + 8, 8);
          } else {
            LznaCopy4to12(dst, dist, length);
          }
        }
        state = (state >= 7) ? 11 : 8;
        dst_offs += length;
        dst += length;
      }
    } else {
      // Output a literal
      LznaLiteralModel *model = &lut->literal[dst_offs & 3];
      x = LznaReadNibble(&tab, &model->upper[match_val >> 4]);
      x = (x << 4) + LznaReadNibble(&tab, ((match_val >> 4) != x) ? &model->nomatch[x] : &model->lower[match_val & 0xF]);
      *dst++ = x;
      dst_offs += 1;
      state = next_state_lit[state];
    }
  }

  if (dst != dst_end)
    return -1;

  // The last 8 bytes of the quantum are carried in the final coder states.
  uint64 tail = (uint32)tab.bits_a | (tab.bits_b << 32);
  memcpy(dst, &tail, sizeof(tail));

  return (byte*)tab.src - src_in;
}
|