oodle-kraken-ruby 0.9.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE +14 -0
- data/ext/oodle-kraken/extconf.rb +11 -0
- data/ext/oodle-kraken/oodle_kraken_c.c +38 -0
- data/ext/oodle-kraken/ooz/LICENSE +15 -0
- data/ext/oodle-kraken/ooz/README.md +23 -0
- data/ext/oodle-kraken/ooz/bitknit.cpp +429 -0
- data/ext/oodle-kraken/ooz/kraken.cpp +4153 -0
- data/ext/oodle-kraken/ooz/kraken.h +33 -0
- data/ext/oodle-kraken/ooz/lzna.cpp +617 -0
- data/ext/oodle-kraken/ooz/ooz.cpp +342 -0
- data/ext/oodle-kraken/ooz/stdafx.cpp +8 -0
- data/ext/oodle-kraken/ooz/stdafx.h +68 -0
- data/ext/oodle-kraken/ooz/targetver.h +8 -0
- data/lib/oodle-kraken-ruby.rb +2 -0
- metadata +58 -0
@@ -0,0 +1,33 @@
|
|
1
|
+
/*
|
2
|
+
Copyright (C) 2022, Kerilk
|
3
|
+
|
4
|
+
This program is free software: you can redistribute it and/or modify
|
5
|
+
it under the terms of the GNU General Public License as published by
|
6
|
+
the Free Software Foundation, either version 3 of the License, or
|
7
|
+
(at your option) any later version.
|
8
|
+
|
9
|
+
This program is distributed in the hope that it will be useful,
|
10
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
11
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
12
|
+
GNU General Public License for more details.
|
13
|
+
|
14
|
+
You should have received a copy of the GNU General Public License
|
15
|
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
16
|
+
*/
|
17
|
+
#ifndef KRAKEN_H
#define KRAKEN_H

#include <stdlib.h>
#include <stdint.h>
/* ssize_t is a POSIX type: it is not provided by <stdlib.h> or <stdint.h>,
   so pull it in explicitly to keep this header self-contained. */
#include <sys/types.h>

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Entry point of the decompressor (defined in another translation unit).
 *
 *   src, src_len  - input buffer and its size in bytes
 *   dst, dst_len  - output buffer and its size in bytes
 *
 * NOTE(review): the meaning of the signed return value is not visible from
 * this header; presumably a negative value reports failure - confirm against
 * the implementation.
 */
extern ssize_t Kraken_Decompress(const uint8_t *src, size_t src_len, uint8_t *dst, size_t dst_len);

#ifdef __cplusplus
}
#endif

#endif
|
@@ -0,0 +1,617 @@
|
|
1
|
+
/*
|
2
|
+
Copyright (C) 2016, Powzix
|
3
|
+
Copyright (C) 2019, rarten
|
4
|
+
|
5
|
+
This program is free software: you can redistribute it and/or modify
|
6
|
+
it under the terms of the GNU General Public License as published by
|
7
|
+
the Free Software Foundation, either version 3 of the License, or
|
8
|
+
(at your option) any later version.
|
9
|
+
|
10
|
+
This program is distributed in the hope that it will be useful,
|
11
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
13
|
+
GNU General Public License for more details.
|
14
|
+
|
15
|
+
You should have received a copy of the GNU General Public License
|
16
|
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
17
|
+
*/
|
18
|
+
|
19
|
+
#include "stdafx.h"

#include <string.h>
|
20
|
+
|
21
|
+
|
22
|
+
// One adaptive binary model: a 16-bit value that LznaRead1Bit compares
// against the low bits of the coder state and nudges after every decode.
typedef uint16 LznaBitModel;
|
23
|
+
|
24
|
+
// State for a 4-bit value RANS model
|
25
|
+
struct LznaNibbleModel {
|
26
|
+
uint16 prob[17];
|
27
|
+
};
|
28
|
+
|
29
|
+
// State for a 3-bit value RANS model
|
30
|
+
struct Lzna3bitModel {
|
31
|
+
uint16 prob[9];
|
32
|
+
};
|
33
|
+
|
34
|
+
// State for the literal model
|
35
|
+
struct LznaLiteralModel {
|
36
|
+
LznaNibbleModel upper[16];
|
37
|
+
LznaNibbleModel lower[16];
|
38
|
+
LznaNibbleModel nomatch[16];
|
39
|
+
};
|
40
|
+
|
41
|
+
// State for a model representing a far distance
|
42
|
+
struct LznaFarDistModel {
|
43
|
+
LznaNibbleModel first_lo;
|
44
|
+
LznaNibbleModel first_hi;
|
45
|
+
LznaBitModel second[31];
|
46
|
+
LznaBitModel third[2][31];
|
47
|
+
};
|
48
|
+
|
49
|
+
// State for a model representing a near distance
|
50
|
+
struct LznaNearDistModel {
|
51
|
+
LznaNibbleModel first;
|
52
|
+
LznaBitModel second[16];
|
53
|
+
LznaBitModel third[2][16];
|
54
|
+
};
|
55
|
+
|
56
|
+
// State for model representing the low bits of a distance
|
57
|
+
struct LznaLowBitsDistanceModel {
|
58
|
+
LznaNibbleModel d[2];
|
59
|
+
LznaBitModel v;
|
60
|
+
};
|
61
|
+
|
62
|
+
// State for model used for the short lengths for recent matches
|
63
|
+
struct LznaShortLengthRecentModel {
|
64
|
+
Lzna3bitModel a[4];
|
65
|
+
};
|
66
|
+
|
67
|
+
// State for model for long lengths
|
68
|
+
struct LznaLongLengthModel {
|
69
|
+
LznaNibbleModel first[4];
|
70
|
+
LznaNibbleModel second;
|
71
|
+
LznaNibbleModel third;
|
72
|
+
};
|
73
|
+
|
74
|
+
// Complete LZNA state
|
75
|
+
struct LznaState {
|
76
|
+
uint32 match_history[8];
|
77
|
+
LznaLiteralModel literal[4];
|
78
|
+
LznaBitModel is_literal[12 * 8];
|
79
|
+
LznaNibbleModel type[12 * 8];
|
80
|
+
LznaShortLengthRecentModel short_length_recent[4];
|
81
|
+
LznaLongLengthModel long_length_recent;
|
82
|
+
LznaLowBitsDistanceModel low_bits_of_distance[2];
|
83
|
+
LznaBitModel short_length[12][4];
|
84
|
+
LznaNearDistModel near_dist[2];
|
85
|
+
Lzna3bitModel medium_length;
|
86
|
+
LznaLongLengthModel long_length;
|
87
|
+
LznaFarDistModel far_distance;
|
88
|
+
};
|
89
|
+
|
90
|
+
// Starting table for every 4-bit model: 16 equally wide symbols covering
// [0, 0x8000). Only ever copied from, hence const.
static const LznaNibbleModel lzna_initializer_4bit = {
  {
    0x0,    0x800,  0x1000, 0x1800, 0x2000, 0x2800, 0x3000, 0x3800,
    0x4000, 0x4800, 0x5000, 0x5800, 0x6000, 0x6800, 0x7000, 0x7800,
    0x8000,
  }
};
|
93
|
+
|
94
|
+
// Starting table for every 3-bit model: 8 equally wide symbols covering
// [0, 0x8000). Only ever copied from, hence const.
static const Lzna3bitModel lzna_initializer_3bit = {
  {
    0x0, 0x1000, 0x2000, 0x3000, 0x4000, 0x5000, 0x6000, 0x7000, 0x8000
  }
};
|
97
|
+
|
98
|
+
// Reset one 4-bit model to the uniform starting table.
static void LznaNibbleModel_Init(LznaNibbleModel *d) {
  const size_t count = sizeof(d->prob) / sizeof(d->prob[0]);
  for (size_t k = 0; k < count; k++)
    d->prob[k] = lzna_initializer_4bit.prob[k];
}
|
101
|
+
|
102
|
+
// Reset one 3-bit model to the uniform starting table.
static void Lzna3bitModel_Init(Lzna3bitModel *d) {
  const size_t count = sizeof(d->prob) / sizeof(d->prob[0]);
  for (size_t k = 0; k < count; k++)
    d->prob[k] = lzna_initializer_3bit.prob[k];
}
|
105
|
+
|
106
|
+
// Reset n consecutive 4-bit models starting at d.
// A count of zero or less is a no-op (the former do/while form would have
// decremented past zero and run off the end of the array).
static void LznaNibbleModel_InitN(LznaNibbleModel *d, int n) {
  for (; n > 0; --n, ++d)
    LznaNibbleModel_Init(d);
}
|
109
|
+
|
110
|
+
// Reset n consecutive literal models starting at d.
// A count of zero or less is a no-op (the former do/while form would have
// run off the end of the array).
static void LznaLiteralModel_InitN(LznaLiteralModel *d, int n) {
  for (; n > 0; --n, ++d) {
    LznaNibbleModel_InitN(d->upper, 16);
    LznaNibbleModel_InitN(d->lower, 16);
    LznaNibbleModel_InitN(d->nomatch, 16);
  }
}
|
117
|
+
|
118
|
+
// Reset n consecutive short-length models starting at d.
// A count of zero or less is a no-op (the former do/while form would have
// run off the end of the array).
static void LznaShortLengthRecentModel_InitN(LznaShortLengthRecentModel *d, int n) {
  for (; n > 0; --n, ++d) {
    for (int k = 0; k < 4; k++)
      Lzna3bitModel_Init(&d->a[k]);
  }
}
|
126
|
+
|
127
|
+
// Reset n consecutive near-distance models starting at d: the nibble model
// gets the uniform table and every bit model starts at 0x2000.
// A count of zero or less is a no-op (the former do/while form would have
// run off the end of the array).
static void LznaNearDistModel_Init(LznaNearDistModel *d, int n) {
  for (; n > 0; --n, ++d) {
    LznaNibbleModel_Init(&d->first);

    for (int i = 0; i < 16; i++) {
      d->second[i] = 0x2000;
      d->third[0][i] = 0x2000;
      d->third[1][i] = 0x2000;
    }
  }
}
|
140
|
+
|
141
|
+
// Reset n consecutive low-bits distance models starting at d.
// A count of zero or less is a no-op (the former do/while form would have
// run off the end of the array).
static void LznaLowBitsDistanceModel_Init(LznaLowBitsDistanceModel *d, int n) {
  for (; n > 0; --n, ++d) {
    d->v = 0x2000;
    LznaNibbleModel_InitN(d->d, 2);
  }
}
|
147
|
+
|
148
|
+
// Reset the far-distance model: uniform tables for both nibble models and
// 0x2000 for each of the 31 entries of every bit-model array.
static void LznaFarDistModel_Init(LznaFarDistModel *d) {
  LznaNibbleModel_Init(&d->first_lo);
  LznaNibbleModel_Init(&d->first_hi);

  for (int k = 0; k < 31; k++)
    d->second[k] = 0x2000;

  for (int row = 0; row < 2; row++) {
    for (int k = 0; k < 31; k++)
      d->third[row][k] = 0x2000;
  }
}
|
158
|
+
|
159
|
+
// Put a complete LznaState into its starting configuration.
//
// The four recent distances (match_history[4..7]) start at 1. The scratch
// slots match_history[0..3] are zeroed as well: the history-rotation code in
// the decoder reads them while shifting entries, and although those values
// never reach a slot that is used as a distance, leaving them indeterminate
// makes every such read undefined.
void LZNA_InitLookup(LznaState *lut) {
  int i, j;

  for (i = 0; i < 4; i++) {
    lut->match_history[i] = 0;
    lut->match_history[i + 4] = 1;
  }

  for (i = 0; i < 96; i++)
    lut->is_literal[i] = 0x1000;

  LznaNibbleModel_InitN(lut->type, 96);
  LznaLiteralModel_InitN(lut->literal, 4);
  LznaShortLengthRecentModel_InitN(lut->short_length_recent, 4);

  LznaNibbleModel_InitN(lut->long_length_recent.first, 4);
  LznaNibbleModel_Init(&lut->long_length_recent.second);
  LznaNibbleModel_InitN(&lut->long_length_recent.third, 1);

  for (i = 0; i < 12; i++)
    for (j = 0; j < 4; j++)
      lut->short_length[i][j] = 0x2000;

  LznaNearDistModel_Init(lut->near_dist, 2);
  LznaLowBitsDistanceModel_Init(lut->low_bits_of_distance, 2);

  Lzna3bitModel_Init(&lut->medium_length);

  LznaNibbleModel_InitN(lut->long_length.first, 4);
  LznaNibbleModel_Init(&lut->long_length.second);
  LznaNibbleModel_InitN(&lut->long_length.third, 1);
  LznaFarDistModel_Init(&lut->far_distance);
}
|
190
|
+
|
191
|
+
struct LznaBitReader {
|
192
|
+
uint64 bits_a, bits_b;
|
193
|
+
const uint32 *src, *src_start;
|
194
|
+
};
|
195
|
+
|
196
|
+
// Initialize bit reader with 2 parallel streams. Every decode operation
|
197
|
+
// swaps the two streams.
|
198
|
+
static void LznaBitReader_Init(LznaBitReader *tab, const byte *src) {
|
199
|
+
int d, n, i;
|
200
|
+
uint64 v;
|
201
|
+
|
202
|
+
tab->src_start = (uint32*)src;
|
203
|
+
|
204
|
+
d = *src++;
|
205
|
+
n = d >> 4;
|
206
|
+
assert(n <= 8);
|
207
|
+
for (i = 0, v = 0; i < n; i++)
|
208
|
+
v = (v << 8) | *src++;
|
209
|
+
tab->bits_a = (v << 4) | (d & 0xF);
|
210
|
+
|
211
|
+
d = *src++;
|
212
|
+
n = d >> 4;
|
213
|
+
assert(n <= 8);
|
214
|
+
for (i = 0, v = 0; i < n; i++)
|
215
|
+
v = (v << 8) | *src++;
|
216
|
+
tab->bits_b = (v << 4) | (d & 0xF);
|
217
|
+
tab->src = (uint32*)src;
|
218
|
+
}
|
219
|
+
|
220
|
+
// Renormalize by filling up the RANS state and swapping the two streams
|
221
|
+
static void __forceinline LznaRenormalize(LznaBitReader *tab) {
|
222
|
+
uint64 x = tab->bits_a;
|
223
|
+
if (x < 0x80000000)
|
224
|
+
x = (x << 32) | *tab->src++;
|
225
|
+
tab->bits_a = tab->bits_b;
|
226
|
+
tab->bits_b = x;
|
227
|
+
}
|
228
|
+
|
229
|
+
// Read a single bit with a uniform distribution.
|
230
|
+
static uint32 __forceinline LznaReadBit(LznaBitReader *tab) {
|
231
|
+
int r = tab->bits_a & 1;
|
232
|
+
tab->bits_a >>= 1;
|
233
|
+
LznaRenormalize(tab);
|
234
|
+
return r;
|
235
|
+
}
|
236
|
+
|
237
|
+
// Read a number of bits with a uniform distribution.
|
238
|
+
static uint32 __forceinline LznaReadNBits(LznaBitReader *tab, int bits) {
|
239
|
+
uint32 rv = tab->bits_a & ((1 << bits) - 1);
|
240
|
+
tab->bits_a >>= bits;
|
241
|
+
LznaRenormalize(tab);
|
242
|
+
return rv;
|
243
|
+
}
|
244
|
+
|
245
|
+
|
246
|
+
// Read a 4-bit value using an adaptive RANS model
|
247
|
+
static uint32 __forceinline LznaReadNibble(LznaBitReader *tab, LznaNibbleModel *model) {
|
248
|
+
__m128i t, t0, t1, c0, c1;
|
249
|
+
unsigned long bitindex;
|
250
|
+
unsigned int start, end;
|
251
|
+
uint64 x = tab->bits_a;
|
252
|
+
|
253
|
+
t0 = _mm_loadu_si128((const __m128i *)&model->prob[0]);
|
254
|
+
t1 = _mm_loadu_si128((const __m128i *)&model->prob[8]);
|
255
|
+
|
256
|
+
t = _mm_cvtsi32_si128((int16)x);
|
257
|
+
t = _mm_and_si128(_mm_shuffle_epi32(_mm_unpacklo_epi16(t, t), 0), _mm_set1_epi16(0x7FFF));
|
258
|
+
|
259
|
+
c0 = _mm_cmpgt_epi16(t0, t);
|
260
|
+
c1 = _mm_cmpgt_epi16(t1, t);
|
261
|
+
|
262
|
+
_BitScanForward(&bitindex, _mm_movemask_epi8(_mm_packs_epi16(c0, c1)) | 0x10000);
|
263
|
+
start = model->prob[bitindex - 1];
|
264
|
+
end = model->prob[bitindex];
|
265
|
+
|
266
|
+
c0 = _mm_and_si128(_mm_set1_epi16(0x7FD9), c0);
|
267
|
+
c1 = _mm_and_si128(_mm_set1_epi16(0x7FD9), c1);
|
268
|
+
|
269
|
+
c0 = _mm_add_epi16(c0, _mm_set_epi16(56, 48, 40, 32, 24, 16, 8, 0));
|
270
|
+
c1 = _mm_add_epi16(c1, _mm_set_epi16(120, 112, 104, 96, 88, 80, 72, 64));
|
271
|
+
|
272
|
+
t0 = _mm_add_epi16(_mm_srai_epi16(_mm_sub_epi16(c0, t0), 7), t0);
|
273
|
+
t1 = _mm_add_epi16(_mm_srai_epi16(_mm_sub_epi16(c1, t1), 7), t1);
|
274
|
+
|
275
|
+
_mm_storeu_si128((__m128i *)&model->prob[0], t0);
|
276
|
+
_mm_storeu_si128((__m128i *)&model->prob[8], t1);
|
277
|
+
|
278
|
+
tab->bits_a = (end - start) * (x >> 15) + (x & 0x7FFF) - start;
|
279
|
+
LznaRenormalize(tab);
|
280
|
+
return (int)bitindex - 1;
|
281
|
+
}
|
282
|
+
|
283
|
+
// Read a 3-bit value using an adaptive RANS model
|
284
|
+
static uint32 __forceinline LznaRead3bit(LznaBitReader *tab, Lzna3bitModel *model) {
|
285
|
+
__m128i t, t0, c0;
|
286
|
+
unsigned long bitindex;
|
287
|
+
unsigned int start, end;
|
288
|
+
uint64 x = tab->bits_a;
|
289
|
+
|
290
|
+
t0 = _mm_loadu_si128((const __m128i *)&model->prob[0]);
|
291
|
+
t = _mm_cvtsi32_si128(x & 0x7FFF);
|
292
|
+
t = _mm_shuffle_epi32(_mm_unpacklo_epi16(t, t), 0);
|
293
|
+
c0 = _mm_cmpgt_epi16(t0, t);
|
294
|
+
|
295
|
+
_BitScanForward(&bitindex, _mm_movemask_epi8(c0) | 0x10000);
|
296
|
+
bitindex >>= 1;
|
297
|
+
start = model->prob[bitindex - 1];
|
298
|
+
end = model->prob[bitindex];
|
299
|
+
|
300
|
+
c0 = _mm_and_si128(_mm_set1_epi16(0x7FE5), c0);
|
301
|
+
c0 = _mm_add_epi16(c0, _mm_set_epi16(56, 48, 40, 32, 24, 16, 8, 0));
|
302
|
+
t0 = _mm_add_epi16(_mm_srai_epi16(_mm_sub_epi16(c0, t0), 7), t0);
|
303
|
+
_mm_storeu_si128((__m128i *)&model->prob[0], t0);
|
304
|
+
|
305
|
+
tab->bits_a = (end - start) * (x >> 15) + (x & 0x7FFF) - start;
|
306
|
+
LznaRenormalize(tab);
|
307
|
+
return bitindex - 1;
|
308
|
+
}
|
309
|
+
|
310
|
+
// Read a 1-bit value using an adaptive RANS model
|
311
|
+
static uint32 __forceinline LznaRead1Bit(LznaBitReader *tab, LznaBitModel *model, int nbits, int shift) {
|
312
|
+
uint64 q;
|
313
|
+
int magn = 1 << nbits;
|
314
|
+
q = *model * (tab->bits_a >> nbits);
|
315
|
+
if ((tab->bits_a & (magn - 1)) >= *model) {
|
316
|
+
tab->bits_a -= q + *model;
|
317
|
+
*model = *model - (*model >> shift);
|
318
|
+
LznaRenormalize(tab);
|
319
|
+
return 1;
|
320
|
+
} else {
|
321
|
+
tab->bits_a = (tab->bits_a & (magn - 1)) + q;
|
322
|
+
*model = *model + ((magn - *model) >> shift);
|
323
|
+
LznaRenormalize(tab);
|
324
|
+
return 0;
|
325
|
+
}
|
326
|
+
}
|
327
|
+
|
328
|
+
// Read a far distance using the far distance model
|
329
|
+
static uint32 __forceinline LznaReadFarDistance(LznaBitReader *tab, LznaState *lut) {
|
330
|
+
uint32 n = LznaReadNibble(tab, &lut->far_distance.first_lo);
|
331
|
+
uint32 hi;
|
332
|
+
if (n >= 15)
|
333
|
+
n = 15 + LznaReadNibble(tab, &lut->far_distance.first_hi);
|
334
|
+
hi = 0;
|
335
|
+
if (n != 0) {
|
336
|
+
hi = LznaRead1Bit(tab, &lut->far_distance.second[n - 1], 14, 6) + 2;
|
337
|
+
if (n != 1) {
|
338
|
+
hi = (hi << 1) + LznaRead1Bit(tab, &lut->far_distance.third[hi - 2][n - 1], 14, 6);
|
339
|
+
if (n != 2)
|
340
|
+
hi = (hi << (n - 2)) + LznaReadNBits(tab, n - 2);
|
341
|
+
}
|
342
|
+
hi -= 1;
|
343
|
+
}
|
344
|
+
LznaLowBitsDistanceModel *lutd = &lut->low_bits_of_distance[hi == 0];
|
345
|
+
uint32 low_bit = LznaRead1Bit(tab, &lutd->v, 14, 6);
|
346
|
+
uint32 low_nibble = LznaReadNibble(tab, &lutd->d[low_bit]);
|
347
|
+
return low_bit + (2 * low_nibble) + (32 * hi) + 1;
|
348
|
+
}
|
349
|
+
|
350
|
+
// Read a near distance using a near distance model
|
351
|
+
static uint32 __forceinline LznaReadNearDistance(LznaBitReader *tab, LznaState *lut, LznaNearDistModel *model) {
|
352
|
+
uint32 nb = LznaReadNibble(tab, &model->first);
|
353
|
+
uint32 hi = 0;
|
354
|
+
if (nb != 0) {
|
355
|
+
hi = LznaRead1Bit(tab, &model->second[nb - 1], 14, 6) + 2;
|
356
|
+
if (nb != 1) {
|
357
|
+
hi = (hi << 1) + LznaRead1Bit(tab, &model->third[hi - 2][nb - 1], 14, 6);
|
358
|
+
if (nb != 2)
|
359
|
+
hi = (hi << (nb - 2)) + LznaReadNBits(tab, nb - 2);
|
360
|
+
}
|
361
|
+
hi -= 1;
|
362
|
+
}
|
363
|
+
LznaLowBitsDistanceModel *lutd = &lut->low_bits_of_distance[hi == 0];
|
364
|
+
uint32 low_bit = LznaRead1Bit(tab, &lutd->v, 14, 6);
|
365
|
+
uint32 low_nibble = LznaReadNibble(tab, &lutd->d[low_bit]);
|
366
|
+
return low_bit + (2 * low_nibble) + (32 * hi) + 1;
|
367
|
+
}
|
368
|
+
|
369
|
+
// Read a length using the length model.
|
370
|
+
static uint32 __forceinline LznaReadLength(LznaBitReader *tab, LznaLongLengthModel *model, int64 dst_offs) {
|
371
|
+
uint32 length = LznaReadNibble(tab, &model->first[dst_offs & 3]);
|
372
|
+
if (length >= 12) {
|
373
|
+
uint32 b = LznaReadNibble(tab, &model->second);
|
374
|
+
if (b >= 15)
|
375
|
+
b = 15 + LznaReadNibble(tab, &model->third);
|
376
|
+
uint32 n = 0;
|
377
|
+
uint32 base = 0;
|
378
|
+
if (b) {
|
379
|
+
n = (b - 1) >> 1;
|
380
|
+
base = ((((b - 1) & 1) + 2) << n) - 1;
|
381
|
+
}
|
382
|
+
length += (LznaReadNBits(tab, n) + base) * 4;
|
383
|
+
}
|
384
|
+
return length;
|
385
|
+
}
|
386
|
+
|
387
|
+
// Decoder state that follows a literal, indexed by the current state.
static const uint8 next_state_lit[12] = {
  0, 0, 0, 0,
  1, 2, 3, 4,
  5, 6, 4, 5
};
|
390
|
+
|
391
|
+
// Copy a match whose distance is at least 8, in 8-byte steps.
//
// Always writes 16 bytes, then keeps adding 8-byte chunks while more than 16
// bytes remain, so it may write past dst + length. Chunks go through memcpy
// rather than uint64 pointer casts: the buffers carry no alignment guarantee
// and the casts were type-punned accesses. With dist >= 8 no single chunk
// overlaps its own source, and chunks are still copied front to back.
static void LznaCopyLongDist(byte *dst, size_t dist, size_t length) {
  const byte *src = dst - dist;
  memcpy(dst, src, 8);
  memcpy(dst + 8, src + 8, 8);
  while (length > 16) {
    memcpy(dst + 16, src + 16, 8);
    dst += 8;
    src += 8;
    length -= 8;
  }
}
|
404
|
+
|
405
|
+
// Copy a match whose distance is below 8.
//
//   dist >= 4 : 4-byte chunks; always 12 bytes, a fourth chunk when
//               length > 12, then one more chunk per 4 bytes beyond 16
//   dist == 1 : a run of one repeated byte
//   otherwise : byte by byte, at least 9 bytes
//
// The chunked path may write past dst + length. Chunks go through memcpy
// rather than uint32 pointer casts: the buffers carry no alignment guarantee
// and the casts were type-punned accesses. With dist >= 4 no single chunk
// overlaps its own source, and chunks are still copied front to back.
static void LznaCopyShortDist(byte *dst, size_t dist, size_t length) {
  const byte *src = dst - dist;
  if (dist >= 4) {
    memcpy(dst, src, 4);
    memcpy(dst + 4, src + 4, 4);
    memcpy(dst + 8, src + 8, 4);
    if (length > 12) {
      memcpy(dst + 12, src + 12, 4);
      while (length > 16) {
        memcpy(dst + 16, src + 16, 4);
        length -= 4;
        dst += 4;
        src += 4;
      }
    }
  } else if (dist == 1) {
    memset(dst, *src, length);
  } else {
    // Source and destination overlap closely here, so the copy has to
    // proceed strictly one byte at a time, front to back.
    const size_t count = (length > 9) ? length : 9;
    for (size_t k = 0; k < count; k++)
      dst[k] = src[k];
  }
}
|
442
|
+
|
443
|
+
// Copy a short match byte by byte, front to back, rounding the amount up to
// 4, 8 or 12 bytes depending on `length`.
static void LznaCopy4to12(byte *dst, size_t dist, size_t length) {
  const byte *from = dst - dist;

  size_t count = 4;
  if (length > 4)
    count = (length > 8) ? 12 : 8;

  for (size_t k = 0; k < count; k++)
    dst[k] = from[k];
}
|
462
|
+
|
463
|
+
// Make sure the most recent distance is below 0xC000 before a quantum starts.
//
// Nothing happens when it already is. Otherwise the first later history entry
// below 0xC000 is moved to the front; when all four entries are 0xC000 or
// more, the distance 4 is pushed to the front and the oldest entry is dropped.
static void LznaPreprocessMatchHistory(LznaState *lut) {
  uint32 *hist = lut->match_history;

  if (hist[4] < 0xc000)
    return;

  // Look for the first of the remaining three entries that is small enough.
  size_t i = 1;
  while (i < 4 && hist[4 + i] >= 0xC000)
    i++;

  if (i == 4) {
    hist[7] = hist[6];
    hist[6] = hist[5];
    hist[5] = hist[4];
    hist[4] = 4;
    return;
  }

  // Fixed three-step shift; for small i it also moves the scratch slots
  // below index 4.
  const uint32 kept = hist[i + 4];
  hist[i + 4] = hist[i + 3];
  hist[i + 3] = hist[i + 2];
  hist[i + 2] = hist[i + 1];
  hist[4] = kept;
}
|
483
|
+
|
484
|
+
// Decode one LZNA quantum.
//
//   dst       - where decoded bytes are written
//   dst_end   - end of the region this quantum must fill
//   dst_start - base used to compute the output offset that selects models
//   src_in    - start of the compressed quantum
//   src_end   - end of the compressed quantum (currently not consulted)
//   lut       - model state carried across quanta
//
// Returns the number of input bytes consumed, or -1 when decoding does not
// stop exactly 8 bytes before dst_end. Those last 8 bytes are not entropy
// coded: they are written from the two final coder states.
//
// All 8-byte moves use memcpy instead of uint64 pointer casts, because the
// output buffer has no alignment guarantee and the casts were type-punned
// accesses.
//
// NOTE(review): neither the input cursor nor decoded distances are validated
// against the buffer bounds, and the wide copies may write a few bytes past
// the current match - callers presumably provide padding; confirm.
int LZNA_DecodeQuantum(byte *dst, byte *dst_end, byte *dst_start,
                       const byte *src_in, const byte *src_end,
                       LznaState *lut) {
  LznaBitReader tab;
  uint32 x;
  uint32 dst_offs = dst - dst_start;
  uint32 match_val;
  uint32 state;
  uint32 length;
  uint32 dist;

  (void)src_end;

  LznaPreprocessMatchHistory(lut);
  LznaBitReader_Init(&tab, src_in);
  dist = lut->match_history[4];

  state = 5;
  dst_end -= 8;

  if (dst_offs == 0) {
    // Very first byte: there is nothing to predict it from.
    if (LznaReadBit(&tab)) {
      x = 0;
    } else {
      LznaLiteralModel *model = &lut->literal[0];
      x = LznaReadNibble(&tab, &model->upper[0]);
      x = (x << 4) + LznaReadNibble(&tab, (x != 0) ? &model->nomatch[x] : &model->lower[0]);
    }
    *dst++ = x;
    dst_offs += 1;
  }
  while (dst < dst_end) {
    // Byte at the most recent distance; it predicts the next literal.
    match_val = *(dst - dist);

    // A decoded 1 selects the match branch, a 0 the literal branch.
    if (LznaRead1Bit(&tab, &lut->is_literal[(dst_offs & 7) + 8 * state], 13, 5)) {
      x = LznaReadNibble(&tab, &lut->type[(dst_offs & 7) + 8 * state]);
      if (x == 0) {
        // Copy 1 byte from most recent distance
        *dst++ = match_val;
        dst_offs += 1;
        state = (state >= 7) ? 11 : 9;
      } else if (x < 4) {
        if (x == 1) {
          // Copy count 3-4
          length = 3 + LznaRead1Bit(&tab, &lut->short_length[state][dst_offs & 3], 14, 4);
          dist = LznaReadNearDistance(&tab, lut, &lut->near_dist[length - 3]);
          dst[0] = (dst - dist)[0];
          dst[1] = (dst - dist)[1];
          dst[2] = (dst - dist)[2];
          dst[3] = (dst - dist)[3];
        } else if (x == 2) {
          // Copy count 5-12
          length = 5 + LznaRead3bit(&tab, &lut->medium_length);
          dist = LznaReadFarDistance(&tab, lut);
          if (dist >= 8) {
            memcpy(dst, dst - dist, 8);
            memcpy(dst + 8, dst - dist + 8, 8);
          } else {
            LznaCopy4to12(dst, dist, length);
          }
        } else {
          // Copy count 13-
          length = LznaReadLength(&tab, &lut->long_length, dst_offs) + 13;
          dist = LznaReadFarDistance(&tab, lut);
          if (dist >= 8)
            LznaCopyLongDist(dst, dist, length);
          else
            LznaCopyShortDist(dst, dist, length);
        }
        state = (state >= 7) ? 10 : 7;
        // Push the new distance to the front of the history.
        lut->match_history[7] = lut->match_history[6];
        lut->match_history[6] = lut->match_history[5];
        lut->match_history[5] = lut->match_history[4];
        lut->match_history[4] = dist;
        dst += length;
        dst_offs += length;
      } else if (x >= 12) {
        // Copy 2 bytes from a recent distance
        size_t idx = x - 12;
        dist = lut->match_history[4 + idx];
        // Move the chosen entry to the front; the fixed three-step shift
        // spills into the scratch slots below index 4.
        lut->match_history[4 + idx] = lut->match_history[3 + idx];
        lut->match_history[3 + idx] = lut->match_history[2 + idx];
        lut->match_history[2 + idx] = lut->match_history[1 + idx];
        lut->match_history[4] = dist;
        dst[0] = *(dst - dist + 0);
        dst[1] = *(dst - dist + 1);
        state = (state >= 7) ? 11 : 8;
        dst_offs += 2;
        dst += 2;
      } else {
        size_t idx = (x - 4) >> 1;
        dist = lut->match_history[4 + idx];
        // Same move-to-front as above.
        lut->match_history[4 + idx] = lut->match_history[3 + idx];
        lut->match_history[3 + idx] = lut->match_history[2 + idx];
        lut->match_history[2 + idx] = lut->match_history[1 + idx];
        lut->match_history[4] = dist;
        if (x & 1) {
          // Copy 11- bytes from recent distance
          length = 11 + LznaReadLength(&tab, &lut->long_length_recent, dst_offs);
          if (dist >= 8) {
            LznaCopyLongDist(dst, dist, length);
          } else {
            LznaCopyShortDist(dst, dist, length);
          }
        } else {
          // Copy 3-10 bytes from recent distance
          length = 3 + LznaRead3bit(&tab, &lut->short_length_recent[idx].a[dst_offs & 3]);
          if (dist >= 8) {
            memcpy(dst, dst - dist, 8);
            memcpy(dst + 8, dst - dist + 8, 8);
          } else {
            LznaCopy4to12(dst, dist, length);
          }
        }
        state = (state >= 7) ? 11 : 8;
        dst_offs += length;
        dst += length;
      }
    } else {
      // Output a literal
      LznaLiteralModel *model = &lut->literal[dst_offs & 3];
      x = LznaReadNibble(&tab, &model->upper[match_val >> 4]);
      x = (x << 4) + LznaReadNibble(&tab, ((match_val >> 4) != x) ? &model->nomatch[x] : &model->lower[match_val & 0xF]);
      *dst++ = x;
      dst_offs += 1;
      state = next_state_lit[state];
    }
  }

  if (dst != dst_end)
    return -1;

  // The final 8 output bytes travel inside the two coder states.
  const uint64 tail = (uint32)tab.bits_a | (tab.bits_b << 32);
  memcpy(dst, &tail, sizeof tail);

  return (const byte*)tab.src - src_in;
}
|