oodle-kraken-ruby 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +14 -0
- data/ext/oodle-kraken/extconf.rb +11 -0
- data/ext/oodle-kraken/oodle_kraken_c.c +38 -0
- data/ext/oodle-kraken/ooz/LICENSE +15 -0
- data/ext/oodle-kraken/ooz/README.md +23 -0
- data/ext/oodle-kraken/ooz/bitknit.cpp +429 -0
- data/ext/oodle-kraken/ooz/kraken.cpp +4153 -0
- data/ext/oodle-kraken/ooz/kraken.h +33 -0
- data/ext/oodle-kraken/ooz/lzna.cpp +617 -0
- data/ext/oodle-kraken/ooz/ooz.cpp +342 -0
- data/ext/oodle-kraken/ooz/stdafx.cpp +8 -0
- data/ext/oodle-kraken/ooz/stdafx.h +68 -0
- data/ext/oodle-kraken/ooz/targetver.h +8 -0
- data/lib/oodle-kraken-ruby.rb +2 -0
- metadata +58 -0
@@ -0,0 +1,4153 @@
|
|
1
|
+
/*
|
2
|
+
Copyright (C) 2016, Powzix
|
3
|
+
Copyright (C) 2019, rarten
|
4
|
+
Copyright (C) 2022, Kerilk
|
5
|
+
|
6
|
+
This program is free software: you can redistribute it and/or modify
|
7
|
+
it under the terms of the GNU General Public License as published by
|
8
|
+
the Free Software Foundation, either version 3 of the License, or
|
9
|
+
(at your option) any later version.
|
10
|
+
|
11
|
+
This program is distributed in the hope that it will be useful,
|
12
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
+
GNU General Public License for more details.
|
15
|
+
|
16
|
+
You should have received a copy of the GNU General Public License
|
17
|
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
18
|
+
*/
|
19
|
+
|
20
|
+
#include "stdafx.h"
|
21
|
+
#include "kraken.h"
|
22
|
+
|
23
|
+
// Per-block header, stored in front of each 256k block.
typedef struct KrakenHeader {
  int decoder_type;      // Type of decoder used; 6 means Kraken.
  bool restart_decoder;  // Whether to restart the decoder.
  bool uncompressed;     // Whether this block is uncompressed.
  bool use_checksums;    // Whether this block uses checksums.
} KrakenHeader;
|
37
|
+
|
38
|
+
// Additional header in front of each 256k block ("quantum").
|
39
|
+
typedef struct KrakenQuantumHeader {
|
40
|
+
// The compressed size of this quantum. If this value is 0 it means
|
41
|
+
// the quantum is a special quantum such as memset.
|
42
|
+
uint32 compressed_size;
|
43
|
+
// If checksums are enabled, holds the checksum.
|
44
|
+
uint32 checksum;
|
45
|
+
// Two flags
|
46
|
+
uint8 flag1;
|
47
|
+
uint8 flag2;
|
48
|
+
// Whether the whole block matched a previous block
|
49
|
+
uint32 whole_match_distance;
|
50
|
+
} KrakenQuantumHeader;
|
51
|
+
|
52
|
+
// Kraken decompression happens in two phases, first one decodes
|
53
|
+
// all the literals and copy lengths using huffman and second
|
54
|
+
// phase runs the copy loop. This holds the tables needed by stage 2.
|
55
|
+
typedef struct KrakenLzTable {
|
56
|
+
// Stream of (literal, match) pairs. The flag byte contains
|
57
|
+
// the length of the match, the length of the literal and whether
|
58
|
+
// to use a recent offset.
|
59
|
+
byte *cmd_stream;
|
60
|
+
int cmd_stream_size;
|
61
|
+
|
62
|
+
// Holds the actual distances in case we're not using a recent
|
63
|
+
// offset.
|
64
|
+
int *offs_stream;
|
65
|
+
int offs_stream_size;
|
66
|
+
|
67
|
+
// Holds the sequence of literals. All literal copying happens from
|
68
|
+
// here.
|
69
|
+
byte *lit_stream;
|
70
|
+
int lit_stream_size;
|
71
|
+
|
72
|
+
// Holds the lengths that do not fit in the flag stream. Both literal
|
73
|
+
// lengths and match length are stored in the same array.
|
74
|
+
int *len_stream;
|
75
|
+
int len_stream_size;
|
76
|
+
} KrakenLzTable;
|
77
|
+
|
78
|
+
|
79
|
+
// Mermaid/Selkie decompression also happens in two phases, just like in Kraken,
|
80
|
+
// but the match copier works differently.
|
81
|
+
// Both Mermaid and Selkie use the same on-disk format, only the compressor
|
82
|
+
// differs.
|
83
|
+
typedef struct MermaidLzTable {
|
84
|
+
// Flag stream. Format of flags:
|
85
|
+
// Read flagbyte from |cmd_stream|
|
86
|
+
// If flagbyte >= 24:
|
87
|
+
// flagbyte & 0x80 == 0 : Read from |off16_stream| into |recent_offs|.
|
88
|
+
// != 0 : Don't read offset.
|
89
|
+
// flagbyte & 7 = Number of literals to copy first from |lit_stream|.
|
90
|
+
// (flagbyte >> 3) & 0xF = Number of bytes to copy from |recent_offs|.
|
91
|
+
//
|
92
|
+
// If flagbyte == 0 :
|
93
|
+
// Read byte L from |length_stream|
|
94
|
+
// If L > 251: L += 4 * Read word from |length_stream|
|
95
|
+
// L += 64
|
96
|
+
// Copy L bytes from |lit_stream|.
|
97
|
+
//
|
98
|
+
// If flagbyte == 1 :
|
99
|
+
// Read byte L from |length_stream|
|
100
|
+
// If L > 251: L += 4 * Read word from |length_stream|
|
101
|
+
// L += 91
|
102
|
+
// Copy L bytes from match pointed by next offset from |off16_stream|
|
103
|
+
//
|
104
|
+
// If flagbyte == 2 :
|
105
|
+
// Read byte L from |length_stream|
|
106
|
+
// If L > 251: L += 4 * Read word from |length_stream|
|
107
|
+
// L += 29
|
108
|
+
// Copy L bytes from match pointed by next offset from |off32_stream|,
|
109
|
+
// relative to start of block.
|
110
|
+
// Then prefetch |off32_stream[3]|
|
111
|
+
//
|
112
|
+
// If flagbyte > 2:
|
113
|
+
// L = flagbyte + 5
|
114
|
+
// Copy L bytes from match pointed by next offset from |off32_stream|,
|
115
|
+
// relative to start of block.
|
116
|
+
// Then prefetch |off32_stream[3]|
|
117
|
+
const byte *cmd_stream, *cmd_stream_end;
|
118
|
+
|
119
|
+
// Length stream
|
120
|
+
const byte *length_stream;
|
121
|
+
|
122
|
+
// Literal stream
|
123
|
+
const byte *lit_stream, *lit_stream_end;
|
124
|
+
|
125
|
+
// Near offsets
|
126
|
+
const uint16 *off16_stream, *off16_stream_end;
|
127
|
+
|
128
|
+
// Far offsets for current chunk
|
129
|
+
uint32 *off32_stream, *off32_stream_end;
|
130
|
+
|
131
|
+
// Holds the offsets for the two chunks
|
132
|
+
uint32 *off32_stream_1, *off32_stream_2;
|
133
|
+
uint32 off32_size_1, off32_size_2;
|
134
|
+
|
135
|
+
// Flag offsets for next 64k chunk.
|
136
|
+
uint32 cmd_stream_2_offs, cmd_stream_2_offs_end;
|
137
|
+
} MermaidLzTable;
|
138
|
+
|
139
|
+
|
140
|
+
typedef struct KrakenDecoder {
|
141
|
+
// Updated after the |*_DecodeStep| function completes to hold
|
142
|
+
// the number of bytes read and written.
|
143
|
+
int src_used, dst_used;
|
144
|
+
|
145
|
+
// Pointer to a 256k buffer that holds the intermediate state
|
146
|
+
// in between decode phase 1 and 2.
|
147
|
+
byte *scratch;
|
148
|
+
size_t scratch_size;
|
149
|
+
|
150
|
+
KrakenHeader hdr;
|
151
|
+
} KrakenDecoder;
|
152
|
+
|
153
|
+
typedef struct BitReader {
|
154
|
+
// |p| holds the current byte and |p_end| the end of the buffer.
|
155
|
+
const byte *p, *p_end;
|
156
|
+
// Bits accumulated so far
|
157
|
+
uint32 bits;
|
158
|
+
// Next byte will end up in the |bitpos| position in |bits|.
|
159
|
+
int bitpos;
|
160
|
+
} BitReader;
|
161
|
+
|
162
|
+
struct HuffRevLut {
|
163
|
+
uint8 bits2len[2048];
|
164
|
+
uint8 bits2sym[2048];
|
165
|
+
};
|
166
|
+
|
167
|
+
typedef struct HuffReader {
|
168
|
+
// Array to hold the output of the huffman read array operation
|
169
|
+
byte *output, *output_end;
|
170
|
+
// We decode three parallel streams, two forwards, |src| and |src_mid|
|
171
|
+
// while |src_end| is decoded backwards.
|
172
|
+
const byte *src, *src_mid, *src_end, *src_mid_org;
|
173
|
+
int src_bitpos, src_mid_bitpos, src_end_bitpos;
|
174
|
+
uint32 src_bits, src_mid_bits, src_end_bits;
|
175
|
+
} HuffReader;
|
176
|
+
|
177
|
+
// Returns the larger of |a| and |b|.
inline size_t Max(size_t a, size_t b) {
  if (a > b)
    return a;
  return b;
}
|
178
|
+
// Returns the smaller of |a| and |b|.
inline size_t Min(size_t a, size_t b) {
  if (a < b)
    return a;
  return b;
}
|
179
|
+
|
180
|
+
// Rounds pointer |p| up to the next multiple of |align| (a power of two)
// and yields it as a uint8 pointer. |align| is parenthesized and the mask
// is built in uintptr_t so that expression arguments (e.g. 1 << 4) and
// unsigned 32-bit alignments do not corrupt the result.
#define ALIGN_POINTER(p, align) ((uint8*)(((uintptr_t)(p) + ((uintptr_t)(align) - 1)) & ~((uintptr_t)(align) - 1)))
|
181
|
+
|
182
|
+
struct HuffRange;
|
183
|
+
|
184
|
+
int Kraken_DecodeBytes(byte **output, const byte *src, const byte *src_end, int *decoded_size, size_t output_size, bool force_memmove, uint8 *scratch, uint8 *scratch_end);
|
185
|
+
int Kraken_GetBlockSize(const uint8 *src, const uint8 *src_end, int *dest_size, int dest_capacity);
|
186
|
+
int Huff_ConvertToRanges(HuffRange *range, int num_symbols, int P, const uint8 *symlen, BitReader *bits);
|
187
|
+
|
188
|
+
// Allocates |size| bytes at an address that is a multiple of |alignment|.
//
// |alignment| is used as a bit mask, so it is expected to be a power of two.
// The pointer returned by malloc is stashed in the pointer-sized slot just
// below the returned address so that |FreeAligned| can recover it.
//
// Returns NULL if |alignment| is zero, if the padded size would overflow
// size_t, or if malloc fails.
void *MallocAligned(size_t size, size_t alignment) {
  const size_t size_max = (size_t)-1;

  if (alignment == 0 || alignment - 1 > size_max - sizeof(void*))
    return NULL;

  // Worst-case padding for alignment plus room for the stashed pointer.
  const size_t overhead = (alignment - 1) + sizeof(void*);
  if (size > size_max - overhead)
    return NULL;

  void *raw = malloc(size + overhead);
  if (!raw)
    return NULL;

  // Skip past the stash slot, then round down to the alignment boundary.
  uintptr_t aligned = ((uintptr_t)raw + overhead) & ~(uintptr_t)(alignment - 1);
  void **slot = (void**)aligned;
  slot[-1] = raw;
  return (void*)slot;
}
|
197
|
+
|
198
|
+
// Frees memory allocated through |MallocAligned|.
// The original malloc pointer is read back from the slot just below |p|.
// Passing NULL is a no-op, mirroring free().
void FreeAligned(void *p) {
  if (!p)
    return;
  free(((void**)p)[-1]);
}
|
202
|
+
|
203
|
+
// Bit scan reverse: thin wrapper returning the index that
// _BitScanReverse reports for |x|.
uint32 BSR(uint32 x) {
  unsigned long pos;
  _BitScanReverse(&pos, x);
  return pos;
}
|
208
|
+
|
209
|
+
// Bit scan forward: thin wrapper returning the index that
// _BitScanForward reports for |x|.
uint32 BSF(uint32 x) {
  unsigned long pos;
  _BitScanForward(&pos, x);
  return pos;
}
|
214
|
+
|
215
|
+
// Read more bytes to make sure we always have at least 24 bits in |bits|.
|
216
|
+
void BitReader_Refill(BitReader *bits) {
|
217
|
+
assert(bits->bitpos <= 24);
|
218
|
+
while (bits->bitpos > 0) {
|
219
|
+
bits->bits |= (bits->p < bits->p_end ? *bits->p : 0) << bits->bitpos;
|
220
|
+
bits->bitpos -= 8;
|
221
|
+
bits->p++;
|
222
|
+
}
|
223
|
+
}
|
224
|
+
|
225
|
+
// Read more bytes to make sure we always have at least 24 bits in |bits|,
|
226
|
+
// used when reading backwards.
|
227
|
+
void BitReader_RefillBackwards(BitReader *bits) {
|
228
|
+
assert(bits->bitpos <= 24);
|
229
|
+
while (bits->bitpos > 0) {
|
230
|
+
bits->p--;
|
231
|
+
bits->bits |= (bits->p >= bits->p_end ? *bits->p : 0) << bits->bitpos;
|
232
|
+
bits->bitpos -= 8;
|
233
|
+
}
|
234
|
+
}
|
235
|
+
|
236
|
+
// Refill bits then read a single bit.
|
237
|
+
int BitReader_ReadBit(BitReader *bits) {
|
238
|
+
int r;
|
239
|
+
BitReader_Refill(bits);
|
240
|
+
r = bits->bits >> 31;
|
241
|
+
bits->bits <<= 1;
|
242
|
+
bits->bitpos += 1;
|
243
|
+
return r;
|
244
|
+
}
|
245
|
+
|
246
|
+
// Consumes and returns the top bit of the bit buffer without refilling.
int BitReader_ReadBitNoRefill(BitReader *bits) {
  const int top = bits->bits >> 31;
  bits->bitpos += 1;
  bits->bits <<= 1;
  return top;
}
|
253
|
+
|
254
|
+
|
255
|
+
// Read |n| bits without refilling.
|
256
|
+
int BitReader_ReadBitsNoRefill(BitReader *bits, int n) {
|
257
|
+
int r = (bits->bits >> (32 - n));
|
258
|
+
bits->bits <<= n;
|
259
|
+
bits->bitpos += n;
|
260
|
+
return r;
|
261
|
+
}
|
262
|
+
|
263
|
+
// Read |n| bits without refilling, n may be zero.
|
264
|
+
int BitReader_ReadBitsNoRefillZero(BitReader *bits, int n) {
|
265
|
+
int r = (bits->bits >> 1 >> (31 - n));
|
266
|
+
bits->bits <<= n;
|
267
|
+
bits->bitpos += n;
|
268
|
+
return r;
|
269
|
+
}
|
270
|
+
|
271
|
+
// Reads |n| bits where |n| may exceed 24, then refills.
// Up to 24 bits are read in one go; larger reads take the top 24 bits
// first, refill, and then read the remaining n - 24 bits.
uint32 BitReader_ReadMoreThan24Bits(BitReader *bits, int n) {
  uint32 value;
  if (n > 24) {
    const int low_count = n - 24;
    value = BitReader_ReadBitsNoRefill(bits, 24) << low_count;
    BitReader_Refill(bits);
    value += BitReader_ReadBitsNoRefill(bits, low_count);
  } else {
    value = BitReader_ReadBitsNoRefillZero(bits, n);
  }
  BitReader_Refill(bits);
  return value;
}
|
283
|
+
|
284
|
+
// Backwards-reading variant of |BitReader_ReadMoreThan24Bits|: identical
// except that every refill uses |BitReader_RefillBackwards|.
uint32 BitReader_ReadMoreThan24BitsB(BitReader *bits, int n) {
  uint32 value;
  if (n > 24) {
    const int low_count = n - 24;
    value = BitReader_ReadBitsNoRefill(bits, 24) << low_count;
    BitReader_RefillBackwards(bits);
    value += BitReader_ReadBitsNoRefill(bits, low_count);
  } else {
    value = BitReader_ReadBitsNoRefillZero(bits, n);
  }
  BitReader_RefillBackwards(bits);
  return value;
}
|
296
|
+
|
297
|
+
// Reads a gamma value.
|
298
|
+
// Assumes bitreader is already filled with at least 23 bits
|
299
|
+
int BitReader_ReadGamma(BitReader *bits) {
|
300
|
+
unsigned long bitresult;
|
301
|
+
int n;
|
302
|
+
int r;
|
303
|
+
if (bits->bits != 0) {
|
304
|
+
_BitScanReverse(&bitresult, bits->bits);
|
305
|
+
n = 31 - bitresult;
|
306
|
+
} else {
|
307
|
+
n = 32;
|
308
|
+
}
|
309
|
+
n = 2 * n + 2;
|
310
|
+
assert(n < 24);
|
311
|
+
bits->bitpos += n;
|
312
|
+
r = bits->bits >> (32 - n);
|
313
|
+
bits->bits <<= n;
|
314
|
+
return r - 2;
|
315
|
+
}
|
316
|
+
|
317
|
+
// Returns the number of leading zero bits in |bits|, or 32 when |bits|
// is zero. _BitScanReverse does not define its index output for a zero
// input, so that case is answered directly.
int CountLeadingZeros(uint32 bits) {
  unsigned long x;
  if (bits == 0)
    return 32;
  _BitScanReverse(&x, bits);
  return 31 - x;
}
|
322
|
+
|
323
|
+
// Reads a gamma value with |forced| number of forced bits.
|
324
|
+
int BitReader_ReadGammaX(BitReader *bits, int forced) {
|
325
|
+
unsigned long bitresult;
|
326
|
+
int r;
|
327
|
+
if (bits->bits != 0) {
|
328
|
+
_BitScanReverse(&bitresult, bits->bits);
|
329
|
+
int lz = 31 - bitresult;
|
330
|
+
assert(lz < 24);
|
331
|
+
r = (bits->bits >> (31 - lz - forced)) + ((lz - 1) << forced);
|
332
|
+
bits->bits <<= lz + forced + 1;
|
333
|
+
bits->bitpos += lz + forced + 1;
|
334
|
+
return r;
|
335
|
+
}
|
336
|
+
return 0;
|
337
|
+
}
|
338
|
+
|
339
|
+
// Reads a offset code parametrized by |v|.
|
340
|
+
uint32 BitReader_ReadDistance(BitReader *bits, uint32 v) {
|
341
|
+
uint32 w, m, n, rv;
|
342
|
+
if (v < 0xF0) {
|
343
|
+
n = (v >> 4) + 4;
|
344
|
+
w = _rotl(bits->bits | 1, n);
|
345
|
+
bits->bitpos += n;
|
346
|
+
m = (2 << n) - 1;
|
347
|
+
bits->bits = w & ~m;
|
348
|
+
rv = ((w & m) << 4) + (v & 0xF) - 248;
|
349
|
+
} else {
|
350
|
+
n = v - 0xF0 + 4;
|
351
|
+
w = _rotl(bits->bits | 1, n);
|
352
|
+
bits->bitpos += n;
|
353
|
+
m = (2 << n) - 1;
|
354
|
+
bits->bits = w & ~m;
|
355
|
+
rv = 8322816 + ((w & m) << 12);
|
356
|
+
BitReader_Refill(bits);
|
357
|
+
rv += (bits->bits >> 20);
|
358
|
+
bits->bitpos += 12;
|
359
|
+
bits->bits <<= 12;
|
360
|
+
}
|
361
|
+
BitReader_Refill(bits);
|
362
|
+
return rv;
|
363
|
+
}
|
364
|
+
|
365
|
+
|
366
|
+
// Reads a offset code parametrized by |v|, backwards.
|
367
|
+
uint32 BitReader_ReadDistanceB(BitReader *bits, uint32 v) {
|
368
|
+
uint32 w, m, n, rv;
|
369
|
+
if (v < 0xF0) {
|
370
|
+
n = (v >> 4) + 4;
|
371
|
+
w = _rotl(bits->bits | 1, n);
|
372
|
+
bits->bitpos += n;
|
373
|
+
m = (2 << n) - 1;
|
374
|
+
bits->bits = w & ~m;
|
375
|
+
rv = ((w & m) << 4) + (v & 0xF) - 248;
|
376
|
+
} else {
|
377
|
+
n = v - 0xF0 + 4;
|
378
|
+
w = _rotl(bits->bits | 1, n);
|
379
|
+
bits->bitpos += n;
|
380
|
+
m = (2 << n) - 1;
|
381
|
+
bits->bits = w & ~m;
|
382
|
+
rv = 8322816 + ((w & m) << 12);
|
383
|
+
BitReader_RefillBackwards(bits);
|
384
|
+
rv += (bits->bits >> (32 - 12));
|
385
|
+
bits->bitpos += 12;
|
386
|
+
bits->bits <<= 12;
|
387
|
+
}
|
388
|
+
BitReader_RefillBackwards(bits);
|
389
|
+
return rv;
|
390
|
+
}
|
391
|
+
|
392
|
+
// Reads a length code.
|
393
|
+
bool BitReader_ReadLength(BitReader *bits, uint32 *v) {
|
394
|
+
unsigned long bitresult;
|
395
|
+
int n;
|
396
|
+
uint32 rv;
|
397
|
+
_BitScanReverse(&bitresult, bits->bits);
|
398
|
+
n = 31 - bitresult;
|
399
|
+
if (n > 12) return false;
|
400
|
+
bits->bitpos += n;
|
401
|
+
bits->bits <<= n;
|
402
|
+
BitReader_Refill(bits);
|
403
|
+
n += 7;
|
404
|
+
bits->bitpos += n;
|
405
|
+
rv = (bits->bits >> (32 - n)) - 64;
|
406
|
+
bits->bits <<= n;
|
407
|
+
*v = rv;
|
408
|
+
BitReader_Refill(bits);
|
409
|
+
return true;
|
410
|
+
}
|
411
|
+
|
412
|
+
// Reads a length code, backwards.
|
413
|
+
bool BitReader_ReadLengthB(BitReader *bits, uint32 *v) {
|
414
|
+
unsigned long bitresult;
|
415
|
+
int n;
|
416
|
+
uint32 rv;
|
417
|
+
_BitScanReverse(&bitresult, bits->bits);
|
418
|
+
n = 31 - bitresult;
|
419
|
+
if (n > 12) return false;
|
420
|
+
bits->bitpos += n;
|
421
|
+
bits->bits <<= n;
|
422
|
+
BitReader_RefillBackwards(bits);
|
423
|
+
n += 7;
|
424
|
+
bits->bitpos += n;
|
425
|
+
rv = (bits->bits >> (32 - n)) - 64;
|
426
|
+
bits->bits <<= n;
|
427
|
+
*v = rv;
|
428
|
+
BitReader_RefillBackwards(bits);
|
429
|
+
return true;
|
430
|
+
}
|
431
|
+
|
432
|
+
// Returns log2 of |v| rounded up: one more than the index of the highest
// set bit of v - 1. Values 0 and 1 both give 0.
int Log2RoundUp(uint32 v) {
  if (v <= 1)
    return 0;

  unsigned long top;
  _BitScanReverse(&top, v - 1);
  return top + 1;
}
|
441
|
+
|
442
|
+
// Rounds |x| up to the next multiple of 16.
#define ALIGN_16(x) (((x)+15)&~15)

// Copies 8 bytes from |s| to |d| with a single 64-bit load and store.
#define COPY_64(d, s) {*(uint64*)(d) = *(uint64*)(s); }

// Copies 64 bytes from |s| to |d| as four unaligned 16-byte vectors.
// |d| and |s| are parenthesized before the cast so that pointer
// expressions such as (dst + k) are scaled in bytes, not in vectors.
#define COPY_64_BYTES(d, s) {                                                  \
  _mm_storeu_si128((__m128i*)(d) + 0, _mm_loadu_si128((__m128i*)(s) + 0)); \
  _mm_storeu_si128((__m128i*)(d) + 1, _mm_loadu_si128((__m128i*)(s) + 1)); \
  _mm_storeu_si128((__m128i*)(d) + 2, _mm_loadu_si128((__m128i*)(s) + 2)); \
  _mm_storeu_si128((__m128i*)(d) + 3, _mm_loadu_si128((__m128i*)(s) + 3)); \
}

// Stores to |d| the byte-wise sum of the 8 bytes at |s| and the 8 bytes at |t|.
#define COPY_64_ADD(d, s, t) _mm_storel_epi64((__m128i *)(d), _mm_add_epi8(_mm_loadl_epi64((__m128i *)(s)), _mm_loadl_epi64((__m128i *)(t))))
|
452
|
+
|
453
|
+
// Allocates a decoder together with its scratch buffer in one 16-byte
// aligned allocation; the scratch area (0x6C000 bytes) starts right after
// the decoder struct. The struct itself is zero-initialized.
// Returns NULL if the allocation fails. Release with |Kraken_Destroy|.
KrakenDecoder *Kraken_Create() {
  const size_t scratch_size = 0x6C000;
  const size_t memory_needed = sizeof(KrakenDecoder) + scratch_size;
  KrakenDecoder *dec = (KrakenDecoder*)MallocAligned(memory_needed, 16);
  if (!dec)
    return NULL;
  memset(dec, 0, sizeof(KrakenDecoder));
  dec->scratch_size = scratch_size;
  dec->scratch = (byte*)(dec + 1);
  return dec;
}
|
462
|
+
|
463
|
+
// Releases a decoder obtained from |Kraken_Create|, including its scratch
// buffer (both live in the same allocation). A NULL decoder is ignored.
void Kraken_Destroy(KrakenDecoder *kraken) {
  if (!kraken)
    return;
  FreeAligned(kraken);
}
|
466
|
+
|
467
|
+
// Parses the two-byte block header at |p| into |hdr|.
//
// Byte 0: low nibble must be 0xC and bits 4-5 must be zero; bit 6 is the
//         uncompressed flag, bit 7 the restart-decoder flag.
// Byte 1: low 7 bits are the decoder type, bit 7 the checksum flag.
//
// Returns a pointer just past the header, or NULL if the header is
// malformed or the decoder type is not one of 5, 6, 10, 11, 12. When the
// decoder type is rejected, |hdr| has already been filled in.
const byte *Kraken_ParseHeader(KrakenHeader *hdr, const byte *p) {
  const int first = p[0];
  if ((first & 0xF) != 0xC)
    return NULL;
  if (((first >> 4) & 3) != 0)
    return NULL;

  hdr->restart_decoder = (first >> 7) & 1;
  hdr->uncompressed = (first >> 6) & 1;

  const int second = p[1];
  hdr->decoder_type = second & 0x7F;
  hdr->use_checksums = !!(second >> 7);

  switch (hdr->decoder_type) {
  case 5:
  case 6:
  case 10:
  case 11:
  case 12:
    return p + 2;
  default:
    return NULL;
  }
}
|
483
|
+
|
484
|
+
// Parses a quantum header at |p| into |hdr|.
//
// The first three bytes form a big-endian 24-bit word whose low 18 bits
// hold the compressed size minus one. If that field is not all ones, bits
// 18 and 19 are stored as flag1/flag2 and, when |use_checksum| is set, a
// three-byte big-endian checksum follows. If the field is all ones, the
// remaining top bits must equal 1 (memset quantum): the next byte is
// stored in |checksum| and |compressed_size| is set to 0.
//
// Returns a pointer just past the header, or NULL for an unknown special
// quantum type.
const byte *Kraken_ParseQuantumHeader(KrakenQuantumHeader *hdr, const byte *p, bool use_checksum) {
  const uint32 word = (p[0] << 16) | (p[1] << 8) | p[2];
  const uint32 size_field = word & 0x3FFFF;

  if (size_field == 0x3ffff) {
    if ((word >> 18) != 1)
      return NULL;
    // memset
    hdr->checksum = p[3];
    hdr->compressed_size = 0;
    hdr->whole_match_distance = 0;
    return p + 4;
  }

  hdr->compressed_size = size_field + 1;
  hdr->flag1 = (word >> 18) & 1;
  hdr->flag2 = (word >> 19) & 1;
  if (!use_checksum)
    return p + 3;

  hdr->checksum = (p[3] << 16) | (p[4] << 8) | p[5];
  return p + 6;
}
|
509
|
+
|
510
|
+
// Parses a whole-match distance at |p| and stores it in |*dist|.
//
// A byte-swapped 16-bit value is read first. If it is at least 0x8000 the
// distance is that value - 0x8000 + 1 and two bytes are consumed.
// Otherwise extension bytes follow, 7 bits at a time, up to and including
// the first byte with bit 0x80 set.
//
// Returns a pointer just past the consumed bytes. There is no end-of-input
// check here; the caller must guarantee enough readable bytes.
const byte *LZNA_ParseWholeMatchInfo(const byte *p, uint32 *dist) {
  const uint32 head = _byteswap_ushort(*(uint16*)p);
  const byte *cursor = p + 2;

  if (head >= 0x8000) {
    *dist = head - 0x8000 + 1;
    return cursor;
  }

  uint32 ext = 0, shift = 0, b;
  while (((b = *cursor++) & 0x80) == 0) {
    ext += (b + 0x80) << shift;
    shift += 7;
  }
  ext += (b - 128) << shift;
  *dist = 0x8000 + head + (ext << 15) + 1;
  return cursor;
}
|
532
|
+
|
533
|
+
// Parses an LZNA quantum header at |p| into |hdr|.
//
// The first two bytes form a big-endian 16-bit word whose low 14 bits hold
// the compressed size minus one. If that field is not all ones, bits 14
// and 15 are stored as flag1/flag2 and, when |use_checksum| is set, a
// three-byte big-endian checksum follows. If the field is all ones, the
// top two bits select a special quantum:
//   0: whole match, distance parsed by |LZNA_ParseWholeMatchInfo|
//   1: memset, the next byte is stored in |checksum|
//   2: uncompressed, |compressed_size| is set to |raw_len|
//
// Returns a pointer just past the header, or NULL for any other type.
const byte *LZNA_ParseQuantumHeader(KrakenQuantumHeader *hdr, const byte *p, bool use_checksum, int raw_len) {
  const uint32 word = (p[0] << 8) | p[1];
  const uint32 size_field = word & 0x3FFF;

  if (size_field != 0x3fff) {
    hdr->compressed_size = size_field + 1;
    hdr->flag1 = (word >> 14) & 1;
    hdr->flag2 = (word >> 15) & 1;
    if (!use_checksum)
      return p + 2;
    hdr->checksum = (p[2] << 16) | (p[3] << 8) | p[4];
    return p + 5;
  }

  switch (word >> 14) {
  case 0:
    // whole match
    p = LZNA_ParseWholeMatchInfo(p + 2, &hdr->whole_match_distance);
    hdr->compressed_size = 0;
    return p;
  case 1:
    // memset
    hdr->checksum = p[2];
    hdr->compressed_size = 0;
    hdr->whole_match_distance = 0;
    return p + 3;
  case 2:
    // uncompressed
    hdr->compressed_size = raw_len;
    return p + 2;
  default:
    return NULL;
  }
}
|
567
|
+
|
568
|
+
|
569
|
+
// Checksum of the |p_size| bytes at |p|.
// TODO: implement. This is currently a stub that ignores its input and
// always returns 0.
uint32 Kraken_GetCrc(const byte *p, size_t p_size) {
  const uint32 crc = 0;
  return crc;
}
|
573
|
+
|
574
|
+
// Rearranges elements in the input array so that bits in the index
|
575
|
+
// get flipped.
|
576
|
+
static void ReverseBitsArray2048(const byte *input, byte *output) {
|
577
|
+
static const uint8 offsets[32] = {
|
578
|
+
0, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0,
|
579
|
+
0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8
|
580
|
+
};
|
581
|
+
__m128i t0, t1, t2, t3, s0, s1, s2, s3;
|
582
|
+
int i, j;
|
583
|
+
for(i = 0; i != 32; i++) {
|
584
|
+
j = offsets[i];
|
585
|
+
t0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)&input[j]),
|
586
|
+
_mm_loadl_epi64((const __m128i *)&input[j + 256]));
|
587
|
+
t1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)&input[j + 512]),
|
588
|
+
_mm_loadl_epi64((const __m128i *)&input[j + 768]));
|
589
|
+
t2 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)&input[j + 1024]),
|
590
|
+
_mm_loadl_epi64((const __m128i *)&input[j + 1280]));
|
591
|
+
t3 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)&input[j + 1536]),
|
592
|
+
_mm_loadl_epi64((const __m128i *)&input[j + 1792]));
|
593
|
+
|
594
|
+
s0 = _mm_unpacklo_epi8(t0, t1);
|
595
|
+
s1 = _mm_unpacklo_epi8(t2, t3);
|
596
|
+
s2 = _mm_unpackhi_epi8(t0, t1);
|
597
|
+
s3 = _mm_unpackhi_epi8(t2, t3);
|
598
|
+
|
599
|
+
t0 = _mm_unpacklo_epi8(s0, s1);
|
600
|
+
t1 = _mm_unpacklo_epi8(s2, s3);
|
601
|
+
t2 = _mm_unpackhi_epi8(s0, s1);
|
602
|
+
t3 = _mm_unpackhi_epi8(s2, s3);
|
603
|
+
|
604
|
+
_mm_storel_epi64((__m128i *)&output[0], t0);
|
605
|
+
_mm_storeh_pi((__m64*)&output[1024], _mm_castsi128_ps(t0));
|
606
|
+
_mm_storel_epi64((__m128i *)&output[256], t1);
|
607
|
+
_mm_storeh_pi((__m64*)&output[1280], _mm_castsi128_ps(t1));
|
608
|
+
_mm_storel_epi64((__m128i *)&output[512], t2);
|
609
|
+
_mm_storeh_pi((__m64*)&output[1536], _mm_castsi128_ps(t2));
|
610
|
+
_mm_storel_epi64((__m128i *)&output[768], t3);
|
611
|
+
_mm_storeh_pi((__m64*)&output[1792], _mm_castsi128_ps(t3));
|
612
|
+
output += 8;
|
613
|
+
}
|
614
|
+
}
|
615
|
+
|
616
|
+
// Decodes huffman symbols into |hr->output| .. |hr->output_end| using the
// lookup tables in |lut|.
//
// Symbols are taken round-robin from three bit streams: |src| (forwards),
// |src_end| (backwards) and |src_mid| (forwards). Each symbol is found by
// indexing |lut| with the low 11 bits of the stream's bit buffer.
//
// Returns false if the stream pointers cross during decoding, or if after
// the last symbol |src| has not reached |hr->src_mid_org| or |src_end| has
// not met |src_mid|. Returns true otherwise.
bool Kraken_DecodeBytesCore(HuffReader *hr, HuffRevLut *lut) {
// Decodes one symbol from the bit buffer |bitbuf| (holding |bitcnt| bits)
// into |out|, consuming the symbol's code length.
#define KRAKEN_HUFF_STEP(bitbuf, bitcnt, out) do { \
    const int idx_ = (bitbuf) & 0x7FF;            \
    const int len_ = lut->bits2len[idx_];         \
    (bitbuf) >>= len_;                            \
    (bitcnt) -= len_;                             \
    (out) = lut->bits2sym[idx_];                  \
  } while (0)

  const byte *src = hr->src;
  const byte *src_mid = hr->src_mid;
  const byte *src_end = hr->src_end;

  uint32 src_bits = hr->src_bits;
  uint32 src_mid_bits = hr->src_mid_bits;
  uint32 src_end_bits = hr->src_end_bits;

  int src_bitpos = hr->src_bitpos;
  int src_mid_bitpos = hr->src_mid_bitpos;
  int src_end_bitpos = hr->src_end_bitpos;

  byte *dst = hr->output;
  byte *dst_end = hr->output_end;

  if (src > src_mid)
    return false;

  // Fast path: six symbols per iteration using 32-bit loads.
  if (hr->src_end - src_mid >= 4 && dst_end - dst >= 6) {
    // Leave room so that writing dst[0..5] never passes the real end, and
    // so that a 32-bit load at |src_end| covers the four bytes before it.
    dst_end -= 5;
    src_end -= 4;

    while (dst < dst_end && src <= src_mid && src_mid <= src_end) {
      // Top up all three bit buffers.
      src_bits |= *(uint32*)src << src_bitpos;
      src += (31 - src_bitpos) >> 3;

      src_end_bits |= _byteswap_ulong(*(uint32*)src_end) << src_end_bitpos;
      src_end -= (31 - src_end_bitpos) >> 3;

      src_mid_bits |= *(uint32*)src_mid << src_mid_bitpos;
      src_mid += (31 - src_mid_bitpos) >> 3;

      // NOTE(review): presumably each bitpos is in 0..7 here, in which
      // case this adds 24 to it - confirm.
      src_bitpos |= 0x18;
      src_end_bitpos |= 0x18;
      src_mid_bitpos |= 0x18;

      // Two symbols from each stream, interleaved.
      KRAKEN_HUFF_STEP(src_bits, src_bitpos, dst[0]);
      KRAKEN_HUFF_STEP(src_end_bits, src_end_bitpos, dst[1]);
      KRAKEN_HUFF_STEP(src_mid_bits, src_mid_bitpos, dst[2]);
      KRAKEN_HUFF_STEP(src_bits, src_bitpos, dst[3]);
      KRAKEN_HUFF_STEP(src_end_bits, src_end_bitpos, dst[4]);
      KRAKEN_HUFF_STEP(src_mid_bits, src_mid_bitpos, dst[5]);
      dst += 6;
    }
    dst_end += 5;

    // Give back whole bytes still sitting unused in the bit buffers.
    src -= src_bitpos >> 3;
    src_bitpos &= 7;

    src_end += 4 + (src_end_bitpos >> 3);
    src_end_bitpos &= 7;

    src_mid -= src_mid_bitpos >> 3;
    src_mid_bitpos &= 7;
  }

  // Careful tail: one symbol per stream at a time, with at most 16-bit
  // loads that stay inside each stream's remaining bytes.
  while (dst < dst_end) {
    // Forward stream |src|.
    if (src_mid - src <= 1) {
      if (src_mid - src == 1)
        src_bits |= *src << src_bitpos;
    } else {
      src_bits |= *(uint16 *)src << src_bitpos;
    }
    KRAKEN_HUFF_STEP(src_bits, src_bitpos, *dst);
    dst++;
    src += (7 - src_bitpos) >> 3;
    src_bitpos &= 7;

    if (dst < dst_end) {
      // Load for both the backward stream and the middle stream.
      if (src_end - src_mid <= 1) {
        if (src_end - src_mid == 1) {
          src_end_bits |= *src_mid << src_end_bitpos;
          src_mid_bits |= *src_mid << src_mid_bitpos;
        }
      } else {
        unsigned int v = *(uint16*)(src_end - 2);
        src_end_bits |= (((v >> 8) | (v << 8)) & 0xffff) << src_end_bitpos;
        src_mid_bits |= *(uint16*)src_mid << src_mid_bitpos;
      }

      // Backward stream |src_end|.
      KRAKEN_HUFF_STEP(src_end_bits, src_end_bitpos, *dst);
      dst++;
      src_end -= (7 - src_end_bitpos) >> 3;
      src_end_bitpos &= 7;

      if (dst < dst_end) {
        // Forward stream |src_mid|.
        KRAKEN_HUFF_STEP(src_mid_bits, src_mid_bitpos, *dst);
        dst++;
        src_mid += (7 - src_mid_bitpos) >> 3;
        src_mid_bitpos &= 7;
      }
    }

    if (src > src_mid || src_mid > src_end)
      return false;
  }

  // Every stream must have been consumed exactly.
  return src == hr->src_mid_org && src_end == src_mid;

#undef KRAKEN_HUFF_STEP
}
|
754
|
+
|
755
|
+
// Reads huffman code lengths from |bits|.
//
// For every symbol read, the symbol is appended to |syms| at index
// |code_prefix[codelen]|, which is then incremented. The first bit picks
// the encoding:
//   0: sparse - an 8-bit symbol count, then either a single 8-bit symbol
//      or a list of (8-bit symbol, fixed-width code length) pairs.
//   1: dense - alternating gamma-coded runs of absent symbols and runs of
//      present symbols whose code lengths are coded as deltas against a
//      running average.
//
// Returns the number of symbols, or -1 if the data is invalid.
int Huff_ReadCodeLengthsOld(BitReader *bits, uint8 *syms, uint32 *code_prefix) {
  if (!BitReader_ReadBitNoRefill(bits)) {
    // Sparse symbol encoding
    int num_symbols = BitReader_ReadBitsNoRefill(bits, 8);
    if (num_symbols == 0)
      return -1;
    if (num_symbols == 1) {
      syms[0] = BitReader_ReadBitsNoRefill(bits, 8);
      return num_symbols;
    }
    int codelen_bits = BitReader_ReadBitsNoRefill(bits, 3);
    if (codelen_bits > 4)
      return -1;
    for (int i = 0; i < num_symbols; i++) {
      BitReader_Refill(bits);
      int sym = BitReader_ReadBitsNoRefill(bits, 8);
      int codelen = BitReader_ReadBitsNoRefillZero(bits, codelen_bits) + 1;
      if (codelen > 11)
        return -1;
      syms[code_prefix[codelen]++] = sym;
    }
    return num_symbols;
  }

  // Dense encoding.
  int sym = 0, num_symbols = 0;
  int avg_bits_x4 = 32;
  int forced_bits = BitReader_ReadBitsNoRefill(bits, 2);

  uint32 thres_for_valid_gamma_bits = 1 << (31 - (20u >> forced_bits));

  // A set bit here means the data starts directly with a run of present
  // symbols, so the first run of zeros is skipped.
  bool read_zero_run = !BitReader_ReadBit(bits);

  for (;;) {
    if (read_zero_run) {
      // Run of zeros
      if (!(bits->bits & 0xff000000))
        return -1;
      sym += BitReader_ReadBitsNoRefill(bits, 2 * (CountLeadingZeros(bits->bits) + 1)) - 2 + 1;
      if (sym >= 256)
        break;
    }
    read_zero_run = true;

    BitReader_Refill(bits);
    // Read out the gamma value for the # of symbols
    if (!(bits->bits & 0xff000000))
      return -1;
    int n = BitReader_ReadBitsNoRefill(bits, 2 * (CountLeadingZeros(bits->bits) + 1)) - 2 + 1;
    // Overflow?
    if (sym + n > 256)
      return -1;
    BitReader_Refill(bits);
    num_symbols += n;

    do {
      if (bits->bits < thres_for_valid_gamma_bits)
        return -1; // too big gamma value?

      int lz = CountLeadingZeros(bits->bits);
      int v = BitReader_ReadBitsNoRefill(bits, lz + forced_bits + 1) + ((lz - 1) << forced_bits);
      // Zig-zag decode |v| into a signed delta and add the rounded average.
      int codelen = (-(int)(v & 1) ^ (v >> 1)) + ((avg_bits_x4 + 2) >> 2);
      if (codelen < 1 || codelen > 11)
        return -1;
      avg_bits_x4 = codelen + ((3 * avg_bits_x4 + 2) >> 2);
      BitReader_Refill(bits);
      syms[code_prefix[codelen]++] = sym++;
    } while (--n);

    if (sym == 256)
      break;
  }

  return (sym == 256) && (num_symbols >= 2) ? num_symbols : -1;
}
|
820
|
+
|
821
|
+
// Reads a value whose code is either y - 1 or y bits long, where y is
// derived from |num_symbols|: with x = 2 * min(num_symbols,
// 257 - num_symbols), y is one more than the index of the highest set bit
// of x - 1. If the first y - 1 bits are below the cutoff (1 << y) - x they
// are the value; otherwise y bits are consumed and the value is those
// bits minus the cutoff.
//
// Returns 0 without reading anything when |num_symbols| is 256.
// Does not refill.
int BitReader_ReadFluff(BitReader *bits, int num_symbols) {
  if (num_symbols == 256)
    return 0;

  int span = 257 - num_symbols;
  if (span > num_symbols)
    span = num_symbols;
  span *= 2;

  unsigned long width;
  _BitScanReverse(&width, span - 1);
  width += 1;

  const uint32 peek = bits->bits >> (32 - width);
  const uint32 cutoff = (1 << width) - span;
  const uint32 short_code = peek >> 1;

  if (short_code >= cutoff) {
    bits->bits <<= width;
    bits->bitpos += width;
    return peek - cutoff;
  }

  bits->bits <<= (width - 1);
  bits->bitpos += (width - 1);
  return short_code;
}
|
849
|
+
|
850
|
+
struct BitReader2 {
|
851
|
+
const uint8 *p, *p_end;
|
852
|
+
uint32 bitpos;
|
853
|
+
};
|
854
|
+
|
855
|
+
// Lookup table used by DecodeGolombRiceLengths to decode all unary codes
// contained in one input byte at once. The index is the input byte (bits are
// consumed most significant first; every 1 bit terminates a code).
//
// Layout of an entry, as consumed by DecodeGolombRiceLengths:
//   (x & 0x0f0f0f0f)        - zero-run lengths of the 1st..4th code in the byte
//   ((x >> 4) & 0x0f0f0f0f) - zero-run lengths of the 5th..8th code in the byte
//   (x >> 28)               - zeros left over after the last 1 bit, carried
//                             into the first code of the following byte
static const uint32 kRiceCodeBits2Value[256] = {
  0x80000000, 0x00000007, 0x10000006, 0x00000006, 0x20000005, 0x00000105, 0x10000005, 0x00000005,
  0x30000004, 0x00000204, 0x10000104, 0x00000104, 0x20000004, 0x00010004, 0x10000004, 0x00000004,
  0x40000003, 0x00000303, 0x10000203, 0x00000203, 0x20000103, 0x00010103, 0x10000103, 0x00000103,
  0x30000003, 0x00020003, 0x10010003, 0x00010003, 0x20000003, 0x01000003, 0x10000003, 0x00000003,
  0x50000002, 0x00000402, 0x10000302, 0x00000302, 0x20000202, 0x00010202, 0x10000202, 0x00000202,
  0x30000102, 0x00020102, 0x10010102, 0x00010102, 0x20000102, 0x01000102, 0x10000102, 0x00000102,
  0x40000002, 0x00030002, 0x10020002, 0x00020002, 0x20010002, 0x01010002, 0x10010002, 0x00010002,
  0x30000002, 0x02000002, 0x11000002, 0x01000002, 0x20000002, 0x00000012, 0x10000002, 0x00000002,
  0x60000001, 0x00000501, 0x10000401, 0x00000401, 0x20000301, 0x00010301, 0x10000301, 0x00000301,
  0x30000201, 0x00020201, 0x10010201, 0x00010201, 0x20000201, 0x01000201, 0x10000201, 0x00000201,
  0x40000101, 0x00030101, 0x10020101, 0x00020101, 0x20010101, 0x01010101, 0x10010101, 0x00010101,
  0x30000101, 0x02000101, 0x11000101, 0x01000101, 0x20000101, 0x00000111, 0x10000101, 0x00000101,
  0x50000001, 0x00040001, 0x10030001, 0x00030001, 0x20020001, 0x01020001, 0x10020001, 0x00020001,
  0x30010001, 0x02010001, 0x11010001, 0x01010001, 0x20010001, 0x00010011, 0x10010001, 0x00010001,
  0x40000001, 0x03000001, 0x12000001, 0x02000001, 0x21000001, 0x01000011, 0x11000001, 0x01000001,
  0x30000001, 0x00000021, 0x10000011, 0x00000011, 0x20000001, 0x00001001, 0x10000001, 0x00000001,
  0x70000000, 0x00000600, 0x10000500, 0x00000500, 0x20000400, 0x00010400, 0x10000400, 0x00000400,
  0x30000300, 0x00020300, 0x10010300, 0x00010300, 0x20000300, 0x01000300, 0x10000300, 0x00000300,
  0x40000200, 0x00030200, 0x10020200, 0x00020200, 0x20010200, 0x01010200, 0x10010200, 0x00010200,
  0x30000200, 0x02000200, 0x11000200, 0x01000200, 0x20000200, 0x00000210, 0x10000200, 0x00000200,
  0x50000100, 0x00040100, 0x10030100, 0x00030100, 0x20020100, 0x01020100, 0x10020100, 0x00020100,
  0x30010100, 0x02010100, 0x11010100, 0x01010100, 0x20010100, 0x00010110, 0x10010100, 0x00010100,
  0x40000100, 0x03000100, 0x12000100, 0x02000100, 0x21000100, 0x01000110, 0x11000100, 0x01000100,
  0x30000100, 0x00000120, 0x10000110, 0x00000110, 0x20000100, 0x00001100, 0x10000100, 0x00000100,
  0x60000000, 0x00050000, 0x10040000, 0x00040000, 0x20030000, 0x01030000, 0x10030000, 0x00030000,
  0x30020000, 0x02020000, 0x11020000, 0x01020000, 0x20020000, 0x00020010, 0x10020000, 0x00020000,
  0x40010000, 0x03010000, 0x12010000, 0x02010000, 0x21010000, 0x01010010, 0x11010000, 0x01010000,
  0x30010000, 0x00010020, 0x10010010, 0x00010010, 0x20010000, 0x00011000, 0x10010000, 0x00010000,
  0x50000000, 0x04000000, 0x13000000, 0x03000000, 0x22000000, 0x02000010, 0x12000000, 0x02000000,
  0x31000000, 0x01000020, 0x11000010, 0x01000010, 0x21000000, 0x01001000, 0x11000000, 0x01000000,
  0x40000000, 0x00000030, 0x10000020, 0x00000020, 0x20000010, 0x00001010, 0x10000010, 0x00000010,
  0x30000000, 0x00002000, 0x10001000, 0x00001000, 0x20000000, 0x00100000, 0x10000000, 0x00000000,
};
|
889
|
+
|
890
|
+
// Number of unary codes that finish inside a given input byte, i.e. the
// number of 1 bits in the index. DecodeGolombRiceLengths advances its output
// pointer by this amount after consuming the byte.
static const uint8 kRiceCodeBits2Len[256] = {
  0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
  1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
  1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
  1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
  2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
  3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
};
|
900
|
+
|
901
|
+
|
902
|
+
// Decodes |size| unary-coded values (count of 0 bits before a terminating
// 1 bit, most significant bit first) from |br| into |dst|, one byte each.
//
// Whole input bytes are decoded through kRiceCodeBits2Value /
// kRiceCodeBits2Len, so each step stores 8 bytes at the current output
// position: the buffer behind |dst| needs slack past dst + size.
//
// On success |br| is left just after the terminating bit of the last value.
// Returns false if the input runs out first.
bool DecodeGolombRiceLengths(uint8 *dst, size_t size, BitReader2 *br) {
  const uint8 *in = br->p;
  const uint8 *in_end = br->p_end;
  uint8 *out_end = dst + size;

  if (in >= in_end)
    return false;

  // Zeros carried over from previous bytes. Starts negative so that the
  // already-consumed leading bits (masked to zero below) are not counted.
  int carry = -(int)br->bitpos;
  uint32 cur = *in++ & (255 >> br->bitpos);

  while (true) {
    if (cur != 0) {
      uint32 packed = kRiceCodeBits2Value[cur];
      // Up to 8 values per byte; only the first one receives the carry.
      *(uint32*)&dst[0] = carry + (packed & 0x0f0f0f0f);
      *(uint32*)&dst[4] = (packed >> 4) & 0x0f0f0f0f;
      dst += kRiceCodeBits2Len[cur];
      if (dst >= out_end)
        break;
      carry = packed >> 28;
    } else {
      // No terminator in this byte: all 8 bits extend the current run.
      carry += 8;
    }
    if (in >= in_end)
      return false;
    cur = *in++;
  }

  // The last byte may have produced more values than requested. Drop the
  // lowest set bit once per surplus value so |cur| ends at the terminator of
  // the last wanted value.
  if (dst > out_end) {
    for (int surplus = dst - out_end; surplus > 0; --surplus)
      cur &= (cur - 1);
  }

  // If the last terminator is not the final bit of the byte, the byte is only
  // partially consumed: step back onto it and record how many bits are used.
  int new_bitpos = 0;
  if ((cur & 1) == 0) {
    in--;
    unsigned long lowest_set;
    _BitScanForward(&lowest_set, cur);
    new_bitpos = 8 - lowest_set;
  }

  br->p = in;
  br->bitpos = new_bitpos;
  return true;
}
|
943
|
+
|
944
|
+
// Appends |bitcount| raw low bits to each of the |size| bytes in |dst|:
// every byte becomes (byte << bitcount) + next |bitcount| bits from |br|.
// |bitcount| is expected to be 0..3; 0 is a no-op.
//
// The work is done on 8 output bytes at a time, so up to 7 bytes past
// dst + size are overwritten; the 8 bytes at dst + size are saved up front
// and restored at the end. Input is fetched as 32-bit loads, which may touch
// bytes past the ones actually consumed.
//
// Returns false (leaving |br| untouched) if the input is too short.
bool DecodeGolombRiceBits(uint8 *dst, uint size, uint bitcount, BitReader2 *br) {
  if (bitcount == 0)
    return true;

  uint8 *out_end = dst + size;
  const uint8 *in = br->p;
  int start_bit = br->bitpos;

  // Check that every bit we are going to consume is available.
  uint total_bits = start_bit + bitcount * size;
  uint total_bytes = (total_bits + 7) >> 3;
  if (total_bytes > br->p_end - in)
    return false;

  // The amount consumed is known in advance, so advance the reader now.
  br->p = in + (total_bits >> 3);
  br->bitpos = total_bits & 7;

  // todo. handle r/w outside of range
  uint64 saved_tail = *(uint64*)out_end;

  switch (bitcount) {
  case 1:
    do {
      // 8 input bits -> one bit in each of 8 output bytes.
      uint64 spread = (uint8)(_byteswap_ulong(*(uint32*)in) >> (24 - start_bit));
      in += 1;
      spread = (spread | (spread << 28)) & 0xF0000000Full;
      spread = (spread | (spread << 14)) & 0x3000300030003ull;
      spread = (spread | (spread << 7)) & 0x0101010101010101ull;
      *(uint64*)dst = *(uint64*)dst * 2 + _byteswap_uint64(spread);
      dst += 8;
    } while (dst < out_end);
    break;

  case 2:
    do {
      // 16 input bits -> two bits in each of 8 output bytes.
      uint64 spread = (uint16)(_byteswap_ulong(*(uint32*)in) >> (16 - start_bit));
      in += 2;
      spread = (spread | (spread << 24)) & 0xFF000000FFull;
      spread = (spread | (spread << 12)) & 0xF000F000F000Full;
      spread = (spread | (spread << 6)) & 0x0303030303030303ull;
      *(uint64*)dst = *(uint64*)dst * 4 + _byteswap_uint64(spread);
      dst += 8;
    } while (dst < out_end);
    break;

  default:
    assert(bitcount == 3);
    do {
      // 24 input bits -> three bits in each of 8 output bytes.
      uint64 spread = (_byteswap_ulong(*(uint32*)in) >> (8 - start_bit)) & 0xffffff;
      in += 3;
      spread = (spread | (spread << 20)) & 0xFFF00000FFFull;
      spread = (spread | (spread << 10)) & 0x3F003F003F003Full;
      spread = (spread | (spread << 5)) & 0x0707070707070707ull;
      *(uint64*)dst = *(uint64*)dst * 8 + _byteswap_uint64(spread);
      dst += 8;
    } while (dst < out_end);
    break;
  }

  // Undo whatever the last 8-byte group wrote past the requested range.
  *(uint64*)out_end = saved_tail;
  return true;
}
|
1005
|
+
|
1006
|
+
struct HuffRange {
|
1007
|
+
uint16 symbol;
|
1008
|
+
uint16 num;
|
1009
|
+
};
|
1010
|
+
|
1011
|
+
// Converts the run descriptors of a symbol table header into a list of
// symbol ranges.
//
//   range       - output; receives (P >> 1) + 1 entries.
//   num_symbols - total number of symbols the ranges must cover.
//   P           - number of descriptors. Bit 0 set means the list starts with
//                 a gap before the first run; P >> 1 is the number of
//                 (run, gap) pairs that follow.
//   symlen      - one byte per descriptor giving how many extra bits to read
//                 from |bits| for that descriptor.
//   bits        - bit reader supplying the extra bits.
//
// The final range is implicit: it starts after the last gap and takes all
// symbols not yet assigned.
//
// Returns the number of ranges written, or -1 if a descriptor is out of
// bounds or the ranges do not fit in the 0..255 symbol space.
int Huff_ConvertToRanges(HuffRange *range, int num_symbols, int P, const uint8 *symlen, BitReader *bits) {
  const int pair_count = P >> 1;
  int next_symbol = 0;
  int nbits;

  // Optional leading gap.
  if (P & 1) {
    BitReader_Refill(bits);
    nbits = *symlen++;
    if (nbits >= 8)
      return -1;
    next_symbol = BitReader_ReadBitsNoRefill(bits, nbits + 1) + (1 << (nbits + 1)) - 1;
  }

  int assigned = 0;
  for (int r = 0; r < pair_count; r++, symlen += 2) {
    BitReader_Refill(bits);

    // Run length: nbits extra bits below an implicit top bit.
    nbits = symlen[0];
    if (nbits >= 9)
      return -1;
    int run = BitReader_ReadBitsNoRefillZero(bits, nbits) + (1 << nbits);

    // Gap after the run.
    nbits = symlen[1];
    if (nbits >= 8)
      return -1;
    int gap = BitReader_ReadBitsNoRefill(bits, nbits + 1) + (1 << (nbits + 1)) - 1;

    range[r].symbol = next_symbol;
    range[r].num = run;
    assigned += run;
    next_symbol += run + gap;
  }

  // The implicit last range needs at least one symbol and must end within
  // the 256-symbol space.
  if (next_symbol >= 256)
    return -1;
  if (assigned >= num_symbols)
    return -1;
  if (next_symbol + num_symbols - assigned > 256)
    return -1;

  range[pair_count].symbol = next_symbol;
  range[pair_count].num = num_symbols - assigned;

  return pair_count + 1;
}
|
1049
|
+
|
1050
|
+
// Reads a Huffman code length table stored in the "new" (Golomb-Rice) format.
//
//   bits        - bit reader positioned right after the format selector bits.
//   syms        - output; symbols are stored grouped by code length.
//   code_prefix - per-length write cursors into |syms| (indices 1..11); each
//                 cursor is advanced once per symbol of that length.
//
// Returns the number of symbols in the table, or -1 on malformed input.
int Huff_ReadCodeLengthsNew(BitReader *bits, uint8 *syms, uint32 *code_prefix) {
  int forced_bits = BitReader_ReadBitsNoRefill(bits, 2);

  int num_symbols = BitReader_ReadBitsNoRefill(bits, 8) + 1;

  int fluff = BitReader_ReadFluff(bits, num_symbols);

  // Holds num_symbols code length deltas followed by |fluff| run descriptors.
  // Both decoders below store in 8-byte units, hence the generous size.
  uint8 code_len[512];

  // Hand the stream over to the byte-oriented reader at the current position.
  BitReader2 br2;
  br2.bitpos = (bits->bitpos - 24) & 7;
  br2.p_end = bits->p_end;
  br2.p = bits->p - (unsigned)((24 - bits->bitpos + 7) >> 3);

  if (!DecodeGolombRiceLengths(code_len, num_symbols + fluff, &br2))
    return -1;
  // Clear the slack DecodeGolombRiceLengths may have scribbled on.
  memset(code_len + (num_symbols + fluff), 0, 16);
  if (!DecodeGolombRiceBits(code_len, num_symbols, forced_bits, &br2))
    return -1;

  // Reset the bits decoder.
  bits->bitpos = 24;
  bits->p = br2.p;
  bits->bits = 0;
  BitReader_Refill(bits);
  bits->bits <<= br2.bitpos;
  bits->bitpos += br2.bitpos;

  // Undo the delta filter: each stored value is a zigzag-coded difference
  // from a running average of the previous lengths.
  uint running_sum = 0x1e;
  for (int i = 0; i < num_symbols; i++) {
    int v = code_len[i];
    v = -(int)(v & 1) ^ (v >> 1);
    code_len[i] = v + (running_sum >> 2) + 1;
    if (code_len[i] < 1 || code_len[i] > 11)
      return -1;
    running_sum += v;
  }

  HuffRange range[128];
  int ranges = Huff_ConvertToRanges(range, num_symbols, fluff, &code_len[num_symbols], bits);
  if (ranges <= 0)
    return -1;

  // Walk the ranges in order, filing each symbol under its code length.
  uint8 *cp = code_len;
  for (int i = 0; i < ranges; i++) {
    int sym = range[i].symbol;
    int n = range[i].num;
    do {
      syms[code_prefix[*cp++]++] = sym++;
    } while (--n);
  }

  return num_symbols;
}
|
1144
|
+
|
1145
|
+
struct NewHuffLut {
|
1146
|
+
// Mapping that maps a bit pattern to a code length.
|
1147
|
+
uint8 bits2len[2048 + 16];
|
1148
|
+
// Mapping that maps a bit pattern to a symbol.
|
1149
|
+
uint8 bits2sym[2048 + 16];
|
1150
|
+
};
|
1151
|
+
|
1152
|
+
// May overflow 16 bytes past the end
|
1153
|
+
void FillByteOverflow16(uint8 *dst, uint8 v, size_t n) {
|
1154
|
+
memset(dst, v, n);
|
1155
|
+
}
|
1156
|
+
|
1157
|
+
bool Huff_MakeLut(const uint32 *prefix_org, const uint32 *prefix_cur, NewHuffLut *hufflut, uint8 *syms) {
|
1158
|
+
uint32 currslot = 0;
|
1159
|
+
for(uint32 i = 1; i < 11; i++) {
|
1160
|
+
uint32 start = prefix_org[i];
|
1161
|
+
uint32 count = prefix_cur[i] - start;
|
1162
|
+
if (count) {
|
1163
|
+
uint32 stepsize = 1 << (11 - i);
|
1164
|
+
uint32 num_to_set = count << (11 - i);
|
1165
|
+
if (currslot + num_to_set > 2048)
|
1166
|
+
return false;
|
1167
|
+
FillByteOverflow16(&hufflut->bits2len[currslot], i, num_to_set);
|
1168
|
+
|
1169
|
+
uint8 *p = &hufflut->bits2sym[currslot];
|
1170
|
+
for (uint32 j = 0; j != count; j++, p += stepsize)
|
1171
|
+
FillByteOverflow16(p, syms[start + j], stepsize);
|
1172
|
+
currslot += num_to_set;
|
1173
|
+
}
|
1174
|
+
}
|
1175
|
+
if (prefix_cur[11] - prefix_org[11] != 0) {
|
1176
|
+
uint32 num_to_set = prefix_cur[11] - prefix_org[11];
|
1177
|
+
if (currslot + num_to_set > 2048)
|
1178
|
+
return false;
|
1179
|
+
FillByteOverflow16(&hufflut->bits2len[currslot], 11, num_to_set);
|
1180
|
+
memcpy(&hufflut->bits2sym[currslot], &syms[prefix_org[11]], num_to_set);
|
1181
|
+
currslot += num_to_set;
|
1182
|
+
}
|
1183
|
+
return currslot == 2048;
|
1184
|
+
}
|
1185
|
+
|
1186
|
+
int Kraken_DecodeBytes_Type12(const byte *src, size_t src_size, byte *output, int output_size, int type) {
|
1187
|
+
BitReader bits;
|
1188
|
+
int half_output_size;
|
1189
|
+
uint32 split_left, split_mid, split_right;
|
1190
|
+
const byte *src_mid;
|
1191
|
+
NewHuffLut huff_lut;
|
1192
|
+
HuffReader hr;
|
1193
|
+
HuffRevLut rev_lut;
|
1194
|
+
const uint8 *src_end = src + src_size;
|
1195
|
+
|
1196
|
+
bits.bitpos = 24;
|
1197
|
+
bits.bits = 0;
|
1198
|
+
bits.p = src;
|
1199
|
+
bits.p_end = src_end;
|
1200
|
+
BitReader_Refill(&bits);
|
1201
|
+
|
1202
|
+
static const uint32 code_prefix_org[12] = { 0x0, 0x0, 0x2, 0x6, 0xE, 0x1E, 0x3E, 0x7E, 0xFE, 0x1FE, 0x2FE, 0x3FE };
|
1203
|
+
uint32 code_prefix[12] = { 0x0, 0x0, 0x2, 0x6, 0xE, 0x1E, 0x3E, 0x7E, 0xFE, 0x1FE, 0x2FE, 0x3FE };
|
1204
|
+
uint8 syms[1280];
|
1205
|
+
int num_syms;
|
1206
|
+
if (!BitReader_ReadBitNoRefill(&bits)) {
|
1207
|
+
num_syms = Huff_ReadCodeLengthsOld(&bits, syms, code_prefix);
|
1208
|
+
} else if (!BitReader_ReadBitNoRefill(&bits)) {
|
1209
|
+
num_syms = Huff_ReadCodeLengthsNew(&bits, syms, code_prefix);
|
1210
|
+
} else {
|
1211
|
+
return -1;
|
1212
|
+
}
|
1213
|
+
|
1214
|
+
if (num_syms < 1)
|
1215
|
+
return -1;
|
1216
|
+
src = bits.p - ((24 - bits.bitpos) / 8);
|
1217
|
+
|
1218
|
+
if (num_syms == 1) {
|
1219
|
+
memset(output, syms[0], output_size);
|
1220
|
+
return src - src_end;
|
1221
|
+
}
|
1222
|
+
|
1223
|
+
if (!Huff_MakeLut(code_prefix_org, code_prefix, &huff_lut, syms))
|
1224
|
+
return -1;
|
1225
|
+
|
1226
|
+
ReverseBitsArray2048(huff_lut.bits2len, rev_lut.bits2len);
|
1227
|
+
ReverseBitsArray2048(huff_lut.bits2sym, rev_lut.bits2sym);
|
1228
|
+
|
1229
|
+
if (type == 1) {
|
1230
|
+
if (src + 3 > src_end)
|
1231
|
+
return -1;
|
1232
|
+
split_mid = *(uint16*)src;
|
1233
|
+
src += 2;
|
1234
|
+
hr.output = output;
|
1235
|
+
hr.output_end = output + output_size;
|
1236
|
+
hr.src = src;
|
1237
|
+
hr.src_end = src_end;
|
1238
|
+
hr.src_mid_org = hr.src_mid = src + split_mid;
|
1239
|
+
hr.src_bitpos = 0;
|
1240
|
+
hr.src_bits = 0;
|
1241
|
+
hr.src_mid_bitpos = 0;
|
1242
|
+
hr.src_mid_bits = 0;
|
1243
|
+
hr.src_end_bitpos = 0;
|
1244
|
+
hr.src_end_bits = 0;
|
1245
|
+
if (!Kraken_DecodeBytesCore(&hr, &rev_lut))
|
1246
|
+
return -1;
|
1247
|
+
} else {
|
1248
|
+
if (src + 6 > src_end)
|
1249
|
+
return -1;
|
1250
|
+
|
1251
|
+
half_output_size = (output_size + 1) >> 1;
|
1252
|
+
split_mid = *(uint32*)src & 0xFFFFFF;
|
1253
|
+
src += 3;
|
1254
|
+
if (split_mid > (src_end - src))
|
1255
|
+
return -1;
|
1256
|
+
src_mid = src + split_mid;
|
1257
|
+
split_left = *(uint16*)src;
|
1258
|
+
src += 2;
|
1259
|
+
if (src_mid - src < split_left + 2 || src_end - src_mid < 3)
|
1260
|
+
return -1;
|
1261
|
+
split_right = *(uint16*)src_mid;
|
1262
|
+
if (src_end - (src_mid + 2) < split_right + 2)
|
1263
|
+
return -1;
|
1264
|
+
|
1265
|
+
hr.output = output;
|
1266
|
+
hr.output_end = output + half_output_size;
|
1267
|
+
hr.src = src;
|
1268
|
+
hr.src_end = src_mid;
|
1269
|
+
hr.src_mid_org = hr.src_mid = src + split_left;
|
1270
|
+
hr.src_bitpos = 0;
|
1271
|
+
hr.src_bits = 0;
|
1272
|
+
hr.src_mid_bitpos = 0;
|
1273
|
+
hr.src_mid_bits = 0;
|
1274
|
+
hr.src_end_bitpos = 0;
|
1275
|
+
hr.src_end_bits = 0;
|
1276
|
+
if (!Kraken_DecodeBytesCore(&hr, &rev_lut))
|
1277
|
+
return -1;
|
1278
|
+
|
1279
|
+
hr.output = output + half_output_size;
|
1280
|
+
hr.output_end = output + output_size;
|
1281
|
+
hr.src = src_mid + 2;
|
1282
|
+
hr.src_end = src_end;
|
1283
|
+
hr.src_mid_org = hr.src_mid = src_mid + 2 + split_right;
|
1284
|
+
hr.src_bitpos = 0;
|
1285
|
+
hr.src_bits = 0;
|
1286
|
+
hr.src_mid_bitpos = 0;
|
1287
|
+
hr.src_mid_bits = 0;
|
1288
|
+
hr.src_end_bitpos = 0;
|
1289
|
+
hr.src_end_bits = 0;
|
1290
|
+
if (!Kraken_DecodeBytesCore(&hr, &rev_lut))
|
1291
|
+
return -1;
|
1292
|
+
}
|
1293
|
+
return (int)src_size;
|
1294
|
+
}
|
1295
|
+
|
1296
|
+
// bitmasks[i] has its low (i + 1) bits set.
static uint32 bitmasks[32] = {
  0x1,        0x3,        0x7,        0xf,
  0x1f,       0x3f,       0x7f,       0xff,
  0x1ff,      0x3ff,      0x7ff,      0xfff,
  0x1fff,     0x3fff,     0x7fff,     0xffff,
  0x1ffff,    0x3ffff,    0x7ffff,    0xfffff,
  0x1fffff,   0x3fffff,   0x7fffff,   0xffffff,
  0x1ffffff,  0x3ffffff,  0x7ffffff,  0xfffffff,
  0x1fffffff, 0x3fffffff, 0x7fffffff, 0xffffffff
};
|
1302
|
+
|
1303
|
+
int Kraken_DecodeMultiArray(const uint8 *src, const uint8 *src_end,
|
1304
|
+
uint8 *dst, uint8 *dst_end,
|
1305
|
+
uint8 **array_data, int *array_lens, int array_count,
|
1306
|
+
int *total_size_out, bool force_memmove, uint8 *scratch, uint8 *scratch_end) {
|
1307
|
+
const uint8 *src_org = src;
|
1308
|
+
|
1309
|
+
if (src_end - src < 4)
|
1310
|
+
return -1;
|
1311
|
+
|
1312
|
+
int decoded_size;
|
1313
|
+
int num_arrays_in_file = *src++;
|
1314
|
+
if (!(num_arrays_in_file & 0x80))
|
1315
|
+
return -1;
|
1316
|
+
num_arrays_in_file &= 0x3f;
|
1317
|
+
|
1318
|
+
if (dst == scratch) {
|
1319
|
+
// todo: ensure scratch space first?
|
1320
|
+
scratch += (scratch_end - scratch - 0xc000) >> 1;
|
1321
|
+
dst_end = scratch;
|
1322
|
+
}
|
1323
|
+
|
1324
|
+
int total_size = 0;
|
1325
|
+
|
1326
|
+
if (num_arrays_in_file == 0) {
|
1327
|
+
for (int i = 0; i < array_count; i++) {
|
1328
|
+
uint8 *chunk_dst = dst;
|
1329
|
+
int dec = Kraken_DecodeBytes(&chunk_dst, src, src_end, &decoded_size, dst_end - dst, force_memmove, scratch, scratch_end);
|
1330
|
+
if (dec < 0)
|
1331
|
+
return -1;
|
1332
|
+
dst += decoded_size;
|
1333
|
+
array_lens[i] = decoded_size;
|
1334
|
+
array_data[i] = chunk_dst;
|
1335
|
+
src += dec;
|
1336
|
+
total_size += decoded_size;
|
1337
|
+
}
|
1338
|
+
*total_size_out = total_size;
|
1339
|
+
return src - src_org; // not supported yet
|
1340
|
+
}
|
1341
|
+
|
1342
|
+
uint8 *entropy_array_data[32];
|
1343
|
+
uint32 entropy_array_size[32];
|
1344
|
+
|
1345
|
+
// First loop just decodes everything to scratch
|
1346
|
+
uint8 *scratch_cur = scratch;
|
1347
|
+
|
1348
|
+
for(int i = 0; i < num_arrays_in_file; i++) {
|
1349
|
+
uint8 *chunk_dst = scratch_cur;
|
1350
|
+
int dec = Kraken_DecodeBytes(&chunk_dst, src, src_end, &decoded_size, scratch_end - scratch_cur, force_memmove, scratch_cur, scratch_end);
|
1351
|
+
if (dec < 0)
|
1352
|
+
return -1;
|
1353
|
+
entropy_array_data[i] = chunk_dst;
|
1354
|
+
entropy_array_size[i] = decoded_size;
|
1355
|
+
scratch_cur += decoded_size;
|
1356
|
+
total_size += decoded_size;
|
1357
|
+
src += dec;
|
1358
|
+
}
|
1359
|
+
*total_size_out = total_size;
|
1360
|
+
|
1361
|
+
if (src_end - src < 3)
|
1362
|
+
return -1;
|
1363
|
+
|
1364
|
+
int Q = *(uint16*)src;
|
1365
|
+
src += 2;
|
1366
|
+
|
1367
|
+
int out_size;
|
1368
|
+
if (Kraken_GetBlockSize(src, src_end, &out_size, total_size) < 0)
|
1369
|
+
return -1;
|
1370
|
+
int num_indexes = out_size;
|
1371
|
+
|
1372
|
+
int num_lens = num_indexes - array_count;
|
1373
|
+
if (num_lens < 1)
|
1374
|
+
return -1;
|
1375
|
+
|
1376
|
+
if (scratch_end - scratch_cur < num_indexes)
|
1377
|
+
return -1;
|
1378
|
+
uint8 *interval_lenlog2 = scratch_cur;
|
1379
|
+
scratch_cur += num_indexes;
|
1380
|
+
|
1381
|
+
if (scratch_end - scratch_cur < num_indexes)
|
1382
|
+
return -1;
|
1383
|
+
uint8 *interval_indexes = scratch_cur;
|
1384
|
+
scratch_cur += num_indexes;
|
1385
|
+
|
1386
|
+
|
1387
|
+
if (Q & 0x8000) {
|
1388
|
+
int size_out;
|
1389
|
+
int n = Kraken_DecodeBytes(&interval_indexes, src, src_end, &size_out, num_indexes, false, scratch_cur, scratch_end);
|
1390
|
+
if (n < 0 || size_out != num_indexes)
|
1391
|
+
return -1;
|
1392
|
+
src += n;
|
1393
|
+
|
1394
|
+
for (int i = 0; i < num_indexes; i++) {
|
1395
|
+
int t = interval_indexes[i];
|
1396
|
+
interval_lenlog2[i] = t >> 4;
|
1397
|
+
interval_indexes[i] = t & 0xF;
|
1398
|
+
}
|
1399
|
+
|
1400
|
+
num_lens = num_indexes;
|
1401
|
+
} else {
|
1402
|
+
int lenlog2_chunksize = num_indexes - array_count;
|
1403
|
+
|
1404
|
+
int size_out;
|
1405
|
+
int n = Kraken_DecodeBytes(&interval_indexes, src, src_end, &size_out, num_indexes, false, scratch_cur, scratch_end);
|
1406
|
+
if (n < 0 || size_out != num_indexes)
|
1407
|
+
return -1;
|
1408
|
+
src += n;
|
1409
|
+
|
1410
|
+
n = Kraken_DecodeBytes(&interval_lenlog2, src, src_end, &size_out, lenlog2_chunksize, false, scratch_cur, scratch_end);
|
1411
|
+
if (n < 0 || size_out != lenlog2_chunksize)
|
1412
|
+
return -1;
|
1413
|
+
src += n;
|
1414
|
+
|
1415
|
+
for (int i = 0; i < lenlog2_chunksize; i++)
|
1416
|
+
if (interval_lenlog2[i] > 16)
|
1417
|
+
return -1;
|
1418
|
+
}
|
1419
|
+
|
1420
|
+
if (scratch_end - scratch_cur < 4)
|
1421
|
+
return -1;
|
1422
|
+
|
1423
|
+
scratch_cur = ALIGN_POINTER(scratch_cur, 4);
|
1424
|
+
if (scratch_end - scratch_cur < num_lens * 4)
|
1425
|
+
return -1;
|
1426
|
+
uint32 *decoded_intervals = (uint32*)scratch_cur;
|
1427
|
+
|
1428
|
+
int varbits_complen = Q & 0x3FFF;
|
1429
|
+
if (src_end - src < varbits_complen)
|
1430
|
+
return -1;
|
1431
|
+
|
1432
|
+
const uint8 *f = src;
|
1433
|
+
uint32 bits_f = 0;
|
1434
|
+
int bitpos_f = 24;
|
1435
|
+
|
1436
|
+
const uint8 *src_end_actual = src + varbits_complen;
|
1437
|
+
|
1438
|
+
const uint8 *b = src_end_actual;
|
1439
|
+
uint32 bits_b = 0;
|
1440
|
+
int bitpos_b = 24;
|
1441
|
+
|
1442
|
+
|
1443
|
+
int i;
|
1444
|
+
for (i = 0; i + 2 <= num_lens; i += 2) {
|
1445
|
+
bits_f |= _byteswap_ulong(*(uint32*)f) >> (24 - bitpos_f);
|
1446
|
+
f += (bitpos_f + 7) >> 3;
|
1447
|
+
|
1448
|
+
bits_b |= ((uint32*)b)[-1] >> (24 - bitpos_b);
|
1449
|
+
b -= (bitpos_b + 7) >> 3;
|
1450
|
+
|
1451
|
+
int numbits_f = interval_lenlog2[i + 0];
|
1452
|
+
int numbits_b = interval_lenlog2[i + 1];
|
1453
|
+
|
1454
|
+
bits_f = _rotl(bits_f | 1, numbits_f);
|
1455
|
+
bitpos_f += numbits_f - 8 * ((bitpos_f + 7) >> 3);
|
1456
|
+
|
1457
|
+
bits_b = _rotl(bits_b | 1, numbits_b);
|
1458
|
+
bitpos_b += numbits_b - 8 * ((bitpos_b + 7) >> 3);
|
1459
|
+
|
1460
|
+
int value_f = bits_f & bitmasks[numbits_f];
|
1461
|
+
bits_f &= ~bitmasks[numbits_f];
|
1462
|
+
|
1463
|
+
int value_b = bits_b & bitmasks[numbits_b];
|
1464
|
+
bits_b &= ~bitmasks[numbits_b];
|
1465
|
+
|
1466
|
+
decoded_intervals[i + 0] = value_f;
|
1467
|
+
decoded_intervals[i + 1] = value_b;
|
1468
|
+
}
|
1469
|
+
|
1470
|
+
// read final one since above loop reads 2
|
1471
|
+
if (i < num_lens) {
|
1472
|
+
bits_f |= _byteswap_ulong(*(uint32*)f) >> (24 - bitpos_f);
|
1473
|
+
int numbits_f = interval_lenlog2[i];
|
1474
|
+
bits_f = _rotl(bits_f | 1, numbits_f);
|
1475
|
+
int value_f = bits_f & bitmasks[numbits_f];
|
1476
|
+
decoded_intervals[i + 0] = value_f;
|
1477
|
+
}
|
1478
|
+
|
1479
|
+
if (interval_indexes[num_indexes - 1])
|
1480
|
+
return -1;
|
1481
|
+
|
1482
|
+
int indi = 0, leni = 0, source;
|
1483
|
+
int increment_leni = (Q & 0x8000) != 0;
|
1484
|
+
|
1485
|
+
for(int arri = 0; arri < array_count; arri++) {
|
1486
|
+
array_data[arri] = dst;
|
1487
|
+
if (indi >= num_indexes)
|
1488
|
+
return -1;
|
1489
|
+
|
1490
|
+
while ((source = interval_indexes[indi++]) != 0) {
|
1491
|
+
if (source > num_arrays_in_file)
|
1492
|
+
return -1;
|
1493
|
+
if (leni >= num_lens)
|
1494
|
+
return -1;
|
1495
|
+
int cur_len = decoded_intervals[leni++];
|
1496
|
+
int bytes_left = entropy_array_size[source - 1];
|
1497
|
+
if (cur_len > bytes_left || cur_len > dst_end - dst)
|
1498
|
+
return -1;
|
1499
|
+
uint8 *blksrc = entropy_array_data[source - 1];
|
1500
|
+
entropy_array_size[source - 1] -= cur_len;
|
1501
|
+
entropy_array_data[source - 1] += cur_len;
|
1502
|
+
uint8 *dstx = dst;
|
1503
|
+
dst += cur_len;
|
1504
|
+
memcpy(dstx, blksrc, cur_len);
|
1505
|
+
}
|
1506
|
+
leni += increment_leni;
|
1507
|
+
array_lens[arri] = dst - array_data[arri];
|
1508
|
+
}
|
1509
|
+
|
1510
|
+
if (indi != num_indexes || leni != num_lens)
|
1511
|
+
return -1;
|
1512
|
+
|
1513
|
+
for (int i = 0; i < num_arrays_in_file; i++) {
|
1514
|
+
if (entropy_array_size[i])
|
1515
|
+
return -1;
|
1516
|
+
}
|
1517
|
+
return src_end_actual - src_org;
|
1518
|
+
}
|
1519
|
+
|
1520
|
+
int Krak_DecodeRecursive(const byte *src, size_t src_size, byte *output, int output_size, uint8 *scratch, uint8 *scratch_end) {
|
1521
|
+
const uint8 *src_org = src;
|
1522
|
+
byte *output_end = output + output_size;
|
1523
|
+
const byte *src_end = src + src_size;
|
1524
|
+
|
1525
|
+
if (src_size < 6)
|
1526
|
+
return -1;
|
1527
|
+
|
1528
|
+
int n = src[0] & 0x7f;
|
1529
|
+
if (n < 2)
|
1530
|
+
return -1;
|
1531
|
+
|
1532
|
+
if (!(src[0] & 0x80)) {
|
1533
|
+
src++;
|
1534
|
+
do {
|
1535
|
+
int decoded_size;
|
1536
|
+
int dec = Kraken_DecodeBytes(&output, src, src_end, &decoded_size, output_end - output, true, scratch, scratch_end);
|
1537
|
+
if (dec < 0)
|
1538
|
+
return -1;
|
1539
|
+
output += decoded_size;
|
1540
|
+
src += dec;
|
1541
|
+
} while (--n);
|
1542
|
+
if (output != output_end)
|
1543
|
+
return -1;
|
1544
|
+
return src - src_org;
|
1545
|
+
} else {
|
1546
|
+
uint8 *array_data;
|
1547
|
+
int array_len, decoded_size;
|
1548
|
+
int dec = Kraken_DecodeMultiArray(src, src_end, output, output_end, &array_data, &array_len, 1, &decoded_size, true, scratch, scratch_end);
|
1549
|
+
if (dec < 0)
|
1550
|
+
return -1;
|
1551
|
+
output += decoded_size;
|
1552
|
+
if (output != output_end)
|
1553
|
+
return -1;
|
1554
|
+
return dec;
|
1555
|
+
}
|
1556
|
+
}
|
1557
|
+
|
1558
|
+
int Krak_DecodeRLE(const byte *src, size_t src_size, byte *dst, int dst_size, uint8 *scratch, uint8 *scratch_end) {
|
1559
|
+
if (src_size <= 1) {
|
1560
|
+
if (src_size != 1)
|
1561
|
+
return -1;
|
1562
|
+
memset(dst, src[0], dst_size);
|
1563
|
+
return 1;
|
1564
|
+
}
|
1565
|
+
uint8 *dst_end = dst + dst_size;
|
1566
|
+
const uint8 *cmd_ptr = src + 1, *cmd_ptr_end = src + src_size;
|
1567
|
+
// Unpack the first X bytes of the command buffer?
|
1568
|
+
if (src[0]) {
|
1569
|
+
uint8 *dst_ptr = scratch;
|
1570
|
+
int dec_size;
|
1571
|
+
int n = Kraken_DecodeBytes(&dst_ptr, src, src + src_size, &dec_size, scratch_end - scratch, true, scratch, scratch_end);
|
1572
|
+
if (n <= 0)
|
1573
|
+
return -1;
|
1574
|
+
int cmd_len = src_size - n + dec_size;
|
1575
|
+
if (cmd_len > scratch_end - scratch)
|
1576
|
+
return -1;
|
1577
|
+
memcpy(dst_ptr + dec_size, src + n, src_size - n);
|
1578
|
+
cmd_ptr = dst_ptr;
|
1579
|
+
cmd_ptr_end = &dst_ptr[cmd_len];
|
1580
|
+
}
|
1581
|
+
|
1582
|
+
int rle_byte = 0;
|
1583
|
+
|
1584
|
+
while (cmd_ptr < cmd_ptr_end) {
|
1585
|
+
uint32 cmd = cmd_ptr_end[-1];
|
1586
|
+
if (cmd - 1 >= 0x2f) {
|
1587
|
+
cmd_ptr_end--;
|
1588
|
+
uint32 bytes_to_copy = (-1 - cmd) & 0xF;
|
1589
|
+
uint32 bytes_to_rle = cmd >> 4;
|
1590
|
+
if (dst_end - dst < bytes_to_copy + bytes_to_rle || cmd_ptr_end - cmd_ptr < bytes_to_copy)
|
1591
|
+
return -1;
|
1592
|
+
memcpy(dst, cmd_ptr, bytes_to_copy);
|
1593
|
+
cmd_ptr += bytes_to_copy;
|
1594
|
+
dst += bytes_to_copy;
|
1595
|
+
memset(dst, rle_byte, bytes_to_rle);
|
1596
|
+
dst += bytes_to_rle;
|
1597
|
+
} else if (cmd >= 0x10) {
|
1598
|
+
uint32 data = *(uint16*)(cmd_ptr_end - 2) - 4096;
|
1599
|
+
cmd_ptr_end -= 2;
|
1600
|
+
uint32 bytes_to_copy = data & 0x3F;
|
1601
|
+
uint32 bytes_to_rle = data >> 6;
|
1602
|
+
if (dst_end - dst < bytes_to_copy + bytes_to_rle || cmd_ptr_end - cmd_ptr < bytes_to_copy)
|
1603
|
+
return -1;
|
1604
|
+
memcpy(dst, cmd_ptr, bytes_to_copy);
|
1605
|
+
cmd_ptr += bytes_to_copy;
|
1606
|
+
dst += bytes_to_copy;
|
1607
|
+
memset(dst, rle_byte, bytes_to_rle);
|
1608
|
+
dst += bytes_to_rle;
|
1609
|
+
} else if (cmd == 1) {
|
1610
|
+
rle_byte = *cmd_ptr++;
|
1611
|
+
cmd_ptr_end--;
|
1612
|
+
} else if (cmd >= 9) {
|
1613
|
+
uint32 bytes_to_rle = (*(uint16*)(cmd_ptr_end - 2) - 0x8ff) * 128;
|
1614
|
+
cmd_ptr_end -= 2;
|
1615
|
+
if (dst_end - dst < bytes_to_rle)
|
1616
|
+
return -1;
|
1617
|
+
memset(dst, rle_byte, bytes_to_rle);
|
1618
|
+
dst += bytes_to_rle;
|
1619
|
+
} else {
|
1620
|
+
uint32 bytes_to_copy = (*(uint16*)(cmd_ptr_end - 2) - 511) * 64;
|
1621
|
+
cmd_ptr_end -= 2;
|
1622
|
+
if (cmd_ptr_end - cmd_ptr < bytes_to_copy || dst_end - dst < bytes_to_copy)
|
1623
|
+
return -1;
|
1624
|
+
memcpy(dst, cmd_ptr, bytes_to_copy);
|
1625
|
+
dst += bytes_to_copy;
|
1626
|
+
cmd_ptr += bytes_to_copy;
|
1627
|
+
}
|
1628
|
+
}
|
1629
|
+
if (cmd_ptr_end != cmd_ptr)
|
1630
|
+
return -1;
|
1631
|
+
|
1632
|
+
if (dst != dst_end)
|
1633
|
+
return -1;
|
1634
|
+
|
1635
|
+
return src_size;
|
1636
|
+
}
|
1637
|
+
|
1638
|
+
struct TansData {
|
1639
|
+
uint32 A_used;
|
1640
|
+
uint32 B_used;
|
1641
|
+
uint8 A[256];
|
1642
|
+
uint32 B[256];
|
1643
|
+
};
|
1644
|
+
|
1645
|
+
// In-place insertion sort of the range [p, pend) in ascending order, using
// only operator< on T. An empty range is left untouched.
template<typename T> void SimpleSort(T *p, T *pend) {
  if (p == pend)
    return;

  for (T *cur = p + 1; cur != pend; ++cur) {
    T key = *cur;
    T *hole = cur;
    // Shift larger elements one slot to the right until key fits.
    while (hole > p && key < hole[-1]) {
      *hole = hole[-1];
      --hole;
    }
    *hole = key;
  }
}
|
1655
|
+
|
1656
|
+
// Reads a tANS weight table from `bits` and stores it in `tans_data`.
//
// Symbols of weight 1 are appended to tans_data->A; symbols of weight >= 2
// are appended to tans_data->B packed as (symbol << 16) + weight.
//
// bits       Bit reader positioned at the table; advanced past it.
// L_bits     log2 of the table size L; accepted tables have weights that
//            sum to exactly 1 << L_bits.
// tans_data  Receives A/B and their used counts.
//
// Returns false when the table is malformed.
bool Tans_DecodeTable(BitReader *bits, int L_bits, TansData *tans_data) {
  BitReader_Refill(bits);
  if (BitReader_ReadBitNoRefill(bits)) {
    // Form 1: symbol ranges plus Golomb-Rice coded weights.
    int Q = BitReader_ReadBitsNoRefill(bits, 3);
    int num_symbols = BitReader_ReadBitsNoRefill(bits, 8) + 1;
    if (num_symbols < 2)
      return false;
    int fluff = BitReader_ReadFluff(bits, num_symbols);
    int total_rice_values = fluff + num_symbols;
    uint8 rice[512 + 16];
    BitReader2 br2;

    // Hand the current position over to the second bit reader
    // implementation, which DecodeGolombRiceLengths expects.
    br2.p = bits->p - ((uint)(24 - bits->bitpos + 7) >> 3);
    br2.p_end = bits->p_end;
    br2.bitpos = (bits->bitpos - 24) & 7;

    if (!DecodeGolombRiceLengths(rice, total_rice_values, &br2))
      return false;
    // Zero 16 bytes after the decoded values.
    memset(rice + total_rice_values, 0, 16);

    // Switch back to the first bit reader implementation at br2's position.
    bits->bitpos = 24;
    bits->p = br2.p;
    bits->bits = 0;
    BitReader_Refill(bits);
    bits->bits <<= br2.bitpos;
    bits->bitpos += br2.bitpos;

    HuffRange range[133];
    fluff = Huff_ConvertToRanges(range, num_symbols, fluff, &rice[num_symbols], bits);
    if (fluff < 0)
      return false;

    BitReader_Refill(bits);

    uint32 L = 1 << L_bits;
    uint8 *cur_rice_ptr = rice;
    int average = 6;
    int somesum = 0;
    uint8 *tanstable_A = tans_data->A;
    uint32 *tanstable_B = tans_data->B;

    for (int ri = 0; ri < fluff; ri++) {
      int symbol = range[ri].symbol;
      int num = range[ri].num;
      // NOTE(review): this do/while assumes every range has num >= 1 --
      // confirm that Huff_ConvertToRanges guarantees it.
      do {
        BitReader_Refill(bits);

        int nextra = Q + *cur_rice_ptr++;
        if (nextra > 15)
          return false;
        int v = BitReader_ReadBitsNoRefillZero(bits, nextra) + (1 << nextra) - (1 << Q);

        // Small values are zig-zag decoded around a running average.
        int average_div4 = average >> 2;
        int limit = 2 * average_div4;
        if (v <= limit)
          v = average_div4 + (-(v & 1) ^ ((uint32)v >> 1));
        if (limit > v)
          limit = v;
        v += 1;
        average += limit - average_div4;

        // Write to both tables, then advance only the one matching the weight.
        *tanstable_A = symbol;
        *tanstable_B = (symbol << 16) + v;
        tanstable_A += (v == 1);
        tanstable_B += v >= 2;
        somesum += v;
        symbol += 1;
      } while (--num);
    }
    tans_data->A_used = tanstable_A - tans_data->A;
    tans_data->B_used = tanstable_B - tans_data->B;
    if (somesum != L)
      return false;

    return true;
  } else {
    // Form 2: up to 8 explicitly listed symbols with delta-coded weights,
    // followed by one final symbol that receives the remaining weight.
    bool seen[256];
    memset(seen, 0, sizeof(seen));
    uint32 L = 1 << L_bits;

    int count = BitReader_ReadBitsNoRefill(bits, 3) + 1;

    int bits_per_sym = BSR(L_bits) + 1;
    int max_delta_bits = BitReader_ReadBitsNoRefill(bits, bits_per_sym);

    if (max_delta_bits == 0 || max_delta_bits > L_bits)
      return false;

    uint8 *tanstable_A = tans_data->A;
    uint32 *tanstable_B = tans_data->B;

    int weight = 0;
    int total_weights = 0;

    do {
      BitReader_Refill(bits);

      int sym = BitReader_ReadBitsNoRefill(bits, 8);
      if (seen[sym])
        return false;

      int delta = BitReader_ReadBitsNoRefill(bits, max_delta_bits);

      weight += delta;

      if (weight == 0)
        return false;

      seen[sym] = true;
      if (weight == 1) {
        *tanstable_A++ = sym;
      } else {
        *tanstable_B++ = (sym << 16) + weight;
      }

      total_weights += weight;
    } while (--count);

    BitReader_Refill(bits);

    int sym = BitReader_ReadBitsNoRefill(bits, 8);
    if (seen[sym])
      return false;

    // Compute the remainder as a signed value. With the unsigned
    // `L - total_weights`, a total above L wrapped to a huge number, passed
    // both tests below and stored a bogus weight in B.
    int remaining = (int)L - total_weights;
    if (remaining < weight || remaining <= 1)
      return false;

    *tanstable_B++ = (sym << 16) + remaining;

    tans_data->A_used = tanstable_A - tans_data->A;
    tans_data->B_used = tanstable_B - tans_data->B;

    SimpleSort(tans_data->A, tanstable_A);
    SimpleSort(tans_data->B, tanstable_B);
    return true;
  }
}
|
1794
|
+
|
1795
|
+
struct TansLutEnt {
|
1796
|
+
uint32 x;
|
1797
|
+
uint8 bits_x;
|
1798
|
+
uint8 symbol;
|
1799
|
+
uint16 w;
|
1800
|
+
};
|
1801
|
+
|
1802
|
+
// Builds the decoding table `lut` (1 << L_bits entries) from the weights in
// `tans_data`.
//
// Weight-1 symbols (tans_data->A) occupy the last A_used slots. The
// remaining slots are divided into four consecutive lanes, and the entries
// of each weight >= 2 symbol (tans_data->B) are spread across those lanes.
void Tans_InitLut(TansData *tans_data, int L_bits, TansLutEnt *lut) {
  TansLutEnt *pointers[4];

  int L = 1 << L_bits;
  int a_used = tans_data->A_used;
  uint slots_left_to_alloc = L - a_used;

  // Lane k starts after lanes 0..k-1; the first (slots & 3) lanes are one
  // slot longer than the rest.
  uint per_lane = slots_left_to_alloc >> 2;
  uint remainder = slots_left_to_alloc & 3;
  uint lane_start = 0;
  for (uint lane = 0; lane < 4; lane++) {
    pointers[lane] = lut + lane_start;
    lane_start += per_lane + (remainder > lane);
  }

  // Weight-1 symbols: one slot each, consuming L_bits bits, base state 0.
  TansLutEnt *singles = lut + slots_left_to_alloc;
  for (int i = 0; i < a_used; i++) {
    TansLutEnt *s = &singles[i];
    s->x = (1 << L_bits) - 1;
    s->bits_x = L_bits;
    s->symbol = tans_data->A[i];
    s->w = 0;
  }

  // Symbols with weight >= 2.
  int weights_sum = 0;
  for (uint32 i = 0; i < tans_data->B_used; i++) {
    int weight = tans_data->B[i] & 0xffff;
    int symbol = tans_data->B[i] >> 16;

    if (weight > 4) {
      uint32 sym_bits = BSR(weight);
      int nbits = L_bits - sym_bits;
      int step = 1 << nbits;
      // Entries that still consume `nbits` bits; once they run out the
      // symbol's remaining entries consume one bit fewer.
      int wide_left = (1 << (sym_bits + 1)) - weight;

      TansLutEnt ent;
      ent.symbol = symbol;
      ent.bits_x = nbits;
      ent.x = (1 << nbits) - 1;
      ent.w = (L - 1) & (weight << nbits);

      for (int j = 0; j < 4; j++) {
        TansLutEnt *out = pointers[j];
        // Number of this symbol's entries that land in lane j.
        int lane_count = (weight + ((weights_sum - j - 1) & 3)) >> 2;

        if (wide_left >= lane_count) {
          for (int n = lane_count; n; n--) {
            *out++ = ent;
            ent.w += step;
          }
          wide_left -= lane_count;
        } else {
          for (int n = wide_left; n; n--) {
            *out++ = ent;
            ent.w += step;
          }

          // Switch to the narrower entries and restart the base at 0.
          nbits--;
          step >>= 1;
          ent.bits_x = nbits;
          ent.w = 0;
          ent.x >>= 1;

          for (int n = lane_count - wide_left; n; n--) {
            *out++ = ent;
            ent.w += step;
          }
          wide_left = weight;
        }
        pointers[j] = out;
      }
    } else {
      assert(weight > 0);
      // Bitmask of the lanes receiving one entry each: `weight` consecutive
      // lanes starting at (weights_sum & 3), wrapped around modulo 4.
      uint32 lane_mask = ((1 << weight) - 1) << (weights_sum & 3);
      lane_mask |= (lane_mask >> 4);

      int remaining = weight;
      int state = weight;
      do {
        uint32 lane = BSF(lane_mask);
        lane_mask &= lane_mask - 1;

        TansLutEnt *out = pointers[lane]++;
        uint32 state_bits = BSR(state);
        out->symbol = symbol;
        out->bits_x = L_bits - state_bits;
        out->x = (1 << (L_bits - state_bits)) - 1;
        out->w = (L - 1) & (state++ << (L_bits - state_bits));
      } while (--remaining);
    }
    weights_sum += weight;
  }
}
|
1895
|
+
|
1896
|
+
struct TansDecoderParams {
|
1897
|
+
TansLutEnt *lut;
|
1898
|
+
uint8 *dst, *dst_end;
|
1899
|
+
const uint8 *ptr_f, *ptr_b;
|
1900
|
+
uint32 bits_f, bits_b;
|
1901
|
+
int bitpos_f, bitpos_b;
|
1902
|
+
uint32 state_0, state_1, state_2, state_3, state_4;
|
1903
|
+
};
|
1904
|
+
|
1905
|
+
// Runs the tANS decoding loop described by `params`.
//
// Five states are decoded round-robin, alternating between a forward bit
// stream (ptr_f, read upwards) and a backward bit stream (ptr_b, read
// downwards), until dst reaches dst_end. The low bytes of the five final
// states are then stored at dst_end[0..4].
//
// Returns false if the forward pointer starts beyond the backward pointer,
// if the two streams do not meet exactly at the end, or if any final state
// does not fit in one byte.
//
// NOTE(review): the loop itself performs no bounds checks on ptr_f/ptr_b;
// only the final position is validated.
bool Tans_Decode(TansDecoderParams *params) {
  TansLutEnt *lut = params->lut;
  TansLutEnt *e;
  uint8 *dst = params->dst;
  uint8 *dst_end = params->dst_end;
  const uint8 *ptr_f = params->ptr_f;
  const uint8 *ptr_b = params->ptr_b;
  uint32 bits_f = params->bits_f;
  uint32 bits_b = params->bits_b;
  int bitpos_f = params->bitpos_f;
  int bitpos_b = params->bitpos_b;
  uint32 state_0 = params->state_0;
  uint32 state_1 = params->state_1;
  uint32 state_2 = params->state_2;
  uint32 state_3 = params->state_3;
  uint32 state_4 = params->state_4;

  if (ptr_f > ptr_b)
    return false;

// Top up the forward bit buffer to at least 24 valid bits.
#define TANS_FORWARD_BITS()                     \
    bits_f |= *(uint32 *)ptr_f << bitpos_f;     \
    ptr_f += (31 - bitpos_f) >> 3;              \
    bitpos_f |= 24;

// Emit one symbol for `state` and advance it using forward-stream bits.
// Leaves the enclosing loop once the output is full.
#define TANS_FORWARD_ROUND(state)               \
    e = &lut[state];                            \
    *dst++ = e->symbol;                         \
    bitpos_f -= e->bits_x;                      \
    state = (bits_f & e->x) + e->w;             \
    bits_f >>= e->bits_x;                       \
    if (dst >= dst_end)                         \
      break;

// Top up the backward bit buffer (bytes are read in reverse order).
#define TANS_BACKWARD_BITS()                    \
    bits_b |= _byteswap_ulong(((uint32 *)ptr_b)[-1]) << bitpos_b;     \
    ptr_b -= (31 - bitpos_b) >> 3;              \
    bitpos_b |= 24;

// Emit one symbol for `state` and advance it using backward-stream bits.
// Leaves the enclosing loop once the output is full.
#define TANS_BACKWARD_ROUND(state)              \
    e = &lut[state];                            \
    *dst++ = e->symbol;                         \
    bitpos_b -= e->bits_x;                      \
    state = (bits_b & e->x) + e->w;             \
    bits_b >>= e->bits_x;                       \
    if (dst >= dst_end)                         \
      break;

  if (dst < dst_end) {
    // Ten symbols per iteration: five from the forward stream, then five
    // from the backward stream. Each ROUND macro breaks out when done.
    for (;;) {
      TANS_FORWARD_BITS();
      TANS_FORWARD_ROUND(state_0);
      TANS_FORWARD_ROUND(state_1);
      TANS_FORWARD_BITS();
      TANS_FORWARD_ROUND(state_2);
      TANS_FORWARD_ROUND(state_3);
      TANS_FORWARD_BITS();
      TANS_FORWARD_ROUND(state_4);
      TANS_BACKWARD_BITS();
      TANS_BACKWARD_ROUND(state_0);
      TANS_BACKWARD_ROUND(state_1);
      TANS_BACKWARD_BITS();
      TANS_BACKWARD_ROUND(state_2);
      TANS_BACKWARD_ROUND(state_3);
      TANS_BACKWARD_BITS();
      TANS_BACKWARD_ROUND(state_4);
    }
  }

  // After giving back the whole unread bytes still buffered on each side,
  // the two read positions must coincide.
  if (ptr_b - ptr_f + (bitpos_f >> 3) + (bitpos_b >> 3) != 0)
    return false;

  // Every final state must fit in a single byte.
  uint32 states_or = state_0 | state_1 | state_2 | state_3 | state_4;
  if (states_or > 0xFF)
    return false;

  dst_end[0] = (uint8)state_0;
  dst_end[1] = (uint8)state_1;
  dst_end[2] = (uint8)state_2;
  dst_end[3] = (uint8)state_3;
  dst_end[4] = (uint8)state_4;
  return true;
}
|
1981
|
+
|
1982
|
+
int Krak_DecodeTans(const byte *src, size_t src_size, byte *dst, int dst_size, uint8 *scratch, uint8 *scratch_end) {
|
1983
|
+
if (src_size < 8 || dst_size < 5)
|
1984
|
+
return -1;
|
1985
|
+
|
1986
|
+
const uint8 *src_end = src + src_size;
|
1987
|
+
|
1988
|
+
BitReader br;
|
1989
|
+
TansData tans_data;
|
1990
|
+
|
1991
|
+
br.bitpos = 24;
|
1992
|
+
br.bits = 0;
|
1993
|
+
br.p = src;
|
1994
|
+
br.p_end = src_end;
|
1995
|
+
BitReader_Refill(&br);
|
1996
|
+
|
1997
|
+
// reserved bit
|
1998
|
+
if (BitReader_ReadBitNoRefill(&br))
|
1999
|
+
return -1;
|
2000
|
+
|
2001
|
+
int L_bits = BitReader_ReadBitsNoRefill(&br, 2) + 8;
|
2002
|
+
|
2003
|
+
if (!Tans_DecodeTable(&br, L_bits, &tans_data))
|
2004
|
+
return -1;
|
2005
|
+
|
2006
|
+
src = br.p - (24 - br.bitpos) / 8;
|
2007
|
+
|
2008
|
+
if (src >= src_end)
|
2009
|
+
return -1;
|
2010
|
+
|
2011
|
+
uint32 lut_space_required = ((sizeof(TansLutEnt) << L_bits) + 15) &~ 15;
|
2012
|
+
if (lut_space_required > (scratch_end - scratch))
|
2013
|
+
return -1;
|
2014
|
+
|
2015
|
+
TansDecoderParams params;
|
2016
|
+
params.dst = dst;
|
2017
|
+
params.dst_end = dst + dst_size - 5;
|
2018
|
+
|
2019
|
+
params.lut = (TansLutEnt *)ALIGN_POINTER(scratch, 16);
|
2020
|
+
Tans_InitLut(&tans_data, L_bits, params.lut);
|
2021
|
+
|
2022
|
+
// Read out the initial state
|
2023
|
+
uint32 L_mask = (1 << L_bits) - 1;
|
2024
|
+
uint32 bits_f = *(uint32*)src;
|
2025
|
+
src += 4;
|
2026
|
+
uint32 bits_b = _byteswap_ulong(*(uint32*)(src_end - 4));
|
2027
|
+
src_end -= 4;
|
2028
|
+
uint32 bitpos_f = 32, bitpos_b = 32;
|
2029
|
+
|
2030
|
+
// Read first two.
|
2031
|
+
params.state_0 = bits_f & L_mask;
|
2032
|
+
params.state_1 = bits_b & L_mask;
|
2033
|
+
bits_f >>= L_bits, bitpos_f -= L_bits;
|
2034
|
+
bits_b >>= L_bits, bitpos_b -= L_bits;
|
2035
|
+
|
2036
|
+
// Read next two.
|
2037
|
+
params.state_2 = bits_f & L_mask;
|
2038
|
+
params.state_3 = bits_b & L_mask;
|
2039
|
+
bits_f >>= L_bits, bitpos_f -= L_bits;
|
2040
|
+
bits_b >>= L_bits, bitpos_b -= L_bits;
|
2041
|
+
|
2042
|
+
// Refill more bits
|
2043
|
+
bits_f |= *(uint32 *)src << bitpos_f;
|
2044
|
+
src += (31 - bitpos_f) >> 3;
|
2045
|
+
bitpos_f |= 24;
|
2046
|
+
|
2047
|
+
// Read final state variable
|
2048
|
+
params.state_4 = bits_f & L_mask;
|
2049
|
+
bits_f >>= L_bits, bitpos_f -= L_bits;
|
2050
|
+
|
2051
|
+
params.bits_f = bits_f;
|
2052
|
+
params.ptr_f = src - (bitpos_f >> 3);
|
2053
|
+
params.bitpos_f = bitpos_f & 7;
|
2054
|
+
|
2055
|
+
params.bits_b = bits_b;
|
2056
|
+
params.ptr_b = src_end + (bitpos_b >> 3);
|
2057
|
+
params.bitpos_b = bitpos_b & 7;
|
2058
|
+
|
2059
|
+
if (!Tans_Decode(¶ms))
|
2060
|
+
return -1;
|
2061
|
+
|
2062
|
+
return src_size;
|
2063
|
+
}
|
2064
|
+
|
2065
|
+
int Kraken_GetBlockSize(const uint8 *src, const uint8 *src_end, int *dest_size, int dest_capacity) {
|
2066
|
+
const byte *src_org = src;
|
2067
|
+
int src_size, dst_size;
|
2068
|
+
|
2069
|
+
if (src_end - src < 2)
|
2070
|
+
return -1; // too few bytes
|
2071
|
+
|
2072
|
+
int chunk_type = (src[0] >> 4) & 0x7;
|
2073
|
+
if (chunk_type == 0) {
|
2074
|
+
if (src[0] >= 0x80) {
|
2075
|
+
// In this mode, memcopy stores the length in the bottom 12 bits.
|
2076
|
+
src_size = ((src[0] << 8) | src[1]) & 0xFFF;
|
2077
|
+
src += 2;
|
2078
|
+
} else {
|
2079
|
+
if (src_end - src < 3)
|
2080
|
+
return -1; // too few bytes
|
2081
|
+
src_size = ((src[0] << 16) | (src[1] << 8) | src[2]);
|
2082
|
+
if (src_size & ~0x3ffff)
|
2083
|
+
return -1; // reserved bits must not be set
|
2084
|
+
src += 3;
|
2085
|
+
}
|
2086
|
+
if (src_size > dest_capacity || src_end - src < src_size)
|
2087
|
+
return -1;
|
2088
|
+
*dest_size = src_size;
|
2089
|
+
return src + src_size - src_org;
|
2090
|
+
}
|
2091
|
+
|
2092
|
+
if (chunk_type >= 6)
|
2093
|
+
return -1;
|
2094
|
+
|
2095
|
+
// In all the other modes, the initial bytes encode
|
2096
|
+
// the src_size and the dst_size
|
2097
|
+
if (src[0] >= 0x80) {
|
2098
|
+
if (src_end - src < 3)
|
2099
|
+
return -1; // too few bytes
|
2100
|
+
|
2101
|
+
// short mode, 10 bit sizes
|
2102
|
+
uint32 bits = ((src[0] << 16) | (src[1] << 8) | src[2]);
|
2103
|
+
src_size = bits & 0x3ff;
|
2104
|
+
dst_size = src_size + ((bits >> 10) & 0x3ff) + 1;
|
2105
|
+
src += 3;
|
2106
|
+
} else {
|
2107
|
+
// long mode, 18 bit sizes
|
2108
|
+
if (src_end - src < 5)
|
2109
|
+
return -1; // too few bytes
|
2110
|
+
uint32 bits = ((src[1] << 24) | (src[2] << 16) | (src[3] << 8) | src[4]);
|
2111
|
+
src_size = bits & 0x3ffff;
|
2112
|
+
dst_size = (((bits >> 18) | (src[0] << 14)) & 0x3FFFF) + 1;
|
2113
|
+
if (src_size >= dst_size)
|
2114
|
+
return -1;
|
2115
|
+
src += 5;
|
2116
|
+
}
|
2117
|
+
if (src_end - src < src_size || dst_size > dest_capacity)
|
2118
|
+
return -1;
|
2119
|
+
*dest_size = dst_size;
|
2120
|
+
return src_size;
|
2121
|
+
}
|
2122
|
+
|
2123
|
+
|
2124
|
+
// Decodes one entropy-coded chunk.
//
// output         In: address of the destination pointer. For stored chunks
//                with force_memmove == false, *output is redirected to point
//                directly into the source instead of copying.
// src, src_end   Input range starting at the chunk header.
// decoded_size   Receives the number of decoded bytes.
// output_size    Largest decoded size that is acceptable.
// force_memmove  Copy stored chunks into *output rather than aliasing src.
// scratch, scratch_end  Work memory handed to the sub-decoders. When *output
//                equals scratch, the output region is reserved from it first.
//
// Returns the number of input bytes consumed (header included), or -1 on
// malformed input.
int Kraken_DecodeBytes(byte **output, const byte *src, const byte *src_end, int *decoded_size, size_t output_size, bool force_memmove, uint8 *scratch, uint8 *scratch_end) {
  const byte *const chunk_start = src;

  if (src_end - src < 2)
    return -1; // too few bytes

  const int chunk_type = (src[0] >> 4) & 0x7;
  const bool short_header = src[0] >= 0x80;

  if (chunk_type == 0) {
    // Stored (memcopy) chunk.
    int stored_size;
    if (short_header) {
      // The length lives in the bottom 12 bits of a 2-byte header.
      stored_size = ((src[0] << 8) | src[1]) & 0xFFF;
      src += 2;
    } else {
      if (src_end - src < 3)
        return -1; // too few bytes
      stored_size = ((src[0] << 16) | (src[1] << 8) | src[2]);
      if (stored_size & ~0x3ffff)
        return -1; // reserved bits must not be set
      src += 3;
    }
    if (stored_size > output_size || src_end - src < stored_size)
      return -1;
    *decoded_size = stored_size;
    if (force_memmove) {
      memmove(*output, src, stored_size);
    } else {
      *output = (byte*)src;
    }
    return src + stored_size - chunk_start;
  }

  // All remaining chunk types encode both the source and destination size.
  int src_size, dst_size;
  if (short_header) {
    if (src_end - src < 3)
      return -1; // too few bytes

    // short mode, 10 bit sizes
    uint32 bits = ((src[0] << 16) | (src[1] << 8) | src[2]);
    src_size = bits & 0x3ff;
    dst_size = src_size + ((bits >> 10) & 0x3ff) + 1;
    src += 3;
  } else {
    // long mode, 18 bit sizes
    if (src_end - src < 5)
      return -1; // too few bytes
    uint32 bits = ((src[1] << 24) | (src[2] << 16) | (src[3] << 8) | src[4]);
    src_size = bits & 0x3ffff;
    dst_size = (((bits >> 18) | (src[0] << 14)) & 0x3FFFF) + 1;
    if (src_size >= dst_size)
      return -1;
    src += 5;
  }

  if (src_end - src < src_size || dst_size > output_size)
    return -1;

  // Decoding straight into scratch: reserve the output region so that the
  // sub-decoder's own scratch use starts after it.
  uint8 *dst = *output;
  if (dst == scratch) {
    if (scratch_end - scratch < dst_size)
      return -1;
    scratch += dst_size;
  }

  int src_used;
  switch (chunk_type) {
  case 1:
    src_used = Krak_DecodeTans(src, src_size, dst, dst_size, scratch, scratch_end);
    break;
  case 2:
  case 4:
    src_used = Kraken_DecodeBytes_Type12(src, src_size, dst, dst_size, chunk_type >> 1);
    break;
  case 3:
    src_used = Krak_DecodeRLE(src, src_size, dst, dst_size, scratch, scratch_end);
    break;
  case 5:
    src_used = Krak_DecodeRecursive(src, src_size, dst, dst_size, scratch, scratch_end);
    break;
  default:
    // Unknown chunk type.
    src_used = -1;
    break;
  }

  // The sub-decoder must consume exactly the payload.
  if (src_used != src_size)
    return -1;

  *decoded_size = dst_size;
  return src + src_size - chunk_start;
}
|
2210
|
+
|
2211
|
+
// Rewrites every offs_stream[i] in place as
//   scale * offs_stream[i] - low_bits[i]
// for i in [0, offs_stream_size).
void CombineScaledOffsetArrays(int *offs_stream, size_t offs_stream_size, int scale, const uint8 *low_bits) {
  int *cur = offs_stream;
  int *const end = offs_stream + offs_stream_size;
  while (cur != end) {
    *cur = scale * *cur - *low_bits;
    ++cur;
    ++low_bits;
  }
}
|
2215
|
+
|
2216
|
+
// Unpacks the packed 8 bit offset and lengths into 32 bit.
|
2217
|
+
bool Kraken_UnpackOffsets(const byte *src, const byte *src_end,
|
2218
|
+
const byte *packed_offs_stream, const byte *packed_offs_stream_extra, int packed_offs_stream_size,
|
2219
|
+
int multi_dist_scale,
|
2220
|
+
const byte *packed_litlen_stream, int packed_litlen_stream_size,
|
2221
|
+
int *offs_stream, int *len_stream,
|
2222
|
+
bool excess_flag, int excess_bytes) {
|
2223
|
+
|
2224
|
+
|
2225
|
+
BitReader bits_a, bits_b;
|
2226
|
+
int n, i;
|
2227
|
+
int u32_len_stream_size = 0;
|
2228
|
+
|
2229
|
+
bits_a.bitpos = 24;
|
2230
|
+
bits_a.bits = 0;
|
2231
|
+
bits_a.p = src;
|
2232
|
+
bits_a.p_end = src_end;
|
2233
|
+
BitReader_Refill(&bits_a);
|
2234
|
+
|
2235
|
+
bits_b.bitpos = 24;
|
2236
|
+
bits_b.bits = 0;
|
2237
|
+
bits_b.p = src_end;
|
2238
|
+
bits_b.p_end = src;
|
2239
|
+
BitReader_RefillBackwards(&bits_b);
|
2240
|
+
|
2241
|
+
if (!excess_flag) {
|
2242
|
+
if (bits_b.bits < 0x2000)
|
2243
|
+
return false;
|
2244
|
+
n = 31 - BSR(bits_b.bits);
|
2245
|
+
bits_b.bitpos += n;
|
2246
|
+
bits_b.bits <<= n;
|
2247
|
+
BitReader_RefillBackwards(&bits_b);
|
2248
|
+
n++;
|
2249
|
+
u32_len_stream_size = (bits_b.bits >> (32 - n)) - 1;
|
2250
|
+
bits_b.bitpos += n;
|
2251
|
+
bits_b.bits <<= n;
|
2252
|
+
BitReader_RefillBackwards(&bits_b);
|
2253
|
+
}
|
2254
|
+
|
2255
|
+
if (multi_dist_scale == 0) {
|
2256
|
+
// Traditional way of coding offsets
|
2257
|
+
const uint8 *packed_offs_stream_end = packed_offs_stream + packed_offs_stream_size;
|
2258
|
+
while (packed_offs_stream != packed_offs_stream_end) {
|
2259
|
+
*offs_stream++ = -(int32)BitReader_ReadDistance(&bits_a, *packed_offs_stream++);
|
2260
|
+
if (packed_offs_stream == packed_offs_stream_end)
|
2261
|
+
break;
|
2262
|
+
*offs_stream++ = -(int32)BitReader_ReadDistanceB(&bits_b, *packed_offs_stream++);
|
2263
|
+
}
|
2264
|
+
} else {
|
2265
|
+
// New way of coding offsets
|
2266
|
+
int *offs_stream_org = offs_stream;
|
2267
|
+
const uint8 *packed_offs_stream_end = packed_offs_stream + packed_offs_stream_size;
|
2268
|
+
uint32 cmd, offs;
|
2269
|
+
while (packed_offs_stream != packed_offs_stream_end) {
|
2270
|
+
cmd = *packed_offs_stream++;
|
2271
|
+
if ((cmd >> 3) > 26)
|
2272
|
+
return 0;
|
2273
|
+
offs = ((8 + (cmd & 7)) << (cmd >> 3)) | BitReader_ReadMoreThan24Bits(&bits_a, (cmd >> 3));
|
2274
|
+
*offs_stream++ = 8 - (int32)offs;
|
2275
|
+
if (packed_offs_stream == packed_offs_stream_end)
|
2276
|
+
break;
|
2277
|
+
cmd = *packed_offs_stream++;
|
2278
|
+
if ((cmd >> 3) > 26)
|
2279
|
+
return 0;
|
2280
|
+
offs = ((8 + (cmd & 7)) << (cmd >> 3)) | BitReader_ReadMoreThan24BitsB(&bits_b, (cmd >> 3));
|
2281
|
+
*offs_stream++ = 8 - (int32)offs;
|
2282
|
+
}
|
2283
|
+
if (multi_dist_scale != 1) {
|
2284
|
+
CombineScaledOffsetArrays(offs_stream_org, offs_stream - offs_stream_org, multi_dist_scale, packed_offs_stream_extra);
|
2285
|
+
}
|
2286
|
+
}
|
2287
|
+
uint32 u32_len_stream_buf[512]; // max count is 128kb / 256 = 512
|
2288
|
+
if (u32_len_stream_size > 512)
|
2289
|
+
return false;
|
2290
|
+
|
2291
|
+
uint32 *u32_len_stream = u32_len_stream_buf,
|
2292
|
+
*u32_len_stream_end = u32_len_stream_buf + u32_len_stream_size;
|
2293
|
+
for (i = 0; i + 1 < u32_len_stream_size; i += 2) {
|
2294
|
+
if (!BitReader_ReadLength(&bits_a, &u32_len_stream[i + 0]))
|
2295
|
+
return false;
|
2296
|
+
if (!BitReader_ReadLengthB(&bits_b, &u32_len_stream[i + 1]))
|
2297
|
+
return false;
|
2298
|
+
}
|
2299
|
+
if (i < u32_len_stream_size) {
|
2300
|
+
if (!BitReader_ReadLength(&bits_a, &u32_len_stream[i + 0]))
|
2301
|
+
return false;
|
2302
|
+
}
|
2303
|
+
|
2304
|
+
bits_a.p -= (24 - bits_a.bitpos) >> 3;
|
2305
|
+
bits_b.p += (24 - bits_b.bitpos) >> 3;
|
2306
|
+
|
2307
|
+
if (bits_a.p != bits_b.p)
|
2308
|
+
return false;
|
2309
|
+
|
2310
|
+
for (i = 0; i < packed_litlen_stream_size; i++) {
|
2311
|
+
uint32 v = packed_litlen_stream[i];
|
2312
|
+
if (v == 255)
|
2313
|
+
v = *u32_len_stream++ + 255;
|
2314
|
+
len_stream[i] = v + 3;
|
2315
|
+
}
|
2316
|
+
if (u32_len_stream != u32_len_stream_end)
|
2317
|
+
return false;
|
2318
|
+
|
2319
|
+
return true;
|
2320
|
+
}
|
2321
|
+
bool Kraken_ReadLzTable(int mode,
|
2322
|
+
const byte *src, const byte *src_end,
|
2323
|
+
byte *dst, int dst_size, int offset,
|
2324
|
+
byte *scratch, byte *scratch_end, KrakenLzTable *lztable) {
|
2325
|
+
byte *out;
|
2326
|
+
int decode_count, n;
|
2327
|
+
byte *packed_offs_stream, *packed_len_stream;
|
2328
|
+
|
2329
|
+
if (mode > 1)
|
2330
|
+
return false;
|
2331
|
+
|
2332
|
+
if (src_end - src < 13)
|
2333
|
+
return false;
|
2334
|
+
|
2335
|
+
if (offset == 0) {
|
2336
|
+
COPY_64(dst, src);
|
2337
|
+
dst += 8;
|
2338
|
+
src += 8;
|
2339
|
+
}
|
2340
|
+
|
2341
|
+
if (*src & 0x80) {
|
2342
|
+
uint8 flag = *src++;
|
2343
|
+
if ((flag & 0xc0) != 0x80)
|
2344
|
+
return false; // reserved flag set
|
2345
|
+
|
2346
|
+
return false; // excess bytes not supported
|
2347
|
+
}
|
2348
|
+
|
2349
|
+
// Disable no copy optimization if source and dest overlap
|
2350
|
+
bool force_copy = dst <= src_end && src <= dst + dst_size;
|
2351
|
+
|
2352
|
+
// Decode lit stream, bounded by dst_size
|
2353
|
+
out = scratch;
|
2354
|
+
n = Kraken_DecodeBytes(&out, src, src_end, &decode_count, Min(scratch_end - scratch, dst_size),
|
2355
|
+
force_copy, scratch, scratch_end);
|
2356
|
+
if (n < 0)
|
2357
|
+
return false;
|
2358
|
+
src += n;
|
2359
|
+
lztable->lit_stream = out;
|
2360
|
+
lztable->lit_stream_size = decode_count;
|
2361
|
+
scratch += decode_count;
|
2362
|
+
|
2363
|
+
// Decode command stream, bounded by dst_size
|
2364
|
+
out = scratch;
|
2365
|
+
n = Kraken_DecodeBytes(&out, src, src_end, &decode_count, Min(scratch_end - scratch, dst_size),
|
2366
|
+
force_copy, scratch, scratch_end);
|
2367
|
+
if (n < 0)
|
2368
|
+
return false;
|
2369
|
+
src += n;
|
2370
|
+
lztable->cmd_stream = out;
|
2371
|
+
lztable->cmd_stream_size = decode_count;
|
2372
|
+
scratch += decode_count;
|
2373
|
+
|
2374
|
+
// Check if to decode the multistuff crap
|
2375
|
+
if (src_end - src < 3)
|
2376
|
+
return false;
|
2377
|
+
|
2378
|
+
int offs_scaling = 0;
|
2379
|
+
uint8 *packed_offs_stream_extra = NULL;
|
2380
|
+
|
2381
|
+
if (src[0] & 0x80) {
|
2382
|
+
// uses the mode where distances are coded with 2 tables
|
2383
|
+
offs_scaling = src[0] - 127;
|
2384
|
+
src++;
|
2385
|
+
|
2386
|
+
packed_offs_stream = scratch;
|
2387
|
+
n = Kraken_DecodeBytes(&packed_offs_stream, src, src_end, &lztable->offs_stream_size,
|
2388
|
+
Min(scratch_end - scratch, lztable->cmd_stream_size), false, scratch, scratch_end);
|
2389
|
+
if (n < 0)
|
2390
|
+
return false;
|
2391
|
+
src += n;
|
2392
|
+
scratch += lztable->offs_stream_size;
|
2393
|
+
|
2394
|
+
if (offs_scaling != 1) {
|
2395
|
+
packed_offs_stream_extra = scratch;
|
2396
|
+
n = Kraken_DecodeBytes(&packed_offs_stream_extra, src, src_end, &decode_count,
|
2397
|
+
Min(scratch_end - scratch, lztable->offs_stream_size), false, scratch, scratch_end);
|
2398
|
+
if (n < 0 || decode_count != lztable->offs_stream_size)
|
2399
|
+
return false;
|
2400
|
+
src += n;
|
2401
|
+
scratch += decode_count;
|
2402
|
+
}
|
2403
|
+
} else {
|
2404
|
+
// Decode packed offset stream, it's bounded by the command length.
|
2405
|
+
packed_offs_stream = scratch;
|
2406
|
+
n = Kraken_DecodeBytes(&packed_offs_stream, src, src_end, &lztable->offs_stream_size,
|
2407
|
+
Min(scratch_end - scratch, lztable->cmd_stream_size), false, scratch, scratch_end);
|
2408
|
+
if (n < 0)
|
2409
|
+
return false;
|
2410
|
+
src += n;
|
2411
|
+
scratch += lztable->offs_stream_size;
|
2412
|
+
}
|
2413
|
+
|
2414
|
+
// Decode packed litlen stream. It's bounded by 1/4 of dst_size.
|
2415
|
+
packed_len_stream = scratch;
|
2416
|
+
n = Kraken_DecodeBytes(&packed_len_stream, src, src_end, &lztable->len_stream_size,
|
2417
|
+
Min(scratch_end - scratch, dst_size >> 2), false, scratch, scratch_end);
|
2418
|
+
if (n < 0)
|
2419
|
+
return false;
|
2420
|
+
src += n;
|
2421
|
+
scratch += lztable->len_stream_size;
|
2422
|
+
|
2423
|
+
// Reserve memory for final dist stream
|
2424
|
+
scratch = ALIGN_POINTER(scratch, 16);
|
2425
|
+
lztable->offs_stream = (int*)scratch;
|
2426
|
+
scratch += lztable->offs_stream_size * 4;
|
2427
|
+
|
2428
|
+
// Reserve memory for final len stream
|
2429
|
+
scratch = ALIGN_POINTER(scratch, 16);
|
2430
|
+
lztable->len_stream = (int*)scratch;
|
2431
|
+
scratch += lztable->len_stream_size * 4;
|
2432
|
+
|
2433
|
+
if (scratch + 64 > scratch_end)
|
2434
|
+
return false;
|
2435
|
+
|
2436
|
+
return Kraken_UnpackOffsets(src, src_end, packed_offs_stream, packed_offs_stream_extra,
|
2437
|
+
lztable->offs_stream_size, offs_scaling,
|
2438
|
+
packed_len_stream, lztable->len_stream_size,
|
2439
|
+
lztable->offs_stream, lztable->len_stream, 0, 0);
|
2440
|
+
}
|
2441
|
+
|
2442
|
+
|
2443
|
+
// Note: may access memory out of bounds on invalid input.
|
2444
|
+
bool Kraken_ProcessLzRuns_Type0(KrakenLzTable *lzt, byte *dst, byte *dst_end, byte *dst_start) {
|
2445
|
+
const byte *cmd_stream = lzt->cmd_stream,
|
2446
|
+
*cmd_stream_end = cmd_stream + lzt->cmd_stream_size;
|
2447
|
+
const int *len_stream = lzt->len_stream;
|
2448
|
+
const int *len_stream_end = lzt->len_stream + lzt->len_stream_size;
|
2449
|
+
const byte *lit_stream = lzt->lit_stream;
|
2450
|
+
const byte *lit_stream_end = lzt->lit_stream + lzt->lit_stream_size;
|
2451
|
+
const int *offs_stream = lzt->offs_stream;
|
2452
|
+
const int *offs_stream_end = lzt->offs_stream + lzt->offs_stream_size;
|
2453
|
+
const byte *copyfrom;
|
2454
|
+
uint32 final_len;
|
2455
|
+
int32 offset;
|
2456
|
+
int32 recent_offs[7];
|
2457
|
+
int32 last_offset;
|
2458
|
+
|
2459
|
+
recent_offs[3] = -8;
|
2460
|
+
recent_offs[4] = -8;
|
2461
|
+
recent_offs[5] = -8;
|
2462
|
+
last_offset = -8;
|
2463
|
+
|
2464
|
+
while (cmd_stream < cmd_stream_end) {
|
2465
|
+
uint32 f = *cmd_stream++;
|
2466
|
+
uint32 litlen = f & 3;
|
2467
|
+
uint32 offs_index = f >> 6;
|
2468
|
+
uint32 matchlen = (f >> 2) & 0xF;
|
2469
|
+
|
2470
|
+
// use cmov
|
2471
|
+
uint32 next_long_length = *len_stream;
|
2472
|
+
const int *next_len_stream = len_stream + 1;
|
2473
|
+
|
2474
|
+
len_stream = (litlen == 3) ? next_len_stream : len_stream;
|
2475
|
+
litlen = (litlen == 3) ? next_long_length : litlen;
|
2476
|
+
recent_offs[6] = *offs_stream;
|
2477
|
+
|
2478
|
+
COPY_64_ADD(dst, lit_stream, &dst[last_offset]);
|
2479
|
+
if (litlen > 8) {
|
2480
|
+
COPY_64_ADD(dst + 8, lit_stream + 8, &dst[last_offset + 8]);
|
2481
|
+
if (litlen > 16) {
|
2482
|
+
COPY_64_ADD(dst + 16, lit_stream + 16, &dst[last_offset + 16]);
|
2483
|
+
if (litlen > 24) {
|
2484
|
+
do {
|
2485
|
+
COPY_64_ADD(dst + 24, lit_stream + 24, &dst[last_offset + 24]);
|
2486
|
+
litlen -= 8;
|
2487
|
+
dst += 8;
|
2488
|
+
lit_stream += 8;
|
2489
|
+
} while (litlen > 24);
|
2490
|
+
}
|
2491
|
+
}
|
2492
|
+
}
|
2493
|
+
dst += litlen;
|
2494
|
+
lit_stream += litlen;
|
2495
|
+
|
2496
|
+
offset = recent_offs[offs_index + 3];
|
2497
|
+
recent_offs[offs_index + 3] = recent_offs[offs_index + 2];
|
2498
|
+
recent_offs[offs_index + 2] = recent_offs[offs_index + 1];
|
2499
|
+
recent_offs[offs_index + 1] = recent_offs[offs_index + 0];
|
2500
|
+
recent_offs[3] = offset;
|
2501
|
+
last_offset = offset;
|
2502
|
+
|
2503
|
+
offs_stream = (int*)((intptr_t)offs_stream + ((offs_index + 1) & 4));
|
2504
|
+
|
2505
|
+
if ((uintptr_t)offset < (uintptr_t)(dst_start - dst))
|
2506
|
+
return false; // offset out of bounds
|
2507
|
+
|
2508
|
+
copyfrom = dst + offset;
|
2509
|
+
if (matchlen != 15) {
|
2510
|
+
COPY_64(dst, copyfrom);
|
2511
|
+
COPY_64(dst + 8, copyfrom + 8);
|
2512
|
+
dst += matchlen + 2;
|
2513
|
+
} else {
|
2514
|
+
matchlen = 14 + *len_stream++; // why is the value not 16 here, the above case copies up to 16 bytes.
|
2515
|
+
if ((uintptr_t)matchlen >(uintptr_t)(dst_end - dst))
|
2516
|
+
return false; // copy length out of bounds
|
2517
|
+
COPY_64(dst, copyfrom);
|
2518
|
+
COPY_64(dst + 8, copyfrom + 8);
|
2519
|
+
COPY_64(dst + 16, copyfrom + 16);
|
2520
|
+
do {
|
2521
|
+
COPY_64(dst + 24, copyfrom + 24);
|
2522
|
+
matchlen -= 8;
|
2523
|
+
dst += 8;
|
2524
|
+
copyfrom += 8;
|
2525
|
+
} while (matchlen > 24);
|
2526
|
+
dst += matchlen;
|
2527
|
+
}
|
2528
|
+
}
|
2529
|
+
|
2530
|
+
// check for incorrect input
|
2531
|
+
if (offs_stream != offs_stream_end || len_stream != len_stream_end)
|
2532
|
+
return false;
|
2533
|
+
|
2534
|
+
final_len = dst_end - dst;
|
2535
|
+
if (final_len != lit_stream_end - lit_stream)
|
2536
|
+
return false;
|
2537
|
+
|
2538
|
+
if (final_len >= 8) {
|
2539
|
+
do {
|
2540
|
+
COPY_64_ADD(dst, lit_stream, &dst[last_offset]);
|
2541
|
+
dst += 8, lit_stream += 8, final_len -= 8;
|
2542
|
+
} while (final_len >= 8);
|
2543
|
+
}
|
2544
|
+
if (final_len > 0) {
|
2545
|
+
do {
|
2546
|
+
*dst = *lit_stream++ + dst[last_offset];
|
2547
|
+
} while (dst++, --final_len);
|
2548
|
+
}
|
2549
|
+
return true;
|
2550
|
+
}
|
2551
|
+
|
2552
|
+
|
2553
|
+
// Note: may access memory out of bounds on invalid input.
|
2554
|
+
bool Kraken_ProcessLzRuns_Type1(KrakenLzTable *lzt, byte *dst, byte *dst_end, byte *dst_start) {
|
2555
|
+
const byte *cmd_stream = lzt->cmd_stream,
|
2556
|
+
*cmd_stream_end = cmd_stream + lzt->cmd_stream_size;
|
2557
|
+
const int *len_stream = lzt->len_stream;
|
2558
|
+
const int *len_stream_end = lzt->len_stream + lzt->len_stream_size;
|
2559
|
+
const byte *lit_stream = lzt->lit_stream;
|
2560
|
+
const byte *lit_stream_end = lzt->lit_stream + lzt->lit_stream_size;
|
2561
|
+
const int *offs_stream = lzt->offs_stream;
|
2562
|
+
const int *offs_stream_end = lzt->offs_stream + lzt->offs_stream_size;
|
2563
|
+
const byte *copyfrom;
|
2564
|
+
uint32 final_len;
|
2565
|
+
int32 offset;
|
2566
|
+
int32 recent_offs[7];
|
2567
|
+
|
2568
|
+
recent_offs[3] = -8;
|
2569
|
+
recent_offs[4] = -8;
|
2570
|
+
recent_offs[5] = -8;
|
2571
|
+
|
2572
|
+
while (cmd_stream < cmd_stream_end) {
|
2573
|
+
uint32 f = *cmd_stream++;
|
2574
|
+
uint32 litlen = f & 3;
|
2575
|
+
uint32 offs_index = f >> 6;
|
2576
|
+
uint32 matchlen = (f >> 2) & 0xF;
|
2577
|
+
|
2578
|
+
// use cmov
|
2579
|
+
uint32 next_long_length = *len_stream;
|
2580
|
+
const int *next_len_stream = len_stream + 1;
|
2581
|
+
|
2582
|
+
len_stream = (litlen == 3) ? next_len_stream : len_stream;
|
2583
|
+
litlen = (litlen == 3) ? next_long_length : litlen;
|
2584
|
+
recent_offs[6] = *offs_stream;
|
2585
|
+
|
2586
|
+
COPY_64(dst, lit_stream);
|
2587
|
+
if (litlen > 8) {
|
2588
|
+
COPY_64(dst + 8, lit_stream + 8);
|
2589
|
+
if (litlen > 16) {
|
2590
|
+
COPY_64(dst + 16, lit_stream + 16);
|
2591
|
+
if (litlen > 24) {
|
2592
|
+
do {
|
2593
|
+
COPY_64(dst + 24, lit_stream + 24);
|
2594
|
+
litlen -= 8;
|
2595
|
+
dst += 8;
|
2596
|
+
lit_stream += 8;
|
2597
|
+
} while (litlen > 24);
|
2598
|
+
}
|
2599
|
+
}
|
2600
|
+
}
|
2601
|
+
dst += litlen;
|
2602
|
+
lit_stream += litlen;
|
2603
|
+
|
2604
|
+
offset = recent_offs[offs_index + 3];
|
2605
|
+
recent_offs[offs_index + 3] = recent_offs[offs_index + 2];
|
2606
|
+
recent_offs[offs_index + 2] = recent_offs[offs_index + 1];
|
2607
|
+
recent_offs[offs_index + 1] = recent_offs[offs_index + 0];
|
2608
|
+
recent_offs[3] = offset;
|
2609
|
+
|
2610
|
+
offs_stream = (int*)((intptr_t)offs_stream + ((offs_index + 1) & 4));
|
2611
|
+
|
2612
|
+
if ((uintptr_t)offset < (uintptr_t)(dst_start - dst))
|
2613
|
+
return false; // offset out of bounds
|
2614
|
+
|
2615
|
+
copyfrom = dst + offset;
|
2616
|
+
if (matchlen != 15) {
|
2617
|
+
COPY_64(dst, copyfrom);
|
2618
|
+
COPY_64(dst + 8, copyfrom + 8);
|
2619
|
+
dst += matchlen + 2;
|
2620
|
+
} else {
|
2621
|
+
matchlen = 14 + *len_stream++; // why is the value not 16 here, the above case copies up to 16 bytes.
|
2622
|
+
if ((uintptr_t)matchlen > (uintptr_t)(dst_end - dst))
|
2623
|
+
return false; // copy length out of bounds
|
2624
|
+
COPY_64(dst, copyfrom);
|
2625
|
+
COPY_64(dst + 8, copyfrom + 8);
|
2626
|
+
COPY_64(dst + 16, copyfrom + 16);
|
2627
|
+
do {
|
2628
|
+
COPY_64(dst + 24, copyfrom + 24);
|
2629
|
+
matchlen -= 8;
|
2630
|
+
dst += 8;
|
2631
|
+
copyfrom += 8;
|
2632
|
+
} while (matchlen > 24);
|
2633
|
+
dst += matchlen;
|
2634
|
+
}
|
2635
|
+
}
|
2636
|
+
|
2637
|
+
// check for incorrect input
|
2638
|
+
if (offs_stream != offs_stream_end || len_stream != len_stream_end)
|
2639
|
+
return false;
|
2640
|
+
|
2641
|
+
final_len = dst_end - dst;
|
2642
|
+
if (final_len != lit_stream_end - lit_stream)
|
2643
|
+
return false;
|
2644
|
+
|
2645
|
+
if (final_len >= 64) {
|
2646
|
+
do {
|
2647
|
+
COPY_64_BYTES(dst, lit_stream);
|
2648
|
+
dst += 64, lit_stream += 64, final_len -= 64;
|
2649
|
+
} while (final_len >= 64);
|
2650
|
+
}
|
2651
|
+
if (final_len >= 8) {
|
2652
|
+
do {
|
2653
|
+
COPY_64(dst, lit_stream);
|
2654
|
+
dst += 8, lit_stream += 8, final_len -= 8;
|
2655
|
+
} while (final_len >= 8);
|
2656
|
+
}
|
2657
|
+
if (final_len > 0) {
|
2658
|
+
do {
|
2659
|
+
*dst++ = *lit_stream++;
|
2660
|
+
} while (--final_len);
|
2661
|
+
}
|
2662
|
+
return true;
|
2663
|
+
}
|
2664
|
+
|
2665
|
+
bool Kraken_ProcessLzRuns(int mode, byte *dst, int dst_size, int offset, KrakenLzTable *lztable) {
|
2666
|
+
byte *dst_end = dst + dst_size;
|
2667
|
+
|
2668
|
+
if (mode == 1)
|
2669
|
+
return Kraken_ProcessLzRuns_Type1(lztable, dst + (offset == 0 ? 8 : 0), dst_end, dst - offset);
|
2670
|
+
|
2671
|
+
if (mode == 0)
|
2672
|
+
return Kraken_ProcessLzRuns_Type0(lztable, dst + (offset == 0 ? 8 : 0), dst_end, dst - offset);
|
2673
|
+
|
2674
|
+
|
2675
|
+
return false;
|
2676
|
+
}
|
2677
|
+
|
2678
|
+
// Decode one 256kb big quantum block. It's divided into two 128k blocks
|
2679
|
+
// internally that are compressed separately but with a shared history.
|
2680
|
+
int Kraken_DecodeQuantum(byte *dst, byte *dst_end, byte *dst_start,
|
2681
|
+
const byte *src, const byte *src_end,
|
2682
|
+
byte *scratch, byte *scratch_end) {
|
2683
|
+
const byte *src_in = src;
|
2684
|
+
int mode, chunkhdr, dst_count, src_used, written_bytes;
|
2685
|
+
|
2686
|
+
while (dst_end - dst != 0) {
|
2687
|
+
dst_count = dst_end - dst;
|
2688
|
+
if (dst_count > 0x20000) dst_count = 0x20000;
|
2689
|
+
if (src_end - src < 4)
|
2690
|
+
return -1;
|
2691
|
+
chunkhdr = src[2] | src[1] << 8 | src[0] << 16;
|
2692
|
+
if (!(chunkhdr & 0x800000)) {
|
2693
|
+
// Stored as entropy without any match copying.
|
2694
|
+
byte *out = dst;
|
2695
|
+
src_used = Kraken_DecodeBytes(&out, src, src_end, &written_bytes, dst_count, false, scratch, scratch_end);
|
2696
|
+
if (src_used < 0 || written_bytes != dst_count)
|
2697
|
+
return -1;
|
2698
|
+
} else {
|
2699
|
+
src += 3;
|
2700
|
+
src_used = chunkhdr & 0x7FFFF;
|
2701
|
+
mode = (chunkhdr >> 19) & 0xF;
|
2702
|
+
if (src_end - src < src_used)
|
2703
|
+
return -1;
|
2704
|
+
if (src_used < dst_count) {
|
2705
|
+
size_t scratch_usage = Min(Min(3 * dst_count + 32 + 0xd000, 0x6C000), scratch_end - scratch);
|
2706
|
+
if (scratch_usage < sizeof(KrakenLzTable))
|
2707
|
+
return -1;
|
2708
|
+
if (!Kraken_ReadLzTable(mode,
|
2709
|
+
src, src + src_used,
|
2710
|
+
dst, dst_count,
|
2711
|
+
dst - dst_start,
|
2712
|
+
scratch + sizeof(KrakenLzTable), scratch + scratch_usage,
|
2713
|
+
(KrakenLzTable*)scratch))
|
2714
|
+
return -1;
|
2715
|
+
if (!Kraken_ProcessLzRuns(mode, dst, dst_count, dst - dst_start, (KrakenLzTable*)scratch))
|
2716
|
+
return -1;
|
2717
|
+
} else if (src_used > dst_count || mode != 0) {
|
2718
|
+
return -1;
|
2719
|
+
} else {
|
2720
|
+
memmove(dst, src, dst_count);
|
2721
|
+
}
|
2722
|
+
}
|
2723
|
+
src += src_used;
|
2724
|
+
dst += dst_count;
|
2725
|
+
}
|
2726
|
+
return src - src_in;
|
2727
|
+
}
|
2728
|
+
|
2729
|
+
struct LeviathanLzTable {
|
2730
|
+
int *offs_stream;
|
2731
|
+
int offs_stream_size;
|
2732
|
+
int *len_stream;
|
2733
|
+
int len_stream_size;
|
2734
|
+
uint8 *lit_stream[16];
|
2735
|
+
int lit_stream_size[16];
|
2736
|
+
int lit_stream_total;
|
2737
|
+
uint8 *multi_cmd_ptr[8];
|
2738
|
+
uint8 *multi_cmd_end[8];
|
2739
|
+
uint8 *cmd_stream;
|
2740
|
+
int cmd_stream_size;
|
2741
|
+
};
|
2742
|
+
|
2743
|
+
bool Leviathan_ReadLzTable(int chunk_type,
|
2744
|
+
const byte *src, const byte *src_end,
|
2745
|
+
byte *dst, int dst_size, int offset,
|
2746
|
+
byte *scratch, byte *scratch_end, LeviathanLzTable *lztable) {
|
2747
|
+
byte *packed_offs_stream, *packed_len_stream, *out;
|
2748
|
+
int decode_count, n;
|
2749
|
+
|
2750
|
+
if (chunk_type > 5)
|
2751
|
+
return false;
|
2752
|
+
|
2753
|
+
if (src_end - src < 13)
|
2754
|
+
return false;
|
2755
|
+
|
2756
|
+
if (offset == 0) {
|
2757
|
+
COPY_64(dst, src);
|
2758
|
+
dst += 8;
|
2759
|
+
src += 8;
|
2760
|
+
}
|
2761
|
+
|
2762
|
+
int offs_scaling = 0;
|
2763
|
+
uint8 *packed_offs_stream_extra = NULL;
|
2764
|
+
|
2765
|
+
|
2766
|
+
int offs_stream_limit = dst_size / 3;
|
2767
|
+
|
2768
|
+
if (!(src[0] & 0x80)) {
|
2769
|
+
// Decode packed offset stream, it's bounded by the command length.
|
2770
|
+
packed_offs_stream = scratch;
|
2771
|
+
n = Kraken_DecodeBytes(&packed_offs_stream, src, src_end, &lztable->offs_stream_size,
|
2772
|
+
Min(scratch_end - scratch, offs_stream_limit), false, scratch, scratch_end);
|
2773
|
+
if (n < 0)
|
2774
|
+
return false;
|
2775
|
+
src += n;
|
2776
|
+
scratch += lztable->offs_stream_size;
|
2777
|
+
} else {
|
2778
|
+
// uses the mode where distances are coded with 2 tables
|
2779
|
+
// and the transformation offs * scaling + low_bits
|
2780
|
+
offs_scaling = src[0] - 127;
|
2781
|
+
src++;
|
2782
|
+
|
2783
|
+
packed_offs_stream = scratch;
|
2784
|
+
n = Kraken_DecodeBytes(&packed_offs_stream, src, src_end, &lztable->offs_stream_size,
|
2785
|
+
Min(scratch_end - scratch, offs_stream_limit), false, scratch, scratch_end);
|
2786
|
+
if (n < 0)
|
2787
|
+
return false;
|
2788
|
+
src += n;
|
2789
|
+
scratch += lztable->offs_stream_size;
|
2790
|
+
|
2791
|
+
if (offs_scaling != 1) {
|
2792
|
+
packed_offs_stream_extra = scratch;
|
2793
|
+
n = Kraken_DecodeBytes(&packed_offs_stream_extra, src, src_end, &decode_count,
|
2794
|
+
Min(scratch_end - scratch, offs_stream_limit), false, scratch, scratch_end);
|
2795
|
+
if (n < 0 || decode_count != lztable->offs_stream_size)
|
2796
|
+
return false;
|
2797
|
+
src += n;
|
2798
|
+
scratch += decode_count;
|
2799
|
+
}
|
2800
|
+
}
|
2801
|
+
|
2802
|
+
// Decode packed litlen stream. It's bounded by 1/5 of dst_size.
|
2803
|
+
packed_len_stream = scratch;
|
2804
|
+
n = Kraken_DecodeBytes(&packed_len_stream, src, src_end, &lztable->len_stream_size,
|
2805
|
+
Min(scratch_end - scratch, dst_size / 5), false, scratch, scratch_end);
|
2806
|
+
if (n < 0)
|
2807
|
+
return false;
|
2808
|
+
src += n;
|
2809
|
+
scratch += lztable->len_stream_size;
|
2810
|
+
|
2811
|
+
// Reserve memory for final dist stream
|
2812
|
+
scratch = ALIGN_POINTER(scratch, 16);
|
2813
|
+
lztable->offs_stream = (int*)scratch;
|
2814
|
+
scratch += lztable->offs_stream_size * 4;
|
2815
|
+
|
2816
|
+
// Reserve memory for final len stream
|
2817
|
+
scratch = ALIGN_POINTER(scratch, 16);
|
2818
|
+
lztable->len_stream = (int*)scratch;
|
2819
|
+
scratch += lztable->len_stream_size * 4;
|
2820
|
+
|
2821
|
+
if (scratch > scratch_end)
|
2822
|
+
return false;
|
2823
|
+
|
2824
|
+
if (chunk_type <= 1) {
|
2825
|
+
// Decode lit stream, bounded by dst_size
|
2826
|
+
out = scratch;
|
2827
|
+
n = Kraken_DecodeBytes(&out, src, src_end, &decode_count, Min(scratch_end - scratch, dst_size),
|
2828
|
+
true, scratch, scratch_end);
|
2829
|
+
if (n < 0)
|
2830
|
+
return false;
|
2831
|
+
src += n;
|
2832
|
+
lztable->lit_stream[0] = out;
|
2833
|
+
lztable->lit_stream_size[0] = decode_count;
|
2834
|
+
} else {
|
2835
|
+
int array_count = (chunk_type == 2) ? 2 :
|
2836
|
+
(chunk_type == 3) ? 4 : 16;
|
2837
|
+
n = Kraken_DecodeMultiArray(src, src_end, scratch, scratch_end, lztable->lit_stream,
|
2838
|
+
lztable->lit_stream_size, array_count, &decode_count,
|
2839
|
+
true, scratch, scratch_end);
|
2840
|
+
if (n < 0)
|
2841
|
+
return false;
|
2842
|
+
src += n;
|
2843
|
+
}
|
2844
|
+
scratch += decode_count;
|
2845
|
+
lztable->lit_stream_total = decode_count;
|
2846
|
+
|
2847
|
+
if (src >= src_end)
|
2848
|
+
return false;
|
2849
|
+
|
2850
|
+
if (!(src[0] & 0x80)) {
|
2851
|
+
// Decode command stream, bounded by dst_size
|
2852
|
+
out = scratch;
|
2853
|
+
n = Kraken_DecodeBytes(&out, src, src_end, &decode_count, Min(scratch_end - scratch, dst_size),
|
2854
|
+
true, scratch, scratch_end);
|
2855
|
+
if (n < 0)
|
2856
|
+
return false;
|
2857
|
+
src += n;
|
2858
|
+
lztable->cmd_stream = out;
|
2859
|
+
lztable->cmd_stream_size = decode_count;
|
2860
|
+
scratch += decode_count;
|
2861
|
+
} else {
|
2862
|
+
if (src[0] != 0x83)
|
2863
|
+
return false;
|
2864
|
+
src++;
|
2865
|
+
int multi_cmd_lens[8];
|
2866
|
+
n = Kraken_DecodeMultiArray(src, src_end, scratch, scratch_end, lztable->multi_cmd_ptr,
|
2867
|
+
multi_cmd_lens, 8, &decode_count, true, scratch, scratch_end);
|
2868
|
+
if (n < 0)
|
2869
|
+
return false;
|
2870
|
+
src += n;
|
2871
|
+
for (size_t i = 0; i < 8; i++)
|
2872
|
+
lztable->multi_cmd_end[i] = lztable->multi_cmd_ptr[i] + multi_cmd_lens[i];
|
2873
|
+
|
2874
|
+
lztable->cmd_stream = NULL;
|
2875
|
+
lztable->cmd_stream_size = decode_count;
|
2876
|
+
scratch += decode_count;
|
2877
|
+
}
|
2878
|
+
|
2879
|
+
if (dst_size > scratch_end - scratch)
|
2880
|
+
return false;
|
2881
|
+
|
2882
|
+
|
2883
|
+
return Kraken_UnpackOffsets(src, src_end, packed_offs_stream, packed_offs_stream_extra,
|
2884
|
+
lztable->offs_stream_size, offs_scaling,
|
2885
|
+
packed_len_stream, lztable->len_stream_size,
|
2886
|
+
lztable->offs_stream, lztable->len_stream, 0, 0);
|
2887
|
+
}
|
2888
|
+
|
2889
|
+
#define finline __forceinline
|
2890
|
+
|
2891
|
+
struct LeviathanModeRaw {
|
2892
|
+
const uint8 *lit_stream;
|
2893
|
+
|
2894
|
+
finline LeviathanModeRaw(LeviathanLzTable *lzt, uint8 *dst_start) : lit_stream(lzt->lit_stream[0]) {
|
2895
|
+
}
|
2896
|
+
|
2897
|
+
finline bool CopyLiterals(uint32 cmd, uint8 *&dst, const int *&len_stream, uint8 *match_zone_end, size_t last_offset) {
|
2898
|
+
uint32 litlen = (cmd >> 3) & 3;
|
2899
|
+
// use cmov
|
2900
|
+
uint32 len_stream_value = *len_stream & 0xffffff;
|
2901
|
+
const int *next_len_stream = len_stream + 1;
|
2902
|
+
len_stream = (litlen == 3) ? next_len_stream : len_stream;
|
2903
|
+
litlen = (litlen == 3) ? len_stream_value : litlen;
|
2904
|
+
COPY_64(dst, lit_stream);
|
2905
|
+
if (litlen > 8) {
|
2906
|
+
COPY_64(dst + 8, lit_stream + 8);
|
2907
|
+
if (litlen > 16) {
|
2908
|
+
COPY_64(dst + 16, lit_stream + 16);
|
2909
|
+
if (litlen > 24) {
|
2910
|
+
if (litlen > match_zone_end - dst)
|
2911
|
+
return false; // out of bounds
|
2912
|
+
do {
|
2913
|
+
COPY_64(dst + 24, lit_stream + 24);
|
2914
|
+
litlen -= 8, dst += 8, lit_stream += 8;
|
2915
|
+
} while (litlen > 24);
|
2916
|
+
}
|
2917
|
+
}
|
2918
|
+
}
|
2919
|
+
dst += litlen;
|
2920
|
+
lit_stream += litlen;
|
2921
|
+
return true;
|
2922
|
+
}
|
2923
|
+
|
2924
|
+
finline void CopyFinalLiterals(uint32 final_len, uint8 *&dst, size_t last_offset) {
|
2925
|
+
if (final_len >= 64) {
|
2926
|
+
do {
|
2927
|
+
COPY_64_BYTES(dst, lit_stream);
|
2928
|
+
dst += 64, lit_stream += 64, final_len -= 64;
|
2929
|
+
} while (final_len >= 64);
|
2930
|
+
}
|
2931
|
+
if (final_len >= 8) {
|
2932
|
+
do {
|
2933
|
+
COPY_64(dst, lit_stream);
|
2934
|
+
dst += 8, lit_stream += 8, final_len -= 8;
|
2935
|
+
} while (final_len >= 8);
|
2936
|
+
}
|
2937
|
+
if (final_len > 0) {
|
2938
|
+
do {
|
2939
|
+
*dst++ = *lit_stream++;
|
2940
|
+
} while (--final_len);
|
2941
|
+
}
|
2942
|
+
}
|
2943
|
+
};
|
2944
|
+
|
2945
|
+
struct LeviathanModeSub {
|
2946
|
+
const uint8 *lit_stream;
|
2947
|
+
|
2948
|
+
finline LeviathanModeSub(LeviathanLzTable *lzt, uint8 *dst_start) : lit_stream(lzt->lit_stream[0]) {
|
2949
|
+
}
|
2950
|
+
|
2951
|
+
finline bool CopyLiterals(uint32 cmd, uint8 *&dst, const int *&len_stream, uint8 *match_zone_end, size_t last_offset) {
|
2952
|
+
uint32 litlen = (cmd >> 3) & 3;
|
2953
|
+
// use cmov
|
2954
|
+
uint32 len_stream_value = *len_stream & 0xffffff;
|
2955
|
+
const int *next_len_stream = len_stream + 1;
|
2956
|
+
len_stream = (litlen == 3) ? next_len_stream : len_stream;
|
2957
|
+
litlen = (litlen == 3) ? len_stream_value : litlen;
|
2958
|
+
COPY_64_ADD(dst, lit_stream, &dst[last_offset]);
|
2959
|
+
if (litlen > 8) {
|
2960
|
+
COPY_64_ADD(dst + 8, lit_stream + 8, &dst[last_offset + 8]);
|
2961
|
+
if (litlen > 16) {
|
2962
|
+
COPY_64_ADD(dst + 16, lit_stream + 16, &dst[last_offset + 16]);
|
2963
|
+
if (litlen > 24) {
|
2964
|
+
if (litlen > match_zone_end - dst)
|
2965
|
+
return false; // out of bounds
|
2966
|
+
do {
|
2967
|
+
COPY_64_ADD(dst + 24, lit_stream + 24, &dst[last_offset + 24]);
|
2968
|
+
litlen -= 8, dst += 8, lit_stream += 8;
|
2969
|
+
} while (litlen > 24);
|
2970
|
+
}
|
2971
|
+
}
|
2972
|
+
}
|
2973
|
+
dst += litlen;
|
2974
|
+
lit_stream += litlen;
|
2975
|
+
return true;
|
2976
|
+
}
|
2977
|
+
|
2978
|
+
finline void CopyFinalLiterals(uint32 final_len, uint8 *&dst, size_t last_offset) {
|
2979
|
+
if (final_len >= 8) {
|
2980
|
+
do {
|
2981
|
+
COPY_64_ADD(dst, lit_stream, &dst[last_offset]);
|
2982
|
+
dst += 8, lit_stream += 8, final_len -= 8;
|
2983
|
+
} while (final_len >= 8);
|
2984
|
+
}
|
2985
|
+
if (final_len > 0) {
|
2986
|
+
do {
|
2987
|
+
*dst = *lit_stream++ + dst[last_offset];
|
2988
|
+
} while (dst++, --final_len);
|
2989
|
+
}
|
2990
|
+
}
|
2991
|
+
};
|
2992
|
+
|
2993
|
+
struct LeviathanModeLamSub {
|
2994
|
+
const uint8 *lit_stream, *lam_lit_stream;
|
2995
|
+
|
2996
|
+
finline LeviathanModeLamSub(LeviathanLzTable *lzt, uint8 *dst_start)
|
2997
|
+
: lit_stream(lzt->lit_stream[0]),
|
2998
|
+
lam_lit_stream(lzt->lit_stream[1]) {
|
2999
|
+
}
|
3000
|
+
|
3001
|
+
finline bool CopyLiterals(uint32 cmd, uint8 *&dst, const int *&len_stream, uint8 *match_zone_end, size_t last_offset) {
|
3002
|
+
uint32 lit_cmd = cmd & 0x18;
|
3003
|
+
if (!lit_cmd)
|
3004
|
+
return true;
|
3005
|
+
|
3006
|
+
uint32 litlen = lit_cmd >> 3;
|
3007
|
+
// use cmov
|
3008
|
+
uint32 len_stream_value = *len_stream & 0xffffff;
|
3009
|
+
const int *next_len_stream = len_stream + 1;
|
3010
|
+
len_stream = (litlen == 3) ? next_len_stream : len_stream;
|
3011
|
+
litlen = (litlen == 3) ? len_stream_value : litlen;
|
3012
|
+
|
3013
|
+
if (litlen-- == 0)
|
3014
|
+
return false; // lamsub mode requires one literal
|
3015
|
+
|
3016
|
+
dst[0] = *lam_lit_stream++ + dst[last_offset], dst++;
|
3017
|
+
|
3018
|
+
COPY_64_ADD(dst, lit_stream, &dst[last_offset]);
|
3019
|
+
if (litlen > 8) {
|
3020
|
+
COPY_64_ADD(dst + 8, lit_stream + 8, &dst[last_offset + 8]);
|
3021
|
+
if (litlen > 16) {
|
3022
|
+
COPY_64_ADD(dst + 16, lit_stream + 16, &dst[last_offset + 16]);
|
3023
|
+
if (litlen > 24) {
|
3024
|
+
if (litlen > match_zone_end - dst)
|
3025
|
+
return false; // out of bounds
|
3026
|
+
do {
|
3027
|
+
COPY_64_ADD(dst + 24, lit_stream + 24, &dst[last_offset + 24]);
|
3028
|
+
litlen -= 8, dst += 8, lit_stream += 8;
|
3029
|
+
} while (litlen > 24);
|
3030
|
+
}
|
3031
|
+
}
|
3032
|
+
}
|
3033
|
+
dst += litlen;
|
3034
|
+
lit_stream += litlen;
|
3035
|
+
return true;
|
3036
|
+
}
|
3037
|
+
|
3038
|
+
finline void CopyFinalLiterals(uint32 final_len, uint8 *&dst, size_t last_offset) {
|
3039
|
+
dst[0] = *lam_lit_stream++ + dst[last_offset], dst++;
|
3040
|
+
final_len -= 1;
|
3041
|
+
|
3042
|
+
if (final_len >= 8) {
|
3043
|
+
do {
|
3044
|
+
COPY_64_ADD(dst, lit_stream, &dst[last_offset]);
|
3045
|
+
dst += 8, lit_stream += 8, final_len -= 8;
|
3046
|
+
} while (final_len >= 8);
|
3047
|
+
}
|
3048
|
+
if (final_len > 0) {
|
3049
|
+
do {
|
3050
|
+
*dst = *lit_stream++ + dst[last_offset];
|
3051
|
+
} while (dst++, --final_len);
|
3052
|
+
}
|
3053
|
+
}
|
3054
|
+
};
|
3055
|
+
|
3056
|
+
struct LeviathanModeSubAnd3 {
|
3057
|
+
enum { NUM = 4, MASK = NUM - 1};
|
3058
|
+
const uint8 *lit_stream[NUM];
|
3059
|
+
|
3060
|
+
finline LeviathanModeSubAnd3(LeviathanLzTable *lzt, uint8 *dst_start) {
|
3061
|
+
for (size_t i = 0; i != NUM; i++)
|
3062
|
+
lit_stream[i] = lzt->lit_stream[(-(intptr_t)dst_start + i) & MASK];
|
3063
|
+
}
|
3064
|
+
finline bool CopyLiterals(uint32 cmd, uint8 *&dst, const int *&len_stream, uint8 *match_zone_end, size_t last_offset) {
|
3065
|
+
uint32 lit_cmd = cmd & 0x18;
|
3066
|
+
|
3067
|
+
if (lit_cmd == 0x18) {
|
3068
|
+
uint32 litlen = *len_stream++ & 0xffffff;
|
3069
|
+
if (litlen > match_zone_end - dst)
|
3070
|
+
return false;
|
3071
|
+
while (litlen) {
|
3072
|
+
*dst = *lit_stream[(uintptr_t)dst & MASK]++ + dst[last_offset];
|
3073
|
+
dst++, litlen--;
|
3074
|
+
}
|
3075
|
+
} else if (lit_cmd) {
|
3076
|
+
*dst = *lit_stream[(uintptr_t)dst & MASK]++ + dst[last_offset];
|
3077
|
+
dst++;
|
3078
|
+
if (lit_cmd == 0x10) {
|
3079
|
+
*dst = *lit_stream[(uintptr_t)dst & MASK]++ + dst[last_offset];
|
3080
|
+
dst++;
|
3081
|
+
}
|
3082
|
+
}
|
3083
|
+
return true;
|
3084
|
+
}
|
3085
|
+
|
3086
|
+
finline void CopyFinalLiterals(uint32 final_len, uint8 *&dst, size_t last_offset) {
|
3087
|
+
if (final_len > 0) {
|
3088
|
+
do {
|
3089
|
+
*dst = *lit_stream[(uintptr_t)dst & MASK]++ + dst[last_offset];
|
3090
|
+
} while (dst++, --final_len);
|
3091
|
+
}
|
3092
|
+
}
|
3093
|
+
};
|
3094
|
+
|
3095
|
+
struct LeviathanModeSubAndF {
|
3096
|
+
enum { NUM = 16, MASK = NUM - 1};
|
3097
|
+
const uint8 *lit_stream[NUM];
|
3098
|
+
|
3099
|
+
finline LeviathanModeSubAndF(LeviathanLzTable *lzt, uint8 *dst_start) {
|
3100
|
+
for(size_t i = 0; i != NUM; i++)
|
3101
|
+
lit_stream[i] = lzt->lit_stream[(-(intptr_t)dst_start + i) & MASK];
|
3102
|
+
}
|
3103
|
+
finline bool CopyLiterals(uint32 cmd, uint8 *&dst, const int *&len_stream, uint8 *match_zone_end, size_t last_offset) {
|
3104
|
+
uint32 lit_cmd = cmd & 0x18;
|
3105
|
+
|
3106
|
+
if (lit_cmd == 0x18) {
|
3107
|
+
uint32 litlen = *len_stream++ & 0xffffff;
|
3108
|
+
if (litlen > match_zone_end - dst)
|
3109
|
+
return false;
|
3110
|
+
while (litlen) {
|
3111
|
+
*dst = *lit_stream[(uintptr_t)dst & MASK]++ + dst[last_offset];
|
3112
|
+
dst++, litlen--;
|
3113
|
+
}
|
3114
|
+
} else if (lit_cmd) {
|
3115
|
+
*dst = *lit_stream[(uintptr_t)dst & MASK]++ + dst[last_offset];
|
3116
|
+
dst++;
|
3117
|
+
if (lit_cmd == 0x10) {
|
3118
|
+
*dst = *lit_stream[(uintptr_t)dst & MASK]++ + dst[last_offset];
|
3119
|
+
dst++;
|
3120
|
+
}
|
3121
|
+
}
|
3122
|
+
return true;
|
3123
|
+
}
|
3124
|
+
|
3125
|
+
finline void CopyFinalLiterals(uint32 final_len, uint8 *&dst, size_t last_offset) {
|
3126
|
+
if (final_len > 0) {
|
3127
|
+
do {
|
3128
|
+
*dst = *lit_stream[(uintptr_t)dst & MASK]++ + dst[last_offset];
|
3129
|
+
} while (dst++, --final_len);
|
3130
|
+
}
|
3131
|
+
}
|
3132
|
+
};
|
3133
|
+
|
3134
|
+
struct LeviathanModeO1 {
|
3135
|
+
const uint8 *lit_streams[16];
|
3136
|
+
uint8 next_lit[16];
|
3137
|
+
|
3138
|
+
finline LeviathanModeO1(LeviathanLzTable *lzt, uint8 *dst_start) {
|
3139
|
+
for (size_t i = 0; i != 16; i++) {
|
3140
|
+
uint8 *p = lzt->lit_stream[i];
|
3141
|
+
next_lit[i] = *p;
|
3142
|
+
lit_streams[i] = p + 1;
|
3143
|
+
}
|
3144
|
+
}
|
3145
|
+
|
3146
|
+
finline bool CopyLiterals(uint32 cmd, uint8 *&dst, const int *&len_stream, uint8 *match_zone_end, size_t last_offset) {
|
3147
|
+
uint32 lit_cmd = cmd & 0x18;
|
3148
|
+
|
3149
|
+
if (lit_cmd == 0x18) {
|
3150
|
+
uint32 litlen = *len_stream++;
|
3151
|
+
if ((int32)litlen <= 0)
|
3152
|
+
return false;
|
3153
|
+
uint context = dst[-1];
|
3154
|
+
do {
|
3155
|
+
size_t slot = context >> 4;
|
3156
|
+
*dst++ = (context = next_lit[slot]);
|
3157
|
+
next_lit[slot] = *lit_streams[slot]++;
|
3158
|
+
} while (--litlen);
|
3159
|
+
} else if (lit_cmd) {
|
3160
|
+
// either 1 or 2
|
3161
|
+
uint context = dst[-1];
|
3162
|
+
size_t slot = context >> 4;
|
3163
|
+
*dst++ = (context = next_lit[slot]);
|
3164
|
+
next_lit[slot] = *lit_streams[slot]++;
|
3165
|
+
if (lit_cmd == 0x10) {
|
3166
|
+
slot = context >> 4;
|
3167
|
+
*dst++ = (context = next_lit[slot]);
|
3168
|
+
next_lit[slot] = *lit_streams[slot]++;
|
3169
|
+
}
|
3170
|
+
}
|
3171
|
+
return true;
|
3172
|
+
}
|
3173
|
+
|
3174
|
+
finline void CopyFinalLiterals(uint32 final_len, uint8 *&dst, size_t last_offset) {
|
3175
|
+
uint context = dst[-1];
|
3176
|
+
while (final_len) {
|
3177
|
+
size_t slot = context >> 4;
|
3178
|
+
*dst++ = (context = next_lit[slot]);
|
3179
|
+
next_lit[slot] = *lit_streams[slot]++;
|
3180
|
+
final_len--;
|
3181
|
+
}
|
3182
|
+
}
|
3183
|
+
};
|
3184
|
+
|
3185
|
+
template<typename Mode, bool MultiCmd>
|
3186
|
+
bool Leviathan_ProcessLz(LeviathanLzTable *lzt, uint8 *dst,
|
3187
|
+
uint8 *dst_start, uint8 *dst_end, uint8 *window_base) {
|
3188
|
+
const uint8 *cmd_stream = lzt->cmd_stream,
|
3189
|
+
*cmd_stream_end = cmd_stream + lzt->cmd_stream_size;
|
3190
|
+
const int *len_stream = lzt->len_stream;
|
3191
|
+
const int *len_stream_end = len_stream + lzt->len_stream_size;
|
3192
|
+
|
3193
|
+
const int *offs_stream = lzt->offs_stream;
|
3194
|
+
const int *offs_stream_end = offs_stream + lzt->offs_stream_size;
|
3195
|
+
const byte *copyfrom;
|
3196
|
+
uint8 *match_zone_end = (dst_end - dst_start >= 16) ? dst_end - 16 : dst_start;
|
3197
|
+
|
3198
|
+
int32 recent_offs[16];
|
3199
|
+
recent_offs[8] = recent_offs[9] = recent_offs[10] = recent_offs[11] = -8;
|
3200
|
+
recent_offs[12] = recent_offs[13] = recent_offs[14] = -8;
|
3201
|
+
|
3202
|
+
size_t offset = -8;
|
3203
|
+
|
3204
|
+
Mode mode(lzt, dst_start);
|
3205
|
+
|
3206
|
+
uint32 cmd_stream_left;
|
3207
|
+
const uint8 *multi_cmd_stream[8], **cmd_stream_ptr;
|
3208
|
+
if (MultiCmd) {
|
3209
|
+
for (size_t i = 0; i != 8; i++)
|
3210
|
+
multi_cmd_stream[i] = lzt->multi_cmd_ptr[(i - (uintptr_t)dst_start) & 7];
|
3211
|
+
cmd_stream_left = lzt->cmd_stream_size;
|
3212
|
+
cmd_stream_ptr = &multi_cmd_stream[(uintptr_t)dst & 7];
|
3213
|
+
cmd_stream = *cmd_stream_ptr;
|
3214
|
+
}
|
3215
|
+
|
3216
|
+
for(;;) {
|
3217
|
+
uint32 cmd;
|
3218
|
+
|
3219
|
+
if (!MultiCmd) {
|
3220
|
+
if (cmd_stream >= cmd_stream_end)
|
3221
|
+
break;
|
3222
|
+
cmd = *cmd_stream++;
|
3223
|
+
} else {
|
3224
|
+
if (cmd_stream_left == 0)
|
3225
|
+
break;
|
3226
|
+
cmd_stream_left--;
|
3227
|
+
cmd = *cmd_stream;
|
3228
|
+
*cmd_stream_ptr = cmd_stream + 1;
|
3229
|
+
}
|
3230
|
+
|
3231
|
+
uint32 offs_index = cmd >> 5;
|
3232
|
+
uint32 matchlen = (cmd & 7) + 2;
|
3233
|
+
|
3234
|
+
recent_offs[15] = *offs_stream;
|
3235
|
+
|
3236
|
+
if (!mode.CopyLiterals(cmd, dst, len_stream, match_zone_end, offset))
|
3237
|
+
return false;
|
3238
|
+
|
3239
|
+
offset = recent_offs[(size_t)offs_index + 8];
|
3240
|
+
|
3241
|
+
// Permute the recent offsets table
|
3242
|
+
__m128i temp = _mm_loadu_si128((const __m128i *)&recent_offs[(size_t)offs_index + 4]);
|
3243
|
+
_mm_storeu_si128((__m128i *)&recent_offs[(size_t)offs_index + 1], _mm_loadu_si128((const __m128i *)&recent_offs[offs_index]));
|
3244
|
+
_mm_storeu_si128((__m128i *)&recent_offs[(size_t)offs_index + 5], temp);
|
3245
|
+
recent_offs[8] = (int32)offset;
|
3246
|
+
offs_stream += offs_index == 7;
|
3247
|
+
|
3248
|
+
if ((uintptr_t)offset < (uintptr_t)(window_base - dst))
|
3249
|
+
return false; // offset out of bounds
|
3250
|
+
copyfrom = dst + offset;
|
3251
|
+
|
3252
|
+
if (matchlen == 9) {
|
3253
|
+
if (len_stream >= len_stream_end)
|
3254
|
+
return false; // len stream empty
|
3255
|
+
matchlen = *--len_stream_end + 6;
|
3256
|
+
COPY_64(dst, copyfrom);
|
3257
|
+
COPY_64(dst + 8, copyfrom + 8);
|
3258
|
+
uint8 *next_dst = dst + matchlen;
|
3259
|
+
if (MultiCmd)
|
3260
|
+
cmd_stream = *(cmd_stream_ptr = &multi_cmd_stream[(uintptr_t)next_dst & 7]);
|
3261
|
+
if (matchlen > 16) {
|
3262
|
+
if (matchlen > (uintptr_t)(dst_end - 8 - dst))
|
3263
|
+
return false; // no space in buf
|
3264
|
+
COPY_64(dst + 16, copyfrom + 16);
|
3265
|
+
do {
|
3266
|
+
COPY_64(dst + 24, copyfrom + 24);
|
3267
|
+
matchlen -= 8;
|
3268
|
+
dst += 8;
|
3269
|
+
copyfrom += 8;
|
3270
|
+
} while (matchlen > 24);
|
3271
|
+
}
|
3272
|
+
dst = next_dst;
|
3273
|
+
} else {
|
3274
|
+
COPY_64(dst, copyfrom);
|
3275
|
+
dst += matchlen;
|
3276
|
+
if (MultiCmd)
|
3277
|
+
cmd_stream = *(cmd_stream_ptr = &multi_cmd_stream[(uintptr_t)dst & 7]);
|
3278
|
+
}
|
3279
|
+
}
|
3280
|
+
|
3281
|
+
// check for incorrect input
|
3282
|
+
if (offs_stream != offs_stream_end || len_stream != len_stream_end)
|
3283
|
+
return false;
|
3284
|
+
|
3285
|
+
// copy final literals
|
3286
|
+
if (dst < dst_end) {
|
3287
|
+
mode.CopyFinalLiterals(dst_end - dst, dst, offset);
|
3288
|
+
} else if (dst != dst_end) {
|
3289
|
+
return false;
|
3290
|
+
}
|
3291
|
+
return true;
|
3292
|
+
}
|
3293
|
+
|
3294
|
+
bool Leviathan_ProcessLzRuns(int chunk_type, byte *dst, int dst_size, int offset, LeviathanLzTable *lzt) {
|
3295
|
+
uint8 *dst_cur = dst + (offset == 0 ? 8 : 0);
|
3296
|
+
uint8 *dst_end = dst + dst_size;
|
3297
|
+
uint8 *dst_start = dst - offset;
|
3298
|
+
|
3299
|
+
if (lzt->cmd_stream != NULL) {
|
3300
|
+
// single cmd mode
|
3301
|
+
switch (chunk_type) {
|
3302
|
+
case 0:
|
3303
|
+
return Leviathan_ProcessLz<LeviathanModeSub, false>(lzt, dst_cur, dst, dst_end, dst_start);
|
3304
|
+
case 1:
|
3305
|
+
return Leviathan_ProcessLz<LeviathanModeRaw, false>(lzt, dst_cur, dst, dst_end, dst_start);
|
3306
|
+
case 2:
|
3307
|
+
return Leviathan_ProcessLz<LeviathanModeLamSub, false>(lzt, dst_cur, dst, dst_end, dst_start);
|
3308
|
+
case 3:
|
3309
|
+
return Leviathan_ProcessLz<LeviathanModeSubAnd3, false>(lzt, dst_cur, dst, dst_end, dst_start);
|
3310
|
+
case 4:
|
3311
|
+
return Leviathan_ProcessLz<LeviathanModeO1, false>(lzt, dst_cur, dst, dst_end, dst_start);
|
3312
|
+
case 5:
|
3313
|
+
return Leviathan_ProcessLz<LeviathanModeSubAndF, false>(lzt, dst_cur, dst, dst_end, dst_start);
|
3314
|
+
}
|
3315
|
+
} else {
|
3316
|
+
// multi cmd mode
|
3317
|
+
switch (chunk_type) {
|
3318
|
+
case 0:
|
3319
|
+
return Leviathan_ProcessLz<LeviathanModeSub, true>(lzt, dst_cur, dst, dst_end, dst_start);
|
3320
|
+
case 1:
|
3321
|
+
return Leviathan_ProcessLz<LeviathanModeRaw, true>(lzt, dst_cur, dst, dst_end, dst_start);
|
3322
|
+
case 2:
|
3323
|
+
return Leviathan_ProcessLz<LeviathanModeLamSub, true>(lzt, dst_cur, dst, dst_end, dst_start);
|
3324
|
+
case 3:
|
3325
|
+
return Leviathan_ProcessLz<LeviathanModeSubAnd3, true>(lzt, dst_cur, dst, dst_end, dst_start);
|
3326
|
+
case 4:
|
3327
|
+
return Leviathan_ProcessLz<LeviathanModeO1, true>(lzt, dst_cur, dst, dst_end, dst_start);
|
3328
|
+
case 5:
|
3329
|
+
return Leviathan_ProcessLz<LeviathanModeSubAndF, true>(lzt, dst_cur, dst, dst_end, dst_start);
|
3330
|
+
}
|
3331
|
+
|
3332
|
+
}
|
3333
|
+
return false;
|
3334
|
+
}
|
3335
|
+
|
3336
|
+
|
3337
|
+
|
3338
|
+
// Decode one 256kb big quantum block. It's divided into two 128k blocks
|
3339
|
+
// internally that are compressed separately but with a shared history.
|
3340
|
+
int Leviathan_DecodeQuantum(byte *dst, byte *dst_end, byte *dst_start,
|
3341
|
+
const byte *src, const byte *src_end,
|
3342
|
+
byte *scratch, byte *scratch_end) {
|
3343
|
+
const byte *src_in = src;
|
3344
|
+
int mode, chunkhdr, dst_count, src_used, written_bytes;
|
3345
|
+
|
3346
|
+
while (dst_end - dst != 0) {
|
3347
|
+
dst_count = dst_end - dst;
|
3348
|
+
if (dst_count > 0x20000) dst_count = 0x20000;
|
3349
|
+
if (src_end - src < 4)
|
3350
|
+
return -1;
|
3351
|
+
chunkhdr = src[2] | src[1] << 8 | src[0] << 16;
|
3352
|
+
if (!(chunkhdr & 0x800000)) {
|
3353
|
+
// Stored as entropy without any match copying.
|
3354
|
+
byte *out = dst;
|
3355
|
+
src_used = Kraken_DecodeBytes(&out, src, src_end, &written_bytes, dst_count, false, scratch, scratch_end);
|
3356
|
+
if (src_used < 0 || written_bytes != dst_count)
|
3357
|
+
return -1;
|
3358
|
+
} else {
|
3359
|
+
src += 3;
|
3360
|
+
src_used = chunkhdr & 0x7FFFF;
|
3361
|
+
mode = (chunkhdr >> 19) & 0xF;
|
3362
|
+
if (src_end - src < src_used)
|
3363
|
+
return -1;
|
3364
|
+
if (src_used < dst_count) {
|
3365
|
+
size_t scratch_usage = Min(Min(3 * dst_count + 32 + 0xd000, 0x6C000), scratch_end - scratch);
|
3366
|
+
if (scratch_usage < sizeof(LeviathanLzTable))
|
3367
|
+
return -1;
|
3368
|
+
if (!Leviathan_ReadLzTable(mode,
|
3369
|
+
src, src + src_used,
|
3370
|
+
dst, dst_count,
|
3371
|
+
dst - dst_start,
|
3372
|
+
scratch + sizeof(LeviathanLzTable), scratch + scratch_usage,
|
3373
|
+
(LeviathanLzTable*)scratch))
|
3374
|
+
return -1;
|
3375
|
+
if (!Leviathan_ProcessLzRuns(mode, dst, dst_count, dst - dst_start, (LeviathanLzTable*)scratch))
|
3376
|
+
return -1;
|
3377
|
+
} else if (src_used > dst_count || mode != 0) {
|
3378
|
+
return -1;
|
3379
|
+
} else {
|
3380
|
+
memmove(dst, src, dst_count);
|
3381
|
+
}
|
3382
|
+
}
|
3383
|
+
src += src_used;
|
3384
|
+
dst += dst_count;
|
3385
|
+
}
|
3386
|
+
return src - src_in;
|
3387
|
+
}
|
3388
|
+
|
3389
|
+
|
3390
|
+
|
3391
|
+
int Mermaid_DecodeFarOffsets(const byte *src, const byte *src_end, uint32 *output, size_t output_size, int64 offset) {
|
3392
|
+
const byte *src_cur = src;
|
3393
|
+
size_t i;
|
3394
|
+
uint32 off;
|
3395
|
+
|
3396
|
+
if (offset < (0xC00000 - 1)) {
|
3397
|
+
for (i = 0; i != output_size; i++) {
|
3398
|
+
if (src_end - src_cur < 3)
|
3399
|
+
return -1;
|
3400
|
+
off = src_cur[0] | src_cur[1] << 8 | src_cur[2] << 16;
|
3401
|
+
src_cur += 3;
|
3402
|
+
output[i] = off;
|
3403
|
+
if (off > offset)
|
3404
|
+
return -1;
|
3405
|
+
}
|
3406
|
+
return src_cur - src;
|
3407
|
+
}
|
3408
|
+
|
3409
|
+
for (i = 0; i != output_size; i++) {
|
3410
|
+
if (src_end - src_cur < 3)
|
3411
|
+
return -1;
|
3412
|
+
off = src_cur[0] | src_cur[1] << 8 | src_cur[2] << 16;
|
3413
|
+
src_cur += 3;
|
3414
|
+
|
3415
|
+
if (off >= 0xc00000) {
|
3416
|
+
if (src_cur == src_end)
|
3417
|
+
return -1;
|
3418
|
+
off += *src_cur++ << 22;
|
3419
|
+
}
|
3420
|
+
output[i] = off;
|
3421
|
+
if (off > offset)
|
3422
|
+
return -1;
|
3423
|
+
}
|
3424
|
+
return src_cur - src;
|
3425
|
+
}
|
3426
|
+
|
3427
|
+
// Merges separately stored low and high bytes into |size| 16-bit values:
// dst[i] = lo[i] + 256 * hi[i].
void Mermaid_CombineOffs16(uint16 *dst, size_t size, const uint8 *lo, const uint8 *hi) {
  const uint8 *const lo_end = lo + size;
  while (lo != lo_end) {
    *dst = *lo + *hi * 256;
    ++dst;
    ++lo;
    ++hi;
  }
}
|
3431
|
+
|
3432
|
+
bool Mermaid_ReadLzTable(int mode,
|
3433
|
+
const byte *src, const byte *src_end,
|
3434
|
+
byte *dst, int dst_size, int64 offset,
|
3435
|
+
byte *scratch, byte *scratch_end, MermaidLzTable *lz) {
|
3436
|
+
byte *out;
|
3437
|
+
int decode_count, n;
|
3438
|
+
uint32 tmp, off32_size_2, off32_size_1;
|
3439
|
+
|
3440
|
+
if (mode > 1)
|
3441
|
+
return false;
|
3442
|
+
|
3443
|
+
if (src_end - src < 10)
|
3444
|
+
return false;
|
3445
|
+
|
3446
|
+
if (offset == 0) {
|
3447
|
+
COPY_64(dst, src);
|
3448
|
+
dst += 8;
|
3449
|
+
src += 8;
|
3450
|
+
}
|
3451
|
+
|
3452
|
+
// Decode lit stream
|
3453
|
+
out = scratch;
|
3454
|
+
n = Kraken_DecodeBytes(&out, src, src_end, &decode_count, Min(scratch_end - scratch, dst_size), false, scratch, scratch_end);
|
3455
|
+
if (n < 0)
|
3456
|
+
return false;
|
3457
|
+
src += n;
|
3458
|
+
lz->lit_stream = out;
|
3459
|
+
lz->lit_stream_end = out + decode_count;
|
3460
|
+
scratch += decode_count;
|
3461
|
+
|
3462
|
+
// Decode flag stream
|
3463
|
+
out = scratch;
|
3464
|
+
n = Kraken_DecodeBytes(&out, src, src_end, &decode_count, Min(scratch_end - scratch, dst_size), false, scratch, scratch_end);
|
3465
|
+
if (n < 0)
|
3466
|
+
return false;
|
3467
|
+
src += n;
|
3468
|
+
lz->cmd_stream = out;
|
3469
|
+
lz->cmd_stream_end = out + decode_count;
|
3470
|
+
scratch += decode_count;
|
3471
|
+
|
3472
|
+
lz->cmd_stream_2_offs_end = decode_count;
|
3473
|
+
if (dst_size <= 0x10000) {
|
3474
|
+
lz->cmd_stream_2_offs = decode_count;
|
3475
|
+
} else {
|
3476
|
+
if (src_end - src < 2)
|
3477
|
+
return false;
|
3478
|
+
lz->cmd_stream_2_offs = *(uint16*)src;
|
3479
|
+
src += 2;
|
3480
|
+
if (lz->cmd_stream_2_offs > lz->cmd_stream_2_offs_end)
|
3481
|
+
return false;
|
3482
|
+
}
|
3483
|
+
|
3484
|
+
if (src_end - src < 2)
|
3485
|
+
return false;
|
3486
|
+
|
3487
|
+
int off16_count = *(uint16*)src;
|
3488
|
+
if (off16_count == 0xffff) {
|
3489
|
+
// off16 is entropy coded
|
3490
|
+
uint8 *off16_lo, *off16_hi;
|
3491
|
+
int off16_lo_count, off16_hi_count;
|
3492
|
+
src += 2;
|
3493
|
+
off16_hi = scratch;
|
3494
|
+
n = Kraken_DecodeBytes(&off16_hi, src, src_end, &off16_hi_count, Min(scratch_end - scratch, dst_size >> 1), false, scratch, scratch_end);
|
3495
|
+
if (n < 0)
|
3496
|
+
return false;
|
3497
|
+
src += n;
|
3498
|
+
scratch += off16_hi_count;
|
3499
|
+
|
3500
|
+
off16_lo = scratch;
|
3501
|
+
n = Kraken_DecodeBytes(&off16_lo, src, src_end, &off16_lo_count, Min(scratch_end - scratch, dst_size >> 1), false, scratch, scratch_end);
|
3502
|
+
if (n < 0)
|
3503
|
+
return false;
|
3504
|
+
src += n;
|
3505
|
+
scratch += off16_lo_count;
|
3506
|
+
|
3507
|
+
if (off16_lo_count != off16_hi_count)
|
3508
|
+
return false;
|
3509
|
+
scratch = ALIGN_POINTER(scratch, 2);
|
3510
|
+
lz->off16_stream = (uint16*)scratch;
|
3511
|
+
if (scratch + off16_lo_count * 2 > scratch_end)
|
3512
|
+
return false;
|
3513
|
+
scratch += off16_lo_count * 2;
|
3514
|
+
lz->off16_stream_end = (uint16*)scratch;
|
3515
|
+
Mermaid_CombineOffs16((uint16*)lz->off16_stream, off16_lo_count, off16_lo, off16_hi);
|
3516
|
+
} else {
|
3517
|
+
lz->off16_stream = (uint16*)(src + 2);
|
3518
|
+
src += 2 + off16_count * 2;
|
3519
|
+
lz->off16_stream_end = (uint16*)src;
|
3520
|
+
}
|
3521
|
+
|
3522
|
+
if (src_end - src < 3)
|
3523
|
+
return false;
|
3524
|
+
tmp = src[0] | src[1] << 8 | src[2] << 16;
|
3525
|
+
src += 3;
|
3526
|
+
|
3527
|
+
if (tmp != 0) {
|
3528
|
+
off32_size_1 = tmp >> 12;
|
3529
|
+
off32_size_2 = tmp & 0xFFF;
|
3530
|
+
if (off32_size_1 == 4095) {
|
3531
|
+
if (src_end - src < 2)
|
3532
|
+
return false;
|
3533
|
+
off32_size_1 = *(uint16*)src;
|
3534
|
+
src += 2;
|
3535
|
+
}
|
3536
|
+
if (off32_size_2 == 4095) {
|
3537
|
+
if (src_end - src < 2)
|
3538
|
+
return false;
|
3539
|
+
off32_size_2 = *(uint16*)src;
|
3540
|
+
src += 2;
|
3541
|
+
}
|
3542
|
+
lz->off32_size_1 = off32_size_1;
|
3543
|
+
lz->off32_size_2 = off32_size_2;
|
3544
|
+
|
3545
|
+
if (scratch + 4 * (off32_size_2 + off32_size_1) + 64 > scratch_end)
|
3546
|
+
return false;
|
3547
|
+
|
3548
|
+
scratch = ALIGN_POINTER(scratch, 4);
|
3549
|
+
|
3550
|
+
lz->off32_stream_1 = (uint32*)scratch;
|
3551
|
+
scratch += off32_size_1 * 4;
|
3552
|
+
// store dummy bytes after for prefetcher.
|
3553
|
+
((uint64*)scratch)[0] = 0;
|
3554
|
+
((uint64*)scratch)[1] = 0;
|
3555
|
+
((uint64*)scratch)[2] = 0;
|
3556
|
+
((uint64*)scratch)[3] = 0;
|
3557
|
+
scratch += 32;
|
3558
|
+
|
3559
|
+
lz->off32_stream_2 = (uint32*)scratch;
|
3560
|
+
scratch += off32_size_2 * 4;
|
3561
|
+
// store dummy bytes after for prefetcher.
|
3562
|
+
((uint64*)scratch)[0] = 0;
|
3563
|
+
((uint64*)scratch)[1] = 0;
|
3564
|
+
((uint64*)scratch)[2] = 0;
|
3565
|
+
((uint64*)scratch)[3] = 0;
|
3566
|
+
scratch += 32;
|
3567
|
+
|
3568
|
+
n = Mermaid_DecodeFarOffsets(src, src_end, lz->off32_stream_1, lz->off32_size_1, offset);
|
3569
|
+
if (n < 0)
|
3570
|
+
return false;
|
3571
|
+
src += n;
|
3572
|
+
|
3573
|
+
n = Mermaid_DecodeFarOffsets(src, src_end, lz->off32_stream_2, lz->off32_size_2, offset + 0x10000);
|
3574
|
+
if (n < 0)
|
3575
|
+
return false;
|
3576
|
+
src += n;
|
3577
|
+
} else {
|
3578
|
+
if (scratch_end - scratch < 32)
|
3579
|
+
return false;
|
3580
|
+
lz->off32_size_1 = 0;
|
3581
|
+
lz->off32_size_2 = 0;
|
3582
|
+
lz->off32_stream_1 = (uint32*)scratch;
|
3583
|
+
lz->off32_stream_2 = (uint32*)scratch;
|
3584
|
+
// store dummy bytes after for prefetcher.
|
3585
|
+
((uint64*)scratch)[0] = 0;
|
3586
|
+
((uint64*)scratch)[1] = 0;
|
3587
|
+
((uint64*)scratch)[2] = 0;
|
3588
|
+
((uint64*)scratch)[3] = 0;
|
3589
|
+
}
|
3590
|
+
lz->length_stream = src;
|
3591
|
+
return true;
|
3592
|
+
}
|
3593
|
+
|
3594
|
+
// Executes the command stream of one (at most 64k) Mermaid sub-chunk in
// mode 0, where literals are stored as deltas: each literal byte is added to
// the byte found at the most recent match offset (see the tail loop below).
//
//   dst, dst_size   start and size of the sub-chunk in the output buffer.
//   dst_ptr_end, dst_start
//                   not used by this function.
//   src_end         end of the compressed chunk; bounds |lz->length_stream|.
//   lz              decoded streams; length/off16/lit stream positions are
//                   written back on success.
//   saved_dist      in/out: most recent match offset (negative distance).
//   startoff        number of bytes at |dst| that are already filled in.
//
// Command byte layout, as implemented below:
//   >= 24  short copy: low 3 bits = literal count, bits 3..6 = match length,
//          bit 7 set = reuse the recent offset, clear = take a new one from
//          the off16 stream.
//   3..23  match of (cmd + 5) bytes at the next off32 offset, measured
//          backwards from the start of this sub-chunk.
//   0      long literal run, length from the length stream + 64.
//   1      long match at the next off16 offset, length + 91.
//   2      long match at the next off32 offset, length + 29.
//
// Returns the final length stream position, or NULL on malformed input.
// NOTE(review): the short-copy path performs no bounds checks and the copy
// loops write in 8/16-byte steps, so callers presumably provide slack after
// the output and scratch buffers - confirm against the allocation code.
const byte *Mermaid_Mode0(byte *dst, size_t dst_size, byte *dst_ptr_end, byte *dst_start,
                          const byte *src_end, MermaidLzTable *lz, int32 *saved_dist, size_t startoff) {
  const byte *dst_end = dst + dst_size;
  const byte *cmd_stream = lz->cmd_stream;
  const byte *cmd_stream_end = lz->cmd_stream_end;
  const byte *length_stream = lz->length_stream;
  const byte *lit_stream = lz->lit_stream;
  const byte *lit_stream_end = lz->lit_stream_end;
  const uint16 *off16_stream = lz->off16_stream;
  const uint16 *off16_stream_end = lz->off16_stream_end;
  const uint32 *off32_stream = lz->off32_stream;
  const uint32 *off32_stream_end = lz->off32_stream_end;
  intptr_t recent_offs = *saved_dist;
  const byte *match;
  intptr_t length;
  const byte *dst_begin = dst;

  dst += startoff;

  while (cmd_stream < cmd_stream_end) {
    uintptr_t cmd = *cmd_stream++;
    if (cmd >= 24) {
      intptr_t new_dist = *off16_stream;
      // All ones when bit 7 is clear (take a new offset), zero otherwise.
      uintptr_t use_distance = (uintptr_t)(cmd >> 7) - 1;
      uintptr_t litlen = (cmd & 7);
      COPY_64_ADD(dst, lit_stream, &dst[recent_offs]);
      dst += litlen;
      lit_stream += litlen;
      // Branch-free select: recent_offs = use_distance ? -new_dist : recent_offs.
      recent_offs ^= use_distance & (recent_offs ^ -new_dist);
      // Advances by one uint16 (2 bytes) only when the new offset was used.
      off16_stream = (uint16*)((uintptr_t)off16_stream + (use_distance & 2));
      match = dst + recent_offs;
      COPY_64(dst, match);
      COPY_64(dst + 8, match + 8);
      dst += (cmd >> 3) & 0xF;
    } else if (cmd > 2) {
      length = cmd + 5;

      if (off32_stream == off32_stream_end)
        return NULL;
      match = dst_begin - *off32_stream++;
      recent_offs = (match - dst);

      if (dst_end - dst < length)
        return NULL;
      COPY_64(dst, match);
      COPY_64(dst + 8, match + 8);
      COPY_64(dst + 16, match + 16);
      COPY_64(dst + 24, match + 24);
      dst += length;
      _mm_prefetch((char*)dst_begin - off32_stream[3], _MM_HINT_T0);
    } else if (cmd == 0) {
      // Long literal run.
      if (src_end - length_stream == 0)
        return NULL;
      length = *length_stream;
      if (length > 251) {
        if (src_end - length_stream < 3)
          return NULL;
        length += (size_t)*(uint16*)(length_stream + 1) * 4;
        length_stream += 2;
      }
      length_stream += 1;

      length += 64;
      if (dst_end - dst < length ||
          lit_stream_end - lit_stream < length)
        return NULL;

      do {
        COPY_64_ADD(dst, lit_stream, &dst[recent_offs]);
        COPY_64_ADD(dst + 8, lit_stream + 8, &dst[recent_offs + 8]);
        dst += 16;
        lit_stream += 16;
        length -= 16;
      } while (length > 0);
      // |length| is <= 0 here; step back over the overshoot of the last block.
      dst += length;
      lit_stream += length;
    } else if (cmd == 1) {
      // Long match with a 16-bit offset relative to the current position.
      if (src_end - length_stream == 0)
        return NULL;
      length = *length_stream;
      if (length > 251) {
        if (src_end - length_stream < 3)
          return NULL;
        length += (size_t)*(uint16*)(length_stream + 1) * 4;
        length_stream += 2;
      }
      length_stream += 1;
      length += 91;

      if (off16_stream == off16_stream_end)
        return NULL;
      match = dst - *off16_stream++;
      recent_offs = (match - dst);

      // The match must fit into this sub-chunk, same as the other branches.
      if (dst_end - dst < length)
        return NULL;
      do {
        COPY_64(dst, match);
        COPY_64(dst + 8, match + 8);
        dst += 16;
        match += 16;
        length -= 16;
      } while (length > 0);
      dst += length;
    } else /* flag == 2 */ {
      // Long match with a far offset relative to the sub-chunk start.
      if (src_end - length_stream == 0)
        return NULL;
      length = *length_stream;
      if (length > 251) {
        if (src_end - length_stream < 3)
          return NULL;
        length += (size_t)*(uint16*)(length_stream + 1) * 4;
        length_stream += 2;
      }
      length_stream += 1;
      length += 29;
      if (off32_stream == off32_stream_end)
        return NULL;
      match = dst_begin - *off32_stream++;
      recent_offs = (match - dst);

      // The match must fit into this sub-chunk, same as the other branches.
      if (dst_end - dst < length)
        return NULL;
      do {
        COPY_64(dst, match);
        COPY_64(dst + 8, match + 8);
        dst += 16;
        match += 16;
        length -= 16;
      } while (length > 0);
      dst += length;
      _mm_prefetch((char*)dst_begin - off32_stream[3], _MM_HINT_T0);
    }
  }

  // Whatever is left of the sub-chunk is filled with trailing literals.
  length = dst_end - dst;
  if (length >= 8) {
    do {
      COPY_64_ADD(dst, lit_stream, &dst[recent_offs]);
      dst += 8;
      lit_stream += 8;
      length -= 8;
    } while (length >= 8);
  }
  if (length > 0) {
    do {
      *dst = *lit_stream++ + dst[recent_offs];
      dst++;
    } while (--length);
  }

  *saved_dist = (int32)recent_offs;
  lz->length_stream = length_stream;
  lz->off16_stream = off16_stream;
  lz->lit_stream = lit_stream;
  return length_stream;
}
|
3745
|
+
|
3746
|
+
// Executes the command stream of one (at most 64k) Mermaid sub-chunk in
// mode 1, where literals are copied to the output unchanged.
//
//   dst, dst_size   start and size of the sub-chunk in the output buffer.
//   dst_ptr_end, dst_start
//                   not used by this function.
//   src_end         end of the compressed chunk; bounds |lz->length_stream|.
//   lz              decoded streams; length/off16/lit stream positions are
//                   written back on success.
//   saved_dist      in/out: most recent match offset (negative distance).
//   startoff        number of bytes at |dst| that are already filled in.
//
// Flag byte layout, as implemented below:
//   >= 24  short copy: low 3 bits = literal count, bits 3..6 = match length,
//          bit 7 set = reuse the recent offset, clear = take a new one from
//          the off16 stream.
//   3..23  match of (flag + 5) bytes at the next off32 offset, measured
//          backwards from the start of this sub-chunk.
//   0      long literal run, length from the length stream + 64.
//   1      long match at the next off16 offset, length + 91.
//   2      long match at the next off32 offset, length + 29.
//
// Returns the final length stream position, or NULL on malformed input.
// NOTE(review): the short-copy path performs no bounds checks and the copy
// loops write in 8/16-byte steps, so callers presumably provide slack after
// the output and scratch buffers - confirm against the allocation code.
const byte *Mermaid_Mode1(byte *dst, size_t dst_size, byte *dst_ptr_end, byte *dst_start,
                          const byte *src_end, MermaidLzTable *lz, int32 *saved_dist, size_t startoff) {
  const byte *dst_end = dst + dst_size;
  const byte *cmd_stream = lz->cmd_stream;
  const byte *cmd_stream_end = lz->cmd_stream_end;
  const byte *length_stream = lz->length_stream;
  const byte *lit_stream = lz->lit_stream;
  const byte *lit_stream_end = lz->lit_stream_end;
  const uint16 *off16_stream = lz->off16_stream;
  const uint16 *off16_stream_end = lz->off16_stream_end;
  const uint32 *off32_stream = lz->off32_stream;
  const uint32 *off32_stream_end = lz->off32_stream_end;
  intptr_t recent_offs = *saved_dist;
  const byte *match;
  intptr_t length;
  const byte *dst_begin = dst;

  dst += startoff;

  while (cmd_stream < cmd_stream_end) {
    uintptr_t flag = *cmd_stream++;
    if (flag >= 24) {
      intptr_t new_dist = *off16_stream;
      // All ones when bit 7 is clear (take a new offset), zero otherwise.
      uintptr_t use_distance = (uintptr_t)(flag >> 7) - 1;
      uintptr_t litlen = (flag & 7);
      COPY_64(dst, lit_stream);
      dst += litlen;
      lit_stream += litlen;
      // Branch-free select: recent_offs = use_distance ? -new_dist : recent_offs.
      recent_offs ^= use_distance & (recent_offs ^ -new_dist);
      // Advances by one uint16 (2 bytes) only when the new offset was used.
      off16_stream = (uint16*)((uintptr_t)off16_stream + (use_distance & 2));
      match = dst + recent_offs;
      COPY_64(dst, match);
      COPY_64(dst + 8, match + 8);
      dst += (flag >> 3) & 0xF;
    } else if (flag > 2) {
      length = flag + 5;

      if (off32_stream == off32_stream_end)
        return NULL;
      match = dst_begin - *off32_stream++;
      recent_offs = (match - dst);

      if (dst_end - dst < length)
        return NULL;
      COPY_64(dst, match);
      COPY_64(dst + 8, match + 8);
      COPY_64(dst + 16, match + 16);
      COPY_64(dst + 24, match + 24);
      dst += length;
      _mm_prefetch((char*)dst_begin - off32_stream[3], _MM_HINT_T0);
    } else if (flag == 0) {
      // Long literal run.
      if (src_end - length_stream == 0)
        return NULL;
      length = *length_stream;
      if (length > 251) {
        if (src_end - length_stream < 3)
          return NULL;
        length += (size_t)*(uint16*)(length_stream + 1) * 4;
        length_stream += 2;
      }
      length_stream += 1;

      length += 64;
      if (dst_end - dst < length ||
          lit_stream_end - lit_stream < length)
        return NULL;

      do {
        COPY_64(dst, lit_stream);
        COPY_64(dst + 8, lit_stream + 8);
        dst += 16;
        lit_stream += 16;
        length -= 16;
      } while (length > 0);
      // |length| is <= 0 here; step back over the overshoot of the last block.
      dst += length;
      lit_stream += length;
    } else if (flag == 1) {
      // Long match with a 16-bit offset relative to the current position.
      if (src_end - length_stream == 0)
        return NULL;
      length = *length_stream;
      if (length > 251) {
        if (src_end - length_stream < 3)
          return NULL;
        length += (size_t)*(uint16*)(length_stream + 1) * 4;
        length_stream += 2;
      }
      length_stream += 1;
      length += 91;

      if (off16_stream == off16_stream_end)
        return NULL;
      match = dst - *off16_stream++;
      recent_offs = (match - dst);

      // The match must fit into this sub-chunk, same as the other branches.
      if (dst_end - dst < length)
        return NULL;
      do {
        COPY_64(dst, match);
        COPY_64(dst + 8, match + 8);
        dst += 16;
        match += 16;
        length -= 16;
      } while (length > 0);
      dst += length;
    } else /* flag == 2 */ {
      // Long match with a far offset relative to the sub-chunk start.
      if (src_end - length_stream == 0)
        return NULL;
      length = *length_stream;
      if (length > 251) {
        if (src_end - length_stream < 3)
          return NULL;
        length += (size_t)*(uint16*)(length_stream + 1) * 4;
        length_stream += 2;
      }
      length_stream += 1;
      length += 29;

      if (off32_stream == off32_stream_end)
        return NULL;
      match = dst_begin - *off32_stream++;
      recent_offs = (match - dst);

      // The match must fit into this sub-chunk, same as the other branches.
      if (dst_end - dst < length)
        return NULL;
      do {
        COPY_64(dst, match);
        COPY_64(dst + 8, match + 8);
        dst += 16;
        match += 16;
        length -= 16;
      } while (length > 0);
      dst += length;

      _mm_prefetch((char*)dst_begin - off32_stream[3], _MM_HINT_T0);
    }
  }

  // Whatever is left of the sub-chunk is filled with trailing literals.
  length = dst_end - dst;
  if (length >= 8) {
    do {
      COPY_64(dst, lit_stream);
      dst += 8;
      lit_stream += 8;
      length -= 8;
    } while (length >= 8);
  }
  if (length > 0) {
    do {
      *dst++ = *lit_stream++;
    } while (--length);
  }

  *saved_dist = (int32)recent_offs;
  lz->length_stream = length_stream;
  lz->off16_stream = off16_stream;
  lz->lit_stream = lit_stream;
  return length_stream;
}
|
3899
|
+
|
3900
|
+
bool Mermaid_ProcessLzRuns(int mode,
|
3901
|
+
const byte *src, const byte *src_end,
|
3902
|
+
byte *dst, size_t dst_size, uint64 offset, byte *dst_end,
|
3903
|
+
MermaidLzTable *lz) {
|
3904
|
+
|
3905
|
+
int iteration = 0;
|
3906
|
+
byte *dst_start = dst - offset;
|
3907
|
+
int32 saved_dist = -8;
|
3908
|
+
const byte *src_cur;
|
3909
|
+
|
3910
|
+
for (iteration = 0; iteration != 2; iteration++) {
|
3911
|
+
size_t dst_size_cur = dst_size;
|
3912
|
+
if (dst_size_cur > 0x10000) dst_size_cur = 0x10000;
|
3913
|
+
|
3914
|
+
if (iteration == 0) {
|
3915
|
+
lz->off32_stream = lz->off32_stream_1;
|
3916
|
+
lz->off32_stream_end = lz->off32_stream_1 + lz->off32_size_1 * 4;
|
3917
|
+
lz->cmd_stream_end = lz->cmd_stream + lz->cmd_stream_2_offs;
|
3918
|
+
} else {
|
3919
|
+
lz->off32_stream = lz->off32_stream_2;
|
3920
|
+
lz->off32_stream_end = lz->off32_stream_2 + lz->off32_size_2 * 4;
|
3921
|
+
lz->cmd_stream_end = lz->cmd_stream + lz->cmd_stream_2_offs_end;
|
3922
|
+
lz->cmd_stream += lz->cmd_stream_2_offs;
|
3923
|
+
}
|
3924
|
+
|
3925
|
+
if (mode == 0) {
|
3926
|
+
src_cur = Mermaid_Mode0(dst, dst_size_cur, dst_end, dst_start, src_end, lz, &saved_dist,
|
3927
|
+
(offset == 0) && (iteration == 0) ? 8 : 0);
|
3928
|
+
} else {
|
3929
|
+
src_cur = Mermaid_Mode1(dst, dst_size_cur, dst_end, dst_start, src_end, lz, &saved_dist,
|
3930
|
+
(offset == 0) && (iteration == 0) ? 8 : 0);
|
3931
|
+
}
|
3932
|
+
if (src_cur == NULL)
|
3933
|
+
return false;
|
3934
|
+
|
3935
|
+
dst += dst_size_cur;
|
3936
|
+
dst_size -= dst_size_cur;
|
3937
|
+
if (dst_size == 0)
|
3938
|
+
break;
|
3939
|
+
}
|
3940
|
+
|
3941
|
+
if (src_cur != src_end)
|
3942
|
+
return false;
|
3943
|
+
|
3944
|
+
return true;
|
3945
|
+
}
|
3946
|
+
|
3947
|
+
|
3948
|
+
// Decodes one Mermaid quantum. The output is produced in chunks of at most
// 128k (0x20000) bytes; every chunk has its own 3-byte big-endian header.
//
//   dst, dst_end    output range to fill.
//   dst_start       start of the output window (history before |dst|).
//   src, src_end    compressed input.
//   temp, temp_end  scratch memory for the LZ table and decoded streams.
//
// Returns the number of source bytes consumed, or -1 on malformed input or
// when the scratch memory is too small.
int Mermaid_DecodeQuantum(byte *dst, byte *dst_end, byte *dst_start,
                          const byte *src, const byte *src_end,
                          byte *temp, byte *temp_end) {
  const byte *src_in = src;
  int mode, chunkhdr, dst_count, src_used, written_bytes;

  while (dst_end - dst != 0) {
    dst_count = dst_end - dst;
    if (dst_count > 0x20000) dst_count = 0x20000;
    if (src_end - src < 4)
      return -1;
    chunkhdr = src[2] | src[1] << 8 | src[0] << 16;
    if (!(chunkhdr & 0x800000)) {
      // Stored without any match copying.
      byte *out = dst;
      src_used = Kraken_DecodeBytes(&out, src, src_end, &written_bytes, dst_count, false, temp, temp_end);
      if (src_used < 0 || written_bytes != dst_count)
        return -1;
    } else {
      src += 3;
      src_used = chunkhdr & 0x7FFFF;
      mode = (chunkhdr >> 19) & 0xF;
      if (src_end - src < src_used)
        return -1;
      if (src_used < dst_count) {
        // The LZ table is placed at the start of |temp|; refuse to decode
        // when the caller's scratch memory cannot even hold the table.
        if (temp_end - temp < (intptr_t)sizeof(MermaidLzTable))
          return -1;
        int temp_usage = 2 * dst_count + 32;
        if (temp_usage > 0x40000) temp_usage = 0x40000;
        // Never hand out a scratch window that reaches past |temp_end|.
        if (temp_usage > temp_end - temp) temp_usage = (int)(temp_end - temp);
        if (!Mermaid_ReadLzTable(mode,
                                 src, src + src_used,
                                 dst, dst_count,
                                 dst - dst_start,
                                 temp + sizeof(MermaidLzTable), temp + temp_usage,
                                 (MermaidLzTable*)temp))
          return -1;
        if (!Mermaid_ProcessLzRuns(mode,
                                   src, src + src_used,
                                   dst, dst_count,
                                   dst - dst_start, dst_end,
                                   (MermaidLzTable*)temp))
          return -1;
      } else if (src_used > dst_count || mode != 0) {
        return -1;
      } else {
        // Stored chunk: compressed size equals output size and mode is 0.
        memmove(dst, src, dst_count);
      }
    }
    src += src_used;
    dst += dst_count;
  }
  return src - src_in;
}
|
3999
|
+
|
4000
|
+
int LZNA_DecodeQuantum(byte *dst, byte *dst_end, byte *dst_start,
|
4001
|
+
const byte *src, const byte *src_end,
|
4002
|
+
struct LznaState *lut);
|
4003
|
+
void LZNA_InitLookup(LznaState *lut);
|
4004
|
+
|
4005
|
+
struct BitknitState;
|
4006
|
+
|
4007
|
+
void BitknitState_Init(BitknitState *bk);
|
4008
|
+
size_t Bitknit_Decode(const byte *src, const byte *src_end, byte *dst, byte *dst_end, byte *dst_start, BitknitState *bk);
|
4009
|
+
|
4010
|
+
|
4011
|
+
// Copies |length| bytes from |dst - offset| to |dst|, front to back, so an
// overlapping source (offset < length) repeats the pattern as LZ matches do.
void Kraken_CopyWholeMatch(byte *dst, uint32 offset, size_t length) {
  size_t i = 0;
  const byte *src = dst - offset;
  if (offset >= 8) {
    // With a distance of at least 8 each 8-byte block is non-overlapping,
    // so memcpy is valid; it also avoids unaligned, type-punned 64-bit
    // loads and stores.
    for (; i + 8 <= length; i += 8)
      memcpy(dst + i, src + i, 8);
  }
  // Tail, or the whole copy when the distance is shorter than 8 bytes.
  for (; i < length; i++)
    dst[i] = src[i];
}
|
4021
|
+
|
4022
|
+
bool Kraken_DecodeStep(struct KrakenDecoder *dec,
|
4023
|
+
byte *dst_start, int offset, size_t dst_bytes_left_in,
|
4024
|
+
const byte *src, size_t src_bytes_left) {
|
4025
|
+
const byte *src_in = src;
|
4026
|
+
const byte *src_end = src + src_bytes_left;
|
4027
|
+
KrakenQuantumHeader qhdr;
|
4028
|
+
int n;
|
4029
|
+
|
4030
|
+
if ((offset & 0x3FFFF) == 0) {
|
4031
|
+
src = Kraken_ParseHeader(&dec->hdr, src);
|
4032
|
+
if (!src)
|
4033
|
+
return false;
|
4034
|
+
}
|
4035
|
+
|
4036
|
+
bool is_kraken_decoder = (dec->hdr.decoder_type == 6 || dec->hdr.decoder_type == 10 || dec->hdr.decoder_type == 12);
|
4037
|
+
|
4038
|
+
int dst_bytes_left = (int)Min(is_kraken_decoder ? 0x40000 : 0x4000, dst_bytes_left_in);
|
4039
|
+
|
4040
|
+
if (dec->hdr.uncompressed) {
|
4041
|
+
if (src_end - src < dst_bytes_left) {
|
4042
|
+
dec->src_used = dec->dst_used = 0;
|
4043
|
+
return true;
|
4044
|
+
}
|
4045
|
+
memmove(dst_start + offset, src, dst_bytes_left);
|
4046
|
+
dec->src_used = (src - src_in) + dst_bytes_left;
|
4047
|
+
dec->dst_used = dst_bytes_left;
|
4048
|
+
return true;
|
4049
|
+
}
|
4050
|
+
|
4051
|
+
if (is_kraken_decoder) {
|
4052
|
+
src = Kraken_ParseQuantumHeader(&qhdr, src, dec->hdr.use_checksums);
|
4053
|
+
} else {
|
4054
|
+
src = LZNA_ParseQuantumHeader(&qhdr, src, dec->hdr.use_checksums, dst_bytes_left);
|
4055
|
+
}
|
4056
|
+
|
4057
|
+
if (!src || src > src_end)
|
4058
|
+
return false;
|
4059
|
+
|
4060
|
+
// Too few bytes in buffer to make any progress?
|
4061
|
+
if ((uintptr_t)(src_end - src) < qhdr.compressed_size) {
|
4062
|
+
dec->src_used = dec->dst_used = 0;
|
4063
|
+
return true;
|
4064
|
+
}
|
4065
|
+
|
4066
|
+
if (qhdr.compressed_size > (uint32)dst_bytes_left)
|
4067
|
+
return false;
|
4068
|
+
|
4069
|
+
if (qhdr.compressed_size == 0) {
|
4070
|
+
if (qhdr.whole_match_distance != 0) {
|
4071
|
+
if (qhdr.whole_match_distance > (uint32)offset)
|
4072
|
+
return false;
|
4073
|
+
Kraken_CopyWholeMatch(dst_start + offset, qhdr.whole_match_distance, dst_bytes_left);
|
4074
|
+
} else {
|
4075
|
+
memset(dst_start + offset, qhdr.checksum, dst_bytes_left);
|
4076
|
+
}
|
4077
|
+
dec->src_used = (src - src_in);
|
4078
|
+
dec->dst_used = dst_bytes_left;
|
4079
|
+
return true;
|
4080
|
+
}
|
4081
|
+
|
4082
|
+
if (dec->hdr.use_checksums &&
|
4083
|
+
(Kraken_GetCrc(src, qhdr.compressed_size) & 0xFFFFFF) != qhdr.checksum)
|
4084
|
+
return false;
|
4085
|
+
|
4086
|
+
if (qhdr.compressed_size == dst_bytes_left) {
|
4087
|
+
memmove(dst_start + offset, src, dst_bytes_left);
|
4088
|
+
dec->src_used = (src - src_in) + dst_bytes_left;
|
4089
|
+
dec->dst_used = dst_bytes_left;
|
4090
|
+
return true;
|
4091
|
+
}
|
4092
|
+
|
4093
|
+
if (dec->hdr.decoder_type == 6) {
|
4094
|
+
n = Kraken_DecodeQuantum(dst_start + offset, dst_start + offset + dst_bytes_left, dst_start,
|
4095
|
+
src, src + qhdr.compressed_size,
|
4096
|
+
dec->scratch, dec->scratch + dec->scratch_size);
|
4097
|
+
} else if (dec->hdr.decoder_type == 5) {
|
4098
|
+
if (dec->hdr.restart_decoder) {
|
4099
|
+
dec->hdr.restart_decoder = false;
|
4100
|
+
LZNA_InitLookup((struct LznaState*)dec->scratch);
|
4101
|
+
}
|
4102
|
+
n = LZNA_DecodeQuantum(dst_start + offset, dst_start + offset + dst_bytes_left, dst_start,
|
4103
|
+
src, src + qhdr.compressed_size,
|
4104
|
+
(struct LznaState*)dec->scratch);
|
4105
|
+
} else if (dec->hdr.decoder_type == 11) {
|
4106
|
+
if (dec->hdr.restart_decoder) {
|
4107
|
+
dec->hdr.restart_decoder = false;
|
4108
|
+
BitknitState_Init((struct BitknitState*)dec->scratch);
|
4109
|
+
}
|
4110
|
+
n = (int)Bitknit_Decode(src, src + qhdr.compressed_size, dst_start + offset, dst_start + offset + dst_bytes_left, dst_start, (struct BitknitState*)dec->scratch);
|
4111
|
+
|
4112
|
+
} else if (dec->hdr.decoder_type == 10) {
|
4113
|
+
n = Mermaid_DecodeQuantum(dst_start + offset, dst_start + offset + dst_bytes_left, dst_start,
|
4114
|
+
src, src + qhdr.compressed_size,
|
4115
|
+
dec->scratch, dec->scratch + dec->scratch_size);
|
4116
|
+
} else if (dec->hdr.decoder_type == 12) {
|
4117
|
+
n = Leviathan_DecodeQuantum(dst_start + offset, dst_start + offset + dst_bytes_left, dst_start,
|
4118
|
+
src, src + qhdr.compressed_size,
|
4119
|
+
dec->scratch, dec->scratch + dec->scratch_size);
|
4120
|
+
} else {
|
4121
|
+
return false;
|
4122
|
+
}
|
4123
|
+
|
4124
|
+
if (n != qhdr.compressed_size)
|
4125
|
+
return false;
|
4126
|
+
|
4127
|
+
dec->src_used = (src - src_in) + n;
|
4128
|
+
dec->dst_used = dst_bytes_left;
|
4129
|
+
return true;
|
4130
|
+
}
|
4131
|
+
|
4132
|
+
ssize_t Kraken_Decompress(const byte *src, size_t src_len, byte *dst, size_t dst_len) {
|
4133
|
+
KrakenDecoder *dec = Kraken_Create();
|
4134
|
+
ssize_t offset = 0;
|
4135
|
+
while (dst_len != 0) {
|
4136
|
+
if (!Kraken_DecodeStep(dec, dst, offset, dst_len, src, src_len))
|
4137
|
+
goto FAIL;
|
4138
|
+
if (dec->src_used == 0)
|
4139
|
+
goto FAIL;
|
4140
|
+
src += dec->src_used;
|
4141
|
+
src_len -= dec->src_used;
|
4142
|
+
dst_len -= dec->dst_used;
|
4143
|
+
offset += dec->dst_used;
|
4144
|
+
}
|
4145
|
+
if (src_len != 0)
|
4146
|
+
goto FAIL;
|
4147
|
+
Kraken_Destroy(dec);
|
4148
|
+
return offset;
|
4149
|
+
FAIL:
|
4150
|
+
Kraken_Destroy(dec);
|
4151
|
+
return -1;
|
4152
|
+
}
|
4153
|
+
|