extzstd 0.0.1.CONCEPT

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1801 @@
1
+ /*
2
+ zstd - standard compression library
3
+ Copyright (C) 2014-2015, Yann Collet.
4
+
5
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6
+
7
+ Redistribution and use in source and binary forms, with or without
8
+ modification, are permitted provided that the following conditions are
9
+ met:
10
+ * Redistributions of source code must retain the above copyright
11
+ notice, this list of conditions and the following disclaimer.
12
+ * Redistributions in binary form must reproduce the above
13
+ copyright notice, this list of conditions and the following disclaimer
14
+ in the documentation and/or other materials provided with the
15
+ distribution.
16
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
+
28
+ You can contact the author at :
29
+ - zstd source repository : https://github.com/Cyan4973/zstd
30
+ - zstd public forum : https://groups.google.com/forum/#!forum/lz4c
31
+ */
32
+
33
+ /****************************************************************
34
+ * Tuning parameters
35
+ *****************************************************************/
36
+ /* MEMORY_USAGE :
37
+ * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
38
+ * Increasing memory usage improves compression ratio
39
+ * Reduced memory usage can improve speed, due to cache effect */
40
+ #define ZSTD_MEMORY_USAGE 17
41
+
42
+
43
+ /**************************************
44
+ CPU Feature Detection
45
+ **************************************/
46
+ /*
47
+ * Automated efficient unaligned memory access detection
48
+ * Based on known hardware architectures
49
+ * This list will be updated thanks to feedbacks
50
+ */
51
+ #if defined(CPU_HAS_EFFICIENT_UNALIGNED_MEMORY_ACCESS) \
52
+ || defined(__ARM_FEATURE_UNALIGNED) \
53
+ || defined(__i386__) || defined(__x86_64__) \
54
+ || defined(_M_IX86) || defined(_M_X64) \
55
+ || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_8__) \
56
+ || (defined(_M_ARM) && (_M_ARM >= 7))
57
+ # define ZSTD_UNALIGNED_ACCESS 1
58
+ #else
59
+ # define ZSTD_UNALIGNED_ACCESS 0
60
+ #endif
61
+
62
+
63
+ /********************************************************
64
+ * Includes
65
+ *********************************************************/
66
+ #include <stdlib.h> /* calloc */
67
+ #include <string.h> /* memcpy, memmove */
68
+ #include <stdio.h> /* debug : printf */
69
+ #include "zstd_static.h"
70
+ #if defined(__clang__) || defined(__GNUC__)
71
+ # ifdef __clang__
72
+ # pragma clang diagnostic ignored "-Wtypedef-redefinition"
73
+ # endif
74
+ # include "fse.c" /* due to GCC/Clang inlining limitations, including *.c runs noticeably faster */
75
+ #else
76
+ # include "fse_static.h"
77
+ #endif
78
+
79
+
80
+ /********************************************************
81
+ * Compiler specifics
82
+ *********************************************************/
83
+ //#if (!(defined(_MSC_VER) && (_MSC_VER<=1500))) /* exclude Visual 2008 and below */
84
+ #ifdef __AVX2__
85
+ # include <immintrin.h> /* AVX2 intrinsics */
86
+ #endif
87
+
88
+ #ifdef _MSC_VER /* Visual Studio */
89
+ # define FORCE_INLINE static __forceinline
90
+ # include <intrin.h> /* For Visual 2005 */
91
+ # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
92
+ # pragma warning(disable : 4324) /* disable: C4324: padded structure */
93
+ #else
94
+ # define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
95
+ # ifdef __GNUC__
96
+ # define FORCE_INLINE static inline __attribute__((always_inline))
97
+ # else
98
+ # define FORCE_INLINE static inline
99
+ # endif
100
+ #endif
101
+
102
+
103
+ /********************************************************
104
+ * Basic Types
105
+ *********************************************************/
106
+ #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99
107
+ # include <stdint.h>
108
+ typedef uint8_t BYTE;
109
+ typedef uint16_t U16;
110
+ typedef int16_t S16;
111
+ typedef uint32_t U32;
112
+ typedef int32_t S32;
113
+ typedef uint64_t U64;
114
+ #else
115
+ typedef unsigned char BYTE;
116
+ typedef unsigned short U16;
117
+ typedef signed short S16;
118
+ typedef unsigned int U32;
119
+ typedef signed int S32;
120
+ typedef unsigned long long U64;
121
+ #endif
122
+
123
+
124
+ /********************************************************
125
+ * Constants
126
+ *********************************************************/
127
+ static const U32 ZSTD_magicNumber = 0xFD2FB51C;
128
+
129
+ #define HASH_LOG (ZSTD_MEMORY_USAGE - 2)
130
+ #define HASH_TABLESIZE (1 << HASH_LOG)
131
+ #define HASH_MASK (HASH_TABLESIZE - 1)
132
+
133
+ #define MAXD_LOG 16
134
+ #define MAX_DISTANCE ((1 << MAXD_LOG) - 1)
135
+
136
+ #define KNUTH 2654435761
137
+
138
+ #define BIT7 128
139
+ #define BIT6 64
140
+ #define BIT5 32
141
+ #define BIT4 16
142
+
143
#define KB *(1 <<10)
#define MB *(1 <<20)
#define GB *(1U<<30)   /* fixed: was (1U<<20), which silently made "1 GB" equal to 1 MB (g_maxLimit) */
146
+
147
+ #define BLOCKSIZE (128 KB) // define, for static allocation
148
+ static const U32 g_maxDistance = 512 KB;
149
+ static const U32 g_maxLimit = 1 GB;
150
+ static const U32 g_searchStrength = 8;
151
+
152
+ #define WORKPLACESIZE (BLOCKSIZE*11/4)
153
+ #define MINMATCH 4
154
+ #define MLbits 7
155
+ #define LLbits 6
156
+ #define Offbits 5
157
+ #define MaxML ((1<<MLbits )-1)
158
+ #define MaxLL ((1<<LLbits )-1)
159
+ #define MaxOff ((1<<Offbits)-1)
160
+ #define LitFSELog 11
161
+ #define MLFSELog 10
162
+ #define LLFSELog 10
163
+ #define OffFSELog 9
164
+
165
+ #define LITERAL_NOENTROPY 63
166
+ #define COMMAND_NOENTROPY 7 /* to remove */
167
+
168
+ static const size_t ZSTD_blockHeaderSize = 3;
169
+ static const size_t ZSTD_frameHeaderSize = 4;
170
+
171
+
172
+ /********************************************************
173
+ * Memory operations
174
+ *********************************************************/
175
static unsigned ZSTD_32bits(void)
{
    /* non-zero when the build target uses 4-byte pointers */
    return (sizeof(void*) == 4) ? 1 : 0;
}

static unsigned ZSTD_64bits(void)
{
    /* non-zero when the build target uses 8-byte pointers */
    return (sizeof(void*) == 8) ? 1 : 0;
}
177
+
178
static unsigned ZSTD_isLittleEndian(void)
{
    /* runtime endianness probe; deliberately not static storage
     * (a static local was measured to be detrimental to performance) */
    const union { unsigned u; unsigned char bytes[sizeof(unsigned)]; } probe = { 1 };
    return probe.bytes[0];
}
183
+
184
+ static U16 ZSTD_read16(const void* p) { return *(U16*)p; }
185
+
186
+ static U32 ZSTD_read32(const void* p) { return *(U32*)p; }
187
+
188
+ static size_t ZSTD_read_ARCH(const void* p) { return *(size_t*)p; }
189
+
190
/* Small fixed-size copies; the constant lengths let the compiler lower
 * them to plain load/store pairs. */
static void ZSTD_copy4(void* dst, const void* src)
{
    memcpy(dst, src, 4);
}

static void ZSTD_copy8(void* dst, const void* src)
{
    memcpy(dst, src, 8);
}

/* copy 8 bytes then advance both cursors */
#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
195
+
196
+ static void ZSTD_wildcopy(void* dst, const void* src, size_t length)
197
+ {
198
+ const BYTE* ip = (const BYTE*)src;
199
+ BYTE* op = (BYTE*)dst;
200
+ BYTE* const oend = op + length;
201
+ while (op < oend) COPY8(op, ip);
202
+ }
203
+
204
+ static U32 ZSTD_readLE32(const void* memPtr)
205
+ {
206
+ if (ZSTD_isLittleEndian())
207
+ return ZSTD_read32(memPtr);
208
+ else
209
+ {
210
+ const BYTE* p = (const BYTE*)memPtr;
211
+ return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));
212
+ }
213
+ }
214
+
215
+ static void ZSTD_writeLE32(void* memPtr, U32 val32)
216
+ {
217
+ if (ZSTD_isLittleEndian())
218
+ {
219
+ memcpy(memPtr, &val32, 4);
220
+ }
221
+ else
222
+ {
223
+ BYTE* p = (BYTE*)memPtr;
224
+ p[0] = (BYTE)val32;
225
+ p[1] = (BYTE)(val32>>8);
226
+ p[2] = (BYTE)(val32>>16);
227
+ p[3] = (BYTE)(val32>>24);
228
+ }
229
+ }
230
+
231
+ static U32 ZSTD_readBE32(const void* memPtr)
232
+ {
233
+ const BYTE* p = (const BYTE*)memPtr;
234
+ return (U32)(((U32)p[0]<<24) + ((U32)p[1]<<16) + ((U32)p[2]<<8) + ((U32)p[3]<<0));
235
+ }
236
+
237
+ static void ZSTD_writeBE32(void* memPtr, U32 value)
238
+ {
239
+ BYTE* const p = (BYTE* const) memPtr;
240
+ p[0] = (BYTE)(value>>24);
241
+ p[1] = (BYTE)(value>>16);
242
+ p[2] = (BYTE)(value>>8);
243
+ p[3] = (BYTE)(value>>0);
244
+ }
245
+
246
/* Emit `value` as a base-128 varint : 7 data bits per byte, high bit set on
 * every byte except the last. Returns the number of bytes written. */
static size_t ZSTD_writeProgressive(void* ptr, size_t value)
{
    unsigned char* const out = (unsigned char*)ptr;
    size_t pos = 0;

    for (;;)
    {
        unsigned char chunk = (unsigned char)(value & 127);
        value >>= 7;
        if (value) chunk |= 128;   /* continuation bit */
        out[pos++] = chunk;
        if (!(chunk & 128)) break;
    }

    return pos;
}
261
+
262
+
263
+ static size_t ZSTD_readProgressive(size_t* result, const void* ptr)
264
+ {
265
+ const BYTE* const bStart = (const BYTE* const)ptr;
266
+ const BYTE* byte = bStart;
267
+ size_t r = 0;
268
+ U32 shift = 0;
269
+
270
+ do
271
+ {
272
+ r += (*byte & 127) << shift;
273
+ shift += 7;
274
+ } while (*byte++ & 128);
275
+
276
+ *result = r;
277
+ return byte - bStart;
278
+ }
279
+
280
+
281
/**************************************
*  Local structures
***************************************/
/* 2-bit block-type tag stored in the top bits of a block header */
typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;

/* decoded block header : type + declared (uncompressed) size */
typedef struct
{
    blockType_t blockType;
    U32 origSize;
} blockProperties_t;

/* internal compression context; the public handle type is ZSTD_cctx_t */
typedef struct
{
    const BYTE* base;        /* window start; hash-table entries are offsets relative to it */
    U32 current;
    U32 nextUpdate;
    BYTE* workplace;         /* WORKPLACESIZE scratch buffer (sequence storage) */
#ifdef __AVX2__
    __m256i hashTable[HASH_TABLESIZE>>3];   /* same byte size as the U32 table, AVX2-aligned */
#else
    U32 hashTable[HASH_TABLESIZE];
#endif
} cctxi_t;
304
+
305
+
306
+ ZSTD_cctx_t ZSTD_createCCtx(void)
307
+ {
308
+ cctxi_t* srt = (cctxi_t *) malloc( sizeof(cctxi_t) );
309
+ srt->workplace = (BYTE*) malloc(WORKPLACESIZE);
310
+ return (ZSTD_cctx_t)srt;
311
+ }
312
+
313
+
314
+ void ZSTD_resetCCtx(ZSTD_cctx_t ctx)
315
+ {
316
+ cctxi_t* srt = (cctxi_t*)ctx;
317
+ srt->base = NULL;
318
+ memset(srt->hashTable, 0, HASH_TABLESIZE*4);
319
+ }
320
+
321
+
322
+ size_t ZSTD_freeCCtx(ZSTD_cctx_t ctx)
323
+ {
324
+ cctxi_t *srt = (cctxi_t *) (ctx);
325
+ free(srt->workplace);
326
+ free(srt);
327
+ return 0;
328
+ }
329
+
330
+
331
+ /**************************************
332
+ * Error Management
333
+ **************************************/
334
/* tells if a return value is an error code */
unsigned ZSTD_isError(size_t code)
{
    /* error codes are returned as (size_t)-errorNumber, i.e. the topmost
     * ZSTD_ERROR_maxCode values of the size_t range */
    return (code > (size_t)(-ZSTD_ERROR_maxCode));
}
339
+
340
/* stringify each member of the error list : one name per enum value */
#define ZSTD_GENERATE_STRING(STRING) #STRING,
static const char* ZSTD_errorStrings[] = { ZSTD_LIST_ERRORS(ZSTD_GENERATE_STRING) };

/* provides error code string (useful for debugging) */
const char* ZSTD_getErrorName(size_t code)
{
    static const char* codeError = "Unspecified error code";
    /* error codes are (size_t)-errorNumber, so -(int)code recovers the table index */
    if (ZSTD_isError(code)) return ZSTD_errorStrings[-(int)(code)];
    return codeError;
}
350
+
351
+
352
+ /**************************************
353
+ * Tool functions
354
+ **************************************/
355
/* returns the compile-time library version as a single integer */
unsigned ZSTD_versionNumber (void) { return ZSTD_VERSION_NUMBER; }
356
+
357
/* index of the highest set bit of val (val must be non-zero) */
static unsigned ZSTD_highbit(U32 val)
{
#   if defined(_MSC_VER)   /* Visual */
    unsigned long r;
    _BitScanReverse(&r, val);
    return (unsigned)r;
#   elif defined(__GNUC__) && (GCC_VERSION >= 304)   // GCC Intrinsic
    return 31 - __builtin_clz(val);
#   else   // Software version
    /* smear val down into a full low-bit mask, then perfect-hash the mask
     * with a de Bruijn multiplier to find the top bit position */
    static const int DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
    U32 v = val;
    int r;
    v |= v >> 1;
    v |= v >> 2;
    v |= v >> 4;
    v |= v >> 8;
    v |= v >> 16;
    r = DeBruijnClz[(U32)(v * 0x07C4ACDDU) >> 27];
    return r;
#   endif
}
378
+
379
/* Number of equal leading bytes implied by `val`, where val is the XOR of two
 * words that differ (val != 0) : counts trailing zero bytes on little-endian
 * hosts, leading zero bytes on big-endian hosts. Used by ZSTD_count to locate
 * the first mismatching byte of a match. */
static unsigned ZSTD_NbCommonBytes (register size_t val)
{
    if (ZSTD_isLittleEndian())
    {
        if (ZSTD_64bits())
        {
#       if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
            unsigned long r = 0;
            _BitScanForward64( &r, (U64)val );
            return (int)(r>>3);   /* bit index -> byte index */
#       elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
            return (__builtin_ctzll((U64)val) >> 3);
#       else
            /* de Bruijn fallback : isolate lowest set bit, hash to a byte position */
            static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
            return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
#       endif
        }
        else /* 32 bits */
        {
#       if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
            unsigned long r;
            _BitScanForward( &r, (U32)val );
            return (int)(r>>3);
#       elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
            return (__builtin_ctz((U32)val) >> 3);
#       else
            static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
            return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
#       endif
        }
    }
    else /* Big Endian CPU */
    {
        if (ZSTD_64bits())
        {
#       if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
            unsigned long r = 0;
            _BitScanReverse64( &r, val );
            return (unsigned)(r>>3);
#       elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
            return (__builtin_clzll(val) >> 3);
#       else
            /* branchy fallback : narrow down the highest non-zero byte */
            unsigned r;
            const unsigned n32 = sizeof(size_t)*4;   /* calculate this way due to compiler complaining in 32-bits mode */
            if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
            if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
            r += (!val);
            return r;
#       endif
        }
        else /* 32 bits */
        {
#       if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
            unsigned long r = 0;
            _BitScanReverse( &r, (unsigned long)val );
            return (unsigned)(r>>3);
#       elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
            return (__builtin_clz(val) >> 3);
#       else
            unsigned r;
            if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
            r += (!val);
            return r;
#       endif
        }
    }
}
446
+
447
/* Length of the common prefix of pIn and pMatch, scanning word-at-a-time,
 * never reading at or beyond pInLimit. */
static unsigned ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit)
{
    const BYTE* const pStart = pIn;

    /* main loop : compare one machine word per iteration while a full word fits */
    while ((pIn<pInLimit-(sizeof(size_t)-1)))
    {
        size_t diff = ZSTD_read_ARCH(pMatch) ^ ZSTD_read_ARCH(pIn);
        if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; }
        pIn += ZSTD_NbCommonBytes(diff);   /* locate the first differing byte */
        return (unsigned)(pIn - pStart);
    }

    /* tail : finish with progressively narrower loads (4 / 2 / 1 bytes) */
    if (ZSTD_64bits()) if ((pIn<(pInLimit-3)) && (ZSTD_read32(pMatch) == ZSTD_read32(pIn))) { pIn+=4; pMatch+=4; }
    if ((pIn<(pInLimit-1)) && (ZSTD_read16(pMatch) == ZSTD_read16(pIn))) { pIn+=2; pMatch+=2; }
    if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
    return (unsigned)(pIn - pStart);
}
464
+
465
+
466
+ /********************************************************
467
+ * Compression
468
+ *********************************************************/
469
/* worst-case compressed size for a srcSize input : FSE bound plus zstd framing */
size_t ZSTD_compressBound(size_t srcSize)
{
    const size_t fseBound = FSE_compressBound(srcSize);
    return fseBound + 12;
}
473
+
474
+
475
+ static size_t ZSTD_compressRle (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
476
+ {
477
+ BYTE* const ostart = (BYTE* const)dst;
478
+
479
+ /* at this stage : dstSize >= FSE_compressBound(srcSize) > (ZSTD_blockHeaderSize+1) (checked by ZSTD_compressLiterals()) */
480
+ (void)maxDstSize;
481
+
482
+ ostart[ZSTD_blockHeaderSize] = *(BYTE*)src;
483
+
484
+ // Build header
485
+ {
486
+ ostart[0] = (BYTE)(srcSize>>16);
487
+ ostart[1] = (BYTE)(srcSize>>8);
488
+ ostart[2] = (BYTE)srcSize;
489
+ ostart[0] += (BYTE)(bt_rle<<6);
490
+ }
491
+
492
+ return ZSTD_blockHeaderSize+1;
493
+ }
494
+
495
+
496
+ static size_t ZSTD_noCompressBlock (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
497
+ {
498
+ BYTE* const ostart = (BYTE* const)dst;
499
+
500
+ if (srcSize + ZSTD_blockHeaderSize > maxDstSize) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
501
+ memcpy(ostart + ZSTD_blockHeaderSize, src, srcSize);
502
+
503
+ // Build header
504
+ {
505
+ ostart[0] = (BYTE)(srcSize>>16);
506
+ ostart[1] = (BYTE)(srcSize>>8);
507
+ ostart[2] = (BYTE)srcSize;
508
+ ostart[0] += (BYTE)(bt_raw<<6); /* is a raw (uncompressed) block */
509
+ }
510
+
511
+ return ZSTD_blockHeaderSize+srcSize;
512
+ }
513
+
514
+
515
/* return : size of CStream in bits
 * (NOTE(review): FSE_closeCStream conventionally returns a byte count — confirm)
 * FSE-encodes srcSize literal bytes into dst using a prebuilt CTable.
 * Two interleaved FSE states are used so encodes can overlap in the pipeline;
 * bytes are consumed back-to-front state-wise, matching the FSE decoder. */
static size_t ZSTD_compressLiterals_usingCTable(void* dst, size_t dstSize,
                                          const void* src, size_t srcSize,
                                          const void* CTable)
{
    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart;
    const BYTE* const iend = istart + srcSize;
    FSE_CStream_t bitC;
    FSE_CState_t CState1, CState2;

    // init
    (void)dstSize;   // objective : ensure it fits into dstBuffer (Todo)
    FSE_initCStream(&bitC, dst);
    FSE_initCState(&CState1, CTable);
    CState2 = CState1;

    /* Note : at this stage, srcSize > LITERALS_NOENTROPY (checked by ZSTD_compressLiterals()) */
    // join to mod 2 : consume one byte so the remaining count is even
    if (srcSize & 1)
    {
        FSE_encodeByte(&bitC, &CState1, *ip++);
        FSE_flushBits(&bitC);
    }

    // join to mod 4 : only when the accumulator can hold 4 encodes between flushes
    if ((sizeof(size_t)*8 > LitFSELog*4+7 ) && (srcSize & 2))   // test bit 2
    {
        FSE_encodeByte(&bitC, &CState2, *ip++);
        FSE_encodeByte(&bitC, &CState1, *ip++);
        FSE_flushBits(&bitC);
    }

    // 2 or 4 encoding per loop, depending on accumulator width
    while (ip<iend)
    {
        FSE_encodeByte(&bitC, &CState2, *ip++);

        if (sizeof(size_t)*8 < LitFSELog*2+7 )   // this test must be static
            FSE_flushBits(&bitC);

        FSE_encodeByte(&bitC, &CState1, *ip++);

        if (sizeof(size_t)*8 > LitFSELog*4+7 )   // this test must be static
        {
            FSE_encodeByte(&bitC, &CState2, *ip++);
            FSE_encodeByte(&bitC, &CState1, *ip++);
        }

        FSE_flushBits(&bitC);
    }

    /* terminate both states (reverse order of init, as the decoder expects) */
    FSE_flushCState(&bitC, &CState2);
    FSE_flushCState(&bitC, &CState1);
    return FSE_closeCStream(&bitC);
}
571
+
572
+
573
/* Minimum number of bytes a compressed block must save to be worth keeping :
 * 1/64th of the source size, always at least 1. */
size_t ZSTD_minGain(size_t srcSize)
{
    return 1 + (srcSize >> 6);
}
577
+
578
+
579
/* FSE-compress a literals run into dst as a bt_compressed block.
 * Return conventions : block size on success; 0 when not compressible
 * enough (caller falls back to raw); 1 when the input is a single repeated
 * symbol (caller emits an RLE block); (size_t)-error on failure. */
static size_t ZSTD_compressLiterals (void* dst, size_t dstSize,
                                     const void* src, size_t srcSize)
{
    const BYTE* const istart = (const BYTE*) src;
    const BYTE* ip = istart;

    BYTE* const ostart = (BYTE*) dst;
    BYTE* op = ostart + ZSTD_blockHeaderSize;   /* header is back-filled at the end */
    BYTE* const oend = ostart + dstSize;

    U32 maxSymbolValue = 256;
    U32 tableLog = LitFSELog;
    U32 count[256];
    S16 norm[256];
    U32 CTable[ FSE_CTABLE_SIZE_U32(LitFSELog, 256) ];
    size_t errorCode;
    const size_t minGain = ZSTD_minGain(srcSize);

    // early out
    if (dstSize < FSE_compressBound(srcSize)) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;

    // Scan input and build symbol stats
    errorCode = FSE_count (count, ip, srcSize, &maxSymbolValue);
    if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC;
    if (errorCode == srcSize) return 1;   /* single symbol : caller should RLE it */
    if (errorCode < ((srcSize * 7) >> 10)) return 0;   /* distribution too flat to compress */

    tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue);
    errorCode = (int)FSE_normalizeCount (norm, tableLog, count, srcSize, maxSymbolValue);
    if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC;

    // Write table description header
    errorCode = FSE_writeHeader (op, FSE_MAX_HEADERSIZE, norm, maxSymbolValue, tableLog);
    if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC;
    op += errorCode;

    // Compress
    errorCode = FSE_buildCTable (&CTable, norm, maxSymbolValue, tableLog);
    if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC;
    errorCode = ZSTD_compressLiterals_usingCTable(op, oend - op, ip, srcSize, &CTable);
    if (ZSTD_isError(errorCode)) return errorCode;
    op += errorCode;

    // check compressibility : must save at least minGain bytes
    if ( (size_t)(op-ostart) >= srcSize-minGain)
        return 0;

    // Build header : 24-bit big-endian payload size + block type in top 2 bits
    {
        size_t totalSize;
        totalSize = op - ostart - ZSTD_blockHeaderSize;
        ostart[0] = (BYTE)(totalSize>>16);
        ostart[1] = (BYTE)(totalSize>>8);
        ostart[2] = (BYTE)totalSize;
        ostart[0] += (BYTE)(bt_compressed<<6); /* is a block, is compressed */
    }

    return op-ostart;
}
638
+
639
+
640
/* Entropy-code one block's parsed sequences into dst.
 * Inputs are the parallel arrays filled by ZSTD_storeSeq : literals, literal
 * lengths, offsets, match lengths, and the "dumps" overflow stream, plus
 * lastLLSize (presumably the trailing literal-run length — confirm with caller).
 * Returns the compressed size, 0 when the block is not compressible enough,
 * or an error code. */
static size_t ZSTD_compressEntropy(BYTE* dst, size_t maxDstSize,
                         const BYTE* op_lit_start, const BYTE* op_lit,
                         const BYTE* op_litLength_start, const BYTE* op_litLength,
                         const BYTE* op_matchLength_start,
                         const U32* op_offset_start,
                         const BYTE* op_dumps_start, const BYTE* op_dumps,
                         size_t srcSize, size_t lastLLSize
                         )
{
    FSE_CStream_t blockStream;
    U32 count[256];
    S16 norm[256];
    size_t mostFrequent;
    U32 max = 255;
    U32 tableLog = 11;
    const size_t nbSeq = op_litLength - op_litLength_start;
    U32 CTable_LitLength [FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL )];
    U32 CTable_OffsetBits [FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
    U32 CTable_MatchLength[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML )];
    U32 LLtype, Offtype, MLtype;   /* per-stream encoding modes (compressed/raw/rle) */
    BYTE* op;
    const U32* op_offset = op_offset_start + nbSeq;
    const BYTE* op_matchLength = op_matchLength_start + nbSeq;
    BYTE offsetBits_start[BLOCKSIZE / 4];
    BYTE* offsetBitsPtr = offsetBits_start;
    const size_t minGain = ZSTD_minGain(srcSize);
    const size_t maxCSize = srcSize - minGain;
    const size_t minSeqSize = 1 /*lastL*/ + 2 /*dHead*/ + 2 /*dumpsIn*/ + 5 /*SeqHead*/ + 3 /*SeqIn*/ + 1 /*margin*/ + ZSTD_blockHeaderSize;
    const size_t maxLSize = maxCSize > minSeqSize ? maxCSize - minSeqSize : 0;
    BYTE* seqHead;

    /* init */
    op = dst;

    /* Encode literals : entropy when large enough, else raw; RLE for a single symbol */
    {
        size_t cSize;
        size_t litSize = op_lit - op_lit_start;
        if (litSize <= LITERAL_NOENTROPY) cSize = ZSTD_noCompressBlock (op, maxDstSize, op_lit_start, litSize);
        else
        {
            cSize = ZSTD_compressLiterals(op, maxDstSize, op_lit_start, litSize);
            if (cSize == 1) cSize = ZSTD_compressRle (op, maxDstSize, op_lit_start, litSize);
            else if (cSize == 0)
            {
                if (litSize >= maxLSize) return 0; /* block not compressible enough */
                cSize = ZSTD_noCompressBlock (op, maxDstSize, op_lit_start, litSize);
            }
        }
        if (ZSTD_isError(cSize)) return cSize;
        op += cSize;
    }

    /* Encode Sequences */

    /* seqHeader : varint trailing-literals size, then the mode byte location */
    op += ZSTD_writeProgressive(op, lastLLSize); CHECK_OVERFLOW(op <= dst + maxDstSize);
    seqHead = op;

    /* dumps : raw overflow bytes for lengths that exceed the symbol range */
    {
        size_t dumpsLength = op_dumps- op_dumps_start;
        if (dumpsLength < 512)
        {
            /* 2-byte size header */
            op[0] = (BYTE)(dumpsLength >> 8);
            op[1] = (BYTE)(dumpsLength);
            op += 2; CHECK_OVERFLOW(op <= dst + maxDstSize);
        }
        else
        {
            /* 3-byte size header, flagged by a leading 2 */
            op[0] = 2;
            op[1] = (BYTE)(dumpsLength>>8);
            op[2] = (BYTE)(dumpsLength);
            op += 3; CHECK_OVERFLOW(op <= dst + maxDstSize);
        }
        memcpy(op, op_dumps_start, dumpsLength);
        op += dumpsLength; CHECK_OVERFLOW(op <= dst + maxDstSize);
    }

    /* Encoding table of Literal Lengths : rle / raw / compressed, picked by stats */
    max = MaxLL;
    mostFrequent = FSE_countFast(count, op_litLength_start, nbSeq, &max);
    if (mostFrequent == nbSeq)
    {
        *op++ = *op_litLength_start; CHECK_OVERFLOW(op <= dst + maxDstSize);
        FSE_buildCTable_rle(CTable_LitLength, (BYTE)max);
        LLtype = bt_rle;
    }
    else if ((nbSeq < 64) || (mostFrequent < (nbSeq >> (LLbits-1))))
    {
        FSE_buildCTable_raw(CTable_LitLength, LLbits);
        LLtype = bt_raw;
    }
    else
    {
        tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max);
        FSE_normalizeCount(norm, tableLog, count, nbSeq, max);
        op += FSE_writeHeader(op, maxDstSize, norm, max, tableLog); CHECK_OVERFLOW(op <= dst + maxDstSize);
        FSE_buildCTable(CTable_LitLength, norm, max, tableLog);
        LLtype = bt_compressed;
    }

    /* Encoding table of Offsets */
    {
        /* create OffsetBits : each offset is coded as (highbit+1), 0 for repeat-offset */
        size_t i;
        max = MaxOff;
        for (i=0; i<nbSeq; i++)
        {
            offsetBits_start[i] = (BYTE)ZSTD_highbit(op_offset_start[i]) + 1;
            if (op_offset_start[i]==0) offsetBits_start[i]=0;
        }
        offsetBitsPtr += nbSeq;
        mostFrequent = FSE_countFast(count, offsetBits_start, nbSeq, &max);
    }
    if (mostFrequent == nbSeq)
    {
        *op++ = *offsetBits_start; CHECK_OVERFLOW(op <= dst + maxDstSize);
        FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max);
        Offtype = bt_rle;
    }
    else if ((nbSeq < 64) || (mostFrequent < (nbSeq >> (Offbits-1))))
    {
        FSE_buildCTable_raw(CTable_OffsetBits, Offbits);
        Offtype = bt_raw;
    }
    else
    {
        tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max);
        FSE_normalizeCount(norm, tableLog, count, nbSeq, max);
        op += FSE_writeHeader(op, maxDstSize, norm, max, tableLog); CHECK_OVERFLOW(op <= dst + maxDstSize);
        FSE_buildCTable(CTable_OffsetBits, norm, max, tableLog);
        Offtype = bt_compressed;
    }

    /* Encoding Table of MatchLengths */
    max = MaxML;
    mostFrequent = FSE_countFast(count, op_matchLength_start, nbSeq, &max);
    if (mostFrequent == nbSeq)
    {
        *op++ = *op_matchLength_start; CHECK_OVERFLOW(op <= dst + maxDstSize);
        FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max);
        MLtype = bt_rle;
    }
    else if ((nbSeq < 64) || (mostFrequent < (nbSeq >> (MLbits-1))))
    {
        FSE_buildCTable_raw(CTable_MatchLength, MLbits);
        MLtype = bt_raw;
    }
    else
    {
        tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max);
        FSE_normalizeCount(norm, tableLog, count, nbSeq, max);
        op += FSE_writeHeader(op, maxDstSize, norm, max, tableLog); CHECK_OVERFLOW(op <= dst + maxDstSize);
        FSE_buildCTable(CTable_MatchLength, norm, max, tableLog);
        MLtype = bt_compressed;
    }

    /* pack the three stream modes into the seqHead byte (2 bits each) */
    seqHead[0] += (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));

    /* Encoding : sequences are written back-to-front, as the FSE decoder reads
     * them in reverse. Right-hand column comments are worst-case bit counts in
     * the stream accumulator for 32-bit / 64-bit builds. */
    {
        FSE_CState_t stateMatchLength;
        FSE_CState_t stateOffsetBits;
        FSE_CState_t stateLitLength;

        FSE_initCStream(&blockStream, op);
        FSE_initCState(&stateMatchLength, CTable_MatchLength);
        FSE_initCState(&stateOffsetBits, CTable_OffsetBits);
        FSE_initCState(&stateLitLength, CTable_LitLength);

        while (op_litLength > op_litLength_start)
        {
            BYTE matchLength = *(--op_matchLength);
            U32 offset = *(--op_offset);
            BYTE offCode = *(--offsetBitsPtr);                              /* 32b*/  /* 64b*/
            U32 nbBits = (offCode-1) * (!!offCode);
            BYTE litLength = *(--op_litLength);                             /* (7)*/  /* (7)*/
            FSE_encodeByte(&blockStream, &stateMatchLength, matchLength);   /* 17 */  /* 17 */
            if (ZSTD_32bits()) FSE_flushBits(&blockStream);                 /*  7 */
            FSE_addBits(&blockStream, offset, nbBits);                      /* 32 */  /* 42 */
            if (ZSTD_32bits()) FSE_flushBits(&blockStream);                 /*  7 */
            FSE_encodeByte(&blockStream, &stateOffsetBits, offCode);        /* 16 */  /* 51 */
            FSE_encodeByte(&blockStream, &stateLitLength, litLength);       /* 26 */  /* 61 */
            FSE_flushBits(&blockStream);                                    /*  7 */  /*  7 */
        }

        FSE_flushCState(&blockStream, &stateMatchLength);
        FSE_flushCState(&blockStream, &stateOffsetBits);
        FSE_flushCState(&blockStream, &stateLitLength);
    }

    op += FSE_closeCStream(&blockStream); CHECK_OVERFLOW(op <= dst + maxDstSize);

    /* check compressibility */
    if ((size_t)(op-dst) >= maxCSize) return 0;

    return op - dst;
}
840
+
841
+
842
/* Append one parsed sequence (literals run + match) to the parallel output
 * arrays. Literal/match lengths >= their symbol maximum are capped and the
 * remainder spilled into the "dumps" stream. Returns the number of dump
 * bytes written (the caller advances the other cursors itself). */
static size_t ZSTD_storeSeq(BYTE* op_lit, BYTE* op_ll, U32* op_offset, BYTE* op_ml, BYTE* op_dumps,
                            size_t litLength, const BYTE* srcLit, size_t offset, size_t matchLength)
{
    const BYTE* const dumpStart = op_dumps;
    const BYTE* const l_end = op_lit + litLength;


    /* copy Literals (wild 8-byte strides : buffer must have slack) */
    while (op_lit<l_end) COPY8(op_lit, srcLit);

    /* literal Length */
    if (litLength >= MaxLL)
    {
        *op_ll++ = MaxLL;
        if (litLength<255 + MaxLL)
            *op_dumps++ = (BYTE)(litLength - MaxLL);
        else
        {
            /* 255 escape + 3-byte little-endian length.
             * NOTE(review): writeLE32 stores 4 bytes but the cursor advances
             * only 3 — the 4th byte is expected to be overwritten by later
             * dump output; lengths are assumed to fit in 24 bits. Confirm. */
            *op_dumps++ = 255;
            ZSTD_writeLE32(op_dumps, (U32)litLength); op_dumps += 3;
        }
    }
    else *op_ll = (BYTE)litLength;

    /* match offset */
    *op_offset = (U32)offset;

    /* match Length (same capping scheme as literal length) */
    if (matchLength >= MaxML)
    {
        *op_ml++ = MaxML;
        if (matchLength<255 + MaxML)
            *op_dumps++ = (BYTE)(matchLength - MaxML);
        else
        {
            /* 255 escape + 3-byte little-endian length (see note above) */
            *op_dumps++ = 255;
            ZSTD_writeLE32(op_dumps, (U32)matchLength); op_dumps+=3;
        }
    }
    else *op_ml = (BYTE)matchLength;

    return op_dumps - dumpStart;
}
894
+
895
+
896
+ //static const U32 hashMask = (1<<HASH_LOG)-1;
897
+ //static const U64 prime5bytes = 889523592379ULL;
898
+ //static const U64 prime6bytes = 227718039650203ULL;
899
+ static const U64 prime7bytes = 58295818150454627ULL;
900
+ //static const U64 prime8bytes = 14923729446516375013ULL;
901
+
902
+ //static U32 ZSTD_hashPtr(const void* p) { return (U32) _bextr_u64(*(U64*)p * prime7bytes, (56-HASH_LOG), HASH_LOG); }
903
+ //static U32 ZSTD_hashPtr(const void* p) { return ( (*(U64*)p * prime7bytes) << 8 >> (64-HASH_LOG)); }
904
+ //static U32 ZSTD_hashPtr(const void* p) { return ( (*(U64*)p * prime7bytes) >> (56-HASH_LOG)) & ((1<<HASH_LOG)-1); }
905
+ //static U32 ZSTD_hashPtr(const void* p) { return ( ((*(U64*)p & 0xFFFFFFFFFFFFFF) * prime7bytes) >> (64-HASH_LOG)); }
906
+
907
+ //static U32 ZSTD_hashPtr(const void* p) { return ( (*(U64*)p * prime8bytes) >> (64-HASH_LOG)); }
908
+ static U32 ZSTD_hashPtr(const void* p) { return ( (*(U64*)p * prime7bytes) >> (56-HASH_LOG)) & HASH_MASK; }
909
+ //static U32 ZSTD_hashPtr(const void* p) { return ( (*(U64*)p * prime6bytes) >> (48-HASH_LOG)) & HASH_MASK; }
910
+ //static U32 ZSTD_hashPtr(const void* p) { return ( (*(U64*)p * prime5bytes) >> (40-HASH_LOG)) & HASH_MASK; }
911
+ //static U32 ZSTD_hashPtr(const void* p) { return ( (*(U32*)p * KNUTH) >> (32-HASH_LOG)); }
912
+
913
/* record position p in the hash table, as an offset from the window start */
static void ZSTD_addPtr(U32* table, const BYTE* p, const BYTE* start) { table[ZSTD_hashPtr(p)] = (U32)(p-start); }
914
+
915
+ static const BYTE* ZSTD_updateMatch(U32* table, const BYTE* p, const BYTE* start)
916
+ {
917
+ U32 h = ZSTD_hashPtr(p);
918
+ const BYTE* r;
919
+ r = table[h] + start;
920
+ //table[h] = (U32)(p - start);
921
+ ZSTD_addPtr(table, p, start);
922
+ return r;
923
+ }
924
+
925
+ static int ZSTD_checkMatch(const BYTE* match, const BYTE* ip)
926
+ {
927
+ return ZSTD_read32(match) == ZSTD_read32(ip);
928
+ }
929
+
930
+
931
/*
 * Greedy single-pass block compressor.
 * Scans src using a single-entry hash table, splitting the input into
 * sequences (literal run, offset, match length). Each sequence field is
 * appended to its own scratch stream carved out of ctx->workplace, then
 * the streams are entropy-coded together by ZSTD_compressEntropy.
 * Returns the compressed size written to dst (0 means not compressible),
 * or an error code from the entropy stage.
 */
static size_t ZSTD_compressBlock(void* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
{
    cctxi_t* srt = (cctxi_t*) ctx;
    U32* HashTable = (U32*)(srt->hashTable);
    void* workplace = srt->workplace;
    const BYTE* const base = srt->base;

    const BYTE* const istart = (const BYTE*)src;
    const BYTE* ip = istart + 1;      /* first byte has no history to match against */
    const BYTE* anchor = istart;      /* start of the pending literal run */
    const BYTE* const iend = istart + srcSize;
    const BYTE* const ilimit = iend - 16;   /* margin for wild copies / 4-byte reads */

    /* Scratch output streams, laid out sequentially inside 'workplace' :
       offsets, literals, run lengths, match lengths, and a 'dumps' area
       for lengths too large for one byte. */
    U32 *op_offset = (U32*)(workplace), *op_offset_start = op_offset;
    BYTE *op_l = (BYTE*)workplace + srcSize + 4, *op_l_start = op_l;
    BYTE *op_rl = op_l + srcSize + 4, *op_rl_start = op_rl;
    BYTE *op_ml = op_rl + (srcSize >> 2) + 4, *op_ml_start = op_ml;
    BYTE *op_dumps = op_ml + (srcSize >> 2) + 4, *op_dumps_start = op_dumps;
    size_t prevOffset=0, offset=0;
    size_t lastLLSize;


    /* Main Search Loop */
    while (ip < ilimit)
    {
        const BYTE* match = (BYTE*) ZSTD_updateMatch(HashTable, ip, base);

        /* No 4-byte match : skip ahead, accelerating with the distance
           already scanned without success (g_searchStrength). */
        if (!ZSTD_checkMatch(match,ip)) { ip += ((ip-anchor) >> g_searchStrength) + 1; continue; }

        /* catch up : extend the match backwards over equal preceding bytes */
        while ((ip>anchor) && (match>base) && (ip[-1] == match[-1])) { ip--; match--; }

        {
            size_t litLength = ip-anchor;
            size_t matchLength = ZSTD_count(ip+MINMATCH, match+MINMATCH, iend);
            size_t offsetCode;
            /* offsetCode 0 encodes "repeat previous offset" */
            if (litLength) prevOffset = offset;
            offsetCode = ip-match;
            if (offsetCode == prevOffset) offsetCode = 0;
            prevOffset = offset;
            offset = ip-match;
            op_dumps += ZSTD_storeSeq(op_l, op_rl++, op_offset++, op_ml++, op_dumps, litLength, anchor, offsetCode, matchLength);
            op_l += litLength;

            /* Fill Table : register a couple of extra positions around the match */
            ZSTD_addPtr(HashTable, ip+1, base);
            ip += matchLength + MINMATCH;
            if (ip<=iend-8) ZSTD_addPtr(HashTable, ip-2, base);
            anchor = ip;
        }
    }

    /* Last Literals : copy whatever follows the final match verbatim */
    lastLLSize = iend - anchor;
    memcpy(op_l, anchor, lastLLSize);
    op_l += lastLLSize;

    /* Finale compression stage */
    return ZSTD_compressEntropy((BYTE*)dst, maxDstSize,
        op_l_start, op_l, op_rl_start, op_rl, op_ml_start, op_offset_start, op_dumps_start, op_dumps,
        srcSize, lastLLSize);
}
993
+
994
+
995
+ size_t ZSTD_compressBegin(ZSTD_cctx_t ctx, void* dst, size_t maxDstSize)
996
+ {
997
+ /* Sanity check */
998
+ if (maxDstSize < ZSTD_frameHeaderSize) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
999
+
1000
+ /* Init */
1001
+ ZSTD_resetCCtx(ctx);
1002
+
1003
+ /* Write Header */
1004
+ ZSTD_writeBE32(dst, ZSTD_magicNumber);
1005
+
1006
+ return ZSTD_frameHeaderSize;
1007
+ }
1008
+
1009
+
1010
/* this should be auto-vectorized by compiler */
/*
 * Subtract 'limit' from every hash-table entry, clamping at zero
 * (entries <= limit become 0). Called when the context is rebased so
 * stored positions remain consistent with the shifted 'base' pointer.
 */
static void ZSTD_scaleDownCtx(void* cctx, const U32 limit)
{
    cctxi_t* ctx = (cctxi_t*) cctx;
    int i;

#if defined(__AVX2__)   /* <immintrin.h> */
    /* AVX2 version : process 8 U32 entries per iteration.
       min(entry, limit) is subtracted, which saturates the result at 0. */
    __m256i* h = ctx->hashTable;
    const __m256i limit8 = _mm256_set1_epi32(limit);
    for (i=0; i<(HASH_TABLESIZE>>3); i++)
    {
        __m256i src =_mm256_loadu_si256((const __m256i*)(h+i));
        const __m256i dec = _mm256_min_epu32(src, limit8);
        src = _mm256_sub_epi32(src, dec);
        _mm256_storeu_si256((__m256i*)(h+i), src);
    }
#else
    /* Scalar version : identical semantics, one entry at a time */
    U32* h = ctx->hashTable;
    for (i=0; i<HASH_TABLESIZE; ++i)
    {
        U32 dec;
        if (h[i] > limit) dec = limit; else dec = h[i];
        h[i] -= dec;
    }
#endif
}
1037
+
1038
+
1039
/* this should be auto-vectorized by compiler */
/*
 * Raise every hash-table entry to at least 'limit', invalidating matches
 * older than the sliding window. When 'limit' grows past g_maxLimit the
 * whole context is rebased instead (entries scaled down and base/current/
 * nextUpdate shifted) to keep position values small.
 */
static void ZSTD_limitCtx(void* cctx, const U32 limit)
{
    cctxi_t* ctx = (cctxi_t*) cctx;
    int i;

    if (limit > g_maxLimit)
    {
        /* Rebase path : shift all positions down by 'limit' */
        ZSTD_scaleDownCtx(cctx, limit);
        ctx->base += limit;
        ctx->current -= limit;
        ctx->nextUpdate -= limit;
        return;
    }

#if defined(__AVX2__)   /* <immintrin.h> */
    /* AVX2 version : entry = max(entry, limit), 8 entries per iteration */
    {
        __m256i* h = ctx->hashTable;
        const __m256i limit8 = _mm256_set1_epi32(limit);
        //printf("Address h : %0X\n", (U32)h); // address test
        for (i=0; i<(HASH_TABLESIZE>>3); i++)
        {
            __m256i src =_mm256_loadu_si256((const __m256i*)(h+i));   // Unfortunately, clang doesn't guarantee 32-bytes alignment
            src = _mm256_max_epu32(src, limit8);
            _mm256_storeu_si256((__m256i*)(h+i), src);
        }
    }
#else
    /* Scalar version */
    {
        U32* h = (U32*)(ctx->hashTable);
        for (i=0; i<HASH_TABLESIZE; ++i)
        {
            if (h[i] < limit) h[i] = limit;
        }
    }
#endif
}
1077
+
1078
+
1079
/*
 * Compress srcSize bytes as a succession of blocks (BLOCKSIZE each),
 * appended at dst after their 3-byte block headers.
 * Manages the context window : resets when input is not contiguous with
 * the previous call, and periodically clamps/rebases the hash table so
 * match offsets stay within g_maxDistance.
 * Returns total bytes written, or an error code.
 */
size_t ZSTD_compressContinue(ZSTD_cctx_t cctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
{
    cctxi_t* ctx = (cctxi_t*) cctx;
    const BYTE* const istart = (const BYTE* const)src;
    const BYTE* ip = istart;
    BYTE* const ostart = (BYTE* const)dst;
    BYTE* op = ostart;
    const U32 updateRate = 2 * BLOCKSIZE;

    /* Init : first call establishes the window base */
    if (ctx->base==NULL)
        ctx->base = (const BYTE*)src, ctx->current=0, ctx->nextUpdate = g_maxDistance;
    if (src != ctx->base + ctx->current)   /* not contiguous : history unusable, start over */
    {
        ZSTD_resetCCtx(ctx);
        ctx->base = (const BYTE*)src;
        ctx->current = 0;
    }
    ctx->current += (U32)srcSize;

    while (srcSize)
    {
        size_t cSize;
        size_t blockSize = BLOCKSIZE;
        if (blockSize > srcSize) blockSize = srcSize;

        /* update hash table */
        if (g_maxDistance <= BLOCKSIZE)   /* static test => all blocks are independent */
        {
            ZSTD_resetCCtx(ctx);
            ctx->base = ip;
            ctx->current=0;
        }
        else if (ip >= ctx->base + ctx->nextUpdate)
        {
            /* drop references older than the window */
            ctx->nextUpdate += updateRate;
            ZSTD_limitCtx(ctx, ctx->nextUpdate - g_maxDistance);
        }

        /* compress one block, leaving room for its 3-byte header */
        if (maxDstSize < ZSTD_blockHeaderSize) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
        cSize = ZSTD_compressBlock(ctx, op+ZSTD_blockHeaderSize, maxDstSize-ZSTD_blockHeaderSize, ip, blockSize);
        if (cSize == 0)
        {
            /* block is not compressible : store raw (presumably
               ZSTD_noCompressBlock writes its own header and returns the
               total size — TODO confirm against its definition) */
            cSize = ZSTD_noCompressBlock(op, maxDstSize, ip, blockSize);
            if (ZSTD_isError(cSize)) return cSize;
        }
        else
        {
            if (ZSTD_isError(cSize)) return cSize;
            /* 3-byte header : 2-bit type in the top of byte 0,
               remaining bits hold the compressed size, big-endian */
            op[0] = (BYTE)(cSize>>16);
            op[1] = (BYTE)(cSize>>8);
            op[2] = (BYTE)cSize;
            op[0] += (BYTE)(bt_compressed << 6);   /* is a compressed block */
            cSize += 3;   /* account for the header */
        }
        op += cSize;
        maxDstSize -= cSize;
        ip += blockSize;
        srcSize -= blockSize;
    }

    return op-ostart;
}
1143
+
1144
+
1145
+ size_t ZSTD_compressEnd(ZSTD_cctx_t ctx, void* dst, size_t maxDstSize)
1146
+ {
1147
+ BYTE* op = (BYTE*)dst;
1148
+
1149
+ /* Sanity check */
1150
+ (void)ctx;
1151
+ if (maxDstSize < ZSTD_blockHeaderSize) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
1152
+
1153
+ /* End of frame */
1154
+ op[0] = (BYTE)(bt_end << 6);
1155
+ op[1] = 0;
1156
+ op[2] = 0;
1157
+
1158
+ return 3;
1159
+ }
1160
+
1161
+
1162
+ static size_t ZSTD_compressCCtx(void* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
1163
+ {
1164
+ BYTE* const ostart = (BYTE* const)dst;
1165
+ BYTE* op = ostart;
1166
+
1167
+ /* Header */
1168
+ {
1169
+ size_t headerSize = ZSTD_compressBegin(ctx, dst, maxDstSize);
1170
+ if(ZSTD_isError(headerSize)) return headerSize;
1171
+ op += headerSize;
1172
+ maxDstSize -= headerSize;
1173
+ }
1174
+
1175
+ /* Compression */
1176
+ {
1177
+ size_t cSize = ZSTD_compressContinue(ctx, op, maxDstSize, src, srcSize);
1178
+ if (ZSTD_isError(cSize)) return cSize;
1179
+ op += cSize;
1180
+ maxDstSize -= cSize;
1181
+ }
1182
+
1183
+ /* Close frame */
1184
+ {
1185
+ size_t endSize = ZSTD_compressEnd(ctx, op, maxDstSize);
1186
+ if(ZSTD_isError(endSize)) return endSize;
1187
+ op += endSize;
1188
+ }
1189
+
1190
+ return (op - ostart);
1191
+ }
1192
+
1193
+
1194
+ size_t ZSTD_compress(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
1195
+ {
1196
+ void* ctx;
1197
+ size_t r;
1198
+
1199
+ ctx = ZSTD_createCCtx();
1200
+ r = ZSTD_compressCCtx(ctx, dst, maxDstSize, src, srcSize);
1201
+ ZSTD_freeCCtx(ctx);
1202
+ return r;
1203
+ }
1204
+
1205
+
1206
+ /**************************************************************
1207
+ * Decompression code
1208
+ **************************************************************/
1209
+
1210
+ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
1211
+ {
1212
+ const BYTE* const in = (const BYTE* const)src;
1213
+ BYTE headerFlags;
1214
+ U32 cSize;
1215
+
1216
+ if (srcSize < 3) return (size_t)-ZSTD_ERROR_wrongSrcSize;
1217
+
1218
+ headerFlags = *in;
1219
+ cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16);
1220
+
1221
+ bpPtr->blockType = (blockType_t)(headerFlags >> 6);
1222
+ bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0;
1223
+
1224
+ if (bpPtr->blockType == bt_end) return 0;
1225
+ if (bpPtr->blockType == bt_rle) return 1;
1226
+ return cSize;
1227
+ }
1228
+
1229
+
1230
+ static size_t ZSTD_copyUncompressedBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
1231
+ {
1232
+ if (srcSize > maxDstSize) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
1233
+ memcpy(dst, src, srcSize);
1234
+ return srcSize;
1235
+ }
1236
+
1237
+
1238
/* force inline : 'fast' really needs to be evaluated at compile time */
/*
 * Decode FSE-compressed literals using a prebuilt DTable.
 * Two interleaved FSE states decode the stream; output is produced
 * BACKWARDS, from the end of dst toward its start (op begins at oend
 * and is pre-decremented). Returns the number of literals produced,
 * or an error code.
 * 'fast' selects FSE_decodeSymbolFast (valid only when the table has
 * no low-probability symbols) and must be a compile-time constant so
 * the branch folds away.
 */
FORCE_INLINE size_t ZSTD_decompressLiterals_usingDTable_generic(
                       void* const dst, size_t maxDstSize,
                 const void* src, size_t srcSize,
                 const void* DTable, U32 fast)
{
    BYTE* op = (BYTE*) dst;
    BYTE* const olimit = op;           /* lowest writable address */
    BYTE* const oend = op + maxDstSize;
    FSE_DStream_t bitD;
    FSE_DState_t state1, state2;
    size_t errorCode;

    /* Init */
    errorCode = FSE_initDStream(&bitD, src, srcSize);
    if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC;

    FSE_initDState(&state1, &bitD, DTable);
    FSE_initDState(&state2, &bitD, DTable);
    op = oend;   /* decode back-to-front */

    // 2 symbols per loop
    while (!FSE_reloadDStream(&bitD) && (op>olimit+3))
    {
        *--op = fast ? FSE_decodeSymbolFast(&state1, &bitD) : FSE_decodeSymbol(&state1, &bitD);

        if (LitFSELog*2+7 > sizeof(size_t)*8)   // This test must be static
            FSE_reloadDStream(&bitD);

        *--op = fast ? FSE_decodeSymbolFast(&state2, &bitD) : FSE_decodeSymbol(&state2, &bitD);

        if (LitFSELog*4+7 < sizeof(size_t)*8)   // This test must be static : 4 symbols fit per reload
        {
            *--op = fast ? FSE_decodeSymbolFast(&state1, &bitD) : FSE_decodeSymbol(&state1, &bitD);
            *--op = fast ? FSE_decodeSymbolFast(&state2, &bitD) : FSE_decodeSymbol(&state2, &bitD);
        }
    }

    /* tail : one symbol at a time until the stream or the buffer is exhausted */
    while (1)
    {
        if ( (FSE_reloadDStream(&bitD)>2) || (op==olimit) || (FSE_endOfDState(&state1) && FSE_endOfDStream(&bitD)) )
            break;

        *--op = fast ? FSE_decodeSymbolFast(&state1, &bitD) : FSE_decodeSymbol(&state1, &bitD);

        if ( (FSE_reloadDStream(&bitD)>2) || (op==olimit) || (FSE_endOfDState(&state2) && FSE_endOfDStream(&bitD)) )
            break;

        *--op = fast ? FSE_decodeSymbolFast(&state2, &bitD) : FSE_decodeSymbol(&state2, &bitD);
    }

    /* end ? bitstream and both states must be fully consumed */
    if (FSE_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2) )
        return oend-op;

    if (op==olimit) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;   /* dst buffer is full, but cSrc unfinished */

    return (size_t)-ZSTD_ERROR_GENERIC;
}
1298
+
1299
+ static size_t ZSTD_decompressLiterals_usingDTable(
1300
+ void* const dst, size_t maxDstSize,
1301
+ const void* src, size_t srcSize,
1302
+ const void* DTable, U32 fast)
1303
+ {
1304
+ if (fast) return ZSTD_decompressLiterals_usingDTable_generic(dst, maxDstSize, src, srcSize, DTable, 1);
1305
+ return ZSTD_decompressLiterals_usingDTable_generic(dst, maxDstSize, src, srcSize, DTable, 0);
1306
+ }
1307
+
1308
/*
 * Decompress an FSE-compressed literals segment :
 * read the normalized-count header, build the decoding table (stored in
 * the ctx scratch area), then run the table-driven decoder.
 * Returns the number of literals produced, or an error code.
 */
static size_t ZSTD_decompressLiterals(void* ctx, void* dst, size_t maxDstSize,
                                const void* src, size_t srcSize)
{
    /* assumed : blockType == blockCompressed */
    const BYTE* ip = (const BYTE*)src;
    short norm[256];
    void* DTable = ctx;   /* ctx doubles as DTable scratch space */
    U32 maxSymbolValue = 255;
    U32 tableLog;
    U32 fastMode;
    size_t errorCode;

    if (srcSize < 2) return (size_t)-ZSTD_ERROR_wrongLBlockSize;   // too small input size

    /* Normalized symbol frequencies */
    errorCode = FSE_readHeader (norm, &maxSymbolValue, &tableLog, ip, srcSize);
    if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC;
    ip += errorCode;
    srcSize -= errorCode;

    /* Build decoding table ; return value doubles as the 'fast mode' flag */
    errorCode = FSE_buildDTable (DTable, norm, maxSymbolValue, tableLog);
    if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC;
    fastMode = (U32)errorCode;

    return ZSTD_decompressLiterals_usingDTable (dst, maxDstSize, ip, srcSize, DTable, fastMode);
}
1333
+
1334
+
1335
/*
 * Decode the literals sub-block at the start of a compressed block.
 * Sets *litPtr to where the literals live : inside src for raw literals,
 * or at the END of dst (oend - size) for rle/compressed literals, so the
 * sequence-execution stage can copy them forward without clobbering them.
 * Returns the number of input bytes consumed, or an error code.
 */
size_t ZSTD_decodeLiteralsBlock(void* ctx,
                          void* dst, size_t maxDstSize,
                    const BYTE** litPtr,
                    const void* src, size_t srcSize)
{
    const BYTE* const istart = (const BYTE* const)src;
    const BYTE* ip = istart;
    BYTE* const ostart = (BYTE* const)dst;
    BYTE* const oend = ostart + maxDstSize;
    blockProperties_t litbp;

    /* literals have their own embedded block header */
    size_t litcSize = ZSTD_getcBlockSize(src, srcSize, &litbp);
    if (ZSTD_isError(litcSize)) return litcSize;
    if (litcSize > srcSize - ZSTD_blockHeaderSize) return (size_t)-ZSTD_ERROR_wrongLBlockSize;
    ip += ZSTD_blockHeaderSize;

    switch(litbp.blockType)
    {
    case bt_raw:   /* literals stored verbatim : point directly into src */
        *litPtr = ip; ip+= litcSize; break;
    case bt_rle:   /* one byte repeated origSize times, materialized at end of dst */
        {
            size_t rleSize = litbp.origSize;
            memset(oend - rleSize, *ip, rleSize);
            *litPtr = oend - rleSize;
            ip++;
            break;
        }
    case bt_compressed:   /* FSE-decoded backwards into the tail of dst */
        {
            size_t cSize = ZSTD_decompressLiterals(ctx, dst, maxDstSize, ip, litcSize);
            if (ZSTD_isError(cSize)) return cSize;
            *litPtr = oend - cSize;
            ip += litcSize;
            break;
        }
    default:
        return (size_t)-ZSTD_ERROR_GENERIC;
    }

    return ip-istart;
}
1376
+
1377
+
1378
/*
 * Parse the sequences section header and build the three FSE decoding
 * tables (literal lengths, match lengths, offsets).
 * Outputs : *lastLLPtr = size of the trailing literal run,
 *           *dumpsPtr  = pointer to the raw 'dumps' area holding
 *                        lengths too large for single-byte codes.
 * Returns bytes consumed from src, or an error code.
 * NOTE(review): the blockType enum values (bt_rle / bt_raw) are reused
 * here as per-stream compression modes — confirm against the encoder.
 */
size_t ZSTD_decodeSeqHeaders(size_t* lastLLPtr, const BYTE** dumpsPtr,
                       void* DTableLL, void* DTableML, void* DTableOffb,
                 const void* src, size_t srcSize)
{
    const BYTE* const istart = (const BYTE* const)src;
    const BYTE* ip = istart;
    const BYTE* const iend = istart + srcSize;
    U32 LLtype, Offtype, MLtype;
    U32 LLlog, Offlog, MLlog;
    size_t dumpsLength;

    /* SeqHead : variable-length lastLL, then a flags byte packing the
       three stream types (2 bits each) plus the dumps-length format bit */
    ip += ZSTD_readProgressive(lastLLPtr, ip);
    LLtype  = *ip >> 6;
    Offtype = (*ip >> 4) & 3;
    MLtype  = (*ip >> 2) & 3;
    /* bit 1 selects the wider dumps-length encoding
       (NOTE(review): verify layout against the sequence writer) */
    if (*ip & 2)
    {
        dumpsLength = ip[2];
        dumpsLength += ip[1] << 8;
        ip += 3;
    }
    else
    {
        dumpsLength = ip[1];
        dumpsLength += (ip[0] & 1) << 8;
        ip += 2;
    }
    *dumpsPtr = ip;
    ip += dumpsLength;

    /* sequences */
    {
        S16 norm[MaxML+1];   /* assumption : MaxML >= MaxLL and MaxOff */
        size_t headerSize;

        /* Build DTables : each stream is either a single repeated symbol
           (rle), uncompressed fixed-width codes (raw), or FSE-compressed
           with its own normalized-count header (default). */
        switch(LLtype)
        {
            U32 max;
        case bt_rle :
            LLlog = 0;
            FSE_buildDTable_rle(DTableLL, *ip++); break;
        case bt_raw :
            LLlog = LLbits;
            FSE_buildDTable_raw(DTableLL, LLbits); break;
        default :
            max = MaxLL;
            headerSize = FSE_readHeader(norm, &max, &LLlog, ip, iend-ip);
            if (FSE_isError(headerSize)) return (size_t)-ZSTD_ERROR_GENERIC;
            ip += headerSize;
            FSE_buildDTable(DTableLL, norm, max, LLlog);
        }

        switch(Offtype)
        {
            U32 max;
        case bt_rle :
            Offlog = 0;
            FSE_buildDTable_rle(DTableOffb, *ip++); break;
        case bt_raw :
            Offlog = Offbits;
            FSE_buildDTable_raw(DTableOffb, Offbits); break;
        default :
            max = MaxOff;
            headerSize = FSE_readHeader(norm, &max, &Offlog, ip, iend-ip);
            if (FSE_isError(headerSize)) return (size_t)-ZSTD_ERROR_GENERIC;
            ip += headerSize;
            FSE_buildDTable(DTableOffb, norm, max, Offlog);
        }

        switch(MLtype)
        {
            U32 max;
        case bt_rle :
            MLlog = 0;
            FSE_buildDTable_rle(DTableML, *ip++); break;
        case bt_raw :
            MLlog = MLbits;
            FSE_buildDTable_raw(DTableML, MLbits); break;
        default :
            max = MaxML;
            headerSize = FSE_readHeader(norm, &max, &MLlog, ip, iend-ip);
            if (FSE_isError(headerSize)) return (size_t)-ZSTD_ERROR_GENERIC;
            ip += headerSize;
            FSE_buildDTable(DTableML, norm, max, MLlog);
        }
    }

    return ip-istart;
}
1469
+
1470
+
1471
/* Touch one byte at p so its cache line gets loaded; the volatile read cannot be optimized away. */
#define ZSTD_prefetch(p) { const BYTE pByte = *(volatile const BYTE*)p; }
1472
+
1473
/*
 * Decompress one compressed block : decode the literals sub-block, build
 * the sequence decoding tables, then execute each sequence (copy literal
 * run + copy match). Literals may live at the END of dst (see
 * ZSTD_decodeLiteralsBlock), so match copies near litPtr save and restore
 * the bytes they might clobber.
 * Returns the number of bytes regenerated into dst, or an error code.
 */
FORCE_INLINE size_t ZSTD_decompressBlock(void* ctx, void* dst, size_t maxDstSize,
                             const void* src, size_t srcSize)
{
    const BYTE* ip = (const BYTE*)src;
    const BYTE* const iend = ip + srcSize;
    BYTE* const ostart = (BYTE* const)dst;
    BYTE* op = ostart;
    BYTE* const oend = ostart + maxDstSize;
    size_t errorCode;
    size_t lastLLSize;
    const BYTE* dumps;
    const BYTE* litPtr;
    const BYTE* litEnd;
    /* correction tables for overlapping copies when offset < 8 */
    const size_t dec32table[] = {4, 1, 2, 1, 4, 4, 4, 4};   /* added */
    const size_t dec64table[] = {8, 8, 8, 7, 8, 9,10,11};   /* substracted */
    /* the three FSE DTables share the ctx scratch area, back to back */
    void* DTableML = ctx;
    void* DTableLL = ((U32*)ctx) + FSE_DTABLE_SIZE_U32(MLFSELog);
    void* DTableOffb = ((U32*)DTableLL) + FSE_DTABLE_SIZE_U32(LLFSELog);

    /* blockType == blockCompressed, srcSize is trusted */

    /* literal sub-block */
    errorCode = ZSTD_decodeLiteralsBlock(ctx, dst, maxDstSize, &litPtr, src, srcSize);
    if (ZSTD_isError(errorCode)) return errorCode;
    ip += errorCode;

    /* Build Decoding Tables */
    errorCode = ZSTD_decodeSeqHeaders(&lastLLSize, &dumps,
                                      DTableLL, DTableML, DTableOffb,
                                      ip, iend-ip);
    if (ZSTD_isError(errorCode)) return errorCode;
    /* end pos : literals inside dst end before the last-literals area,
       literals inside src end relative to the sequence stream */
    if ((litPtr>=ostart) && (litPtr<=oend))
        litEnd = oend - lastLLSize;
    else
        litEnd = ip - lastLLSize;
    ip += errorCode;

    /* decompression */
    {
        FSE_DStream_t DStream;
        FSE_DState_t stateLL, stateOffb, stateML;
        size_t prevOffset = 0, offset = 0;
        size_t qutt=0;

        FSE_initDStream(&DStream, ip, iend-ip);
        FSE_initDState(&stateLL, &DStream, DTableLL);
        FSE_initDState(&stateOffb, &DStream, DTableOffb);
        FSE_initDState(&stateML, &DStream, DTableML);

        while (FSE_reloadDStream(&DStream)<2)
        {
            U32 nbBits, offsetCode;
            const BYTE* match;
            size_t litLength;
            size_t matchLength;
            size_t newOffset;

_another_round:

            /* Literals : code MaxLL means the real length is in 'dumps'
               (one extra byte, or 3 LE bytes after a 255 escape) */
            litLength = FSE_decodeSymbol(&stateLL, &DStream);
            if (litLength) prevOffset = offset;
            if (litLength == MaxLL)
            {
                BYTE add = *dumps++;
                if (add < 255) litLength += add;
                else
                {
                    //litLength = (*(U32*)dumps) & 0xFFFFFF;
                    litLength = ZSTD_readLE32(dumps) & 0xFFFFFF;
                    dumps += 3;
                }
            }
            /* wildcopy overruns by up to 7 bytes; fall back to memmove
               when too close to litPtr or the end of dst */
            if (((size_t)(litPtr - op) < 8) || ((size_t)(oend-(litPtr+litLength)) < 8))
                memmove(op, litPtr, litLength);   /* overwrite risk */
            else
                ZSTD_wildcopy(op, litPtr, litLength);
            op += litLength; CHECK_OVERFLOW(op <= (BYTE *)dst + maxDstSize);
            litPtr += litLength;

            /* Offset : code 0 means "repeat previous offset", otherwise
               the code is 1 + number of extra bits to read */
            offsetCode = FSE_decodeSymbol(&stateOffb, &DStream);
            if (ZSTD_32bits()) FSE_reloadDStream(&DStream);
            nbBits = offsetCode - 1;
            if (offsetCode==0) nbBits = 0;   /* cmove */
            newOffset = FSE_readBits(&DStream, nbBits);
            if (ZSTD_32bits()) FSE_reloadDStream(&DStream);
            newOffset += (size_t)1 << nbBits;
            if (offsetCode==0) newOffset = prevOffset;
            match = op - newOffset;
            prevOffset = offset;
            offset = newOffset;

            /* MatchLength : same escape scheme as litLength */
            matchLength = FSE_decodeSymbol(&stateML, &DStream);
            if (matchLength == MaxML)
            {
                BYTE add = *dumps++;
                if (add < 255) matchLength += add;
                else
                {
                    //matchLength = (*(U32*)dumps) & 0xFFFFFF;
                    matchLength = ZSTD_readLE32(dumps) & 0xFFFFFF;
                    dumps += 3;
                }
            }
            matchLength += MINMATCH;

            /* copy Match */
            {
                BYTE* const endMatch = op + matchLength;
                U64 saved[2];

                /* the copy may overrun into the pending literals :
                   save up to 16 bytes beforehand, restore after */
                if ((size_t)(litPtr - endMatch) < 12)
                {
                    qutt = endMatch + 12 - litPtr;
                    if ((litPtr + qutt) > oend) qutt = oend-litPtr;
                    memcpy(saved, litPtr, qutt);
                }

                if (offset < 8)
                {
                    /* overlapping copy : expand byte-wise for the first 8
                       bytes using the dec32/dec64 correction tables */
                    const size_t dec64 = dec64table[offset];
                    op[0] = match[0];
                    op[1] = match[1];
                    op[2] = match[2];
                    op[3] = match[3];
                    match += dec32table[offset];
                    ZSTD_copy4(op+4, match);
                    match -= dec64;
                } else { ZSTD_copy8(op, match); }

                if (endMatch > oend-12)
                {
                    /* near the end of dst : wildcopy only while safe,
                       then finish byte by byte */
                    if (op < oend-16)
                    {
                        ZSTD_wildcopy(op+8, match+8, (oend-8) - (op+8));
                        match += (oend-8) - op;
                        op = oend-8; CHECK_OVERFLOW(op <= (BYTE *)dst + maxDstSize);
                    }
                    while (op<endMatch) *op++ = *match++; CHECK_OVERFLOW(op <= (BYTE *)dst + maxDstSize);
                }
                else
                    ZSTD_wildcopy(op+8, match+8, matchLength-8);   /* works even if matchLength < 8 */

                op = endMatch; CHECK_OVERFLOW(op <= (BYTE *)dst + maxDstSize);

                if ((size_t)(litPtr - endMatch) < 12)
                    memcpy((void*)litPtr, saved, qutt);   /* restore clobbered literals */
            }
        }

        /* check if reached exact end */
        if (FSE_reloadDStream(&DStream) > 2) return (size_t)-ZSTD_ERROR_GENERIC;   /* requested too much : data is corrupted */
        if (!FSE_endOfDState(&stateLL) && !FSE_endOfDState(&stateML) && !FSE_endOfDState(&stateOffb)) goto _another_round;   /* some ultra-compressible sequence remain ! */
        if (litPtr != litEnd) goto _another_round;   /* literals not entirely spent */

        /* last literal segment */
        if (op != litPtr) memmove(op, litPtr, lastLLSize);
        op += lastLLSize; CHECK_OVERFLOW(op <= (BYTE *)dst + maxDstSize);
    }

    return op-ostart;
}
1638
+
1639
+
1640
/*
 * Decompress a whole frame with a caller-supplied table workspace :
 * verify the magic number, then decode block after block until the
 * bt_end marker. Returns total bytes regenerated, or an error code.
 */
static size_t ZSTD_decompressDCtx(void* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
{
    const BYTE* ip = (const BYTE*)src;
    const BYTE* iend = ip + srcSize;
    BYTE* const ostart = (BYTE* const)dst;
    BYTE* op = ostart;
    BYTE* const oend = ostart + maxDstSize;
    size_t remainingSize = srcSize;
    U32 magicNumber;
    size_t errorCode=0;
    blockProperties_t blockProperties;

    /* Header : 4-byte big-endian magic number */
    if (srcSize < ZSTD_frameHeaderSize) return (size_t)-ZSTD_ERROR_wrongSrcSize;
    magicNumber = ZSTD_readBE32(src);
    if (magicNumber != ZSTD_magicNumber) return (size_t)-ZSTD_ERROR_wrongMagicNumber;
    ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;

    while (1)
    {
        size_t blockSize = ZSTD_getcBlockSize(ip, iend-ip, &blockProperties);
        if (ZSTD_isError(blockSize))
            return blockSize;

        ip += ZSTD_blockHeaderSize;
        remainingSize -= ZSTD_blockHeaderSize;
        if (ip+blockSize > iend)
            return (size_t)-ZSTD_ERROR_wrongSrcSize;

        switch(blockProperties.blockType)
        {
        case bt_compressed:
            errorCode = ZSTD_decompressBlock(ctx, op, oend-op, ip, blockSize);
            break;
        case bt_raw :
            errorCode = ZSTD_copyUncompressedBlock(op, oend-op, ip, blockSize);
            break;
        case bt_rle :
            return (size_t)-ZSTD_ERROR_GENERIC;   /* not yet handled */
            break;
        case bt_end :
            /* end of frame : must be the last block */
            if (remainingSize) return (size_t)-ZSTD_ERROR_wrongSrcSize;
            break;
        default:
            return (size_t)-ZSTD_ERROR_GENERIC;
        }
        if (blockSize == 0) break;   /* bt_end (getcBlockSize returns 0 for it) */

        if (ZSTD_isError(errorCode)) return errorCode;
        op += errorCode; CHECK_OVERFLOW(op <= oend);
        ip += blockSize;
        remainingSize -= blockSize;
    }

    return op-ostart;
}
1697
+
1698
+
1699
+ size_t ZSTD_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
1700
+ {
1701
+ U32 ctx[FSE_DTABLE_SIZE_U32(LLFSELog) + FSE_DTABLE_SIZE_U32(OffFSELog) + FSE_DTABLE_SIZE_U32(MLFSELog)];
1702
+ return ZSTD_decompressDCtx(ctx, dst, maxDstSize, src, srcSize);
1703
+ }
1704
+
1705
+
1706
+ /******************************
1707
+ * Streaming Decompression API
1708
+ ******************************/
1709
+
1710
/* Internal state for the streaming decompression API */
typedef struct
{
    /* shared scratch for the three FSE decoding tables (LL, Offb, ML) */
    U32 ctx[FSE_DTABLE_SIZE_U32(LLFSELog) + FSE_DTABLE_SIZE_U32(OffFSELog) + FSE_DTABLE_SIZE_U32(MLFSELog)];
    size_t expected;     /* bytes the next ZSTD_decompressContinue() call must receive */
    blockType_t bType;   /* type of the next block to decode */
    U32 started;         /* 0 until the frame magic number has been verified */
} dctx_t;
1717
+
1718
+
1719
+ ZSTD_dctx_t ZSTD_createDCtx(void)
1720
+ {
1721
+ dctx_t* dctx = (dctx_t*)malloc(sizeof(dctx_t));
1722
+ dctx->expected = 4 + ZSTD_blockHeaderSize; // Frame Header + Block Header
1723
+ dctx->started = 0;
1724
+ return (ZSTD_dctx_t)dctx;
1725
+ }
1726
+
1727
/* Release a context allocated by ZSTD_createDCtx(). Always returns 0. */
size_t ZSTD_freeDCtx(ZSTD_dctx_t dctx)
{
    free(dctx);
    return 0;
}
1732
+
1733
+
1734
+ size_t ZSTD_getNextcBlockSize(ZSTD_dctx_t dctx)
1735
+ {
1736
+ return ((dctx_t*)dctx)->expected;
1737
+ }
1738
+
1739
/*
 * Streaming decompression : the caller feeds exactly ctx->expected bytes
 * per call (query with ZSTD_getNextcBlockSize). The first call only
 * checks the frame magic number; subsequent calls decode one block each
 * (payload + following block header). The trailing header is parsed to
 * set up the size expected by the next call.
 * Returns the number of bytes regenerated into dst, or an error code.
 */
size_t ZSTD_decompressContinue(ZSTD_dctx_t dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
{
    dctx_t* ctx = (dctx_t*)dctx;
    size_t cSize = srcSize - ZSTD_blockHeaderSize;   /* payload size, excluding the trailing header */
    size_t rSize;

    // Sanity check
    if (srcSize != ctx->expected) return (size_t)-ZSTD_ERROR_wrongSrcSize;

    // Decompress
    if (!ctx->started)
    {
        // Just check correct magic header
        U32 magicNumber = ZSTD_readBE32(src);
        if (magicNumber != ZSTD_magicNumber) return (size_t)-ZSTD_ERROR_wrongMagicNumber;
        rSize = 0;
    }
    else
    {
        /* decode the block whose type was recorded by the previous call */
        switch(ctx->bType)
        {
        case bt_compressed:
            rSize = ZSTD_decompressBlock(ctx, dst, maxDstSize, src, cSize);
            break;
        case bt_raw :
            rSize = ZSTD_copyUncompressedBlock(dst, maxDstSize, src, cSize);
            break;
        case bt_rle :
            return (size_t)-ZSTD_ERROR_GENERIC;   /* not yet handled */
            break;
        case bt_end :
            rSize = 0;
            break;
        default:
            return (size_t)-ZSTD_ERROR_GENERIC;
        }
    }

    // Prepare next block : parse the header that follows the payload
    {
        const BYTE* header = (const BYTE*)src;
        blockProperties_t bp;
        size_t blockSize;
        header += cSize;
        blockSize = ZSTD_getcBlockSize(header, ZSTD_blockHeaderSize, &bp);
        if (ZSTD_isError(blockSize)) return blockSize;
        if (bp.blockType == bt_end)
        {
            /* frame finished : reset so a new frame can follow */
            ctx->expected = 0;
            ctx->started = 0;
        }
        else
        {
            ctx->expected = blockSize + ZSTD_blockHeaderSize;
            ctx->bType = bp.blockType;
            ctx->started = 1;
        }
    }

    return rSize;
}
1800
+
1801
+