extzstd 0.0.1.CONCEPT

@@ -0,0 +1,1801 @@
1
+ /*
2
+ zstd - standard compression library
3
+ Copyright (C) 2014-2015, Yann Collet.
4
+
5
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6
+
7
+ Redistribution and use in source and binary forms, with or without
8
+ modification, are permitted provided that the following conditions are
9
+ met:
10
+ * Redistributions of source code must retain the above copyright
11
+ notice, this list of conditions and the following disclaimer.
12
+ * Redistributions in binary form must reproduce the above
13
+ copyright notice, this list of conditions and the following disclaimer
14
+ in the documentation and/or other materials provided with the
15
+ distribution.
16
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
+
28
+ You can contact the author at :
29
+ - zstd source repository : https://github.com/Cyan4973/zstd
30
+ - zstd public forum : https://groups.google.com/forum/#!forum/lz4c
31
+ */
32
+
33
+ /****************************************************************
34
+ * Tuning parameters
35
+ *****************************************************************/
36
+ /* MEMORY_USAGE :
37
+ * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
38
+ * Increasing memory usage improves compression ratio
39
+ * Reduced memory usage can improve speed, due to cache effect */
40
+ #define ZSTD_MEMORY_USAGE 17
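+ /* Editor's note (worked example, not part of the original source) : with
+  * ZSTD_MEMORY_USAGE == 17, HASH_LOG below becomes 15, so the match-finder
+  * hash table holds (1<<15) U32 entries = 4 << 15 = 131072 bytes = 128 KB,
+  * matching the N -> 2^N formula ; a value of 20 would use 1 MB instead. */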
41
+
42
+
43
+ /**************************************
44
+ CPU Feature Detection
45
+ **************************************/
46
+ /*
47
+ * Automated efficient unaligned memory access detection
48
+ * Based on known hardware architectures
49
+ * This list will be updated based on feedback
50
+ */
51
+ #if defined(CPU_HAS_EFFICIENT_UNALIGNED_MEMORY_ACCESS) \
52
+ || defined(__ARM_FEATURE_UNALIGNED) \
53
+ || defined(__i386__) || defined(__x86_64__) \
54
+ || defined(_M_IX86) || defined(_M_X64) \
55
+ || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_8__) \
56
+ || (defined(_M_ARM) && (_M_ARM >= 7))
57
+ # define ZSTD_UNALIGNED_ACCESS 1
58
+ #else
59
+ # define ZSTD_UNALIGNED_ACCESS 0
60
+ #endif
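+ /* Editor's sketch (assumption, not from the original file) : how such a flag is
+  * typically consumed. Shown commented out, since the includes and basic types
+  * only appear further down this file.
+  *
+  *   static U32 example_read32(const void* p)   // hypothetical helper
+  *   {
+  *   #if ZSTD_UNALIGNED_ACCESS
+  *       return *(const U32*)p;                 // direct load : needs tolerant hardware
+  *   #else
+  *       U32 v; memcpy(&v, p, sizeof(v));       // always safe, same codegen on x86/ARMv7+
+  *       return v;
+  *   #endif
+  *   }
+  */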
61
+
62
+
63
+ /********************************************************
64
+ * Includes
65
+ *********************************************************/
66
+ #include <stdlib.h> /* calloc */
67
+ #include <string.h> /* memcpy, memmove */
68
+ #include <stdio.h> /* debug : printf */
69
+ #include "zstd_static.h"
70
+ #if defined(__clang__) || defined(__GNUC__)
71
+ # ifdef __clang__
72
+ # pragma clang diagnostic ignored "-Wtypedef-redefinition"
73
+ # endif
74
+ # include "fse.c" /* due to GCC/Clang inlining limitations, including *.c runs noticeably faster */
75
+ #else
76
+ # include "fse_static.h"
77
+ #endif
78
+
79
+
80
+ /********************************************************
81
+ * Compiler specifics
82
+ *********************************************************/
83
+ //#if (!(defined(_MSC_VER) && (_MSC_VER<=1500))) /* exclude Visual 2008 and below */
84
+ #ifdef __AVX2__
85
+ # include <immintrin.h> /* AVX2 intrinsics */
86
+ #endif
87
+
88
+ #ifdef _MSC_VER /* Visual Studio */
89
+ # define FORCE_INLINE static __forceinline
90
+ # include <intrin.h> /* For Visual 2005 */
91
+ # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
92
+ # pragma warning(disable : 4324) /* disable: C4324: padded structure */
93
+ #else
94
+ # define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
95
+ # ifdef __GNUC__
96
+ # define FORCE_INLINE static inline __attribute__((always_inline))
97
+ # else
98
+ # define FORCE_INLINE static inline
99
+ # endif
100
+ #endif
101
+
102
+
103
+ /********************************************************
104
+ * Basic Types
105
+ *********************************************************/
106
+ #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99
107
+ # include <stdint.h>
108
+ typedef uint8_t BYTE;
109
+ typedef uint16_t U16;
110
+ typedef int16_t S16;
111
+ typedef uint32_t U32;
112
+ typedef int32_t S32;
113
+ typedef uint64_t U64;
114
+ #else
115
+ typedef unsigned char BYTE;
116
+ typedef unsigned short U16;
117
+ typedef signed short S16;
118
+ typedef unsigned int U32;
119
+ typedef signed int S32;
120
+ typedef unsigned long long U64;
121
+ #endif
122
+
123
+
124
+ /********************************************************
125
+ * Constants
126
+ *********************************************************/
127
+ static const U32 ZSTD_magicNumber = 0xFD2FB51C;
128
+
129
+ #define HASH_LOG (ZSTD_MEMORY_USAGE - 2)
130
+ #define HASH_TABLESIZE (1 << HASH_LOG)
131
+ #define HASH_MASK (HASH_TABLESIZE - 1)
132
+
133
+ #define MAXD_LOG 16
134
+ #define MAX_DISTANCE ((1 << MAXD_LOG) - 1)
135
+
136
+ #define KNUTH 2654435761
137
+
138
+ #define BIT7 128
139
+ #define BIT6 64
140
+ #define BIT5 32
141
+ #define BIT4 16
142
+
143
+ #define KB *(1 <<10)
144
+ #define MB *(1 <<20)
145
+ #define GB *(1U<<30)
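+ /* Editor's note (not part of the original source) : these unit macros are meant
+  * to be used postfix, so "128 KB" in BLOCKSIZE below expands to
+  * "128 *(1 <<10)" = 131072 bytes. */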
146
+
147
+ #define BLOCKSIZE (128 KB) // define, for static allocation
148
+ static const U32 g_maxDistance = 512 KB;
149
+ static const U32 g_maxLimit = 1 GB;
150
+ static const U32 g_searchStrength = 8;
151
+
152
+ #define WORKPLACESIZE (BLOCKSIZE*11/4)
153
+ #define MINMATCH 4
154
+ #define MLbits 7
155
+ #define LLbits 6
156
+ #define Offbits 5
157
+ #define MaxML ((1<<MLbits )-1)
158
+ #define MaxLL ((1<<LLbits )-1)
159
+ #define MaxOff ((1<<Offbits)-1)
160
+ #define LitFSELog 11
161
+ #define MLFSELog 10
162
+ #define LLFSELog 10
163
+ #define OffFSELog 9
164
+
165
+ #define LITERAL_NOENTROPY 63
166
+ #define COMMAND_NOENTROPY 7 /* to remove */
167
+
168
+ static const size_t ZSTD_blockHeaderSize = 3;
169
+ static const size_t ZSTD_frameHeaderSize = 4;
170
+
171
+
172
+ /********************************************************
173
+ * Memory operations
174
+ *********************************************************/
175
+ static unsigned ZSTD_32bits(void) { return sizeof(void*)==4; }
176
+ static unsigned ZSTD_64bits(void) { return sizeof(void*)==8; }
177
+
178
+ static unsigned ZSTD_isLittleEndian(void)
179
+ {
180
+ const union { U32 i; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
181
+ return one.c[0];
182
+ }
183
+
184
+ static U16 ZSTD_read16(const void* p) { return *(U16*)p; }
185
+
186
+ static U32 ZSTD_read32(const void* p) { return *(U32*)p; }
187
+
188
+ static size_t ZSTD_read_ARCH(const void* p) { return *(size_t*)p; }
189
+
190
+ static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
191
+
192
+ static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
193
+
194
+ #define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
195
+
196
+ static void ZSTD_wildcopy(void* dst, const void* src, size_t length)
197
+ {
198
+ const BYTE* ip = (const BYTE*)src;
199
+ BYTE* op = (BYTE*)dst;
200
+ BYTE* const oend = op + length;
201
+ while (op < oend) COPY8(op, ip);
202
+ }
203
+
204
+ static U32 ZSTD_readLE32(const void* memPtr)
205
+ {
206
+ if (ZSTD_isLittleEndian())
207
+ return ZSTD_read32(memPtr);
208
+ else
209
+ {
210
+ const BYTE* p = (const BYTE*)memPtr;
211
+ return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));
212
+ }
213
+ }
214
+
215
+ static void ZSTD_writeLE32(void* memPtr, U32 val32)
216
+ {
217
+ if (ZSTD_isLittleEndian())
218
+ {
219
+ memcpy(memPtr, &val32, 4);
220
+ }
221
+ else
222
+ {
223
+ BYTE* p = (BYTE*)memPtr;
224
+ p[0] = (BYTE)val32;
225
+ p[1] = (BYTE)(val32>>8);
226
+ p[2] = (BYTE)(val32>>16);
227
+ p[3] = (BYTE)(val32>>24);
228
+ }
229
+ }
230
+
231
+ static U32 ZSTD_readBE32(const void* memPtr)
232
+ {
233
+ const BYTE* p = (const BYTE*)memPtr;
234
+ return (U32)(((U32)p[0]<<24) + ((U32)p[1]<<16) + ((U32)p[2]<<8) + ((U32)p[3]<<0));
235
+ }
236
+
237
+ static void ZSTD_writeBE32(void* memPtr, U32 value)
238
+ {
239
+ BYTE* const p = (BYTE* const) memPtr;
240
+ p[0] = (BYTE)(value>>24);
241
+ p[1] = (BYTE)(value>>16);
242
+ p[2] = (BYTE)(value>>8);
243
+ p[3] = (BYTE)(value>>0);
244
+ }
245
+
246
+ static size_t ZSTD_writeProgressive(void* ptr, size_t value)
247
+ {
248
+ BYTE* const bStart = (BYTE* const)ptr;
249
+ BYTE* byte = bStart;
250
+
251
+ do
252
+ {
253
+ BYTE l = value & 127;
254
+ value >>= 7;
255
+ if (value) l += 128;
256
+ *byte++ = l;
257
+ } while (value);
258
+
259
+ return byte - bStart;
260
+ }
261
+
262
+
263
+ static size_t ZSTD_readProgressive(size_t* result, const void* ptr)
264
+ {
265
+ const BYTE* const bStart = (const BYTE* const)ptr;
266
+ const BYTE* byte = bStart;
267
+ size_t r = 0;
268
+ U32 shift = 0;
269
+
270
+ do
271
+ {
272
+ r += (*byte & 127) << shift;
273
+ shift += 7;
274
+ } while (*byte++ & 128);
275
+
276
+ *result = r;
277
+ return byte - bStart;
278
+ }
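+ /* Editor's example (not part of the original source) : round trip of the
+  * progressive (varint-style) format above. The helper name is hypothetical. */
+ static void ZSTD_exampleProgressive(void)
+ {
+     BYTE buffer[8];
+     size_t decoded;
+     size_t written = ZSTD_writeProgressive(buffer, 300);  /* 2 bytes : 0xAC, 0x02 */
+     size_t read    = ZSTD_readProgressive(&decoded, buffer);
+     (void)written; (void)read;   /* written == read == 2, decoded == 300 */
+ }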
279
+
280
+
281
+ /**************************************
282
+ * Local structures
283
+ ***************************************/
284
+ typedef enum { bt_compressed, bt_raw, bt_rle, bt_end } blockType_t;
285
+
286
+ typedef struct
287
+ {
288
+ blockType_t blockType;
289
+ U32 origSize;
290
+ } blockProperties_t;
291
+
292
+ typedef struct
293
+ {
294
+ const BYTE* base;
295
+ U32 current;
296
+ U32 nextUpdate;
297
+ BYTE* workplace;
298
+ #ifdef __AVX2__
299
+ __m256i hashTable[HASH_TABLESIZE>>3];
300
+ #else
301
+ U32 hashTable[HASH_TABLESIZE];
302
+ #endif
303
+ } cctxi_t;
304
+
305
+
306
+ ZSTD_cctx_t ZSTD_createCCtx(void)
307
+ {
308
+ cctxi_t* srt = (cctxi_t *) malloc( sizeof(cctxi_t) );
309
+ srt->workplace = (BYTE*) malloc(WORKPLACESIZE);
310
+ return (ZSTD_cctx_t)srt;
311
+ }
312
+
313
+
314
+ void ZSTD_resetCCtx(ZSTD_cctx_t ctx)
315
+ {
316
+ cctxi_t* srt = (cctxi_t*)ctx;
317
+ srt->base = NULL;
318
+ memset(srt->hashTable, 0, HASH_TABLESIZE*4);
319
+ }
320
+
321
+
322
+ size_t ZSTD_freeCCtx(ZSTD_cctx_t ctx)
323
+ {
324
+ cctxi_t *srt = (cctxi_t *) (ctx);
325
+ free(srt->workplace);
326
+ free(srt);
327
+ return 0;
328
+ }
329
+
330
+
331
+ /**************************************
332
+ * Error Management
333
+ **************************************/
334
+ /* tells if a return value is an error code */
335
+ unsigned ZSTD_isError(size_t code)
336
+ {
337
+ return (code > (size_t)(-ZSTD_ERROR_maxCode));
338
+ }
339
+
340
+ #define ZSTD_GENERATE_STRING(STRING) #STRING,
341
+ static const char* ZSTD_errorStrings[] = { ZSTD_LIST_ERRORS(ZSTD_GENERATE_STRING) };
342
+
343
+ /* provides error code string (useful for debugging) */
344
+ const char* ZSTD_getErrorName(size_t code)
345
+ {
346
+ static const char* codeError = "Unspecified error code";
347
+ if (ZSTD_isError(code)) return ZSTD_errorStrings[-(int)(code)];
348
+ return codeError;
349
+ }
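+ /* Editor's usage sketch (not part of the original source) : error codes are
+  * small negative values cast to size_t, so results are validated with
+  * ZSTD_isError() before being used as sizes. The helper name is hypothetical. */
+ static void ZSTD_exampleCheck(size_t code)
+ {
+     if (ZSTD_isError(code))
+         printf("zstd error : %s\n", ZSTD_getErrorName(code));
+ }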
350
+
351
+
352
+ /**************************************
353
+ * Tool functions
354
+ **************************************/
355
+ unsigned ZSTD_versionNumber (void) { return ZSTD_VERSION_NUMBER; }
356
+
357
+ static unsigned ZSTD_highbit(U32 val)
358
+ {
359
+ # if defined(_MSC_VER) /* Visual */
360
+ unsigned long r;
361
+ _BitScanReverse(&r, val);
362
+ return (unsigned)r;
363
+ # elif defined(__GNUC__) && (GCC_VERSION >= 304) // GCC Intrinsic
364
+ return 31 - __builtin_clz(val);
365
+ # else // Software version
366
+ static const int DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
367
+ U32 v = val;
368
+ int r;
369
+ v |= v >> 1;
370
+ v |= v >> 2;
371
+ v |= v >> 4;
372
+ v |= v >> 8;
373
+ v |= v >> 16;
374
+ r = DeBruijnClz[(U32)(v * 0x07C4ACDDU) >> 27];
375
+ return r;
376
+ # endif
377
+ }
378
+
379
+ static unsigned ZSTD_NbCommonBytes (register size_t val)
380
+ {
381
+ if (ZSTD_isLittleEndian())
382
+ {
383
+ if (ZSTD_64bits())
384
+ {
385
+ # if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
386
+ unsigned long r = 0;
387
+ _BitScanForward64( &r, (U64)val );
388
+ return (int)(r>>3);
389
+ # elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
390
+ return (__builtin_ctzll((U64)val) >> 3);
391
+ # else
392
+ static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
393
+ return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
394
+ # endif
395
+ }
396
+ else /* 32 bits */
397
+ {
398
+ # if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
399
+ unsigned long r;
400
+ _BitScanForward( &r, (U32)val );
401
+ return (int)(r>>3);
402
+ # elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
403
+ return (__builtin_ctz((U32)val) >> 3);
404
+ # else
405
+ static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
406
+ return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
407
+ # endif
408
+ }
409
+ }
410
+ else /* Big Endian CPU */
411
+ {
412
+ if (ZSTD_64bits())
413
+ {
414
+ # if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
415
+ unsigned long r = 0;
416
+ _BitScanReverse64( &r, val );
417
+ return (unsigned)(r>>3);
418
+ # elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
419
+ return (__builtin_clzll(val) >> 3);
420
+ # else
421
+ unsigned r;
422
+ const unsigned n32 = sizeof(size_t)*4; /* computed this way because the compiler complains in 32-bit mode */
423
+ if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
424
+ if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
425
+ r += (!val);
426
+ return r;
427
+ # endif
428
+ }
429
+ else /* 32 bits */
430
+ {
431
+ # if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
432
+ unsigned long r = 0;
433
+ _BitScanReverse( &r, (unsigned long)val );
434
+ return (unsigned)(r>>3);
435
+ # elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
436
+ return (__builtin_clz(val) >> 3);
437
+ # else
438
+ unsigned r;
439
+ if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
440
+ r += (!val);
441
+ return r;
442
+ # endif
443
+ }
444
+ }
445
+ }
446
+
447
+ static unsigned ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit)
448
+ {
449
+ const BYTE* const pStart = pIn;
450
+
451
+ while ((pIn<pInLimit-(sizeof(size_t)-1)))
452
+ {
453
+ size_t diff = ZSTD_read_ARCH(pMatch) ^ ZSTD_read_ARCH(pIn);
454
+ if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; }
455
+ pIn += ZSTD_NbCommonBytes(diff);
456
+ return (unsigned)(pIn - pStart);
457
+ }
458
+
459
+ if (ZSTD_64bits()) if ((pIn<(pInLimit-3)) && (ZSTD_read32(pMatch) == ZSTD_read32(pIn))) { pIn+=4; pMatch+=4; }
460
+ if ((pIn<(pInLimit-1)) && (ZSTD_read16(pMatch) == ZSTD_read16(pIn))) { pIn+=2; pMatch+=2; }
461
+ if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
462
+ return (unsigned)(pIn - pStart);
463
+ }
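+ /* Editor's illustration (not part of the original source) : ZSTD_count()
+  * measures a common prefix one word at a time, then trims the last word with
+  * the DeBruijn-based ZSTD_NbCommonBytes(). The helper name is hypothetical. */
+ static void ZSTD_exampleCount(void)
+ {
+     const BYTE a[9] = "abcdefgh";
+     const BYTE b[9] = "abcdxxxx";
+     unsigned n = ZSTD_count(a, b, a + 8);   /* == 4 : "abcd" matches */
+     (void)n;
+ }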
464
+
465
+
466
+ /********************************************************
467
+ * Compression
468
+ *********************************************************/
469
+ size_t ZSTD_compressBound(size_t srcSize) /* maximum compressed size */
470
+ {
471
+ return FSE_compressBound(srcSize) + 12;
472
+ }
473
+
474
+
475
+ static size_t ZSTD_compressRle (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
476
+ {
477
+ BYTE* const ostart = (BYTE* const)dst;
478
+
479
+ /* at this stage : dstSize >= FSE_compressBound(srcSize) > (ZSTD_blockHeaderSize+1) (checked by ZSTD_compressLiterals()) */
480
+ (void)maxDstSize;
481
+
482
+ ostart[ZSTD_blockHeaderSize] = *(BYTE*)src;
483
+
484
+ // Build header
485
+ {
486
+ ostart[0] = (BYTE)(srcSize>>16);
487
+ ostart[1] = (BYTE)(srcSize>>8);
488
+ ostart[2] = (BYTE)srcSize;
489
+ ostart[0] += (BYTE)(bt_rle<<6);
490
+ }
491
+
492
+ return ZSTD_blockHeaderSize+1;
493
+ }
494
+
495
+
496
+ static size_t ZSTD_noCompressBlock (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
497
+ {
498
+ BYTE* const ostart = (BYTE* const)dst;
499
+
500
+ if (srcSize + ZSTD_blockHeaderSize > maxDstSize) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
501
+ memcpy(ostart + ZSTD_blockHeaderSize, src, srcSize);
502
+
503
+ // Build header
504
+ {
505
+ ostart[0] = (BYTE)(srcSize>>16);
506
+ ostart[1] = (BYTE)(srcSize>>8);
507
+ ostart[2] = (BYTE)srcSize;
508
+ ostart[0] += (BYTE)(bt_raw<<6); /* is a raw (uncompressed) block */
509
+ }
510
+
511
+ return ZSTD_blockHeaderSize+srcSize;
512
+ }
513
+
514
+
515
+ /* return : size of CStream in bits */
516
+ static size_t ZSTD_compressLiterals_usingCTable(void* dst, size_t dstSize,
517
+ const void* src, size_t srcSize,
518
+ const void* CTable)
519
+ {
520
+ const BYTE* const istart = (const BYTE*)src;
521
+ const BYTE* ip = istart;
522
+ const BYTE* const iend = istart + srcSize;
523
+ FSE_CStream_t bitC;
524
+ FSE_CState_t CState1, CState2;
525
+
526
+ // init
527
+ (void)dstSize; // objective : ensure it fits into dstBuffer (Todo)
528
+ FSE_initCStream(&bitC, dst);
529
+ FSE_initCState(&CState1, CTable);
530
+ CState2 = CState1;
531
+
532
+ /* Note : at this stage, srcSize > LITERAL_NOENTROPY (checked by ZSTD_compressLiterals()) */
533
+ // join to mod 2
534
+ if (srcSize & 1)
535
+ {
536
+ FSE_encodeByte(&bitC, &CState1, *ip++);
537
+ FSE_flushBits(&bitC);
538
+ }
539
+
540
+ // join to mod 4
541
+ if ((sizeof(size_t)*8 > LitFSELog*4+7 ) && (srcSize & 2)) // test bit 2
542
+ {
543
+ FSE_encodeByte(&bitC, &CState2, *ip++);
544
+ FSE_encodeByte(&bitC, &CState1, *ip++);
545
+ FSE_flushBits(&bitC);
546
+ }
547
+
548
+ // 2 or 4 encoding per loop
549
+ while (ip<iend)
550
+ {
551
+ FSE_encodeByte(&bitC, &CState2, *ip++);
552
+
553
+ if (sizeof(size_t)*8 < LitFSELog*2+7 ) // this test must be static
554
+ FSE_flushBits(&bitC);
555
+
556
+ FSE_encodeByte(&bitC, &CState1, *ip++);
557
+
558
+ if (sizeof(size_t)*8 > LitFSELog*4+7 ) // this test must be static
559
+ {
560
+ FSE_encodeByte(&bitC, &CState2, *ip++);
561
+ FSE_encodeByte(&bitC, &CState1, *ip++);
562
+ }
563
+
564
+ FSE_flushBits(&bitC);
565
+ }
566
+
567
+ FSE_flushCState(&bitC, &CState2);
568
+ FSE_flushCState(&bitC, &CState1);
569
+ return FSE_closeCStream(&bitC);
570
+ }
571
+
572
+
573
+ size_t ZSTD_minGain(size_t srcSize)
574
+ {
575
+ return (srcSize >> 6) + 1;
576
+ }
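+ /* Editor's note (worked example, not part of the original source) : for a full
+  * 128 KB block, minGain = (131072 >> 6) + 1 = 2049, so a compressed result is
+  * kept only when it saves at least ~2 KB versus the raw input. */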
577
+
578
+
579
+ static size_t ZSTD_compressLiterals (void* dst, size_t dstSize,
580
+ const void* src, size_t srcSize)
581
+ {
582
+ const BYTE* const istart = (const BYTE*) src;
583
+ const BYTE* ip = istart;
584
+
585
+ BYTE* const ostart = (BYTE*) dst;
586
+ BYTE* op = ostart + ZSTD_blockHeaderSize;
587
+ BYTE* const oend = ostart + dstSize;
588
+
589
+ U32 maxSymbolValue = 256;
590
+ U32 tableLog = LitFSELog;
591
+ U32 count[256];
592
+ S16 norm[256];
593
+ U32 CTable[ FSE_CTABLE_SIZE_U32(LitFSELog, 256) ];
594
+ size_t errorCode;
595
+ const size_t minGain = ZSTD_minGain(srcSize);
596
+
597
+ // early out
598
+ if (dstSize < FSE_compressBound(srcSize)) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
599
+
600
+ // Scan input and build symbol stats
601
+ errorCode = FSE_count (count, ip, srcSize, &maxSymbolValue);
602
+ if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC;
603
+ if (errorCode == srcSize) return 1;
604
+ if (errorCode < ((srcSize * 7) >> 10)) return 0;
605
+
606
+ tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue);
607
+ errorCode = (int)FSE_normalizeCount (norm, tableLog, count, srcSize, maxSymbolValue);
608
+ if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC;
609
+
610
+ // Write table description header
611
+ errorCode = FSE_writeHeader (op, FSE_MAX_HEADERSIZE, norm, maxSymbolValue, tableLog);
612
+ if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC;
613
+ op += errorCode;
614
+
615
+ // Compress
616
+ errorCode = FSE_buildCTable (&CTable, norm, maxSymbolValue, tableLog);
617
+ if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC;
618
+ errorCode = ZSTD_compressLiterals_usingCTable(op, oend - op, ip, srcSize, &CTable);
619
+ if (ZSTD_isError(errorCode)) return errorCode;
620
+ op += errorCode;
621
+
622
+ // check compressibility
623
+ if ( (size_t)(op-ostart) >= srcSize-minGain)
624
+ return 0;
625
+
626
+ // Build header
627
+ {
628
+ size_t totalSize;
629
+ totalSize = op - ostart - ZSTD_blockHeaderSize;
630
+ ostart[0] = (BYTE)(totalSize>>16);
631
+ ostart[1] = (BYTE)(totalSize>>8);
632
+ ostart[2] = (BYTE)totalSize;
633
+ ostart[0] += (BYTE)(bt_compressed<<6); /* is a block, is compressed */
634
+ }
635
+
636
+ return op-ostart;
637
+ }
638
+
639
+
640
+ static size_t ZSTD_compressEntropy(BYTE* dst, size_t maxDstSize,
641
+ const BYTE* op_lit_start, const BYTE* op_lit,
642
+ const BYTE* op_litLength_start, const BYTE* op_litLength,
643
+ const BYTE* op_matchLength_start,
644
+ const U32* op_offset_start,
645
+ const BYTE* op_dumps_start, const BYTE* op_dumps,
646
+ size_t srcSize, size_t lastLLSize
647
+ )
648
+ {
649
+ FSE_CStream_t blockStream;
650
+ U32 count[256];
651
+ S16 norm[256];
652
+ size_t mostFrequent;
653
+ U32 max = 255;
654
+ U32 tableLog = 11;
655
+ const size_t nbSeq = op_litLength - op_litLength_start;
656
+ U32 CTable_LitLength [FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL )];
657
+ U32 CTable_OffsetBits [FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
658
+ U32 CTable_MatchLength[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML )];
659
+ U32 LLtype, Offtype, MLtype;
660
+ BYTE* op;
661
+ const U32* op_offset = op_offset_start + nbSeq;
662
+ const BYTE* op_matchLength = op_matchLength_start + nbSeq;
663
+ BYTE offsetBits_start[BLOCKSIZE / 4];
664
+ BYTE* offsetBitsPtr = offsetBits_start;
665
+ const size_t minGain = ZSTD_minGain(srcSize);
666
+ const size_t maxCSize = srcSize - minGain;
667
+ const size_t minSeqSize = 1 /*lastL*/ + 2 /*dHead*/ + 2 /*dumpsIn*/ + 5 /*SeqHead*/ + 3 /*SeqIn*/ + 1 /*margin*/ + ZSTD_blockHeaderSize;
668
+ const size_t maxLSize = maxCSize > minSeqSize ? maxCSize - minSeqSize : 0;
669
+ BYTE* seqHead;
670
+
671
+
672
+ /* init */
673
+ op = dst;
674
+
675
+ /* Encode literals */
676
+ {
677
+ size_t cSize;
678
+ size_t litSize = op_lit - op_lit_start;
679
+ if (litSize <= LITERAL_NOENTROPY) cSize = ZSTD_noCompressBlock (op, maxDstSize, op_lit_start, litSize);
680
+ else
681
+ {
682
+ cSize = ZSTD_compressLiterals(op, maxDstSize, op_lit_start, litSize);
683
+ if (cSize == 1) cSize = ZSTD_compressRle (op, maxDstSize, op_lit_start, litSize);
684
+ else if (cSize == 0)
685
+ {
686
+ if (litSize >= maxLSize) return 0; /* block not compressible enough */
687
+ cSize = ZSTD_noCompressBlock (op, maxDstSize, op_lit_start, litSize);
688
+ }
689
+ }
690
+ if (ZSTD_isError(cSize)) return cSize;
691
+ op += cSize;
692
+ }
693
+
694
+ /* Encode Sequences */
695
+
696
+ /* seqHeader */
697
+ op += ZSTD_writeProgressive(op, lastLLSize); CHECK_OVERFLOW(op <= dst + maxDstSize);
698
+ seqHead = op;
699
+
700
+ /* dumps */
701
+ {
702
+ size_t dumpsLength = op_dumps- op_dumps_start;
703
+ if (dumpsLength < 512)
704
+ {
705
+ op[0] = (BYTE)(dumpsLength >> 8);
706
+ op[1] = (BYTE)(dumpsLength);
707
+ op += 2; CHECK_OVERFLOW(op <= dst + maxDstSize);
708
+ }
709
+ else
710
+ {
711
+ op[0] = 2;
712
+ op[1] = (BYTE)(dumpsLength>>8);
713
+ op[2] = (BYTE)(dumpsLength);
714
+ op += 3; CHECK_OVERFLOW(op <= dst + maxDstSize);
715
+ }
716
+ memcpy(op, op_dumps_start, dumpsLength);
717
+ op += dumpsLength; CHECK_OVERFLOW(op <= dst + maxDstSize);
718
+ }
719
+
720
+ /* Encoding table of Literal Lengths */
721
+ max = MaxLL;
722
+ mostFrequent = FSE_countFast(count, op_litLength_start, nbSeq, &max);
723
+ if (mostFrequent == nbSeq)
724
+ {
725
+ *op++ = *op_litLength_start; CHECK_OVERFLOW(op <= dst + maxDstSize);
726
+ FSE_buildCTable_rle(CTable_LitLength, (BYTE)max);
727
+ LLtype = bt_rle;
728
+ }
729
+ else if ((nbSeq < 64) || (mostFrequent < (nbSeq >> (LLbits-1))))
730
+ {
731
+ FSE_buildCTable_raw(CTable_LitLength, LLbits);
732
+ LLtype = bt_raw;
733
+ }
734
+ else
735
+ {
736
+ tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max);
737
+ FSE_normalizeCount(norm, tableLog, count, nbSeq, max);
738
+ op += FSE_writeHeader(op, maxDstSize, norm, max, tableLog); CHECK_OVERFLOW(op <= dst + maxDstSize);
739
+ FSE_buildCTable(CTable_LitLength, norm, max, tableLog);
740
+ LLtype = bt_compressed;
741
+ }
742
+
743
+ /* Encoding table of Offsets */
744
+ {
745
+ /* create OffsetBits */
746
+ size_t i;
747
+ max = MaxOff;
748
+ for (i=0; i<nbSeq; i++)
749
+ {
750
+ offsetBits_start[i] = (BYTE)ZSTD_highbit(op_offset_start[i]) + 1;
751
+ if (op_offset_start[i]==0) offsetBits_start[i]=0;
752
+ }
753
+ offsetBitsPtr += nbSeq;
754
+ mostFrequent = FSE_countFast(count, offsetBits_start, nbSeq, &max);
755
+ }
756
+ if (mostFrequent == nbSeq)
757
+ {
758
+ *op++ = *offsetBits_start; CHECK_OVERFLOW(op <= dst + maxDstSize);
759
+ FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max);
760
+ Offtype = bt_rle;
761
+ }
762
+ else if ((nbSeq < 64) || (mostFrequent < (nbSeq >> (Offbits-1))))
763
+ {
764
+ FSE_buildCTable_raw(CTable_OffsetBits, Offbits);
765
+ Offtype = bt_raw;
766
+ }
767
+ else
768
+ {
769
+ tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max);
770
+ FSE_normalizeCount(norm, tableLog, count, nbSeq, max);
771
+ op += FSE_writeHeader(op, maxDstSize, norm, max, tableLog); CHECK_OVERFLOW(op <= dst + maxDstSize);
772
+ FSE_buildCTable(CTable_OffsetBits, norm, max, tableLog);
773
+ Offtype = bt_compressed;
774
+ }
775
+
776
+ /* Encoding Table of MatchLengths */
777
+ max = MaxML;
778
+ mostFrequent = FSE_countFast(count, op_matchLength_start, nbSeq, &max);
779
+ if (mostFrequent == nbSeq)
780
+ {
781
+ *op++ = *op_matchLength_start; CHECK_OVERFLOW(op <= dst + maxDstSize);
782
+ FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max);
783
+ MLtype = bt_rle;
784
+ }
785
+ else if ((nbSeq < 64) || (mostFrequent < (nbSeq >> (MLbits-1))))
786
+ {
787
+ FSE_buildCTable_raw(CTable_MatchLength, MLbits);
788
+ MLtype = bt_raw;
789
+ }
790
+ else
791
+ {
792
+ tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max);
793
+ FSE_normalizeCount(norm, tableLog, count, nbSeq, max);
794
+ op += FSE_writeHeader(op, maxDstSize, norm, max, tableLog); CHECK_OVERFLOW(op <= dst + maxDstSize);
795
+ FSE_buildCTable(CTable_MatchLength, norm, max, tableLog);
796
+ MLtype = bt_compressed;
797
+ }
798
+
799
+ seqHead[0] += (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
800
+
801
+ /* Encoding */
802
+ {
803
+ FSE_CState_t stateMatchLength;
804
+ FSE_CState_t stateOffsetBits;
805
+ FSE_CState_t stateLitLength;
806
+
807
+ FSE_initCStream(&blockStream, op);
808
+ FSE_initCState(&stateMatchLength, CTable_MatchLength);
809
+ FSE_initCState(&stateOffsetBits, CTable_OffsetBits);
810
+ FSE_initCState(&stateLitLength, CTable_LitLength);
811
+
812
+ while (op_litLength > op_litLength_start)
813
+ {
814
+ BYTE matchLength = *(--op_matchLength);
815
+ U32 offset = *(--op_offset);
816
+ BYTE offCode = *(--offsetBitsPtr); /* 32b*/ /* 64b*/
817
+ U32 nbBits = (offCode-1) * (!!offCode);
818
+ BYTE litLength = *(--op_litLength); /* (7)*/ /* (7)*/
819
+ FSE_encodeByte(&blockStream, &stateMatchLength, matchLength); /* 17 */ /* 17 */
820
+ if (ZSTD_32bits()) FSE_flushBits(&blockStream); /* 7 */
821
+ FSE_addBits(&blockStream, offset, nbBits); /* 32 */ /* 42 */
822
+ if (ZSTD_32bits()) FSE_flushBits(&blockStream); /* 7 */
823
+ FSE_encodeByte(&blockStream, &stateOffsetBits, offCode); /* 16 */ /* 51 */
824
+ FSE_encodeByte(&blockStream, &stateLitLength, litLength); /* 26 */ /* 61 */
825
+ FSE_flushBits(&blockStream); /* 7 */ /* 7 */
826
+ }
827
+
828
+ FSE_flushCState(&blockStream, &stateMatchLength);
829
+ FSE_flushCState(&blockStream, &stateOffsetBits);
830
+ FSE_flushCState(&blockStream, &stateLitLength);
831
+ }
832
+
833
+ op += FSE_closeCStream(&blockStream); CHECK_OVERFLOW(op <= dst + maxDstSize);
834
+
835
+ /* check compressibility */
836
+ if ((size_t)(op-dst) >= maxCSize) return 0;
837
+
838
+ return op - dst;
839
+ }
840
+
841
+
842
+ static size_t ZSTD_storeSeq(BYTE* op_lit, BYTE* op_ll, U32* op_offset, BYTE* op_ml, BYTE* op_dumps,
843
+ size_t litLength, const BYTE* srcLit, size_t offset, size_t matchLength)
844
+ {
845
+ const BYTE* const dumpStart = op_dumps;
846
+ const BYTE* const l_end = op_lit + litLength;
847
+
848
+
849
+ /* copy Literals */
850
+ while (op_lit<l_end) COPY8(op_lit, srcLit);
851
+
852
+ /* literal Length */
853
+ if (litLength >= MaxLL)
854
+ {
855
+ *op_ll++ = MaxLL;
856
+ if (litLength<255 + MaxLL)
857
+ *op_dumps++ = (BYTE)(litLength - MaxLL);
858
+ else
859
+ {
860
+ *op_dumps++ = 255;
861
+ ZSTD_writeLE32(op_dumps, (U32)litLength); op_dumps += 3;
862
+
863
+ //litLength |= 0xFF000000;
864
+ //ZSTD_writeBE32(op_dumps, (U32)litLength);
865
+ //op_dumps += 4;
866
+ }
867
+ }
868
+ else *op_ll = (BYTE)litLength;
869
+
870
+ /* match offset */
871
+ *op_offset = (U32)offset;
872
+
873
+ /* match Length */
874
+ if (matchLength >= MaxML)
875
+ {
876
+ *op_ml++ = MaxML;
877
+ if (matchLength<255 + MaxML)
878
+ *op_dumps++ = (BYTE)(matchLength - MaxML);
879
+ else
880
+ {
881
+ *op_dumps++ = 255;
882
+ ZSTD_writeLE32(op_dumps, (U32)matchLength); op_dumps+=3;
883
+ //*(U32*)op_dumps = (U32)matchLength; op_dumps += 3; /* store direct result */
884
+
885
+ //matchLength |= 0xFF000000;
886
+ //ZSTD_writeBE32(op_dumps, (U32)matchLength);
887
+ //op_dumps += 4;
888
+ }
889
+ }
890
+ else *op_ml = (BYTE)matchLength;
891
+
892
+ return op_dumps - dumpStart;
893
+ }
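+ /* Editor's worked example (not part of the original source) : a literal run of
+  * 70 bytes exceeds MaxLL (63), so the token stores 63 and the "dumps" stream
+  * stores the remainder 70-63 = 7 in one byte ; a run of 400 bytes stores the
+  * escape byte 255 followed by 400 as a 3-byte little-endian value. */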
894
+
895
+
896
+ //static const U32 hashMask = (1<<HASH_LOG)-1;
897
+ //static const U64 prime5bytes = 889523592379ULL;
898
+ //static const U64 prime6bytes = 227718039650203ULL;
899
+ static const U64 prime7bytes = 58295818150454627ULL;
900
+ //static const U64 prime8bytes = 14923729446516375013ULL;
901
+
902
+ //static U32 ZSTD_hashPtr(const void* p) { return (U32) _bextr_u64(*(U64*)p * prime7bytes, (56-HASH_LOG), HASH_LOG); }
903
+ //static U32 ZSTD_hashPtr(const void* p) { return ( (*(U64*)p * prime7bytes) << 8 >> (64-HASH_LOG)); }
904
+ //static U32 ZSTD_hashPtr(const void* p) { return ( (*(U64*)p * prime7bytes) >> (56-HASH_LOG)) & ((1<<HASH_LOG)-1); }
905
+ //static U32 ZSTD_hashPtr(const void* p) { return ( ((*(U64*)p & 0xFFFFFFFFFFFFFF) * prime7bytes) >> (64-HASH_LOG)); }
906
+
907
+ //static U32 ZSTD_hashPtr(const void* p) { return ( (*(U64*)p * prime8bytes) >> (64-HASH_LOG)); }
908
+ static U32 ZSTD_hashPtr(const void* p) { return ( (*(U64*)p * prime7bytes) >> (56-HASH_LOG)) & HASH_MASK; }
909
+ //static U32 ZSTD_hashPtr(const void* p) { return ( (*(U64*)p * prime6bytes) >> (48-HASH_LOG)) & HASH_MASK; }
910
+ //static U32 ZSTD_hashPtr(const void* p) { return ( (*(U64*)p * prime5bytes) >> (40-HASH_LOG)) & HASH_MASK; }
911
+ //static U32 ZSTD_hashPtr(const void* p) { return ( (*(U32*)p * KNUTH) >> (32-HASH_LOG)); }
912
+
913
+ static void ZSTD_addPtr(U32* table, const BYTE* p, const BYTE* start) { table[ZSTD_hashPtr(p)] = (U32)(p-start); }
914
+
915
+ static const BYTE* ZSTD_updateMatch(U32* table, const BYTE* p, const BYTE* start)
916
+ {
917
+ U32 h = ZSTD_hashPtr(p);
918
+ const BYTE* r;
919
+ r = table[h] + start;
920
+ //table[h] = (U32)(p - start);
921
+ ZSTD_addPtr(table, p, start);
922
+ return r;
923
+ }
924
+
925
+ static int ZSTD_checkMatch(const BYTE* match, const BYTE* ip)
926
+ {
927
+ return ZSTD_read32(match) == ZSTD_read32(ip);
928
+ }
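+ /* Editor's sketch (not part of the original source) : how the helpers above
+  * combine into a single-probe search — the table keeps one prior position per
+  * hash, and the 4-byte check filters out collisions before a match is
+  * extended. The function name is hypothetical. */
+ static void ZSTD_exampleProbe(U32* table, const BYTE* ip, const BYTE* base, const BYTE* iend)
+ {
+     const BYTE* match = ZSTD_updateMatch(table, ip, base);   /* previous position ; table updated */
+     if (ZSTD_checkMatch(match, ip))                          /* verify 4 bytes */
+     {
+         size_t matchLength = MINMATCH + ZSTD_count(ip+MINMATCH, match+MINMATCH, iend);
+         (void)matchLength;
+     }
+ }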
929
+
930
+
931
+ static size_t ZSTD_compressBlock(void* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
932
+ {
933
+ cctxi_t* srt = (cctxi_t*) ctx;
934
+ U32* HashTable = (U32*)(srt->hashTable);
935
+ void* workplace = srt->workplace;
936
+ const BYTE* const base = srt->base;
937
+
938
+ const BYTE* const istart = (const BYTE*)src;
939
+ const BYTE* ip = istart + 1;
940
+ const BYTE* anchor = istart;
941
+ const BYTE* const iend = istart + srcSize;
942
+ const BYTE* const ilimit = iend - 16;
943
+
944
+ U32 *op_offset = (U32*)(workplace), *op_offset_start = op_offset;
945
+ BYTE *op_l = (BYTE*)workplace + srcSize + 4, *op_l_start = op_l;
946
+ BYTE *op_rl = op_l + srcSize + 4, *op_rl_start = op_rl;
947
+ BYTE *op_ml = op_rl + (srcSize >> 2) + 4, *op_ml_start = op_ml;
948
+ BYTE *op_dumps = op_ml + (srcSize >> 2) + 4, *op_dumps_start = op_dumps;
949
+ size_t prevOffset=0, offset=0;
950
+ size_t lastLLSize;
951
+
952
+
953
+ /* Main Search Loop */
954
+ while (ip < ilimit)
955
+ {
956
+ const BYTE* match = (BYTE*) ZSTD_updateMatch(HashTable, ip, base);
957
+
958
+ if (!ZSTD_checkMatch(match,ip)) { ip += ((ip-anchor) >> g_searchStrength) + 1; continue; }
959
+
960
+ /* catch up */
961
+ while ((ip>anchor) && (match>base) && (ip[-1] == match[-1])) { ip--; match--; }
962
+
963
+ {
964
+ size_t litLength = ip-anchor;
965
+ size_t matchLength = ZSTD_count(ip+MINMATCH, match+MINMATCH, iend);
966
+ size_t offsetCode;
967
+ if (litLength) prevOffset = offset;
968
+ offsetCode = ip-match;
969
+ if (offsetCode == prevOffset) offsetCode = 0;
970
+ prevOffset = offset;
971
+ offset = ip-match;
972
+ op_dumps += ZSTD_storeSeq(op_l, op_rl++, op_offset++, op_ml++, op_dumps, litLength, anchor, offsetCode, matchLength);
973
+ op_l += litLength;
974
+
975
+ /* Fill Table */
976
+ ZSTD_addPtr(HashTable, ip+1, base);
977
+ ip += matchLength + MINMATCH;
978
+ if (ip<=iend-8) ZSTD_addPtr(HashTable, ip-2, base);
979
+ anchor = ip;
980
+ }
981
+ }
982
+
983
+ /* Last Literals */
984
+ lastLLSize = iend - anchor;
985
+ memcpy(op_l, anchor, lastLLSize);
986
+ op_l += lastLLSize;
987
+
988
+ /* Finale compression stage */
989
+ return ZSTD_compressEntropy((BYTE*)dst, maxDstSize,
990
+ op_l_start, op_l, op_rl_start, op_rl, op_ml_start, op_offset_start, op_dumps_start, op_dumps,
991
+ srcSize, lastLLSize);
992
+ }
993
+
994
+
995
+ size_t ZSTD_compressBegin(ZSTD_cctx_t ctx, void* dst, size_t maxDstSize)
996
+ {
997
+ /* Sanity check */
998
+ if (maxDstSize < ZSTD_frameHeaderSize) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
999
+
1000
+ /* Init */
1001
+ ZSTD_resetCCtx(ctx);
1002
+
1003
+ /* Write Header */
1004
+ ZSTD_writeBE32(dst, ZSTD_magicNumber);
1005
+
1006
+ return ZSTD_frameHeaderSize;
1007
+ }
1008
+
1009
+
1010
+ /* this should be auto-vectorized by compiler */
1011
+ static void ZSTD_scaleDownCtx(void* cctx, const U32 limit)
1012
+ {
1013
+ cctxi_t* ctx = (cctxi_t*) cctx;
1014
+ int i;
1015
+
1016
+ #if defined(__AVX2__) /* <immintrin.h> */
1017
+ /* AVX2 version */
1018
+ __m256i* h = ctx->hashTable;
1019
+ const __m256i limit8 = _mm256_set1_epi32(limit);
1020
+ for (i=0; i<(HASH_TABLESIZE>>3); i++)
1021
+ {
1022
+ __m256i src =_mm256_loadu_si256((const __m256i*)(h+i));
1023
+ const __m256i dec = _mm256_min_epu32(src, limit8);
1024
+ src = _mm256_sub_epi32(src, dec);
1025
+ _mm256_storeu_si256((__m256i*)(h+i), src);
1026
+ }
1027
+ #else
1028
+ U32* h = ctx->hashTable;
1029
+ for (i=0; i<HASH_TABLESIZE; ++i)
1030
+ {
1031
+ U32 dec;
1032
+ if (h[i] > limit) dec = limit; else dec = h[i];
1033
+ h[i] -= dec;
1034
+ }
1035
+ #endif
1036
+ }
1037
+
1038
+
1039
+ /* this should be auto-vectorized by compiler */
1040
+ static void ZSTD_limitCtx(void* cctx, const U32 limit)
1041
+ {
1042
+ cctxi_t* ctx = (cctxi_t*) cctx;
1043
+ int i;
1044
+
1045
+ if (limit > g_maxLimit)
1046
+ {
1047
+ ZSTD_scaleDownCtx(cctx, limit);
1048
+ ctx->base += limit;
1049
+ ctx->current -= limit;
1050
+ ctx->nextUpdate -= limit;
1051
+ return;
1052
+ }
1053
+
1054
+ #if defined(__AVX2__) /* <immintrin.h> */
1055
+ /* AVX2 version */
1056
+ {
1057
+ __m256i* h = ctx->hashTable;
1058
+ const __m256i limit8 = _mm256_set1_epi32(limit);
1059
+ //printf("Address h : %0X\n", (U32)h); // address test
1060
+ for (i=0; i<(HASH_TABLESIZE>>3); i++)
1061
+ {
1062
+ __m256i src =_mm256_loadu_si256((const __m256i*)(h+i)); // Unfortunately, clang doesn't guarantee 32-byte alignment
1063
+ src = _mm256_max_epu32(src, limit8);
1064
+ _mm256_storeu_si256((__m256i*)(h+i), src);
1065
+ }
1066
+ }
1067
+ #else
1068
+ {
1069
+ U32* h = (U32*)(ctx->hashTable);
1070
+ for (i=0; i<HASH_TABLESIZE; ++i)
1071
+ {
1072
+ if (h[i] < limit) h[i] = limit;
1073
+ }
1074
+ }
1075
+ #endif
1076
+ }
1077
+
1078
+
1079
+ size_t ZSTD_compressContinue(ZSTD_cctx_t cctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
1080
+ {
1081
+ cctxi_t* ctx = (cctxi_t*) cctx;
1082
+ const BYTE* const istart = (const BYTE* const)src;
1083
+ const BYTE* ip = istart;
1084
+ BYTE* const ostart = (BYTE* const)dst;
1085
+ BYTE* op = ostart;
1086
+ const U32 updateRate = 2 * BLOCKSIZE;
1087
+
1088
+ /* Init */
1089
+ if (ctx->base==NULL)
1090
+ ctx->base = (const BYTE*)src, ctx->current=0, ctx->nextUpdate = g_maxDistance;
1091
+ if (src != ctx->base + ctx->current) /* not contiguous */
1092
+ {
1093
+ ZSTD_resetCCtx(ctx);
1094
+ ctx->base = (const BYTE*)src;
1095
+ ctx->current = 0;
1096
+ }
1097
+ ctx->current += (U32)srcSize;
1098
+
1099
+ while (srcSize)
1100
+ {
1101
+ size_t cSize;
1102
+ size_t blockSize = BLOCKSIZE;
1103
+ if (blockSize > srcSize) blockSize = srcSize;
1104
+
1105
+ /* update hash table */
1106
+ if (g_maxDistance <= BLOCKSIZE) /* static test => all blocks are independent */
1107
+ {
1108
+ ZSTD_resetCCtx(ctx);
1109
+ ctx->base = ip;
1110
+ ctx->current=0;
1111
+ }
1112
+ else if (ip >= ctx->base + ctx->nextUpdate)
1113
+ {
1114
+ ctx->nextUpdate += updateRate;
1115
+ ZSTD_limitCtx(ctx, ctx->nextUpdate - g_maxDistance);
1116
+ }
1117
+
1118
+ /* compress */
1119
+ if (maxDstSize < ZSTD_blockHeaderSize) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
1120
+ cSize = ZSTD_compressBlock(ctx, op+ZSTD_blockHeaderSize, maxDstSize-ZSTD_blockHeaderSize, ip, blockSize);
1121
+ if (cSize == 0)
1122
+ {
1123
+ cSize = ZSTD_noCompressBlock(op, maxDstSize, ip, blockSize); /* block is not compressible */
1124
+ if (ZSTD_isError(cSize)) return cSize;
1125
+ }
1126
+ else
1127
+ {
1128
+ if (ZSTD_isError(cSize)) return cSize;
1129
+ op[0] = (BYTE)(cSize>>16);
1130
+ op[1] = (BYTE)(cSize>>8);
1131
+ op[2] = (BYTE)cSize;
1132
+ op[0] += (BYTE)(bt_compressed << 6); /* is a compressed block */
1133
+ cSize += 3;
1134
+ }
1135
+ op += cSize;
1136
+ maxDstSize -= cSize;
1137
+ ip += blockSize;
1138
+ srcSize -= blockSize;
1139
+ }
1140
+
1141
+ return op-ostart;
1142
+ }
1143
+
1144
+
1145
+ size_t ZSTD_compressEnd(ZSTD_cctx_t ctx, void* dst, size_t maxDstSize)
1146
+ {
1147
+ BYTE* op = (BYTE*)dst;
1148
+
1149
+ /* Sanity check */
1150
+ (void)ctx;
1151
+ if (maxDstSize < ZSTD_blockHeaderSize) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
1152
+
1153
+ /* End of frame */
1154
+ op[0] = (BYTE)(bt_end << 6);
1155
+ op[1] = 0;
1156
+ op[2] = 0;
1157
+
1158
+ return 3;
1159
+ }
1160
+
1161
+
1162
+ static size_t ZSTD_compressCCtx(void* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
1163
+ {
1164
+ BYTE* const ostart = (BYTE* const)dst;
1165
+ BYTE* op = ostart;
1166
+
1167
+ /* Header */
1168
+ {
1169
+ size_t headerSize = ZSTD_compressBegin(ctx, dst, maxDstSize);
1170
+ if(ZSTD_isError(headerSize)) return headerSize;
1171
+ op += headerSize;
1172
+ maxDstSize -= headerSize;
1173
+ }
1174
+
1175
+ /* Compression */
1176
+ {
1177
+ size_t cSize = ZSTD_compressContinue(ctx, op, maxDstSize, src, srcSize);
1178
+ if (ZSTD_isError(cSize)) return cSize;
1179
+ op += cSize;
1180
+ maxDstSize -= cSize;
1181
+ }
1182
+
1183
+ /* Close frame */
1184
+ {
1185
+ size_t endSize = ZSTD_compressEnd(ctx, op, maxDstSize);
1186
+ if(ZSTD_isError(endSize)) return endSize;
1187
+ op += endSize;
1188
+ }
1189
+
1190
+ return (op - ostart);
1191
+ }
1192
+
1193
+
1194
+ size_t ZSTD_compress(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
1195
+ {
1196
+ void* ctx;
1197
+ size_t r;
1198
+
1199
+ ctx = ZSTD_createCCtx();
1200
+ r = ZSTD_compressCCtx(ctx, dst, maxDstSize, src, srcSize);
1201
+ ZSTD_freeCCtx(ctx);
1202
+ return r;
1203
+ }
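+ /* Editor's usage sketch (not part of the original source) : one-shot round trip
+  * through the two public entry points. Buffer handling and the function name
+  * are assumptions for illustration. */
+ static int ZSTD_exampleRoundTrip(const void* src, size_t srcSize)
+ {
+     size_t const bound = ZSTD_compressBound(srcSize);
+     void* const cBuff = malloc(bound);
+     void* const dBuff = malloc(srcSize);
+     size_t cSize, dSize;
+     if (!cBuff || !dBuff) { free(cBuff); free(dBuff); return -1; }
+     cSize = ZSTD_compress(cBuff, bound, src, srcSize);
+     dSize = ZSTD_isError(cSize) ? cSize : ZSTD_decompress(dBuff, srcSize, cBuff, cSize);
+     free(cBuff); free(dBuff);
+     return (ZSTD_isError(dSize) || (dSize != srcSize)) ? -1 : 0;
+ }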
1204
+
1205
+
1206
+ /**************************************************************
1207
+ * Decompression code
1208
+ **************************************************************/
1209
+
1210
+ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bpPtr)
1211
+ {
1212
+ const BYTE* const in = (const BYTE* const)src;
1213
+ BYTE headerFlags;
1214
+ U32 cSize;
1215
+
1216
+ if (srcSize < 3) return (size_t)-ZSTD_ERROR_wrongSrcSize;
1217
+
1218
+ headerFlags = *in;
1219
+ cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16);
1220
+
1221
+ bpPtr->blockType = (blockType_t)(headerFlags >> 6);
1222
+ bpPtr->origSize = (bpPtr->blockType == bt_rle) ? cSize : 0;
1223
+
1224
+ if (bpPtr->blockType == bt_end) return 0;
1225
+ if (bpPtr->blockType == bt_rle) return 1;
1226
+ return cSize;
1227
+ }
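+ /* Editor's worked example (not part of the original source) : the 3-byte block
+  * header packs the blockType into the 2 high bits of byte 0 and a 19-bit size
+  * as ((in[0]&7)<<16) + (in[1]<<8) + in[2]. Bytes { 0x40, 0x01, 0x2C } decode
+  * as bt_raw with cSize = (0x01<<8) + 0x2C = 300. */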
1228
+
1229
+
1230
+ static size_t ZSTD_copyUncompressedBlock(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
1231
+ {
1232
+ if (srcSize > maxDstSize) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
1233
+ memcpy(dst, src, srcSize);
1234
+ return srcSize;
1235
+ }
1236
+
1237
+
1238
+ /* force inline : 'fast' really needs to be evaluated at compile time */
1239
+ FORCE_INLINE size_t ZSTD_decompressLiterals_usingDTable_generic(
1240
+ void* const dst, size_t maxDstSize,
1241
+ const void* src, size_t srcSize,
1242
+ const void* DTable, U32 fast)
1243
+ {
1244
+ BYTE* op = (BYTE*) dst;
1245
+ BYTE* const olimit = op;
1246
+ BYTE* const oend = op + maxDstSize;
1247
+ FSE_DStream_t bitD;
1248
+ FSE_DState_t state1, state2;
1249
+ size_t errorCode;
1250
+
1251
+ /* Init */
1252
+ errorCode = FSE_initDStream(&bitD, src, srcSize);
1253
+ if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC;
1254
+
1255
+ FSE_initDState(&state1, &bitD, DTable);
1256
+ FSE_initDState(&state2, &bitD, DTable);
1257
+ op = oend;
1258
+
1259
+ // 2 symbols per loop
1260
+ while (!FSE_reloadDStream(&bitD) && (op>olimit+3))
1261
+ {
1262
+ *--op = fast ? FSE_decodeSymbolFast(&state1, &bitD) : FSE_decodeSymbol(&state1, &bitD);
1263
+
1264
+ if (LitFSELog*2+7 > sizeof(size_t)*8) // This test must be static
1265
+ FSE_reloadDStream(&bitD);
1266
+
1267
+ *--op = fast ? FSE_decodeSymbolFast(&state2, &bitD) : FSE_decodeSymbol(&state2, &bitD);
1268
+
1269
+ if (LitFSELog*4+7 < sizeof(size_t)*8) // This test must be static
1270
+ {
1271
+ *--op = fast ? FSE_decodeSymbolFast(&state1, &bitD) : FSE_decodeSymbol(&state1, &bitD);
1272
+ *--op = fast ? FSE_decodeSymbolFast(&state2, &bitD) : FSE_decodeSymbol(&state2, &bitD);
1273
+ }
1274
+ }
1275
+
1276
+ /* tail */
1277
+ while (1)
1278
+ {
1279
+ if ( (FSE_reloadDStream(&bitD)>2) || (op==olimit) || (FSE_endOfDState(&state1) && FSE_endOfDStream(&bitD)) )
1280
+ break;
1281
+
1282
+ *--op = fast ? FSE_decodeSymbolFast(&state1, &bitD) : FSE_decodeSymbol(&state1, &bitD);
1283
+
1284
+ if ( (FSE_reloadDStream(&bitD)>2) || (op==olimit) || (FSE_endOfDState(&state2) && FSE_endOfDStream(&bitD)) )
1285
+ break;
1286
+
1287
+ *--op = fast ? FSE_decodeSymbolFast(&state2, &bitD) : FSE_decodeSymbol(&state2, &bitD);
1288
+ }
1289
+
1290
+ /* end ? */
1291
+ if (FSE_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2) )
1292
+ return oend-op;
1293
+
1294
+ if (op==olimit) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall; /* dst buffer is full, but cSrc unfinished */
1295
+
1296
+ return (size_t)-ZSTD_ERROR_GENERIC;
1297
+ }
1298
+
1299
+ static size_t ZSTD_decompressLiterals_usingDTable(
1300
+ void* const dst, size_t maxDstSize,
1301
+ const void* src, size_t srcSize,
1302
+ const void* DTable, U32 fast)
1303
+ {
1304
+ if (fast) return ZSTD_decompressLiterals_usingDTable_generic(dst, maxDstSize, src, srcSize, DTable, 1);
1305
+ return ZSTD_decompressLiterals_usingDTable_generic(dst, maxDstSize, src, srcSize, DTable, 0);
1306
+ }
1307
+
1308
+ static size_t ZSTD_decompressLiterals(void* ctx, void* dst, size_t maxDstSize,
1309
+ const void* src, size_t srcSize)
1310
+ {
1311
+ /* assumed : blockType == blockCompressed */
1312
+ const BYTE* ip = (const BYTE*)src;
1313
+ short norm[256];
1314
+ void* DTable = ctx;
1315
+ U32 maxSymbolValue = 255;
1316
+ U32 tableLog;
1317
+ U32 fastMode;
1318
+ size_t errorCode;
1319
+
1320
+ if (srcSize < 2) return (size_t)-ZSTD_ERROR_wrongLBlockSize; // too small input size
1321
+
1322
+ errorCode = FSE_readHeader (norm, &maxSymbolValue, &tableLog, ip, srcSize);
1323
+ if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC;
1324
+ ip += errorCode;
1325
+ srcSize -= errorCode;
1326
+
1327
+ errorCode = FSE_buildDTable (DTable, norm, maxSymbolValue, tableLog);
1328
+ if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC;
1329
+ fastMode = (U32)errorCode;
1330
+
1331
+ return ZSTD_decompressLiterals_usingDTable (dst, maxDstSize, ip, srcSize, DTable, fastMode);
1332
+ }
1333
+
1334
+
1335
+ size_t ZSTD_decodeLiteralsBlock(void* ctx,
1336
+ void* dst, size_t maxDstSize,
1337
+ const BYTE** litPtr,
1338
+ const void* src, size_t srcSize)
1339
+ {
1340
+ const BYTE* const istart = (const BYTE* const)src;
1341
+ const BYTE* ip = istart;
1342
+ BYTE* const ostart = (BYTE* const)dst;
1343
+ BYTE* const oend = ostart + maxDstSize;
1344
+ blockProperties_t litbp;
1345
+
1346
+ size_t litcSize = ZSTD_getcBlockSize(src, srcSize, &litbp);
1347
+ if (ZSTD_isError(litcSize)) return litcSize;
1348
+ if (litcSize > srcSize - ZSTD_blockHeaderSize) return (size_t)-ZSTD_ERROR_wrongLBlockSize;
1349
+ ip += ZSTD_blockHeaderSize;
1350
+
1351
+ switch(litbp.blockType)
1352
+ {
1353
+ case bt_raw: *litPtr = ip; ip+= litcSize; break;
1354
+ case bt_rle:
1355
+ {
1356
+ size_t rleSize = litbp.origSize;
1357
+ memset(oend - rleSize, *ip, rleSize);
1358
+ *litPtr = oend - rleSize;
1359
+ ip++;
1360
+ break;
1361
+ }
1362
+ case bt_compressed:
1363
+ {
1364
+ size_t cSize = ZSTD_decompressLiterals(ctx, dst, maxDstSize, ip, litcSize);
1365
+ if (ZSTD_isError(cSize)) return cSize;
1366
+ *litPtr = oend - cSize;
1367
+ ip += litcSize;
1368
+ break;
1369
+ }
1370
+ default:
1371
+ return (size_t)-ZSTD_ERROR_GENERIC;
1372
+ }
1373
+
1374
+ return ip-istart;
1375
+ }
1376
+
1377
+
1378
+ size_t ZSTD_decodeSeqHeaders(size_t* lastLLPtr, const BYTE** dumpsPtr,
1379
+ void* DTableLL, void* DTableML, void* DTableOffb,
1380
+ const void* src, size_t srcSize)
1381
+ {
1382
+ const BYTE* const istart = (const BYTE* const)src;
1383
+ const BYTE* ip = istart;
1384
+ const BYTE* const iend = istart + srcSize;
1385
+ U32 LLtype, Offtype, MLtype;
1386
+ U32 LLlog, Offlog, MLlog;
1387
+ size_t dumpsLength;
1388
+
1389
+ /* SeqHead */
1390
+ ip += ZSTD_readProgressive(lastLLPtr, ip);
1391
+ LLtype = *ip >> 6;
1392
+ Offtype = (*ip >> 4) & 3;
1393
+ MLtype = (*ip >> 2) & 3;
1394
+ if (*ip & 2)
1395
+ {
1396
+ dumpsLength = ip[2];
1397
+ dumpsLength += ip[1] << 8;
1398
+ ip += 3;
1399
+ }
1400
+ else
1401
+ {
1402
+ dumpsLength = ip[1];
1403
+ dumpsLength += (ip[0] & 1) << 8;
1404
+ ip += 2;
1405
+ }
1406
+ *dumpsPtr = ip;
1407
+ ip += dumpsLength;
1408
+
1409
+ /* sequences */
1410
+ {
1411
+ S16 norm[MaxML+1]; /* assumption : MaxML >= MaxLL and MaxOff */
1412
+ size_t headerSize;
1413
+
1414
+ /* Build DTables */
1415
+ switch(LLtype)
1416
+ {
1417
+ U32 max;
1418
+ case bt_rle :
1419
+ LLlog = 0;
1420
+ FSE_buildDTable_rle(DTableLL, *ip++); break;
1421
+ case bt_raw :
1422
+ LLlog = LLbits;
1423
+ FSE_buildDTable_raw(DTableLL, LLbits); break;
1424
+ default :
1425
+ max = MaxLL;
1426
+ headerSize = FSE_readHeader(norm, &max, &LLlog, ip, iend-ip);
1427
+ if (FSE_isError(headerSize)) return (size_t)-ZSTD_ERROR_GENERIC;
1428
+ ip += headerSize;
1429
+ FSE_buildDTable(DTableLL, norm, max, LLlog);
1430
+ }
1431
+
1432
+ switch(Offtype)
1433
+ {
1434
+ U32 max;
1435
+ case bt_rle :
1436
+ Offlog = 0;
1437
+ FSE_buildDTable_rle(DTableOffb, *ip++); break;
1438
+ case bt_raw :
1439
+ Offlog = Offbits;
1440
+ FSE_buildDTable_raw(DTableOffb, Offbits); break;
1441
+ default :
1442
+ max = MaxOff;
1443
+ headerSize = FSE_readHeader(norm, &max, &Offlog, ip, iend-ip);
1444
+ if (FSE_isError(headerSize)) return (size_t)-ZSTD_ERROR_GENERIC;
1445
+ ip += headerSize;
1446
+ FSE_buildDTable(DTableOffb, norm, max, Offlog);
1447
+ }
1448
+
1449
+ switch(MLtype)
1450
+ {
1451
+ U32 max;
1452
+ case bt_rle :
1453
+ MLlog = 0;
1454
+ FSE_buildDTable_rle(DTableML, *ip++); break;
1455
+ case bt_raw :
1456
+ MLlog = MLbits;
1457
+ FSE_buildDTable_raw(DTableML, MLbits); break;
1458
+ default :
1459
+ max = MaxML;
1460
+ headerSize = FSE_readHeader(norm, &max, &MLlog, ip, iend-ip);
1461
+ if (FSE_isError(headerSize)) return (size_t)-ZSTD_ERROR_GENERIC;
1462
+ ip += headerSize;
1463
+ FSE_buildDTable(DTableML, norm, max, MLlog);
1464
+ }
1465
+ }
1466
+
1467
+ return ip-istart;
1468
+ }
1469
+
1470
+
1471
+ #define ZSTD_prefetch(p) { const BYTE pByte = *(volatile const BYTE*)p; }
1472
+
1473
+ FORCE_INLINE size_t ZSTD_decompressBlock(void* ctx, void* dst, size_t maxDstSize,
1474
+ const void* src, size_t srcSize)
1475
+ {
1476
+ const BYTE* ip = (const BYTE*)src;
1477
+ const BYTE* const iend = ip + srcSize;
1478
+ BYTE* const ostart = (BYTE* const)dst;
1479
+ BYTE* op = ostart;
1480
+ BYTE* const oend = ostart + maxDstSize;
1481
+ size_t errorCode;
1482
+ size_t lastLLSize;
1483
+ const BYTE* dumps;
1484
+ const BYTE* litPtr;
1485
+ const BYTE* litEnd;
1486
+ const size_t dec32table[] = {4, 1, 2, 1, 4, 4, 4, 4}; /* added */
1487
+ const size_t dec64table[] = {8, 8, 8, 7, 8, 9,10,11}; /* subtracted */
1488
+ void* DTableML = ctx;
1489
+ void* DTableLL = ((U32*)ctx) + FSE_DTABLE_SIZE_U32(MLFSELog);
1490
+ void* DTableOffb = ((U32*)DTableLL) + FSE_DTABLE_SIZE_U32(LLFSELog);
1491
+
1492
+ /* blockType == blockCompressed, srcSize is trusted */
1493
+
1494
+ /* literal sub-block */
1495
+ errorCode = ZSTD_decodeLiteralsBlock(ctx, dst, maxDstSize, &litPtr, src, srcSize);
1496
+ if (ZSTD_isError(errorCode)) return errorCode;
1497
+ ip += errorCode;
1498
+
1499
+ /* Build Decoding Tables */
1500
+ errorCode = ZSTD_decodeSeqHeaders(&lastLLSize, &dumps,
1501
+ DTableLL, DTableML, DTableOffb,
1502
+ ip, iend-ip);
1503
+ if (ZSTD_isError(errorCode)) return errorCode;
1504
+ /* end pos */
1505
+ if ((litPtr>=ostart) && (litPtr<=oend))
1506
+ litEnd = oend - lastLLSize;
1507
+ else
1508
+ litEnd = ip - lastLLSize;
1509
+ ip += errorCode;
1510
+
1511
+ /* decompression */
1512
+ {
1513
+ FSE_DStream_t DStream;
1514
+ FSE_DState_t stateLL, stateOffb, stateML;
1515
+ size_t prevOffset = 0, offset = 0;
1516
+ size_t qutt=0;
1517
+
1518
+ FSE_initDStream(&DStream, ip, iend-ip);
1519
+ FSE_initDState(&stateLL, &DStream, DTableLL);
1520
+ FSE_initDState(&stateOffb, &DStream, DTableOffb);
1521
+ FSE_initDState(&stateML, &DStream, DTableML);
1522
+
1523
+ while (FSE_reloadDStream(&DStream)<2)
1524
+ {
1525
+ U32 nbBits, offsetCode;
1526
+ const BYTE* match;
1527
+ size_t litLength;
1528
+ size_t matchLength;
1529
+ size_t newOffset;
1530
+
1531
+ _another_round:
1532
+
1533
+ /* Literals */
1534
+ litLength = FSE_decodeSymbol(&stateLL, &DStream);
1535
+ if (litLength) prevOffset = offset;
1536
+ if (litLength == MaxLL)
1537
+ {
1538
+ BYTE add = *dumps++;
1539
+ if (add < 255) litLength += add;
1540
+ else
1541
+ {
1542
+ //litLength = (*(U32*)dumps) & 0xFFFFFF;
1543
+ litLength = ZSTD_readLE32(dumps) & 0xFFFFFF;
1544
+ dumps += 3;
1545
+ }
1546
+ }
1547
+ if (((size_t)(litPtr - op) < 8) || ((size_t)(oend-(litPtr+litLength)) < 8))
1548
+ memmove(op, litPtr, litLength); /* overwrite risk */
1549
+ else
1550
+ ZSTD_wildcopy(op, litPtr, litLength);
1551
+ op += litLength; CHECK_OVERFLOW(op <= (BYTE *)dst + maxDstSize);
1552
+ litPtr += litLength;
1553
+
1554
+ /* Offset */
1555
+ offsetCode = FSE_decodeSymbol(&stateOffb, &DStream);
1556
+ if (ZSTD_32bits()) FSE_reloadDStream(&DStream);
1557
+ nbBits = offsetCode - 1;
1558
+ if (offsetCode==0) nbBits = 0; /* cmove */
1559
+ newOffset = FSE_readBits(&DStream, nbBits);
1560
+ if (ZSTD_32bits()) FSE_reloadDStream(&DStream);
1561
+ newOffset += (size_t)1 << nbBits;
1562
+ if (offsetCode==0) newOffset = prevOffset;
1563
+ match = op - newOffset;
1564
+ prevOffset = offset;
1565
+ offset = newOffset;
1566
+
1567
+ /* MatchLength */
1568
+ matchLength = FSE_decodeSymbol(&stateML, &DStream);
1569
+ if (matchLength == MaxML)
1570
+ {
1571
+ BYTE add = *dumps++;
1572
+ if (add < 255) matchLength += add;
1573
+ else
1574
+ {
1575
+ //matchLength = (*(U32*)dumps) & 0xFFFFFF;
1576
+ matchLength = ZSTD_readLE32(dumps) & 0xFFFFFF;
1577
+ dumps += 3;
1578
+ }
1579
+ }
1580
+ matchLength += MINMATCH;
1581
+
1582
+ /* copy Match */
1583
+ {
1584
+ BYTE* const endMatch = op + matchLength;
1585
+ U64 saved[2];
1586
+
1587
+ if ((size_t)(litPtr - endMatch) < 12)
1588
+ {
1589
+ qutt = endMatch + 12 - litPtr;
1590
+ if ((litPtr + qutt) > oend) qutt = oend-litPtr;
1591
+ memcpy(saved, litPtr, qutt);
1592
+ }
1593
+
1594
+ if (offset < 8)
1595
+ {
1596
+ const size_t dec64 = dec64table[offset];
1597
+ op[0] = match[0];
1598
+ op[1] = match[1];
1599
+ op[2] = match[2];
1600
+ op[3] = match[3];
1601
+ match += dec32table[offset];
1602
+ ZSTD_copy4(op+4, match);
1603
+ match -= dec64;
1604
+ } else { ZSTD_copy8(op, match); }
1605
+
1606
+ if (endMatch > oend-12)
1607
+ {
1608
+ if (op < oend-16)
1609
+ {
1610
+ ZSTD_wildcopy(op+8, match+8, (oend-8) - (op+8));
1611
+ match += (oend-8) - op;
1612
+ op = oend-8; CHECK_OVERFLOW(op <= (BYTE *)dst + maxDstSize);
1613
+ }
1614
+ while (op<endMatch) *op++ = *match++; CHECK_OVERFLOW(op <= (BYTE *)dst + maxDstSize);
1615
+ }
1616
+ else
1617
+ ZSTD_wildcopy(op+8, match+8, matchLength-8); /* works even if matchLength < 8 */
1618
+
1619
+ op = endMatch; CHECK_OVERFLOW(op <= (BYTE *)dst + maxDstSize);
1620
+
1621
+ if ((size_t)(litPtr - endMatch) < 12)
1622
+ memcpy((void*)litPtr, saved, qutt);
1623
+ }
1624
+ }
1625
+
1626
+ /* check if reached exact end */
1627
+ if (FSE_reloadDStream(&DStream) > 2) return (size_t)-ZSTD_ERROR_GENERIC; /* requested too much : data is corrupted */
1628
+ if (!FSE_endOfDState(&stateLL) && !FSE_endOfDState(&stateML) && !FSE_endOfDState(&stateOffb)) goto _another_round; /* some ultra-compressible sequences remain ! */
1629
+ if (litPtr != litEnd) goto _another_round; /* literals not entirely spent */
1630
+
1631
+ /* last literal segment */
1632
+ if (op != litPtr) memmove(op, litPtr, lastLLSize);
1633
+ op += lastLLSize; CHECK_OVERFLOW(op <= (BYTE *)dst + maxDstSize);
1634
+ }
1635
+
1636
+ return op-ostart;
1637
+ }
1638
+
1639
+
1640
+ static size_t ZSTD_decompressDCtx(void* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
1641
+ {
1642
+ const BYTE* ip = (const BYTE*)src;
1643
+ const BYTE* iend = ip + srcSize;
1644
+ BYTE* const ostart = (BYTE* const)dst;
1645
+ BYTE* op = ostart;
1646
+ BYTE* const oend = ostart + maxDstSize;
1647
+ size_t remainingSize = srcSize;
1648
+ U32 magicNumber;
1649
+ size_t errorCode=0;
1650
+ blockProperties_t blockProperties;
1651
+
1652
+ /* Header */
1653
+ if (srcSize < ZSTD_frameHeaderSize) return (size_t)-ZSTD_ERROR_wrongSrcSize;
1654
+ magicNumber = ZSTD_readBE32(src);
1655
+ if (magicNumber != ZSTD_magicNumber) return (size_t)-ZSTD_ERROR_wrongMagicNumber;
1656
+ ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;
1657
+
1658
+ while (1)
1659
+ {
1660
+ size_t blockSize = ZSTD_getcBlockSize(ip, iend-ip, &blockProperties);
1661
+ if (ZSTD_isError(blockSize))
1662
+ return blockSize;
1663
+
1664
+ ip += ZSTD_blockHeaderSize;
1665
+ remainingSize -= ZSTD_blockHeaderSize;
1666
+ if (ip+blockSize > iend)
1667
+ return (size_t)-ZSTD_ERROR_wrongSrcSize;
1668
+
1669
+ switch(blockProperties.blockType)
1670
+ {
1671
+ case bt_compressed:
1672
+ errorCode = ZSTD_decompressBlock(ctx, op, oend-op, ip, blockSize);
1673
+ break;
1674
+ case bt_raw :
1675
+ errorCode = ZSTD_copyUncompressedBlock(op, oend-op, ip, blockSize);
1676
+ break;
1677
+ case bt_rle :
1678
+ return (size_t)-ZSTD_ERROR_GENERIC; /* not yet handled */
1679
+ break;
1680
+ case bt_end :
1681
+ /* end of frame */
1682
+ if (remainingSize) return (size_t)-ZSTD_ERROR_wrongSrcSize;
1683
+ break;
1684
+ default:
1685
+ return (size_t)-ZSTD_ERROR_GENERIC;
1686
+ }
1687
+ if (blockSize == 0) break; /* bt_end */
1688
+
1689
+ if (ZSTD_isError(errorCode)) return errorCode;
1690
+ op += errorCode; CHECK_OVERFLOW(op <= oend);
1691
+ ip += blockSize;
1692
+ remainingSize -= blockSize;
1693
+ }
1694
+
1695
+ return op-ostart;
1696
+ }
1697
+
1698
+
1699
+ size_t ZSTD_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
1700
+ {
1701
+ U32 ctx[FSE_DTABLE_SIZE_U32(LLFSELog) + FSE_DTABLE_SIZE_U32(OffFSELog) + FSE_DTABLE_SIZE_U32(MLFSELog)];
1702
+ return ZSTD_decompressDCtx(ctx, dst, maxDstSize, src, srcSize);
1703
+ }
1704
+
1705
+
1706
+ /******************************
1707
+ * Streaming Decompression API
1708
+ ******************************/
1709
+
1710
+ typedef struct
1711
+ {
1712
+ U32 ctx[FSE_DTABLE_SIZE_U32(LLFSELog) + FSE_DTABLE_SIZE_U32(OffFSELog) + FSE_DTABLE_SIZE_U32(MLFSELog)];
1713
+ size_t expected;
1714
+ blockType_t bType;
1715
+ U32 started;
1716
+ } dctx_t;
1717
+
1718
+
1719
+ ZSTD_dctx_t ZSTD_createDCtx(void)
1720
+ {
1721
+ dctx_t* dctx = (dctx_t*)malloc(sizeof(dctx_t));
1722
+ dctx->expected = 4 + ZSTD_blockHeaderSize; // Frame Header + Block Header
1723
+ dctx->started = 0;
1724
+ return (ZSTD_dctx_t)dctx;
1725
+ }
1726
+
1727
+ size_t ZSTD_freeDCtx(ZSTD_dctx_t dctx)
1728
+ {
1729
+ free(dctx);
1730
+ return 0;
1731
+ }
1732
+
1733
+
1734
+ size_t ZSTD_getNextcBlockSize(ZSTD_dctx_t dctx)
1735
+ {
1736
+ return ((dctx_t*)dctx)->expected;
1737
+ }
1738
+
1739
+ size_t ZSTD_decompressContinue(ZSTD_dctx_t dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
1740
+ {
1741
+ dctx_t* ctx = (dctx_t*)dctx;
1742
+ size_t cSize = srcSize - ZSTD_blockHeaderSize;
1743
+ size_t rSize;
1744
+
1745
+ // Sanity check
1746
+ if (srcSize != ctx->expected) return (size_t)-ZSTD_ERROR_wrongSrcSize;
1747
+
1748
+ // Decompress
1749
+ if (!ctx->started)
1750
+ {
1751
+ // Just check correct magic header
1752
+ U32 magicNumber = ZSTD_readBE32(src);
1753
+ if (magicNumber != ZSTD_magicNumber) return (size_t)-ZSTD_ERROR_wrongMagicNumber;
1754
+ rSize = 0;
1755
+ }
1756
+ else
1757
+ {
1758
+ switch(ctx->bType)
1759
+ {
1760
+ case bt_compressed:
1761
+ rSize = ZSTD_decompressBlock(ctx, dst, maxDstSize, src, cSize);
1762
+ break;
1763
+ case bt_raw :
1764
+ rSize = ZSTD_copyUncompressedBlock(dst, maxDstSize, src, cSize);
1765
+ break;
1766
+ case bt_rle :
1767
+ return (size_t)-ZSTD_ERROR_GENERIC; /* not yet handled */
1768
+ break;
1769
+ case bt_end :
1770
+ rSize = 0;
1771
+ break;
1772
+ default:
1773
+ return (size_t)-ZSTD_ERROR_GENERIC;
1774
+ }
1775
+ }
1776
+
1777
+ // Prepare next block
1778
+ {
1779
+ const BYTE* header = (const BYTE*)src;
1780
+ blockProperties_t bp;
1781
+ size_t blockSize;
1782
+ header += cSize;
1783
+ blockSize = ZSTD_getcBlockSize(header, ZSTD_blockHeaderSize, &bp);
1784
+ if (ZSTD_isError(blockSize)) return blockSize;
1785
+ if (bp.blockType == bt_end)
1786
+ {
1787
+ ctx->expected = 0;
1788
+ ctx->started = 0;
1789
+ }
1790
+ else
1791
+ {
1792
+ ctx->expected = blockSize + ZSTD_blockHeaderSize;
1793
+ ctx->bType = bp.blockType;
1794
+ ctx->started = 1;
1795
+ }
1796
+ }
1797
+
1798
+ return rSize;
1799
+ }
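+ /* Editor's usage sketch (not part of the original source) : driving the
+  * streaming decoder block by block. Buffer management and the function name
+  * are assumptions for illustration. */
+ static size_t ZSTD_exampleStreamDecompress(void* dst, size_t dstSize, const void* src, size_t srcSize)
+ {
+     ZSTD_dctx_t dctx = ZSTD_createDCtx();
+     const BYTE* ip = (const BYTE*)src;
+     BYTE* op = (BYTE*)dst;
+     while (1)
+     {
+         size_t r, toRead = ZSTD_getNextcBlockSize(dctx);   /* becomes 0 once bt_end is seen */
+         if ((toRead == 0) || (toRead > srcSize)) break;
+         r = ZSTD_decompressContinue(dctx, op, dstSize - (size_t)(op - (BYTE*)dst), ip, toRead);
+         if (ZSTD_isError(r)) { ZSTD_freeDCtx(dctx); return r; }
+         op += r; ip += toRead; srcSize -= toRead;
+     }
+     ZSTD_freeDCtx(dctx);
+     return (size_t)(op - (BYTE*)dst);
+ }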
1800
+
1801
+