extzstd 0.0.1.CONCEPT → 0.0.2.CONCEPT

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -40,24 +40,32 @@ extern "C" {
40
40
 
41
41
 
42
42
  /******************************************
43
- * Tool functions
43
+ * FSE API compatible with DLL
44
44
  ******************************************/
45
- #define FSE_MAX_HEADERSIZE 512
46
- #define FSE_COMPRESSBOUND(size) (size + (size>>7) + FSE_MAX_HEADERSIZE) /* Macro can be useful for static allocation */
45
+ #include "fse.h"
47
46
 
48
47
 
49
48
  /******************************************
50
49
  * Static allocation
51
50
  ******************************************/
52
- /* You can statically allocate a CTable as a table of U32 using below macro */
51
+ /* FSE buffer bounds */
52
+ #define FSE_NCOUNTBOUND 512
53
+ #define FSE_BLOCKBOUND(size) (size + (size>>7))
54
+ #define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
55
+
56
+ /* You can statically allocate FSE CTable/DTable as a table of unsigned using below macro */
53
57
  #define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2))
54
- #define FSE_DTABLE_SIZE_U32(maxTableLog) ((1<<maxTableLog)+1)
58
+ #define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<maxTableLog))
55
59
 
60
+ /* Huff0 buffer bounds */
61
+ #define HUF_CTABLEBOUND 129
62
+ #define HUF_BLOCKBOUND(size) (size + (size>>8) + 8) /* only true if pre-filtered with fast heuristic */
63
+ #define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
56
64
 
57
- /******************************************
58
- * FSE supported API for DLL
59
- ******************************************/
60
- #include "fse.h"
65
+ /* You can statically allocate Huff0 DTable as a table of unsigned short using below macro */
66
+ #define HUF_DTABLE_SIZE_U16(maxTableLog) (1 + (1<<maxTableLog))
67
+ #define HUF_CREATE_STATIC_DTABLE(DTable, maxTableLog) \
68
+ unsigned short DTable[HUF_DTABLE_SIZE_U16(maxTableLog)] = { maxTableLog }
61
69
 
62
70
 
63
71
  /******************************************
@@ -65,7 +73,7 @@ extern "C" {
65
73
  ******************************************/
66
74
  #define FSE_LIST_ERRORS(ITEM) \
67
75
  ITEM(FSE_OK_NoError) ITEM(FSE_ERROR_GENERIC) \
68
- ITEM(FSE_ERROR_tableLog_tooLarge) ITEM(FSE_ERROR_maxSymbolValue_tooLarge) \
76
+ ITEM(FSE_ERROR_tableLog_tooLarge) ITEM(FSE_ERROR_maxSymbolValue_tooLarge) ITEM(FSE_ERROR_maxSymbolValue_tooSmall) \
69
77
  ITEM(FSE_ERROR_dstSize_tooSmall) ITEM(FSE_ERROR_srcSize_wrong)\
70
78
  ITEM(FSE_ERROR_corruptionDetected) \
71
79
  ITEM(FSE_ERROR_maxCode)
@@ -77,30 +85,196 @@ typedef enum { FSE_LIST_ERRORS(FSE_GENERATE_ENUM) } FSE_errorCodes; /* enum is
77
85
  /******************************************
78
86
  * FSE advanced API
79
87
  ******************************************/
80
- size_t FSE_countFast(unsigned* count, const unsigned char* src, size_t srcSize, unsigned* maxSymbolValuePtr);
81
- /* same as FSE_count(), but won't check if input really respect that all values within src are <= *maxSymbolValuePtr */
88
+ size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr, const unsigned char* src, size_t srcSize);
89
+ /* same as FSE_count(), but blindly trust that all values within src are <= maxSymbolValuePtr[0] */
82
90
 
83
- size_t FSE_buildCTable_raw (void* CTable, unsigned nbBits);
84
- /* create a fake CTable, designed to not compress an input where each element uses nbBits */
91
+ size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits);
92
+ /* build a fake FSE_CTable, designed to not compress an input, where each symbol uses nbBits */
85
93
 
86
- size_t FSE_buildCTable_rle (void* CTable, unsigned char symbolValue);
87
- /* create a fake CTable, designed to compress a single identical value */
94
+ size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue);
95
+ /* build a fake FSE_CTable, designed to always compress the same symbolValue */
88
96
 
89
- size_t FSE_buildDTable_raw (void* DTable, unsigned nbBits);
90
- /* create a fake DTable, designed to read an uncompressed bitstream where each element uses nbBits */
97
+ size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
98
+ /* build a fake FSE_DTable, designed to read an uncompressed bitstream where each symbol uses nbBits */
91
99
 
92
- size_t FSE_buildDTable_rle (void* DTable, unsigned char symbolValue);
93
- /* create a fake DTable, designed to always generate the same symbolValue */
100
+ size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
101
+ /* build a fake FSE_DTable, designed to always generate the same symbolValue */
94
102
 
95
103
 
96
104
  /******************************************
97
- * FSE streaming API
105
+ * FSE symbol compression API
98
106
  ******************************************/
99
- bitD_t FSE_readBitsFast(FSE_DStream_t* bitD, unsigned nbBits);
107
+ /*
108
+ This API consists of small unitary functions, which highly benefit from being inlined.
109
+ You will want to enable link-time-optimization to ensure these functions are properly inlined in your binary.
110
+ Visual seems to do it automatically.
111
+ For gcc or clang, you'll need to add -flto flag at compilation and linking stages.
112
+ If none of these solutions is applicable, include "fse.c" directly.
113
+ */
114
+
115
+ typedef struct
116
+ {
117
+ size_t bitContainer;
118
+ int bitPos;
119
+ char* startPtr;
120
+ char* ptr;
121
+ char* endPtr;
122
+ } FSE_CStream_t;
123
+
124
+ typedef struct
125
+ {
126
+ ptrdiff_t value;
127
+ const void* stateTable;
128
+ const void* symbolTT;
129
+ unsigned stateLog;
130
+ } FSE_CState_t;
131
+
132
+ size_t FSE_initCStream(FSE_CStream_t* bitC, void* dstBuffer, size_t maxDstSize);
133
+ void FSE_initCState(FSE_CState_t* CStatePtr, const FSE_CTable* ct);
134
+
135
+ void FSE_encodeSymbol(FSE_CStream_t* bitC, FSE_CState_t* CStatePtr, unsigned symbol);
136
+ void FSE_addBits(FSE_CStream_t* bitC, size_t value, unsigned nbBits);
137
+ void FSE_flushBits(FSE_CStream_t* bitC);
138
+
139
+ void FSE_flushCState(FSE_CStream_t* bitC, const FSE_CState_t* CStatePtr);
140
+ size_t FSE_closeCStream(FSE_CStream_t* bitC);
141
+
142
+ /*
143
+ These functions are inner components of FSE_compress_usingCTable().
144
+ They allow the creation of custom streams, mixing multiple tables and bit sources.
145
+
146
+ A key property to keep in mind is that encoding and decoding are done **in reverse direction**.
147
+ So the first symbol you will encode is the last you will decode, like a LIFO stack.
148
+
149
+ You will need a few variables to track your CStream. They are :
150
+
151
+ FSE_CTable ct; // Provided by FSE_buildCTable()
152
+ FSE_CStream_t bitStream; // bitStream tracking structure
153
+ FSE_CState_t state; // State tracking structure (can have several)
154
+
155
+
156
+ The first thing to do is to init bitStream and state.
157
+ size_t errorCode = FSE_initCStream(&bitStream, dstBuffer, maxDstSize);
158
+ FSE_initCState(&state, ct);
159
+
160
+ Note that FSE_initCStream() can produce an error code, so its result should be tested, using FSE_isError();
161
+ You can then encode your input data, byte after byte.
162
+ FSE_encodeSymbol() outputs a maximum of 'tableLog' bits at a time.
163
+ Remember decoding will be done in reverse direction.
164
+ FSE_encodeSymbol(&bitStream, &state, symbol);
165
+
166
+ At any time, you can also add any bit sequence.
167
+ Note : maximum allowed nbBits is 25, for compatibility with 32-bits decoders
168
+ FSE_addBits(&bitStream, bitField, nbBits);
169
+
170
+ The above methods don't commit data to memory, they just store it into local register, for speed.
171
+ Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
172
+ Writing data to memory is a manual operation, performed by the flushBits function.
173
+ FSE_flushBits(&bitStream);
174
+
175
+ Your last FSE encoding operation shall be to flush your last state value(s).
176
+ FSE_flushCState(&bitStream, &state);
177
+
178
+ Finally, you must close the bitStream.
179
+ The function returns the size of CStream in bytes.
180
+ If data couldn't fit into dstBuffer, it will return a 0 ( == not compressible)
181
+ If there is an error, it returns an errorCode (which can be tested using FSE_isError()).
182
+ size_t size = FSE_closeCStream(&bitStream);
183
+ */
184
+
185
+
186
+ /******************************************
187
+ * FSE symbol decompression API
188
+ ******************************************/
189
+ typedef struct
190
+ {
191
+ size_t bitContainer;
192
+ unsigned bitsConsumed;
193
+ const char* ptr;
194
+ const char* start;
195
+ } FSE_DStream_t;
196
+
197
+ typedef struct
198
+ {
199
+ size_t state;
200
+ const void* table; /* precise table may vary, depending on U16 */
201
+ } FSE_DState_t;
202
+
203
+
204
+ size_t FSE_initDStream(FSE_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
205
+ void FSE_initDState(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD, const FSE_DTable* dt);
206
+
207
+ unsigned char FSE_decodeSymbol(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD);
208
+ size_t FSE_readBits(FSE_DStream_t* bitD, unsigned nbBits);
209
+ unsigned int FSE_reloadDStream(FSE_DStream_t* bitD);
210
+
211
+ unsigned FSE_endOfDStream(const FSE_DStream_t* bitD);
212
+ unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr);
213
+
214
+ typedef enum { FSE_DStream_unfinished = 0,
215
+ FSE_DStream_endOfBuffer = 1,
216
+ FSE_DStream_completed = 2,
217
+ FSE_DStream_tooFar = 3 } FSE_DStream_status; /* result of FSE_reloadDStream() */
218
+ /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... ?! */
219
+
220
+ /*
221
+ Let's now decompose FSE_decompress_usingDTable() into its unitary components.
222
+ You will decode FSE-encoded symbols from the bitStream,
223
+ and also any other bitFields you put in, **in reverse order**.
224
+
225
+ You will need a few variables to track your bitStream. They are :
226
+
227
+ FSE_DStream_t DStream; // Stream context
228
+ FSE_DState_t DState; // State context. Multiple ones are possible
229
+ FSE_DTable* DTablePtr; // Decoding table, provided by FSE_buildDTable()
230
+
231
+ The first thing to do is to init the bitStream.
232
+ errorCode = FSE_initDStream(&DStream, srcBuffer, srcSize);
233
+
234
+ You should then retrieve your initial state(s)
235
+ (in reverse flushing order if you have several ones) :
236
+ FSE_initDState(&DState, &DStream, DTablePtr);
237
+
238
+ You can then decode your data, symbol after symbol.
239
+ For information the maximum number of bits read by FSE_decodeSymbol() is 'tableLog'.
240
+ Keep in mind that symbols are decoded in reverse order, like a LIFO stack (last in, first out).
241
+ unsigned char symbol = FSE_decodeSymbol(&DState, &DStream);
242
+
243
+ You can retrieve any bitfield you eventually stored into the bitStream (in reverse order)
244
+ Note : maximum allowed nbBits is 25, for 32-bits compatibility
245
+ size_t bitField = FSE_readBits(&DStream, nbBits);
246
+
247
+ All above operations only read from local register (whose size depends on size_t).
248
+ Refueling the register from memory is manually performed by the reload method.
249
+ endSignal = FSE_reloadDStream(&DStream);
250
+
251
+ FSE_reloadDStream() result tells if there is still some more data to read from DStream.
252
+ FSE_DStream_unfinished : there is still some data left into the DStream.
253
+ FSE_DStream_endOfBuffer : Dstream reached end of buffer. Its container may no longer be completely filled.
254
+ FSE_DStream_completed : Dstream reached its exact end, corresponding in general to decompression completed.
255
+ FSE_DStream_tooFar : Dstream went too far. Decompression result is corrupted.
256
+
257
+ When reaching end of buffer (FSE_DStream_endOfBuffer), progress slowly, notably if you decode multiple symbols per loop,
258
+ to properly detect the exact end of stream.
259
+ After each decoded symbol, check if DStream is fully consumed using this simple test :
260
+ FSE_reloadDStream(&DStream) >= FSE_DStream_completed
261
+
262
+ When it's done, verify decompression is fully completed, by checking both DStream and the relevant states.
263
+ Checking if DStream has reached its end is performed by :
264
+ FSE_endOfDStream(&DStream);
265
+ Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible.
266
+ FSE_endOfDState(&DState);
267
+ */
268
+
269
+
270
+ /******************************************
271
+ * FSE unsafe symbol API
272
+ ******************************************/
273
+ size_t FSE_readBitsFast(FSE_DStream_t* bitD, unsigned nbBits);
100
274
  /* faster, but works only if nbBits >= 1 (otherwise, result will be corrupted) */
101
275
 
102
276
  unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD);
103
- /* faster, but works only if nbBits >= 1 (otherwise, result will be corrupted) */
277
+ /* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */
104
278
 
105
279
 
106
280
  #if defined (__cplusplus)
@@ -68,9 +68,6 @@
68
68
  #include <stdio.h> /* debug : printf */
69
69
  #include "zstd_static.h"
70
70
  #if defined(__clang__) || defined(__GNUC__)
71
- # ifdef __clang__
72
- # pragma clang diagnostic ignored "-Wtypedef-redefinition"
73
- # endif
74
71
  # include "fse.c" /* due to GCC/Clang inlining limitations, including *.c runs noticeably faster */
75
72
  #else
76
73
  # include "fse_static.h"
@@ -80,7 +77,6 @@
80
77
  /********************************************************
81
78
  * Compiler specifics
82
79
  *********************************************************/
83
- //#if (!(defined(_MSC_VER) && (_MSC_VER<=1500))) /* exclude Visual 2008 and below */
84
80
  #ifdef __AVX2__
85
81
  # include <immintrin.h> /* AVX2 intrinsics */
86
82
  #endif
@@ -100,10 +96,12 @@
100
96
  #endif
101
97
 
102
98
 
99
+ #ifndef MEM_ACCESS_MODULE
100
+ #define MEM_ACCESS_MODULE
103
101
  /********************************************************
104
102
  * Basic Types
105
103
  *********************************************************/
106
- #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99
104
+ #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
107
105
  # include <stdint.h>
108
106
  typedef uint8_t BYTE;
109
107
  typedef uint16_t U16;
@@ -120,19 +118,18 @@ typedef signed int S32;
120
118
  typedef unsigned long long U64;
121
119
  #endif
122
120
 
121
+ #endif /* MEM_ACCESS_MODULE */
122
+
123
123
 
124
124
  /********************************************************
125
125
  * Constants
126
126
  *********************************************************/
127
- static const U32 ZSTD_magicNumber = 0xFD2FB51C;
127
+ static const U32 ZSTD_magicNumber = 0xFD2FB51E; /* 3rd version : seqNb header */
128
128
 
129
129
  #define HASH_LOG (ZSTD_MEMORY_USAGE - 2)
130
130
  #define HASH_TABLESIZE (1 << HASH_LOG)
131
131
  #define HASH_MASK (HASH_TABLESIZE - 1)
132
132
 
133
- #define MAXD_LOG 16
134
- #define MAX_DISTANCE ((1 << MAXD_LOG) - 1)
135
-
136
133
  #define KNUTH 2654435761
137
134
 
138
135
  #define BIT7 128
@@ -142,14 +139,14 @@ static const U32 ZSTD_magicNumber = 0xFD2FB51C;
142
139
 
143
140
  #define KB *(1 <<10)
144
141
  #define MB *(1 <<20)
145
- #define GB *(1U<<20)
142
+ #define GB *(1U<<30)
146
143
 
147
- #define BLOCKSIZE (128 KB) // define, for static allocation
148
- static const U32 g_maxDistance = 512 KB;
144
+ #define BLOCKSIZE (128 KB) /* define, for static allocation */
145
+ static const U32 g_maxDistance = 4 * BLOCKSIZE;
149
146
  static const U32 g_maxLimit = 1 GB;
150
147
  static const U32 g_searchStrength = 8;
151
148
 
152
- #define WORKPLACESIZE (BLOCKSIZE*11/4)
149
+ #define WORKPLACESIZE (BLOCKSIZE*3)
153
150
  #define MINMATCH 4
154
151
  #define MLbits 7
155
152
  #define LLbits 6
@@ -161,6 +158,8 @@ static const U32 g_searchStrength = 8;
161
158
  #define MLFSELog 10
162
159
  #define LLFSELog 10
163
160
  #define OffFSELog 9
161
+ #define MAX(a,b) ((a)<(b)?(b):(a))
162
+ #define MaxSeq MAX(MaxLL, MaxML)
164
163
 
165
164
  #define LITERAL_NOENTROPY 63
166
165
  #define COMMAND_NOENTROPY 7 /* to remove */
@@ -181,11 +180,13 @@ static unsigned ZSTD_isLittleEndian(void)
181
180
  return one.c[0];
182
181
  }
183
182
 
184
- static U16 ZSTD_read16(const void* p) { return *(U16*)p; }
183
+ static U16 ZSTD_read16(const void* p) { U16 r; memcpy(&r, p, sizeof(r)); return r; }
185
184
 
186
- static U32 ZSTD_read32(const void* p) { return *(U32*)p; }
185
+ static U32 ZSTD_read32(const void* p) { U32 r; memcpy(&r, p, sizeof(r)); return r; }
187
186
 
188
- static size_t ZSTD_read_ARCH(const void* p) { return *(size_t*)p; }
187
+ static U64 ZSTD_read64(const void* p) { U64 r; memcpy(&r, p, sizeof(r)); return r; }
188
+
189
+ static size_t ZSTD_read_ARCH(const void* p) { size_t r; memcpy(&r, p, sizeof(r)); return r; }
189
190
 
190
191
  static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
191
192
 
@@ -201,6 +202,27 @@ static void ZSTD_wildcopy(void* dst, const void* src, size_t length)
201
202
  while (op < oend) COPY8(op, ip);
202
203
  }
203
204
 
205
+ static U16 ZSTD_readLE16(const void* memPtr)
206
+ {
207
+ if (ZSTD_isLittleEndian()) return ZSTD_read16(memPtr);
208
+ else
209
+ {
210
+ const BYTE* p = (const BYTE*)memPtr;
211
+ return (U16)((U16)p[0] + ((U16)p[1]<<8));
212
+ }
213
+ }
214
+
215
+ static void ZSTD_writeLE16(void* memPtr, U16 val)
216
+ {
217
+ if (ZSTD_isLittleEndian()) memcpy(memPtr, &val, sizeof(val));
218
+ else
219
+ {
220
+ BYTE* p = (BYTE*)memPtr;
221
+ p[0] = (BYTE)val;
222
+ p[1] = (BYTE)(val>>8);
223
+ }
224
+ }
225
+
204
226
  static U32 ZSTD_readLE32(const void* memPtr)
205
227
  {
206
228
  if (ZSTD_isLittleEndian())
@@ -243,40 +265,6 @@ static void ZSTD_writeBE32(void* memPtr, U32 value)
243
265
  p[3] = (BYTE)(value>>0);
244
266
  }
245
267
 
246
- static size_t ZSTD_writeProgressive(void* ptr, size_t value)
247
- {
248
- BYTE* const bStart = (BYTE* const)ptr;
249
- BYTE* byte = bStart;
250
-
251
- do
252
- {
253
- BYTE l = value & 127;
254
- value >>= 7;
255
- if (value) l += 128;
256
- *byte++ = l;
257
- } while (value);
258
-
259
- return byte - bStart;
260
- }
261
-
262
-
263
- static size_t ZSTD_readProgressive(size_t* result, const void* ptr)
264
- {
265
- const BYTE* const bStart = (const BYTE* const)ptr;
266
- const BYTE* byte = bStart;
267
- size_t r = 0;
268
- U32 shift = 0;
269
-
270
- do
271
- {
272
- r += (*byte & 127) << shift;
273
- shift += 7;
274
- } while (*byte++ & 128);
275
-
276
- *result = r;
277
- return byte - bStart;
278
- }
279
-
280
268
 
281
269
  /**************************************
282
270
  * Local structures
@@ -289,12 +277,38 @@ typedef struct
289
277
  U32 origSize;
290
278
  } blockProperties_t;
291
279
 
292
- typedef struct
280
+ typedef struct {
281
+ void* buffer;
282
+ U32* offsetStart;
283
+ U32* offset;
284
+ BYTE* offCodeStart;
285
+ BYTE* offCode;
286
+ BYTE* litStart;
287
+ BYTE* lit;
288
+ BYTE* litLengthStart;
289
+ BYTE* litLength;
290
+ BYTE* matchLengthStart;
291
+ BYTE* matchLength;
292
+ BYTE* dumpsStart;
293
+ BYTE* dumps;
294
+ } seqStore_t;
295
+
296
+ void ZSTD_resetSeqStore(seqStore_t* ssPtr)
297
+ {
298
+ ssPtr->offset = ssPtr->offsetStart;
299
+ ssPtr->lit = ssPtr->litStart;
300
+ ssPtr->litLength = ssPtr->litLengthStart;
301
+ ssPtr->matchLength = ssPtr->matchLengthStart;
302
+ ssPtr->dumps = ssPtr->dumpsStart;
303
+ }
304
+
305
+
306
+ typedef struct ZSTD_Cctx_s
293
307
  {
294
308
  const BYTE* base;
295
309
  U32 current;
296
310
  U32 nextUpdate;
297
- BYTE* workplace;
311
+ seqStore_t seqStore;
298
312
  #ifdef __AVX2__
299
313
  __m256i hashTable[HASH_TABLESIZE>>3];
300
314
  #else
@@ -303,27 +317,35 @@ typedef struct
303
317
  } cctxi_t;
304
318
 
305
319
 
306
- ZSTD_cctx_t ZSTD_createCCtx(void)
320
+ ZSTD_Cctx* ZSTD_createCCtx(void)
307
321
  {
308
- cctxi_t* srt = (cctxi_t *) malloc( sizeof(cctxi_t) );
309
- srt->workplace = (BYTE*) malloc(WORKPLACESIZE);
310
- return (ZSTD_cctx_t)srt;
322
+ ZSTD_Cctx* ctx = (ZSTD_Cctx*) malloc( sizeof(ZSTD_Cctx) );
323
+ if (ctx==NULL) return NULL;
324
+ ctx->seqStore.buffer = malloc(WORKPLACESIZE);
325
+ if (ctx->seqStore.buffer==NULL)
326
+ {
327
+ free(ctx);
328
+ return NULL;
329
+ }
330
+ ctx->seqStore.offsetStart = (U32*) (ctx->seqStore.buffer);
331
+ ctx->seqStore.offCodeStart = (BYTE*) (ctx->seqStore.offsetStart + (BLOCKSIZE>>2));
332
+ ctx->seqStore.litStart = ctx->seqStore.offCodeStart + (BLOCKSIZE>>2);
333
+ ctx->seqStore.litLengthStart = ctx->seqStore.litStart + BLOCKSIZE;
334
+ ctx->seqStore.matchLengthStart = ctx->seqStore.litLengthStart + (BLOCKSIZE>>2);
335
+ ctx->seqStore.dumpsStart = ctx->seqStore.matchLengthStart + (BLOCKSIZE>>2);
336
+ return ctx;
311
337
  }
312
338
 
313
-
314
- void ZSTD_resetCCtx(ZSTD_cctx_t ctx)
339
+ void ZSTD_resetCCtx(ZSTD_Cctx* ctx)
315
340
  {
316
- cctxi_t* srt = (cctxi_t*)ctx;
317
- srt->base = NULL;
318
- memset(srt->hashTable, 0, HASH_TABLESIZE*4);
341
+ ctx->base = NULL;
342
+ memset(ctx->hashTable, 0, HASH_TABLESIZE*4);
319
343
  }
320
344
 
321
-
322
- size_t ZSTD_freeCCtx(ZSTD_cctx_t ctx)
345
+ size_t ZSTD_freeCCtx(ZSTD_Cctx* ctx)
323
346
  {
324
- cctxi_t *srt = (cctxi_t *) (ctx);
325
- free(srt->workplace);
326
- free(srt);
347
+ free(ctx->seqStore.buffer);
348
+ free(ctx);
327
349
  return 0;
328
350
  }
329
351
 
@@ -360,9 +382,9 @@ static unsigned ZSTD_highbit(U32 val)
360
382
  unsigned long r;
361
383
  _BitScanReverse(&r, val);
362
384
  return (unsigned)r;
363
- # elif defined(__GNUC__) && (GCC_VERSION >= 304) // GCC Intrinsic
385
+ # elif defined(__GNUC__) && (GCC_VERSION >= 304) /* GCC Intrinsic */
364
386
  return 31 - __builtin_clz(val);
365
- # else // Software version
387
+ # else /* Software version */
366
388
  static const int DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
367
389
  U32 v = val;
368
390
  int r;
@@ -433,7 +455,7 @@ static unsigned ZSTD_NbCommonBytes (register size_t val)
433
455
  _BitScanReverse( &r, (unsigned long)val );
434
456
  return (unsigned)(r>>3);
435
457
  # elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
436
- return (__builtin_clz(val) >> 3);
458
+ return (__builtin_clz((U32)val) >> 3);
437
459
  # else
438
460
  unsigned r;
439
461
  if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
@@ -479,15 +501,13 @@ static size_t ZSTD_compressRle (void* dst, size_t maxDstSize, const void* src, s
479
501
  /* at this stage : dstSize >= FSE_compressBound(srcSize) > (ZSTD_blockHeaderSize+1) (checked by ZSTD_compressLiterals()) */
480
502
  (void)maxDstSize;
481
503
 
482
- ostart[ZSTD_blockHeaderSize] = *(BYTE*)src;
504
+ ostart[ZSTD_blockHeaderSize] = *(const BYTE*)src;
483
505
 
484
- // Build header
485
- {
486
- ostart[0] = (BYTE)(srcSize>>16);
487
- ostart[1] = (BYTE)(srcSize>>8);
488
- ostart[2] = (BYTE)srcSize;
489
- ostart[0] += (BYTE)(bt_rle<<6);
490
- }
506
+ /* Build header */
507
+ ostart[0] = (BYTE)(srcSize>>16);
508
+ ostart[1] = (BYTE)(srcSize>>8);
509
+ ostart[2] = (BYTE) srcSize;
510
+ ostart[0] += (BYTE)(bt_rle<<6);
491
511
 
492
512
  return ZSTD_blockHeaderSize+1;
493
513
  }
@@ -500,76 +520,16 @@ static size_t ZSTD_noCompressBlock (void* dst, size_t maxDstSize, const void* sr
500
520
  if (srcSize + ZSTD_blockHeaderSize > maxDstSize) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
501
521
  memcpy(ostart + ZSTD_blockHeaderSize, src, srcSize);
502
522
 
503
- // Build header
504
- {
505
- ostart[0] = (BYTE)(srcSize>>16);
506
- ostart[1] = (BYTE)(srcSize>>8);
507
- ostart[2] = (BYTE)srcSize;
508
- ostart[0] += (BYTE)(bt_raw<<6); /* is a raw (uncompressed) block */
509
- }
523
+ /* Build header */
524
+ ostart[0] = (BYTE)(srcSize>>16);
525
+ ostart[1] = (BYTE)(srcSize>>8);
526
+ ostart[2] = (BYTE) srcSize;
527
+ ostart[0] += (BYTE)(bt_raw<<6); /* is a raw (uncompressed) block */
510
528
 
511
529
  return ZSTD_blockHeaderSize+srcSize;
512
530
  }
513
531
 
514
532
 
515
- /* return : size of CStream in bits */
516
- static size_t ZSTD_compressLiterals_usingCTable(void* dst, size_t dstSize,
517
- const void* src, size_t srcSize,
518
- const void* CTable)
519
- {
520
- const BYTE* const istart = (const BYTE*)src;
521
- const BYTE* ip = istart;
522
- const BYTE* const iend = istart + srcSize;
523
- FSE_CStream_t bitC;
524
- FSE_CState_t CState1, CState2;
525
-
526
- // init
527
- (void)dstSize; // objective : ensure it fits into dstBuffer (Todo)
528
- FSE_initCStream(&bitC, dst);
529
- FSE_initCState(&CState1, CTable);
530
- CState2 = CState1;
531
-
532
- /* Note : at this stage, srcSize > LITERALS_NOENTROPY (checked by ZSTD_compressLiterals()) */
533
- // join to mod 2
534
- if (srcSize & 1)
535
- {
536
- FSE_encodeByte(&bitC, &CState1, *ip++);
537
- FSE_flushBits(&bitC);
538
- }
539
-
540
- // join to mod 4
541
- if ((sizeof(size_t)*8 > LitFSELog*4+7 ) && (srcSize & 2)) // test bit 2
542
- {
543
- FSE_encodeByte(&bitC, &CState2, *ip++);
544
- FSE_encodeByte(&bitC, &CState1, *ip++);
545
- FSE_flushBits(&bitC);
546
- }
547
-
548
- // 2 or 4 encoding per loop
549
- while (ip<iend)
550
- {
551
- FSE_encodeByte(&bitC, &CState2, *ip++);
552
-
553
- if (sizeof(size_t)*8 < LitFSELog*2+7 ) // this test must be static
554
- FSE_flushBits(&bitC);
555
-
556
- FSE_encodeByte(&bitC, &CState1, *ip++);
557
-
558
- if (sizeof(size_t)*8 > LitFSELog*4+7 ) // this test must be static
559
- {
560
- FSE_encodeByte(&bitC, &CState2, *ip++);
561
- FSE_encodeByte(&bitC, &CState1, *ip++);
562
- }
563
-
564
- FSE_flushBits(&bitC);
565
- }
566
-
567
- FSE_flushCState(&bitC, &CState2);
568
- FSE_flushCState(&bitC, &CState1);
569
- return FSE_closeCStream(&bitC);
570
- }
571
-
572
-
573
533
  size_t ZSTD_minGain(size_t srcSize)
574
534
  {
575
535
  return (srcSize >> 6) + 1;
@@ -579,89 +539,58 @@ size_t ZSTD_minGain(size_t srcSize)
579
539
  static size_t ZSTD_compressLiterals (void* dst, size_t dstSize,
580
540
  const void* src, size_t srcSize)
581
541
  {
582
- const BYTE* const istart = (const BYTE*) src;
583
- const BYTE* ip = istart;
584
-
585
- BYTE* const ostart = (BYTE*) dst;
586
- BYTE* op = ostart + ZSTD_blockHeaderSize;
587
- BYTE* const oend = ostart + dstSize;
588
-
589
- U32 maxSymbolValue = 256;
590
- U32 tableLog = LitFSELog;
591
- U32 count[256];
592
- S16 norm[256];
593
- U32 CTable[ FSE_CTABLE_SIZE_U32(LitFSELog, 256) ];
594
- size_t errorCode;
595
542
  const size_t minGain = ZSTD_minGain(srcSize);
543
+ BYTE* const ostart = (BYTE*)dst;
544
+ size_t hsize;
545
+ static const size_t LHSIZE = 5;
596
546
 
597
- // early out
598
- if (dstSize < FSE_compressBound(srcSize)) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
547
+ if (dstSize < LHSIZE+1) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall; /* not enough space for compression */
599
548
 
600
- // Scan input and build symbol stats
601
- errorCode = FSE_count (count, ip, srcSize, &maxSymbolValue);
602
- if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC;
603
- if (errorCode == srcSize) return 1;
604
- if (errorCode < ((srcSize * 7) >> 10)) return 0;
549
+ hsize = HUF_compress(ostart+LHSIZE, dstSize-LHSIZE, src, srcSize);
550
+ if (hsize<2) return hsize; /* special cases */
551
+ if (hsize >= srcSize - minGain) return 0;
605
552
 
606
- tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue);
607
- errorCode = (int)FSE_normalizeCount (norm, tableLog, count, srcSize, maxSymbolValue);
608
- if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC;
609
-
610
- // Write table description header
611
- errorCode = FSE_writeHeader (op, FSE_MAX_HEADERSIZE, norm, maxSymbolValue, tableLog);
612
- if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC;
613
- op += errorCode;
553
+ hsize += 2; /* work around vs fixed 3-bytes header */
614
554
 
615
- // Compress
616
- errorCode = FSE_buildCTable (&CTable, norm, maxSymbolValue, tableLog);
617
- if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC;
618
- errorCode = ZSTD_compressLiterals_usingCTable(op, oend - op, ip, srcSize, &CTable);
619
- if (ZSTD_isError(errorCode)) return errorCode;
620
- op += errorCode;
621
-
622
- // check compressibility
623
- if ( (size_t)(op-ostart) >= srcSize-minGain)
624
- return 0;
625
-
626
- // Build header
555
+ /* Build header */
627
556
  {
628
- size_t totalSize;
629
- totalSize = op - ostart - ZSTD_blockHeaderSize;
630
- ostart[0] = (BYTE)(totalSize>>16);
631
- ostart[1] = (BYTE)(totalSize>>8);
632
- ostart[2] = (BYTE)totalSize;
633
- ostart[0] += (BYTE)(bt_compressed<<6); /* is a block, is compressed */
557
+ ostart[0] = (BYTE)(bt_compressed<<6); /* is a block, is compressed */
558
+ ostart[0] += (BYTE)(hsize>>16);
559
+ ostart[1] = (BYTE)(hsize>>8);
560
+ ostart[2] = (BYTE)(hsize>>0);
561
+ ostart[0] += (BYTE)((srcSize>>16)<<3);
562
+ ostart[3] = (BYTE)(srcSize>>8);
563
+ ostart[4] = (BYTE)(srcSize>>0);
634
564
  }
635
565
 
636
- return op-ostart;
566
+ hsize -= 2;
567
+ return hsize+LHSIZE;
637
568
  }
638
569
 
639
570
 
640
- static size_t ZSTD_compressEntropy(BYTE* dst, size_t maxDstSize,
641
- const BYTE* op_lit_start, const BYTE* op_lit,
642
- const BYTE* op_litLength_start, const BYTE* op_litLength,
643
- const BYTE* op_matchLength_start,
644
- const U32* op_offset_start,
645
- const BYTE* op_dumps_start, const BYTE* op_dumps,
646
- size_t srcSize, size_t lastLLSize
647
- )
571
+ static size_t ZSTD_compressSequences(BYTE* dst, size_t maxDstSize,
572
+ const seqStore_t* seqStorePtr,
573
+ size_t srcSize)
648
574
  {
649
- FSE_CStream_t blockStream;
650
- U32 count[256];
651
- S16 norm[256];
575
+ U32 count[MaxSeq+1];
576
+ S16 norm[MaxSeq+1];
652
577
  size_t mostFrequent;
653
578
  U32 max = 255;
654
579
  U32 tableLog = 11;
655
- const size_t nbSeq = op_litLength - op_litLength_start;
656
580
  U32 CTable_LitLength [FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL )];
657
- U32 CTable_OffsetBits [FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
581
+ U32 CTable_OffsetBits [FSE_CTABLE_SIZE_U32(OffFSELog,MaxOff)];
658
582
  U32 CTable_MatchLength[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML )];
659
- U32 LLtype, Offtype, MLtype;
660
- BYTE* op;
661
- const U32* op_offset = op_offset_start + nbSeq;
662
- const BYTE* op_matchLength = op_matchLength_start + nbSeq;
663
- BYTE offsetBits_start[BLOCKSIZE / 4];
664
- BYTE* offsetBitsPtr = offsetBits_start;
583
+ U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */
584
+ const BYTE* const op_lit_start = seqStorePtr->litStart;
585
+ const BYTE* op_lit = seqStorePtr->lit;
586
+ const BYTE* const llTable = seqStorePtr->litLengthStart;
587
+ const BYTE* op_litLength = seqStorePtr->litLength;
588
+ const BYTE* const mlTable = seqStorePtr->matchLengthStart;
589
+ const U32* const offsetTable = seqStorePtr->offsetStart;
590
+ BYTE* const offCodeTable = seqStorePtr->offCodeStart;
591
+ BYTE* op = dst;
592
+ BYTE* const oend = dst + maxDstSize;
593
+ const size_t nbSeq = op_litLength - llTable;
665
594
  const size_t minGain = ZSTD_minGain(srcSize);
666
595
  const size_t maxCSize = srcSize - minGain;
667
596
  const size_t minSeqSize = 1 /*lastL*/ + 2 /*dHead*/ + 2 /*dumpsIn*/ + 5 /*SeqHead*/ + 3 /*SeqIn*/ + 1 /*margin*/ + ZSTD_blockHeaderSize;
@@ -669,13 +598,11 @@ static size_t ZSTD_compressEntropy(BYTE* dst, size_t maxDstSize,
669
598
  BYTE* seqHead;
670
599
 
671
600
 
672
- /* init */
673
- op = dst;
674
-
675
- /* Encode literals */
601
+ /* Compress literals */
676
602
  {
677
603
  size_t cSize;
678
604
  size_t litSize = op_lit - op_lit_start;
605
+
679
606
  if (litSize <= LITERAL_NOENTROPY) cSize = ZSTD_noCompressBlock (op, maxDstSize, op_lit_start, litSize);
680
607
  else
681
608
  {
@@ -691,38 +618,39 @@ static size_t ZSTD_compressEntropy(BYTE* dst, size_t maxDstSize,
691
618
  op += cSize;
692
619
  }
693
620
 
694
- /* Encode Sequences */
695
-
696
- /* seqHeader */
697
- op += ZSTD_writeProgressive(op, lastLLSize); CHECK_OVERFLOW(op <= dst + maxDstSize);
621
+ /* Sequences Header */
622
+ if ((oend-op) < 2+3+6) /* nbSeq + dumpsLength + 3*rleCTable*/
623
+ return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
624
+ ZSTD_writeLE16(op, (U16)nbSeq); op+=2;
698
625
  seqHead = op;
699
626
 
700
- /* dumps */
627
+ /* dumps : contains too large lengths */
701
628
  {
702
- size_t dumpsLength = op_dumps- op_dumps_start;
629
+ size_t dumpsLength = seqStorePtr->dumps - seqStorePtr->dumpsStart;
703
630
  if (dumpsLength < 512)
704
631
  {
705
632
  op[0] = (BYTE)(dumpsLength >> 8);
706
633
  op[1] = (BYTE)(dumpsLength);
707
- op += 2; CHECK_OVERFLOW(op <= dst + maxDstSize);
634
+ op += 2;
708
635
  }
709
636
  else
710
637
  {
711
638
  op[0] = 2;
712
639
  op[1] = (BYTE)(dumpsLength>>8);
713
640
  op[2] = (BYTE)(dumpsLength);
714
- op += 3; CHECK_OVERFLOW(op <= dst + maxDstSize);
641
+ op += 3;
715
642
  }
716
- memcpy(op, op_dumps_start, dumpsLength);
717
- op += dumpsLength; CHECK_OVERFLOW(op <= dst + maxDstSize);
643
+ if ((size_t)(oend-op) < dumpsLength+6) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
644
+ memcpy(op, seqStorePtr->dumpsStart, dumpsLength);
645
+ op += dumpsLength;
718
646
  }
719
647
 
720
- /* Encoding table of Literal Lengths */
648
+ /* CTable for Literal Lengths */
721
649
  max = MaxLL;
722
- mostFrequent = FSE_countFast(count, op_litLength_start, nbSeq, &max);
723
- if (mostFrequent == nbSeq)
650
+ mostFrequent = FSE_countFast(count, &max, seqStorePtr->litLengthStart, nbSeq);
651
+ if ((mostFrequent == nbSeq) && (nbSeq > 2))
724
652
  {
725
- *op++ = *op_litLength_start; CHECK_OVERFLOW(op <= dst + maxDstSize);
653
+ *op++ = *(seqStorePtr->litLengthStart);
726
654
  FSE_buildCTable_rle(CTable_LitLength, (BYTE)max);
727
655
  LLtype = bt_rle;
728
656
  }
@@ -733,29 +661,31 @@ static size_t ZSTD_compressEntropy(BYTE* dst, size_t maxDstSize,
733
661
  }
734
662
  else
735
663
  {
664
+ size_t NCountSize;
736
665
  tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max);
737
666
  FSE_normalizeCount(norm, tableLog, count, nbSeq, max);
738
- op += FSE_writeHeader(op, maxDstSize, norm, max, tableLog); CHECK_OVERFLOW(op <= dst + maxDstSize);
667
+ NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
668
+ if (FSE_isError(NCountSize)) return (size_t)-ZSTD_ERROR_GENERIC;
669
+ op += NCountSize;
739
670
  FSE_buildCTable(CTable_LitLength, norm, max, tableLog);
740
671
  LLtype = bt_compressed;
741
672
  }
742
673
 
743
- /* Encoding table of Offsets */
674
+ /* CTable for Offsets codes */
744
675
  {
745
- /* create OffsetBits */
676
+ /* create Offset codes */
746
677
  size_t i;
747
678
  max = MaxOff;
748
679
  for (i=0; i<nbSeq; i++)
749
680
  {
750
- offsetBits_start[i] = (BYTE)ZSTD_highbit(op_offset_start[i]) + 1;
751
- if (op_offset_start[i]==0) offsetBits_start[i]=0;
681
+ offCodeTable[i] = (BYTE)ZSTD_highbit(offsetTable[i]) + 1;
682
+ if (offsetTable[i]==0) offCodeTable[i]=0;
752
683
  }
753
- offsetBitsPtr += nbSeq;
754
- mostFrequent = FSE_countFast(count, offsetBits_start, nbSeq, &max);
684
+ mostFrequent = FSE_countFast(count, &max, offCodeTable, nbSeq);
755
685
  }
756
- if (mostFrequent == nbSeq)
686
+ if ((mostFrequent == nbSeq) && (nbSeq > 2))
757
687
  {
758
- *op++ = *offsetBits_start; CHECK_OVERFLOW(op <= dst + maxDstSize);
688
+ *op++ = *offCodeTable;
759
689
  FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max);
760
690
  Offtype = bt_rle;
761
691
  }
@@ -766,19 +696,22 @@ static size_t ZSTD_compressEntropy(BYTE* dst, size_t maxDstSize,
766
696
  }
767
697
  else
768
698
  {
699
+ size_t NCountSize;
769
700
  tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max);
770
701
  FSE_normalizeCount(norm, tableLog, count, nbSeq, max);
771
- op += FSE_writeHeader(op, maxDstSize, norm, max, tableLog); CHECK_OVERFLOW(op <= dst + maxDstSize);
702
+ NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
703
+ if (FSE_isError(NCountSize)) return (size_t)-ZSTD_ERROR_GENERIC;
704
+ op += NCountSize;
772
705
  FSE_buildCTable(CTable_OffsetBits, norm, max, tableLog);
773
706
  Offtype = bt_compressed;
774
707
  }
775
708
 
776
- /* Encoding Table of MatchLengths */
709
+ /* CTable for MatchLengths */
777
710
  max = MaxML;
778
- mostFrequent = FSE_countFast(count, op_matchLength_start, nbSeq, &max);
779
- if (mostFrequent == nbSeq)
711
+ mostFrequent = FSE_countFast(count, &max, seqStorePtr->matchLengthStart, nbSeq);
712
+ if ((mostFrequent == nbSeq) && (nbSeq > 2))
780
713
  {
781
- *op++ = *op_matchLength_start; CHECK_OVERFLOW(op <= dst + maxDstSize);
714
+ *op++ = *seqStorePtr->matchLengthStart;
782
715
  FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max);
783
716
  MLtype = bt_rle;
784
717
  }
@@ -789,48 +722,57 @@ static size_t ZSTD_compressEntropy(BYTE* dst, size_t maxDstSize,
789
722
  }
790
723
  else
791
724
  {
725
+ size_t NCountSize;
792
726
  tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max);
793
727
  FSE_normalizeCount(norm, tableLog, count, nbSeq, max);
794
- op += FSE_writeHeader(op, maxDstSize, norm, max, tableLog); CHECK_OVERFLOW(op <= dst + maxDstSize);
728
+ NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
729
+ if (FSE_isError(NCountSize)) return (size_t)-ZSTD_ERROR_GENERIC;
730
+ op += NCountSize;
795
731
  FSE_buildCTable(CTable_MatchLength, norm, max, tableLog);
796
732
  MLtype = bt_compressed;
797
733
  }
798
734
 
799
735
  seqHead[0] += (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
800
736
 
801
- /* Encoding */
737
+ /* Encoding Sequences */
802
738
  {
739
+ size_t streamSize, errorCode;
740
+ FSE_CStream_t blockStream;
803
741
  FSE_CState_t stateMatchLength;
804
742
  FSE_CState_t stateOffsetBits;
805
743
  FSE_CState_t stateLitLength;
744
+ int i;
806
745
 
807
- FSE_initCStream(&blockStream, op);
746
+ errorCode = FSE_initCStream(&blockStream, op, oend-op);
747
+ if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall; /* not enough space remaining */
808
748
  FSE_initCState(&stateMatchLength, CTable_MatchLength);
809
749
  FSE_initCState(&stateOffsetBits, CTable_OffsetBits);
810
750
  FSE_initCState(&stateLitLength, CTable_LitLength);
811
751
 
812
- while (op_litLength > op_litLength_start)
752
+ for (i=(int)nbSeq-1; i>=0; i--)
813
753
  {
814
- BYTE matchLength = *(--op_matchLength);
815
- U32 offset = *(--op_offset);
816
- BYTE offCode = *(--offsetBitsPtr); /* 32b*/ /* 64b*/
754
+ BYTE matchLength = mlTable[i];
755
+ U32 offset = offsetTable[i];
756
+ BYTE offCode = offCodeTable[i]; /* 32b*/ /* 64b*/
817
757
  U32 nbBits = (offCode-1) * (!!offCode);
818
- BYTE litLength = *(--op_litLength); /* (7)*/ /* (7)*/
819
- FSE_encodeByte(&blockStream, &stateMatchLength, matchLength); /* 17 */ /* 17 */
758
+ BYTE litLength = llTable[i]; /* (7)*/ /* (7)*/
759
+ FSE_encodeSymbol(&blockStream, &stateMatchLength, matchLength); /* 17 */ /* 17 */
820
760
  if (ZSTD_32bits()) FSE_flushBits(&blockStream); /* 7 */
821
761
  FSE_addBits(&blockStream, offset, nbBits); /* 32 */ /* 42 */
822
762
  if (ZSTD_32bits()) FSE_flushBits(&blockStream); /* 7 */
823
- FSE_encodeByte(&blockStream, &stateOffsetBits, offCode); /* 16 */ /* 51 */
824
- FSE_encodeByte(&blockStream, &stateLitLength, litLength); /* 26 */ /* 61 */
763
+ FSE_encodeSymbol(&blockStream, &stateOffsetBits, offCode); /* 16 */ /* 51 */
764
+ FSE_encodeSymbol(&blockStream, &stateLitLength, litLength); /* 26 */ /* 61 */
825
765
  FSE_flushBits(&blockStream); /* 7 */ /* 7 */
826
766
  }
827
767
 
828
768
  FSE_flushCState(&blockStream, &stateMatchLength);
829
769
  FSE_flushCState(&blockStream, &stateOffsetBits);
830
770
  FSE_flushCState(&blockStream, &stateLitLength);
831
- }
832
771
 
833
- op += FSE_closeCStream(&blockStream); CHECK_OVERFLOW(op <= dst + maxDstSize);
772
+ streamSize = FSE_closeCStream(&blockStream);
773
+ if (streamSize==0) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall; /* not enough space */
774
+ op += streamSize;
775
+ }
834
776
 
835
777
  /* check compressibility */
836
778
  if ((size_t)(op-dst) >= maxCSize) return 0;
@@ -839,57 +781,45 @@ static size_t ZSTD_compressEntropy(BYTE* dst, size_t maxDstSize,
839
781
  }
840
782
 
841
783
 
842
- static size_t ZSTD_storeSeq(BYTE* op_lit, BYTE* op_ll, U32* op_offset, BYTE* op_ml, BYTE* op_dumps,
843
- size_t litLength, const BYTE* srcLit, size_t offset, size_t matchLength)
784
+ static void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, size_t offset, size_t matchLength)
844
785
  {
845
- const BYTE* const dumpStart = op_dumps;
846
- const BYTE* const l_end = op_lit + litLength;
847
-
786
+ BYTE* op_lit = seqStorePtr->lit;
787
+ BYTE* const l_end = op_lit + litLength;
848
788
 
849
789
  /* copy Literals */
850
- while (op_lit<l_end) COPY8(op_lit, srcLit);
790
+ while (op_lit<l_end) COPY8(op_lit, literals);
791
+ seqStorePtr->lit += litLength;
851
792
 
852
793
  /* literal Length */
853
794
  if (litLength >= MaxLL)
854
795
  {
855
- *op_ll++ = MaxLL;
796
+ *(seqStorePtr->litLength++) = MaxLL;
856
797
  if (litLength<255 + MaxLL)
857
- *op_dumps++ = (BYTE)(litLength - MaxLL);
798
+ *(seqStorePtr->dumps++) = (BYTE)(litLength - MaxLL);
858
799
  else
859
800
  {
860
- *op_dumps++ = 255;
861
- ZSTD_writeLE32(op_dumps, (U32)litLength); op_dumps += 3;
862
-
863
- //litLength |= 0xFF000000;
864
- //ZSTD_writeBE32(op_dumps, (U32)litLength);
865
- //op_dumps += 4;
801
+ *(seqStorePtr->dumps++) = 255;
802
+ ZSTD_writeLE32(seqStorePtr->dumps, (U32)litLength); seqStorePtr->dumps += 3;
866
803
  }
867
804
  }
868
- else *op_ll = (BYTE)litLength;
805
+ else *(seqStorePtr->litLength++) = (BYTE)litLength;
869
806
 
870
- /* match offset */
871
- *op_offset = (U32)offset;
807
+ /* match offset */
808
+ *(seqStorePtr->offset++) = (U32)offset;
872
809
 
873
810
  /* match Length */
874
811
  if (matchLength >= MaxML)
875
812
  {
876
- *op_ml++ = MaxML;
877
- if (matchLength<255 + MaxML)
878
- *op_dumps++ = (BYTE)(matchLength - MaxML);
813
+ *(seqStorePtr->matchLength++) = MaxML;
814
+ if (matchLength < 255+MaxML)
815
+ *(seqStorePtr->dumps++) = (BYTE)(matchLength - MaxML);
879
816
  else
880
817
  {
881
- *op_dumps++ = 255;
882
- ZSTD_writeLE32(op_dumps, (U32)matchLength); op_dumps+=3;
883
- //*(U32*)op_dumps = (U32)matchLength; op_dumps += 3; /* store direct result */
884
-
885
- //matchLength |= 0xFF000000;
886
- //ZSTD_writeBE32(op_dumps, (U32)matchLength);
887
- //op_dumps += 4;
818
+ *(seqStorePtr->dumps++) = 255;
819
+ ZSTD_writeLE32(seqStorePtr->dumps, (U32)matchLength); seqStorePtr->dumps+=3;
888
820
  }
889
821
  }
890
- else *op_ml = (BYTE)matchLength;
891
-
892
- return op_dumps - dumpStart;
822
+ else *(seqStorePtr->matchLength++) = (BYTE)matchLength;
893
823
  }
894
824
 
895
825
 
@@ -905,7 +835,7 @@ static const U64 prime7bytes = 58295818150454627ULL;
905
835
  //static U32 ZSTD_hashPtr(const void* p) { return ( ((*(U64*)p & 0xFFFFFFFFFFFFFF) * prime7bytes) >> (64-HASH_LOG)); }
906
836
 
907
837
  //static U32 ZSTD_hashPtr(const void* p) { return ( (*(U64*)p * prime8bytes) >> (64-HASH_LOG)); }
908
- static U32 ZSTD_hashPtr(const void* p) { return ( (*(U64*)p * prime7bytes) >> (56-HASH_LOG)) & HASH_MASK; }
838
+ static U32 ZSTD_hashPtr(const void* p) { return ( (ZSTD_read64(p) * prime7bytes) >> (56-HASH_LOG)) & HASH_MASK; }
909
839
  //static U32 ZSTD_hashPtr(const void* p) { return ( (*(U64*)p * prime6bytes) >> (48-HASH_LOG)) & HASH_MASK; }
910
840
  //static U32 ZSTD_hashPtr(const void* p) { return ( (*(U64*)p * prime5bytes) >> (40-HASH_LOG)) & HASH_MASK; }
911
841
  //static U32 ZSTD_hashPtr(const void* p) { return ( (*(U32*)p * KNUTH) >> (32-HASH_LOG)); }
@@ -917,7 +847,6 @@ static const BYTE* ZSTD_updateMatch(U32* table, const BYTE* p, const BYTE* start
917
847
  U32 h = ZSTD_hashPtr(p);
918
848
  const BYTE* r;
919
849
  r = table[h] + start;
920
- //table[h] = (U32)(p - start);
921
850
  ZSTD_addPtr(table, p, start);
922
851
  return r;
923
852
  }
@@ -928,12 +857,12 @@ static int ZSTD_checkMatch(const BYTE* match, const BYTE* ip)
928
857
  }
929
858
 
930
859
 
931
- static size_t ZSTD_compressBlock(void* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
860
+ static size_t ZSTD_compressBlock(void* cctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
932
861
  {
933
- cctxi_t* srt = (cctxi_t*) ctx;
934
- U32* HashTable = (U32*)(srt->hashTable);
935
- void* workplace = srt->workplace;
936
- const BYTE* const base = srt->base;
862
+ cctxi_t* ctx = (cctxi_t*) cctx;
863
+ U32* HashTable = (U32*)(ctx->hashTable);
864
+ seqStore_t* seqStorePtr = &(ctx->seqStore);
865
+ const BYTE* const base = ctx->base;
937
866
 
938
867
  const BYTE* const istart = (const BYTE*)src;
939
868
  const BYTE* ip = istart + 1;
@@ -941,19 +870,16 @@ static size_t ZSTD_compressBlock(void* ctx, void* dst, size_t maxDstSize, const
941
870
  const BYTE* const iend = istart + srcSize;
942
871
  const BYTE* const ilimit = iend - 16;
943
872
 
944
- U32 *op_offset = (U32*)(workplace), *op_offset_start = op_offset;
945
- BYTE *op_l = (BYTE*)workplace + srcSize + 4, *op_l_start = op_l;
946
- BYTE *op_rl = op_l + srcSize + 4, *op_rl_start = op_rl;
947
- BYTE *op_ml = op_rl + (srcSize >> 2) + 4, *op_ml_start = op_ml;
948
- BYTE *op_dumps = op_ml + (srcSize >> 2) + 4, *op_dumps_start = op_dumps;
949
873
  size_t prevOffset=0, offset=0;
950
- size_t lastLLSize;
951
874
 
952
875
 
876
+ /* init */
877
+ ZSTD_resetSeqStore(seqStorePtr);
878
+
953
879
  /* Main Search Loop */
954
880
  while (ip < ilimit)
955
881
  {
956
- const BYTE* match = (BYTE*) ZSTD_updateMatch(HashTable, ip, base);
882
+ const BYTE* match = (const BYTE*) ZSTD_updateMatch(HashTable, ip, base);
957
883
 
958
884
  if (!ZSTD_checkMatch(match,ip)) { ip += ((ip-anchor) >> g_searchStrength) + 1; continue; }
959
885
 
@@ -969,8 +895,7 @@ static size_t ZSTD_compressBlock(void* ctx, void* dst, size_t maxDstSize, const
969
895
  if (offsetCode == prevOffset) offsetCode = 0;
970
896
  prevOffset = offset;
971
897
  offset = ip-match;
972
- op_dumps += ZSTD_storeSeq(op_l, op_rl++, op_offset++, op_ml++, op_dumps, litLength, anchor, offsetCode, matchLength);
973
- op_l += litLength;
898
+ ZSTD_storeSeq(seqStorePtr, litLength, anchor, offsetCode, matchLength);
974
899
 
975
900
  /* Fill Table */
976
901
  ZSTD_addPtr(HashTable, ip+1, base);
@@ -981,18 +906,19 @@ static size_t ZSTD_compressBlock(void* ctx, void* dst, size_t maxDstSize, const
981
906
  }
982
907
 
983
908
  /* Last Literals */
984
- lastLLSize = iend - anchor;
985
- memcpy(op_l, anchor, lastLLSize);
986
- op_l += lastLLSize;
909
+ {
910
+ size_t lastLLSize = iend - anchor;
911
+ memcpy(seqStorePtr->lit, anchor, lastLLSize);
912
+ seqStorePtr->lit += lastLLSize;
913
+ }
987
914
 
988
915
  /* Finale compression stage */
989
- return ZSTD_compressEntropy((BYTE*)dst, maxDstSize,
990
- op_l_start, op_l, op_rl_start, op_rl, op_ml_start, op_offset_start, op_dumps_start, op_dumps,
991
- srcSize, lastLLSize);
916
+ return ZSTD_compressSequences((BYTE*)dst, maxDstSize,
917
+ seqStorePtr, srcSize);
992
918
  }
993
919
 
994
920
 
995
- size_t ZSTD_compressBegin(ZSTD_cctx_t ctx, void* dst, size_t maxDstSize)
921
+ size_t ZSTD_compressBegin(ZSTD_Cctx* ctx, void* dst, size_t maxDstSize)
996
922
  {
997
923
  /* Sanity check */
998
924
  if (maxDstSize < ZSTD_frameHeaderSize) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
@@ -1007,13 +933,12 @@ size_t ZSTD_compressBegin(ZSTD_cctx_t ctx, void* dst, size_t maxDstSize)
1007
933
  }
1008
934
 
1009
935
 
1010
- /* this should be auto-vectorized by compiler */
1011
936
  static void ZSTD_scaleDownCtx(void* cctx, const U32 limit)
1012
937
  {
1013
938
  cctxi_t* ctx = (cctxi_t*) cctx;
1014
939
  int i;
1015
940
 
1016
- #if defined(__AVX2__) /* <immintrin.h> */
941
+ #if defined(__AVX2__)
1017
942
  /* AVX2 version */
1018
943
  __m256i* h = ctx->hashTable;
1019
944
  const __m256i limit8 = _mm256_set1_epi32(limit);
@@ -1025,6 +950,7 @@ static void ZSTD_scaleDownCtx(void* cctx, const U32 limit)
1025
950
  _mm256_storeu_si256((__m256i*)(h+i), src);
1026
951
  }
1027
952
  #else
953
+ /* this should be auto-vectorized by compiler */
1028
954
  U32* h = ctx->hashTable;
1029
955
  for (i=0; i<HASH_TABLESIZE; ++i)
1030
956
  {
@@ -1036,7 +962,6 @@ static void ZSTD_scaleDownCtx(void* cctx, const U32 limit)
1036
962
  }
1037
963
 
1038
964
 
1039
- /* this should be auto-vectorized by compiler */
1040
965
  static void ZSTD_limitCtx(void* cctx, const U32 limit)
1041
966
  {
1042
967
  cctxi_t* ctx = (cctxi_t*) cctx;
@@ -1051,7 +976,7 @@ static void ZSTD_limitCtx(void* cctx, const U32 limit)
1051
976
  return;
1052
977
  }
1053
978
 
1054
- #if defined(__AVX2__) /* <immintrin.h> */
979
+ #if defined(__AVX2__)
1055
980
  /* AVX2 version */
1056
981
  {
1057
982
  __m256i* h = ctx->hashTable;
@@ -1065,6 +990,7 @@ static void ZSTD_limitCtx(void* cctx, const U32 limit)
1065
990
  }
1066
991
  }
1067
992
  #else
993
+ /* this should be auto-vectorized by compiler */
1068
994
  {
1069
995
  U32* h = (U32*)(ctx->hashTable);
1070
996
  for (i=0; i<HASH_TABLESIZE; ++i)
@@ -1076,7 +1002,7 @@ static void ZSTD_limitCtx(void* cctx, const U32 limit)
1076
1002
  }
1077
1003
 
1078
1004
 
1079
- size_t ZSTD_compressContinue(ZSTD_cctx_t cctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
1005
+ size_t ZSTD_compressContinue(ZSTD_Cctx* cctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
1080
1006
  {
1081
1007
  cctxi_t* ctx = (cctxi_t*) cctx;
1082
1008
  const BYTE* const istart = (const BYTE* const)src;
@@ -1090,9 +1016,9 @@ size_t ZSTD_compressContinue(ZSTD_cctx_t cctx, void* dst, size_t maxDstSize, con
1090
1016
  ctx->base = (const BYTE*)src, ctx->current=0, ctx->nextUpdate = g_maxDistance;
1091
1017
  if (src != ctx->base + ctx->current) /* not contiguous */
1092
1018
  {
1093
- ZSTD_resetCCtx(ctx);
1094
- ctx->base = (const BYTE*)src;
1095
- ctx->current = 0;
1019
+ ZSTD_resetCCtx(ctx);
1020
+ ctx->base = (const BYTE*)src;
1021
+ ctx->current = 0;
1096
1022
  }
1097
1023
  ctx->current += (U32)srcSize;
1098
1024
 
@@ -1102,8 +1028,11 @@ size_t ZSTD_compressContinue(ZSTD_cctx_t cctx, void* dst, size_t maxDstSize, con
1102
1028
  size_t blockSize = BLOCKSIZE;
1103
1029
  if (blockSize > srcSize) blockSize = srcSize;
1104
1030
 
1031
+ if (maxDstSize < 2*ZSTD_blockHeaderSize+1) /* one RLE block + endMark */
1032
+ return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
1033
+
1105
1034
  /* update hash table */
1106
- if (g_maxDistance <= BLOCKSIZE) /* static test => all blocks are independent */
1035
+ if (g_maxDistance <= BLOCKSIZE) /* static test ; yes == blocks are independent */
1107
1036
  {
1108
1037
  ZSTD_resetCCtx(ctx);
1109
1038
  ctx->base = ip;
@@ -1116,7 +1045,6 @@ size_t ZSTD_compressContinue(ZSTD_cctx_t cctx, void* dst, size_t maxDstSize, con
1116
1045
  }
1117
1046
 
1118
1047
  /* compress */
1119
- if (maxDstSize < ZSTD_blockHeaderSize) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
1120
1048
  cSize = ZSTD_compressBlock(ctx, op+ZSTD_blockHeaderSize, maxDstSize-ZSTD_blockHeaderSize, ip, blockSize);
1121
1049
  if (cSize == 0)
1122
1050
  {
@@ -1142,7 +1070,7 @@ size_t ZSTD_compressContinue(ZSTD_cctx_t cctx, void* dst, size_t maxDstSize, con
1142
1070
  }
1143
1071
 
1144
1072
 
1145
- size_t ZSTD_compressEnd(ZSTD_cctx_t ctx, void* dst, size_t maxDstSize)
1073
+ size_t ZSTD_compressEnd(ZSTD_Cctx* ctx, void* dst, size_t maxDstSize)
1146
1074
  {
1147
1075
  BYTE* op = (BYTE*)dst;
1148
1076
 
@@ -1159,7 +1087,7 @@ size_t ZSTD_compressEnd(ZSTD_cctx_t ctx, void* dst, size_t maxDstSize)
1159
1087
  }
1160
1088
 
1161
1089
 
1162
- static size_t ZSTD_compressCCtx(void* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
1090
+ static size_t ZSTD_compressCCtx(ZSTD_Cctx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
1163
1091
  {
1164
1092
  BYTE* const ostart = (BYTE* const)dst;
1165
1093
  BYTE* op = ostart;
@@ -1193,10 +1121,11 @@ static size_t ZSTD_compressCCtx(void* ctx, void* dst, size_t maxDstSize, const v
1193
1121
 
1194
1122
  size_t ZSTD_compress(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
1195
1123
  {
1196
- void* ctx;
1124
+ ZSTD_Cctx* ctx;
1197
1125
  size_t r;
1198
1126
 
1199
1127
  ctx = ZSTD_createCCtx();
1128
+ if (ctx==NULL) return (size_t)-ZSTD_ERROR_GENERIC;
1200
1129
  r = ZSTD_compressCCtx(ctx, dst, maxDstSize, src, srcSize);
1201
1130
  ZSTD_freeCCtx(ctx);
1202
1131
  return r;
@@ -1213,7 +1142,7 @@ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bp
1213
1142
  BYTE headerFlags;
1214
1143
  U32 cSize;
1215
1144
 
1216
- if (srcSize < 3) return (size_t)-ZSTD_ERROR_wrongSrcSize;
1145
+ if (srcSize < 3) return (size_t)-ZSTD_ERROR_SrcSize;
1217
1146
 
1218
1147
  headerFlags = *in;
1219
1148
  cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16);
@@ -1235,106 +1164,34 @@ static size_t ZSTD_copyUncompressedBlock(void* dst, size_t maxDstSize, const voi
1235
1164
  }
1236
1165
 
1237
1166
 
1238
- /* force inline : 'fast' really needs to be evaluated at compile time */
1239
- FORCE_INLINE size_t ZSTD_decompressLiterals_usingDTable_generic(
1240
- void* const dst, size_t maxDstSize,
1241
- const void* src, size_t srcSize,
1242
- const void* DTable, U32 fast)
1243
- {
1244
- BYTE* op = (BYTE*) dst;
1245
- BYTE* const olimit = op;
1246
- BYTE* const oend = op + maxDstSize;
1247
- FSE_DStream_t bitD;
1248
- FSE_DState_t state1, state2;
1249
- size_t errorCode;
1250
-
1251
- /* Init */
1252
- errorCode = FSE_initDStream(&bitD, src, srcSize);
1253
- if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC;
1254
-
1255
- FSE_initDState(&state1, &bitD, DTable);
1256
- FSE_initDState(&state2, &bitD, DTable);
1257
- op = oend;
1258
-
1259
- // 2 symbols per loop
1260
- while (!FSE_reloadDStream(&bitD) && (op>olimit+3))
1261
- {
1262
- *--op = fast ? FSE_decodeSymbolFast(&state1, &bitD) : FSE_decodeSymbol(&state1, &bitD);
1263
-
1264
- if (LitFSELog*2+7 > sizeof(size_t)*8) // This test must be static
1265
- FSE_reloadDStream(&bitD);
1266
-
1267
- *--op = fast ? FSE_decodeSymbolFast(&state2, &bitD) : FSE_decodeSymbol(&state2, &bitD);
1268
-
1269
- if (LitFSELog*4+7 < sizeof(size_t)*8) // This test must be static
1270
- {
1271
- *--op = fast ? FSE_decodeSymbolFast(&state1, &bitD) : FSE_decodeSymbol(&state1, &bitD);
1272
- *--op = fast ? FSE_decodeSymbolFast(&state2, &bitD) : FSE_decodeSymbol(&state2, &bitD);
1273
- }
1274
- }
1275
-
1276
- /* tail */
1277
- while (1)
1278
- {
1279
- if ( (FSE_reloadDStream(&bitD)>2) || (op==olimit) || (FSE_endOfDState(&state1) && FSE_endOfDStream(&bitD)) )
1280
- break;
1281
-
1282
- *--op = fast ? FSE_decodeSymbolFast(&state1, &bitD) : FSE_decodeSymbol(&state1, &bitD);
1283
-
1284
- if ( (FSE_reloadDStream(&bitD)>2) || (op==olimit) || (FSE_endOfDState(&state2) && FSE_endOfDStream(&bitD)) )
1285
- break;
1286
-
1287
- *--op = fast ? FSE_decodeSymbolFast(&state2, &bitD) : FSE_decodeSymbol(&state2, &bitD);
1288
- }
1289
-
1290
- /* end ? */
1291
- if (FSE_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2) )
1292
- return oend-op;
1293
-
1294
- if (op==olimit) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall; /* dst buffer is full, but cSrc unfinished */
1295
-
1296
- return (size_t)-ZSTD_ERROR_GENERIC;
1297
- }
1298
-
1299
- static size_t ZSTD_decompressLiterals_usingDTable(
1300
- void* const dst, size_t maxDstSize,
1301
- const void* src, size_t srcSize,
1302
- const void* DTable, U32 fast)
1303
- {
1304
- if (fast) return ZSTD_decompressLiterals_usingDTable_generic(dst, maxDstSize, src, srcSize, DTable, 1);
1305
- return ZSTD_decompressLiterals_usingDTable_generic(dst, maxDstSize, src, srcSize, DTable, 0);
1306
- }
1307
-
1308
- static size_t ZSTD_decompressLiterals(void* ctx, void* dst, size_t maxDstSize,
1167
+ static size_t ZSTD_decompressLiterals(void* ctx,
1168
+ void* dst, size_t maxDstSize,
1309
1169
  const void* src, size_t srcSize)
1310
1170
  {
1311
- /* assumed : blockType == blockCompressed */
1171
+ BYTE* op = (BYTE*)dst;
1172
+ BYTE* const oend = op + maxDstSize;
1312
1173
  const BYTE* ip = (const BYTE*)src;
1313
- short norm[256];
1314
- void* DTable = ctx;
1315
- U32 maxSymbolValue = 255;
1316
- U32 tableLog;
1317
- U32 fastMode;
1318
1174
  size_t errorCode;
1175
+ size_t litSize;
1319
1176
 
1320
- if (srcSize < 2) return (size_t)-ZSTD_ERROR_wrongLBlockSize; // too small input size
1177
+ /* check : minimum 2, for litSize, +1, for content */
1178
+ if (srcSize <= 3) return (size_t)-ZSTD_ERROR_corruption;
1321
1179
 
1322
- errorCode = FSE_readHeader (norm, &maxSymbolValue, &tableLog, ip, srcSize);
1323
- if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC;
1324
- ip += errorCode;
1325
- srcSize -= errorCode;
1180
+ litSize = ip[1] + (ip[0]<<8);
1181
+ litSize += ((ip[-3] >> 3) & 7) << 16; // mmmmh....
1182
+ op = oend - litSize;
1326
1183
 
1327
- errorCode = FSE_buildDTable (DTable, norm, maxSymbolValue, tableLog);
1184
+ (void)ctx;
1185
+ if (litSize > maxDstSize) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
1186
+ errorCode = HUF_decompress(op, litSize, ip+2, srcSize-2);
1328
1187
  if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC;
1329
- fastMode = (U32)errorCode;
1330
-
1331
- return ZSTD_decompressLiterals_usingDTable (dst, maxDstSize, ip, srcSize, DTable, fastMode);
1188
+ return litSize;
1332
1189
  }
1333
1190
 
1334
1191
 
1335
1192
  size_t ZSTD_decodeLiteralsBlock(void* ctx,
1336
1193
  void* dst, size_t maxDstSize,
1337
- const BYTE** litPtr,
1194
+ const BYTE** litStart, size_t* litSize,
1338
1195
  const void* src, size_t srcSize)
1339
1196
  {
1340
1197
  const BYTE* const istart = (const BYTE* const)src;
@@ -1345,25 +1202,32 @@ size_t ZSTD_decodeLiteralsBlock(void* ctx,
1345
1202
 
1346
1203
  size_t litcSize = ZSTD_getcBlockSize(src, srcSize, &litbp);
1347
1204
  if (ZSTD_isError(litcSize)) return litcSize;
1348
- if (litcSize > srcSize - ZSTD_blockHeaderSize) return (size_t)-ZSTD_ERROR_wrongLBlockSize;
1205
+ if (litcSize > srcSize - ZSTD_blockHeaderSize) return (size_t)-ZSTD_ERROR_SrcSize;
1349
1206
  ip += ZSTD_blockHeaderSize;
1350
1207
 
1351
1208
  switch(litbp.blockType)
1352
1209
  {
1353
- case bt_raw: *litPtr = ip; ip+= litcSize; break;
1210
+ case bt_raw:
1211
+ *litStart = ip;
1212
+ ip += litcSize;
1213
+ *litSize = litcSize;
1214
+ break;
1354
1215
  case bt_rle:
1355
1216
  {
1356
1217
  size_t rleSize = litbp.origSize;
1218
+ if (rleSize>maxDstSize) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
1357
1219
  memset(oend - rleSize, *ip, rleSize);
1358
- *litPtr = oend - rleSize;
1220
+ *litStart = oend - rleSize;
1221
+ *litSize = rleSize;
1359
1222
  ip++;
1360
1223
  break;
1361
1224
  }
1362
1225
  case bt_compressed:
1363
1226
  {
1364
- size_t cSize = ZSTD_decompressLiterals(ctx, dst, maxDstSize, ip, litcSize);
1365
- if (ZSTD_isError(cSize)) return cSize;
1366
- *litPtr = oend - cSize;
1227
+ size_t decodedLitSize = ZSTD_decompressLiterals(ctx, dst, maxDstSize, ip, litcSize);
1228
+ if (ZSTD_isError(decodedLitSize)) return decodedLitSize;
1229
+ *litStart = oend - decodedLitSize;
1230
+ *litSize = decodedLitSize;
1367
1231
  ip += litcSize;
1368
1232
  break;
1369
1233
  }
@@ -1375,8 +1239,8 @@ size_t ZSTD_decodeLiteralsBlock(void* ctx,
1375
1239
  }
1376
1240
 
1377
1241
 
1378
- size_t ZSTD_decodeSeqHeaders(size_t* lastLLPtr, const BYTE** dumpsPtr,
1379
- void* DTableLL, void* DTableML, void* DTableOffb,
1242
+ size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr,
1243
+ FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb,
1380
1244
  const void* src, size_t srcSize)
1381
1245
  {
1382
1246
  const BYTE* const istart = (const BYTE* const)src;
@@ -1386,8 +1250,11 @@ size_t ZSTD_decodeSeqHeaders(size_t* lastLLPtr, const BYTE** dumpsPtr,
1386
1250
  U32 LLlog, Offlog, MLlog;
1387
1251
  size_t dumpsLength;
1388
1252
 
1253
+ /* check */
1254
+ if (srcSize < 5) return (size_t)-ZSTD_ERROR_SrcSize;
1255
+
1389
1256
  /* SeqHead */
1390
- ip += ZSTD_readProgressive(lastLLPtr, ip);
1257
+ *nbSeq = ZSTD_readLE16(ip); ip+=2;
1391
1258
  LLtype = *ip >> 6;
1392
1259
  Offtype = (*ip >> 4) & 3;
1393
1260
  MLtype = (*ip >> 2) & 3;
@@ -1406,6 +1273,9 @@ size_t ZSTD_decodeSeqHeaders(size_t* lastLLPtr, const BYTE** dumpsPtr,
1406
1273
  *dumpsPtr = ip;
1407
1274
  ip += dumpsLength;
1408
1275
 
1276
+ /* check */
1277
+ if (ip > iend-3) return (size_t)-ZSTD_ERROR_SrcSize; /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
1278
+
1409
1279
  /* sequences */
1410
1280
  {
1411
1281
  S16 norm[MaxML+1]; /* assumption : MaxML >= MaxLL and MaxOff */
@@ -1423,8 +1293,9 @@ size_t ZSTD_decodeSeqHeaders(size_t* lastLLPtr, const BYTE** dumpsPtr,
1423
1293
  FSE_buildDTable_raw(DTableLL, LLbits); break;
1424
1294
  default :
1425
1295
  max = MaxLL;
1426
- headerSize = FSE_readHeader(norm, &max, &LLlog, ip, iend-ip);
1296
+ headerSize = FSE_readNCount(norm, &max, &LLlog, ip, iend-ip);
1427
1297
  if (FSE_isError(headerSize)) return (size_t)-ZSTD_ERROR_GENERIC;
1298
+ if (LLlog > LLFSELog) return (size_t)-ZSTD_ERROR_corruption;
1428
1299
  ip += headerSize;
1429
1300
  FSE_buildDTable(DTableLL, norm, max, LLlog);
1430
1301
  }
@@ -1434,14 +1305,16 @@ size_t ZSTD_decodeSeqHeaders(size_t* lastLLPtr, const BYTE** dumpsPtr,
1434
1305
  U32 max;
1435
1306
  case bt_rle :
1436
1307
  Offlog = 0;
1308
+ if (ip > iend-2) return (size_t)-ZSTD_ERROR_SrcSize; /* min : "raw", hence no header, but at least xxLog bits */
1437
1309
  FSE_buildDTable_rle(DTableOffb, *ip++); break;
1438
1310
  case bt_raw :
1439
1311
  Offlog = Offbits;
1440
1312
  FSE_buildDTable_raw(DTableOffb, Offbits); break;
1441
1313
  default :
1442
1314
  max = MaxOff;
1443
- headerSize = FSE_readHeader(norm, &max, &Offlog, ip, iend-ip);
1315
+ headerSize = FSE_readNCount(norm, &max, &Offlog, ip, iend-ip);
1444
1316
  if (FSE_isError(headerSize)) return (size_t)-ZSTD_ERROR_GENERIC;
1317
+ if (Offlog > OffFSELog) return (size_t)-ZSTD_ERROR_corruption;
1445
1318
  ip += headerSize;
1446
1319
  FSE_buildDTable(DTableOffb, norm, max, Offlog);
1447
1320
  }
@@ -1451,14 +1324,16 @@ size_t ZSTD_decodeSeqHeaders(size_t* lastLLPtr, const BYTE** dumpsPtr,
1451
1324
  U32 max;
1452
1325
  case bt_rle :
1453
1326
  MLlog = 0;
1327
+ if (ip > iend-2) return (size_t)-ZSTD_ERROR_SrcSize; /* min : "raw", hence no header, but at least xxLog bits */
1454
1328
  FSE_buildDTable_rle(DTableML, *ip++); break;
1455
1329
  case bt_raw :
1456
1330
  MLlog = MLbits;
1457
1331
  FSE_buildDTable_raw(DTableML, MLbits); break;
1458
1332
  default :
1459
1333
  max = MaxML;
1460
- headerSize = FSE_readHeader(norm, &max, &MLlog, ip, iend-ip);
1334
+ headerSize = FSE_readNCount(norm, &max, &MLlog, ip, iend-ip);
1461
1335
  if (FSE_isError(headerSize)) return (size_t)-ZSTD_ERROR_GENERIC;
1336
+ if (MLlog > MLFSELog) return (size_t)-ZSTD_ERROR_corruption;
1462
1337
  ip += headerSize;
1463
1338
  FSE_buildDTable(DTableML, norm, max, MLlog);
1464
1339
  }
@@ -1468,175 +1343,262 @@ size_t ZSTD_decodeSeqHeaders(size_t* lastLLPtr, const BYTE** dumpsPtr,
1468
1343
  }
1469
1344
 
1470
1345
 
1471
- #define ZSTD_prefetch(p) { const BYTE pByte = *(volatile const BYTE*)p; }
1346
+ typedef struct {
1347
+ size_t litLength;
1348
+ size_t offset;
1349
+ size_t matchLength;
1350
+ } seq_t;
1472
1351
 
1473
- FORCE_INLINE size_t ZSTD_decompressBlock(void* ctx, void* dst, size_t maxDstSize,
1474
- const void* src, size_t srcSize)
1475
- {
1476
- const BYTE* ip = (const BYTE*)src;
1477
- const BYTE* const iend = ip + srcSize;
1478
- BYTE* const ostart = (BYTE* const)dst;
1479
- BYTE* op = ostart;
1480
- BYTE* const oend = ostart + maxDstSize;
1481
- size_t errorCode;
1482
- size_t lastLLSize;
1352
+ typedef struct {
1353
+ FSE_DStream_t DStream;
1354
+ FSE_DState_t stateLL;
1355
+ FSE_DState_t stateOffb;
1356
+ FSE_DState_t stateML;
1357
+ size_t prevOffset;
1483
1358
  const BYTE* dumps;
1484
- const BYTE* litPtr;
1485
- const BYTE* litEnd;
1486
- const size_t dec32table[] = {4, 1, 2, 1, 4, 4, 4, 4}; /* added */
1487
- const size_t dec64table[] = {8, 8, 8, 7, 8, 9,10,11}; /* substracted */
1488
- void* DTableML = ctx;
1489
- void* DTableLL = ((U32*)ctx) + FSE_DTABLE_SIZE_U32(MLFSELog);
1490
- void* DTableOffb = ((U32*)DTableLL) + FSE_DTABLE_SIZE_U32(LLFSELog);
1359
+ } seqState_t;
1491
1360
 
1492
- /* blockType == blockCompressed, srcSize is trusted */
1493
1361
 
1494
- /* literal sub-block */
1495
- errorCode = ZSTD_decodeLiteralsBlock(ctx, dst, maxDstSize, &litPtr, src, srcSize);
1496
- if (ZSTD_isError(errorCode)) return errorCode;
1497
- ip += errorCode;
1362
+ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
1363
+ {
1364
+ size_t litLength;
1365
+ size_t prevOffset;
1366
+ size_t offset;
1367
+ size_t matchLength;
1368
+ const BYTE* dumps = seqState->dumps;
1369
+
1370
+ /* Literal length */
1371
+ litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream));
1372
+ prevOffset = litLength ? seq->offset : seqState->prevOffset;
1373
+ seqState->prevOffset = seq->offset;
1374
+ if (litLength == MaxLL)
1375
+ {
1376
+ U32 add = *dumps++;
1377
+ if (add < 255) litLength += add;
1378
+ else
1379
+ {
1380
+ litLength = ZSTD_readLE32(dumps) & 0xFFFFFF;
1381
+ dumps += 3;
1382
+ }
1383
+ }
1498
1384
 
1499
- /* Build Decoding Tables */
1500
- errorCode = ZSTD_decodeSeqHeaders(&lastLLSize, &dumps,
1501
- DTableLL, DTableML, DTableOffb,
1502
- ip, iend-ip);
1503
- if (ZSTD_isError(errorCode)) return errorCode;
1504
- /* end pos */
1505
- if ((litPtr>=ostart) && (litPtr<=oend))
1506
- litEnd = oend - lastLLSize;
1385
+ /* Offset */
1386
+ {
1387
+ U32 offsetCode, nbBits;
1388
+ offsetCode = FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream));
1389
+ if (ZSTD_32bits()) FSE_reloadDStream(&(seqState->DStream));
1390
+ nbBits = offsetCode - 1;
1391
+ if (offsetCode==0) nbBits = 0; /* cmove */
1392
+ offset = ((size_t)1 << (nbBits & ((sizeof(offset)*8)-1))) + FSE_readBits(&(seqState->DStream), nbBits);
1393
+ if (ZSTD_32bits()) FSE_reloadDStream(&(seqState->DStream));
1394
+ if (offsetCode==0) offset = prevOffset;
1395
+ }
1396
+
1397
+ /* MatchLength */
1398
+ matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
1399
+ if (matchLength == MaxML)
1400
+ {
1401
+ U32 add = *dumps++;
1402
+ if (add < 255) matchLength += add;
1403
+ else
1404
+ {
1405
+ matchLength = ZSTD_readLE32(dumps) & 0xFFFFFF; /* no pb : dumps is always followed by seq tables > 1 byte */
1406
+ dumps += 3;
1407
+ }
1408
+ }
1409
+ matchLength += MINMATCH;
1410
+
1411
+ /* save result */
1412
+ seq->litLength = litLength;
1413
+ seq->offset = offset;
1414
+ seq->matchLength = matchLength;
1415
+ seqState->dumps = dumps;
1416
+ }
1417
+
1418
+
1419
+ static size_t ZSTD_execSequence(BYTE* op,
1420
+ seq_t sequence,
1421
+ const BYTE** litPtr, const BYTE* const litLimit,
1422
+ BYTE* const base, BYTE* const oend)
1423
+ {
1424
+ static const int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4}; /* added */
1425
+ static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11}; /* substracted */
1426
+ const BYTE* const ostart = op;
1427
+ const size_t litLength = sequence.litLength;
1428
+ BYTE* const endMatch = op + litLength + sequence.matchLength; /* risk : address space overflow (32-bits) */
1429
+ const BYTE* const litEnd = *litPtr + litLength;
1430
+
1431
+ /* check */
1432
+ if (endMatch > oend) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall; /* overwrite beyond dst buffer */
1433
+ if (litEnd > litLimit) return (size_t)-ZSTD_ERROR_corruption;
1434
+ if (sequence.matchLength > (size_t)(*litPtr-op)) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall; /* overwrite literal segment */
1435
+
1436
+ /* copy Literals */
1437
+ if (((size_t)(*litPtr - op) < 8) || ((size_t)(oend-litEnd) < 8) || (op+litLength > oend-8))
1438
+ memmove(op, *litPtr, litLength); /* overwrite risk */
1507
1439
  else
1508
- litEnd = ip - lastLLSize;
1509
- ip += errorCode;
1440
+ ZSTD_wildcopy(op, *litPtr, litLength);
1441
+ op += litLength;
1442
+ *litPtr = litEnd; /* update for next sequence */
1510
1443
 
1511
- /* decompression */
1444
+ /* check : last match must be at a minimum distance of 8 from end of dest buffer */
1445
+ if (oend-op < 8) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
1446
+
1447
+ /* copy Match */
1512
1448
  {
1513
- FSE_DStream_t DStream;
1514
- FSE_DState_t stateLL, stateOffb, stateML;
1515
- size_t prevOffset = 0, offset = 0;
1516
- size_t qutt=0;
1449
+ const U32 overlapRisk = (((size_t)(litEnd - endMatch)) < 12);
1450
+ const BYTE* match = op - sequence.offset; /* possible underflow at op - offset ? */
1451
+ size_t qutt = 12;
1452
+ U64 saved[2];
1517
1453
 
1518
- FSE_initDStream(&DStream, ip, iend-ip);
1519
- FSE_initDState(&stateLL, &DStream, DTableLL);
1520
- FSE_initDState(&stateOffb, &DStream, DTableOffb);
1521
- FSE_initDState(&stateML, &DStream, DTableML);
1454
+ /* check */
1455
+ if (match < base) return (size_t)-ZSTD_ERROR_corruption;
1456
+ if (sequence.offset > (size_t)base) return (size_t)-ZSTD_ERROR_corruption;
1522
1457
 
1523
- while (FSE_reloadDStream(&DStream)<2)
1458
+ /* save beginning of literal sequence, in case of write overlap */
1459
+ if (overlapRisk)
1524
1460
  {
1525
- U32 nbBits, offsetCode;
1526
- const BYTE* match;
1527
- size_t litLength;
1528
- size_t matchLength;
1529
- size_t newOffset;
1530
-
1531
- _another_round:
1461
+ if ((endMatch + qutt) > oend) qutt = oend-endMatch;
1462
+ memcpy(saved, endMatch, qutt);
1463
+ }
1532
1464
 
1533
- /* Literals */
1534
- litLength = FSE_decodeSymbol(&stateLL, &DStream);
1535
- if (litLength) prevOffset = offset;
1536
- if (litLength == MaxLL)
1465
+ if (sequence.offset < 8)
1466
+ {
1467
+ const int dec64 = dec64table[sequence.offset];
1468
+ op[0] = match[0];
1469
+ op[1] = match[1];
1470
+ op[2] = match[2];
1471
+ op[3] = match[3];
1472
+ match += dec32table[sequence.offset];
1473
+ ZSTD_copy4(op+4, match);
1474
+ match -= dec64;
1475
+ } else { ZSTD_copy8(op, match); }
1476
+ op += 8; match += 8;
1477
+
1478
+ if (endMatch > oend-12)
1479
+ {
1480
+ if (op < oend-8)
1537
1481
  {
1538
- BYTE add = *dumps++;
1539
- if (add < 255) litLength += add;
1540
- else
1541
- {
1542
- //litLength = (*(U32*)dumps) & 0xFFFFFF;
1543
- litLength = ZSTD_readLE32(dumps) & 0xFFFFFF;
1544
- dumps += 3;
1545
- }
1482
+ ZSTD_wildcopy(op, match, (oend-8) - op);
1483
+ match += (oend-8) - op;
1484
+ op = oend-8;
1546
1485
  }
1547
- if (((size_t)(litPtr - op) < 8) || ((size_t)(oend-(litPtr+litLength)) < 8))
1548
- memmove(op, litPtr, litLength); /* overwrite risk */
1549
- else
1550
- ZSTD_wildcopy(op, litPtr, litLength);
1551
- op += litLength; CHECK_OVERFLOW(op <= (BYTE *)dst + maxDstSize);
1552
- litPtr += litLength;
1553
-
1554
- /* Offset */
1555
- offsetCode = FSE_decodeSymbol(&stateOffb, &DStream);
1556
- if (ZSTD_32bits()) FSE_reloadDStream(&DStream);
1557
- nbBits = offsetCode - 1;
1558
- if (offsetCode==0) nbBits = 0; /* cmove */
1559
- newOffset = FSE_readBits(&DStream, nbBits);
1560
- if (ZSTD_32bits()) FSE_reloadDStream(&DStream);
1561
- newOffset += (size_t)1 << nbBits;
1562
- if (offsetCode==0) newOffset = prevOffset;
1563
- match = op - newOffset;
1564
- prevOffset = offset;
1565
- offset = newOffset;
1486
+ while (op<endMatch) *op++ = *match++;
1487
+ }
1488
+ else
1489
+ ZSTD_wildcopy(op, match, sequence.matchLength-8); /* works even if matchLength < 8 */
1566
1490
 
1567
- /* MatchLength */
1568
- matchLength = FSE_decodeSymbol(&stateML, &DStream);
1569
- if (matchLength == MaxML)
1570
- {
1571
- BYTE add = *dumps++;
1572
- if (add < 255) matchLength += add;
1573
- else
1574
- {
1575
- //matchLength = (*(U32*)dumps) & 0xFFFFFF;
1576
- matchLength = ZSTD_readLE32(dumps) & 0xFFFFFF;
1577
- dumps += 3;
1578
- }
1579
- }
1580
- matchLength += MINMATCH;
1491
+ /* restore, in case of overlap */
1492
+ if (overlapRisk) memcpy(endMatch, saved, qutt);
1493
+ }
1581
1494
 
1582
- /* copy Match */
1583
- {
1584
- BYTE* const endMatch = op + matchLength;
1585
- U64 saved[2];
1586
-
1587
- if ((size_t)(litPtr - endMatch) < 12)
1588
- {
1589
- qutt = endMatch + 12 - litPtr;
1590
- if ((litPtr + qutt) > oend) qutt = oend-litPtr;
1591
- memcpy(saved, litPtr, qutt);
1592
- }
1593
-
1594
- if (offset < 8)
1595
- {
1596
- const size_t dec64 = dec64table[offset];
1597
- op[0] = match[0];
1598
- op[1] = match[1];
1599
- op[2] = match[2];
1600
- op[3] = match[3];
1601
- match += dec32table[offset];
1602
- ZSTD_copy4(op+4, match);
1603
- match -= dec64;
1604
- } else { ZSTD_copy8(op, match); }
1605
-
1606
- if (endMatch > oend-12)
1607
- {
1608
- if (op < oend-16)
1609
- {
1610
- ZSTD_wildcopy(op+8, match+8, (oend-8) - (op+8));
1611
- match += (oend-8) - op;
1612
- op = oend-8; CHECK_OVERFLOW(op <= (BYTE *)dst + maxDstSize);
1613
- }
1614
- while (op<endMatch) *op++ = *match++; CHECK_OVERFLOW(op <= (BYTE *)dst + maxDstSize);
1615
- }
1616
- else
1617
- ZSTD_wildcopy(op+8, match+8, matchLength-8); /* works even if matchLength < 8 */
1618
-
1619
- op = endMatch; CHECK_OVERFLOW(op <= (BYTE *)dst + maxDstSize);
1620
-
1621
- if ((size_t)(litPtr - endMatch) < 12)
1622
- memcpy((void*)litPtr, saved, qutt);
1623
- }
1495
+ return endMatch-ostart;
1496
+ }
1497
+
1498
+ typedef struct ZSTD_Dctx_s
1499
+ {
1500
+ U32 LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];
1501
+ U32 OffTable[FSE_DTABLE_SIZE_U32(OffFSELog)];
1502
+ U32 MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];
1503
+ void* previousDstEnd;
1504
+ void* base;
1505
+ size_t expected;
1506
+ blockType_t bType;
1507
+ U32 phase;
1508
+ } dctx_t;
1509
+
1510
+
1511
+ static size_t ZSTD_decompressSequences(
1512
+ void* ctx,
1513
+ void* dst, size_t maxDstSize,
1514
+ const void* seqStart, size_t seqSize,
1515
+ const BYTE* litStart, size_t litSize)
1516
+ {
1517
+ dctx_t* dctx = (dctx_t*)ctx;
1518
+ const BYTE* ip = (const BYTE*)seqStart;
1519
+ const BYTE* const iend = ip + seqSize;
1520
+ BYTE* const ostart = (BYTE* const)dst;
1521
+ BYTE* op = ostart;
1522
+ BYTE* const oend = ostart + maxDstSize;
1523
+ size_t errorCode;
1524
+ const BYTE* litPtr = litStart;
1525
+ const BYTE* const litEnd = litStart + litSize;
1526
+ int nbSeq;
1527
+ const BYTE* dumps;
1528
+ U32* DTableLL = dctx->LLTable;
1529
+ U32* DTableML = dctx->MLTable;
1530
+ U32* DTableOffb = dctx->OffTable;
1531
+ BYTE* const base = (BYTE*) (dctx->base);
1532
+
1533
+ /* Build Decoding Tables */
1534
+ errorCode = ZSTD_decodeSeqHeaders(&nbSeq, &dumps,
1535
+ DTableLL, DTableML, DTableOffb,
1536
+ ip, iend-ip);
1537
+ if (ZSTD_isError(errorCode)) return errorCode;
1538
+ ip += errorCode;
1539
+
1540
+ /* Regen sequences */
1541
+ {
1542
+ seq_t sequence;
1543
+ seqState_t seqState;
1544
+
1545
+ memset(&sequence, 0, sizeof(sequence));
1546
+ seqState.dumps = dumps;
1547
+ seqState.prevOffset = 1;
1548
+ errorCode = FSE_initDStream(&(seqState.DStream), ip, iend-ip);
1549
+ if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_corruption;
1550
+ FSE_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL);
1551
+ FSE_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb);
1552
+ FSE_initDState(&(seqState.stateML), &(seqState.DStream), DTableML);
1553
+
1554
+ for ( ; (FSE_reloadDStream(&(seqState.DStream)) < FSE_DStream_completed) || (nbSeq>0) ; )
1555
+ {
1556
+ size_t oneSeqSize;
1557
+ nbSeq--;
1558
+ ZSTD_decodeSequence(&sequence, &seqState);
1559
+ oneSeqSize = ZSTD_execSequence(op, sequence, &litPtr, litEnd, base, oend);
1560
+ if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1561
+ op += oneSeqSize;
1624
1562
  }
1625
1563
 
1626
1564
  /* check if reached exact end */
1627
- if (FSE_reloadDStream(&DStream) > 2) return (size_t)-ZSTD_ERROR_GENERIC; /* requested too much : data is corrupted */
1628
- if (!FSE_endOfDState(&stateLL) && !FSE_endOfDState(&stateML) && !FSE_endOfDState(&stateOffb)) goto _another_round; /* some ultra-compressible sequence remain ! */
1629
- if (litPtr != litEnd) goto _another_round; /* literals not entirely spent */
1565
+ if (FSE_reloadDStream(&(seqState.DStream)) > FSE_DStream_completed) return (size_t)-ZSTD_ERROR_corruption; /* requested too much : data is corrupted */
1566
+ if (nbSeq<0) return (size_t)-ZSTD_ERROR_corruption; /* requested too many sequences : data is corrupted */
1630
1567
 
1631
1568
  /* last literal segment */
1632
- if (op != litPtr) memmove(op, litPtr, lastLLSize);
1633
- op += lastLLSize; CHECK_OVERFLOW(op <= (BYTE *)dst + maxDstSize);
1569
+ {
1570
+ size_t lastLLSize = litEnd - litPtr;
1571
+ if (op+lastLLSize > oend) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
1572
+ if (op != litPtr) memmove(op, litPtr, lastLLSize);
1573
+ op += lastLLSize;
1574
+ }
1634
1575
  }
1635
1576
 
1636
1577
  return op-ostart;
1637
1578
  }
1638
1579
 
1639
1580
 
1581
+ static size_t ZSTD_decompressBlock(
1582
+ void* ctx,
1583
+ void* dst, size_t maxDstSize,
1584
+ const void* src, size_t srcSize)
1585
+ {
1586
+ /* blockType == blockCompressed, srcSize is trusted */
1587
+ const BYTE* ip = (const BYTE*)src;
1588
+ const BYTE* litPtr;
1589
+ size_t litSize;
1590
+ size_t errorCode;
1591
+
1592
+ /* Decode literals sub-block */
1593
+ errorCode = ZSTD_decodeLiteralsBlock(ctx, dst, maxDstSize, &litPtr, &litSize, src, srcSize);
1594
+ if (ZSTD_isError(errorCode)) return errorCode;
1595
+ ip += errorCode;
1596
+ srcSize -= errorCode;
1597
+
1598
+ return ZSTD_decompressSequences(ctx, dst, maxDstSize, ip, srcSize, litPtr, litSize);
1599
+ }
1600
+
1601
+
1640
1602
  static size_t ZSTD_decompressDCtx(void* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
1641
1603
  {
1642
1604
  const BYTE* ip = (const BYTE*)src;
@@ -1649,22 +1611,21 @@ static size_t ZSTD_decompressDCtx(void* ctx, void* dst, size_t maxDstSize, const
1649
1611
  size_t errorCode=0;
1650
1612
  blockProperties_t blockProperties;
1651
1613
 
1652
- /* Header */
1653
- if (srcSize < ZSTD_frameHeaderSize) return (size_t)-ZSTD_ERROR_wrongSrcSize;
1614
+ /* Frame Header */
1615
+ if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return (size_t)-ZSTD_ERROR_SrcSize;
1654
1616
  magicNumber = ZSTD_readBE32(src);
1655
- if (magicNumber != ZSTD_magicNumber) return (size_t)-ZSTD_ERROR_wrongMagicNumber;
1617
+ if (magicNumber != ZSTD_magicNumber) return (size_t)-ZSTD_ERROR_MagicNumber;
1656
1618
  ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;
1657
1619
 
1620
+ /* Loop on each block */
1658
1621
  while (1)
1659
1622
  {
1660
1623
  size_t blockSize = ZSTD_getcBlockSize(ip, iend-ip, &blockProperties);
1661
- if (ZSTD_isError(blockSize))
1662
- return blockSize;
1624
+ if (ZSTD_isError(blockSize)) return blockSize;
1663
1625
 
1664
1626
  ip += ZSTD_blockHeaderSize;
1665
1627
  remainingSize -= ZSTD_blockHeaderSize;
1666
- if (ip+blockSize > iend)
1667
- return (size_t)-ZSTD_ERROR_wrongSrcSize;
1628
+ if (blockSize > remainingSize) return (size_t)-ZSTD_ERROR_SrcSize;
1668
1629
 
1669
1630
  switch(blockProperties.blockType)
1670
1631
  {
@@ -1675,11 +1636,11 @@ static size_t ZSTD_decompressDCtx(void* ctx, void* dst, size_t maxDstSize, const
1675
1636
  errorCode = ZSTD_copyUncompressedBlock(op, oend-op, ip, blockSize);
1676
1637
  break;
1677
1638
  case bt_rle :
1678
- return (size_t)-ZSTD_ERROR_GENERIC; /* not yet handled */
1639
+ return (size_t)-ZSTD_ERROR_GENERIC; /* not yet supported */
1679
1640
  break;
1680
1641
  case bt_end :
1681
1642
  /* end of frame */
1682
- if (remainingSize) return (size_t)-ZSTD_ERROR_wrongSrcSize;
1643
+ if (remainingSize) return (size_t)-ZSTD_ERROR_SrcSize;
1683
1644
  break;
1684
1645
  default:
1685
1646
  return (size_t)-ZSTD_ERROR_GENERIC;
@@ -1687,7 +1648,7 @@ static size_t ZSTD_decompressDCtx(void* ctx, void* dst, size_t maxDstSize, const
1687
1648
  if (blockSize == 0) break; /* bt_end */
1688
1649
 
1689
1650
  if (ZSTD_isError(errorCode)) return errorCode;
1690
- op += errorCode; CHECK_OVERFLOW(op <= oend);
1651
+ op += errorCode;
1691
1652
  ip += blockSize;
1692
1653
  remainingSize -= blockSize;
1693
1654
  }
@@ -1695,107 +1656,113 @@ static size_t ZSTD_decompressDCtx(void* ctx, void* dst, size_t maxDstSize, const
1695
1656
  return op-ostart;
1696
1657
  }
1697
1658
 
1698
-
1699
1659
  size_t ZSTD_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
1700
1660
  {
1701
- U32 ctx[FSE_DTABLE_SIZE_U32(LLFSELog) + FSE_DTABLE_SIZE_U32(OffFSELog) + FSE_DTABLE_SIZE_U32(MLFSELog)];
1702
- return ZSTD_decompressDCtx(ctx, dst, maxDstSize, src, srcSize);
1661
+ dctx_t ctx;
1662
+ ctx.base = dst;
1663
+ return ZSTD_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize);
1703
1664
  }
1704
1665
 
1705
1666
 
1706
- /******************************
1667
+ /*******************************
1707
1668
  * Streaming Decompression API
1708
- ******************************/
1669
+ *******************************/
1709
1670
 
1710
- typedef struct
1671
+ size_t ZSTD_resetDCtx(ZSTD_Dctx* dctx)
1711
1672
  {
1712
- U32 ctx[FSE_DTABLE_SIZE_U32(LLFSELog) + FSE_DTABLE_SIZE_U32(OffFSELog) + FSE_DTABLE_SIZE_U32(MLFSELog)];
1713
- size_t expected;
1714
- blockType_t bType;
1715
- U32 started;
1716
- } dctx_t;
1717
-
1673
+ dctx->expected = ZSTD_frameHeaderSize;
1674
+ dctx->phase = 0;
1675
+ dctx->previousDstEnd = NULL;
1676
+ dctx->base = NULL;
1677
+ return 0;
1678
+ }
1718
1679
 
1719
- ZSTD_dctx_t ZSTD_createDCtx(void)
1680
+ ZSTD_Dctx* ZSTD_createDCtx(void)
1720
1681
  {
1721
- dctx_t* dctx = (dctx_t*)malloc(sizeof(dctx_t));
1722
- dctx->expected = 4 + ZSTD_blockHeaderSize; // Frame Header + Block Header
1723
- dctx->started = 0;
1724
- return (ZSTD_dctx_t)dctx;
1682
+ ZSTD_Dctx* dctx = (ZSTD_Dctx*)malloc(sizeof(ZSTD_Dctx));
1683
+ if (dctx==NULL) return NULL;
1684
+ ZSTD_resetDCtx(dctx);
1685
+ return dctx;
1725
1686
  }
1726
1687
 
1727
- size_t ZSTD_freeDCtx(ZSTD_dctx_t dctx)
1688
+ size_t ZSTD_freeDCtx(ZSTD_Dctx* dctx)
1728
1689
  {
1729
1690
  free(dctx);
1730
1691
  return 0;
1731
1692
  }
1732
1693
 
1733
-
1734
- size_t ZSTD_getNextcBlockSize(ZSTD_dctx_t dctx)
1694
+ size_t ZSTD_nextSrcSizeToDecompress(ZSTD_Dctx* dctx)
1735
1695
  {
1736
1696
  return ((dctx_t*)dctx)->expected;
1737
1697
  }
1738
1698
 
1739
- size_t ZSTD_decompressContinue(ZSTD_dctx_t dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
1699
+ size_t ZSTD_decompressContinue(ZSTD_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
1740
1700
  {
1741
1701
  dctx_t* ctx = (dctx_t*)dctx;
1742
- size_t cSize = srcSize - ZSTD_blockHeaderSize;
1743
- size_t rSize;
1744
1702
 
1745
- // Sanity check
1746
- if (srcSize != ctx->expected) return (size_t)-ZSTD_ERROR_wrongSrcSize;
1703
+ /* Sanity check */
1704
+ if (srcSize != ctx->expected) return (size_t)-ZSTD_ERROR_SrcSize;
1705
+ if (dst != ctx->previousDstEnd) /* not contiguous */
1706
+ ctx->base = dst;
1747
1707
 
1748
- // Decompress
1749
- if (!ctx->started)
1708
+ /* Decompress : frame header */
1709
+ if (ctx->phase == 0)
1750
1710
  {
1751
- // Just check correct magic header
1711
+ /* Check frame magic header */
1752
1712
  U32 magicNumber = ZSTD_readBE32(src);
1753
- if (magicNumber != ZSTD_magicNumber) return (size_t)-ZSTD_ERROR_wrongMagicNumber;
1754
- rSize = 0;
1713
+ if (magicNumber != ZSTD_magicNumber) return (size_t)-ZSTD_ERROR_MagicNumber;
1714
+ ctx->phase = 1;
1715
+ ctx->expected = ZSTD_blockHeaderSize;
1716
+ return 0;
1755
1717
  }
1756
- else
1718
+
1719
+ /* Decompress : block header */
1720
+ if (ctx->phase == 1)
1721
+ {
1722
+ blockProperties_t bp;
1723
+ size_t blockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
1724
+ if (ZSTD_isError(blockSize)) return blockSize;
1725
+ if (bp.blockType == bt_end)
1726
+ {
1727
+ ctx->expected = 0;
1728
+ ctx->phase = 0;
1729
+ }
1730
+ else
1731
+ {
1732
+ ctx->expected = blockSize;
1733
+ ctx->bType = bp.blockType;
1734
+ ctx->phase = 2;
1735
+ }
1736
+
1737
+ return 0;
1738
+ }
1739
+
1740
+ /* Decompress : block content */
1757
1741
  {
1742
+ size_t rSize;
1758
1743
  switch(ctx->bType)
1759
1744
  {
1760
1745
  case bt_compressed:
1761
- rSize = ZSTD_decompressBlock(ctx, dst, maxDstSize, src, cSize);
1746
+ rSize = ZSTD_decompressBlock(ctx, dst, maxDstSize, src, srcSize);
1762
1747
  break;
1763
1748
  case bt_raw :
1764
- rSize = ZSTD_copyUncompressedBlock(dst, maxDstSize, src, cSize);
1749
+ rSize = ZSTD_copyUncompressedBlock(dst, maxDstSize, src, srcSize);
1765
1750
  break;
1766
1751
  case bt_rle :
1767
1752
  return (size_t)-ZSTD_ERROR_GENERIC; /* not yet handled */
1768
1753
  break;
1769
- case bt_end :
1754
+ case bt_end : /* should never happen (filtered at phase 1) */
1770
1755
  rSize = 0;
1771
1756
  break;
1772
1757
  default:
1773
1758
  return (size_t)-ZSTD_ERROR_GENERIC;
1774
1759
  }
1760
+ ctx->phase = 1;
1761
+ ctx->expected = ZSTD_blockHeaderSize;
1762
+ ctx->previousDstEnd = (void*)( ((char*)dst) + rSize);
1763
+ return rSize;
1775
1764
  }
1776
1765
 
1777
- // Prepare next block
1778
- {
1779
- const BYTE* header = (const BYTE*)src;
1780
- blockProperties_t bp;
1781
- size_t blockSize;
1782
- header += cSize;
1783
- blockSize = ZSTD_getcBlockSize(header, ZSTD_blockHeaderSize, &bp);
1784
- if (ZSTD_isError(blockSize)) return blockSize;
1785
- if (bp.blockType == bt_end)
1786
- {
1787
- ctx->expected = 0;
1788
- ctx->started = 0;
1789
- }
1790
- else
1791
- {
1792
- ctx->expected = blockSize + ZSTD_blockHeaderSize;
1793
- ctx->bType = bp.blockType;
1794
- ctx->started = 1;
1795
- }
1796
- }
1797
-
1798
- return rSize;
1799
1766
  }
1800
1767
 
1801
1768