extzstd 0.0.1.CONCEPT → 0.0.2.CONCEPT

Sign up to get free protection for your applications and to get access to all the features.
@@ -40,24 +40,32 @@ extern "C" {
40
40
 
41
41
 
42
42
  /******************************************
43
- * Tool functions
43
+ * FSE API compatible with DLL
44
44
  ******************************************/
45
- #define FSE_MAX_HEADERSIZE 512
46
- #define FSE_COMPRESSBOUND(size) (size + (size>>7) + FSE_MAX_HEADERSIZE) /* Macro can be useful for static allocation */
45
+ #include "fse.h"
47
46
 
48
47
 
49
48
  /******************************************
50
49
  * Static allocation
51
50
  ******************************************/
52
- /* You can statically allocate a CTable as a table of U32 using below macro */
51
+ /* FSE buffer bounds */
52
+ #define FSE_NCOUNTBOUND 512
53
+ #define FSE_BLOCKBOUND(size) (size + (size>>7))
54
+ #define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
55
+
56
+ /* You can statically allocate FSE CTable/DTable as a table of unsigned using below macro */
53
57
  #define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2))
54
- #define FSE_DTABLE_SIZE_U32(maxTableLog) ((1<<maxTableLog)+1)
58
+ #define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<maxTableLog))
55
59
 
60
+ /* Huff0 buffer bounds */
61
+ #define HUF_CTABLEBOUND 129
62
+ #define HUF_BLOCKBOUND(size) (size + (size>>8) + 8) /* only true if pre-filtered with fast heuristic */
63
+ #define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
56
64
 
57
- /******************************************
58
- * FSE supported API for DLL
59
- ******************************************/
60
- #include "fse.h"
65
+ /* You can statically allocate Huff0 DTable as a table of unsigned short using below macro */
66
+ #define HUF_DTABLE_SIZE_U16(maxTableLog) (1 + (1<<maxTableLog))
67
+ #define HUF_CREATE_STATIC_DTABLE(DTable, maxTableLog) \
68
+ unsigned short DTable[HUF_DTABLE_SIZE_U16(maxTableLog)] = { maxTableLog }
61
69
 
62
70
 
63
71
  /******************************************
@@ -65,7 +73,7 @@ extern "C" {
65
73
  ******************************************/
66
74
  #define FSE_LIST_ERRORS(ITEM) \
67
75
  ITEM(FSE_OK_NoError) ITEM(FSE_ERROR_GENERIC) \
68
- ITEM(FSE_ERROR_tableLog_tooLarge) ITEM(FSE_ERROR_maxSymbolValue_tooLarge) \
76
+ ITEM(FSE_ERROR_tableLog_tooLarge) ITEM(FSE_ERROR_maxSymbolValue_tooLarge) ITEM(FSE_ERROR_maxSymbolValue_tooSmall) \
69
77
  ITEM(FSE_ERROR_dstSize_tooSmall) ITEM(FSE_ERROR_srcSize_wrong)\
70
78
  ITEM(FSE_ERROR_corruptionDetected) \
71
79
  ITEM(FSE_ERROR_maxCode)
@@ -77,30 +85,196 @@ typedef enum { FSE_LIST_ERRORS(FSE_GENERATE_ENUM) } FSE_errorCodes; /* enum is
77
85
  /******************************************
78
86
  * FSE advanced API
79
87
  ******************************************/
80
- size_t FSE_countFast(unsigned* count, const unsigned char* src, size_t srcSize, unsigned* maxSymbolValuePtr);
81
- /* same as FSE_count(), but won't check if input really respect that all values within src are <= *maxSymbolValuePtr */
88
+ size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr, const unsigned char* src, size_t srcSize);
89
+ /* same as FSE_count(), but blindly trusts that all values within src are <= maxSymbolValuePtr[0] */
82
90
 
83
- size_t FSE_buildCTable_raw (void* CTable, unsigned nbBits);
84
- /* create a fake CTable, designed to not compress an input where each element uses nbBits */
91
+ size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits);
92
+ /* build a fake FSE_CTable, designed to not compress an input, where each symbol uses nbBits */
85
93
 
86
- size_t FSE_buildCTable_rle (void* CTable, unsigned char symbolValue);
87
- /* create a fake CTable, designed to compress a single identical value */
94
+ size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue);
95
+ /* build a fake FSE_CTable, designed to always compress the same symbolValue */
88
96
 
89
- size_t FSE_buildDTable_raw (void* DTable, unsigned nbBits);
90
- /* create a fake DTable, designed to read an uncompressed bitstream where each element uses nbBits */
97
+ size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
98
+ /* build a fake FSE_DTable, designed to read an uncompressed bitstream where each symbol uses nbBits */
91
99
 
92
- size_t FSE_buildDTable_rle (void* DTable, unsigned char symbolValue);
93
- /* create a fake DTable, designed to always generate the same symbolValue */
100
+ size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
101
+ /* build a fake FSE_DTable, designed to always generate the same symbolValue */
94
102
 
95
103
 
96
104
  /******************************************
97
- * FSE streaming API
105
+ * FSE symbol compression API
98
106
  ******************************************/
99
- bitD_t FSE_readBitsFast(FSE_DStream_t* bitD, unsigned nbBits);
107
+ /*
108
+ This API consists of small unitary functions, which highly benefit from being inlined.
109
+ You will want to enable link-time-optimization to ensure these functions are properly inlined in your binary.
110
+ Visual Studio seems to do it automatically.
111
+ For gcc or clang, you'll need to add -flto flag at compilation and linking stages.
112
+ If none of these solutions is applicable, include "fse.c" directly.
113
+ */
114
+
115
+ typedef struct
116
+ {
117
+ size_t bitContainer;
118
+ int bitPos;
119
+ char* startPtr;
120
+ char* ptr;
121
+ char* endPtr;
122
+ } FSE_CStream_t;
123
+
124
+ typedef struct
125
+ {
126
+ ptrdiff_t value;
127
+ const void* stateTable;
128
+ const void* symbolTT;
129
+ unsigned stateLog;
130
+ } FSE_CState_t;
131
+
132
+ size_t FSE_initCStream(FSE_CStream_t* bitC, void* dstBuffer, size_t maxDstSize);
133
+ void FSE_initCState(FSE_CState_t* CStatePtr, const FSE_CTable* ct);
134
+
135
+ void FSE_encodeSymbol(FSE_CStream_t* bitC, FSE_CState_t* CStatePtr, unsigned symbol);
136
+ void FSE_addBits(FSE_CStream_t* bitC, size_t value, unsigned nbBits);
137
+ void FSE_flushBits(FSE_CStream_t* bitC);
138
+
139
+ void FSE_flushCState(FSE_CStream_t* bitC, const FSE_CState_t* CStatePtr);
140
+ size_t FSE_closeCStream(FSE_CStream_t* bitC);
141
+
142
+ /*
143
+ These functions are inner components of FSE_compress_usingCTable().
144
+ They allow the creation of custom streams, mixing multiple tables and bit sources.
145
+
146
+ A key property to keep in mind is that encoding and decoding are done **in reverse direction**.
147
+ So the first symbol you will encode is the last you will decode, like a LIFO stack.
148
+
149
+ You will need a few variables to track your CStream. They are :
150
+
151
+ FSE_CTable ct; // Provided by FSE_buildCTable()
152
+ FSE_CStream_t bitStream; // bitStream tracking structure
153
+ FSE_CState_t state; // State tracking structure (can have several)
154
+
155
+
156
+ The first thing to do is to init bitStream and state.
157
+ size_t errorCode = FSE_initCStream(&bitStream, dstBuffer, maxDstSize);
158
+ FSE_initCState(&state, ct);
159
+
160
+ Note that FSE_initCStream() can produce an error code, so its result should be tested, using FSE_isError();
161
+ You can then encode your input data, byte after byte.
162
+ FSE_encodeSymbol() outputs a maximum of 'tableLog' bits at a time.
163
+ Remember decoding will be done in reverse direction.
164
+ FSE_encodeSymbol(&bitStream, &state, symbol);
165
+
166
+ At any time, you can also add any bit sequence.
167
+ Note : maximum allowed nbBits is 25, for compatibility with 32-bits decoders
168
+ FSE_addBits(&bitStream, bitField, nbBits);
169
+
170
+ The above methods don't commit data to memory, they just store it into local register, for speed.
171
+ Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
172
+ Writing data to memory is a manual operation, performed by the flushBits function.
173
+ FSE_flushBits(&bitStream);
174
+
175
+ Your last FSE encoding operation shall be to flush your last state value(s).
176
+ FSE_flushCState(&bitStream, &state);
177
+
178
+ Finally, you must close the bitStream.
179
+ The function returns the size of CStream in bytes.
180
+ If data couldn't fit into dstBuffer, it will return a 0 ( == not compressible)
181
+ If there is an error, it returns an errorCode (which can be tested using FSE_isError()).
182
+ size_t size = FSE_closeCStream(&bitStream);
183
+ */
184
+
185
+
186
+ /******************************************
187
+ * FSE symbol decompression API
188
+ ******************************************/
189
+ typedef struct
190
+ {
191
+ size_t bitContainer;
192
+ unsigned bitsConsumed;
193
+ const char* ptr;
194
+ const char* start;
195
+ } FSE_DStream_t;
196
+
197
+ typedef struct
198
+ {
199
+ size_t state;
200
+ const void* table; /* precise table may vary, depending on U16 */
201
+ } FSE_DState_t;
202
+
203
+
204
+ size_t FSE_initDStream(FSE_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
205
+ void FSE_initDState(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD, const FSE_DTable* dt);
206
+
207
+ unsigned char FSE_decodeSymbol(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD);
208
+ size_t FSE_readBits(FSE_DStream_t* bitD, unsigned nbBits);
209
+ unsigned int FSE_reloadDStream(FSE_DStream_t* bitD);
210
+
211
+ unsigned FSE_endOfDStream(const FSE_DStream_t* bitD);
212
+ unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr);
213
+
214
+ typedef enum { FSE_DStream_unfinished = 0,
215
+ FSE_DStream_endOfBuffer = 1,
216
+ FSE_DStream_completed = 2,
217
+ FSE_DStream_tooFar = 3 } FSE_DStream_status; /* result of FSE_reloadDStream() */
218
+ /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... ?! */
219
+
220
+ /*
221
+ Let's now decompose FSE_decompress_usingDTable() into its unitary components.
222
+ You will decode FSE-encoded symbols from the bitStream,
223
+ and also any other bitFields you put in, **in reverse order**.
224
+
225
+ You will need a few variables to track your bitStream. They are :
226
+
227
+ FSE_DStream_t DStream; // Stream context
228
+ FSE_DState_t DState; // State context. Multiple ones are possible
229
+ FSE_DTable* DTablePtr; // Decoding table, provided by FSE_buildDTable()
230
+
231
+ The first thing to do is to init the bitStream.
232
+ errorCode = FSE_initDStream(&DStream, srcBuffer, srcSize);
233
+
234
+ You should then retrieve your initial state(s)
235
+ (in reverse flushing order if you have several ones) :
236
+ FSE_initDState(&DState, &DStream, DTablePtr);
237
+
238
+ You can then decode your data, symbol after symbol.
239
+ For information the maximum number of bits read by FSE_decodeSymbol() is 'tableLog'.
240
+ Keep in mind that symbols are decoded in reverse order, like a LIFO stack (last in, first out).
241
+ unsigned char symbol = FSE_decodeSymbol(&DState, &DStream);
242
+
243
+ You can retrieve any bitfield you eventually stored into the bitStream (in reverse order)
244
+ Note : maximum allowed nbBits is 25, for 32-bits compatibility
245
+ size_t bitField = FSE_readBits(&DStream, nbBits);
246
+
247
+ All above operations only read from local register (whose size depends on size_t).
248
+ Refueling the register from memory is manually performed by the reload method.
249
+ endSignal = FSE_reloadDStream(&DStream);
250
+
251
+ FSE_reloadDStream() result tells if there is still some more data to read from DStream.
252
+ FSE_DStream_unfinished : there is still some data left into the DStream.
253
+ FSE_DStream_endOfBuffer : Dstream reached end of buffer. Its container may no longer be completely filled.
254
+ FSE_DStream_completed : Dstream reached its exact end, corresponding in general to decompression completed.
255
+ FSE_DStream_tooFar : Dstream went too far. Decompression result is corrupted.
256
+
257
+ When reaching end of buffer (FSE_DStream_endOfBuffer), progress slowly, notably if you decode multiple symbols per loop,
258
+ to properly detect the exact end of stream.
259
+ After each decoded symbol, check if DStream is fully consumed using this simple test :
260
+ FSE_reloadDStream(&DStream) >= FSE_DStream_completed
261
+
262
+ When it's done, verify decompression is fully completed, by checking both DStream and the relevant states.
263
+ Checking if DStream has reached its end is performed by :
264
+ FSE_endOfDStream(&DStream);
265
+ Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible.
266
+ FSE_endOfDState(&DState);
267
+ */
268
+
269
+
270
+ /******************************************
271
+ * FSE unsafe symbol API
272
+ ******************************************/
273
+ size_t FSE_readBitsFast(FSE_DStream_t* bitD, unsigned nbBits);
100
274
  /* faster, but works only if nbBits >= 1 (otherwise, result will be corrupted) */
101
275
 
102
276
  unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, FSE_DStream_t* bitD);
103
- /* faster, but works only if nbBits >= 1 (otherwise, result will be corrupted) */
277
+ /* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */
104
278
 
105
279
 
106
280
  #if defined (__cplusplus)
@@ -68,9 +68,6 @@
68
68
  #include <stdio.h> /* debug : printf */
69
69
  #include "zstd_static.h"
70
70
  #if defined(__clang__) || defined(__GNUC__)
71
- # ifdef __clang__
72
- # pragma clang diagnostic ignored "-Wtypedef-redefinition"
73
- # endif
74
71
  # include "fse.c" /* due to GCC/Clang inlining limitations, including *.c runs noticeably faster */
75
72
  #else
76
73
  # include "fse_static.h"
@@ -80,7 +77,6 @@
80
77
  /********************************************************
81
78
  * Compiler specifics
82
79
  *********************************************************/
83
- //#if (!(defined(_MSC_VER) && (_MSC_VER<=1500))) /* exclude Visual 2008 and below */
84
80
  #ifdef __AVX2__
85
81
  # include <immintrin.h> /* AVX2 intrinsics */
86
82
  #endif
@@ -100,10 +96,12 @@
100
96
  #endif
101
97
 
102
98
 
99
+ #ifndef MEM_ACCESS_MODULE
100
+ #define MEM_ACCESS_MODULE
103
101
  /********************************************************
104
102
  * Basic Types
105
103
  *********************************************************/
106
- #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99
104
+ #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
107
105
  # include <stdint.h>
108
106
  typedef uint8_t BYTE;
109
107
  typedef uint16_t U16;
@@ -120,19 +118,18 @@ typedef signed int S32;
120
118
  typedef unsigned long long U64;
121
119
  #endif
122
120
 
121
+ #endif /* MEM_ACCESS_MODULE */
122
+
123
123
 
124
124
  /********************************************************
125
125
  * Constants
126
126
  *********************************************************/
127
- static const U32 ZSTD_magicNumber = 0xFD2FB51C;
127
+ static const U32 ZSTD_magicNumber = 0xFD2FB51E; /* 3rd version : seqNb header */
128
128
 
129
129
  #define HASH_LOG (ZSTD_MEMORY_USAGE - 2)
130
130
  #define HASH_TABLESIZE (1 << HASH_LOG)
131
131
  #define HASH_MASK (HASH_TABLESIZE - 1)
132
132
 
133
- #define MAXD_LOG 16
134
- #define MAX_DISTANCE ((1 << MAXD_LOG) - 1)
135
-
136
133
  #define KNUTH 2654435761
137
134
 
138
135
  #define BIT7 128
@@ -142,14 +139,14 @@ static const U32 ZSTD_magicNumber = 0xFD2FB51C;
142
139
 
143
140
  #define KB *(1 <<10)
144
141
  #define MB *(1 <<20)
145
- #define GB *(1U<<20)
142
+ #define GB *(1U<<30)
146
143
 
147
- #define BLOCKSIZE (128 KB) // define, for static allocation
148
- static const U32 g_maxDistance = 512 KB;
144
+ #define BLOCKSIZE (128 KB) /* define, for static allocation */
145
+ static const U32 g_maxDistance = 4 * BLOCKSIZE;
149
146
  static const U32 g_maxLimit = 1 GB;
150
147
  static const U32 g_searchStrength = 8;
151
148
 
152
- #define WORKPLACESIZE (BLOCKSIZE*11/4)
149
+ #define WORKPLACESIZE (BLOCKSIZE*3)
153
150
  #define MINMATCH 4
154
151
  #define MLbits 7
155
152
  #define LLbits 6
@@ -161,6 +158,8 @@ static const U32 g_searchStrength = 8;
161
158
  #define MLFSELog 10
162
159
  #define LLFSELog 10
163
160
  #define OffFSELog 9
161
+ #define MAX(a,b) ((a)<(b)?(b):(a))
162
+ #define MaxSeq MAX(MaxLL, MaxML)
164
163
 
165
164
  #define LITERAL_NOENTROPY 63
166
165
  #define COMMAND_NOENTROPY 7 /* to remove */
@@ -181,11 +180,13 @@ static unsigned ZSTD_isLittleEndian(void)
181
180
  return one.c[0];
182
181
  }
183
182
 
184
- static U16 ZSTD_read16(const void* p) { return *(U16*)p; }
183
+ static U16 ZSTD_read16(const void* p) { U16 r; memcpy(&r, p, sizeof(r)); return r; }
185
184
 
186
- static U32 ZSTD_read32(const void* p) { return *(U32*)p; }
185
+ static U32 ZSTD_read32(const void* p) { U32 r; memcpy(&r, p, sizeof(r)); return r; }
187
186
 
188
- static size_t ZSTD_read_ARCH(const void* p) { return *(size_t*)p; }
187
+ static U64 ZSTD_read64(const void* p) { U64 r; memcpy(&r, p, sizeof(r)); return r; }
188
+
189
+ static size_t ZSTD_read_ARCH(const void* p) { size_t r; memcpy(&r, p, sizeof(r)); return r; }
189
190
 
190
191
  static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
191
192
 
@@ -201,6 +202,27 @@ static void ZSTD_wildcopy(void* dst, const void* src, size_t length)
201
202
  while (op < oend) COPY8(op, ip);
202
203
  }
203
204
 
205
+ static U16 ZSTD_readLE16(const void* memPtr)
206
+ {
207
+ if (ZSTD_isLittleEndian()) return ZSTD_read16(memPtr);
208
+ else
209
+ {
210
+ const BYTE* p = (const BYTE*)memPtr;
211
+ return (U16)((U16)p[0] + ((U16)p[1]<<8));
212
+ }
213
+ }
214
+
215
+ static void ZSTD_writeLE16(void* memPtr, U16 val)
216
+ {
217
+ if (ZSTD_isLittleEndian()) memcpy(memPtr, &val, sizeof(val));
218
+ else
219
+ {
220
+ BYTE* p = (BYTE*)memPtr;
221
+ p[0] = (BYTE)val;
222
+ p[1] = (BYTE)(val>>8);
223
+ }
224
+ }
225
+
204
226
  static U32 ZSTD_readLE32(const void* memPtr)
205
227
  {
206
228
  if (ZSTD_isLittleEndian())
@@ -243,40 +265,6 @@ static void ZSTD_writeBE32(void* memPtr, U32 value)
243
265
  p[3] = (BYTE)(value>>0);
244
266
  }
245
267
 
246
- static size_t ZSTD_writeProgressive(void* ptr, size_t value)
247
- {
248
- BYTE* const bStart = (BYTE* const)ptr;
249
- BYTE* byte = bStart;
250
-
251
- do
252
- {
253
- BYTE l = value & 127;
254
- value >>= 7;
255
- if (value) l += 128;
256
- *byte++ = l;
257
- } while (value);
258
-
259
- return byte - bStart;
260
- }
261
-
262
-
263
- static size_t ZSTD_readProgressive(size_t* result, const void* ptr)
264
- {
265
- const BYTE* const bStart = (const BYTE* const)ptr;
266
- const BYTE* byte = bStart;
267
- size_t r = 0;
268
- U32 shift = 0;
269
-
270
- do
271
- {
272
- r += (*byte & 127) << shift;
273
- shift += 7;
274
- } while (*byte++ & 128);
275
-
276
- *result = r;
277
- return byte - bStart;
278
- }
279
-
280
268
 
281
269
  /**************************************
282
270
  * Local structures
@@ -289,12 +277,38 @@ typedef struct
289
277
  U32 origSize;
290
278
  } blockProperties_t;
291
279
 
292
- typedef struct
280
+ typedef struct {
281
+ void* buffer;
282
+ U32* offsetStart;
283
+ U32* offset;
284
+ BYTE* offCodeStart;
285
+ BYTE* offCode;
286
+ BYTE* litStart;
287
+ BYTE* lit;
288
+ BYTE* litLengthStart;
289
+ BYTE* litLength;
290
+ BYTE* matchLengthStart;
291
+ BYTE* matchLength;
292
+ BYTE* dumpsStart;
293
+ BYTE* dumps;
294
+ } seqStore_t;
295
+
296
+ void ZSTD_resetSeqStore(seqStore_t* ssPtr)
297
+ {
298
+ ssPtr->offset = ssPtr->offsetStart;
299
+ ssPtr->lit = ssPtr->litStart;
300
+ ssPtr->litLength = ssPtr->litLengthStart;
301
+ ssPtr->matchLength = ssPtr->matchLengthStart;
302
+ ssPtr->dumps = ssPtr->dumpsStart;
303
+ }
304
+
305
+
306
+ typedef struct ZSTD_Cctx_s
293
307
  {
294
308
  const BYTE* base;
295
309
  U32 current;
296
310
  U32 nextUpdate;
297
- BYTE* workplace;
311
+ seqStore_t seqStore;
298
312
  #ifdef __AVX2__
299
313
  __m256i hashTable[HASH_TABLESIZE>>3];
300
314
  #else
@@ -303,27 +317,35 @@ typedef struct
303
317
  } cctxi_t;
304
318
 
305
319
 
306
- ZSTD_cctx_t ZSTD_createCCtx(void)
320
+ ZSTD_Cctx* ZSTD_createCCtx(void)
307
321
  {
308
- cctxi_t* srt = (cctxi_t *) malloc( sizeof(cctxi_t) );
309
- srt->workplace = (BYTE*) malloc(WORKPLACESIZE);
310
- return (ZSTD_cctx_t)srt;
322
+ ZSTD_Cctx* ctx = (ZSTD_Cctx*) malloc( sizeof(ZSTD_Cctx) );
323
+ if (ctx==NULL) return NULL;
324
+ ctx->seqStore.buffer = malloc(WORKPLACESIZE);
325
+ if (ctx->seqStore.buffer==NULL)
326
+ {
327
+ free(ctx);
328
+ return NULL;
329
+ }
330
+ ctx->seqStore.offsetStart = (U32*) (ctx->seqStore.buffer);
331
+ ctx->seqStore.offCodeStart = (BYTE*) (ctx->seqStore.offsetStart + (BLOCKSIZE>>2));
332
+ ctx->seqStore.litStart = ctx->seqStore.offCodeStart + (BLOCKSIZE>>2);
333
+ ctx->seqStore.litLengthStart = ctx->seqStore.litStart + BLOCKSIZE;
334
+ ctx->seqStore.matchLengthStart = ctx->seqStore.litLengthStart + (BLOCKSIZE>>2);
335
+ ctx->seqStore.dumpsStart = ctx->seqStore.matchLengthStart + (BLOCKSIZE>>2);
336
+ return ctx;
311
337
  }
312
338
 
313
-
314
- void ZSTD_resetCCtx(ZSTD_cctx_t ctx)
339
+ void ZSTD_resetCCtx(ZSTD_Cctx* ctx)
315
340
  {
316
- cctxi_t* srt = (cctxi_t*)ctx;
317
- srt->base = NULL;
318
- memset(srt->hashTable, 0, HASH_TABLESIZE*4);
341
+ ctx->base = NULL;
342
+ memset(ctx->hashTable, 0, HASH_TABLESIZE*4);
319
343
  }
320
344
 
321
-
322
- size_t ZSTD_freeCCtx(ZSTD_cctx_t ctx)
345
+ size_t ZSTD_freeCCtx(ZSTD_Cctx* ctx)
323
346
  {
324
- cctxi_t *srt = (cctxi_t *) (ctx);
325
- free(srt->workplace);
326
- free(srt);
347
+ free(ctx->seqStore.buffer);
348
+ free(ctx);
327
349
  return 0;
328
350
  }
329
351
 
@@ -360,9 +382,9 @@ static unsigned ZSTD_highbit(U32 val)
360
382
  unsigned long r;
361
383
  _BitScanReverse(&r, val);
362
384
  return (unsigned)r;
363
- # elif defined(__GNUC__) && (GCC_VERSION >= 304) // GCC Intrinsic
385
+ # elif defined(__GNUC__) && (GCC_VERSION >= 304) /* GCC Intrinsic */
364
386
  return 31 - __builtin_clz(val);
365
- # else // Software version
387
+ # else /* Software version */
366
388
  static const int DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
367
389
  U32 v = val;
368
390
  int r;
@@ -433,7 +455,7 @@ static unsigned ZSTD_NbCommonBytes (register size_t val)
433
455
  _BitScanReverse( &r, (unsigned long)val );
434
456
  return (unsigned)(r>>3);
435
457
  # elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT)
436
- return (__builtin_clz(val) >> 3);
458
+ return (__builtin_clz((U32)val) >> 3);
437
459
  # else
438
460
  unsigned r;
439
461
  if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
@@ -479,15 +501,13 @@ static size_t ZSTD_compressRle (void* dst, size_t maxDstSize, const void* src, s
479
501
  /* at this stage : dstSize >= FSE_compressBound(srcSize) > (ZSTD_blockHeaderSize+1) (checked by ZSTD_compressLiterals()) */
480
502
  (void)maxDstSize;
481
503
 
482
- ostart[ZSTD_blockHeaderSize] = *(BYTE*)src;
504
+ ostart[ZSTD_blockHeaderSize] = *(const BYTE*)src;
483
505
 
484
- // Build header
485
- {
486
- ostart[0] = (BYTE)(srcSize>>16);
487
- ostart[1] = (BYTE)(srcSize>>8);
488
- ostart[2] = (BYTE)srcSize;
489
- ostart[0] += (BYTE)(bt_rle<<6);
490
- }
506
+ /* Build header */
507
+ ostart[0] = (BYTE)(srcSize>>16);
508
+ ostart[1] = (BYTE)(srcSize>>8);
509
+ ostart[2] = (BYTE) srcSize;
510
+ ostart[0] += (BYTE)(bt_rle<<6);
491
511
 
492
512
  return ZSTD_blockHeaderSize+1;
493
513
  }
@@ -500,76 +520,16 @@ static size_t ZSTD_noCompressBlock (void* dst, size_t maxDstSize, const void* sr
500
520
  if (srcSize + ZSTD_blockHeaderSize > maxDstSize) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
501
521
  memcpy(ostart + ZSTD_blockHeaderSize, src, srcSize);
502
522
 
503
- // Build header
504
- {
505
- ostart[0] = (BYTE)(srcSize>>16);
506
- ostart[1] = (BYTE)(srcSize>>8);
507
- ostart[2] = (BYTE)srcSize;
508
- ostart[0] += (BYTE)(bt_raw<<6); /* is a raw (uncompressed) block */
509
- }
523
+ /* Build header */
524
+ ostart[0] = (BYTE)(srcSize>>16);
525
+ ostart[1] = (BYTE)(srcSize>>8);
526
+ ostart[2] = (BYTE) srcSize;
527
+ ostart[0] += (BYTE)(bt_raw<<6); /* is a raw (uncompressed) block */
510
528
 
511
529
  return ZSTD_blockHeaderSize+srcSize;
512
530
  }
513
531
 
514
532
 
515
- /* return : size of CStream in bits */
516
- static size_t ZSTD_compressLiterals_usingCTable(void* dst, size_t dstSize,
517
- const void* src, size_t srcSize,
518
- const void* CTable)
519
- {
520
- const BYTE* const istart = (const BYTE*)src;
521
- const BYTE* ip = istart;
522
- const BYTE* const iend = istart + srcSize;
523
- FSE_CStream_t bitC;
524
- FSE_CState_t CState1, CState2;
525
-
526
- // init
527
- (void)dstSize; // objective : ensure it fits into dstBuffer (Todo)
528
- FSE_initCStream(&bitC, dst);
529
- FSE_initCState(&CState1, CTable);
530
- CState2 = CState1;
531
-
532
- /* Note : at this stage, srcSize > LITERALS_NOENTROPY (checked by ZSTD_compressLiterals()) */
533
- // join to mod 2
534
- if (srcSize & 1)
535
- {
536
- FSE_encodeByte(&bitC, &CState1, *ip++);
537
- FSE_flushBits(&bitC);
538
- }
539
-
540
- // join to mod 4
541
- if ((sizeof(size_t)*8 > LitFSELog*4+7 ) && (srcSize & 2)) // test bit 2
542
- {
543
- FSE_encodeByte(&bitC, &CState2, *ip++);
544
- FSE_encodeByte(&bitC, &CState1, *ip++);
545
- FSE_flushBits(&bitC);
546
- }
547
-
548
- // 2 or 4 encoding per loop
549
- while (ip<iend)
550
- {
551
- FSE_encodeByte(&bitC, &CState2, *ip++);
552
-
553
- if (sizeof(size_t)*8 < LitFSELog*2+7 ) // this test must be static
554
- FSE_flushBits(&bitC);
555
-
556
- FSE_encodeByte(&bitC, &CState1, *ip++);
557
-
558
- if (sizeof(size_t)*8 > LitFSELog*4+7 ) // this test must be static
559
- {
560
- FSE_encodeByte(&bitC, &CState2, *ip++);
561
- FSE_encodeByte(&bitC, &CState1, *ip++);
562
- }
563
-
564
- FSE_flushBits(&bitC);
565
- }
566
-
567
- FSE_flushCState(&bitC, &CState2);
568
- FSE_flushCState(&bitC, &CState1);
569
- return FSE_closeCStream(&bitC);
570
- }
571
-
572
-
573
533
  size_t ZSTD_minGain(size_t srcSize)
574
534
  {
575
535
  return (srcSize >> 6) + 1;
@@ -579,89 +539,58 @@ size_t ZSTD_minGain(size_t srcSize)
579
539
  static size_t ZSTD_compressLiterals (void* dst, size_t dstSize,
580
540
  const void* src, size_t srcSize)
581
541
  {
582
- const BYTE* const istart = (const BYTE*) src;
583
- const BYTE* ip = istart;
584
-
585
- BYTE* const ostart = (BYTE*) dst;
586
- BYTE* op = ostart + ZSTD_blockHeaderSize;
587
- BYTE* const oend = ostart + dstSize;
588
-
589
- U32 maxSymbolValue = 256;
590
- U32 tableLog = LitFSELog;
591
- U32 count[256];
592
- S16 norm[256];
593
- U32 CTable[ FSE_CTABLE_SIZE_U32(LitFSELog, 256) ];
594
- size_t errorCode;
595
542
  const size_t minGain = ZSTD_minGain(srcSize);
543
+ BYTE* const ostart = (BYTE*)dst;
544
+ size_t hsize;
545
+ static const size_t LHSIZE = 5;
596
546
 
597
- // early out
598
- if (dstSize < FSE_compressBound(srcSize)) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
547
+ if (dstSize < LHSIZE+1) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall; /* not enough space for compression */
599
548
 
600
- // Scan input and build symbol stats
601
- errorCode = FSE_count (count, ip, srcSize, &maxSymbolValue);
602
- if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC;
603
- if (errorCode == srcSize) return 1;
604
- if (errorCode < ((srcSize * 7) >> 10)) return 0;
549
+ hsize = HUF_compress(ostart+LHSIZE, dstSize-LHSIZE, src, srcSize);
550
+ if (hsize<2) return hsize; /* special cases */
551
+ if (hsize >= srcSize - minGain) return 0;
605
552
 
606
- tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue);
607
- errorCode = (int)FSE_normalizeCount (norm, tableLog, count, srcSize, maxSymbolValue);
608
- if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC;
609
-
610
- // Write table description header
611
- errorCode = FSE_writeHeader (op, FSE_MAX_HEADERSIZE, norm, maxSymbolValue, tableLog);
612
- if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC;
613
- op += errorCode;
553
+ hsize += 2; /* work around vs fixed 3-bytes header */
614
554
 
615
- // Compress
616
- errorCode = FSE_buildCTable (&CTable, norm, maxSymbolValue, tableLog);
617
- if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC;
618
- errorCode = ZSTD_compressLiterals_usingCTable(op, oend - op, ip, srcSize, &CTable);
619
- if (ZSTD_isError(errorCode)) return errorCode;
620
- op += errorCode;
621
-
622
- // check compressibility
623
- if ( (size_t)(op-ostart) >= srcSize-minGain)
624
- return 0;
625
-
626
- // Build header
555
+ /* Build header */
627
556
  {
628
- size_t totalSize;
629
- totalSize = op - ostart - ZSTD_blockHeaderSize;
630
- ostart[0] = (BYTE)(totalSize>>16);
631
- ostart[1] = (BYTE)(totalSize>>8);
632
- ostart[2] = (BYTE)totalSize;
633
- ostart[0] += (BYTE)(bt_compressed<<6); /* is a block, is compressed */
557
+ ostart[0] = (BYTE)(bt_compressed<<6); /* is a block, is compressed */
558
+ ostart[0] += (BYTE)(hsize>>16);
559
+ ostart[1] = (BYTE)(hsize>>8);
560
+ ostart[2] = (BYTE)(hsize>>0);
561
+ ostart[0] += (BYTE)((srcSize>>16)<<3);
562
+ ostart[3] = (BYTE)(srcSize>>8);
563
+ ostart[4] = (BYTE)(srcSize>>0);
634
564
  }
635
565
 
636
- return op-ostart;
566
+ hsize -= 2;
567
+ return hsize+LHSIZE;
637
568
  }
638
569
 
639
570
 
640
- static size_t ZSTD_compressEntropy(BYTE* dst, size_t maxDstSize,
641
- const BYTE* op_lit_start, const BYTE* op_lit,
642
- const BYTE* op_litLength_start, const BYTE* op_litLength,
643
- const BYTE* op_matchLength_start,
644
- const U32* op_offset_start,
645
- const BYTE* op_dumps_start, const BYTE* op_dumps,
646
- size_t srcSize, size_t lastLLSize
647
- )
571
+ static size_t ZSTD_compressSequences(BYTE* dst, size_t maxDstSize,
572
+ const seqStore_t* seqStorePtr,
573
+ size_t srcSize)
648
574
  {
649
- FSE_CStream_t blockStream;
650
- U32 count[256];
651
- S16 norm[256];
575
+ U32 count[MaxSeq+1];
576
+ S16 norm[MaxSeq+1];
652
577
  size_t mostFrequent;
653
578
  U32 max = 255;
654
579
  U32 tableLog = 11;
655
- const size_t nbSeq = op_litLength - op_litLength_start;
656
580
  U32 CTable_LitLength [FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL )];
657
- U32 CTable_OffsetBits [FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
581
+ U32 CTable_OffsetBits [FSE_CTABLE_SIZE_U32(OffFSELog,MaxOff)];
658
582
  U32 CTable_MatchLength[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML )];
659
- U32 LLtype, Offtype, MLtype;
660
- BYTE* op;
661
- const U32* op_offset = op_offset_start + nbSeq;
662
- const BYTE* op_matchLength = op_matchLength_start + nbSeq;
663
- BYTE offsetBits_start[BLOCKSIZE / 4];
664
- BYTE* offsetBitsPtr = offsetBits_start;
583
+ U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */
584
+ const BYTE* const op_lit_start = seqStorePtr->litStart;
585
+ const BYTE* op_lit = seqStorePtr->lit;
586
+ const BYTE* const llTable = seqStorePtr->litLengthStart;
587
+ const BYTE* op_litLength = seqStorePtr->litLength;
588
+ const BYTE* const mlTable = seqStorePtr->matchLengthStart;
589
+ const U32* const offsetTable = seqStorePtr->offsetStart;
590
+ BYTE* const offCodeTable = seqStorePtr->offCodeStart;
591
+ BYTE* op = dst;
592
+ BYTE* const oend = dst + maxDstSize;
593
+ const size_t nbSeq = op_litLength - llTable;
665
594
  const size_t minGain = ZSTD_minGain(srcSize);
666
595
  const size_t maxCSize = srcSize - minGain;
667
596
  const size_t minSeqSize = 1 /*lastL*/ + 2 /*dHead*/ + 2 /*dumpsIn*/ + 5 /*SeqHead*/ + 3 /*SeqIn*/ + 1 /*margin*/ + ZSTD_blockHeaderSize;
@@ -669,13 +598,11 @@ static size_t ZSTD_compressEntropy(BYTE* dst, size_t maxDstSize,
669
598
  BYTE* seqHead;
670
599
 
671
600
 
672
- /* init */
673
- op = dst;
674
-
675
- /* Encode literals */
601
+ /* Compress literals */
676
602
  {
677
603
  size_t cSize;
678
604
  size_t litSize = op_lit - op_lit_start;
605
+
679
606
  if (litSize <= LITERAL_NOENTROPY) cSize = ZSTD_noCompressBlock (op, maxDstSize, op_lit_start, litSize);
680
607
  else
681
608
  {
@@ -691,38 +618,39 @@ static size_t ZSTD_compressEntropy(BYTE* dst, size_t maxDstSize,
691
618
  op += cSize;
692
619
  }
693
620
 
694
- /* Encode Sequences */
695
-
696
- /* seqHeader */
697
- op += ZSTD_writeProgressive(op, lastLLSize); CHECK_OVERFLOW(op <= dst + maxDstSize);
621
+ /* Sequences Header */
622
+ if ((oend-op) < 2+3+6) /* nbSeq + dumpsLength + 3*rleCTable*/
623
+ return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
624
+ ZSTD_writeLE16(op, (U16)nbSeq); op+=2;
698
625
  seqHead = op;
699
626
 
700
- /* dumps */
627
+ /* dumps : holds the lengths that are too large for their length code */
701
628
  {
702
- size_t dumpsLength = op_dumps- op_dumps_start;
629
+ size_t dumpsLength = seqStorePtr->dumps - seqStorePtr->dumpsStart;
703
630
  if (dumpsLength < 512)
704
631
  {
705
632
  op[0] = (BYTE)(dumpsLength >> 8);
706
633
  op[1] = (BYTE)(dumpsLength);
707
- op += 2; CHECK_OVERFLOW(op <= dst + maxDstSize);
634
+ op += 2;
708
635
  }
709
636
  else
710
637
  {
711
638
  op[0] = 2;
712
639
  op[1] = (BYTE)(dumpsLength>>8);
713
640
  op[2] = (BYTE)(dumpsLength);
714
- op += 3; CHECK_OVERFLOW(op <= dst + maxDstSize);
641
+ op += 3;
715
642
  }
716
- memcpy(op, op_dumps_start, dumpsLength);
717
- op += dumpsLength; CHECK_OVERFLOW(op <= dst + maxDstSize);
643
+ if ((size_t)(oend-op) < dumpsLength+6) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
644
+ memcpy(op, seqStorePtr->dumpsStart, dumpsLength);
645
+ op += dumpsLength;
718
646
  }
719
647
 
720
- /* Encoding table of Literal Lengths */
648
+ /* CTable for Literal Lengths */
721
649
  max = MaxLL;
722
- mostFrequent = FSE_countFast(count, op_litLength_start, nbSeq, &max);
723
- if (mostFrequent == nbSeq)
650
+ mostFrequent = FSE_countFast(count, &max, seqStorePtr->litLengthStart, nbSeq);
651
+ if ((mostFrequent == nbSeq) && (nbSeq > 2))
724
652
  {
725
- *op++ = *op_litLength_start; CHECK_OVERFLOW(op <= dst + maxDstSize);
653
+ *op++ = *(seqStorePtr->litLengthStart);
726
654
  FSE_buildCTable_rle(CTable_LitLength, (BYTE)max);
727
655
  LLtype = bt_rle;
728
656
  }
@@ -733,29 +661,31 @@ static size_t ZSTD_compressEntropy(BYTE* dst, size_t maxDstSize,
733
661
  }
734
662
  else
735
663
  {
664
+ size_t NCountSize;
736
665
  tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max);
737
666
  FSE_normalizeCount(norm, tableLog, count, nbSeq, max);
738
- op += FSE_writeHeader(op, maxDstSize, norm, max, tableLog); CHECK_OVERFLOW(op <= dst + maxDstSize);
667
+ NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
668
+ if (FSE_isError(NCountSize)) return (size_t)-ZSTD_ERROR_GENERIC;
669
+ op += NCountSize;
739
670
  FSE_buildCTable(CTable_LitLength, norm, max, tableLog);
740
671
  LLtype = bt_compressed;
741
672
  }
742
673
 
743
- /* Encoding table of Offsets */
674
+ /* CTable for Offsets codes */
744
675
  {
745
- /* create OffsetBits */
676
+ /* create Offset codes */
746
677
  size_t i;
747
678
  max = MaxOff;
748
679
  for (i=0; i<nbSeq; i++)
749
680
  {
750
- offsetBits_start[i] = (BYTE)ZSTD_highbit(op_offset_start[i]) + 1;
751
- if (op_offset_start[i]==0) offsetBits_start[i]=0;
681
+ offCodeTable[i] = (BYTE)ZSTD_highbit(offsetTable[i]) + 1;
682
+ if (offsetTable[i]==0) offCodeTable[i]=0;
752
683
  }
753
- offsetBitsPtr += nbSeq;
754
- mostFrequent = FSE_countFast(count, offsetBits_start, nbSeq, &max);
684
+ mostFrequent = FSE_countFast(count, &max, offCodeTable, nbSeq);
755
685
  }
756
- if (mostFrequent == nbSeq)
686
+ if ((mostFrequent == nbSeq) && (nbSeq > 2))
757
687
  {
758
- *op++ = *offsetBits_start; CHECK_OVERFLOW(op <= dst + maxDstSize);
688
+ *op++ = *offCodeTable;
759
689
  FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max);
760
690
  Offtype = bt_rle;
761
691
  }
@@ -766,19 +696,22 @@ static size_t ZSTD_compressEntropy(BYTE* dst, size_t maxDstSize,
766
696
  }
767
697
  else
768
698
  {
699
+ size_t NCountSize;
769
700
  tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max);
770
701
  FSE_normalizeCount(norm, tableLog, count, nbSeq, max);
771
- op += FSE_writeHeader(op, maxDstSize, norm, max, tableLog); CHECK_OVERFLOW(op <= dst + maxDstSize);
702
+ NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
703
+ if (FSE_isError(NCountSize)) return (size_t)-ZSTD_ERROR_GENERIC;
704
+ op += NCountSize;
772
705
  FSE_buildCTable(CTable_OffsetBits, norm, max, tableLog);
773
706
  Offtype = bt_compressed;
774
707
  }
775
708
 
776
- /* Encoding Table of MatchLengths */
709
+ /* CTable for MatchLengths */
777
710
  max = MaxML;
778
- mostFrequent = FSE_countFast(count, op_matchLength_start, nbSeq, &max);
779
- if (mostFrequent == nbSeq)
711
+ mostFrequent = FSE_countFast(count, &max, seqStorePtr->matchLengthStart, nbSeq);
712
+ if ((mostFrequent == nbSeq) && (nbSeq > 2))
780
713
  {
781
- *op++ = *op_matchLength_start; CHECK_OVERFLOW(op <= dst + maxDstSize);
714
+ *op++ = *seqStorePtr->matchLengthStart;
782
715
  FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max);
783
716
  MLtype = bt_rle;
784
717
  }
@@ -789,48 +722,57 @@ static size_t ZSTD_compressEntropy(BYTE* dst, size_t maxDstSize,
789
722
  }
790
723
  else
791
724
  {
725
+ size_t NCountSize;
792
726
  tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max);
793
727
  FSE_normalizeCount(norm, tableLog, count, nbSeq, max);
794
- op += FSE_writeHeader(op, maxDstSize, norm, max, tableLog); CHECK_OVERFLOW(op <= dst + maxDstSize);
728
+ NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
729
+ if (FSE_isError(NCountSize)) return (size_t)-ZSTD_ERROR_GENERIC;
730
+ op += NCountSize;
795
731
  FSE_buildCTable(CTable_MatchLength, norm, max, tableLog);
796
732
  MLtype = bt_compressed;
797
733
  }
798
734
 
799
735
  seqHead[0] += (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
800
736
 
801
- /* Encoding */
737
+ /* Encoding Sequences */
802
738
  {
739
+ size_t streamSize, errorCode;
740
+ FSE_CStream_t blockStream;
803
741
  FSE_CState_t stateMatchLength;
804
742
  FSE_CState_t stateOffsetBits;
805
743
  FSE_CState_t stateLitLength;
744
+ int i;
806
745
 
807
- FSE_initCStream(&blockStream, op);
746
+ errorCode = FSE_initCStream(&blockStream, op, oend-op);
747
+ if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall; /* not enough space remaining */
808
748
  FSE_initCState(&stateMatchLength, CTable_MatchLength);
809
749
  FSE_initCState(&stateOffsetBits, CTable_OffsetBits);
810
750
  FSE_initCState(&stateLitLength, CTable_LitLength);
811
751
 
812
- while (op_litLength > op_litLength_start)
752
+ for (i=(int)nbSeq-1; i>=0; i--)
813
753
  {
814
- BYTE matchLength = *(--op_matchLength);
815
- U32 offset = *(--op_offset);
816
- BYTE offCode = *(--offsetBitsPtr); /* 32b*/ /* 64b*/
754
+ BYTE matchLength = mlTable[i];
755
+ U32 offset = offsetTable[i];
756
+ BYTE offCode = offCodeTable[i]; /* 32b*/ /* 64b*/
817
757
  U32 nbBits = (offCode-1) * (!!offCode);
818
- BYTE litLength = *(--op_litLength); /* (7)*/ /* (7)*/
819
- FSE_encodeByte(&blockStream, &stateMatchLength, matchLength); /* 17 */ /* 17 */
758
+ BYTE litLength = llTable[i]; /* (7)*/ /* (7)*/
759
+ FSE_encodeSymbol(&blockStream, &stateMatchLength, matchLength); /* 17 */ /* 17 */
820
760
  if (ZSTD_32bits()) FSE_flushBits(&blockStream); /* 7 */
821
761
  FSE_addBits(&blockStream, offset, nbBits); /* 32 */ /* 42 */
822
762
  if (ZSTD_32bits()) FSE_flushBits(&blockStream); /* 7 */
823
- FSE_encodeByte(&blockStream, &stateOffsetBits, offCode); /* 16 */ /* 51 */
824
- FSE_encodeByte(&blockStream, &stateLitLength, litLength); /* 26 */ /* 61 */
763
+ FSE_encodeSymbol(&blockStream, &stateOffsetBits, offCode); /* 16 */ /* 51 */
764
+ FSE_encodeSymbol(&blockStream, &stateLitLength, litLength); /* 26 */ /* 61 */
825
765
  FSE_flushBits(&blockStream); /* 7 */ /* 7 */
826
766
  }
827
767
 
828
768
  FSE_flushCState(&blockStream, &stateMatchLength);
829
769
  FSE_flushCState(&blockStream, &stateOffsetBits);
830
770
  FSE_flushCState(&blockStream, &stateLitLength);
831
- }
832
771
 
833
- op += FSE_closeCStream(&blockStream); CHECK_OVERFLOW(op <= dst + maxDstSize);
772
+ streamSize = FSE_closeCStream(&blockStream);
773
+ if (streamSize==0) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall; /* not enough space */
774
+ op += streamSize;
775
+ }
834
776
 
835
777
  /* check compressibility */
836
778
  if ((size_t)(op-dst) >= maxCSize) return 0;
@@ -839,57 +781,45 @@ static size_t ZSTD_compressEntropy(BYTE* dst, size_t maxDstSize,
839
781
  }
840
782
 
841
783
 
842
- static size_t ZSTD_storeSeq(BYTE* op_lit, BYTE* op_ll, U32* op_offset, BYTE* op_ml, BYTE* op_dumps,
843
- size_t litLength, const BYTE* srcLit, size_t offset, size_t matchLength)
784
/* ZSTD_storeSeq() :
 * Appends one sequence to the sequence store `seqStorePtr`.
 *  - copies `litLength` bytes from `literals` to seqStorePtr->lit and advances that pointer by litLength
 *  - appends one literal-length code, one offset (stored as U32) and one match-length code,
 *    advancing seqStorePtr->litLength, ->offset and ->matchLength by one entry each
 *  - a length >= MaxLL (resp. MaxML) is stored as the code MaxLL (resp. MaxML) and its real value
 *    is escaped into seqStorePtr->dumps : one byte (length - Max) when length < 255+Max,
 *    otherwise the marker byte 255 followed by 3 bytes of the length.
 * Returns nothing; the store's write pointers carry the result.
 * No capacity check is performed here; presumably the caller guarantees room in every buffer - TODO confirm. */
+ static void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, size_t offset, size_t matchLength)
844
785
  {
845
- const BYTE* const dumpStart = op_dumps;
846
- const BYTE* const l_end = op_lit + litLength;
847
-
786
+ BYTE* op_lit = seqStorePtr->lit;
787
+ BYTE* const l_end = op_lit + litLength;
848
788
 
849
789
  /* copy Literals */
850
- while (op_lit<l_end) COPY8(op_lit, srcLit);
790
+ while (op_lit<l_end) COPY8(op_lit, literals); /* NOTE(review): COPY8 presumably copies 8 bytes and advances both pointers, so the last step may write past l_end - confirm the buffer has slack */
791
+ seqStorePtr->lit += litLength; /* the stored pointer advances by the exact literal count, not by what the loop wrote */
851
792
 
852
793
  /* literal Length */
853
794
  if (litLength >= MaxLL)
854
795
  {
855
- *op_ll++ = MaxLL;
796
+ *(seqStorePtr->litLength++) = MaxLL; /* escape code : real length goes into dumps */
856
797
  if (litLength<255 + MaxLL)
857
- *op_dumps++ = (BYTE)(litLength - MaxLL);
798
+ *(seqStorePtr->dumps++) = (BYTE)(litLength - MaxLL); /* short form : 1 byte, value 0..254 */
858
799
  else
859
800
  {
860
- *op_dumps++ = 255;
861
- ZSTD_writeLE32(op_dumps, (U32)litLength); op_dumps += 3;
862
-
863
- //litLength |= 0xFF000000;
864
- //ZSTD_writeBE32(op_dumps, (U32)litLength);
865
- //op_dumps += 4;
801
+ *(seqStorePtr->dumps++) = 255; /* long form marker */
802
+ ZSTD_writeLE32(seqStorePtr->dumps, (U32)litLength); seqStorePtr->dumps += 3; /* a 32-bit write, but only 3 bytes are kept : the pointer advances by 3, so the next write overlaps the 4th byte */
866
803
  }
867
804
  }
868
- else *op_ll = (BYTE)litLength;
805
+ else *(seqStorePtr->litLength++) = (BYTE)litLength; /* small length : stored directly as its own code */
869
806
 
870
- /* match offset */
871
- *op_offset = (U32)offset;
807
+ /* match offset */
808
+ *(seqStorePtr->offset++) = (U32)offset; /* truncated to 32 bits */
872
809
 
873
810
  /* match Length */
874
811
  if (matchLength >= MaxML)
875
812
  {
876
- *op_ml++ = MaxML;
877
- if (matchLength<255 + MaxML)
878
- *op_dumps++ = (BYTE)(matchLength - MaxML);
813
+ *(seqStorePtr->matchLength++) = MaxML; /* escape code : real length goes into dumps */
814
+ if (matchLength < 255+MaxML)
815
+ *(seqStorePtr->dumps++) = (BYTE)(matchLength - MaxML); /* short form : 1 byte, value 0..254 */
879
816
  else
880
817
  {
881
- *op_dumps++ = 255;
882
- ZSTD_writeLE32(op_dumps, (U32)matchLength); op_dumps+=3;
883
- //*(U32*)op_dumps = (U32)matchLength; op_dumps += 3; /* store direct result */
884
-
885
- //matchLength |= 0xFF000000;
886
- //ZSTD_writeBE32(op_dumps, (U32)matchLength);
887
- //op_dumps += 4;
818
+ *(seqStorePtr->dumps++) = 255; /* long form marker */
819
+ ZSTD_writeLE32(seqStorePtr->dumps, (U32)matchLength); seqStorePtr->dumps+=3; /* same 3-bytes-kept scheme as for literal lengths */
888
820
  }
889
821
  }
890
- else *op_ml = (BYTE)matchLength;
891
-
892
- return op_dumps - dumpStart;
822
+ else *(seqStorePtr->matchLength++) = (BYTE)matchLength; /* small length : stored directly as its own code */
893
823
  }
894
824
 
895
825
 
@@ -905,7 +835,7 @@ static const U64 prime7bytes = 58295818150454627ULL;
905
835
  //static U32 ZSTD_hashPtr(const void* p) { return ( ((*(U64*)p & 0xFFFFFFFFFFFFFF) * prime7bytes) >> (64-HASH_LOG)); }
906
836
 
907
837
  //static U32 ZSTD_hashPtr(const void* p) { return ( (*(U64*)p * prime8bytes) >> (64-HASH_LOG)); }
908
- static U32 ZSTD_hashPtr(const void* p) { return ( (*(U64*)p * prime7bytes) >> (56-HASH_LOG)) & HASH_MASK; }
838
/* ZSTD_hashPtr() :
 * Hash of the 64-bit value read at `p` (through ZSTD_read64) : multiply by prime7bytes,
 * shift right by (56-HASH_LOG), then mask with HASH_MASK. The result is used as an index into the hash table. */
+ static U32 ZSTD_hashPtr(const void* p) { return ( (ZSTD_read64(p) * prime7bytes) >> (56-HASH_LOG)) & HASH_MASK; }
909
839
  //static U32 ZSTD_hashPtr(const void* p) { return ( (*(U64*)p * prime6bytes) >> (48-HASH_LOG)) & HASH_MASK; }
910
840
  //static U32 ZSTD_hashPtr(const void* p) { return ( (*(U64*)p * prime5bytes) >> (40-HASH_LOG)) & HASH_MASK; }
911
841
  //static U32 ZSTD_hashPtr(const void* p) { return ( (*(U32*)p * KNUTH) >> (32-HASH_LOG)); }
@@ -917,7 +847,6 @@ static const BYTE* ZSTD_updateMatch(U32* table, const BYTE* p, const BYTE* start
917
847
  U32 h = ZSTD_hashPtr(p);
918
848
  const BYTE* r;
919
849
  r = table[h] + start;
920
- //table[h] = (U32)(p - start);
921
850
  ZSTD_addPtr(table, p, start);
922
851
  return r;
923
852
  }
@@ -928,12 +857,12 @@ static int ZSTD_checkMatch(const BYTE* match, const BYTE* ip)
928
857
  }
929
858
 
930
859
 
931
- static size_t ZSTD_compressBlock(void* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
860
+ static size_t ZSTD_compressBlock(void* cctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
932
861
  {
933
- cctxi_t* srt = (cctxi_t*) ctx;
934
- U32* HashTable = (U32*)(srt->hashTable);
935
- void* workplace = srt->workplace;
936
- const BYTE* const base = srt->base;
862
+ cctxi_t* ctx = (cctxi_t*) cctx;
863
+ U32* HashTable = (U32*)(ctx->hashTable);
864
+ seqStore_t* seqStorePtr = &(ctx->seqStore);
865
+ const BYTE* const base = ctx->base;
937
866
 
938
867
  const BYTE* const istart = (const BYTE*)src;
939
868
  const BYTE* ip = istart + 1;
@@ -941,19 +870,16 @@ static size_t ZSTD_compressBlock(void* ctx, void* dst, size_t maxDstSize, const
941
870
  const BYTE* const iend = istart + srcSize;
942
871
  const BYTE* const ilimit = iend - 16;
943
872
 
944
- U32 *op_offset = (U32*)(workplace), *op_offset_start = op_offset;
945
- BYTE *op_l = (BYTE*)workplace + srcSize + 4, *op_l_start = op_l;
946
- BYTE *op_rl = op_l + srcSize + 4, *op_rl_start = op_rl;
947
- BYTE *op_ml = op_rl + (srcSize >> 2) + 4, *op_ml_start = op_ml;
948
- BYTE *op_dumps = op_ml + (srcSize >> 2) + 4, *op_dumps_start = op_dumps;
949
873
  size_t prevOffset=0, offset=0;
950
- size_t lastLLSize;
951
874
 
952
875
 
876
+ /* init */
877
+ ZSTD_resetSeqStore(seqStorePtr);
878
+
953
879
  /* Main Search Loop */
954
880
  while (ip < ilimit)
955
881
  {
956
- const BYTE* match = (BYTE*) ZSTD_updateMatch(HashTable, ip, base);
882
+ const BYTE* match = (const BYTE*) ZSTD_updateMatch(HashTable, ip, base);
957
883
 
958
884
  if (!ZSTD_checkMatch(match,ip)) { ip += ((ip-anchor) >> g_searchStrength) + 1; continue; }
959
885
 
@@ -969,8 +895,7 @@ static size_t ZSTD_compressBlock(void* ctx, void* dst, size_t maxDstSize, const
969
895
  if (offsetCode == prevOffset) offsetCode = 0;
970
896
  prevOffset = offset;
971
897
  offset = ip-match;
972
- op_dumps += ZSTD_storeSeq(op_l, op_rl++, op_offset++, op_ml++, op_dumps, litLength, anchor, offsetCode, matchLength);
973
- op_l += litLength;
898
+ ZSTD_storeSeq(seqStorePtr, litLength, anchor, offsetCode, matchLength);
974
899
 
975
900
  /* Fill Table */
976
901
  ZSTD_addPtr(HashTable, ip+1, base);
@@ -981,18 +906,19 @@ static size_t ZSTD_compressBlock(void* ctx, void* dst, size_t maxDstSize, const
981
906
  }
982
907
 
983
908
  /* Last Literals */
984
- lastLLSize = iend - anchor;
985
- memcpy(op_l, anchor, lastLLSize);
986
- op_l += lastLLSize;
909
+ {
910
+ size_t lastLLSize = iend - anchor;
911
+ memcpy(seqStorePtr->lit, anchor, lastLLSize);
912
+ seqStorePtr->lit += lastLLSize;
913
+ }
987
914
 
988
915
  /* Final compression stage */
989
- return ZSTD_compressEntropy((BYTE*)dst, maxDstSize,
990
- op_l_start, op_l, op_rl_start, op_rl, op_ml_start, op_offset_start, op_dumps_start, op_dumps,
991
- srcSize, lastLLSize);
916
+ return ZSTD_compressSequences((BYTE*)dst, maxDstSize,
917
+ seqStorePtr, srcSize);
992
918
  }
993
919
 
994
920
 
995
- size_t ZSTD_compressBegin(ZSTD_cctx_t ctx, void* dst, size_t maxDstSize)
921
+ size_t ZSTD_compressBegin(ZSTD_Cctx* ctx, void* dst, size_t maxDstSize)
996
922
  {
997
923
  /* Sanity check */
998
924
  if (maxDstSize < ZSTD_frameHeaderSize) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
@@ -1007,13 +933,12 @@ size_t ZSTD_compressBegin(ZSTD_cctx_t ctx, void* dst, size_t maxDstSize)
1007
933
  }
1008
934
 
1009
935
 
1010
- /* this should be auto-vectorized by compiler */
1011
936
  static void ZSTD_scaleDownCtx(void* cctx, const U32 limit)
1012
937
  {
1013
938
  cctxi_t* ctx = (cctxi_t*) cctx;
1014
939
  int i;
1015
940
 
1016
- #if defined(__AVX2__) /* <immintrin.h> */
941
+ #if defined(__AVX2__)
1017
942
  /* AVX2 version */
1018
943
  __m256i* h = ctx->hashTable;
1019
944
  const __m256i limit8 = _mm256_set1_epi32(limit);
@@ -1025,6 +950,7 @@ static void ZSTD_scaleDownCtx(void* cctx, const U32 limit)
1025
950
  _mm256_storeu_si256((__m256i*)(h+i), src);
1026
951
  }
1027
952
  #else
953
+ /* this should be auto-vectorized by compiler */
1028
954
  U32* h = ctx->hashTable;
1029
955
  for (i=0; i<HASH_TABLESIZE; ++i)
1030
956
  {
@@ -1036,7 +962,6 @@ static void ZSTD_scaleDownCtx(void* cctx, const U32 limit)
1036
962
  }
1037
963
 
1038
964
 
1039
- /* this should be auto-vectorized by compiler */
1040
965
  static void ZSTD_limitCtx(void* cctx, const U32 limit)
1041
966
  {
1042
967
  cctxi_t* ctx = (cctxi_t*) cctx;
@@ -1051,7 +976,7 @@ static void ZSTD_limitCtx(void* cctx, const U32 limit)
1051
976
  return;
1052
977
  }
1053
978
 
1054
- #if defined(__AVX2__) /* <immintrin.h> */
979
+ #if defined(__AVX2__)
1055
980
  /* AVX2 version */
1056
981
  {
1057
982
  __m256i* h = ctx->hashTable;
@@ -1065,6 +990,7 @@ static void ZSTD_limitCtx(void* cctx, const U32 limit)
1065
990
  }
1066
991
  }
1067
992
  #else
993
+ /* this should be auto-vectorized by compiler */
1068
994
  {
1069
995
  U32* h = (U32*)(ctx->hashTable);
1070
996
  for (i=0; i<HASH_TABLESIZE; ++i)
@@ -1076,7 +1002,7 @@ static void ZSTD_limitCtx(void* cctx, const U32 limit)
1076
1002
  }
1077
1003
 
1078
1004
 
1079
- size_t ZSTD_compressContinue(ZSTD_cctx_t cctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
1005
+ size_t ZSTD_compressContinue(ZSTD_Cctx* cctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
1080
1006
  {
1081
1007
  cctxi_t* ctx = (cctxi_t*) cctx;
1082
1008
  const BYTE* const istart = (const BYTE* const)src;
@@ -1090,9 +1016,9 @@ size_t ZSTD_compressContinue(ZSTD_cctx_t cctx, void* dst, size_t maxDstSize, con
1090
1016
  ctx->base = (const BYTE*)src, ctx->current=0, ctx->nextUpdate = g_maxDistance;
1091
1017
  if (src != ctx->base + ctx->current) /* not contiguous */
1092
1018
  {
1093
- ZSTD_resetCCtx(ctx);
1094
- ctx->base = (const BYTE*)src;
1095
- ctx->current = 0;
1019
+ ZSTD_resetCCtx(ctx);
1020
+ ctx->base = (const BYTE*)src;
1021
+ ctx->current = 0;
1096
1022
  }
1097
1023
  ctx->current += (U32)srcSize;
1098
1024
 
@@ -1102,8 +1028,11 @@ size_t ZSTD_compressContinue(ZSTD_cctx_t cctx, void* dst, size_t maxDstSize, con
1102
1028
  size_t blockSize = BLOCKSIZE;
1103
1029
  if (blockSize > srcSize) blockSize = srcSize;
1104
1030
 
1031
+ if (maxDstSize < 2*ZSTD_blockHeaderSize+1) /* one RLE block + endMark */
1032
+ return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
1033
+
1105
1034
  /* update hash table */
1106
- if (g_maxDistance <= BLOCKSIZE) /* static test => all blocks are independent */
1035
+ if (g_maxDistance <= BLOCKSIZE) /* static test ; yes == blocks are independent */
1107
1036
  {
1108
1037
  ZSTD_resetCCtx(ctx);
1109
1038
  ctx->base = ip;
@@ -1116,7 +1045,6 @@ size_t ZSTD_compressContinue(ZSTD_cctx_t cctx, void* dst, size_t maxDstSize, con
1116
1045
  }
1117
1046
 
1118
1047
  /* compress */
1119
- if (maxDstSize < ZSTD_blockHeaderSize) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
1120
1048
  cSize = ZSTD_compressBlock(ctx, op+ZSTD_blockHeaderSize, maxDstSize-ZSTD_blockHeaderSize, ip, blockSize);
1121
1049
  if (cSize == 0)
1122
1050
  {
@@ -1142,7 +1070,7 @@ size_t ZSTD_compressContinue(ZSTD_cctx_t cctx, void* dst, size_t maxDstSize, con
1142
1070
  }
1143
1071
 
1144
1072
 
1145
- size_t ZSTD_compressEnd(ZSTD_cctx_t ctx, void* dst, size_t maxDstSize)
1073
+ size_t ZSTD_compressEnd(ZSTD_Cctx* ctx, void* dst, size_t maxDstSize)
1146
1074
  {
1147
1075
  BYTE* op = (BYTE*)dst;
1148
1076
 
@@ -1159,7 +1087,7 @@ size_t ZSTD_compressEnd(ZSTD_cctx_t ctx, void* dst, size_t maxDstSize)
1159
1087
  }
1160
1088
 
1161
1089
 
1162
- static size_t ZSTD_compressCCtx(void* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
1090
+ static size_t ZSTD_compressCCtx(ZSTD_Cctx* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
1163
1091
  {
1164
1092
  BYTE* const ostart = (BYTE* const)dst;
1165
1093
  BYTE* op = ostart;
@@ -1193,10 +1121,11 @@ static size_t ZSTD_compressCCtx(void* ctx, void* dst, size_t maxDstSize, const v
1193
1121
 
1194
1122
  size_t ZSTD_compress(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
1195
1123
  {
1196
- void* ctx;
1124
+ ZSTD_Cctx* ctx;
1197
1125
  size_t r;
1198
1126
 
1199
1127
  ctx = ZSTD_createCCtx();
1128
+ if (ctx==NULL) return (size_t)-ZSTD_ERROR_GENERIC;
1200
1129
  r = ZSTD_compressCCtx(ctx, dst, maxDstSize, src, srcSize);
1201
1130
  ZSTD_freeCCtx(ctx);
1202
1131
  return r;
@@ -1213,7 +1142,7 @@ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, blockProperties_t* bp
1213
1142
  BYTE headerFlags;
1214
1143
  U32 cSize;
1215
1144
 
1216
- if (srcSize < 3) return (size_t)-ZSTD_ERROR_wrongSrcSize;
1145
+ if (srcSize < 3) return (size_t)-ZSTD_ERROR_SrcSize;
1217
1146
 
1218
1147
  headerFlags = *in;
1219
1148
  cSize = in[2] + (in[1]<<8) + ((in[0] & 7)<<16);
@@ -1235,106 +1164,34 @@ static size_t ZSTD_copyUncompressedBlock(void* dst, size_t maxDstSize, const voi
1235
1164
  }
1236
1165
 
1237
1166
 
1238
- /* force inline : 'fast' really needs to be evaluated at compile time */
1239
- FORCE_INLINE size_t ZSTD_decompressLiterals_usingDTable_generic(
1240
- void* const dst, size_t maxDstSize,
1241
- const void* src, size_t srcSize,
1242
- const void* DTable, U32 fast)
1243
- {
1244
- BYTE* op = (BYTE*) dst;
1245
- BYTE* const olimit = op;
1246
- BYTE* const oend = op + maxDstSize;
1247
- FSE_DStream_t bitD;
1248
- FSE_DState_t state1, state2;
1249
- size_t errorCode;
1250
-
1251
- /* Init */
1252
- errorCode = FSE_initDStream(&bitD, src, srcSize);
1253
- if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC;
1254
-
1255
- FSE_initDState(&state1, &bitD, DTable);
1256
- FSE_initDState(&state2, &bitD, DTable);
1257
- op = oend;
1258
-
1259
- // 2 symbols per loop
1260
- while (!FSE_reloadDStream(&bitD) && (op>olimit+3))
1261
- {
1262
- *--op = fast ? FSE_decodeSymbolFast(&state1, &bitD) : FSE_decodeSymbol(&state1, &bitD);
1263
-
1264
- if (LitFSELog*2+7 > sizeof(size_t)*8) // This test must be static
1265
- FSE_reloadDStream(&bitD);
1266
-
1267
- *--op = fast ? FSE_decodeSymbolFast(&state2, &bitD) : FSE_decodeSymbol(&state2, &bitD);
1268
-
1269
- if (LitFSELog*4+7 < sizeof(size_t)*8) // This test must be static
1270
- {
1271
- *--op = fast ? FSE_decodeSymbolFast(&state1, &bitD) : FSE_decodeSymbol(&state1, &bitD);
1272
- *--op = fast ? FSE_decodeSymbolFast(&state2, &bitD) : FSE_decodeSymbol(&state2, &bitD);
1273
- }
1274
- }
1275
-
1276
- /* tail */
1277
- while (1)
1278
- {
1279
- if ( (FSE_reloadDStream(&bitD)>2) || (op==olimit) || (FSE_endOfDState(&state1) && FSE_endOfDStream(&bitD)) )
1280
- break;
1281
-
1282
- *--op = fast ? FSE_decodeSymbolFast(&state1, &bitD) : FSE_decodeSymbol(&state1, &bitD);
1283
-
1284
- if ( (FSE_reloadDStream(&bitD)>2) || (op==olimit) || (FSE_endOfDState(&state2) && FSE_endOfDStream(&bitD)) )
1285
- break;
1286
-
1287
- *--op = fast ? FSE_decodeSymbolFast(&state2, &bitD) : FSE_decodeSymbol(&state2, &bitD);
1288
- }
1289
-
1290
- /* end ? */
1291
- if (FSE_endOfDStream(&bitD) && FSE_endOfDState(&state1) && FSE_endOfDState(&state2) )
1292
- return oend-op;
1293
-
1294
- if (op==olimit) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall; /* dst buffer is full, but cSrc unfinished */
1295
-
1296
- return (size_t)-ZSTD_ERROR_GENERIC;
1297
- }
1298
-
1299
- static size_t ZSTD_decompressLiterals_usingDTable(
1300
- void* const dst, size_t maxDstSize,
1301
- const void* src, size_t srcSize,
1302
- const void* DTable, U32 fast)
1303
- {
1304
- if (fast) return ZSTD_decompressLiterals_usingDTable_generic(dst, maxDstSize, src, srcSize, DTable, 1);
1305
- return ZSTD_decompressLiterals_usingDTable_generic(dst, maxDstSize, src, srcSize, DTable, 0);
1306
- }
1307
-
1308
- static size_t ZSTD_decompressLiterals(void* ctx, void* dst, size_t maxDstSize,
1167
/* ZSTD_decompressLiterals() :
 * Decodes a compressed literals section with HUF_decompress().
 *  ctx        : unused in this version
 *  dst        : output buffer of capacity maxDstSize; the literals are written at its END (oend - litSize)
 *  src        : starts with 2 bytes holding the low 16 bits of the decoded size (high byte first),
 *               followed by srcSize-2 bytes of compressed payload
 *  @return    : decoded literals size, or an error code :
 *               (size_t)-ZSTD_ERROR_corruption          if srcSize <= 3
 *               (size_t)-ZSTD_ERROR_maxDstSize_tooSmall if the decoded size exceeds maxDstSize
 *               (size_t)-ZSTD_ERROR_GENERIC             if HUF_decompress() reports an error */
+ static size_t ZSTD_decompressLiterals(void* ctx,
1168
+ void* dst, size_t maxDstSize,
1309
1169
  const void* src, size_t srcSize)
1310
1170
  {
1311
- /* assumed : blockType == blockCompressed */
1171
+ BYTE* op = (BYTE*)dst;
1172
+ BYTE* const oend = op + maxDstSize;
1312
1173
  const BYTE* ip = (const BYTE*)src;
1313
- short norm[256];
1314
- void* DTable = ctx;
1315
- U32 maxSymbolValue = 255;
1316
- U32 tableLog;
1317
- U32 fastMode;
1318
1174
  size_t errorCode;
1175
+ size_t litSize;
1319
1176
 
1320
- if (srcSize < 2) return (size_t)-ZSTD_ERROR_wrongLBlockSize; // too small input size
1177
+ /* check : minimum 2, for litSize, +1, for content */
1178
+ if (srcSize <= 3) return (size_t)-ZSTD_ERROR_corruption; /* NOTE(review): the comment above describes a 3-byte minimum, yet srcSize == 3 is rejected too - confirm which is intended */
1321
1179
 
1322
- errorCode = FSE_readHeader (norm, &maxSymbolValue, &tableLog, ip, srcSize);
1323
- if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC;
1324
- ip += errorCode;
1325
- srcSize -= errorCode;
1180
+ litSize = ip[1] + (ip[0]<<8); /* low 16 bits of the decoded size, high byte first */
1181
+ litSize += ((ip[-3] >> 3) & 7) << 16; // bits 16-18 come from the byte located 3 positions BEFORE src (presumably the first byte of the enclosing block header - TODO confirm); this relies on the caller's buffer layout
1182
+ op = oend - litSize; /* output goes at the end of dst; NOTE(review): computed before litSize is validated against maxDstSize below */
1326
1183
 
1327
- errorCode = FSE_buildDTable (DTable, norm, maxSymbolValue, tableLog);
1184
+ (void)ctx; /* silences the unused-parameter warning */
1185
+ if (litSize > maxDstSize) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
1186
+ errorCode = HUF_decompress(op, litSize, ip+2, srcSize-2); /* skip the 2 size bytes */
1328
1187
  if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC;
1329
- fastMode = (U32)errorCode;
1330
-
1331
- return ZSTD_decompressLiterals_usingDTable (dst, maxDstSize, ip, srcSize, DTable, fastMode);
1188
+ return litSize; /* caller locates the literals at (dst + maxDstSize) - litSize */
1332
1189
  }
1333
1190
 
1334
1191
 
1335
1192
  size_t ZSTD_decodeLiteralsBlock(void* ctx,
1336
1193
  void* dst, size_t maxDstSize,
1337
- const BYTE** litPtr,
1194
+ const BYTE** litStart, size_t* litSize,
1338
1195
  const void* src, size_t srcSize)
1339
1196
  {
1340
1197
  const BYTE* const istart = (const BYTE* const)src;
@@ -1345,25 +1202,32 @@ size_t ZSTD_decodeLiteralsBlock(void* ctx,
1345
1202
 
1346
1203
  size_t litcSize = ZSTD_getcBlockSize(src, srcSize, &litbp);
1347
1204
  if (ZSTD_isError(litcSize)) return litcSize;
1348
- if (litcSize > srcSize - ZSTD_blockHeaderSize) return (size_t)-ZSTD_ERROR_wrongLBlockSize;
1205
+ if (litcSize > srcSize - ZSTD_blockHeaderSize) return (size_t)-ZSTD_ERROR_SrcSize;
1349
1206
  ip += ZSTD_blockHeaderSize;
1350
1207
 
1351
1208
  switch(litbp.blockType)
1352
1209
  {
1353
- case bt_raw: *litPtr = ip; ip+= litcSize; break;
1210
+ case bt_raw:
1211
+ *litStart = ip;
1212
+ ip += litcSize;
1213
+ *litSize = litcSize;
1214
+ break;
1354
1215
  case bt_rle:
1355
1216
  {
1356
1217
  size_t rleSize = litbp.origSize;
1218
+ if (rleSize>maxDstSize) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
1357
1219
  memset(oend - rleSize, *ip, rleSize);
1358
- *litPtr = oend - rleSize;
1220
+ *litStart = oend - rleSize;
1221
+ *litSize = rleSize;
1359
1222
  ip++;
1360
1223
  break;
1361
1224
  }
1362
1225
  case bt_compressed:
1363
1226
  {
1364
- size_t cSize = ZSTD_decompressLiterals(ctx, dst, maxDstSize, ip, litcSize);
1365
- if (ZSTD_isError(cSize)) return cSize;
1366
- *litPtr = oend - cSize;
1227
+ size_t decodedLitSize = ZSTD_decompressLiterals(ctx, dst, maxDstSize, ip, litcSize);
1228
+ if (ZSTD_isError(decodedLitSize)) return decodedLitSize;
1229
+ *litStart = oend - decodedLitSize;
1230
+ *litSize = decodedLitSize;
1367
1231
  ip += litcSize;
1368
1232
  break;
1369
1233
  }
@@ -1375,8 +1239,8 @@ size_t ZSTD_decodeLiteralsBlock(void* ctx,
1375
1239
  }
1376
1240
 
1377
1241
 
1378
- size_t ZSTD_decodeSeqHeaders(size_t* lastLLPtr, const BYTE** dumpsPtr,
1379
- void* DTableLL, void* DTableML, void* DTableOffb,
1242
+ size_t ZSTD_decodeSeqHeaders(int* nbSeq, const BYTE** dumpsPtr,
1243
+ FSE_DTable* DTableLL, FSE_DTable* DTableML, FSE_DTable* DTableOffb,
1380
1244
  const void* src, size_t srcSize)
1381
1245
  {
1382
1246
  const BYTE* const istart = (const BYTE* const)src;
@@ -1386,8 +1250,11 @@ size_t ZSTD_decodeSeqHeaders(size_t* lastLLPtr, const BYTE** dumpsPtr,
1386
1250
  U32 LLlog, Offlog, MLlog;
1387
1251
  size_t dumpsLength;
1388
1252
 
1253
+ /* check */
1254
+ if (srcSize < 5) return (size_t)-ZSTD_ERROR_SrcSize;
1255
+
1389
1256
  /* SeqHead */
1390
- ip += ZSTD_readProgressive(lastLLPtr, ip);
1257
+ *nbSeq = ZSTD_readLE16(ip); ip+=2;
1391
1258
  LLtype = *ip >> 6;
1392
1259
  Offtype = (*ip >> 4) & 3;
1393
1260
  MLtype = (*ip >> 2) & 3;
@@ -1406,6 +1273,9 @@ size_t ZSTD_decodeSeqHeaders(size_t* lastLLPtr, const BYTE** dumpsPtr,
1406
1273
  *dumpsPtr = ip;
1407
1274
  ip += dumpsLength;
1408
1275
 
1276
+ /* check */
1277
+ if (ip > iend-3) return (size_t)-ZSTD_ERROR_SrcSize; /* min : all 3 are "raw", hence no header, but at least xxLog bits per type */
1278
+
1409
1279
  /* sequences */
1410
1280
  {
1411
1281
  S16 norm[MaxML+1]; /* assumption : MaxML >= MaxLL and MaxOff */
@@ -1423,8 +1293,9 @@ size_t ZSTD_decodeSeqHeaders(size_t* lastLLPtr, const BYTE** dumpsPtr,
1423
1293
  FSE_buildDTable_raw(DTableLL, LLbits); break;
1424
1294
  default :
1425
1295
  max = MaxLL;
1426
- headerSize = FSE_readHeader(norm, &max, &LLlog, ip, iend-ip);
1296
+ headerSize = FSE_readNCount(norm, &max, &LLlog, ip, iend-ip);
1427
1297
  if (FSE_isError(headerSize)) return (size_t)-ZSTD_ERROR_GENERIC;
1298
+ if (LLlog > LLFSELog) return (size_t)-ZSTD_ERROR_corruption;
1428
1299
  ip += headerSize;
1429
1300
  FSE_buildDTable(DTableLL, norm, max, LLlog);
1430
1301
  }
@@ -1434,14 +1305,16 @@ size_t ZSTD_decodeSeqHeaders(size_t* lastLLPtr, const BYTE** dumpsPtr,
1434
1305
  U32 max;
1435
1306
  case bt_rle :
1436
1307
  Offlog = 0;
1308
+ if (ip > iend-2) return (size_t)-ZSTD_ERROR_SrcSize; /* min : "raw", hence no header, but at least xxLog bits */
1437
1309
  FSE_buildDTable_rle(DTableOffb, *ip++); break;
1438
1310
  case bt_raw :
1439
1311
  Offlog = Offbits;
1440
1312
  FSE_buildDTable_raw(DTableOffb, Offbits); break;
1441
1313
  default :
1442
1314
  max = MaxOff;
1443
- headerSize = FSE_readHeader(norm, &max, &Offlog, ip, iend-ip);
1315
+ headerSize = FSE_readNCount(norm, &max, &Offlog, ip, iend-ip);
1444
1316
  if (FSE_isError(headerSize)) return (size_t)-ZSTD_ERROR_GENERIC;
1317
+ if (Offlog > OffFSELog) return (size_t)-ZSTD_ERROR_corruption;
1445
1318
  ip += headerSize;
1446
1319
  FSE_buildDTable(DTableOffb, norm, max, Offlog);
1447
1320
  }
@@ -1451,14 +1324,16 @@ size_t ZSTD_decodeSeqHeaders(size_t* lastLLPtr, const BYTE** dumpsPtr,
1451
1324
  U32 max;
1452
1325
  case bt_rle :
1453
1326
  MLlog = 0;
1327
+ if (ip > iend-2) return (size_t)-ZSTD_ERROR_SrcSize; /* min : "raw", hence no header, but at least xxLog bits */
1454
1328
  FSE_buildDTable_rle(DTableML, *ip++); break;
1455
1329
  case bt_raw :
1456
1330
  MLlog = MLbits;
1457
1331
  FSE_buildDTable_raw(DTableML, MLbits); break;
1458
1332
  default :
1459
1333
  max = MaxML;
1460
- headerSize = FSE_readHeader(norm, &max, &MLlog, ip, iend-ip);
1334
+ headerSize = FSE_readNCount(norm, &max, &MLlog, ip, iend-ip);
1461
1335
  if (FSE_isError(headerSize)) return (size_t)-ZSTD_ERROR_GENERIC;
1336
+ if (MLlog > MLFSELog) return (size_t)-ZSTD_ERROR_corruption;
1462
1337
  ip += headerSize;
1463
1338
  FSE_buildDTable(DTableML, norm, max, MLlog);
1464
1339
  }
@@ -1468,175 +1343,262 @@ size_t ZSTD_decodeSeqHeaders(size_t* lastLLPtr, const BYTE** dumpsPtr,
1468
1343
  }
1469
1344
 
1470
1345
 
1471
- #define ZSTD_prefetch(p) { const BYTE pByte = *(volatile const BYTE*)p; }
1346
/* seq_t : one decoded sequence = a run of literals followed by a match.
   Filled by ZSTD_decodeSequence(), consumed by ZSTD_execSequence(). */
+ typedef struct {
1347
+ size_t litLength;   /* number of literal bytes to copy before the match */
1348
+ size_t offset;   /* match distance, counted backwards from the output position reached after the literals */
1349
+ size_t matchLength;   /* number of match bytes; ZSTD_decodeSequence() stores it with MINMATCH already added */
1350
+ } seq_t;
1472
1351
 
1473
- FORCE_INLINE size_t ZSTD_decompressBlock(void* ctx, void* dst, size_t maxDstSize,
1474
- const void* src, size_t srcSize)
1475
- {
1476
- const BYTE* ip = (const BYTE*)src;
1477
- const BYTE* const iend = ip + srcSize;
1478
- BYTE* const ostart = (BYTE* const)dst;
1479
- BYTE* op = ostart;
1480
- BYTE* const oend = ostart + maxDstSize;
1481
- size_t errorCode;
1482
- size_t lastLLSize;
1352
/* seqState_t : decoding state carried from one ZSTD_decodeSequence() call to the next. */
+ typedef struct {
1353
+ FSE_DStream_t DStream;   /* bitstream shared by the three FSE states below */
1354
+ FSE_DState_t stateLL;   /* FSE state producing literal lengths */
1355
+ FSE_DState_t stateOffb;   /* FSE state producing offset codes */
1356
+ FSE_DState_t stateML;   /* FSE state producing match lengths */
1357
+ size_t prevOffset;   /* older offset, reused by a repeat code (offsetCode==0) when the sequence has no literals */
1483
1358
  const BYTE* dumps;   /* read cursor into the extra length bytes, used when a length symbol equals MaxLL / MaxML */
1484
- const BYTE* litPtr;
1485
- const BYTE* litEnd;
1486
- const size_t dec32table[] = {4, 1, 2, 1, 4, 4, 4, 4}; /* added */
1487
- const size_t dec64table[] = {8, 8, 8, 7, 8, 9,10,11}; /* subtracted */
1488
- void* DTableML = ctx;
1489
- void* DTableLL = ((U32*)ctx) + FSE_DTABLE_SIZE_U32(MLFSELog);
1490
- void* DTableOffb = ((U32*)DTableLL) + FSE_DTABLE_SIZE_U32(LLFSELog);
1359
+ } seqState_t;
1491
1360
 
1492
- /* blockType == blockCompressed, srcSize is trusted */
1493
1361
 
1494
- /* literal sub-block */
1495
- errorCode = ZSTD_decodeLiteralsBlock(ctx, dst, maxDstSize, &litPtr, src, srcSize);
1496
- if (ZSTD_isError(errorCode)) return errorCode;
1497
- ip += errorCode;
1362
/* ZSTD_decodeSequence() :
   Decodes the next (litLength, offset, matchLength) triplet from seqState and stores it into *seq.
   On entry, *seq must still hold the previously decoded sequence : its offset is read to resolve repeat codes.
   seqState->prevOffset and seqState->dumps are updated for the next call.
   No return value : this function performs no validation of the decoded fields. */
+ static void ZSTD_decodeSequence(seq_t* seq, seqState_t* seqState)
1363
+ {
1364
+ size_t litLength;
1365
+ size_t prevOffset;
1366
+ size_t offset;
1367
+ size_t matchLength;
1368
+ const BYTE* dumps = seqState->dumps;
1369
+
1370
+ /* Literal length */
1371
+ litLength = FSE_decodeSymbol(&(seqState->stateLL), &(seqState->DStream));
1372
+ prevOffset = litLength ? seq->offset : seqState->prevOffset;   /* repeat candidate : last offset if there are literals, else the one saved before it */
1373
+ seqState->prevOffset = seq->offset;
1374
+ if (litLength == MaxLL)   /* escape value : the real length is completed from the dumps area */
1375
+ {
1376
+ U32 add = *dumps++;
1377
+ if (add < 255) litLength += add;
1378
+ else   /* 255 : the next 3 bytes of dumps replace the length entirely (24 bits kept) */
1379
+ {
1380
+ litLength = ZSTD_readLE32(dumps) & 0xFFFFFF;
1381
+ dumps += 3;
1382
+ }
1383
+ }
1498
1384
 
1499
- /* Build Decoding Tables */
1500
- errorCode = ZSTD_decodeSeqHeaders(&lastLLSize, &dumps,
1501
- DTableLL, DTableML, DTableOffb,
1502
- ip, iend-ip);
1503
- if (ZSTD_isError(errorCode)) return errorCode;
1504
- /* end pos */
1505
- if ((litPtr>=ostart) && (litPtr<=oend))
1506
- litEnd = oend - lastLLSize;
1385
+ /* Offset : offsetCode N>0 means (1 << (N-1)) + (N-1) extra bits ; offsetCode 0 means "repeat prevOffset" */
1386
+ {
1387
+ U32 offsetCode, nbBits;
1388
+ offsetCode = FSE_decodeSymbol(&(seqState->stateOffb), &(seqState->DStream));
1389
+ if (ZSTD_32bits()) FSE_reloadDStream(&(seqState->DStream));
1390
+ nbBits = offsetCode - 1;
1391
+ if (offsetCode==0) nbBits = 0; /* cmove */
1392
+ offset = ((size_t)1 << (nbBits & ((sizeof(offset)*8)-1))) + FSE_readBits(&(seqState->DStream), nbBits);   /* the mask keeps the shift count below the bit width of size_t */
1393
+ if (ZSTD_32bits()) FSE_reloadDStream(&(seqState->DStream));
1394
+ if (offsetCode==0) offset = prevOffset;
1395
+ }
1396
+
1397
+ /* MatchLength : same escape scheme as the literal length, using MaxML */
1398
+ matchLength = FSE_decodeSymbol(&(seqState->stateML), &(seqState->DStream));
1399
+ if (matchLength == MaxML)
1400
+ {
1401
+ U32 add = *dumps++;
1402
+ if (add < 255) matchLength += add;
1403
+ else
1404
+ {
1405
+ matchLength = ZSTD_readLE32(dumps) & 0xFFFFFF; /* reading beyond the 3 consumed bytes is not a problem : dumps is always followed by seq tables > 1 byte */
1406
+ dumps += 3;
1407
+ }
1408
+ }
1409
+ matchLength += MINMATCH;
1410
+
1411
+ /* save result */
1412
+ seq->litLength = litLength;
1413
+ seq->offset = offset;
1414
+ seq->matchLength = matchLength;
1415
+ seqState->dumps = dumps;
1416
+ }
1417
+
1418
+
1419
/* ZSTD_execSequence() :
   Writes one sequence at `op` : sequence.litLength literals read from *litPtr,
   then sequence.matchLength bytes copied from sequence.offset bytes behind the end of those literals.
   *litPtr is advanced past the consumed literals ; litLimit is the end of the literal buffer.
   base is the lowest address a match is allowed to start from ; oend is the end of the destination buffer.
   @return : number of bytes written (litLength + matchLength),
             or an error code (a negated ZSTD_ERROR_* value cast to size_t) when a check fails. */
+ static size_t ZSTD_execSequence(BYTE* op,
1420
+ seq_t sequence,
1421
+ const BYTE** litPtr, const BYTE* const litLimit,
1422
+ BYTE* const base, BYTE* const oend)
1423
+ {
1424
+ static const int dec32table[] = {0, 1, 2, 1, 4, 4, 4, 4}; /* added to match, indexed by offset (offset < 8 only) */
1425
+ static const int dec64table[] = {8, 8, 8, 7, 8, 9,10,11}; /* subtracted from match, indexed by offset (offset < 8 only) */
1426
+ const BYTE* const ostart = op;
1427
+ const size_t litLength = sequence.litLength;
1428
+ BYTE* const endMatch = op + litLength + sequence.matchLength; /* risk : address space overflow (32-bits) */
1429
+ const BYTE* const litEnd = *litPtr + litLength;
1430
+
1431
+ /* check */
1432
+ if (endMatch > oend) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall; /* overwrite beyond dst buffer */
1433
+ if (litEnd > litLimit) return (size_t)-ZSTD_ERROR_corruption;
1434
+ if (sequence.matchLength > (size_t)(*litPtr-op)) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall; /* overwrite literal segment */
1435
+
1436
+ /* copy Literals : memmove when source and destination are closer than 8 bytes or too near the buffer end, wildcopy otherwise */
1437
+ if (((size_t)(*litPtr - op) < 8) || ((size_t)(oend-litEnd) < 8) || (op+litLength > oend-8))
1438
+ memmove(op, *litPtr, litLength); /* overwrite risk */
1507
1439
  else
1508
- litEnd = ip - lastLLSize;
1509
- ip += errorCode;
1440
+ ZSTD_wildcopy(op, *litPtr, litLength);
1441
+ op += litLength;
1442
+ *litPtr = litEnd; /* update for next sequence */
1510
1443
 
1511
- /* decompression */
1444
+ /* check : last match must be at a minimum distance of 8 from end of dest buffer */
1445
+ if (oend-op < 8) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
1446
+
1447
+ /* copy Match */
1512
1448
  {
1513
- FSE_DStream_t DStream;
1514
- FSE_DState_t stateLL, stateOffb, stateML;
1515
- size_t prevOffset = 0, offset = 0;
1516
- size_t qutt=0;
1449
+ const U32 overlapRisk = (((size_t)(litEnd - endMatch)) < 12);   /* remaining literals start less than 12 bytes after the end of the match */
1450
+ const BYTE* match = op - sequence.offset; /* possible underflow at op - offset ? */
1451
+ size_t qutt = 12;   /* number of bytes saved / restored around endMatch */
1452
+ U64 saved[2];
1517
1453
 
1518
- FSE_initDStream(&DStream, ip, iend-ip);
1519
- FSE_initDState(&stateLL, &DStream, DTableLL);
1520
- FSE_initDState(&stateOffb, &DStream, DTableOffb);
1521
- FSE_initDState(&stateML, &DStream, DTableML);
1454
+ /* check */
1455
+ if (match < base) return (size_t)-ZSTD_ERROR_corruption;
1456
+ if (sequence.offset > (size_t)base) return (size_t)-ZSTD_ERROR_corruption;
1522
1457
 
1523
- while (FSE_reloadDStream(&DStream)<2)
1458
+ /* save beginning of literal sequence, in case of write overlap */
1459
+ if (overlapRisk)
1524
1460
  {
1525
- U32 nbBits, offsetCode;
1526
- const BYTE* match;
1527
- size_t litLength;
1528
- size_t matchLength;
1529
- size_t newOffset;
1530
-
1531
- _another_round:
1461
+ if ((endMatch + qutt) > oend) qutt = oend-endMatch;
1462
+ memcpy(saved, endMatch, qutt);
1463
+ }
1532
1464
 
1533
- /* Literals */
1534
- litLength = FSE_decodeSymbol(&stateLL, &DStream);
1535
- if (litLength) prevOffset = offset;
1536
- if (litLength == MaxLL)
1465
+ if (sequence.offset < 8)   /* close match : first 4 bytes copied one by one, then match is shifted through the two tables above */
1466
+ {
1467
+ const int dec64 = dec64table[sequence.offset];
1468
+ op[0] = match[0];
1469
+ op[1] = match[1];
1470
+ op[2] = match[2];
1471
+ op[3] = match[3];
1472
+ match += dec32table[sequence.offset];
1473
+ ZSTD_copy4(op+4, match);
1474
+ match -= dec64;
1475
+ } else { ZSTD_copy8(op, match); }
1476
+ op += 8; match += 8;
1477
+
1478
+ if (endMatch > oend-12)   /* match ends near the end of dst : finish byte by byte */
1479
+ {
1480
+ if (op < oend-8)
1537
1481
  {
1538
- BYTE add = *dumps++;
1539
- if (add < 255) litLength += add;
1540
- else
1541
- {
1542
- //litLength = (*(U32*)dumps) & 0xFFFFFF;
1543
- litLength = ZSTD_readLE32(dumps) & 0xFFFFFF;
1544
- dumps += 3;
1545
- }
1482
+ ZSTD_wildcopy(op, match, (oend-8) - op);
1483
+ match += (oend-8) - op;
1484
+ op = oend-8;
1546
1485
  }
1547
- if (((size_t)(litPtr - op) < 8) || ((size_t)(oend-(litPtr+litLength)) < 8))
1548
- memmove(op, litPtr, litLength); /* overwrite risk */
1549
- else
1550
- ZSTD_wildcopy(op, litPtr, litLength);
1551
- op += litLength; CHECK_OVERFLOW(op <= (BYTE *)dst + maxDstSize);
1552
- litPtr += litLength;
1553
-
1554
- /* Offset */
1555
- offsetCode = FSE_decodeSymbol(&stateOffb, &DStream);
1556
- if (ZSTD_32bits()) FSE_reloadDStream(&DStream);
1557
- nbBits = offsetCode - 1;
1558
- if (offsetCode==0) nbBits = 0; /* cmove */
1559
- newOffset = FSE_readBits(&DStream, nbBits);
1560
- if (ZSTD_32bits()) FSE_reloadDStream(&DStream);
1561
- newOffset += (size_t)1 << nbBits;
1562
- if (offsetCode==0) newOffset = prevOffset;
1563
- match = op - newOffset;
1564
- prevOffset = offset;
1565
- offset = newOffset;
1486
+ while (op<endMatch) *op++ = *match++;
1487
+ }
1488
+ else
1489
+ ZSTD_wildcopy(op, match, sequence.matchLength-8); /* works even if matchLength < 8 */
1566
1490
 
1567
- /* MatchLength */
1568
- matchLength = FSE_decodeSymbol(&stateML, &DStream);
1569
- if (matchLength == MaxML)
1570
- {
1571
- BYTE add = *dumps++;
1572
- if (add < 255) matchLength += add;
1573
- else
1574
- {
1575
- //matchLength = (*(U32*)dumps) & 0xFFFFFF;
1576
- matchLength = ZSTD_readLE32(dumps) & 0xFFFFFF;
1577
- dumps += 3;
1578
- }
1579
- }
1580
- matchLength += MINMATCH;
1491
+ /* restore, in case of overlap */
1492
+ if (overlapRisk) memcpy(endMatch, saved, qutt);
1493
+ }
1581
1494
 
1582
- /* copy Match */
1583
- {
1584
- BYTE* const endMatch = op + matchLength;
1585
- U64 saved[2];
1586
-
1587
- if ((size_t)(litPtr - endMatch) < 12)
1588
- {
1589
- qutt = endMatch + 12 - litPtr;
1590
- if ((litPtr + qutt) > oend) qutt = oend-litPtr;
1591
- memcpy(saved, litPtr, qutt);
1592
- }
1593
-
1594
- if (offset < 8)
1595
- {
1596
- const size_t dec64 = dec64table[offset];
1597
- op[0] = match[0];
1598
- op[1] = match[1];
1599
- op[2] = match[2];
1600
- op[3] = match[3];
1601
- match += dec32table[offset];
1602
- ZSTD_copy4(op+4, match);
1603
- match -= dec64;
1604
- } else { ZSTD_copy8(op, match); }
1605
-
1606
- if (endMatch > oend-12)
1607
- {
1608
- if (op < oend-16)
1609
- {
1610
- ZSTD_wildcopy(op+8, match+8, (oend-8) - (op+8));
1611
- match += (oend-8) - op;
1612
- op = oend-8; CHECK_OVERFLOW(op <= (BYTE *)dst + maxDstSize);
1613
- }
1614
- while (op<endMatch) *op++ = *match++; CHECK_OVERFLOW(op <= (BYTE *)dst + maxDstSize);
1615
- }
1616
- else
1617
- ZSTD_wildcopy(op+8, match+8, matchLength-8); /* works even if matchLength < 8 */
1618
-
1619
- op = endMatch; CHECK_OVERFLOW(op <= (BYTE *)dst + maxDstSize);
1620
-
1621
- if ((size_t)(litPtr - endMatch) < 12)
1622
- memcpy((void*)litPtr, saved, qutt);
1623
- }
1495
+ return endMatch-ostart;   /* == litLength + matchLength */
1496
+ }
1497
+
1498
/* dctx_t : decompression context = the three FSE decoding tables, plus the state of the streaming API. */
+ typedef struct ZSTD_Dctx_s
1499
+ {
1500
+ U32 LLTable[FSE_DTABLE_SIZE_U32(LLFSELog)];   /* literal length decoding table */
1501
+ U32 OffTable[FSE_DTABLE_SIZE_U32(OffFSELog)];   /* offset code decoding table */
1502
+ U32 MLTable[FSE_DTABLE_SIZE_U32(MLFSELog)];   /* match length decoding table */
1503
+ void* previousDstEnd;   /* end of the output produced by the last block of ZSTD_decompressContinue() */
1504
+ void* base;   /* lowest address a match may reference (checked in ZSTD_execSequence()) */
1505
+ size_t expected;   /* exact srcSize required by the next ZSTD_decompressContinue() call */
1506
+ blockType_t bType;   /* type announced by the last decoded block header */
1507
+ U32 phase;   /* 0 : frame header ; 1 : block header ; 2 : block content */
1508
+ } dctx_t;
1509
+
1510
+
1511
/* ZSTD_decompressSequences() :
   Rebuilds a block into dst from its sequences section (seqStart, seqSize)
   and its already decoded literals (litStart, litSize).
   ctx is a dctx_t* : it provides the three decoding tables and the `base` limit for matches.
   @return : number of bytes written into dst, or an error code (testable with ZSTD_isError()). */
+ static size_t ZSTD_decompressSequences(
1512
+ void* ctx,
1513
+ void* dst, size_t maxDstSize,
1514
+ const void* seqStart, size_t seqSize,
1515
+ const BYTE* litStart, size_t litSize)
1516
+ {
1517
+ dctx_t* dctx = (dctx_t*)ctx;
1518
+ const BYTE* ip = (const BYTE*)seqStart;
1519
+ const BYTE* const iend = ip + seqSize;
1520
+ BYTE* const ostart = (BYTE* const)dst;
1521
+ BYTE* op = ostart;
1522
+ BYTE* const oend = ostart + maxDstSize;
1523
+ size_t errorCode;
1524
+ const BYTE* litPtr = litStart;
1525
+ const BYTE* const litEnd = litStart + litSize;
1526
+ int nbSeq;
1527
+ const BYTE* dumps;
1528
+ U32* DTableLL = dctx->LLTable;
1529
+ U32* DTableML = dctx->MLTable;
1530
+ U32* DTableOffb = dctx->OffTable;
1531
+ BYTE* const base = (BYTE*) (dctx->base);
1532
+
1533
+ /* Build Decoding Tables ; the returned value is the size of the sequences header */
1534
+ errorCode = ZSTD_decodeSeqHeaders(&nbSeq, &dumps,
1535
+ DTableLL, DTableML, DTableOffb,
1536
+ ip, iend-ip);
1537
+ if (ZSTD_isError(errorCode)) return errorCode;
1538
+ ip += errorCode;
1539
+
1540
+ /* Regenerate sequences */
1541
+ {
1542
+ seq_t sequence;
1543
+ seqState_t seqState;
1544
+
1545
+ memset(&sequence, 0, sizeof(sequence));   /* ZSTD_decodeSequence() reads sequence.offset on its first call */
1546
+ seqState.dumps = dumps;
1547
+ seqState.prevOffset = 1;
1548
+ errorCode = FSE_initDStream(&(seqState.DStream), ip, iend-ip);
1549
+ if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_corruption;
1550
+ FSE_initDState(&(seqState.stateLL), &(seqState.DStream), DTableLL);
1551
+ FSE_initDState(&(seqState.stateOffb), &(seqState.DStream), DTableOffb);
1552
+ FSE_initDState(&(seqState.stateML), &(seqState.DStream), DTableML);
1553
+
1554
+ for ( ; (FSE_reloadDStream(&(seqState.DStream)) < FSE_DStream_completed) || (nbSeq>0) ; )   /* continue while the bitstream is not completed, or sequences are still expected */
1555
+ {
1556
+ size_t oneSeqSize;
1557
+ nbSeq--;
1558
+ ZSTD_decodeSequence(&sequence, &seqState);
1559
+ oneSeqSize = ZSTD_execSequence(op, sequence, &litPtr, litEnd, base, oend);
1560
+ if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1561
+ op += oneSeqSize;
1624
1562
  }
1625
1563
 
1626
1564
  /* check if reached exact end */
1627
- if (FSE_reloadDStream(&DStream) > 2) return (size_t)-ZSTD_ERROR_GENERIC; /* requested too much : data is corrupted */
1628
- if (!FSE_endOfDState(&stateLL) && !FSE_endOfDState(&stateML) && !FSE_endOfDState(&stateOffb)) goto _another_round; /* some ultra-compressible sequence remain ! */
1629
- if (litPtr != litEnd) goto _another_round; /* literals not entirely spent */
1565
+ if (FSE_reloadDStream(&(seqState.DStream)) > FSE_DStream_completed) return (size_t)-ZSTD_ERROR_corruption; /* requested too much : data is corrupted */
1566
+ if (nbSeq<0) return (size_t)-ZSTD_ERROR_corruption; /* requested too many sequences : data is corrupted */
1630
1567
 
1631
1568
  /* last literal segment : literals not consumed by any sequence are appended as is */
1632
- if (op != litPtr) memmove(op, litPtr, lastLLSize);
1633
- op += lastLLSize; CHECK_OVERFLOW(op <= (BYTE *)dst + maxDstSize);
1569
+ {
1570
+ size_t lastLLSize = litEnd - litPtr;
1571
+ if (op+lastLLSize > oend) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
1572
+ if (op != litPtr) memmove(op, litPtr, lastLLSize);
1573
+ op += lastLLSize;
1574
+ }
1634
1575
  }
1635
1576
 
1636
1577
  return op-ostart;
1637
1578
  }
1638
1579
 
1639
1580
 
1581
/* ZSTD_decompressBlock() :
   Decompresses one compressed block : decodes the literals sub-block first,
   then hands the remaining input over to ZSTD_decompressSequences().
   @return : value returned by ZSTD_decompressSequences(),
             or the error code of ZSTD_decodeLiteralsBlock() when the literals cannot be decoded. */
+ static size_t ZSTD_decompressBlock(
1582
+ void* ctx,
1583
+ void* dst, size_t maxDstSize,
1584
+ const void* src, size_t srcSize)
1585
+ {
1586
+ /* blockType == blockCompressed, srcSize is trusted */
1587
+ const BYTE* ip = (const BYTE*)src;
1588
+ const BYTE* litPtr;
1589
+ size_t litSize;
1590
+ size_t errorCode;
1591
+
1592
+ /* Decode literals sub-block ; on success, errorCode is used as the number of source bytes consumed */
1593
+ errorCode = ZSTD_decodeLiteralsBlock(ctx, dst, maxDstSize, &litPtr, &litSize, src, srcSize);
1594
+ if (ZSTD_isError(errorCode)) return errorCode;
1595
+ ip += errorCode;
1596
+ srcSize -= errorCode;
1597
+
1598
+ return ZSTD_decompressSequences(ctx, dst, maxDstSize, ip, srcSize, litPtr, litSize);
1599
+ }
1600
+
1601
+
1640
1602
  static size_t ZSTD_decompressDCtx(void* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
1641
1603
  {
1642
1604
  const BYTE* ip = (const BYTE*)src;
@@ -1649,22 +1611,21 @@ static size_t ZSTD_decompressDCtx(void* ctx, void* dst, size_t maxDstSize, const
1649
1611
  size_t errorCode=0;
1650
1612
  blockProperties_t blockProperties;
1651
1613
 
1652
- /* Header */
1653
- if (srcSize < ZSTD_frameHeaderSize) return (size_t)-ZSTD_ERROR_wrongSrcSize;
1614
+ /* Frame Header */
1615
+ if (srcSize < ZSTD_frameHeaderSize+ZSTD_blockHeaderSize) return (size_t)-ZSTD_ERROR_SrcSize;
1654
1616
  magicNumber = ZSTD_readBE32(src);
1655
- if (magicNumber != ZSTD_magicNumber) return (size_t)-ZSTD_ERROR_wrongMagicNumber;
1617
+ if (magicNumber != ZSTD_magicNumber) return (size_t)-ZSTD_ERROR_MagicNumber;
1656
1618
  ip += ZSTD_frameHeaderSize; remainingSize -= ZSTD_frameHeaderSize;
1657
1619
 
1620
+ /* Loop on each block */
1658
1621
  while (1)
1659
1622
  {
1660
1623
  size_t blockSize = ZSTD_getcBlockSize(ip, iend-ip, &blockProperties);
1661
- if (ZSTD_isError(blockSize))
1662
- return blockSize;
1624
+ if (ZSTD_isError(blockSize)) return blockSize;
1663
1625
 
1664
1626
  ip += ZSTD_blockHeaderSize;
1665
1627
  remainingSize -= ZSTD_blockHeaderSize;
1666
- if (ip+blockSize > iend)
1667
- return (size_t)-ZSTD_ERROR_wrongSrcSize;
1628
+ if (blockSize > remainingSize) return (size_t)-ZSTD_ERROR_SrcSize;
1668
1629
 
1669
1630
  switch(blockProperties.blockType)
1670
1631
  {
@@ -1675,11 +1636,11 @@ static size_t ZSTD_decompressDCtx(void* ctx, void* dst, size_t maxDstSize, const
1675
1636
  errorCode = ZSTD_copyUncompressedBlock(op, oend-op, ip, blockSize);
1676
1637
  break;
1677
1638
  case bt_rle :
1678
- return (size_t)-ZSTD_ERROR_GENERIC; /* not yet handled */
1639
+ return (size_t)-ZSTD_ERROR_GENERIC; /* not yet supported */
1679
1640
  break;
1680
1641
  case bt_end :
1681
1642
  /* end of frame */
1682
- if (remainingSize) return (size_t)-ZSTD_ERROR_wrongSrcSize;
1643
+ if (remainingSize) return (size_t)-ZSTD_ERROR_SrcSize;
1683
1644
  break;
1684
1645
  default:
1685
1646
  return (size_t)-ZSTD_ERROR_GENERIC;
@@ -1687,7 +1648,7 @@ static size_t ZSTD_decompressDCtx(void* ctx, void* dst, size_t maxDstSize, const
1687
1648
  if (blockSize == 0) break; /* bt_end */
1688
1649
 
1689
1650
  if (ZSTD_isError(errorCode)) return errorCode;
1690
- op += errorCode; CHECK_OVERFLOW(op <= oend);
1651
+ op += errorCode;
1691
1652
  ip += blockSize;
1692
1653
  remainingSize -= blockSize;
1693
1654
  }
@@ -1695,107 +1656,113 @@ static size_t ZSTD_decompressDCtx(void* ctx, void* dst, size_t maxDstSize, const
1695
1656
  return op-ostart;
1696
1657
  }
1697
1658
 
1698
-
1699
1659
  /* ZSTD_decompress() :
     One-shot decompression using a context allocated on the stack.
     Only ctx.base is initialized here (to dst) before calling ZSTD_decompressDCtx().
     @return : value returned by ZSTD_decompressDCtx(). */
  size_t ZSTD_decompress(void* dst, size_t maxDstSize, const void* src, size_t srcSize)
1700
1660
  {
1701
- U32 ctx[FSE_DTABLE_SIZE_U32(LLFSELog) + FSE_DTABLE_SIZE_U32(OffFSELog) + FSE_DTABLE_SIZE_U32(MLFSELog)];
1702
- return ZSTD_decompressDCtx(ctx, dst, maxDstSize, src, srcSize);
1661
+ dctx_t ctx;
1662
+ ctx.base = dst;   /* matches may not reference anything located before dst */
1663
+ return ZSTD_decompressDCtx(&ctx, dst, maxDstSize, src, srcSize);
1703
1664
  }
1704
1665
 
1705
1666
 
1706
- /******************************
1667
+ /*******************************
1707
1668
  * Streaming Decompression API
1708
- ******************************/
1669
+ *******************************/
1709
1670
 
1710
- typedef struct
1671
/* ZSTD_resetDCtx() :
   Puts a streaming context back at the start of a frame :
   the next input expected is a frame header, and no previous output is remembered.
   @return : always 0. */
+ size_t ZSTD_resetDCtx(ZSTD_Dctx* dctx)
1711
1672
  {
1712
- U32 ctx[FSE_DTABLE_SIZE_U32(LLFSELog) + FSE_DTABLE_SIZE_U32(OffFSELog) + FSE_DTABLE_SIZE_U32(MLFSELog)];
1713
- size_t expected;
1714
- blockType_t bType;
1715
- U32 started;
1716
- } dctx_t;
1717
-
1673
+ dctx->expected = ZSTD_frameHeaderSize;
1674
+ dctx->phase = 0;   /* phase 0 == waiting for the frame header */
1675
+ dctx->previousDstEnd = NULL;
1676
+ dctx->base = NULL;
1677
+ return 0;
1678
+ }
1718
1679
 
1719
/* ZSTD_createDCtx() :
   Allocates a streaming decompression context with malloc() and resets it with ZSTD_resetDCtx().
   @return : the new context, or NULL when the allocation fails. Release it with ZSTD_freeDCtx(). */
- ZSTD_dctx_t ZSTD_createDCtx(void)
1680
+ ZSTD_Dctx* ZSTD_createDCtx(void)
1720
1681
  {
1721
- dctx_t* dctx = (dctx_t*)malloc(sizeof(dctx_t));
1722
- dctx->expected = 4 + ZSTD_blockHeaderSize; // Frame Header + Block Header
1723
- dctx->started = 0;
1724
- return (ZSTD_dctx_t)dctx;
1682
+ ZSTD_Dctx* dctx = (ZSTD_Dctx*)malloc(sizeof(ZSTD_Dctx));
1683
+ if (dctx==NULL) return NULL;
1684
+ ZSTD_resetDCtx(dctx);
1685
+ return dctx;
1725
1686
  }
1726
1687
 
1727
/* ZSTD_freeDCtx() :
   Releases a context with free().
   @return : always 0. */
- size_t ZSTD_freeDCtx(ZSTD_dctx_t dctx)
1688
+ size_t ZSTD_freeDCtx(ZSTD_Dctx* dctx)
1728
1689
  {
1729
1690
  free(dctx);
1730
1691
  return 0;
1731
1692
  }
1732
1693
 
1733
-
1734
/* ZSTD_nextSrcSizeToDecompress() :
   @return : the `expected` field of the context, i.e. the exact srcSize
             that the next call to ZSTD_decompressContinue() will accept. */
- size_t ZSTD_getNextcBlockSize(ZSTD_dctx_t dctx)
1694
+ size_t ZSTD_nextSrcSizeToDecompress(ZSTD_Dctx* dctx)
1735
1695
  {
1736
1696
  return ((dctx_t*)dctx)->expected;
1737
1697
  }
1738
1698
 
1739
/* ZSTD_decompressContinue() :
   Streaming decompression, one step per call. srcSize must be exactly ctx->expected.
   Each call handles one of three phases, selected by ctx->phase :
     0 : frame header  -> checks the magic number, returns 0
     1 : block header  -> records block type and size, returns 0
     2 : block content -> decodes the block into dst, returns the number of bytes written
   @return : number of bytes written into dst (0 for the header phases), or an error code. */
- size_t ZSTD_decompressContinue(ZSTD_dctx_t dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
1699
+ size_t ZSTD_decompressContinue(ZSTD_Dctx* dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
1740
1700
  {
1741
1701
  dctx_t* ctx = (dctx_t*)dctx;
1742
- size_t cSize = srcSize - ZSTD_blockHeaderSize;
1743
- size_t rSize;
1744
1702
 
1745
- // Sanity check
1746
- if (srcSize != ctx->expected) return (size_t)-ZSTD_ERROR_wrongSrcSize;
1703
+ /* Sanity check */
1704
+ if (srcSize != ctx->expected) return (size_t)-ZSTD_ERROR_SrcSize;
1705
+ if (dst != ctx->previousDstEnd) /* not contiguous with the previous output : matches may not reach before dst */
1706
+ ctx->base = dst;
1747
1707
 
1748
- // Decompress
1749
- if (!ctx->started)
1708
+ /* Decompress : frame header */
1709
+ if (ctx->phase == 0)
1750
1710
  {
1751
- // Just check correct magic header
1711
+ /* Check frame magic header */
1752
1712
  U32 magicNumber = ZSTD_readBE32(src);
1753
- if (magicNumber != ZSTD_magicNumber) return (size_t)-ZSTD_ERROR_wrongMagicNumber;
1754
- rSize = 0;
1713
+ if (magicNumber != ZSTD_magicNumber) return (size_t)-ZSTD_ERROR_MagicNumber;
1714
+ ctx->phase = 1;
1715
+ ctx->expected = ZSTD_blockHeaderSize;
1716
+ return 0;
1755
1717
  }
1756
- else
1718
+
1719
+ /* Decompress : block header */
1720
+ if (ctx->phase == 1)
1721
+ {
1722
+ blockProperties_t bp;
1723
+ size_t blockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
1724
+ if (ZSTD_isError(blockSize)) return blockSize;
1725
+ if (bp.blockType == bt_end)   /* end of frame : nothing more expected, back to the frame header phase */
1726
+ {
1727
+ ctx->expected = 0;
1728
+ ctx->phase = 0;
1729
+ }
1730
+ else
1731
+ {
1732
+ ctx->expected = blockSize;
1733
+ ctx->bType = bp.blockType;
1734
+ ctx->phase = 2;
1735
+ }
1736
+
1737
+ return 0;
1738
+ }
1739
+
1740
+ /* Decompress : block content */
1757
1741
  {
1742
+ size_t rSize;
1758
1743
  switch(ctx->bType)
1759
1744
  {
1760
1745
  case bt_compressed:
1761
- rSize = ZSTD_decompressBlock(ctx, dst, maxDstSize, src, cSize);
1746
+ rSize = ZSTD_decompressBlock(ctx, dst, maxDstSize, src, srcSize);
1762
1747
  break;
1763
1748
  case bt_raw :
1764
- rSize = ZSTD_copyUncompressedBlock(dst, maxDstSize, src, cSize);
1749
+ rSize = ZSTD_copyUncompressedBlock(dst, maxDstSize, src, srcSize);
1765
1750
  break;
1766
1751
  case bt_rle :
1767
1752
  return (size_t)-ZSTD_ERROR_GENERIC; /* not yet supported */
1768
1753
  break;
1769
- case bt_end :
1754
+ case bt_end : /* should never happen (filtered at phase 1) */
1770
1755
  rSize = 0;
1771
1756
  break;
1772
1757
  default:
1773
1758
  return (size_t)-ZSTD_ERROR_GENERIC;
1774
1759
  }
1760
+ ctx->phase = 1;   /* a block header is expected next */
1761
+ ctx->expected = ZSTD_blockHeaderSize;
1762
+ ctx->previousDstEnd = (void*)( ((char*)dst) + rSize);
1763
+ return rSize;
1775
1764
  }
1776
1765
 
1777
- // Prepare next block
1778
- {
1779
- const BYTE* header = (const BYTE*)src;
1780
- blockProperties_t bp;
1781
- size_t blockSize;
1782
- header += cSize;
1783
- blockSize = ZSTD_getcBlockSize(header, ZSTD_blockHeaderSize, &bp);
1784
- if (ZSTD_isError(blockSize)) return blockSize;
1785
- if (bp.blockType == bt_end)
1786
- {
1787
- ctx->expected = 0;
1788
- ctx->started = 0;
1789
- }
1790
- else
1791
- {
1792
- ctx->expected = blockSize + ZSTD_blockHeaderSize;
1793
- ctx->bType = bp.blockType;
1794
- ctx->started = 1;
1795
- }
1796
- }
1797
-
1798
- return rSize;
1799
1766
  }
1800
1767
 
1801
1768