extlz4 0.3.1 → 0.3.2

Files changed (59)
  1. checksums.yaml +4 -4
  2. data/README.md +9 -4
  3. data/bin/extlz4 +1 -1
  4. data/contrib/lz4/NEWS +36 -0
  5. data/contrib/lz4/README.md +11 -12
  6. data/contrib/lz4/build/README.md +55 -0
  7. data/contrib/lz4/build/VS2010/datagen/datagen.vcxproj +169 -0
  8. data/contrib/lz4/build/VS2010/frametest/frametest.vcxproj +176 -0
  9. data/contrib/lz4/build/VS2010/fullbench-dll/fullbench-dll.vcxproj +180 -0
  10. data/contrib/lz4/build/VS2010/fullbench/fullbench.vcxproj +176 -0
  11. data/contrib/lz4/build/VS2010/fuzzer/fuzzer.vcxproj +173 -0
  12. data/contrib/lz4/build/VS2010/liblz4-dll/liblz4-dll.rc +51 -0
  13. data/contrib/lz4/build/VS2010/liblz4-dll/liblz4-dll.vcxproj +179 -0
  14. data/contrib/lz4/build/VS2010/liblz4/liblz4.vcxproj +175 -0
  15. data/contrib/lz4/build/VS2010/lz4.sln +98 -0
  16. data/contrib/lz4/build/VS2010/lz4/lz4.rc +51 -0
  17. data/contrib/lz4/build/VS2010/lz4/lz4.vcxproj +189 -0
  18. data/contrib/lz4/build/VS2017/datagen/datagen.vcxproj +173 -0
  19. data/contrib/lz4/build/VS2017/frametest/frametest.vcxproj +180 -0
  20. data/contrib/lz4/build/VS2017/fullbench-dll/fullbench-dll.vcxproj +184 -0
  21. data/contrib/lz4/build/VS2017/fullbench/fullbench.vcxproj +180 -0
  22. data/contrib/lz4/build/VS2017/fuzzer/fuzzer.vcxproj +177 -0
  23. data/contrib/lz4/build/VS2017/liblz4-dll/liblz4-dll.rc +51 -0
  24. data/contrib/lz4/build/VS2017/liblz4-dll/liblz4-dll.vcxproj +183 -0
  25. data/contrib/lz4/build/VS2017/liblz4/liblz4.vcxproj +179 -0
  26. data/contrib/lz4/build/VS2017/lz4.sln +103 -0
  27. data/contrib/lz4/build/VS2017/lz4/lz4.rc +51 -0
  28. data/contrib/lz4/build/VS2017/lz4/lz4.vcxproj +164 -0
  29. data/contrib/lz4/build/cmake/CMakeLists.txt +235 -0
  30. data/contrib/lz4/lib/README.md +27 -10
  31. data/contrib/lz4/lib/lz4.c +327 -230
  32. data/contrib/lz4/lib/lz4.h +80 -70
  33. data/contrib/lz4/lib/lz4frame.c +93 -54
  34. data/contrib/lz4/lib/lz4frame.h +22 -14
  35. data/contrib/lz4/lib/lz4hc.c +192 -115
  36. data/contrib/lz4/lib/lz4hc.h +15 -40
  37. data/contrib/lz4/ossfuzz/Makefile +12 -8
  38. data/contrib/lz4/ossfuzz/compress_frame_fuzzer.c +11 -5
  39. data/contrib/lz4/ossfuzz/compress_fuzzer.c +9 -2
  40. data/contrib/lz4/ossfuzz/compress_hc_fuzzer.c +10 -3
  41. data/contrib/lz4/ossfuzz/decompress_frame_fuzzer.c +11 -3
  42. data/contrib/lz4/ossfuzz/decompress_fuzzer.c +6 -2
  43. data/contrib/lz4/ossfuzz/fuzz_data_producer.c +77 -0
  44. data/contrib/lz4/ossfuzz/fuzz_data_producer.h +36 -0
  45. data/contrib/lz4/ossfuzz/round_trip_frame_fuzzer.c +8 -4
  46. data/contrib/lz4/ossfuzz/round_trip_fuzzer.c +9 -2
  47. data/contrib/lz4/ossfuzz/round_trip_hc_fuzzer.c +7 -2
  48. data/contrib/lz4/ossfuzz/travisoss.sh +6 -1
  49. data/contrib/lz4/tmp +0 -0
  50. data/contrib/lz4/tmpsparse +0 -0
  51. data/ext/extlz4.c +2 -0
  52. data/ext/extlz4.h +5 -0
  53. data/ext/hashargs.c +1 -1
  54. data/ext/hashargs.h +1 -1
  55. data/gemstub.rb +3 -14
  56. data/lib/extlz4.rb +0 -2
  57. data/lib/extlz4/oldstream.rb +1 -1
  58. metadata +40 -25
  59. data/lib/extlz4/version.rb +0 -3
@@ -35,21 +35,22 @@ So it's necessary to include all `*.c` and `*.h` files present in `/lib`.
35
35
 
36
36
  Definitions which are not guaranteed to remain stable in future versions,
37
37
  are protected behind macros, such as `LZ4_STATIC_LINKING_ONLY`.
38
- As the name implies, these definitions can only be invoked
38
+ As the name strongly implies, these definitions should only be invoked
39
39
  in the context of static linking ***only***.
40
40
  Otherwise, dependent application may fail on API or ABI break in the future.
41
- The associated symbols are also not present in dynamic library by default.
41
+ The associated symbols are also not exposed by the dynamic library by default.
42
42
  Should they be nonetheless needed, it's possible to force their publication
43
- by using build macro `LZ4_PUBLISH_STATIC_FUNCTIONS`.
43
+ by using build macros `LZ4_PUBLISH_STATIC_FUNCTIONS`
44
+ and `LZ4F_PUBLISH_STATIC_FUNCTIONS`.
44
45
 
45
46
 
46
47
  #### Build macros
47
48
 
48
- The following build macro can be selected at compilation time :
49
+ The following build macro can be selected to adjust source code behavior at compilation time :
49
50
 
50
- - `LZ4_FAST_DEC_LOOP` : this triggers the optimized decompression loop.
51
- This loops works great on x86/x64 cpus, and is automatically enabled on this platform.
52
- It's possible to enable or disable it manually, by passing `LZ4_FAST_DEC_LOOP=1` or `0` to the preprocessor.
51
+ - `LZ4_FAST_DEC_LOOP` : this triggers a speed optimized decompression loop, more powerful on modern cpus.
52
+ This loop works great on `x86`, `x64` and `aarch64` cpus, and is automatically enabled for them.
53
+ It's also possible to enable or disable it manually, by passing `LZ4_FAST_DEC_LOOP=1` or `0` to the preprocessor.
53
54
  For example, with `gcc` : `-DLZ4_FAST_DEC_LOOP=1`,
54
55
  and with `make` : `CPPFLAGS+=-DLZ4_FAST_DEC_LOOP=1 make lz4`.
55
56
 
@@ -65,8 +66,24 @@ The following build macro can be selected at compilation time :
65
66
  Should this be a problem, it's generally possible to make the compiler ignore these warnings,
66
67
  for example with `-Wno-deprecated-declarations` on `gcc`,
67
68
  or `_CRT_SECURE_NO_WARNINGS` for Visual Studio.
68
- Another method is to define `LZ4_DISABLE_DEPRECATE_WARNINGS`
69
- before including the LZ4 header files.
69
+ This build macro offers another project-specific method
70
+ by defining `LZ4_DISABLE_DEPRECATE_WARNINGS` before including the LZ4 header files.
71
+
72
+ - `LZ4_USER_MEMORY_FUNCTIONS` : replace calls to <stdlib>'s `malloc`, `calloc` and `free`
73
+ by user-defined functions, which must be called `LZ4_malloc()`, `LZ4_calloc()` and `LZ4_free()`.
74
+ User functions must be available at link time.
75
+
76
+ - `LZ4_FORCE_SW_BITCOUNT` : by default, the compression algorithm tries to determine lengths
77
+ by using bitcount instructions, generally implemented as fast single instructions in many cpus.
78
+ In case the target cpu doesn't support it, the compiler intrinsic doesn't work, or it features bad performance,
79
+ it's possible to use an optimized software path instead.
80
+ This is achieved by setting this build macro.
81
+ In most cases, it's not expected to be necessary,
82
+ but it can be legitimately considered for less common platforms.
83
+
84
+ - `LZ4_ALIGN_TEST` : alignment test ensures that the memory area
85
+ passed as argument to become a compression state is suitably aligned.
86
+ This test can be disabled if it proves flaky, by setting this value to 0.
70
87
 
71
88
 
72
89
  #### Amalgamation
@@ -102,7 +119,7 @@ The compiled executable will require LZ4 DLL which is available at `dll\liblz4.d
102
119
 
103
120
  #### Miscellaneous
104
121
 
105
- Other files present in the directory are not source code. There are :
122
+ Other files present in the directory are not source code. They are :
106
123
 
107
124
  - `LICENSE` : contains the BSD license text
108
125
  - `Makefile` : `make` script to compile and install lz4 library (static and dynamic)
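
For readers of the `LZ4_USER_MEMORY_FUNCTIONS` entry in the README hunk above, here is a minimal, hypothetical sketch of what a project could link in when building `lz4.c` with `-DLZ4_USER_MEMORY_FUNCTIONS`. The three function names match the declarations added to `lz4.c` later in this diff; the byte counter is purely illustrative.

```c
/* Hypothetical custom allocators for -DLZ4_USER_MEMORY_FUNCTIONS.
 * Compile and link this file together with lz4.c. */
#include <stdlib.h>
#include <stddef.h>

static size_t g_lz4_bytes_requested = 0;   /* illustrative bookkeeping only */

void* LZ4_malloc(size_t s)           { g_lz4_bytes_requested += s;     return malloc(s); }
void* LZ4_calloc(size_t n, size_t s) { g_lz4_bytes_requested += n * s; return calloc(n, s); }
void  LZ4_free(void* p)              { free(p); }
```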
@@ -45,10 +45,16 @@
45
45
  #endif
46
46
 
47
47
  /*
48
- * ACCELERATION_DEFAULT :
48
+ * LZ4_ACCELERATION_DEFAULT :
49
49
  * Select "acceleration" for LZ4_compress_fast() when parameter value <= 0
50
50
  */
51
- #define ACCELERATION_DEFAULT 1
51
+ #define LZ4_ACCELERATION_DEFAULT 1
52
+ /*
53
+ * LZ4_ACCELERATION_MAX :
54
+ * Any "acceleration" value higher than this threshold
55
+ * get treated as LZ4_ACCELERATION_MAX instead (fix #876)
56
+ */
57
+ #define LZ4_ACCELERATION_MAX 65537
52
58
 
53
59
 
54
60
  /*-************************************
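
The hunk above renames `ACCELERATION_DEFAULT` and introduces `LZ4_ACCELERATION_MAX` (65537) to address #876. A hedged sketch of the user-visible effect, assuming the public `LZ4_compress_fast()` entry point; the clamp itself is applied inside `LZ4_compress_fast_extState()` and related functions shown later in this diff.

```c
#include "lz4.h"

/* Any acceleration above LZ4_ACCELERATION_MAX (65537) is now treated as the
 * maximum, and values <= 0 fall back to LZ4_ACCELERATION_DEFAULT (1). */
int compress_with_huge_acceleration(const char* src, int srcSize,
                                    char* dst, int dstCapacity)
{
    return LZ4_compress_fast(src, dst, srcSize, dstCapacity, 1 << 30);
}
```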
@@ -82,6 +88,7 @@
82
88
  * Define this parameter if your target system or compiler does not support hardware bit count
83
89
  */
84
90
  #if defined(_MSC_VER) && defined(_WIN32_WCE) /* Visual Studio for WinCE doesn't support Hardware bit count */
91
+ # undef LZ4_FORCE_SW_BITCOUNT /* avoid double def */
85
92
  # define LZ4_FORCE_SW_BITCOUNT
86
93
  #endif
87
94
 
@@ -114,10 +121,9 @@
114
121
  /*-************************************
115
122
  * Compiler Options
116
123
  **************************************/
117
- #ifdef _MSC_VER /* Visual Studio */
118
- # include <intrin.h>
119
- # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
120
- # pragma warning(disable : 4293) /* disable: C4293: too large shift (32-bits) */
124
+ #if defined(_MSC_VER) && (_MSC_VER >= 1400) /* Visual Studio 2005+ */
125
+ # include <intrin.h> /* only present in VS2005+ */
126
+ # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
121
127
  #endif /* _MSC_VER */
122
128
 
123
129
  #ifndef LZ4_FORCE_INLINE
@@ -136,7 +142,7 @@
136
142
  # endif /* _MSC_VER */
137
143
  #endif /* LZ4_FORCE_INLINE */
138
144
 
139
- /* LZ4_FORCE_O2_GCC_PPC64LE and LZ4_FORCE_O2_INLINE_GCC_PPC64LE
145
+ /* LZ4_FORCE_O2 and LZ4_FORCE_INLINE
140
146
  * gcc on ppc64le generates an unrolled SIMDized loop for LZ4_wildCopy8,
141
147
  * together with a simple 8-byte copy loop as a fall-back path.
142
148
  * However, this optimization hurts the decompression speed by >30%,
@@ -151,11 +157,11 @@
151
157
  * of LZ4_wildCopy8 does not affect the compression speed.
152
158
  */
153
159
  #if defined(__PPC64__) && defined(__LITTLE_ENDIAN__) && defined(__GNUC__) && !defined(__clang__)
154
- # define LZ4_FORCE_O2_GCC_PPC64LE __attribute__((optimize("O2")))
155
- # define LZ4_FORCE_O2_INLINE_GCC_PPC64LE __attribute__((optimize("O2"))) LZ4_FORCE_INLINE
160
+ # define LZ4_FORCE_O2 __attribute__((optimize("O2")))
161
+ # undef LZ4_FORCE_INLINE
162
+ # define LZ4_FORCE_INLINE static __inline __attribute__((optimize("O2"),always_inline))
156
163
  #else
157
- # define LZ4_FORCE_O2_GCC_PPC64LE
158
- # define LZ4_FORCE_O2_INLINE_GCC_PPC64LE static
164
+ # define LZ4_FORCE_O2
159
165
  #endif
160
166
 
161
167
  #if (defined(__GNUC__) && (__GNUC__ >= 3)) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || defined(__clang__)
@@ -171,14 +177,33 @@
171
177
  #define unlikely(expr) expect((expr) != 0, 0)
172
178
  #endif
173
179
 
180
+ /* Should the alignment test prove unreliable, for some reason,
181
+ * it can be disabled by setting LZ4_ALIGN_TEST to 0 */
182
+ #ifndef LZ4_ALIGN_TEST /* can be externally provided */
183
+ # define LZ4_ALIGN_TEST 1
184
+ #endif
185
+
174
186
 
175
187
  /*-************************************
176
188
  * Memory routines
177
189
  **************************************/
178
- #include <stdlib.h> /* malloc, calloc, free */
179
- #define ALLOC(s) malloc(s)
180
- #define ALLOC_AND_ZERO(s) calloc(1,s)
181
- #define FREEMEM(p) free(p)
190
+ #ifdef LZ4_USER_MEMORY_FUNCTIONS
191
+ /* memory management functions can be customized by user project.
192
+ * Below functions must exist somewhere in the Project
193
+ * and be available at link time */
194
+ void* LZ4_malloc(size_t s);
195
+ void* LZ4_calloc(size_t n, size_t s);
196
+ void LZ4_free(void* p);
197
+ # define ALLOC(s) LZ4_malloc(s)
198
+ # define ALLOC_AND_ZERO(s) LZ4_calloc(1,s)
199
+ # define FREEMEM(p) LZ4_free(p)
200
+ #else
201
+ # include <stdlib.h> /* malloc, calloc, free */
202
+ # define ALLOC(s) malloc(s)
203
+ # define ALLOC_AND_ZERO(s) calloc(1,s)
204
+ # define FREEMEM(p) free(p)
205
+ #endif
206
+
182
207
  #include <string.h> /* memset, memcpy */
183
208
  #define MEM_INIT(p,v,s) memset((p),(v),(s))
184
209
 
@@ -225,21 +250,27 @@ static const int LZ4_minLength = (MFLIMIT+1);
225
250
 
226
251
  #if defined(LZ4_DEBUG) && (LZ4_DEBUG>=2)
227
252
  # include <stdio.h>
228
- static int g_debuglog_enable = 1;
229
- # define DEBUGLOG(l, ...) { \
230
- if ((g_debuglog_enable) && (l<=LZ4_DEBUG)) { \
231
- fprintf(stderr, __FILE__ ": "); \
232
- fprintf(stderr, __VA_ARGS__); \
233
- fprintf(stderr, " \n"); \
234
- } }
253
+ static int g_debuglog_enable = 1;
254
+ # define DEBUGLOG(l, ...) { \
255
+ if ((g_debuglog_enable) && (l<=LZ4_DEBUG)) { \
256
+ fprintf(stderr, __FILE__ ": "); \
257
+ fprintf(stderr, __VA_ARGS__); \
258
+ fprintf(stderr, " \n"); \
259
+ } }
235
260
  #else
236
- # define DEBUGLOG(l, ...) {} /* disabled */
261
+ # define DEBUGLOG(l, ...) {} /* disabled */
237
262
  #endif
238
263
 
264
+ static int LZ4_isAligned(const void* ptr, size_t alignment)
265
+ {
266
+ return ((size_t)ptr & (alignment -1)) == 0;
267
+ }
268
+
239
269
 
240
270
  /*-************************************
241
271
  * Types
242
272
  **************************************/
273
+ #include <limits.h>
243
274
  #if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
244
275
  # include <stdint.h>
245
276
  typedef uint8_t BYTE;
@@ -249,6 +280,9 @@ static int g_debuglog_enable = 1;
249
280
  typedef uint64_t U64;
250
281
  typedef uintptr_t uptrval;
251
282
  #else
283
+ # if UINT_MAX != 4294967295UL
284
+ # error "LZ4 code (when not C++ or C99) assumes that sizeof(int) == 4"
285
+ # endif
252
286
  typedef unsigned char BYTE;
253
287
  typedef unsigned short U16;
254
288
  typedef unsigned int U32;
@@ -273,6 +307,21 @@ typedef enum {
273
307
  /*-************************************
274
308
  * Reading and writing into memory
275
309
  **************************************/
310
+
311
+ /**
312
+ * LZ4 relies on memcpy with a constant size being inlined. In freestanding
313
+ * environments, the compiler can't assume the implementation of memcpy() is
314
+ * standard compliant, so it can't apply its specialized memcpy() inlining
315
+ * logic. When possible, use __builtin_memcpy() to tell the compiler to analyze
316
+ * memcpy() as if it were standard compliant, so it can inline it in freestanding
317
+ * environments. This is needed when decompressing the Linux Kernel, for example.
318
+ */
319
+ #if defined(__GNUC__) && (__GNUC__ >= 4)
320
+ #define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size)
321
+ #else
322
+ #define LZ4_memcpy(dst, src, size) memcpy(dst, src, size)
323
+ #endif
324
+
276
325
  static unsigned LZ4_isLittleEndian(void)
277
326
  {
278
327
  const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
@@ -307,27 +356,27 @@ static void LZ4_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = val
307
356
 
308
357
  static U16 LZ4_read16(const void* memPtr)
309
358
  {
310
- U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
359
+ U16 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val;
311
360
  }
312
361
 
313
362
  static U32 LZ4_read32(const void* memPtr)
314
363
  {
315
- U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
364
+ U32 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val;
316
365
  }
317
366
 
318
367
  static reg_t LZ4_read_ARCH(const void* memPtr)
319
368
  {
320
- reg_t val; memcpy(&val, memPtr, sizeof(val)); return val;
369
+ reg_t val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val;
321
370
  }
322
371
 
323
372
  static void LZ4_write16(void* memPtr, U16 value)
324
373
  {
325
- memcpy(memPtr, &value, sizeof(value));
374
+ LZ4_memcpy(memPtr, &value, sizeof(value));
326
375
  }
327
376
 
328
377
  static void LZ4_write32(void* memPtr, U32 value)
329
378
  {
330
- memcpy(memPtr, &value, sizeof(value));
379
+ LZ4_memcpy(memPtr, &value, sizeof(value));
331
380
  }
332
381
 
333
382
  #endif /* LZ4_FORCE_MEMORY_ACCESS */
@@ -355,14 +404,14 @@ static void LZ4_writeLE16(void* memPtr, U16 value)
355
404
  }
356
405
 
357
406
  /* customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd */
358
- LZ4_FORCE_O2_INLINE_GCC_PPC64LE
407
+ LZ4_FORCE_INLINE
359
408
  void LZ4_wildCopy8(void* dstPtr, const void* srcPtr, void* dstEnd)
360
409
  {
361
410
  BYTE* d = (BYTE*)dstPtr;
362
411
  const BYTE* s = (const BYTE*)srcPtr;
363
412
  BYTE* const e = (BYTE*)dstEnd;
364
413
 
365
- do { memcpy(d,s,8); d+=8; s+=8; } while (d<e);
414
+ do { LZ4_memcpy(d,s,8); d+=8; s+=8; } while (d<e);
366
415
  }
367
416
 
368
417
  static const unsigned inc32table[8] = {0, 1, 2, 1, 0, 4, 4, 4};
@@ -370,12 +419,12 @@ static const int dec64table[8] = {0, 0, 0, -1, -4, 1, 2, 3};
370
419
 
371
420
 
372
421
  #ifndef LZ4_FAST_DEC_LOOP
373
- # if defined(__i386__) || defined(__x86_64__)
422
+ # if defined __i386__ || defined _M_IX86 || defined __x86_64__ || defined _M_X64
374
423
  # define LZ4_FAST_DEC_LOOP 1
375
424
  # elif defined(__aarch64__) && !defined(__clang__)
376
425
  /* On aarch64, we disable this optimization for clang because on certain
377
- * mobile chipsets and clang, it reduces performance. For more information
378
- * refer to https://github.com/lz4/lz4/pull/707. */
426
+ * mobile chipsets, performance is reduced with clang. For information
427
+ * refer to https://github.com/lz4/lz4/pull/707 */
379
428
  # define LZ4_FAST_DEC_LOOP 1
380
429
  # else
381
430
  # define LZ4_FAST_DEC_LOOP 0
@@ -384,20 +433,22 @@ static const int dec64table[8] = {0, 0, 0, -1, -4, 1, 2, 3};
384
433
 
385
434
  #if LZ4_FAST_DEC_LOOP
386
435
 
387
- LZ4_FORCE_O2_INLINE_GCC_PPC64LE void
436
+ LZ4_FORCE_INLINE void
388
437
  LZ4_memcpy_using_offset_base(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
389
438
  {
439
+ assert(srcPtr + offset == dstPtr);
390
440
  if (offset < 8) {
441
+ LZ4_write32(dstPtr, 0); /* silence an msan warning when offset==0 */
391
442
  dstPtr[0] = srcPtr[0];
392
443
  dstPtr[1] = srcPtr[1];
393
444
  dstPtr[2] = srcPtr[2];
394
445
  dstPtr[3] = srcPtr[3];
395
446
  srcPtr += inc32table[offset];
396
- memcpy(dstPtr+4, srcPtr, 4);
447
+ LZ4_memcpy(dstPtr+4, srcPtr, 4);
397
448
  srcPtr -= dec64table[offset];
398
449
  dstPtr += 8;
399
450
  } else {
400
- memcpy(dstPtr, srcPtr, 8);
451
+ LZ4_memcpy(dstPtr, srcPtr, 8);
401
452
  dstPtr += 8;
402
453
  srcPtr += 8;
403
454
  }
@@ -408,49 +459,48 @@ LZ4_memcpy_using_offset_base(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, con
408
459
  /* customized variant of memcpy, which can overwrite up to 32 bytes beyond dstEnd
409
460
  * this version copies two times 16 bytes (instead of one time 32 bytes)
410
461
  * because it must be compatible with offsets >= 16. */
411
- LZ4_FORCE_O2_INLINE_GCC_PPC64LE void
462
+ LZ4_FORCE_INLINE void
412
463
  LZ4_wildCopy32(void* dstPtr, const void* srcPtr, void* dstEnd)
413
464
  {
414
465
  BYTE* d = (BYTE*)dstPtr;
415
466
  const BYTE* s = (const BYTE*)srcPtr;
416
467
  BYTE* const e = (BYTE*)dstEnd;
417
468
 
418
- do { memcpy(d,s,16); memcpy(d+16,s+16,16); d+=32; s+=32; } while (d<e);
469
+ do { LZ4_memcpy(d,s,16); LZ4_memcpy(d+16,s+16,16); d+=32; s+=32; } while (d<e);
419
470
  }
420
471
 
421
472
  /* LZ4_memcpy_using_offset() presumes :
422
473
  * - dstEnd >= dstPtr + MINMATCH
423
474
  * - there is at least 8 bytes available to write after dstEnd */
424
- LZ4_FORCE_O2_INLINE_GCC_PPC64LE void
475
+ LZ4_FORCE_INLINE void
425
476
  LZ4_memcpy_using_offset(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
426
477
  {
427
478
  BYTE v[8];
428
479
 
429
480
  assert(dstEnd >= dstPtr + MINMATCH);
430
- LZ4_write32(dstPtr, 0); /* silence an msan warning when offset==0 */
431
481
 
432
482
  switch(offset) {
433
483
  case 1:
434
- memset(v, *srcPtr, 8);
484
+ MEM_INIT(v, *srcPtr, 8);
435
485
  break;
436
486
  case 2:
437
- memcpy(v, srcPtr, 2);
438
- memcpy(&v[2], srcPtr, 2);
439
- memcpy(&v[4], &v[0], 4);
487
+ LZ4_memcpy(v, srcPtr, 2);
488
+ LZ4_memcpy(&v[2], srcPtr, 2);
489
+ LZ4_memcpy(&v[4], v, 4);
440
490
  break;
441
491
  case 4:
442
- memcpy(v, srcPtr, 4);
443
- memcpy(&v[4], srcPtr, 4);
492
+ LZ4_memcpy(v, srcPtr, 4);
493
+ LZ4_memcpy(&v[4], srcPtr, 4);
444
494
  break;
445
495
  default:
446
496
  LZ4_memcpy_using_offset_base(dstPtr, srcPtr, dstEnd, offset);
447
497
  return;
448
498
  }
449
499
 
450
- memcpy(dstPtr, v, 8);
500
+ LZ4_memcpy(dstPtr, v, 8);
451
501
  dstPtr += 8;
452
502
  while (dstPtr < dstEnd) {
453
- memcpy(dstPtr, v, 8);
503
+ LZ4_memcpy(dstPtr, v, 8);
454
504
  dstPtr += 8;
455
505
  }
456
506
  }
@@ -462,75 +512,92 @@ LZ4_memcpy_using_offset(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const si
462
512
  **************************************/
463
513
  static unsigned LZ4_NbCommonBytes (reg_t val)
464
514
  {
515
+ assert(val != 0);
465
516
  if (LZ4_isLittleEndian()) {
466
- if (sizeof(val)==8) {
467
- # if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
517
+ if (sizeof(val) == 8) {
518
+ # if defined(_MSC_VER) && (_MSC_VER >= 1800) && defined(_M_AMD64) && !defined(LZ4_FORCE_SW_BITCOUNT)
519
+ /* x64 CPUS without BMI support interpret `TZCNT` as `REP BSF` */
520
+ return (unsigned)_tzcnt_u64(val) >> 3;
521
+ # elif defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
468
522
  unsigned long r = 0;
469
- _BitScanForward64( &r, (U64)val );
470
- return (int)(r>>3);
471
- # elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT)
523
+ _BitScanForward64(&r, (U64)val);
524
+ return (unsigned)r >> 3;
525
+ # elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
526
+ ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
527
+ !defined(LZ4_FORCE_SW_BITCOUNT)
472
528
  return (unsigned)__builtin_ctzll((U64)val) >> 3;
473
529
  # else
474
- static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2,
475
- 0, 3, 1, 3, 1, 4, 2, 7,
476
- 0, 2, 3, 6, 1, 5, 3, 5,
477
- 1, 3, 4, 4, 2, 5, 6, 7,
478
- 7, 0, 1, 2, 3, 3, 4, 6,
479
- 2, 6, 5, 5, 3, 4, 5, 6,
480
- 7, 1, 2, 4, 6, 4, 4, 5,
481
- 7, 2, 6, 5, 7, 6, 7, 7 };
482
- return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
530
+ const U64 m = 0x0101010101010101ULL;
531
+ val ^= val - 1;
532
+ return (unsigned)(((U64)((val & (m - 1)) * m)) >> 56);
483
533
  # endif
484
534
  } else /* 32 bits */ {
485
- # if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
535
+ # if defined(_MSC_VER) && (_MSC_VER >= 1400) && !defined(LZ4_FORCE_SW_BITCOUNT)
486
536
  unsigned long r;
487
- _BitScanForward( &r, (U32)val );
488
- return (int)(r>>3);
489
- # elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT)
537
+ _BitScanForward(&r, (U32)val);
538
+ return (unsigned)r >> 3;
539
+ # elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
540
+ ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
541
+ !defined(__TINYC__) && !defined(LZ4_FORCE_SW_BITCOUNT)
490
542
  return (unsigned)__builtin_ctz((U32)val) >> 3;
491
543
  # else
492
- static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0,
493
- 3, 2, 2, 1, 3, 2, 0, 1,
494
- 3, 3, 1, 2, 2, 2, 2, 0,
495
- 3, 1, 2, 0, 1, 0, 1, 1 };
496
- return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
544
+ const U32 m = 0x01010101;
545
+ return (unsigned)((((val - 1) ^ val) & (m - 1)) * m) >> 24;
497
546
  # endif
498
547
  }
499
548
  } else /* Big Endian CPU */ {
500
- if (sizeof(val)==8) { /* 64-bits */
501
- # if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
502
- unsigned long r = 0;
503
- _BitScanReverse64( &r, val );
504
- return (unsigned)(r>>3);
505
- # elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT)
549
+ if (sizeof(val)==8) {
550
+ # if (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
551
+ ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
552
+ !defined(__TINYC__) && !defined(LZ4_FORCE_SW_BITCOUNT)
506
553
  return (unsigned)__builtin_clzll((U64)val) >> 3;
507
554
  # else
555
+ #if 1
556
+ /* this method is probably faster,
557
+ * but adds a 128 bytes lookup table */
558
+ static const unsigned char ctz7_tab[128] = {
559
+ 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
560
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
561
+ 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
562
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
563
+ 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
564
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
565
+ 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
566
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
567
+ };
568
+ U64 const mask = 0x0101010101010101ULL;
569
+ U64 const t = (((val >> 8) - mask) | val) & mask;
570
+ return ctz7_tab[(t * 0x0080402010080402ULL) >> 57];
571
+ #else
572
+ /* this method doesn't consume memory space like the previous one,
573
+ * but it contains several branches,
574
+ * that may end up slowing execution */
508
575
  static const U32 by32 = sizeof(val)*4; /* 32 on 64 bits (goal), 16 on 32 bits.
509
- Just to avoid some static analyzer complaining about shift by 32 on 32-bits target.
510
- Note that this code path is never triggered in 32-bits mode. */
576
+ Just to avoid some static analyzer complaining about shift by 32 on 32-bits target.
577
+ Note that this code path is never triggered in 32-bits mode. */
511
578
  unsigned r;
512
579
  if (!(val>>by32)) { r=4; } else { r=0; val>>=by32; }
513
580
  if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
514
581
  r += (!val);
515
582
  return r;
583
+ #endif
516
584
  # endif
517
585
  } else /* 32 bits */ {
518
- # if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
519
- unsigned long r = 0;
520
- _BitScanReverse( &r, (unsigned long)val );
521
- return (unsigned)(r>>3);
522
- # elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT)
586
+ # if (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
587
+ ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
588
+ !defined(LZ4_FORCE_SW_BITCOUNT)
523
589
  return (unsigned)__builtin_clz((U32)val) >> 3;
524
590
  # else
525
- unsigned r;
526
- if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
527
- r += (!val);
528
- return r;
591
+ val >>= 8;
592
+ val = ((((val + 0x00FFFF00) | 0x00FFFFFF) + val) |
593
+ (val + 0x00FF0000)) >> 24;
594
+ return (unsigned)val ^ 3;
529
595
  # endif
530
596
  }
531
597
  }
532
598
  }
533
599
 
600
+
534
601
  #define STEPSIZE sizeof(reg_t)
535
602
  LZ4_FORCE_INLINE
536
603
  unsigned LZ4_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit)
@@ -605,7 +672,7 @@ typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive;
605
672
  int LZ4_versionNumber (void) { return LZ4_VERSION_NUMBER; }
606
673
  const char* LZ4_versionString(void) { return LZ4_VERSION_STRING; }
607
674
  int LZ4_compressBound(int isize) { return LZ4_COMPRESSBOUND(isize); }
608
- int LZ4_sizeofState() { return LZ4_STREAMSIZE; }
675
+ int LZ4_sizeofState(void) { return LZ4_STREAMSIZE; }
609
676
 
610
677
 
611
678
  /*-************************************
@@ -628,7 +695,7 @@ int LZ4_decompress_safe_forceExtDict(const char* source, char* dest,
628
695
  /*-******************************
629
696
  * Compression functions
630
697
  ********************************/
631
- static U32 LZ4_hash4(U32 sequence, tableType_t const tableType)
698
+ LZ4_FORCE_INLINE U32 LZ4_hash4(U32 sequence, tableType_t const tableType)
632
699
  {
633
700
  if (tableType == byU16)
634
701
  return ((sequence * 2654435761U) >> ((MINMATCH*8)-(LZ4_HASHLOG+1)));
@@ -636,7 +703,7 @@ static U32 LZ4_hash4(U32 sequence, tableType_t const tableType)
636
703
  return ((sequence * 2654435761U) >> ((MINMATCH*8)-LZ4_HASHLOG));
637
704
  }
638
705
 
639
- static U32 LZ4_hash5(U64 sequence, tableType_t const tableType)
706
+ LZ4_FORCE_INLINE U32 LZ4_hash5(U64 sequence, tableType_t const tableType)
640
707
  {
641
708
  const U32 hashLog = (tableType == byU16) ? LZ4_HASHLOG+1 : LZ4_HASHLOG;
642
709
  if (LZ4_isLittleEndian()) {
@@ -654,7 +721,7 @@ LZ4_FORCE_INLINE U32 LZ4_hashPosition(const void* const p, tableType_t const tab
654
721
  return LZ4_hash4(LZ4_read32(p), tableType);
655
722
  }
656
723
 
657
- static void LZ4_clearHash(U32 h, void* tableBase, tableType_t const tableType)
724
+ LZ4_FORCE_INLINE void LZ4_clearHash(U32 h, void* tableBase, tableType_t const tableType)
658
725
  {
659
726
  switch (tableType)
660
727
  {
@@ -666,7 +733,7 @@ static void LZ4_clearHash(U32 h, void* tableBase, tableType_t const tableType)
666
733
  }
667
734
  }
668
735
 
669
- static void LZ4_putIndexOnHash(U32 idx, U32 h, void* tableBase, tableType_t const tableType)
736
+ LZ4_FORCE_INLINE void LZ4_putIndexOnHash(U32 idx, U32 h, void* tableBase, tableType_t const tableType)
670
737
  {
671
738
  switch (tableType)
672
739
  {
@@ -678,7 +745,7 @@ static void LZ4_putIndexOnHash(U32 idx, U32 h, void* tableBase, tableType_t cons
678
745
  }
679
746
  }
680
747
 
681
- static void LZ4_putPositionOnHash(const BYTE* p, U32 h,
748
+ LZ4_FORCE_INLINE void LZ4_putPositionOnHash(const BYTE* p, U32 h,
682
749
  void* tableBase, tableType_t const tableType,
683
750
  const BYTE* srcBase)
684
751
  {
@@ -703,7 +770,7 @@ LZ4_FORCE_INLINE void LZ4_putPosition(const BYTE* p, void* tableBase, tableType_
703
770
  * Assumption 1 : only valid if tableType == byU32 or byU16.
704
771
  * Assumption 2 : h is presumed valid (within limits of hash table)
705
772
  */
706
- static U32 LZ4_getIndexOnHash(U32 h, const void* tableBase, tableType_t tableType)
773
+ LZ4_FORCE_INLINE U32 LZ4_getIndexOnHash(U32 h, const void* tableBase, tableType_t tableType)
707
774
  {
708
775
  LZ4_STATIC_ASSERT(LZ4_MEMORY_USAGE > 2);
709
776
  if (tableType == byU32) {
@@ -739,22 +806,13 @@ LZ4_FORCE_INLINE void
739
806
  LZ4_prepareTable(LZ4_stream_t_internal* const cctx,
740
807
  const int inputSize,
741
808
  const tableType_t tableType) {
742
- /* If compression failed during the previous step, then the context
743
- * is marked as dirty, therefore, it has to be fully reset.
744
- */
745
- if (cctx->dirty) {
746
- DEBUGLOG(5, "LZ4_prepareTable: Full reset for %p", cctx);
747
- MEM_INIT(cctx, 0, sizeof(LZ4_stream_t_internal));
748
- return;
749
- }
750
-
751
809
  /* If the table hasn't been used, it's guaranteed to be zeroed out, and is
752
810
  * therefore safe to use no matter what mode we're in. Otherwise, we figure
753
811
  * out if it's safe to leave as is or whether it needs to be reset.
754
812
  */
755
- if (cctx->tableType != clearedTable) {
813
+ if ((tableType_t)cctx->tableType != clearedTable) {
756
814
  assert(inputSize >= 0);
757
- if (cctx->tableType != tableType
815
+ if ((tableType_t)cctx->tableType != tableType
758
816
  || ((tableType == byU16) && cctx->currentOffset + (unsigned)inputSize >= 0xFFFFU)
759
817
  || ((tableType == byU32) && cctx->currentOffset > 1 GB)
760
818
  || tableType == byPtr
@@ -763,7 +821,7 @@ LZ4_prepareTable(LZ4_stream_t_internal* const cctx,
763
821
  DEBUGLOG(4, "LZ4_prepareTable: Resetting table in %p", cctx);
764
822
  MEM_INIT(cctx->hashTable, 0, LZ4_HASHTABLESIZE);
765
823
  cctx->currentOffset = 0;
766
- cctx->tableType = clearedTable;
824
+ cctx->tableType = (U32)clearedTable;
767
825
  } else {
768
826
  DEBUGLOG(4, "LZ4_prepareTable: Re-use hash table (no reset)");
769
827
  }
@@ -785,8 +843,12 @@ LZ4_prepareTable(LZ4_stream_t_internal* const cctx,
785
843
  }
786
844
 
787
845
  /** LZ4_compress_generic() :
788
- inlined, to ensure branches are decided at compilation time */
789
- LZ4_FORCE_INLINE int LZ4_compress_generic(
846
+ * inlined, to ensure branches are decided at compilation time.
847
+ * Presumed already validated at this stage:
848
+ * - source != NULL
849
+ * - inputSize > 0
850
+ */
851
+ LZ4_FORCE_INLINE int LZ4_compress_generic_validated(
790
852
  LZ4_stream_t_internal* const cctx,
791
853
  const char* const source,
792
854
  char* const dest,
@@ -815,7 +877,7 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
815
877
 
816
878
  int const maybe_extMem = (dictDirective == usingExtDict) || (dictDirective == usingDictCtx);
817
879
  U32 const prefixIdxLimit = startIndex - dictSize; /* used when dictDirective == dictSmall */
818
- const BYTE* const dictEnd = dictionary + dictSize;
880
+ const BYTE* const dictEnd = dictionary ? dictionary + dictSize : dictionary;
819
881
  const BYTE* anchor = (const BYTE*) source;
820
882
  const BYTE* const iend = ip + inputSize;
821
883
  const BYTE* const mflimitPlusOne = iend - MFLIMIT + 1;
@@ -823,7 +885,7 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
823
885
 
824
886
  /* the dictCtx currentOffset is indexed on the start of the dictionary,
825
887
  * while a dictionary in the current context precedes the currentOffset */
826
- const BYTE* dictBase = (dictDirective == usingDictCtx) ?
888
+ const BYTE* dictBase = !dictionary ? NULL : (dictDirective == usingDictCtx) ?
827
889
  dictionary + dictSize - dictCtx->currentOffset :
828
890
  dictionary + dictSize - startIndex;
829
891
 
@@ -833,11 +895,11 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
833
895
  U32 offset = 0;
834
896
  U32 forwardH;
835
897
 
836
- DEBUGLOG(5, "LZ4_compress_generic: srcSize=%i, tableType=%u", inputSize, tableType);
898
+ DEBUGLOG(5, "LZ4_compress_generic_validated: srcSize=%i, tableType=%u", inputSize, tableType);
899
+ assert(ip != NULL);
837
900
  /* If init conditions are not met, we don't have to mark stream
838
901
  * as having dirty context, since no action was taken yet */
839
902
  if (outputDirective == fillOutput && maxOutputSize < 1) { return 0; } /* Impossible to store anything */
840
- if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) { return 0; } /* Unsupported inputSize, too large (or negative) */
841
903
  if ((tableType == byU16) && (inputSize>=LZ4_64Klimit)) { return 0; } /* Size too large (not within 64K limit) */
842
904
  if (tableType==byPtr) assert(dictDirective==noDict); /* only supported use case with byPtr */
843
905
  assert(acceleration >= 1);
@@ -854,7 +916,7 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
854
916
  cctx->dictSize += (U32)inputSize;
855
917
  }
856
918
  cctx->currentOffset += (U32)inputSize;
857
- cctx->tableType = (U16)tableType;
919
+ cctx->tableType = (U32)tableType;
858
920
 
859
921
  if (inputSize<LZ4_minLength) goto _last_literals; /* Input too small, no compression (all literals) */
860
922
 
@@ -1147,13 +1209,14 @@ _last_literals:
1147
1209
  if (outputDirective == fillOutput) {
1148
1210
  /* adapt lastRun to fill 'dst' */
1149
1211
  assert(olimit >= op);
1150
- lastRun = (size_t)(olimit-op) - 1;
1151
- lastRun -= (lastRun+240)/255;
1212
+ lastRun = (size_t)(olimit-op) - 1/*token*/;
1213
+ lastRun -= (lastRun + 256 - RUN_MASK) / 256; /*additional length tokens*/
1152
1214
  } else {
1153
1215
  assert(outputDirective == limitedOutput);
1154
1216
  return 0; /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
1155
1217
  }
1156
1218
  }
1219
+ DEBUGLOG(6, "Final literal run : %i literals", (int)lastRun);
1157
1220
  if (lastRun >= RUN_MASK) {
1158
1221
  size_t accumulator = lastRun - RUN_MASK;
1159
1222
  *op++ = RUN_MASK << ML_BITS;
@@ -1162,7 +1225,7 @@ _last_literals:
1162
1225
  } else {
1163
1226
  *op++ = (BYTE)(lastRun<<ML_BITS);
1164
1227
  }
1165
- memcpy(op, anchor, lastRun);
1228
+ LZ4_memcpy(op, anchor, lastRun);
1166
1229
  ip = anchor + lastRun;
1167
1230
  op += lastRun;
1168
1231
  }
@@ -1170,18 +1233,60 @@ _last_literals:
1170
1233
  if (outputDirective == fillOutput) {
1171
1234
  *inputConsumed = (int) (((const char*)ip)-source);
1172
1235
  }
1173
- DEBUGLOG(5, "LZ4_compress_generic: compressed %i bytes into %i bytes", inputSize, (int)(((char*)op) - dest));
1174
1236
  result = (int)(((char*)op) - dest);
1175
1237
  assert(result > 0);
1238
+ DEBUGLOG(5, "LZ4_compress_generic: compressed %i bytes into %i bytes", inputSize, result);
1176
1239
  return result;
1177
1240
  }
1178
1241
 
1242
+ /** LZ4_compress_generic() :
1243
+ * inlined, to ensure branches are decided at compilation time;
1244
+ * takes care of src == (NULL, 0)
1245
+ * and forward the rest to LZ4_compress_generic_validated */
1246
+ LZ4_FORCE_INLINE int LZ4_compress_generic(
1247
+ LZ4_stream_t_internal* const cctx,
1248
+ const char* const src,
1249
+ char* const dst,
1250
+ const int srcSize,
1251
+ int *inputConsumed, /* only written when outputDirective == fillOutput */
1252
+ const int dstCapacity,
1253
+ const limitedOutput_directive outputDirective,
1254
+ const tableType_t tableType,
1255
+ const dict_directive dictDirective,
1256
+ const dictIssue_directive dictIssue,
1257
+ const int acceleration)
1258
+ {
1259
+ DEBUGLOG(5, "LZ4_compress_generic: srcSize=%i, dstCapacity=%i",
1260
+ srcSize, dstCapacity);
1261
+
1262
+ if ((U32)srcSize > (U32)LZ4_MAX_INPUT_SIZE) { return 0; } /* Unsupported srcSize, too large (or negative) */
1263
+ if (srcSize == 0) { /* src == NULL supported if srcSize == 0 */
1264
+ if (outputDirective != notLimited && dstCapacity <= 0) return 0; /* no output, can't write anything */
1265
+ DEBUGLOG(5, "Generating an empty block");
1266
+ assert(outputDirective == notLimited || dstCapacity >= 1);
1267
+ assert(dst != NULL);
1268
+ dst[0] = 0;
1269
+ if (outputDirective == fillOutput) {
1270
+ assert (inputConsumed != NULL);
1271
+ *inputConsumed = 0;
1272
+ }
1273
+ return 1;
1274
+ }
1275
+ assert(src != NULL);
1276
+
1277
+ return LZ4_compress_generic_validated(cctx, src, dst, srcSize,
1278
+ inputConsumed, /* only written into if outputDirective == fillOutput */
1279
+ dstCapacity, outputDirective,
1280
+ tableType, dictDirective, dictIssue, acceleration);
1281
+ }
1282
+
1179
1283
 
1180
1284
  int LZ4_compress_fast_extState(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
1181
1285
  {
1182
1286
  LZ4_stream_t_internal* const ctx = & LZ4_initStream(state, sizeof(LZ4_stream_t)) -> internal_donotuse;
1183
1287
  assert(ctx != NULL);
1184
- if (acceleration < 1) acceleration = ACCELERATION_DEFAULT;
1288
+ if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT;
1289
+ if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX;
1185
1290
  if (maxOutputSize >= LZ4_compressBound(inputSize)) {
1186
1291
  if (inputSize < LZ4_64Klimit) {
1187
1292
  return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, byU16, noDict, noDictIssue, acceleration);
@@ -1211,7 +1316,8 @@ int LZ4_compress_fast_extState(void* state, const char* source, char* dest, int
1211
1316
  int LZ4_compress_fast_extState_fastReset(void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration)
1212
1317
  {
1213
1318
  LZ4_stream_t_internal* ctx = &((LZ4_stream_t*)state)->internal_donotuse;
1214
- if (acceleration < 1) acceleration = ACCELERATION_DEFAULT;
1319
+ if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT;
1320
+ if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX;
1215
1321
 
1216
1322
  if (dstCapacity >= LZ4_compressBound(srcSize)) {
1217
1323
  if (srcSize < LZ4_64Klimit) {
@@ -1270,22 +1376,6 @@ int LZ4_compress_default(const char* src, char* dst, int srcSize, int maxOutputS
1270
1376
  }
1271
1377
 
1272
1378
 
1273
- /* hidden debug function */
1274
- /* strangely enough, gcc generates faster code when this function is uncommented, even if unused */
1275
- int LZ4_compress_fast_force(const char* src, char* dst, int srcSize, int dstCapacity, int acceleration)
1276
- {
1277
- LZ4_stream_t ctx;
1278
- LZ4_initStream(&ctx, sizeof(ctx));
1279
-
1280
- if (srcSize < LZ4_64Klimit) {
1281
- return LZ4_compress_generic(&ctx.internal_donotuse, src, dst, srcSize, NULL, dstCapacity, limitedOutput, byU16, noDict, noDictIssue, acceleration);
1282
- } else {
1283
- tableType_t const addrMode = (sizeof(void*) > 4) ? byU32 : byPtr;
1284
- return LZ4_compress_generic(&ctx.internal_donotuse, src, dst, srcSize, NULL, dstCapacity, limitedOutput, addrMode, noDict, noDictIssue, acceleration);
1285
- }
1286
- }
1287
-
1288
-
1289
1379
  /* Note!: This function leaves the stream in an unclean/broken state!
1290
1380
  * It is not safe to subsequently use the same state with a _fastReset() or
1291
1381
  * _continue() call without resetting it. */
@@ -1340,27 +1430,23 @@ LZ4_stream_t* LZ4_createStream(void)
1340
1430
  return lz4s;
1341
1431
  }
1342
1432
 
1343
- #ifndef _MSC_VER /* for some reason, Visual fails the aligment test on 32-bit x86 :
1344
- it reports an aligment of 8-bytes,
1345
- while actually aligning LZ4_stream_t on 4 bytes. */
1346
1433
  static size_t LZ4_stream_t_alignment(void)
1347
1434
  {
1348
- struct { char c; LZ4_stream_t t; } t_a;
1349
- return sizeof(t_a) - sizeof(t_a.t);
1350
- }
1435
+ #if LZ4_ALIGN_TEST
1436
+ typedef struct { char c; LZ4_stream_t t; } t_a;
1437
+ return sizeof(t_a) - sizeof(LZ4_stream_t);
1438
+ #else
1439
+ return 1; /* effectively disabled */
1351
1440
  #endif
1441
+ }
1352
1442
 
1353
1443
  LZ4_stream_t* LZ4_initStream (void* buffer, size_t size)
1354
1444
  {
1355
1445
  DEBUGLOG(5, "LZ4_initStream");
1356
1446
  if (buffer == NULL) { return NULL; }
1357
1447
  if (size < sizeof(LZ4_stream_t)) { return NULL; }
1358
- #ifndef _MSC_VER /* for some reason, Visual fails the aligment test on 32-bit x86 :
1359
- it reports an aligment of 8-bytes,
1360
- while actually aligning LZ4_stream_t on 4 bytes. */
1361
- if (((size_t)buffer) & (LZ4_stream_t_alignment() - 1)) { return NULL; } /* alignment check */
1362
- #endif
1363
- MEM_INIT(buffer, 0, sizeof(LZ4_stream_t));
1448
+ if (!LZ4_isAligned(buffer, LZ4_stream_t_alignment())) return NULL;
1449
+ MEM_INIT(buffer, 0, sizeof(LZ4_stream_t_internal));
1364
1450
  return (LZ4_stream_t*)buffer;
1365
1451
  }
1366
1452
 
@@ -1369,7 +1455,7 @@ LZ4_stream_t* LZ4_initStream (void* buffer, size_t size)
1369
1455
  void LZ4_resetStream (LZ4_stream_t* LZ4_stream)
1370
1456
  {
1371
1457
  DEBUGLOG(5, "LZ4_resetStream (ctx:%p)", LZ4_stream);
1372
- MEM_INIT(LZ4_stream, 0, sizeof(LZ4_stream_t));
1458
+ MEM_INIT(LZ4_stream, 0, sizeof(LZ4_stream_t_internal));
1373
1459
  }
1374
1460
 
1375
1461
  void LZ4_resetStream_fast(LZ4_stream_t* ctx) {
@@ -1418,7 +1504,7 @@ int LZ4_loadDict (LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize)
1418
1504
  base = dictEnd - dict->currentOffset;
1419
1505
  dict->dictionary = p;
1420
1506
  dict->dictSize = (U32)(dictEnd - p);
1421
- dict->tableType = tableType;
1507
+ dict->tableType = (U32)tableType;
1422
1508
 
1423
1509
  while (p <= dictEnd-HASH_UNIT) {
1424
1510
  LZ4_putPosition(p, dict->hashTable, tableType, base);
@@ -1436,12 +1522,6 @@ void LZ4_attach_dictionary(LZ4_stream_t* workingStream, const LZ4_stream_t* dict
1436
1522
  workingStream, dictionaryStream,
1437
1523
  dictCtx != NULL ? dictCtx->dictSize : 0);
1438
1524
 
1439
- /* Calling LZ4_resetStream_fast() here makes sure that changes will not be
1440
- * erased by subsequent calls to LZ4_resetStream_fast() in case stream was
1441
- * marked as having dirty context, e.g. requiring full reset.
1442
- */
1443
- LZ4_resetStream_fast(workingStream);
1444
-
1445
1525
  if (dictCtx != NULL) {
1446
1526
  /* If the current offset is zero, we will never look in the
1447
1527
  * external dictionary context, since there is no value a table
@@ -1493,9 +1573,9 @@ int LZ4_compress_fast_continue (LZ4_stream_t* LZ4_stream,
1493
1573
 
1494
1574
  DEBUGLOG(5, "LZ4_compress_fast_continue (inputSize=%i)", inputSize);
1495
1575
 
1496
- if (streamPtr->dirty) { return 0; } /* Uninitialized structure detected */
1497
1576
  LZ4_renormDictT(streamPtr, inputSize); /* avoid index overflow */
1498
- if (acceleration < 1) acceleration = ACCELERATION_DEFAULT;
1577
+ if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT;
1578
+ if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX;
1499
1579
 
1500
1580
  /* invalidate tiny dictionaries */
1501
1581
  if ( (streamPtr->dictSize-1 < 4-1) /* intentional underflow */
@@ -1538,7 +1618,7 @@ int LZ4_compress_fast_continue (LZ4_stream_t* LZ4_stream,
1538
1618
  * cost to copy the dictionary's tables into the active context,
1539
1619
  * so that the compression loop is only looking into one table.
1540
1620
  */
1541
- memcpy(streamPtr, streamPtr->dictCtx, sizeof(LZ4_stream_t));
1621
+ LZ4_memcpy(streamPtr, streamPtr->dictCtx, sizeof(*streamPtr));
1542
1622
  result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration);
1543
1623
  } else {
1544
1624
  result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingDictCtx, noDictIssue, acceleration);
@@ -1593,7 +1673,9 @@ int LZ4_saveDict (LZ4_stream_t* LZ4_dict, char* safeBuffer, int dictSize)
1593
1673
  if ((U32)dictSize > 64 KB) { dictSize = 64 KB; } /* useless to define a dictionary > 64 KB */
1594
1674
  if ((U32)dictSize > dict->dictSize) { dictSize = (int)dict->dictSize; }
1595
1675
 
1596
- memmove(safeBuffer, previousDictEnd - dictSize, dictSize);
1676
+ if (safeBuffer == NULL) assert(dictSize == 0);
1677
+ if (dictSize > 0)
1678
+ memmove(safeBuffer, previousDictEnd - dictSize, dictSize);
1597
1679
 
1598
1680
  dict->dictionary = (const BYTE*)safeBuffer;
1599
1681
  dict->dictSize = (U32)dictSize;
@@ -1623,25 +1705,27 @@ typedef enum { decode_full_block = 0, partial_decode = 1 } earlyEnd_directive;
1623
1705
  */
1624
1706
  typedef enum { loop_error = -2, initial_error = -1, ok = 0 } variable_length_error;
1625
1707
  LZ4_FORCE_INLINE unsigned
1626
- read_variable_length(const BYTE**ip, const BYTE* lencheck, int loop_check, int initial_check, variable_length_error* error)
1627
- {
1628
- unsigned length = 0;
1629
- unsigned s;
1630
- if (initial_check && unlikely((*ip) >= lencheck)) { /* overflow detection */
1631
- *error = initial_error;
1632
- return length;
1633
- }
1634
- do {
1635
- s = **ip;
1636
- (*ip)++;
1637
- length += s;
1638
- if (loop_check && unlikely((*ip) >= lencheck)) { /* overflow detection */
1639
- *error = loop_error;
1640
- return length;
1708
+ read_variable_length(const BYTE**ip, const BYTE* lencheck,
1709
+ int loop_check, int initial_check,
1710
+ variable_length_error* error)
1711
+ {
1712
+ U32 length = 0;
1713
+ U32 s;
1714
+ if (initial_check && unlikely((*ip) >= lencheck)) { /* overflow detection */
1715
+ *error = initial_error;
1716
+ return length;
1641
1717
  }
1642
- } while (s==255);
1718
+ do {
1719
+ s = **ip;
1720
+ (*ip)++;
1721
+ length += s;
1722
+ if (loop_check && unlikely((*ip) >= lencheck)) { /* overflow detection */
1723
+ *error = loop_error;
1724
+ return length;
1725
+ }
1726
+ } while (s==255);
1643
1727
 
1644
- return length;
1728
+ return length;
1645
1729
  }
1646
1730
 
1647
1731
  /*! LZ4_decompress_generic() :
@@ -1722,7 +1806,7 @@ LZ4_decompress_generic(
1722
1806
  /* decode literal length */
1723
1807
  if (length == RUN_MASK) {
1724
1808
  variable_length_error error = ok;
1725
- length += read_variable_length(&ip, iend-RUN_MASK, endOnInput, endOnInput, &error);
1809
+ length += read_variable_length(&ip, iend-RUN_MASK, (int)endOnInput, (int)endOnInput, &error);
1726
1810
  if (error == initial_error) { goto _output_error; }
1727
1811
  if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */
1728
1812
  if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */
@@ -1746,12 +1830,12 @@ LZ4_decompress_generic(
1746
1830
  /* We don't need to check oend, since we check it once for each loop below */
1747
1831
  if (ip > iend-(16 + 1/*max lit + offset + nextToken*/)) { goto safe_literal_copy; }
1748
1832
  /* Literals can only be 14, but hope compilers optimize if we copy by a register size */
1749
- memcpy(op, ip, 16);
1833
+ LZ4_memcpy(op, ip, 16);
1750
1834
  } else { /* LZ4_decompress_fast() */
1751
1835
  /* LZ4_decompress_fast() cannot copy more than 8 bytes at a time :
1752
1836
  * it doesn't know input length, and relies on end-of-block properties */
1753
- memcpy(op, ip, 8);
1754
- if (length > 8) { memcpy(op+8, ip+8, 8); }
1837
+ LZ4_memcpy(op, ip, 8);
1838
+ if (length > 8) { LZ4_memcpy(op+8, ip+8, 8); }
1755
1839
  }
1756
1840
  ip += length; op = cpy;
1757
1841
  }
@@ -1765,10 +1849,10 @@ LZ4_decompress_generic(
1765
1849
  length = token & ML_MASK;
1766
1850
 
1767
1851
  if (length == ML_MASK) {
1768
- variable_length_error error = ok;
1769
- if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */
1770
- length += read_variable_length(&ip, iend - LASTLITERALS + 1, endOnInput, 0, &error);
1771
- if (error != ok) { goto _output_error; }
1852
+ variable_length_error error = ok;
1853
+ if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */
1854
+ length += read_variable_length(&ip, iend - LASTLITERALS + 1, (int)endOnInput, 0, &error);
1855
+ if (error != ok) { goto _output_error; }
1772
1856
  if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) { goto _output_error; } /* overflow detection */
1773
1857
  length += MINMATCH;
1774
1858
  if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
@@ -1787,19 +1871,20 @@ LZ4_decompress_generic(
1787
1871
  assert(match <= op);
1788
1872
  assert(op + 18 <= oend);
1789
1873
 
1790
- memcpy(op, match, 8);
1791
- memcpy(op+8, match+8, 8);
1792
- memcpy(op+16, match+16, 2);
1874
+ LZ4_memcpy(op, match, 8);
1875
+ LZ4_memcpy(op+8, match+8, 8);
1876
+ LZ4_memcpy(op+16, match+16, 2);
1793
1877
  op += length;
1794
1878
  continue;
1795
1879
  } } }
1796
1880
 
1797
- if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */
1881
+ if (checkOffset && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */
1798
1882
  /* match starting within external dictionary */
1799
1883
  if ((dict==usingExtDict) && (match < lowPrefix)) {
1800
1884
  if (unlikely(op+length > oend-LASTLITERALS)) {
1801
1885
  if (partialDecoding) {
1802
- length = MIN(length, (size_t)(oend-op)); /* reach end of buffer */
1886
+ DEBUGLOG(7, "partialDecoding: dictionary match, close to dstEnd");
1887
+ length = MIN(length, (size_t)(oend-op));
1803
1888
  } else {
1804
1889
  goto _output_error; /* end-of-block condition violated */
1805
1890
  } }
@@ -1812,14 +1897,14 @@ LZ4_decompress_generic(
1812
1897
  /* match stretches into both external dictionary and current block */
1813
1898
  size_t const copySize = (size_t)(lowPrefix - match);
1814
1899
  size_t const restSize = length - copySize;
1815
- memcpy(op, dictEnd - copySize, copySize);
1900
+ LZ4_memcpy(op, dictEnd - copySize, copySize);
1816
1901
  op += copySize;
1817
1902
  if (restSize > (size_t)(op - lowPrefix)) { /* overlap copy */
1818
1903
  BYTE* const endOfMatch = op + restSize;
1819
1904
  const BYTE* copyFrom = lowPrefix;
1820
1905
  while (op < endOfMatch) { *op++ = *copyFrom++; }
1821
1906
  } else {
1822
- memcpy(op, lowPrefix, restSize);
1907
+ LZ4_memcpy(op, lowPrefix, restSize);
1823
1908
  op += restSize;
1824
1909
  } }
1825
1910
  continue;
@@ -1860,7 +1945,7 @@ LZ4_decompress_generic(
1860
1945
  /* strictly "less than" on input, to re-enter the loop with at least one byte */
1861
1946
  && likely((endOnInput ? ip < shortiend : 1) & (op <= shortoend)) ) {
1862
1947
  /* Copy the literals */
1863
- memcpy(op, ip, endOnInput ? 16 : 8);
1948
+ LZ4_memcpy(op, ip, endOnInput ? 16 : 8);
1864
1949
  op += length; ip += length;
1865
1950
 
1866
1951
  /* The second stage: prepare for match copying, decode full info.
@@ -1875,9 +1960,9 @@ LZ4_decompress_generic(
1875
1960
  && (offset >= 8)
1876
1961
  && (dict==withPrefix64k || match >= lowPrefix) ) {
1877
1962
  /* Copy the match. */
1878
- memcpy(op + 0, match + 0, 8);
1879
- memcpy(op + 8, match + 8, 8);
1880
- memcpy(op +16, match +16, 2);
1963
+ LZ4_memcpy(op + 0, match + 0, 8);
1964
+ LZ4_memcpy(op + 8, match + 8, 8);
1965
+ LZ4_memcpy(op +16, match +16, 2);
1881
1966
  op += length + MINMATCH;
1882
1967
  /* Both stages worked, load the next token. */
1883
1968
  continue;
@@ -1891,7 +1976,7 @@ LZ4_decompress_generic(
1891
1976
  /* decode literal length */
1892
1977
  if (length == RUN_MASK) {
1893
1978
  variable_length_error error = ok;
1894
- length += read_variable_length(&ip, iend-RUN_MASK, endOnInput, endOnInput, &error);
1979
+ length += read_variable_length(&ip, iend-RUN_MASK, (int)endOnInput, (int)endOnInput, &error);
1895
1980
  if (error == initial_error) { goto _output_error; }
1896
1981
  if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */
1897
1982
  if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */
@@ -1907,29 +1992,34 @@ LZ4_decompress_generic(
1907
1992
  || ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)) )
1908
1993
  {
1909
1994
  /* We've either hit the input parsing restriction or the output parsing restriction.
1910
- * If we've hit the input parsing condition then this must be the last sequence.
1911
- * If we've hit the output parsing condition then we are either using partialDecoding
1912
- * or we've hit the output parsing condition.
1995
+ * In the normal scenario, decoding a full block, it must be the last sequence,
1996
+ * otherwise it's an error (invalid input or dimensions).
1997
+ * In partialDecoding scenario, it's necessary to ensure there is no buffer overflow.
1913
1998
  */
1914
1999
  if (partialDecoding) {
1915
2000
  /* Since we are partial decoding we may be in this block because of the output parsing
1916
2001
  * restriction, which is not valid since the output buffer is allowed to be undersized.
1917
2002
  */
1918
2003
  assert(endOnInput);
1919
- /* If we're in this block because of the input parsing condition, then we must be on the
1920
- * last sequence (or invalid), so we must check that we exactly consume the input.
2004
+ DEBUGLOG(7, "partialDecoding: copying literals, close to input or output end")
2005
+ DEBUGLOG(7, "partialDecoding: literal length = %u", (unsigned)length);
2006
+ DEBUGLOG(7, "partialDecoding: remaining space in dstBuffer : %i", (int)(oend - op));
2007
+ DEBUGLOG(7, "partialDecoding: remaining space in srcBuffer : %i", (int)(iend - ip));
2008
+ /* Finishing in the middle of a literals segment,
2009
+ * due to lack of input.
1921
2010
  */
1922
- if ((ip+length>iend-(2+1+LASTLITERALS)) && (ip+length != iend)) { goto _output_error; }
1923
- assert(ip+length <= iend);
1924
- /* We are finishing in the middle of a literals segment.
1925
- * Break after the copy.
2011
+ if (ip+length > iend) {
2012
+ length = (size_t)(iend-ip);
2013
+ cpy = op + length;
2014
+ }
2015
+ /* Finishing in the middle of a literals segment,
2016
+ * due to lack of output space.
1926
2017
  */
1927
2018
  if (cpy > oend) {
1928
2019
  cpy = oend;
1929
2020
  assert(op<=oend);
1930
2021
  length = (size_t)(oend-op);
1931
2022
  }
1932
- assert(ip+length <= iend);
1933
2023
  } else {
1934
2024
  /* We must be on the last sequence because of the parsing limitations so check
1935
2025
  * that we exactly regenerate the original size (must be exact when !endOnInput).
@@ -1938,16 +2028,22 @@ LZ4_decompress_generic(
1938
2028
  /* We must be on the last sequence (or invalid) because of the parsing limitations
1939
2029
  * so check that we exactly consume the input and don't overrun the output buffer.
1940
2030
  */
1941
- if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) { goto _output_error; }
2031
+ if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) {
2032
+ DEBUGLOG(6, "should have been last run of literals")
2033
+ DEBUGLOG(6, "ip(%p) + length(%i) = %p != iend (%p)", ip, (int)length, ip+length, iend);
2034
+ DEBUGLOG(6, "or cpy(%p) > oend(%p)", cpy, oend);
2035
+ goto _output_error;
2036
+ }
1942
2037
  }
1943
- memmove(op, ip, length); /* supports overlapping memory regions, which only matters for in-place decompression scenarios */
2038
+ memmove(op, ip, length); /* supports overlapping memory regions; only matters for in-place decompression scenarios */
1944
2039
  ip += length;
1945
2040
  op += length;
1946
- /* Necessarily EOF when !partialDecoding. When partialDecoding
1947
- * it is EOF if we've either filled the output buffer or hit
1948
- * the input parsing restriction.
2041
+ /* Necessarily EOF when !partialDecoding.
2042
+ * When partialDecoding, it is EOF if we've either
2043
+ * filled the output buffer or
2044
+ * can't proceed with reading an offset for following match.
1949
2045
  */
1950
- if (!partialDecoding || (cpy == oend) || (ip == iend)) {
2046
+ if (!partialDecoding || (cpy == oend) || (ip >= (iend-2))) {
1951
2047
  break;
1952
2048
  }
1953
2049
  } else {
@@ -1965,7 +2061,7 @@ LZ4_decompress_generic(
1965
2061
  _copy_match:
1966
2062
  if (length == ML_MASK) {
1967
2063
  variable_length_error error = ok;
1968
- length += read_variable_length(&ip, iend - LASTLITERALS + 1, endOnInput, 0, &error);
2064
+ length += read_variable_length(&ip, iend - LASTLITERALS + 1, (int)endOnInput, 0, &error);
1969
2065
  if (error != ok) goto _output_error;
1970
2066
  if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) goto _output_error; /* overflow detection */
1971
2067
  }
@@ -1990,14 +2086,14 @@ LZ4_decompress_generic(
1990
2086
  /* match stretches into both external dictionary and current block */
1991
2087
  size_t const copySize = (size_t)(lowPrefix - match);
1992
2088
  size_t const restSize = length - copySize;
1993
- memcpy(op, dictEnd - copySize, copySize);
2089
+ LZ4_memcpy(op, dictEnd - copySize, copySize);
1994
2090
  op += copySize;
1995
2091
  if (restSize > (size_t)(op - lowPrefix)) { /* overlap copy */
1996
2092
  BYTE* const endOfMatch = op + restSize;
1997
2093
  const BYTE* copyFrom = lowPrefix;
1998
2094
  while (op < endOfMatch) *op++ = *copyFrom++;
1999
2095
  } else {
2000
- memcpy(op, lowPrefix, restSize);
2096
+ LZ4_memcpy(op, lowPrefix, restSize);
2001
2097
  op += restSize;
2002
2098
  } }
2003
2099
  continue;
@@ -2016,7 +2112,7 @@ LZ4_decompress_generic(
2016
2112
  if (matchEnd > op) { /* overlap copy */
2017
2113
  while (op < copyEnd) { *op++ = *match++; }
2018
2114
  } else {
2019
- memcpy(op, match, mlen);
2115
+ LZ4_memcpy(op, match, mlen);
2020
2116
  }
2021
2117
  op = copyEnd;
2022
2118
  if (op == oend) { break; }
@@ -2030,10 +2126,10 @@ LZ4_decompress_generic(
2030
2126
  op[2] = match[2];
2031
2127
  op[3] = match[3];
2032
2128
  match += inc32table[offset];
2033
- memcpy(op+4, match, 4);
2129
+ LZ4_memcpy(op+4, match, 4);
2034
2130
  match -= dec64table[offset];
2035
2131
  } else {
2036
- memcpy(op, match, 8);
2132
+ LZ4_memcpy(op, match, 8);
2037
2133
  match += 8;
2038
2134
  }
2039
2135
  op += 8;
@@ -2048,7 +2144,7 @@ LZ4_decompress_generic(
2048
2144
  }
2049
2145
  while (op < cpy) { *op++ = *match++; }
2050
2146
  } else {
2051
- memcpy(op, match, 8);
2147
+ LZ4_memcpy(op, match, 8);
2052
2148
  if (length > 16) { LZ4_wildCopy8(op+8, match+8, cpy); }
2053
2149
  }
2054
2150
  op = cpy; /* wildcopy correction */
@@ -2056,6 +2152,7 @@ LZ4_decompress_generic(
2056
2152
 
2057
2153
  /* end of decoding */
2058
2154
  if (endOnInput) {
2155
+ DEBUGLOG(5, "decoded %i bytes", (int) (((char*)op)-dst));
2059
2156
  return (int) (((char*)op)-dst); /* Nb of output bytes decoded */
2060
2157
  } else {
2061
2158
  return (int) (((const char*)ip)-src); /* Nb of input bytes read */
@@ -2070,7 +2167,7 @@ LZ4_decompress_generic(
2070
2167
 
2071
2168
  /*===== Instantiate the API decoding functions. =====*/
2072
2169
 
2073
- LZ4_FORCE_O2_GCC_PPC64LE
2170
+ LZ4_FORCE_O2
2074
2171
  int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int maxDecompressedSize)
2075
2172
  {
2076
2173
  return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize,
@@ -2078,7 +2175,7 @@ int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int
2078
2175
  (BYTE*)dest, NULL, 0);
2079
2176
  }
2080
2177
 
2081
- LZ4_FORCE_O2_GCC_PPC64LE
2178
+ LZ4_FORCE_O2
2082
2179
  int LZ4_decompress_safe_partial(const char* src, char* dst, int compressedSize, int targetOutputSize, int dstCapacity)
2083
2180
  {
2084
2181
  dstCapacity = MIN(targetOutputSize, dstCapacity);
@@ -2087,7 +2184,7 @@ int LZ4_decompress_safe_partial(const char* src, char* dst, int compressedSize,
2087
2184
  noDict, (BYTE*)dst, NULL, 0);
2088
2185
  }
2089
2186
 
2090
- LZ4_FORCE_O2_GCC_PPC64LE
2187
+ LZ4_FORCE_O2
2091
2188
  int LZ4_decompress_fast(const char* source, char* dest, int originalSize)
2092
2189
  {
2093
2190
  return LZ4_decompress_generic(source, dest, 0, originalSize,
@@ -2097,7 +2194,7 @@ int LZ4_decompress_fast(const char* source, char* dest, int originalSize)
2097
2194
 
2098
2195
  /*===== Instantiate a few more decoding cases, used more than once. =====*/
2099
2196
 
2100
- LZ4_FORCE_O2_GCC_PPC64LE /* Exported, an obsolete API function. */
2197
+ LZ4_FORCE_O2 /* Exported, an obsolete API function. */
2101
2198
  int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int compressedSize, int maxOutputSize)
2102
2199
  {
2103
2200
  return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
@@ -2113,7 +2210,7 @@ int LZ4_decompress_fast_withPrefix64k(const char* source, char* dest, int origin
2113
2210
  return LZ4_decompress_fast(source, dest, originalSize);
2114
2211
  }
2115
2212
 
2116
- LZ4_FORCE_O2_GCC_PPC64LE
2213
+ LZ4_FORCE_O2
2117
2214
  static int LZ4_decompress_safe_withSmallPrefix(const char* source, char* dest, int compressedSize, int maxOutputSize,
2118
2215
  size_t prefixSize)
2119
2216
  {
@@ -2122,7 +2219,7 @@ static int LZ4_decompress_safe_withSmallPrefix(const char* source, char* dest, i
2122
2219
  (BYTE*)dest-prefixSize, NULL, 0);
2123
2220
  }
2124
2221
 
2125
- LZ4_FORCE_O2_GCC_PPC64LE
2222
+ LZ4_FORCE_O2
2126
2223
  int LZ4_decompress_safe_forceExtDict(const char* source, char* dest,
2127
2224
  int compressedSize, int maxOutputSize,
2128
2225
  const void* dictStart, size_t dictSize)
@@ -2132,7 +2229,7 @@ int LZ4_decompress_safe_forceExtDict(const char* source, char* dest,
2132
2229
  (BYTE*)dest, (const BYTE*)dictStart, dictSize);
2133
2230
  }
2134
2231
 
2135
- LZ4_FORCE_O2_GCC_PPC64LE
2232
+ LZ4_FORCE_O2
2136
2233
  static int LZ4_decompress_fast_extDict(const char* source, char* dest, int originalSize,
2137
2234
  const void* dictStart, size_t dictSize)
2138
2235
  {
@@ -2221,7 +2318,7 @@ int LZ4_decoderRingBufferSize(int maxBlockSize)
2221
2318
  If it's not possible, save the relevant part of decoded data into a safe buffer,
2222
2319
  and indicate where it stands using LZ4_setStreamDecode()
2223
2320
  */
2224
- LZ4_FORCE_O2_GCC_PPC64LE
2321
+ LZ4_FORCE_O2
2225
2322
  int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize)
2226
2323
  {
2227
2324
  LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse;
@@ -2261,7 +2358,7 @@ int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const ch
2261
2358
  return result;
2262
2359
  }
2263
2360
 
2264
- LZ4_FORCE_O2_GCC_PPC64LE
2361
+ LZ4_FORCE_O2
2265
2362
  int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize)
2266
2363
  {
2267
2364
  LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse;
@@ -2374,7 +2471,7 @@ int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize,
2374
2471
 
2375
2472
  /* Obsolete Streaming functions */
2376
2473
 
2377
- int LZ4_sizeofStreamState() { return LZ4_STREAMSIZE; }
2474
+ int LZ4_sizeofStreamState(void) { return LZ4_STREAMSIZE; }
2378
2475
 
2379
2476
  int LZ4_resetStreamState(void* state, char* inputBuffer)
2380
2477
  {