extlz4 0.3.1 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. checksums.yaml +4 -4
  2. data/README.md +9 -4
  3. data/bin/extlz4 +1 -1
  4. data/contrib/lz4/NEWS +36 -0
  5. data/contrib/lz4/README.md +11 -12
  6. data/contrib/lz4/build/README.md +55 -0
  7. data/contrib/lz4/build/VS2010/datagen/datagen.vcxproj +169 -0
  8. data/contrib/lz4/build/VS2010/frametest/frametest.vcxproj +176 -0
  9. data/contrib/lz4/build/VS2010/fullbench-dll/fullbench-dll.vcxproj +180 -0
  10. data/contrib/lz4/build/VS2010/fullbench/fullbench.vcxproj +176 -0
  11. data/contrib/lz4/build/VS2010/fuzzer/fuzzer.vcxproj +173 -0
  12. data/contrib/lz4/build/VS2010/liblz4-dll/liblz4-dll.rc +51 -0
  13. data/contrib/lz4/build/VS2010/liblz4-dll/liblz4-dll.vcxproj +179 -0
  14. data/contrib/lz4/build/VS2010/liblz4/liblz4.vcxproj +175 -0
  15. data/contrib/lz4/build/VS2010/lz4.sln +98 -0
  16. data/contrib/lz4/build/VS2010/lz4/lz4.rc +51 -0
  17. data/contrib/lz4/build/VS2010/lz4/lz4.vcxproj +189 -0
  18. data/contrib/lz4/build/VS2017/datagen/datagen.vcxproj +173 -0
  19. data/contrib/lz4/build/VS2017/frametest/frametest.vcxproj +180 -0
  20. data/contrib/lz4/build/VS2017/fullbench-dll/fullbench-dll.vcxproj +184 -0
  21. data/contrib/lz4/build/VS2017/fullbench/fullbench.vcxproj +180 -0
  22. data/contrib/lz4/build/VS2017/fuzzer/fuzzer.vcxproj +177 -0
  23. data/contrib/lz4/build/VS2017/liblz4-dll/liblz4-dll.rc +51 -0
  24. data/contrib/lz4/build/VS2017/liblz4-dll/liblz4-dll.vcxproj +183 -0
  25. data/contrib/lz4/build/VS2017/liblz4/liblz4.vcxproj +179 -0
  26. data/contrib/lz4/build/VS2017/lz4.sln +103 -0
  27. data/contrib/lz4/build/VS2017/lz4/lz4.rc +51 -0
  28. data/contrib/lz4/build/VS2017/lz4/lz4.vcxproj +164 -0
  29. data/contrib/lz4/build/cmake/CMakeLists.txt +235 -0
  30. data/contrib/lz4/lib/README.md +27 -10
  31. data/contrib/lz4/lib/lz4.c +327 -230
  32. data/contrib/lz4/lib/lz4.h +80 -70
  33. data/contrib/lz4/lib/lz4frame.c +93 -54
  34. data/contrib/lz4/lib/lz4frame.h +22 -14
  35. data/contrib/lz4/lib/lz4hc.c +192 -115
  36. data/contrib/lz4/lib/lz4hc.h +15 -40
  37. data/contrib/lz4/ossfuzz/Makefile +12 -8
  38. data/contrib/lz4/ossfuzz/compress_frame_fuzzer.c +11 -5
  39. data/contrib/lz4/ossfuzz/compress_fuzzer.c +9 -2
  40. data/contrib/lz4/ossfuzz/compress_hc_fuzzer.c +10 -3
  41. data/contrib/lz4/ossfuzz/decompress_frame_fuzzer.c +11 -3
  42. data/contrib/lz4/ossfuzz/decompress_fuzzer.c +6 -2
  43. data/contrib/lz4/ossfuzz/fuzz_data_producer.c +77 -0
  44. data/contrib/lz4/ossfuzz/fuzz_data_producer.h +36 -0
  45. data/contrib/lz4/ossfuzz/round_trip_frame_fuzzer.c +8 -4
  46. data/contrib/lz4/ossfuzz/round_trip_fuzzer.c +9 -2
  47. data/contrib/lz4/ossfuzz/round_trip_hc_fuzzer.c +7 -2
  48. data/contrib/lz4/ossfuzz/travisoss.sh +6 -1
  49. data/contrib/lz4/tmp +0 -0
  50. data/contrib/lz4/tmpsparse +0 -0
  51. data/ext/extlz4.c +2 -0
  52. data/ext/extlz4.h +5 -0
  53. data/ext/hashargs.c +1 -1
  54. data/ext/hashargs.h +1 -1
  55. data/gemstub.rb +3 -14
  56. data/lib/extlz4.rb +0 -2
  57. data/lib/extlz4/oldstream.rb +1 -1
  58. metadata +40 -25
  59. data/lib/extlz4/version.rb +0 -3
data/contrib/lz4/lib/README.md

@@ -35,21 +35,22 @@ So it's necessary to include all `*.c` and `*.h` files present in `/lib`.
 
 Definitions which are not guaranteed to remain stable in future versions,
 are protected behind macros, such as `LZ4_STATIC_LINKING_ONLY`.
- As the name implies, these definitions can only be invoked
+ As the name strongly implies, these definitions should only be invoked
 in the context of static linking ***only***.
 Otherwise, dependent application may fail on API or ABI break in the future.
- The associated symbols are also not present in dynamic library by default.
+ The associated symbols are also not exposed by the dynamic library by default.
 Should they be nonetheless needed, it's possible to force their publication
- by using build macro `LZ4_PUBLISH_STATIC_FUNCTIONS`.
+ by using build macros `LZ4_PUBLISH_STATIC_FUNCTIONS`
+ and `LZ4F_PUBLISH_STATIC_FUNCTIONS`.
 
 
 #### Build macros
 
- The following build macro can be selected at compilation time :
+ The following build macro can be selected to adjust source code behavior at compilation time :
 
- - `LZ4_FAST_DEC_LOOP` : this triggers the optimized decompression loop.
- This loops works great on x86/x64 cpus, and is automatically enabled on this platform.
- It's possible to enable or disable it manually, by passing `LZ4_FAST_DEC_LOOP=1` or `0` to the preprocessor.
+ - `LZ4_FAST_DEC_LOOP` : this triggers a speed optimized decompression loop, more powerful on modern cpus.
+ This loop works great on `x86`, `x64` and `aarch64` cpus, and is automatically enabled for them.
+ It's also possible to enable or disable it manually, by passing `LZ4_FAST_DEC_LOOP=1` or `0` to the preprocessor.
 For example, with `gcc` : `-DLZ4_FAST_DEC_LOOP=1`,
 and with `make` : `CPPFLAGS+=-DLZ4_FAST_DEC_LOOP=1 make lz4`.
 
@@ -65,8 +66,24 @@ The following build macro can be selected at compilation time :
 Should this be a problem, it's generally possible to make the compiler ignore these warnings,
 for example with `-Wno-deprecated-declarations` on `gcc`,
 or `_CRT_SECURE_NO_WARNINGS` for Visual Studio.
- Another method is to define `LZ4_DISABLE_DEPRECATE_WARNINGS`
- before including the LZ4 header files.
+ This build macro offers another project-specific method
+ by defining `LZ4_DISABLE_DEPRECATE_WARNINGS` before including the LZ4 header files.
+
+ - `LZ4_USER_MEMORY_FUNCTIONS` : replace calls to <stdlib>'s `malloc`, `calloc` and `free`
+ by user-defined functions, which must be called `LZ4_malloc()`, `LZ4_calloc()` and `LZ4_free()`.
+ User functions must be available at link time.
+
+ - `LZ4_FORCE_SW_BITCOUNT` : by default, the compression algorithm tries to determine lengths
+ by using bitcount instructions, generally implemented as fast single instructions in many cpus.
+ In case the target cpus doesn't support it, or compiler intrinsic doesn't work, or feature bad performance,
+ it's possible to use an optimized software path instead.
+ This is achieved by setting this build macros .
+ In most cases, it's not expected to be necessary,
+ but it can be legitimately considered for less common platforms.
+
+ - `LZ4_ALIGN_TEST` : alignment test ensures that the memory area
+ passed as argument to become a compression state is suitably aligned.
+ This test can be disabled if it proves flaky, by setting this value to 0.
 
 
 #### Amalgamation
@@ -102,7 +119,7 @@ The compiled executable will require LZ4 DLL which is available at `dll\liblz4.d
 
 #### Miscellaneous
 
- Other files present in the directory are not source code. There are :
+ Other files present in the directory are not source code. They are :
 
 - `LICENSE` : contains the BSD license text
 - `Makefile` : `make` script to compile and install lz4 library (static and dynamic)
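Note: the `LZ4_USER_MEMORY_FUNCTIONS` macro documented in the hunk above only declares the three replacement prototypes; the definitions must be supplied by the embedding project at link time. A minimal illustrative sketch (not part of extlz4 or lz4 itself; routing to `stdlib` here is just a placeholder for whatever allocator the project actually uses):

```c
/* Illustrative only: build lz4.c with -DLZ4_USER_MEMORY_FUNCTIONS and
 * provide these three symbols somewhere in the project. */
#include <stddef.h>
#include <stdlib.h>

void* LZ4_malloc(size_t s)           { return malloc(s); }     /* swap in a custom arena if desired */
void* LZ4_calloc(size_t n, size_t s) { return calloc(n, s); }
void  LZ4_free(void* p)              { free(p); }
```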
data/contrib/lz4/lib/lz4.c

@@ -45,10 +45,16 @@
 #endif
 
 /*
- * ACCELERATION_DEFAULT :
+ * LZ4_ACCELERATION_DEFAULT :
 * Select "acceleration" for LZ4_compress_fast() when parameter value <= 0
 */
- #define ACCELERATION_DEFAULT 1
+ #define LZ4_ACCELERATION_DEFAULT 1
+ /*
+ * LZ4_ACCELERATION_MAX :
+ * Any "acceleration" value higher than this threshold
+ * get treated as LZ4_ACCELERATION_MAX instead (fix #876)
+ */
+ #define LZ4_ACCELERATION_MAX 65537
 
 
 /*-************************************
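For callers, the practical effect of the two constants above is that out-of-range `acceleration` arguments to `LZ4_compress_fast()` are normalized instead of being used as-is: values below 1 fall back to `LZ4_ACCELERATION_DEFAULT`, and values above `LZ4_ACCELERATION_MAX` are clamped. A hedged usage sketch (standard public API; buffer sizes and payload are arbitrary):

```c
#include <stdio.h>
#include "lz4.h"

int main(void)
{
    const char src[] = "an example payload, repeated example payload, example payload";
    char dst[256];

    /* acceleration 0 is treated as the default (1); an absurdly large value is clamped. */
    int n1 = LZ4_compress_fast(src, dst, (int)sizeof(src), (int)sizeof(dst), 0);
    int n2 = LZ4_compress_fast(src, dst, (int)sizeof(src), (int)sizeof(dst), 1 << 30);
    printf("compressed sizes: %d and %d\n", n1, n2);
    return 0;
}
```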
@@ -82,6 +88,7 @@
 * Define this parameter if your target system or compiler does not support hardware bit count
 */
 #if defined(_MSC_VER) && defined(_WIN32_WCE) /* Visual Studio for WinCE doesn't support Hardware bit count */
+ # undef LZ4_FORCE_SW_BITCOUNT /* avoid double def */
 # define LZ4_FORCE_SW_BITCOUNT
 #endif
 
@@ -114,10 +121,9 @@
 /*-************************************
 * Compiler Options
 **************************************/
- #ifdef _MSC_VER /* Visual Studio */
- # include <intrin.h>
- # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
- # pragma warning(disable : 4293) /* disable: C4293: too large shift (32-bits) */
+ #if defined(_MSC_VER) && (_MSC_VER >= 1400) /* Visual Studio 2005+ */
+ # include <intrin.h> /* only present in VS2005+ */
+ # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
 #endif /* _MSC_VER */
 
 #ifndef LZ4_FORCE_INLINE
@@ -136,7 +142,7 @@
 # endif /* _MSC_VER */
 #endif /* LZ4_FORCE_INLINE */
 
- /* LZ4_FORCE_O2_GCC_PPC64LE and LZ4_FORCE_O2_INLINE_GCC_PPC64LE
+ /* LZ4_FORCE_O2 and LZ4_FORCE_INLINE
 * gcc on ppc64le generates an unrolled SIMDized loop for LZ4_wildCopy8,
 * together with a simple 8-byte copy loop as a fall-back path.
 * However, this optimization hurts the decompression speed by >30%,
@@ -151,11 +157,11 @@
 * of LZ4_wildCopy8 does not affect the compression speed.
 */
 #if defined(__PPC64__) && defined(__LITTLE_ENDIAN__) && defined(__GNUC__) && !defined(__clang__)
- # define LZ4_FORCE_O2_GCC_PPC64LE __attribute__((optimize("O2")))
- # define LZ4_FORCE_O2_INLINE_GCC_PPC64LE __attribute__((optimize("O2"))) LZ4_FORCE_INLINE
+ # define LZ4_FORCE_O2 __attribute__((optimize("O2")))
+ # undef LZ4_FORCE_INLINE
+ # define LZ4_FORCE_INLINE static __inline __attribute__((optimize("O2"),always_inline))
 #else
- # define LZ4_FORCE_O2_GCC_PPC64LE
- # define LZ4_FORCE_O2_INLINE_GCC_PPC64LE static
+ # define LZ4_FORCE_O2
 #endif
 
 #if (defined(__GNUC__) && (__GNUC__ >= 3)) || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) || defined(__clang__)
@@ -171,14 +177,33 @@
 #define unlikely(expr) expect((expr) != 0, 0)
 #endif
 
+ /* Should the alignment test prove unreliable, for some reason,
+ * it can be disabled by setting LZ4_ALIGN_TEST to 0 */
+ #ifndef LZ4_ALIGN_TEST /* can be externally provided */
+ # define LZ4_ALIGN_TEST 1
+ #endif
+
 
 /*-************************************
 * Memory routines
 **************************************/
- #include <stdlib.h> /* malloc, calloc, free */
- #define ALLOC(s) malloc(s)
- #define ALLOC_AND_ZERO(s) calloc(1,s)
- #define FREEMEM(p) free(p)
+ #ifdef LZ4_USER_MEMORY_FUNCTIONS
+ /* memory management functions can be customized by user project.
+ * Below functions must exist somewhere in the Project
+ * and be available at link time */
+ void* LZ4_malloc(size_t s);
+ void* LZ4_calloc(size_t n, size_t s);
+ void LZ4_free(void* p);
+ # define ALLOC(s) LZ4_malloc(s)
+ # define ALLOC_AND_ZERO(s) LZ4_calloc(1,s)
+ # define FREEMEM(p) LZ4_free(p)
+ #else
+ # include <stdlib.h> /* malloc, calloc, free */
+ # define ALLOC(s) malloc(s)
+ # define ALLOC_AND_ZERO(s) calloc(1,s)
+ # define FREEMEM(p) free(p)
+ #endif
+
 #include <string.h> /* memset, memcpy */
 #define MEM_INIT(p,v,s) memset((p),(v),(s))
 
@@ -225,21 +250,27 @@ static const int LZ4_minLength = (MFLIMIT+1);
 
 #if defined(LZ4_DEBUG) && (LZ4_DEBUG>=2)
 # include <stdio.h>
- static int g_debuglog_enable = 1;
- # define DEBUGLOG(l, ...) { \
- if ((g_debuglog_enable) && (l<=LZ4_DEBUG)) { \
- fprintf(stderr, __FILE__ ": "); \
- fprintf(stderr, __VA_ARGS__); \
- fprintf(stderr, " \n"); \
- } }
+ static int g_debuglog_enable = 1;
+ # define DEBUGLOG(l, ...) { \
+ if ((g_debuglog_enable) && (l<=LZ4_DEBUG)) { \
+ fprintf(stderr, __FILE__ ": "); \
+ fprintf(stderr, __VA_ARGS__); \
+ fprintf(stderr, " \n"); \
+ } }
 #else
- # define DEBUGLOG(l, ...) {} /* disabled */
+ # define DEBUGLOG(l, ...) {} /* disabled */
 #endif
 
+ static int LZ4_isAligned(const void* ptr, size_t alignment)
+ {
+ return ((size_t)ptr & (alignment -1)) == 0;
+ }
+
 
 /*-************************************
 * Types
 **************************************/
+ #include <limits.h>
 #if defined(__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
 # include <stdint.h>
 typedef uint8_t BYTE;
@@ -249,6 +280,9 @@ static int g_debuglog_enable = 1;
 typedef uint64_t U64;
 typedef uintptr_t uptrval;
 #else
+ # if UINT_MAX != 4294967295UL
+ # error "LZ4 code (when not C++ or C99) assumes that sizeof(int) == 4"
+ # endif
 typedef unsigned char BYTE;
 typedef unsigned short U16;
 typedef unsigned int U32;
@@ -273,6 +307,21 @@ typedef enum {
 /*-************************************
 * Reading and writing into memory
 **************************************/
+
+ /**
+ * LZ4 relies on memcpy with a constant size being inlined. In freestanding
+ * environments, the compiler can't assume the implementation of memcpy() is
+ * standard compliant, so it can't apply its specialized memcpy() inlining
+ * logic. When possible, use __builtin_memcpy() to tell the compiler to analyze
+ * memcpy() as if it were standard compliant, so it can inline it in freestanding
+ * environments. This is needed when decompressing the Linux Kernel, for example.
+ */
+ #if defined(__GNUC__) && (__GNUC__ >= 4)
+ #define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size)
+ #else
+ #define LZ4_memcpy(dst, src, size) memcpy(dst, src, size)
+ #endif
+
 static unsigned LZ4_isLittleEndian(void)
 {
 const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
@@ -307,27 +356,27 @@ static void LZ4_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = val
 
 static U16 LZ4_read16(const void* memPtr)
 {
- U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
+ U16 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val;
 }
 
 static U32 LZ4_read32(const void* memPtr)
 {
- U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
+ U32 val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val;
 }
 
 static reg_t LZ4_read_ARCH(const void* memPtr)
 {
- reg_t val; memcpy(&val, memPtr, sizeof(val)); return val;
+ reg_t val; LZ4_memcpy(&val, memPtr, sizeof(val)); return val;
 }
 
 static void LZ4_write16(void* memPtr, U16 value)
 {
- memcpy(memPtr, &value, sizeof(value));
+ LZ4_memcpy(memPtr, &value, sizeof(value));
 }
 
 static void LZ4_write32(void* memPtr, U32 value)
 {
- memcpy(memPtr, &value, sizeof(value));
+ LZ4_memcpy(memPtr, &value, sizeof(value));
 }
 
 #endif /* LZ4_FORCE_MEMORY_ACCESS */
@@ -355,14 +404,14 @@ static void LZ4_writeLE16(void* memPtr, U16 value)
 }
 
 /* customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd */
- LZ4_FORCE_O2_INLINE_GCC_PPC64LE
+ LZ4_FORCE_INLINE
 void LZ4_wildCopy8(void* dstPtr, const void* srcPtr, void* dstEnd)
 {
 BYTE* d = (BYTE*)dstPtr;
 const BYTE* s = (const BYTE*)srcPtr;
 BYTE* const e = (BYTE*)dstEnd;
 
- do { memcpy(d,s,8); d+=8; s+=8; } while (d<e);
+ do { LZ4_memcpy(d,s,8); d+=8; s+=8; } while (d<e);
 }
 
 static const unsigned inc32table[8] = {0, 1, 2, 1, 0, 4, 4, 4};
@@ -370,12 +419,12 @@ static const int dec64table[8] = {0, 0, 0, -1, -4, 1, 2, 3};
 
 
 #ifndef LZ4_FAST_DEC_LOOP
- # if defined(__i386__) || defined(__x86_64__)
+ # if defined __i386__ || defined _M_IX86 || defined __x86_64__ || defined _M_X64
 # define LZ4_FAST_DEC_LOOP 1
 # elif defined(__aarch64__) && !defined(__clang__)
 /* On aarch64, we disable this optimization for clang because on certain
- * mobile chipsets and clang, it reduces performance. For more information
- * refer to https://github.com/lz4/lz4/pull/707. */
+ * mobile chipsets, performance is reduced with clang. For information
+ * refer to https://github.com/lz4/lz4/pull/707 */
 # define LZ4_FAST_DEC_LOOP 1
 # else
 # define LZ4_FAST_DEC_LOOP 0
@@ -384,20 +433,22 @@ static const int dec64table[8] = {0, 0, 0, -1, -4, 1, 2, 3};
 
 #if LZ4_FAST_DEC_LOOP
 
- LZ4_FORCE_O2_INLINE_GCC_PPC64LE void
+ LZ4_FORCE_INLINE void
 LZ4_memcpy_using_offset_base(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
 {
+ assert(srcPtr + offset == dstPtr);
 if (offset < 8) {
+ LZ4_write32(dstPtr, 0); /* silence an msan warning when offset==0 */
 dstPtr[0] = srcPtr[0];
 dstPtr[1] = srcPtr[1];
 dstPtr[2] = srcPtr[2];
 dstPtr[3] = srcPtr[3];
 srcPtr += inc32table[offset];
- memcpy(dstPtr+4, srcPtr, 4);
+ LZ4_memcpy(dstPtr+4, srcPtr, 4);
 srcPtr -= dec64table[offset];
 dstPtr += 8;
 } else {
- memcpy(dstPtr, srcPtr, 8);
+ LZ4_memcpy(dstPtr, srcPtr, 8);
 dstPtr += 8;
 srcPtr += 8;
 }
@@ -408,49 +459,48 @@ LZ4_memcpy_using_offset_base(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, con
 /* customized variant of memcpy, which can overwrite up to 32 bytes beyond dstEnd
 * this version copies two times 16 bytes (instead of one time 32 bytes)
 * because it must be compatible with offsets >= 16. */
- LZ4_FORCE_O2_INLINE_GCC_PPC64LE void
+ LZ4_FORCE_INLINE void
 LZ4_wildCopy32(void* dstPtr, const void* srcPtr, void* dstEnd)
 {
 BYTE* d = (BYTE*)dstPtr;
 const BYTE* s = (const BYTE*)srcPtr;
 BYTE* const e = (BYTE*)dstEnd;
 
- do { memcpy(d,s,16); memcpy(d+16,s+16,16); d+=32; s+=32; } while (d<e);
+ do { LZ4_memcpy(d,s,16); LZ4_memcpy(d+16,s+16,16); d+=32; s+=32; } while (d<e);
 }
 
 /* LZ4_memcpy_using_offset() presumes :
 * - dstEnd >= dstPtr + MINMATCH
 * - there is at least 8 bytes available to write after dstEnd */
- LZ4_FORCE_O2_INLINE_GCC_PPC64LE void
+ LZ4_FORCE_INLINE void
 LZ4_memcpy_using_offset(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const size_t offset)
 {
 BYTE v[8];
 
 assert(dstEnd >= dstPtr + MINMATCH);
- LZ4_write32(dstPtr, 0); /* silence an msan warning when offset==0 */
 
 switch(offset) {
 case 1:
- memset(v, *srcPtr, 8);
+ MEM_INIT(v, *srcPtr, 8);
 break;
 case 2:
- memcpy(v, srcPtr, 2);
- memcpy(&v[2], srcPtr, 2);
- memcpy(&v[4], &v[0], 4);
+ LZ4_memcpy(v, srcPtr, 2);
+ LZ4_memcpy(&v[2], srcPtr, 2);
+ LZ4_memcpy(&v[4], v, 4);
 break;
 case 4:
- memcpy(v, srcPtr, 4);
- memcpy(&v[4], srcPtr, 4);
+ LZ4_memcpy(v, srcPtr, 4);
+ LZ4_memcpy(&v[4], srcPtr, 4);
 break;
 default:
 LZ4_memcpy_using_offset_base(dstPtr, srcPtr, dstEnd, offset);
 return;
 }
 
- memcpy(dstPtr, v, 8);
+ LZ4_memcpy(dstPtr, v, 8);
 dstPtr += 8;
 while (dstPtr < dstEnd) {
- memcpy(dstPtr, v, 8);
+ LZ4_memcpy(dstPtr, v, 8);
 dstPtr += 8;
 }
 }
@@ -462,75 +512,92 @@ LZ4_memcpy_using_offset(BYTE* dstPtr, const BYTE* srcPtr, BYTE* dstEnd, const si
 **************************************/
 static unsigned LZ4_NbCommonBytes (reg_t val)
 {
+ assert(val != 0);
 if (LZ4_isLittleEndian()) {
- if (sizeof(val)==8) {
- # if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ if (sizeof(val) == 8) {
+ # if defined(_MSC_VER) && (_MSC_VER >= 1800) && defined(_M_AMD64) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ /* x64 CPUS without BMI support interpret `TZCNT` as `REP BSF` */
+ return (unsigned)_tzcnt_u64(val) >> 3;
+ # elif defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
 unsigned long r = 0;
- _BitScanForward64( &r, (U64)val );
- return (int)(r>>3);
- # elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ _BitScanForward64(&r, (U64)val);
+ return (unsigned)r >> 3;
+ # elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
+ ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
+ !defined(LZ4_FORCE_SW_BITCOUNT)
 return (unsigned)__builtin_ctzll((U64)val) >> 3;
 # else
- static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2,
- 0, 3, 1, 3, 1, 4, 2, 7,
- 0, 2, 3, 6, 1, 5, 3, 5,
- 1, 3, 4, 4, 2, 5, 6, 7,
- 7, 0, 1, 2, 3, 3, 4, 6,
- 2, 6, 5, 5, 3, 4, 5, 6,
- 7, 1, 2, 4, 6, 4, 4, 5,
- 7, 2, 6, 5, 7, 6, 7, 7 };
- return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
+ const U64 m = 0x0101010101010101ULL;
+ val ^= val - 1;
+ return (unsigned)(((U64)((val & (m - 1)) * m)) >> 56);
 # endif
 } else /* 32 bits */ {
- # if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ # if defined(_MSC_VER) && (_MSC_VER >= 1400) && !defined(LZ4_FORCE_SW_BITCOUNT)
 unsigned long r;
- _BitScanForward( &r, (U32)val );
- return (int)(r>>3);
- # elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ _BitScanForward(&r, (U32)val);
+ return (unsigned)r >> 3;
+ # elif (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
+ ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
+ !defined(__TINYC__) && !defined(LZ4_FORCE_SW_BITCOUNT)
 return (unsigned)__builtin_ctz((U32)val) >> 3;
 # else
- static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0,
- 3, 2, 2, 1, 3, 2, 0, 1,
- 3, 3, 1, 2, 2, 2, 2, 0,
- 3, 1, 2, 0, 1, 0, 1, 1 };
- return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
+ const U32 m = 0x01010101;
+ return (unsigned)((((val - 1) ^ val) & (m - 1)) * m) >> 24;
 # endif
 }
 } else /* Big Endian CPU */ {
- if (sizeof(val)==8) { /* 64-bits */
- # if defined(_MSC_VER) && defined(_WIN64) && !defined(LZ4_FORCE_SW_BITCOUNT)
- unsigned long r = 0;
- _BitScanReverse64( &r, val );
- return (unsigned)(r>>3);
- # elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ if (sizeof(val)==8) {
+ # if (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
+ ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
+ !defined(__TINYC__) && !defined(LZ4_FORCE_SW_BITCOUNT)
 return (unsigned)__builtin_clzll((U64)val) >> 3;
 # else
+ #if 1
+ /* this method is probably faster,
+ * but adds a 128 bytes lookup table */
+ static const unsigned char ctz7_tab[128] = {
+ 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ };
+ U64 const mask = 0x0101010101010101ULL;
+ U64 const t = (((val >> 8) - mask) | val) & mask;
+ return ctz7_tab[(t * 0x0080402010080402ULL) >> 57];
+ #else
+ /* this method doesn't consume memory space like the previous one,
+ * but it contains several branches,
+ * that may end up slowing execution */
 static const U32 by32 = sizeof(val)*4; /* 32 on 64 bits (goal), 16 on 32 bits.
- Just to avoid some static analyzer complaining about shift by 32 on 32-bits target.
- Note that this code path is never triggered in 32-bits mode. */
+ Just to avoid some static analyzer complaining about shift by 32 on 32-bits target.
+ Note that this code path is never triggered in 32-bits mode. */
 unsigned r;
 if (!(val>>by32)) { r=4; } else { r=0; val>>=by32; }
 if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
 r += (!val);
 return r;
+ #endif
 # endif
 } else /* 32 bits */ {
- # if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
- unsigned long r = 0;
- _BitScanReverse( &r, (unsigned long)val );
- return (unsigned)(r>>3);
- # elif (defined(__clang__) || (defined(__GNUC__) && (__GNUC__>=3))) && !defined(LZ4_FORCE_SW_BITCOUNT)
+ # if (defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ > 3) || \
+ ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))) && \
+ !defined(LZ4_FORCE_SW_BITCOUNT)
 return (unsigned)__builtin_clz((U32)val) >> 3;
 # else
- unsigned r;
- if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
- r += (!val);
- return r;
+ val >>= 8;
+ val = ((((val + 0x00FFFF00) | 0x00FFFFFF) + val) |
+ (val + 0x00FF0000)) >> 24;
+ return (unsigned)val ^ 3;
 # endif
 }
 }
 }
 
+
 #define STEPSIZE sizeof(reg_t)
 LZ4_FORCE_INLINE
 unsigned LZ4_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* pInLimit)
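The `# else` fallbacks above replace the old DeBruijn lookup tables with branch-free byte-counting arithmetic. A small standalone harness (an editorial sketch, not part of the package; it assumes a little-endian host and GCC/Clang for `__builtin_ctzll`) can be used to sanity-check the new 64-bit little-endian path against the hardware intrinsic:

```c
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Mirrors the new little-endian 64-bit software path of LZ4_NbCommonBytes():
 * counts how many low-order bytes of val are zero (val must be non-zero). */
static unsigned nb_common_bytes_sw(uint64_t val)
{
    const uint64_t m = 0x0101010101010101ULL;
    val ^= val - 1;                                  /* ones up to and including the lowest set bit */
    return (unsigned)(((val & (m - 1)) * m) >> 56);  /* multiply sums the per-byte flags into the top byte */
}

int main(void)
{
    for (unsigned bit = 0; bit < 64; bit++) {
        uint64_t v = 1ULL << bit;
        assert(nb_common_bytes_sw(v) == (unsigned)(__builtin_ctzll(v) >> 3));
    }
    puts("software bytecount matches __builtin_ctzll on all single-bit inputs");
    return 0;
}
```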
@@ -605,7 +672,7 @@ typedef enum { noDictIssue = 0, dictSmall } dictIssue_directive;
605
672
  int LZ4_versionNumber (void) { return LZ4_VERSION_NUMBER; }
606
673
  const char* LZ4_versionString(void) { return LZ4_VERSION_STRING; }
607
674
  int LZ4_compressBound(int isize) { return LZ4_COMPRESSBOUND(isize); }
608
- int LZ4_sizeofState() { return LZ4_STREAMSIZE; }
675
+ int LZ4_sizeofState(void) { return LZ4_STREAMSIZE; }
609
676
 
610
677
 
611
678
  /*-************************************
@@ -628,7 +695,7 @@ int LZ4_decompress_safe_forceExtDict(const char* source, char* dest,
628
695
  /*-******************************
629
696
  * Compression functions
630
697
  ********************************/
631
- static U32 LZ4_hash4(U32 sequence, tableType_t const tableType)
698
+ LZ4_FORCE_INLINE U32 LZ4_hash4(U32 sequence, tableType_t const tableType)
632
699
  {
633
700
  if (tableType == byU16)
634
701
  return ((sequence * 2654435761U) >> ((MINMATCH*8)-(LZ4_HASHLOG+1)));
@@ -636,7 +703,7 @@ static U32 LZ4_hash4(U32 sequence, tableType_t const tableType)
636
703
  return ((sequence * 2654435761U) >> ((MINMATCH*8)-LZ4_HASHLOG));
637
704
  }
638
705
 
639
- static U32 LZ4_hash5(U64 sequence, tableType_t const tableType)
706
+ LZ4_FORCE_INLINE U32 LZ4_hash5(U64 sequence, tableType_t const tableType)
640
707
  {
641
708
  const U32 hashLog = (tableType == byU16) ? LZ4_HASHLOG+1 : LZ4_HASHLOG;
642
709
  if (LZ4_isLittleEndian()) {
@@ -654,7 +721,7 @@ LZ4_FORCE_INLINE U32 LZ4_hashPosition(const void* const p, tableType_t const tab
654
721
  return LZ4_hash4(LZ4_read32(p), tableType);
655
722
  }
656
723
 
657
- static void LZ4_clearHash(U32 h, void* tableBase, tableType_t const tableType)
724
+ LZ4_FORCE_INLINE void LZ4_clearHash(U32 h, void* tableBase, tableType_t const tableType)
658
725
  {
659
726
  switch (tableType)
660
727
  {
@@ -666,7 +733,7 @@ static void LZ4_clearHash(U32 h, void* tableBase, tableType_t const tableType)
666
733
  }
667
734
  }
668
735
 
669
- static void LZ4_putIndexOnHash(U32 idx, U32 h, void* tableBase, tableType_t const tableType)
736
+ LZ4_FORCE_INLINE void LZ4_putIndexOnHash(U32 idx, U32 h, void* tableBase, tableType_t const tableType)
670
737
  {
671
738
  switch (tableType)
672
739
  {
@@ -678,7 +745,7 @@ static void LZ4_putIndexOnHash(U32 idx, U32 h, void* tableBase, tableType_t cons
678
745
  }
679
746
  }
680
747
 
681
- static void LZ4_putPositionOnHash(const BYTE* p, U32 h,
748
+ LZ4_FORCE_INLINE void LZ4_putPositionOnHash(const BYTE* p, U32 h,
682
749
  void* tableBase, tableType_t const tableType,
683
750
  const BYTE* srcBase)
684
751
  {
@@ -703,7 +770,7 @@ LZ4_FORCE_INLINE void LZ4_putPosition(const BYTE* p, void* tableBase, tableType_
703
770
  * Assumption 1 : only valid if tableType == byU32 or byU16.
704
771
  * Assumption 2 : h is presumed valid (within limits of hash table)
705
772
  */
706
- static U32 LZ4_getIndexOnHash(U32 h, const void* tableBase, tableType_t tableType)
773
+ LZ4_FORCE_INLINE U32 LZ4_getIndexOnHash(U32 h, const void* tableBase, tableType_t tableType)
707
774
  {
708
775
  LZ4_STATIC_ASSERT(LZ4_MEMORY_USAGE > 2);
709
776
  if (tableType == byU32) {
@@ -739,22 +806,13 @@ LZ4_FORCE_INLINE void
739
806
  LZ4_prepareTable(LZ4_stream_t_internal* const cctx,
740
807
  const int inputSize,
741
808
  const tableType_t tableType) {
742
- /* If compression failed during the previous step, then the context
743
- * is marked as dirty, therefore, it has to be fully reset.
744
- */
745
- if (cctx->dirty) {
746
- DEBUGLOG(5, "LZ4_prepareTable: Full reset for %p", cctx);
747
- MEM_INIT(cctx, 0, sizeof(LZ4_stream_t_internal));
748
- return;
749
- }
750
-
751
809
  /* If the table hasn't been used, it's guaranteed to be zeroed out, and is
752
810
  * therefore safe to use no matter what mode we're in. Otherwise, we figure
753
811
  * out if it's safe to leave as is or whether it needs to be reset.
754
812
  */
755
- if (cctx->tableType != clearedTable) {
813
+ if ((tableType_t)cctx->tableType != clearedTable) {
756
814
  assert(inputSize >= 0);
757
- if (cctx->tableType != tableType
815
+ if ((tableType_t)cctx->tableType != tableType
758
816
  || ((tableType == byU16) && cctx->currentOffset + (unsigned)inputSize >= 0xFFFFU)
759
817
  || ((tableType == byU32) && cctx->currentOffset > 1 GB)
760
818
  || tableType == byPtr
@@ -763,7 +821,7 @@ LZ4_prepareTable(LZ4_stream_t_internal* const cctx,
763
821
  DEBUGLOG(4, "LZ4_prepareTable: Resetting table in %p", cctx);
764
822
  MEM_INIT(cctx->hashTable, 0, LZ4_HASHTABLESIZE);
765
823
  cctx->currentOffset = 0;
766
- cctx->tableType = clearedTable;
824
+ cctx->tableType = (U32)clearedTable;
767
825
  } else {
768
826
  DEBUGLOG(4, "LZ4_prepareTable: Re-use hash table (no reset)");
769
827
  }
@@ -785,8 +843,12 @@ LZ4_prepareTable(LZ4_stream_t_internal* const cctx,
785
843
  }
786
844
 
787
845
  /** LZ4_compress_generic() :
788
- inlined, to ensure branches are decided at compilation time */
789
- LZ4_FORCE_INLINE int LZ4_compress_generic(
846
+ * inlined, to ensure branches are decided at compilation time.
847
+ * Presumed already validated at this stage:
848
+ * - source != NULL
849
+ * - inputSize > 0
850
+ */
851
+ LZ4_FORCE_INLINE int LZ4_compress_generic_validated(
790
852
  LZ4_stream_t_internal* const cctx,
791
853
  const char* const source,
792
854
  char* const dest,
@@ -815,7 +877,7 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
815
877
 
816
878
  int const maybe_extMem = (dictDirective == usingExtDict) || (dictDirective == usingDictCtx);
817
879
  U32 const prefixIdxLimit = startIndex - dictSize; /* used when dictDirective == dictSmall */
818
- const BYTE* const dictEnd = dictionary + dictSize;
880
+ const BYTE* const dictEnd = dictionary ? dictionary + dictSize : dictionary;
819
881
  const BYTE* anchor = (const BYTE*) source;
820
882
  const BYTE* const iend = ip + inputSize;
821
883
  const BYTE* const mflimitPlusOne = iend - MFLIMIT + 1;
@@ -823,7 +885,7 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
823
885
 
824
886
  /* the dictCtx currentOffset is indexed on the start of the dictionary,
825
887
  * while a dictionary in the current context precedes the currentOffset */
826
- const BYTE* dictBase = (dictDirective == usingDictCtx) ?
888
+ const BYTE* dictBase = !dictionary ? NULL : (dictDirective == usingDictCtx) ?
827
889
  dictionary + dictSize - dictCtx->currentOffset :
828
890
  dictionary + dictSize - startIndex;
829
891
 
@@ -833,11 +895,11 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
833
895
  U32 offset = 0;
834
896
  U32 forwardH;
835
897
 
836
- DEBUGLOG(5, "LZ4_compress_generic: srcSize=%i, tableType=%u", inputSize, tableType);
898
+ DEBUGLOG(5, "LZ4_compress_generic_validated: srcSize=%i, tableType=%u", inputSize, tableType);
899
+ assert(ip != NULL);
837
900
  /* If init conditions are not met, we don't have to mark stream
838
901
  * as having dirty context, since no action was taken yet */
839
902
  if (outputDirective == fillOutput && maxOutputSize < 1) { return 0; } /* Impossible to store anything */
840
- if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) { return 0; } /* Unsupported inputSize, too large (or negative) */
841
903
  if ((tableType == byU16) && (inputSize>=LZ4_64Klimit)) { return 0; } /* Size too large (not within 64K limit) */
842
904
  if (tableType==byPtr) assert(dictDirective==noDict); /* only supported use case with byPtr */
843
905
  assert(acceleration >= 1);
@@ -854,7 +916,7 @@ LZ4_FORCE_INLINE int LZ4_compress_generic(
854
916
  cctx->dictSize += (U32)inputSize;
855
917
  }
856
918
  cctx->currentOffset += (U32)inputSize;
857
- cctx->tableType = (U16)tableType;
919
+ cctx->tableType = (U32)tableType;
858
920
 
859
921
  if (inputSize<LZ4_minLength) goto _last_literals; /* Input too small, no compression (all literals) */
860
922
 
@@ -1147,13 +1209,14 @@ _last_literals:
1147
1209
  if (outputDirective == fillOutput) {
1148
1210
  /* adapt lastRun to fill 'dst' */
1149
1211
  assert(olimit >= op);
1150
- lastRun = (size_t)(olimit-op) - 1;
1151
- lastRun -= (lastRun+240)/255;
1212
+ lastRun = (size_t)(olimit-op) - 1/*token*/;
1213
+ lastRun -= (lastRun + 256 - RUN_MASK) / 256; /*additional length tokens*/
1152
1214
  } else {
1153
1215
  assert(outputDirective == limitedOutput);
1154
1216
  return 0; /* cannot compress within `dst` budget. Stored indexes in hash table are nonetheless fine */
1155
1217
  }
1156
1218
  }
1219
+ DEBUGLOG(6, "Final literal run : %i literals", (int)lastRun);
1157
1220
  if (lastRun >= RUN_MASK) {
1158
1221
  size_t accumulator = lastRun - RUN_MASK;
1159
1222
  *op++ = RUN_MASK << ML_BITS;
@@ -1162,7 +1225,7 @@ _last_literals:
1162
1225
  } else {
1163
1226
  *op++ = (BYTE)(lastRun<<ML_BITS);
1164
1227
  }
1165
- memcpy(op, anchor, lastRun);
1228
+ LZ4_memcpy(op, anchor, lastRun);
1166
1229
  ip = anchor + lastRun;
1167
1230
  op += lastRun;
1168
1231
  }
@@ -1170,18 +1233,60 @@ _last_literals:
1170
1233
  if (outputDirective == fillOutput) {
1171
1234
  *inputConsumed = (int) (((const char*)ip)-source);
1172
1235
  }
1173
- DEBUGLOG(5, "LZ4_compress_generic: compressed %i bytes into %i bytes", inputSize, (int)(((char*)op) - dest));
1174
1236
  result = (int)(((char*)op) - dest);
1175
1237
  assert(result > 0);
1238
+ DEBUGLOG(5, "LZ4_compress_generic: compressed %i bytes into %i bytes", inputSize, result);
1176
1239
  return result;
1177
1240
  }
1178
1241
 
1242
+ /** LZ4_compress_generic() :
1243
+ * inlined, to ensure branches are decided at compilation time;
1244
+ * takes care of src == (NULL, 0)
1245
+ * and forward the rest to LZ4_compress_generic_validated */
1246
+ LZ4_FORCE_INLINE int LZ4_compress_generic(
1247
+ LZ4_stream_t_internal* const cctx,
1248
+ const char* const src,
1249
+ char* const dst,
1250
+ const int srcSize,
1251
+ int *inputConsumed, /* only written when outputDirective == fillOutput */
1252
+ const int dstCapacity,
1253
+ const limitedOutput_directive outputDirective,
1254
+ const tableType_t tableType,
1255
+ const dict_directive dictDirective,
1256
+ const dictIssue_directive dictIssue,
1257
+ const int acceleration)
1258
+ {
1259
+ DEBUGLOG(5, "LZ4_compress_generic: srcSize=%i, dstCapacity=%i",
1260
+ srcSize, dstCapacity);
1261
+
1262
+ if ((U32)srcSize > (U32)LZ4_MAX_INPUT_SIZE) { return 0; } /* Unsupported srcSize, too large (or negative) */
1263
+ if (srcSize == 0) { /* src == NULL supported if srcSize == 0 */
1264
+ if (outputDirective != notLimited && dstCapacity <= 0) return 0; /* no output, can't write anything */
1265
+ DEBUGLOG(5, "Generating an empty block");
1266
+ assert(outputDirective == notLimited || dstCapacity >= 1);
1267
+ assert(dst != NULL);
1268
+ dst[0] = 0;
1269
+ if (outputDirective == fillOutput) {
1270
+ assert (inputConsumed != NULL);
1271
+ *inputConsumed = 0;
1272
+ }
1273
+ return 1;
1274
+ }
1275
+ assert(src != NULL);
1276
+
1277
+ return LZ4_compress_generic_validated(cctx, src, dst, srcSize,
1278
+ inputConsumed, /* only written into if outputDirective == fillOutput */
1279
+ dstCapacity, outputDirective,
1280
+ tableType, dictDirective, dictIssue, acceleration);
1281
+ }
1282
+
1179
1283
 
1180
1284
  int LZ4_compress_fast_extState(void* state, const char* source, char* dest, int inputSize, int maxOutputSize, int acceleration)
1181
1285
  {
1182
1286
  LZ4_stream_t_internal* const ctx = & LZ4_initStream(state, sizeof(LZ4_stream_t)) -> internal_donotuse;
1183
1287
  assert(ctx != NULL);
1184
- if (acceleration < 1) acceleration = ACCELERATION_DEFAULT;
1288
+ if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT;
1289
+ if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX;
1185
1290
  if (maxOutputSize >= LZ4_compressBound(inputSize)) {
1186
1291
  if (inputSize < LZ4_64Klimit) {
1187
1292
  return LZ4_compress_generic(ctx, source, dest, inputSize, NULL, 0, notLimited, byU16, noDict, noDictIssue, acceleration);
@@ -1211,7 +1316,8 @@ int LZ4_compress_fast_extState(void* state, const char* source, char* dest, int
1211
1316
  int LZ4_compress_fast_extState_fastReset(void* state, const char* src, char* dst, int srcSize, int dstCapacity, int acceleration)
1212
1317
  {
1213
1318
  LZ4_stream_t_internal* ctx = &((LZ4_stream_t*)state)->internal_donotuse;
1214
- if (acceleration < 1) acceleration = ACCELERATION_DEFAULT;
1319
+ if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT;
1320
+ if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX;
1215
1321
 
1216
1322
  if (dstCapacity >= LZ4_compressBound(srcSize)) {
1217
1323
  if (srcSize < LZ4_64Klimit) {
@@ -1270,22 +1376,6 @@ int LZ4_compress_default(const char* src, char* dst, int srcSize, int maxOutputS
1270
1376
  }
1271
1377
 
1272
1378
 
1273
- /* hidden debug function */
1274
- /* strangely enough, gcc generates faster code when this function is uncommented, even if unused */
1275
- int LZ4_compress_fast_force(const char* src, char* dst, int srcSize, int dstCapacity, int acceleration)
1276
- {
1277
- LZ4_stream_t ctx;
1278
- LZ4_initStream(&ctx, sizeof(ctx));
1279
-
1280
- if (srcSize < LZ4_64Klimit) {
1281
- return LZ4_compress_generic(&ctx.internal_donotuse, src, dst, srcSize, NULL, dstCapacity, limitedOutput, byU16, noDict, noDictIssue, acceleration);
1282
- } else {
1283
- tableType_t const addrMode = (sizeof(void*) > 4) ? byU32 : byPtr;
1284
- return LZ4_compress_generic(&ctx.internal_donotuse, src, dst, srcSize, NULL, dstCapacity, limitedOutput, addrMode, noDict, noDictIssue, acceleration);
1285
- }
1286
- }
1287
-
1288
-
1289
1379
  /* Note!: This function leaves the stream in an unclean/broken state!
1290
1380
  * It is not safe to subsequently use the same state with a _fastReset() or
1291
1381
  * _continue() call without resetting it. */
@@ -1340,27 +1430,23 @@ LZ4_stream_t* LZ4_createStream(void)
1340
1430
  return lz4s;
1341
1431
  }
1342
1432
 
1343
- #ifndef _MSC_VER /* for some reason, Visual fails the aligment test on 32-bit x86 :
1344
- it reports an aligment of 8-bytes,
1345
- while actually aligning LZ4_stream_t on 4 bytes. */
1346
1433
  static size_t LZ4_stream_t_alignment(void)
1347
1434
  {
1348
- struct { char c; LZ4_stream_t t; } t_a;
1349
- return sizeof(t_a) - sizeof(t_a.t);
1350
- }
1435
+ #if LZ4_ALIGN_TEST
1436
+ typedef struct { char c; LZ4_stream_t t; } t_a;
1437
+ return sizeof(t_a) - sizeof(LZ4_stream_t);
1438
+ #else
1439
+ return 1; /* effectively disabled */
1351
1440
  #endif
1441
+ }
1352
1442
 
1353
1443
  LZ4_stream_t* LZ4_initStream (void* buffer, size_t size)
1354
1444
  {
1355
1445
  DEBUGLOG(5, "LZ4_initStream");
1356
1446
  if (buffer == NULL) { return NULL; }
1357
1447
  if (size < sizeof(LZ4_stream_t)) { return NULL; }
1358
- #ifndef _MSC_VER /* for some reason, Visual fails the aligment test on 32-bit x86 :
1359
- it reports an aligment of 8-bytes,
1360
- while actually aligning LZ4_stream_t on 4 bytes. */
1361
- if (((size_t)buffer) & (LZ4_stream_t_alignment() - 1)) { return NULL; } /* alignment check */
1362
- #endif
1363
- MEM_INIT(buffer, 0, sizeof(LZ4_stream_t));
1448
+ if (!LZ4_isAligned(buffer, LZ4_stream_t_alignment())) return NULL;
1449
+ MEM_INIT(buffer, 0, sizeof(LZ4_stream_t_internal));
1364
1450
  return (LZ4_stream_t*)buffer;
1365
1451
  }
1366
1452
 
@@ -1369,7 +1455,7 @@ LZ4_stream_t* LZ4_initStream (void* buffer, size_t size)
1369
1455
  void LZ4_resetStream (LZ4_stream_t* LZ4_stream)
1370
1456
  {
1371
1457
  DEBUGLOG(5, "LZ4_resetStream (ctx:%p)", LZ4_stream);
1372
- MEM_INIT(LZ4_stream, 0, sizeof(LZ4_stream_t));
1458
+ MEM_INIT(LZ4_stream, 0, sizeof(LZ4_stream_t_internal));
1373
1459
  }
1374
1460
 
1375
1461
  void LZ4_resetStream_fast(LZ4_stream_t* ctx) {
@@ -1418,7 +1504,7 @@ int LZ4_loadDict (LZ4_stream_t* LZ4_dict, const char* dictionary, int dictSize)
1418
1504
  base = dictEnd - dict->currentOffset;
1419
1505
  dict->dictionary = p;
1420
1506
  dict->dictSize = (U32)(dictEnd - p);
1421
- dict->tableType = tableType;
1507
+ dict->tableType = (U32)tableType;
1422
1508
 
1423
1509
  while (p <= dictEnd-HASH_UNIT) {
1424
1510
  LZ4_putPosition(p, dict->hashTable, tableType, base);
@@ -1436,12 +1522,6 @@ void LZ4_attach_dictionary(LZ4_stream_t* workingStream, const LZ4_stream_t* dict
1436
1522
  workingStream, dictionaryStream,
1437
1523
  dictCtx != NULL ? dictCtx->dictSize : 0);
1438
1524
 
1439
- /* Calling LZ4_resetStream_fast() here makes sure that changes will not be
1440
- * erased by subsequent calls to LZ4_resetStream_fast() in case stream was
1441
- * marked as having dirty context, e.g. requiring full reset.
1442
- */
1443
- LZ4_resetStream_fast(workingStream);
1444
-
1445
1525
  if (dictCtx != NULL) {
1446
1526
  /* If the current offset is zero, we will never look in the
1447
1527
  * external dictionary context, since there is no value a table
@@ -1493,9 +1573,9 @@ int LZ4_compress_fast_continue (LZ4_stream_t* LZ4_stream,
1493
1573
 
1494
1574
  DEBUGLOG(5, "LZ4_compress_fast_continue (inputSize=%i)", inputSize);
1495
1575
 
1496
- if (streamPtr->dirty) { return 0; } /* Uninitialized structure detected */
1497
1576
  LZ4_renormDictT(streamPtr, inputSize); /* avoid index overflow */
1498
- if (acceleration < 1) acceleration = ACCELERATION_DEFAULT;
1577
+ if (acceleration < 1) acceleration = LZ4_ACCELERATION_DEFAULT;
1578
+ if (acceleration > LZ4_ACCELERATION_MAX) acceleration = LZ4_ACCELERATION_MAX;
1499
1579
 
1500
1580
  /* invalidate tiny dictionaries */
1501
1581
  if ( (streamPtr->dictSize-1 < 4-1) /* intentional underflow */
@@ -1538,7 +1618,7 @@ int LZ4_compress_fast_continue (LZ4_stream_t* LZ4_stream,
1538
1618
  * cost to copy the dictionary's tables into the active context,
1539
1619
  * so that the compression loop is only looking into one table.
1540
1620
  */
1541
- memcpy(streamPtr, streamPtr->dictCtx, sizeof(LZ4_stream_t));
1621
+ LZ4_memcpy(streamPtr, streamPtr->dictCtx, sizeof(*streamPtr));
1542
1622
  result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingExtDict, noDictIssue, acceleration);
1543
1623
  } else {
1544
1624
  result = LZ4_compress_generic(streamPtr, source, dest, inputSize, NULL, maxOutputSize, limitedOutput, tableType, usingDictCtx, noDictIssue, acceleration);
@@ -1593,7 +1673,9 @@ int LZ4_saveDict (LZ4_stream_t* LZ4_dict, char* safeBuffer, int dictSize)
1593
1673
  if ((U32)dictSize > 64 KB) { dictSize = 64 KB; } /* useless to define a dictionary > 64 KB */
1594
1674
  if ((U32)dictSize > dict->dictSize) { dictSize = (int)dict->dictSize; }
1595
1675
 
1596
- memmove(safeBuffer, previousDictEnd - dictSize, dictSize);
1676
+ if (safeBuffer == NULL) assert(dictSize == 0);
1677
+ if (dictSize > 0)
1678
+ memmove(safeBuffer, previousDictEnd - dictSize, dictSize);
1597
1679
 
1598
1680
  dict->dictionary = (const BYTE*)safeBuffer;
1599
1681
  dict->dictSize = (U32)dictSize;
@@ -1623,25 +1705,27 @@ typedef enum { decode_full_block = 0, partial_decode = 1 } earlyEnd_directive;
1623
1705
  */
1624
1706
  typedef enum { loop_error = -2, initial_error = -1, ok = 0 } variable_length_error;
1625
1707
  LZ4_FORCE_INLINE unsigned
1626
- read_variable_length(const BYTE**ip, const BYTE* lencheck, int loop_check, int initial_check, variable_length_error* error)
1627
- {
1628
- unsigned length = 0;
1629
- unsigned s;
1630
- if (initial_check && unlikely((*ip) >= lencheck)) { /* overflow detection */
1631
- *error = initial_error;
1632
- return length;
1633
- }
1634
- do {
1635
- s = **ip;
1636
- (*ip)++;
1637
- length += s;
1638
- if (loop_check && unlikely((*ip) >= lencheck)) { /* overflow detection */
1639
- *error = loop_error;
1640
- return length;
1708
+ read_variable_length(const BYTE**ip, const BYTE* lencheck,
1709
+ int loop_check, int initial_check,
1710
+ variable_length_error* error)
1711
+ {
1712
+ U32 length = 0;
1713
+ U32 s;
1714
+ if (initial_check && unlikely((*ip) >= lencheck)) { /* overflow detection */
1715
+ *error = initial_error;
1716
+ return length;
1641
1717
  }
1642
- } while (s==255);
1718
+ do {
1719
+ s = **ip;
1720
+ (*ip)++;
1721
+ length += s;
1722
+ if (loop_check && unlikely((*ip) >= lencheck)) { /* overflow detection */
1723
+ *error = loop_error;
1724
+ return length;
1725
+ }
1726
+ } while (s==255);
1643
1727
 
1644
- return length;
1728
+ return length;
1645
1729
  }
1646
1730
 
1647
1731
  /*! LZ4_decompress_generic() :
@@ -1722,7 +1806,7 @@ LZ4_decompress_generic(
1722
1806
  /* decode literal length */
1723
1807
  if (length == RUN_MASK) {
1724
1808
  variable_length_error error = ok;
1725
- length += read_variable_length(&ip, iend-RUN_MASK, endOnInput, endOnInput, &error);
1809
+ length += read_variable_length(&ip, iend-RUN_MASK, (int)endOnInput, (int)endOnInput, &error);
1726
1810
  if (error == initial_error) { goto _output_error; }
1727
1811
  if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */
1728
1812
  if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */
@@ -1746,12 +1830,12 @@ LZ4_decompress_generic(
1746
1830
  /* We don't need to check oend, since we check it once for each loop below */
1747
1831
  if (ip > iend-(16 + 1/*max lit + offset + nextToken*/)) { goto safe_literal_copy; }
1748
1832
  /* Literals can only be 14, but hope compilers optimize if we copy by a register size */
1749
- memcpy(op, ip, 16);
1833
+ LZ4_memcpy(op, ip, 16);
1750
1834
  } else { /* LZ4_decompress_fast() */
1751
1835
  /* LZ4_decompress_fast() cannot copy more than 8 bytes at a time :
1752
1836
  * it doesn't know input length, and relies on end-of-block properties */
1753
- memcpy(op, ip, 8);
1754
- if (length > 8) { memcpy(op+8, ip+8, 8); }
1837
+ LZ4_memcpy(op, ip, 8);
1838
+ if (length > 8) { LZ4_memcpy(op+8, ip+8, 8); }
1755
1839
  }
1756
1840
  ip += length; op = cpy;
1757
1841
  }
@@ -1765,10 +1849,10 @@ LZ4_decompress_generic(
1765
1849
  length = token & ML_MASK;
1766
1850
 
1767
1851
  if (length == ML_MASK) {
1768
- variable_length_error error = ok;
1769
- if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */
1770
- length += read_variable_length(&ip, iend - LASTLITERALS + 1, endOnInput, 0, &error);
1771
- if (error != ok) { goto _output_error; }
1852
+ variable_length_error error = ok;
1853
+ if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */
1854
+ length += read_variable_length(&ip, iend - LASTLITERALS + 1, (int)endOnInput, 0, &error);
1855
+ if (error != ok) { goto _output_error; }
1772
1856
  if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) { goto _output_error; } /* overflow detection */
1773
1857
  length += MINMATCH;
1774
1858
  if (op + length >= oend - FASTLOOP_SAFE_DISTANCE) {
@@ -1787,19 +1871,20 @@ LZ4_decompress_generic(
1787
1871
  assert(match <= op);
1788
1872
  assert(op + 18 <= oend);
1789
1873
 
1790
- memcpy(op, match, 8);
1791
- memcpy(op+8, match+8, 8);
1792
- memcpy(op+16, match+16, 2);
1874
+ LZ4_memcpy(op, match, 8);
1875
+ LZ4_memcpy(op+8, match+8, 8);
1876
+ LZ4_memcpy(op+16, match+16, 2);
1793
1877
  op += length;
1794
1878
  continue;
1795
1879
  } } }
1796
1880
 
1797
- if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */
1881
+ if (checkOffset && (unlikely(match + dictSize < lowPrefix))) { goto _output_error; } /* Error : offset outside buffers */
1798
1882
  /* match starting within external dictionary */
1799
1883
  if ((dict==usingExtDict) && (match < lowPrefix)) {
1800
1884
  if (unlikely(op+length > oend-LASTLITERALS)) {
1801
1885
  if (partialDecoding) {
1802
- length = MIN(length, (size_t)(oend-op)); /* reach end of buffer */
1886
+ DEBUGLOG(7, "partialDecoding: dictionary match, close to dstEnd");
1887
+ length = MIN(length, (size_t)(oend-op));
1803
1888
  } else {
1804
1889
  goto _output_error; /* end-of-block condition violated */
1805
1890
  } }
@@ -1812,14 +1897,14 @@ LZ4_decompress_generic(
1812
1897
  /* match stretches into both external dictionary and current block */
1813
1898
  size_t const copySize = (size_t)(lowPrefix - match);
1814
1899
  size_t const restSize = length - copySize;
1815
- memcpy(op, dictEnd - copySize, copySize);
1900
+ LZ4_memcpy(op, dictEnd - copySize, copySize);
1816
1901
  op += copySize;
1817
1902
  if (restSize > (size_t)(op - lowPrefix)) { /* overlap copy */
1818
1903
  BYTE* const endOfMatch = op + restSize;
1819
1904
  const BYTE* copyFrom = lowPrefix;
1820
1905
  while (op < endOfMatch) { *op++ = *copyFrom++; }
1821
1906
  } else {
1822
- memcpy(op, lowPrefix, restSize);
1907
+ LZ4_memcpy(op, lowPrefix, restSize);
1823
1908
  op += restSize;
1824
1909
  } }
1825
1910
  continue;
@@ -1860,7 +1945,7 @@ LZ4_decompress_generic(
1860
1945
  /* strictly "less than" on input, to re-enter the loop with at least one byte */
1861
1946
  && likely((endOnInput ? ip < shortiend : 1) & (op <= shortoend)) ) {
1862
1947
  /* Copy the literals */
1863
- memcpy(op, ip, endOnInput ? 16 : 8);
1948
+ LZ4_memcpy(op, ip, endOnInput ? 16 : 8);
1864
1949
  op += length; ip += length;
1865
1950
 
1866
1951
  /* The second stage: prepare for match copying, decode full info.
@@ -1875,9 +1960,9 @@ LZ4_decompress_generic(
1875
1960
  && (offset >= 8)
1876
1961
  && (dict==withPrefix64k || match >= lowPrefix) ) {
1877
1962
  /* Copy the match. */
1878
- memcpy(op + 0, match + 0, 8);
1879
- memcpy(op + 8, match + 8, 8);
1880
- memcpy(op +16, match +16, 2);
1963
+ LZ4_memcpy(op + 0, match + 0, 8);
1964
+ LZ4_memcpy(op + 8, match + 8, 8);
1965
+ LZ4_memcpy(op +16, match +16, 2);
1881
1966
  op += length + MINMATCH;
1882
1967
  /* Both stages worked, load the next token. */
1883
1968
  continue;
@@ -1891,7 +1976,7 @@ LZ4_decompress_generic(
1891
1976
  /* decode literal length */
1892
1977
  if (length == RUN_MASK) {
1893
1978
  variable_length_error error = ok;
1894
- length += read_variable_length(&ip, iend-RUN_MASK, endOnInput, endOnInput, &error);
1979
+ length += read_variable_length(&ip, iend-RUN_MASK, (int)endOnInput, (int)endOnInput, &error);
1895
1980
  if (error == initial_error) { goto _output_error; }
1896
1981
  if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)(op))) { goto _output_error; } /* overflow detection */
1897
1982
  if ((safeDecode) && unlikely((uptrval)(ip)+length<(uptrval)(ip))) { goto _output_error; } /* overflow detection */
@@ -1907,29 +1992,34 @@ LZ4_decompress_generic(
1907
1992
  || ((!endOnInput) && (cpy>oend-WILDCOPYLENGTH)) )
1908
1993
  {
1909
1994
  /* We've either hit the input parsing restriction or the output parsing restriction.
1910
- * If we've hit the input parsing condition then this must be the last sequence.
1911
- * If we've hit the output parsing condition then we are either using partialDecoding
- * or we've hit the output parsing condition.
+ * In the normal scenario, decoding a full block, it must be the last sequence,
+ * otherwise it's an error (invalid input or dimensions).
+ * In partialDecoding scenario, it's necessary to ensure there is no buffer overflow.
  */
  if (partialDecoding) {
  /* Since we are partial decoding we may be in this block because of the output parsing
  * restriction, which is not valid since the output buffer is allowed to be undersized.
  */
  assert(endOnInput);
- /* If we're in this block because of the input parsing condition, then we must be on the
- * last sequence (or invalid), so we must check that we exactly consume the input.
+ DEBUGLOG(7, "partialDecoding: copying literals, close to input or output end")
+ DEBUGLOG(7, "partialDecoding: literal length = %u", (unsigned)length);
+ DEBUGLOG(7, "partialDecoding: remaining space in dstBuffer : %i", (int)(oend - op));
+ DEBUGLOG(7, "partialDecoding: remaining space in srcBuffer : %i", (int)(iend - ip));
+ /* Finishing in the middle of a literals segment,
+ * due to lack of input.
  */
- if ((ip+length>iend-(2+1+LASTLITERALS)) && (ip+length != iend)) { goto _output_error; }
- assert(ip+length <= iend);
- /* We are finishing in the middle of a literals segment.
- * Break after the copy.
+ if (ip+length > iend) {
+ length = (size_t)(iend-ip);
+ cpy = op + length;
+ }
+ /* Finishing in the middle of a literals segment,
+ * due to lack of output space.
  */
  if (cpy > oend) {
  cpy = oend;
  assert(op<=oend);
  length = (size_t)(oend-op);
  }
- assert(ip+length <= iend);
  } else {
  /* We must be on the last sequence because of the parsing limitations so check
  * that we exactly regenerate the original size (must be exact when !endOnInput).
@@ -1938,16 +2028,22 @@ LZ4_decompress_generic(
  /* We must be on the last sequence (or invalid) because of the parsing limitations
  * so check that we exactly consume the input and don't overrun the output buffer.
  */
- if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) { goto _output_error; }
+ if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) {
+ DEBUGLOG(6, "should have been last run of literals")
+ DEBUGLOG(6, "ip(%p) + length(%i) = %p != iend (%p)", ip, (int)length, ip+length, iend);
+ DEBUGLOG(6, "or cpy(%p) > oend(%p)", cpy, oend);
+ goto _output_error;
+ }
  }
- memmove(op, ip, length); /* supports overlapping memory regions, which only matters for in-place decompression scenarios */
+ memmove(op, ip, length); /* supports overlapping memory regions; only matters for in-place decompression scenarios */
  ip += length;
  op += length;
- /* Necessarily EOF when !partialDecoding. When partialDecoding
- * it is EOF if we've either filled the output buffer or hit
- * the input parsing restriction.
+ /* Necessarily EOF when !partialDecoding.
+ * When partialDecoding, it is EOF if we've either
+ * filled the output buffer or
+ * can't proceed with reading an offset for following match.
  */
- if (!partialDecoding || (cpy == oend) || (ip == iend)) {
+ if (!partialDecoding || (cpy == oend) || (ip >= (iend-2))) {
  break;
  }
  } else {
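For orientation, the added partialDecoding branch above boils down to clamping the current literal run to whatever still fits, copying it, and stopping. The following standalone sketch is not lz4 source; the names and types are illustrative only:

```c
#include <stddef.h>
#include <string.h>

/* Illustrative only: clamp a literal run to the remaining input and output,
 * then copy it. memmove is used because in-place decompression allows the
 * source and destination regions to overlap. */
static size_t copy_clamped_literals(unsigned char *op, const unsigned char *oend,
                                    const unsigned char *ip, const unsigned char *iend,
                                    size_t length)
{
    if (length > (size_t)(iend - ip)) length = (size_t)(iend - ip);  /* lack of input */
    if (length > (size_t)(oend - op)) length = (size_t)(oend - op);  /* lack of output space */
    memmove(op, ip, length);
    return length;  /* the caller advances ip/op by this amount and then breaks */
}
```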
@@ -1965,7 +2061,7 @@ LZ4_decompress_generic(
  _copy_match:
  if (length == ML_MASK) {
  variable_length_error error = ok;
- length += read_variable_length(&ip, iend - LASTLITERALS + 1, endOnInput, 0, &error);
+ length += read_variable_length(&ip, iend - LASTLITERALS + 1, (int)endOnInput, 0, &error);
  if (error != ok) goto _output_error;
  if ((safeDecode) && unlikely((uptrval)(op)+length<(uptrval)op)) goto _output_error; /* overflow detection */
  }
@@ -1990,14 +2086,14 @@
  /* match stretches into both external dictionary and current block */
  size_t const copySize = (size_t)(lowPrefix - match);
  size_t const restSize = length - copySize;
- memcpy(op, dictEnd - copySize, copySize);
+ LZ4_memcpy(op, dictEnd - copySize, copySize);
  op += copySize;
  if (restSize > (size_t)(op - lowPrefix)) { /* overlap copy */
  BYTE* const endOfMatch = op + restSize;
  const BYTE* copyFrom = lowPrefix;
  while (op < endOfMatch) *op++ = *copyFrom++;
  } else {
- memcpy(op, lowPrefix, restSize);
+ LZ4_memcpy(op, lowPrefix, restSize);
  op += restSize;
  } }
  continue;
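This hunk (and several that follow) swaps bare `memcpy` calls for `LZ4_memcpy`. The wrapper itself is defined elsewhere in lz4.c and is not part of this excerpt; as an assumption, it has roughly the following shape, letting the compiler inline fixed-size copies even in freestanding builds:

```c
/* Assumed definition, for illustration; the real macro lives outside this diff. */
#if defined(__GNUC__) && (__GNUC__ >= 4)
#  define LZ4_memcpy(dst, src, size) __builtin_memcpy(dst, src, size)
#else
#  define LZ4_memcpy(dst, src, size) memcpy(dst, src, size)
#endif
```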
@@ -2016,7 +2112,7 @@
  if (matchEnd > op) { /* overlap copy */
  while (op < copyEnd) { *op++ = *match++; }
  } else {
- memcpy(op, match, mlen);
+ LZ4_memcpy(op, match, mlen);
  }
  op = copyEnd;
  if (op == oend) { break; }
@@ -2030,10 +2126,10 @@
  op[2] = match[2];
  op[3] = match[3];
  match += inc32table[offset];
- memcpy(op+4, match, 4);
+ LZ4_memcpy(op+4, match, 4);
  match -= dec64table[offset];
  } else {
- memcpy(op, match, 8);
+ LZ4_memcpy(op, match, 8);
  match += 8;
  }
  op += 8;
@@ -2048,7 +2144,7 @@
  }
  while (op < cpy) { *op++ = *match++; }
  } else {
- memcpy(op, match, 8);
+ LZ4_memcpy(op, match, 8);
  if (length > 16) { LZ4_wildCopy8(op+8, match+8, cpy); }
  }
  op = cpy; /* wildcopy correction */
@@ -2056,6 +2152,7 @@

  /* end of decoding */
  if (endOnInput) {
+ DEBUGLOG(5, "decoded %i bytes", (int) (((char*)op)-dst));
  return (int) (((char*)op)-dst); /* Nb of output bytes decoded */
  } else {
  return (int) (((const char*)ip)-src); /* Nb of input bytes read */
@@ -2070,7 +2167,7 @@

  /*===== Instantiate the API decoding functions. =====*/

- LZ4_FORCE_O2_GCC_PPC64LE
+ LZ4_FORCE_O2
  int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int maxDecompressedSize)
  {
  return LZ4_decompress_generic(source, dest, compressedSize, maxDecompressedSize,
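From here on, the `LZ4_FORCE_O2_GCC_PPC64LE` annotation on the decoder entry points is renamed to `LZ4_FORCE_O2`. The macro's definition is outside this excerpt; a plausible sketch of what such an annotation expands to (an assumption, not the verbatim lz4 definition) is:

```c
/* Sketch under assumptions: force -O2 codegen for the decoder hot paths on the
 * GCC/POWER little-endian configuration where lower optimisation levels are
 * known to hurt, and expand to nothing everywhere else. */
#if defined(__PPC64__) && defined(__LITTLE_ENDIAN__) && defined(__GNUC__) && !defined(__clang__)
#  define LZ4_FORCE_O2  __attribute__((optimize("O2")))
#else
#  define LZ4_FORCE_O2
#endif
```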
@@ -2078,7 +2175,7 @@ int LZ4_decompress_safe(const char* source, char* dest, int compressedSize, int
  (BYTE*)dest, NULL, 0);
  }

- LZ4_FORCE_O2_GCC_PPC64LE
+ LZ4_FORCE_O2
  int LZ4_decompress_safe_partial(const char* src, char* dst, int compressedSize, int targetOutputSize, int dstCapacity)
  {
  dstCapacity = MIN(targetOutputSize, dstCapacity);
@@ -2087,7 +2184,7 @@ int LZ4_decompress_safe_partial(const char* src, char* dst, int compressedSize,
  noDict, (BYTE*)dst, NULL, 0);
  }

- LZ4_FORCE_O2_GCC_PPC64LE
+ LZ4_FORCE_O2
  int LZ4_decompress_fast(const char* source, char* dest, int originalSize)
  {
  return LZ4_decompress_generic(source, dest, 0, originalSize,
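`LZ4_decompress_safe_partial`, instantiated just above, is the entry point most affected by the partial-decoding changes earlier in this diff. A minimal usage sketch follows; the helper name, buffers, and surrounding framing are assumptions, not part of the library:

```c
#include <stdio.h>
#include "lz4.h"

/* Decode only the first `want` bytes of a block. `src`/`srcSize` are assumed
 * to hold one complete LZ4 block, e.g. produced by LZ4_compress_default(). */
static int peek_prefix(const char *src, int srcSize,
                       char *dst, int dstCapacity, int want)
{
    int const decoded = LZ4_decompress_safe_partial(src, dst, srcSize, want, dstCapacity);
    if (decoded < 0) {
        fprintf(stderr, "malformed LZ4 input\n");
        return -1;
    }
    return decoded;  /* may be less than `want` if the block is shorter */
}
```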
@@ -2097,7 +2194,7 @@ int LZ4_decompress_fast(const char* source, char* dest, int originalSize)

  /*===== Instantiate a few more decoding cases, used more than once. =====*/

- LZ4_FORCE_O2_GCC_PPC64LE /* Exported, an obsolete API function. */
+ LZ4_FORCE_O2 /* Exported, an obsolete API function. */
  int LZ4_decompress_safe_withPrefix64k(const char* source, char* dest, int compressedSize, int maxOutputSize)
  {
  return LZ4_decompress_generic(source, dest, compressedSize, maxOutputSize,
@@ -2113,7 +2210,7 @@ int LZ4_decompress_fast_withPrefix64k(const char* source, char* dest, int origin
  return LZ4_decompress_fast(source, dest, originalSize);
  }

- LZ4_FORCE_O2_GCC_PPC64LE
+ LZ4_FORCE_O2
  static int LZ4_decompress_safe_withSmallPrefix(const char* source, char* dest, int compressedSize, int maxOutputSize,
  size_t prefixSize)
  {
@@ -2122,7 +2219,7 @@ static int LZ4_decompress_safe_withSmallPrefix(const char* source, char* dest, i
  (BYTE*)dest-prefixSize, NULL, 0);
  }

- LZ4_FORCE_O2_GCC_PPC64LE
+ LZ4_FORCE_O2
  int LZ4_decompress_safe_forceExtDict(const char* source, char* dest,
  int compressedSize, int maxOutputSize,
  const void* dictStart, size_t dictSize)
@@ -2132,7 +2229,7 @@ int LZ4_decompress_safe_forceExtDict(const char* source, char* dest,
  (BYTE*)dest, (const BYTE*)dictStart, dictSize);
  }

- LZ4_FORCE_O2_GCC_PPC64LE
+ LZ4_FORCE_O2
  static int LZ4_decompress_fast_extDict(const char* source, char* dest, int originalSize,
  const void* dictStart, size_t dictSize)
  {
@@ -2221,7 +2318,7 @@ int LZ4_decoderRingBufferSize(int maxBlockSize)
  If it's not possible, save the relevant part of decoded data into a safe buffer,
  and indicate where it stands using LZ4_setStreamDecode()
  */
- LZ4_FORCE_O2_GCC_PPC64LE
+ LZ4_FORCE_O2
  int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int compressedSize, int maxOutputSize)
  {
  LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse;
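The comment in the hunk above describes the streaming contract: either keep previously decoded data addressable, or stash it and register it with `LZ4_setStreamDecode()`. A minimal double-buffer sketch of that usage follows; block framing, buffer sizes, and the helper itself are assumptions, with each buffer kept at 64 KB so the previous block remains valid history:

```c
#include "lz4.h"

/* Illustrative helper: decode dependent blocks alternately into two 64 KB
 * buffers so the previously decoded block stays addressable as history. */
static int decode_blocks(const char *const blocks[], const int sizes[], int nbBlocks,
                         char out[2][64 * 1024])
{
    LZ4_streamDecode_t lz4sd;
    int idx = 0, total = 0;
    LZ4_setStreamDecode(&lz4sd, NULL, 0);      /* start with no dictionary/history */
    for (int i = 0; i < nbBlocks; i++) {
        int const r = LZ4_decompress_safe_continue(&lz4sd, blocks[i],
                                                   out[idx], sizes[i], 64 * 1024);
        if (r < 0) return -1;                  /* corrupted block */
        total += r;
        idx ^= 1;                              /* switch buffers; the old one is history */
    }
    return total;
}
```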
@@ -2261,7 +2358,7 @@ int LZ4_decompress_safe_continue (LZ4_streamDecode_t* LZ4_streamDecode, const ch
  return result;
  }

- LZ4_FORCE_O2_GCC_PPC64LE
+ LZ4_FORCE_O2
  int LZ4_decompress_fast_continue (LZ4_streamDecode_t* LZ4_streamDecode, const char* source, char* dest, int originalSize)
  {
  LZ4_streamDecode_t_internal* lz4sd = &LZ4_streamDecode->internal_donotuse;
@@ -2374,7 +2471,7 @@ int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize,

  /* Obsolete Streaming functions */

- int LZ4_sizeofStreamState() { return LZ4_STREAMSIZE; }
+ int LZ4_sizeofStreamState(void) { return LZ4_STREAMSIZE; }

  int LZ4_resetStreamState(void* state, char* inputBuffer)
  {
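One small correctness note on the `LZ4_sizeofStreamState` change above: in C, an empty parameter list leaves the parameters unspecified, whereas `(void)` declares a true prototype, so calls with stray arguments are diagnosed and `-Wstrict-prototypes` stays quiet. Illustration only, not lz4 code:

```c
int old_style();       /* unspecified parameters: old_style(42) compiles without complaint */
int prototyped(void);  /* true prototype: prototyped(42) is a constraint violation */
```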