zstdlib 0.6.0-x86-mingw32 → 0.7.0-x86-mingw32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. checksums.yaml +4 -4
  2. data/CHANGES.md +5 -0
  3. data/ext/zstdlib/extconf.rb +1 -1
  4. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/bitstream.h +31 -37
  5. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/compiler.h +19 -3
  6. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/cpu.h +1 -1
  7. data/ext/zstdlib/zstd-1.4.5/lib/common/debug.c +24 -0
  8. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/debug.h +11 -31
  9. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/entropy_common.c +13 -33
  10. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/error_private.c +2 -1
  11. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/error_private.h +6 -2
  12. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/fse.h +11 -31
  13. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/fse_decompress.c +12 -37
  14. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/huf.h +15 -33
  15. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/mem.h +1 -1
  16. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/pool.c +1 -1
  17. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/pool.h +2 -2
  18. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/threading.c +4 -3
  19. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/threading.h +4 -3
  20. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/xxhash.c +15 -33
  21. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/xxhash.h +11 -31
  22. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/zstd_common.c +1 -1
  23. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/zstd_errors.h +2 -1
  24. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/common/zstd_internal.h +112 -15
  25. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/fse_compress.c +17 -40
  26. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/hist.c +15 -35
  27. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/hist.h +12 -32
  28. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/huf_compress.c +92 -92
  29. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_compress.c +450 -275
  30. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_compress_internal.h +136 -14
  31. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_compress_literals.c +10 -6
  32. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_compress_literals.h +1 -1
  33. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_compress_sequences.c +24 -20
  34. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_compress_sequences.h +10 -3
  35. data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_compress_superblock.c +845 -0
  36. data/ext/zstdlib/zstd-1.4.5/lib/compress/zstd_compress_superblock.h +32 -0
  37. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_cwksp.h +3 -13
  38. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_double_fast.c +11 -8
  39. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_double_fast.h +2 -2
  40. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_fast.c +36 -24
  41. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_fast.h +2 -2
  42. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_lazy.c +34 -11
  43. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_lazy.h +1 -1
  44. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_ldm.c +27 -5
  45. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_ldm.h +7 -2
  46. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_opt.c +38 -84
  47. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstd_opt.h +1 -1
  48. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstdmt_compress.c +48 -21
  49. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/compress/zstdmt_compress.h +2 -2
  50. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/decompress/huf_decompress.c +76 -62
  51. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/decompress/zstd_ddict.c +12 -8
  52. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/decompress/zstd_ddict.h +2 -2
  53. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/decompress/zstd_decompress.c +264 -148
  54. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/decompress/zstd_decompress_block.c +312 -203
  55. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/decompress/zstd_decompress_block.h +3 -3
  56. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/decompress/zstd_decompress_internal.h +18 -4
  57. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/lib/zstd.h +62 -21
  58. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/zlibWrapper/gzclose.c +0 -0
  59. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/zlibWrapper/gzcompatibility.h +1 -1
  60. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/zlibWrapper/gzguts.h +0 -0
  61. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/zlibWrapper/gzlib.c +0 -0
  62. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/zlibWrapper/gzread.c +0 -0
  63. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/zlibWrapper/gzwrite.c +0 -0
  64. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/zlibWrapper/zstd_zlibwrapper.c +1 -1
  65. data/ext/zstdlib/{zstd-1.4.4 → zstd-1.4.5}/zlibWrapper/zstd_zlibwrapper.h +1 -1
  66. data/lib/2.2/zstdlib.so +0 -0
  67. data/lib/2.3/zstdlib.so +0 -0
  68. data/lib/2.4/zstdlib.so +0 -0
  69. data/lib/2.5/zstdlib.so +0 -0
  70. data/lib/2.6/zstdlib.so +0 -0
  71. data/lib/2.7/zstdlib.so +0 -0
  72. metadata +64 -62
  73. data/ext/zstdlib/zstd-1.4.4/lib/common/debug.c +0 -44
@@ -0,0 +1,32 @@
1
+ /*
2
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3
+ * All rights reserved.
4
+ *
5
+ * This source code is licensed under both the BSD-style license (found in the
6
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7
+ * in the COPYING file in the root directory of this source tree).
8
+ * You may select, at your option, one of the above-listed licenses.
9
+ */
10
+
11
+ #ifndef ZSTD_COMPRESS_ADVANCED_H
12
+ #define ZSTD_COMPRESS_ADVANCED_H
13
+
14
+ /*-*************************************
15
+ * Dependencies
16
+ ***************************************/
17
+
18
+ #include "../zstd.h" /* ZSTD_CCtx */
19
+
20
+ /*-*************************************
21
+ * Target Compressed Block Size
22
+ ***************************************/
23
+
24
+ /* ZSTD_compressSuperBlock() :
25
+ * Used to compress a super block when targetCBlockSize is being used.
26
+ * The given block will be compressed into multiple sub blocks that are around targetCBlockSize. */
27
+ size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
28
+ void* dst, size_t dstCapacity,
29
+ void const* src, size_t srcSize,
30
+ unsigned lastBlock);
31
+
32
+ #endif /* ZSTD_COMPRESS_ADVANCED_H */
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -14,7 +14,7 @@
14
14
  /*-*************************************
15
15
  * Dependencies
16
16
  ***************************************/
17
- #include "zstd_internal.h"
17
+ #include "../common/zstd_internal.h"
18
18
 
19
19
  #if defined (__cplusplus)
20
20
  extern "C" {
@@ -24,16 +24,6 @@ extern "C" {
24
24
  * Constants
25
25
  ***************************************/
26
26
 
27
- /* define "workspace is too large" as this number of times larger than needed */
28
- #define ZSTD_WORKSPACETOOLARGE_FACTOR 3
29
-
30
- /* when workspace is continuously too large
31
- * during at least this number of times,
32
- * context's memory usage is considered wasteful,
33
- * because it's sized to handle a worst case scenario which rarely happens.
34
- * In which case, resize it down to free some memory */
35
- #define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128
36
-
37
27
  /* Since the workspace is effectively its own little malloc implementation /
38
28
  * arena, when we run under ASAN, we should similarly insert redzones between
39
29
  * each internal element of the workspace, so ASAN will catch overruns that
@@ -468,7 +458,7 @@ MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size) {
468
458
  MEM_STATIC size_t ZSTD_cwksp_create(ZSTD_cwksp* ws, size_t size, ZSTD_customMem customMem) {
469
459
  void* workspace = ZSTD_malloc(size, customMem);
470
460
  DEBUGLOG(4, "cwksp: creating new workspace with %zd bytes", size);
471
- RETURN_ERROR_IF(workspace == NULL, memory_allocation);
461
+ RETURN_ERROR_IF(workspace == NULL, memory_allocation, "NULL pointer!");
472
462
  ZSTD_cwksp_init(ws, workspace, size);
473
463
  return 0;
474
464
  }
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -63,10 +63,8 @@ size_t ZSTD_compressBlock_doubleFast_generic(
63
63
  const BYTE* ip = istart;
64
64
  const BYTE* anchor = istart;
65
65
  const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
66
- const U32 lowestValid = ms->window.dictLimit;
67
- const U32 maxDistance = 1U << cParams->windowLog;
68
66
  /* presumes that, if there is a dictionary, it must be using Attach mode */
69
- const U32 prefixLowestIndex = (endIndex - lowestValid > maxDistance) ? endIndex - maxDistance : lowestValid;
67
+ const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
70
68
  const BYTE* const prefixLowest = base + prefixLowestIndex;
71
69
  const BYTE* const iend = istart + srcSize;
72
70
  const BYTE* const ilimit = iend - HASH_READ_SIZE;
@@ -96,7 +94,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
96
94
  dictCParams->hashLog : hBitsL;
97
95
  const U32 dictHBitsS = dictMode == ZSTD_dictMatchState ?
98
96
  dictCParams->chainLog : hBitsS;
99
- const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictStart);
97
+ const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictStart));
100
98
 
101
99
  DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_generic");
102
100
 
@@ -104,13 +102,15 @@ size_t ZSTD_compressBlock_doubleFast_generic(
104
102
 
105
103
  /* if a dictionary is attached, it must be within window range */
106
104
  if (dictMode == ZSTD_dictMatchState) {
107
- assert(lowestValid + maxDistance >= endIndex);
105
+ assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);
108
106
  }
109
107
 
110
108
  /* init */
111
109
  ip += (dictAndPrefixLength == 0);
112
110
  if (dictMode == ZSTD_noDict) {
113
- U32 const maxRep = (U32)(ip - prefixLowest);
111
+ U32 const current = (U32)(ip - base);
112
+ U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
113
+ U32 const maxRep = current - windowLow;
114
114
  if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
115
115
  if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
116
116
  }
@@ -198,6 +198,9 @@ size_t ZSTD_compressBlock_doubleFast_generic(
198
198
  } }
199
199
 
200
200
  ip += ((ip-anchor) >> kSearchStrength) + 1;
201
+ #if defined(__aarch64__)
202
+ PREFETCH_L1(ip+256);
203
+ #endif
201
204
  continue;
202
205
 
203
206
  _search_next_long:
@@ -271,7 +274,7 @@ _match_stored:
271
274
  U32 const repIndex2 = current2 - offset_2;
272
275
  const BYTE* repMatch2 = dictMode == ZSTD_dictMatchState
273
276
  && repIndex2 < prefixLowestIndex ?
274
- dictBase - dictIndexDelta + repIndex2 :
277
+ dictBase + repIndex2 - dictIndexDelta :
275
278
  base + repIndex2;
276
279
  if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
277
280
  && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -15,7 +15,7 @@
15
15
  extern "C" {
16
16
  #endif
17
17
 
18
- #include "mem.h" /* U32 */
18
+ #include "../common/mem.h" /* U32 */
19
19
  #include "zstd_compress_internal.h" /* ZSTD_CCtx, size_t */
20
20
 
21
21
  void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -61,9 +61,7 @@ ZSTD_compressBlock_fast_generic(
61
61
  const BYTE* ip1;
62
62
  const BYTE* anchor = istart;
63
63
  const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
64
- const U32 maxDistance = 1U << cParams->windowLog;
65
- const U32 validStartIndex = ms->window.dictLimit;
66
- const U32 prefixStartIndex = (endIndex - validStartIndex > maxDistance) ? endIndex - maxDistance : validStartIndex;
64
+ const U32 prefixStartIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
67
65
  const BYTE* const prefixStart = base + prefixStartIndex;
68
66
  const BYTE* const iend = istart + srcSize;
69
67
  const BYTE* const ilimit = iend - HASH_READ_SIZE;
@@ -74,12 +72,21 @@ ZSTD_compressBlock_fast_generic(
74
72
  DEBUGLOG(5, "ZSTD_compressBlock_fast_generic");
75
73
  ip0 += (ip0 == prefixStart);
76
74
  ip1 = ip0 + 1;
77
- { U32 const maxRep = (U32)(ip0 - prefixStart);
75
+ { U32 const current = (U32)(ip0 - base);
76
+ U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
77
+ U32 const maxRep = current - windowLow;
78
78
  if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
79
79
  if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
80
80
  }
81
81
 
82
82
  /* Main Search Loop */
83
+ #ifdef __INTEL_COMPILER
84
+ /* From intel 'The vector pragma indicates that the loop should be
85
+ * vectorized if it is legal to do so'. Can be used together with
86
+ * #pragma ivdep (but have opted to exclude that because intel
87
+ * warns against using it).*/
88
+ #pragma vector always
89
+ #endif
83
90
  while (ip1 < ilimit) { /* < instead of <=, because check at ip0+2 */
84
91
  size_t mLength;
85
92
  BYTE const* ip2 = ip0 + 2;
@@ -91,19 +98,25 @@ ZSTD_compressBlock_fast_generic(
91
98
  U32 const current1 = (U32)(ip1-base);
92
99
  U32 const matchIndex0 = hashTable[h0];
93
100
  U32 const matchIndex1 = hashTable[h1];
94
- BYTE const* repMatch = ip2-offset_1;
101
+ BYTE const* repMatch = ip2 - offset_1;
95
102
  const BYTE* match0 = base + matchIndex0;
96
103
  const BYTE* match1 = base + matchIndex1;
97
104
  U32 offcode;
105
+
106
+ #if defined(__aarch64__)
107
+ PREFETCH_L1(ip0+256);
108
+ #endif
109
+
98
110
  hashTable[h0] = current0; /* update hash table */
99
111
  hashTable[h1] = current1; /* update hash table */
100
112
 
101
113
  assert(ip0 + 1 == ip1);
102
114
 
103
115
  if ((offset_1 > 0) & (MEM_read32(repMatch) == MEM_read32(ip2))) {
104
- mLength = ip2[-1] == repMatch[-1] ? 1 : 0;
116
+ mLength = (ip2[-1] == repMatch[-1]) ? 1 : 0;
105
117
  ip0 = ip2 - mLength;
106
118
  match0 = repMatch - mLength;
119
+ mLength += 4;
107
120
  offcode = 0;
108
121
  goto _match;
109
122
  }
@@ -128,19 +141,18 @@ _offset: /* Requires: ip0, match0 */
128
141
  offset_2 = offset_1;
129
142
  offset_1 = (U32)(ip0-match0);
130
143
  offcode = offset_1 + ZSTD_REP_MOVE;
131
- mLength = 0;
144
+ mLength = 4;
132
145
  /* Count the backwards match length */
133
146
  while (((ip0>anchor) & (match0>prefixStart))
134
147
  && (ip0[-1] == match0[-1])) { ip0--; match0--; mLength++; } /* catch up */
135
148
 
136
149
  _match: /* Requires: ip0, match0, offcode */
137
150
  /* Count the forward length */
138
- mLength += ZSTD_count(ip0+mLength+4, match0+mLength+4, iend) + 4;
151
+ mLength += ZSTD_count(ip0+mLength, match0+mLength, iend);
139
152
  ZSTD_storeSeq(seqStore, (size_t)(ip0-anchor), anchor, iend, offcode, mLength-MINMATCH);
140
153
  /* match found */
141
154
  ip0 += mLength;
142
155
  anchor = ip0;
143
- ip1 = ip0 + 1;
144
156
 
145
157
  if (ip0 <= ilimit) {
146
158
  /* Fill Table */
@@ -148,19 +160,18 @@ _match: /* Requires: ip0, match0, offcode */
148
160
  hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */
149
161
  hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base);
150
162
 
151
- while ( ((ip0 <= ilimit) & (offset_2>0)) /* offset_2==0 means offset_2 is invalidated */
152
- && (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) ) {
153
- /* store sequence */
154
- size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4;
155
- { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
156
- hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
157
- ip0 += rLength;
158
- ip1 = ip0 + 1;
159
- ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength-MINMATCH);
160
- anchor = ip0;
161
- continue; /* faster when present (confirmed on gcc-8) ... (?) */
162
- }
163
- }
163
+ if (offset_2 > 0) { /* offset_2==0 means offset_2 is invalidated */
164
+ while ( (ip0 <= ilimit) && (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) ) {
165
+ /* store sequence */
166
+ size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4;
167
+ { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
168
+ hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
169
+ ip0 += rLength;
170
+ ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength-MINMATCH);
171
+ anchor = ip0;
172
+ continue; /* faster when present (confirmed on gcc-8) ... (?) */
173
+ } } }
174
+ ip1 = ip0 + 1;
164
175
  }
165
176
 
166
177
  /* save reps for next block */
@@ -387,7 +398,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
387
398
  const BYTE* const ilimit = iend - 8;
388
399
  U32 offset_1=rep[0], offset_2=rep[1];
389
400
 
390
- DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic");
401
+ DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic (offset_1=%u)", offset_1);
391
402
 
392
403
  /* switch to "regular" variant if extDict is invalidated due to maxDistance */
393
404
  if (prefixStartIndex == dictStartIndex)
@@ -404,6 +415,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
404
415
  const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
405
416
  const BYTE* const repMatch = repBase + repIndex;
406
417
  hashTable[h] = current; /* update hash table */
418
+ DEBUGLOG(7, "offset_1 = %u , current = %u", offset_1, current);
407
419
  assert(offset_1 <= current +1); /* check repIndex */
408
420
 
409
421
  if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex))
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -15,7 +15,7 @@
15
15
  extern "C" {
16
16
  #endif
17
17
 
18
- #include "mem.h" /* U32 */
18
+ #include "../common/mem.h" /* U32 */
19
19
  #include "zstd_compress_internal.h"
20
20
 
21
21
  void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -660,12 +660,16 @@ ZSTD_compressBlock_lazy_generic(
660
660
  const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ?
661
661
  prefixLowestIndex - (U32)(dictEnd - dictBase) :
662
662
  0;
663
- const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictLowest);
663
+ const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest));
664
+
665
+ DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u)", (U32)dictMode);
664
666
 
665
667
  /* init */
666
668
  ip += (dictAndPrefixLength == 0);
667
669
  if (dictMode == ZSTD_noDict) {
668
- U32 const maxRep = (U32)(ip - prefixLowest);
670
+ U32 const current = (U32)(ip - base);
671
+ U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, ms->cParams.windowLog);
672
+ U32 const maxRep = current - windowLow;
669
673
  if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
670
674
  if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
671
675
  }
@@ -677,6 +681,12 @@ ZSTD_compressBlock_lazy_generic(
677
681
  }
678
682
 
679
683
  /* Match Loop */
684
+ #if defined(__GNUC__) && defined(__x86_64__)
685
+ /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the
686
+ * code alignment is perturbed. To fix the instability align the loop on 32-bytes.
687
+ */
688
+ __asm__(".p2align 5");
689
+ #endif
680
690
  while (ip < ilimit) {
681
691
  size_t matchLength=0;
682
692
  size_t offset=0;
@@ -929,11 +939,11 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
929
939
  const BYTE* const ilimit = iend - 8;
930
940
  const BYTE* const base = ms->window.base;
931
941
  const U32 dictLimit = ms->window.dictLimit;
932
- const U32 lowestIndex = ms->window.lowLimit;
933
942
  const BYTE* const prefixStart = base + dictLimit;
934
943
  const BYTE* const dictBase = ms->window.dictBase;
935
944
  const BYTE* const dictEnd = dictBase + dictLimit;
936
- const BYTE* const dictStart = dictBase + lowestIndex;
945
+ const BYTE* const dictStart = dictBase + ms->window.lowLimit;
946
+ const U32 windowLog = ms->cParams.windowLog;
937
947
 
938
948
  typedef size_t (*searchMax_f)(
939
949
  ZSTD_matchState_t* ms,
@@ -942,10 +952,18 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
942
952
 
943
953
  U32 offset_1 = rep[0], offset_2 = rep[1];
944
954
 
955
+ DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic");
956
+
945
957
  /* init */
946
958
  ip += (ip == prefixStart);
947
959
 
948
960
  /* Match Loop */
961
+ #if defined(__GNUC__) && defined(__x86_64__)
962
+ /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the
963
+ * code alignment is perturbed. To fix the instability align the loop on 32-bytes.
964
+ */
965
+ __asm__(".p2align 5");
966
+ #endif
949
967
  while (ip < ilimit) {
950
968
  size_t matchLength=0;
951
969
  size_t offset=0;
@@ -953,10 +971,11 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
953
971
  U32 current = (U32)(ip-base);
954
972
 
955
973
  /* check repCode */
956
- { const U32 repIndex = (U32)(current+1 - offset_1);
974
+ { const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current+1, windowLog);
975
+ const U32 repIndex = (U32)(current+1 - offset_1);
957
976
  const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
958
977
  const BYTE* const repMatch = repBase + repIndex;
959
- if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
978
+ if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
960
979
  if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
961
980
  /* repcode detected we should take it */
962
981
  const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
@@ -983,10 +1002,11 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
983
1002
  current++;
984
1003
  /* check repCode */
985
1004
  if (offset) {
1005
+ const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current, windowLog);
986
1006
  const U32 repIndex = (U32)(current - offset_1);
987
1007
  const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
988
1008
  const BYTE* const repMatch = repBase + repIndex;
989
- if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
1009
+ if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
990
1010
  if (MEM_read32(ip) == MEM_read32(repMatch)) {
991
1011
  /* repcode detected */
992
1012
  const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
@@ -1013,10 +1033,11 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
1013
1033
  current++;
1014
1034
  /* check repCode */
1015
1035
  if (offset) {
1036
+ const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current, windowLog);
1016
1037
  const U32 repIndex = (U32)(current - offset_1);
1017
1038
  const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
1018
1039
  const BYTE* const repMatch = repBase + repIndex;
1019
- if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
1040
+ if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
1020
1041
  if (MEM_read32(ip) == MEM_read32(repMatch)) {
1021
1042
  /* repcode detected */
1022
1043
  const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
@@ -1057,10 +1078,12 @@ _storeSequence:
1057
1078
 
1058
1079
  /* check immediate repcode */
1059
1080
  while (ip <= ilimit) {
1060
- const U32 repIndex = (U32)((ip-base) - offset_2);
1081
+ const U32 repCurrent = (U32)(ip-base);
1082
+ const U32 windowLow = ZSTD_getLowestMatchIndex(ms, repCurrent, windowLog);
1083
+ const U32 repIndex = repCurrent - offset_2;
1061
1084
  const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
1062
1085
  const BYTE* const repMatch = repBase + repIndex;
1063
- if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
1086
+ if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
1064
1087
  if (MEM_read32(ip) == MEM_read32(repMatch)) {
1065
1088
  /* repcode detected we should take it */
1066
1089
  const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -1,15 +1,16 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
6
6
  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7
7
  * in the COPYING file in the root directory of this source tree).
8
+ * You may select, at your option, one of the above-listed licenses.
8
9
  */
9
10
 
10
11
  #include "zstd_ldm.h"
11
12
 
12
- #include "debug.h"
13
+ #include "../common/debug.h"
13
14
  #include "zstd_fast.h" /* ZSTD_fillHashTable() */
14
15
  #include "zstd_double_fast.h" /* ZSTD_fillDoubleHashTable() */
15
16
 
@@ -223,6 +224,20 @@ static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state,
223
224
  return rollingHash;
224
225
  }
225
226
 
227
+ void ZSTD_ldm_fillHashTable(
228
+ ldmState_t* state, const BYTE* ip,
229
+ const BYTE* iend, ldmParams_t const* params)
230
+ {
231
+ DEBUGLOG(5, "ZSTD_ldm_fillHashTable");
232
+ if ((size_t)(iend - ip) >= params->minMatchLength) {
233
+ U64 startingHash = ZSTD_rollingHash_compute(ip, params->minMatchLength);
234
+ ZSTD_ldm_fillLdmHashTable(
235
+ state, startingHash, ip, iend - params->minMatchLength, state->window.base,
236
+ params->hashLog - params->bucketSizeLog,
237
+ *params);
238
+ }
239
+ }
240
+
226
241
 
227
242
  /** ZSTD_ldm_limitTableUpdate() :
228
243
  *
@@ -449,6 +464,8 @@ size_t ZSTD_ldm_generateSequences(
449
464
  U32 const correction = ZSTD_window_correctOverflow(
450
465
  &ldmState->window, /* cycleLog */ 0, maxDist, chunkStart);
451
466
  ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction);
467
+ /* invalidate dictionaries on overflow correction */
468
+ ldmState->loadedDictEnd = 0;
452
469
  }
453
470
  /* 2. We enforce the maximum offset allowed.
454
471
  *
@@ -457,8 +474,14 @@ size_t ZSTD_ldm_generateSequences(
457
474
  * TODO: * Test the chunk size.
458
475
  * * Try invalidation after the sequence generation and test the
459
476
  * the offset against maxDist directly.
477
+ *
478
+ * NOTE: Because of dictionaries + sequence splitting we MUST make sure
479
+ * that any offset used is valid at the END of the sequence, since it may
480
+ * be split into two sequences. This condition holds when using
481
+ * ZSTD_window_enforceMaxDist(), but if we move to checking offsets
482
+ * against maxDist directly, we'll have to carefully handle that case.
460
483
  */
461
- ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, NULL, NULL);
484
+ ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, &ldmState->loadedDictEnd, NULL);
462
485
  /* 3. Generate the sequences for the chunk, and get newLeftoverSize. */
463
486
  newLeftoverSize = ZSTD_ldm_generateSequences_internal(
464
487
  ldmState, sequences, params, chunkStart, chunkSize);
@@ -566,14 +589,13 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
566
589
  if (sequence.offset == 0)
567
590
  break;
568
591
 
569
- assert(sequence.offset <= (1U << cParams->windowLog));
570
592
  assert(ip + sequence.litLength + sequence.matchLength <= iend);
571
593
 
572
594
  /* Fill tables for block compressor */
573
595
  ZSTD_ldm_limitTableUpdate(ms, ip);
574
596
  ZSTD_ldm_fillFastTables(ms, ip);
575
597
  /* Run the block compressor */
576
- DEBUGLOG(5, "calling block compressor on segment of size %u", sequence.litLength);
598
+ DEBUGLOG(5, "pos %u : calling block compressor on segment of size %u", (unsigned)(ip-istart), sequence.litLength);
577
599
  {
578
600
  size_t const newLitLength =
579
601
  blockCompressor(ms, seqStore, rep, ip, sequence.litLength);