zstd-ruby 1.4.4.0 → 1.4.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (86) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/ext/zstdruby/libzstd/Makefile +123 -58
  4. data/ext/zstdruby/libzstd/README.md +34 -14
  5. data/ext/zstdruby/libzstd/common/bitstream.h +31 -37
  6. data/ext/zstdruby/libzstd/common/compiler.h +19 -3
  7. data/ext/zstdruby/libzstd/common/cpu.h +1 -1
  8. data/ext/zstdruby/libzstd/common/debug.c +11 -31
  9. data/ext/zstdruby/libzstd/common/debug.h +11 -31
  10. data/ext/zstdruby/libzstd/common/entropy_common.c +13 -33
  11. data/ext/zstdruby/libzstd/common/error_private.c +2 -1
  12. data/ext/zstdruby/libzstd/common/error_private.h +6 -2
  13. data/ext/zstdruby/libzstd/common/fse.h +11 -31
  14. data/ext/zstdruby/libzstd/common/fse_decompress.c +12 -37
  15. data/ext/zstdruby/libzstd/common/huf.h +15 -33
  16. data/ext/zstdruby/libzstd/common/mem.h +1 -1
  17. data/ext/zstdruby/libzstd/common/pool.c +1 -1
  18. data/ext/zstdruby/libzstd/common/pool.h +2 -2
  19. data/ext/zstdruby/libzstd/common/threading.c +4 -3
  20. data/ext/zstdruby/libzstd/common/threading.h +4 -3
  21. data/ext/zstdruby/libzstd/common/xxhash.c +15 -33
  22. data/ext/zstdruby/libzstd/common/xxhash.h +11 -31
  23. data/ext/zstdruby/libzstd/common/zstd_common.c +1 -1
  24. data/ext/zstdruby/libzstd/common/zstd_errors.h +2 -1
  25. data/ext/zstdruby/libzstd/common/zstd_internal.h +112 -15
  26. data/ext/zstdruby/libzstd/compress/fse_compress.c +17 -40
  27. data/ext/zstdruby/libzstd/compress/hist.c +15 -35
  28. data/ext/zstdruby/libzstd/compress/hist.h +12 -32
  29. data/ext/zstdruby/libzstd/compress/huf_compress.c +92 -92
  30. data/ext/zstdruby/libzstd/compress/zstd_compress.c +450 -275
  31. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +136 -14
  32. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +10 -6
  33. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +1 -1
  34. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +24 -20
  35. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +10 -3
  36. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +845 -0
  37. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +32 -0
  38. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +3 -13
  39. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +11 -8
  40. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +2 -2
  41. data/ext/zstdruby/libzstd/compress/zstd_fast.c +36 -24
  42. data/ext/zstdruby/libzstd/compress/zstd_fast.h +2 -2
  43. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +34 -11
  44. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +1 -1
  45. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +27 -5
  46. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +7 -2
  47. data/ext/zstdruby/libzstd/compress/zstd_opt.c +38 -84
  48. data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
  49. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +48 -21
  50. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +2 -2
  51. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +76 -62
  52. data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +12 -8
  53. data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +2 -2
  54. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +264 -148
  55. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +312 -203
  56. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +3 -3
  57. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +18 -4
  58. data/ext/zstdruby/libzstd/deprecated/zbuff.h +3 -3
  59. data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +2 -2
  60. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +1 -1
  61. data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
  62. data/ext/zstdruby/libzstd/dictBuilder/cover.c +5 -5
  63. data/ext/zstdruby/libzstd/dictBuilder/cover.h +14 -4
  64. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +14 -4
  65. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +33 -9
  66. data/ext/zstdruby/libzstd/dictBuilder/zdict.h +51 -28
  67. data/ext/zstdruby/libzstd/dll/example/Makefile +2 -1
  68. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +4 -4
  69. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +18 -12
  70. data/ext/zstdruby/libzstd/legacy/zstd_v01.h +1 -1
  71. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +10 -6
  72. data/ext/zstdruby/libzstd/legacy/zstd_v02.h +1 -1
  73. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +10 -6
  74. data/ext/zstdruby/libzstd/legacy/zstd_v03.h +1 -1
  75. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +13 -7
  76. data/ext/zstdruby/libzstd/legacy/zstd_v04.h +1 -1
  77. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +17 -13
  78. data/ext/zstdruby/libzstd/legacy/zstd_v05.h +2 -2
  79. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +17 -13
  80. data/ext/zstdruby/libzstd/legacy/zstd_v06.h +1 -1
  81. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +22 -14
  82. data/ext/zstdruby/libzstd/legacy/zstd_v07.h +1 -1
  83. data/ext/zstdruby/libzstd/libzstd.pc.in +2 -2
  84. data/ext/zstdruby/libzstd/zstd.h +62 -21
  85. data/lib/zstd-ruby/version.rb +1 -1
  86. metadata +7 -5
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_COMPRESS_ADVANCED_H
+#define ZSTD_COMPRESS_ADVANCED_H
+
+/*-*************************************
+*  Dependencies
+***************************************/
+
+#include "../zstd.h" /* ZSTD_CCtx */
+
+/*-*************************************
+*  Target Compressed Block Size
+***************************************/
+
+/* ZSTD_compressSuperBlock() :
+ * Used to compress a super block when targetCBlockSize is being used.
+ * The given block will be compressed into multiple sub blocks that are around targetCBlockSize. */
+size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
+               void* dst, size_t dstCapacity,
+               void const* src, size_t srcSize,
+               unsigned lastBlock);
+
+#endif /* ZSTD_COMPRESS_ADVANCED_H */
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -14,7 +14,7 @@
 /*-*************************************
 *  Dependencies
 ***************************************/
-#include "zstd_internal.h"
+#include "../common/zstd_internal.h"
 
 #if defined (__cplusplus)
 extern "C" {
@@ -24,16 +24,6 @@ extern "C" {
24
24
  * Constants
25
25
  ***************************************/
26
26
 
27
- /* define "workspace is too large" as this number of times larger than needed */
28
- #define ZSTD_WORKSPACETOOLARGE_FACTOR 3
29
-
30
- /* when workspace is continuously too large
31
- * during at least this number of times,
32
- * context's memory usage is considered wasteful,
33
- * because it's sized to handle a worst case scenario which rarely happens.
34
- * In which case, resize it down to free some memory */
35
- #define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128
36
-
37
27
  /* Since the workspace is effectively its own little malloc implementation /
38
28
  * arena, when we run under ASAN, we should similarly insert redzones between
39
29
  * each internal element of the workspace, so ASAN will catch overruns that
@@ -468,7 +458,7 @@ MEM_STATIC void ZSTD_cwksp_init(ZSTD_cwksp* ws, void* start, size_t size) {
468
458
  MEM_STATIC size_t ZSTD_cwksp_create(ZSTD_cwksp* ws, size_t size, ZSTD_customMem customMem) {
469
459
  void* workspace = ZSTD_malloc(size, customMem);
470
460
  DEBUGLOG(4, "cwksp: creating new workspace with %zd bytes", size);
471
- RETURN_ERROR_IF(workspace == NULL, memory_allocation);
461
+ RETURN_ERROR_IF(workspace == NULL, memory_allocation, "NULL pointer!");
472
462
  ZSTD_cwksp_init(ws, workspace, size);
473
463
  return 0;
474
464
  }
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -63,10 +63,8 @@ size_t ZSTD_compressBlock_doubleFast_generic(
63
63
  const BYTE* ip = istart;
64
64
  const BYTE* anchor = istart;
65
65
  const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
66
- const U32 lowestValid = ms->window.dictLimit;
67
- const U32 maxDistance = 1U << cParams->windowLog;
68
66
  /* presumes that, if there is a dictionary, it must be using Attach mode */
69
- const U32 prefixLowestIndex = (endIndex - lowestValid > maxDistance) ? endIndex - maxDistance : lowestValid;
67
+ const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
70
68
  const BYTE* const prefixLowest = base + prefixLowestIndex;
71
69
  const BYTE* const iend = istart + srcSize;
72
70
  const BYTE* const ilimit = iend - HASH_READ_SIZE;
@@ -96,7 +94,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
96
94
  dictCParams->hashLog : hBitsL;
97
95
  const U32 dictHBitsS = dictMode == ZSTD_dictMatchState ?
98
96
  dictCParams->chainLog : hBitsS;
99
- const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictStart);
97
+ const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictStart));
100
98
 
101
99
  DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_generic");
102
100
 
@@ -104,13 +102,15 @@ size_t ZSTD_compressBlock_doubleFast_generic(
104
102
 
105
103
  /* if a dictionary is attached, it must be within window range */
106
104
  if (dictMode == ZSTD_dictMatchState) {
107
- assert(lowestValid + maxDistance >= endIndex);
105
+ assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);
108
106
  }
109
107
 
110
108
  /* init */
111
109
  ip += (dictAndPrefixLength == 0);
112
110
  if (dictMode == ZSTD_noDict) {
113
- U32 const maxRep = (U32)(ip - prefixLowest);
111
+ U32 const current = (U32)(ip - base);
112
+ U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
113
+ U32 const maxRep = current - windowLow;
114
114
  if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
115
115
  if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
116
116
  }
@@ -198,6 +198,9 @@ size_t ZSTD_compressBlock_doubleFast_generic(
198
198
  } }
199
199
 
200
200
  ip += ((ip-anchor) >> kSearchStrength) + 1;
201
+ #if defined(__aarch64__)
202
+ PREFETCH_L1(ip+256);
203
+ #endif
201
204
  continue;
202
205
 
203
206
  _search_next_long:
@@ -271,7 +274,7 @@ _match_stored:
271
274
  U32 const repIndex2 = current2 - offset_2;
272
275
  const BYTE* repMatch2 = dictMode == ZSTD_dictMatchState
273
276
  && repIndex2 < prefixLowestIndex ?
274
- dictBase - dictIndexDelta + repIndex2 :
277
+ dictBase + repIndex2 - dictIndexDelta :
275
278
  base + repIndex2;
276
279
  if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
277
280
  && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -15,7 +15,7 @@
15
15
  extern "C" {
16
16
  #endif
17
17
 
18
- #include "mem.h" /* U32 */
18
+ #include "../common/mem.h" /* U32 */
19
19
  #include "zstd_compress_internal.h" /* ZSTD_CCtx, size_t */
20
20
 
21
21
  void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -61,9 +61,7 @@ ZSTD_compressBlock_fast_generic(
61
61
  const BYTE* ip1;
62
62
  const BYTE* anchor = istart;
63
63
  const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
64
- const U32 maxDistance = 1U << cParams->windowLog;
65
- const U32 validStartIndex = ms->window.dictLimit;
66
- const U32 prefixStartIndex = (endIndex - validStartIndex > maxDistance) ? endIndex - maxDistance : validStartIndex;
64
+ const U32 prefixStartIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
67
65
  const BYTE* const prefixStart = base + prefixStartIndex;
68
66
  const BYTE* const iend = istart + srcSize;
69
67
  const BYTE* const ilimit = iend - HASH_READ_SIZE;
@@ -74,12 +72,21 @@ ZSTD_compressBlock_fast_generic(
74
72
  DEBUGLOG(5, "ZSTD_compressBlock_fast_generic");
75
73
  ip0 += (ip0 == prefixStart);
76
74
  ip1 = ip0 + 1;
77
- { U32 const maxRep = (U32)(ip0 - prefixStart);
75
+ { U32 const current = (U32)(ip0 - base);
76
+ U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
77
+ U32 const maxRep = current - windowLow;
78
78
  if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
79
79
  if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
80
80
  }
81
81
 
82
82
  /* Main Search Loop */
83
+ #ifdef __INTEL_COMPILER
84
+ /* From intel 'The vector pragma indicates that the loop should be
85
+ * vectorized if it is legal to do so'. Can be used together with
86
+ * #pragma ivdep (but have opted to exclude that because intel
87
+ * warns against using it).*/
88
+ #pragma vector always
89
+ #endif
83
90
  while (ip1 < ilimit) { /* < instead of <=, because check at ip0+2 */
84
91
  size_t mLength;
85
92
  BYTE const* ip2 = ip0 + 2;
@@ -91,19 +98,25 @@ ZSTD_compressBlock_fast_generic(
91
98
  U32 const current1 = (U32)(ip1-base);
92
99
  U32 const matchIndex0 = hashTable[h0];
93
100
  U32 const matchIndex1 = hashTable[h1];
94
- BYTE const* repMatch = ip2-offset_1;
101
+ BYTE const* repMatch = ip2 - offset_1;
95
102
  const BYTE* match0 = base + matchIndex0;
96
103
  const BYTE* match1 = base + matchIndex1;
97
104
  U32 offcode;
105
+
106
+ #if defined(__aarch64__)
107
+ PREFETCH_L1(ip0+256);
108
+ #endif
109
+
98
110
  hashTable[h0] = current0; /* update hash table */
99
111
  hashTable[h1] = current1; /* update hash table */
100
112
 
101
113
  assert(ip0 + 1 == ip1);
102
114
 
103
115
  if ((offset_1 > 0) & (MEM_read32(repMatch) == MEM_read32(ip2))) {
104
- mLength = ip2[-1] == repMatch[-1] ? 1 : 0;
116
+ mLength = (ip2[-1] == repMatch[-1]) ? 1 : 0;
105
117
  ip0 = ip2 - mLength;
106
118
  match0 = repMatch - mLength;
119
+ mLength += 4;
107
120
  offcode = 0;
108
121
  goto _match;
109
122
  }
@@ -128,19 +141,18 @@ _offset: /* Requires: ip0, match0 */
128
141
  offset_2 = offset_1;
129
142
  offset_1 = (U32)(ip0-match0);
130
143
  offcode = offset_1 + ZSTD_REP_MOVE;
131
- mLength = 0;
144
+ mLength = 4;
132
145
  /* Count the backwards match length */
133
146
  while (((ip0>anchor) & (match0>prefixStart))
134
147
  && (ip0[-1] == match0[-1])) { ip0--; match0--; mLength++; } /* catch up */
135
148
 
136
149
  _match: /* Requires: ip0, match0, offcode */
137
150
  /* Count the forward length */
138
- mLength += ZSTD_count(ip0+mLength+4, match0+mLength+4, iend) + 4;
151
+ mLength += ZSTD_count(ip0+mLength, match0+mLength, iend);
139
152
  ZSTD_storeSeq(seqStore, (size_t)(ip0-anchor), anchor, iend, offcode, mLength-MINMATCH);
140
153
  /* match found */
141
154
  ip0 += mLength;
142
155
  anchor = ip0;
143
- ip1 = ip0 + 1;
144
156
 
145
157
  if (ip0 <= ilimit) {
146
158
  /* Fill Table */
@@ -148,19 +160,18 @@ _match: /* Requires: ip0, match0, offcode */
148
160
  hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */
149
161
  hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base);
150
162
 
151
- while ( ((ip0 <= ilimit) & (offset_2>0)) /* offset_2==0 means offset_2 is invalidated */
152
- && (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) ) {
153
- /* store sequence */
154
- size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4;
155
- { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
156
- hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
157
- ip0 += rLength;
158
- ip1 = ip0 + 1;
159
- ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength-MINMATCH);
160
- anchor = ip0;
161
- continue; /* faster when present (confirmed on gcc-8) ... (?) */
162
- }
163
- }
163
+ if (offset_2 > 0) { /* offset_2==0 means offset_2 is invalidated */
164
+ while ( (ip0 <= ilimit) && (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) ) {
165
+ /* store sequence */
166
+ size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4;
167
+ { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
168
+ hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
169
+ ip0 += rLength;
170
+ ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength-MINMATCH);
171
+ anchor = ip0;
172
+ continue; /* faster when present (confirmed on gcc-8) ... (?) */
173
+ } } }
174
+ ip1 = ip0 + 1;
164
175
  }
165
176
 
166
177
  /* save reps for next block */
@@ -387,7 +398,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
387
398
  const BYTE* const ilimit = iend - 8;
388
399
  U32 offset_1=rep[0], offset_2=rep[1];
389
400
 
390
- DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic");
401
+ DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic (offset_1=%u)", offset_1);
391
402
 
392
403
  /* switch to "regular" variant if extDict is invalidated due to maxDistance */
393
404
  if (prefixStartIndex == dictStartIndex)
@@ -404,6 +415,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
404
415
  const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
405
416
  const BYTE* const repMatch = repBase + repIndex;
406
417
  hashTable[h] = current; /* update hash table */
418
+ DEBUGLOG(7, "offset_1 = %u , current = %u", offset_1, current);
407
419
  assert(offset_1 <= current +1); /* check repIndex */
408
420
 
409
421
  if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex))
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -15,7 +15,7 @@
15
15
  extern "C" {
16
16
  #endif
17
17
 
18
- #include "mem.h" /* U32 */
18
+ #include "../common/mem.h" /* U32 */
19
19
  #include "zstd_compress_internal.h"
20
20
 
21
21
  void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -660,12 +660,16 @@ ZSTD_compressBlock_lazy_generic(
660
660
  const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ?
661
661
  prefixLowestIndex - (U32)(dictEnd - dictBase) :
662
662
  0;
663
- const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictLowest);
663
+ const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest));
664
+
665
+ DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u)", (U32)dictMode);
664
666
 
665
667
  /* init */
666
668
  ip += (dictAndPrefixLength == 0);
667
669
  if (dictMode == ZSTD_noDict) {
668
- U32 const maxRep = (U32)(ip - prefixLowest);
670
+ U32 const current = (U32)(ip - base);
671
+ U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, ms->cParams.windowLog);
672
+ U32 const maxRep = current - windowLow;
669
673
  if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
670
674
  if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
671
675
  }
@@ -677,6 +681,12 @@ ZSTD_compressBlock_lazy_generic(
677
681
  }
678
682
 
679
683
  /* Match Loop */
684
+ #if defined(__GNUC__) && defined(__x86_64__)
685
+ /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the
686
+ * code alignment is perturbed. To fix the instability align the loop on 32-bytes.
687
+ */
688
+ __asm__(".p2align 5");
689
+ #endif
680
690
  while (ip < ilimit) {
681
691
  size_t matchLength=0;
682
692
  size_t offset=0;
@@ -929,11 +939,11 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
929
939
  const BYTE* const ilimit = iend - 8;
930
940
  const BYTE* const base = ms->window.base;
931
941
  const U32 dictLimit = ms->window.dictLimit;
932
- const U32 lowestIndex = ms->window.lowLimit;
933
942
  const BYTE* const prefixStart = base + dictLimit;
934
943
  const BYTE* const dictBase = ms->window.dictBase;
935
944
  const BYTE* const dictEnd = dictBase + dictLimit;
936
- const BYTE* const dictStart = dictBase + lowestIndex;
945
+ const BYTE* const dictStart = dictBase + ms->window.lowLimit;
946
+ const U32 windowLog = ms->cParams.windowLog;
937
947
 
938
948
  typedef size_t (*searchMax_f)(
939
949
  ZSTD_matchState_t* ms,
@@ -942,10 +952,18 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
942
952
 
943
953
  U32 offset_1 = rep[0], offset_2 = rep[1];
944
954
 
955
+ DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic");
956
+
945
957
  /* init */
946
958
  ip += (ip == prefixStart);
947
959
 
948
960
  /* Match Loop */
961
+ #if defined(__GNUC__) && defined(__x86_64__)
962
+ /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the
963
+ * code alignment is perturbed. To fix the instability align the loop on 32-bytes.
964
+ */
965
+ __asm__(".p2align 5");
966
+ #endif
949
967
  while (ip < ilimit) {
950
968
  size_t matchLength=0;
951
969
  size_t offset=0;
@@ -953,10 +971,11 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
953
971
  U32 current = (U32)(ip-base);
954
972
 
955
973
  /* check repCode */
956
- { const U32 repIndex = (U32)(current+1 - offset_1);
974
+ { const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current+1, windowLog);
975
+ const U32 repIndex = (U32)(current+1 - offset_1);
957
976
  const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
958
977
  const BYTE* const repMatch = repBase + repIndex;
959
- if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
978
+ if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
960
979
  if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
961
980
  /* repcode detected we should take it */
962
981
  const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
@@ -983,10 +1002,11 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
983
1002
  current++;
984
1003
  /* check repCode */
985
1004
  if (offset) {
1005
+ const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current, windowLog);
986
1006
  const U32 repIndex = (U32)(current - offset_1);
987
1007
  const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
988
1008
  const BYTE* const repMatch = repBase + repIndex;
989
- if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
1009
+ if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
990
1010
  if (MEM_read32(ip) == MEM_read32(repMatch)) {
991
1011
  /* repcode detected */
992
1012
  const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
@@ -1013,10 +1033,11 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
1013
1033
  current++;
1014
1034
  /* check repCode */
1015
1035
  if (offset) {
1036
+ const U32 windowLow = ZSTD_getLowestMatchIndex(ms, current, windowLog);
1016
1037
  const U32 repIndex = (U32)(current - offset_1);
1017
1038
  const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
1018
1039
  const BYTE* const repMatch = repBase + repIndex;
1019
- if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
1040
+ if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
1020
1041
  if (MEM_read32(ip) == MEM_read32(repMatch)) {
1021
1042
  /* repcode detected */
1022
1043
  const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
@@ -1057,10 +1078,12 @@ _storeSequence:
1057
1078
 
1058
1079
  /* check immediate repcode */
1059
1080
  while (ip <= ilimit) {
1060
- const U32 repIndex = (U32)((ip-base) - offset_2);
1081
+ const U32 repCurrent = (U32)(ip-base);
1082
+ const U32 windowLow = ZSTD_getLowestMatchIndex(ms, repCurrent, windowLog);
1083
+ const U32 repIndex = repCurrent - offset_2;
1061
1084
  const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
1062
1085
  const BYTE* const repMatch = repBase + repIndex;
1063
- if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
1086
+ if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
1064
1087
  if (MEM_read32(ip) == MEM_read32(repMatch)) {
1065
1088
  /* repcode detected we should take it */
1066
1089
  const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -1,15 +1,16 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
6
6
  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7
7
  * in the COPYING file in the root directory of this source tree).
8
+ * You may select, at your option, one of the above-listed licenses.
8
9
  */
9
10
 
10
11
  #include "zstd_ldm.h"
11
12
 
12
- #include "debug.h"
13
+ #include "../common/debug.h"
13
14
  #include "zstd_fast.h" /* ZSTD_fillHashTable() */
14
15
  #include "zstd_double_fast.h" /* ZSTD_fillDoubleHashTable() */
15
16
 
@@ -223,6 +224,20 @@ static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state,
223
224
  return rollingHash;
224
225
  }
225
226
 
227
+ void ZSTD_ldm_fillHashTable(
228
+ ldmState_t* state, const BYTE* ip,
229
+ const BYTE* iend, ldmParams_t const* params)
230
+ {
231
+ DEBUGLOG(5, "ZSTD_ldm_fillHashTable");
232
+ if ((size_t)(iend - ip) >= params->minMatchLength) {
233
+ U64 startingHash = ZSTD_rollingHash_compute(ip, params->minMatchLength);
234
+ ZSTD_ldm_fillLdmHashTable(
235
+ state, startingHash, ip, iend - params->minMatchLength, state->window.base,
236
+ params->hashLog - params->bucketSizeLog,
237
+ *params);
238
+ }
239
+ }
240
+
226
241
 
227
242
  /** ZSTD_ldm_limitTableUpdate() :
228
243
  *
@@ -449,6 +464,8 @@ size_t ZSTD_ldm_generateSequences(
449
464
  U32 const correction = ZSTD_window_correctOverflow(
450
465
  &ldmState->window, /* cycleLog */ 0, maxDist, chunkStart);
451
466
  ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction);
467
+ /* invalidate dictionaries on overflow correction */
468
+ ldmState->loadedDictEnd = 0;
452
469
  }
453
470
  /* 2. We enforce the maximum offset allowed.
454
471
  *
@@ -457,8 +474,14 @@ size_t ZSTD_ldm_generateSequences(
457
474
  * TODO: * Test the chunk size.
458
475
  * * Try invalidation after the sequence generation and test the
459
476
  * the offset against maxDist directly.
477
+ *
478
+ * NOTE: Because of dictionaries + sequence splitting we MUST make sure
479
+ * that any offset used is valid at the END of the sequence, since it may
480
+ * be split into two sequences. This condition holds when using
481
+ * ZSTD_window_enforceMaxDist(), but if we move to checking offsets
482
+ * against maxDist directly, we'll have to carefully handle that case.
460
483
  */
461
- ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, NULL, NULL);
484
+ ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, &ldmState->loadedDictEnd, NULL);
462
485
  /* 3. Generate the sequences for the chunk, and get newLeftoverSize. */
463
486
  newLeftoverSize = ZSTD_ldm_generateSequences_internal(
464
487
  ldmState, sequences, params, chunkStart, chunkSize);
@@ -566,14 +589,13 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
566
589
  if (sequence.offset == 0)
567
590
  break;
568
591
 
569
- assert(sequence.offset <= (1U << cParams->windowLog));
570
592
  assert(ip + sequence.litLength + sequence.matchLength <= iend);
571
593
 
572
594
  /* Fill tables for block compressor */
573
595
  ZSTD_ldm_limitTableUpdate(ms, ip);
574
596
  ZSTD_ldm_fillFastTables(ms, ip);
575
597
  /* Run the block compressor */
576
- DEBUGLOG(5, "calling block compressor on segment of size %u", sequence.litLength);
598
+ DEBUGLOG(5, "pos %u : calling block compressor on segment of size %u", (unsigned)(ip-istart), sequence.litLength);
577
599
  {
578
600
  size_t const newLitLength =
579
601
  blockCompressor(ms, seqStore, rep, ip, sequence.litLength);