zstd-ruby 1.4.4.0 → 1.5.1.0

Files changed (102)
  1. checksums.yaml +4 -4
  2. data/.github/dependabot.yml +8 -0
  3. data/.github/workflows/ruby.yml +35 -0
  4. data/README.md +2 -2
  5. data/ext/zstdruby/extconf.rb +1 -0
  6. data/ext/zstdruby/libzstd/BUCK +5 -7
  7. data/ext/zstdruby/libzstd/Makefile +241 -173
  8. data/ext/zstdruby/libzstd/README.md +76 -18
  9. data/ext/zstdruby/libzstd/common/bitstream.h +75 -57
  10. data/ext/zstdruby/libzstd/common/compiler.h +196 -20
  11. data/ext/zstdruby/libzstd/common/cpu.h +1 -3
  12. data/ext/zstdruby/libzstd/common/debug.c +11 -31
  13. data/ext/zstdruby/libzstd/common/debug.h +22 -49
  14. data/ext/zstdruby/libzstd/common/entropy_common.c +208 -76
  15. data/ext/zstdruby/libzstd/common/error_private.c +3 -1
  16. data/ext/zstdruby/libzstd/common/error_private.h +87 -4
  17. data/ext/zstdruby/libzstd/common/fse.h +51 -42
  18. data/ext/zstdruby/libzstd/common/fse_decompress.c +149 -57
  19. data/ext/zstdruby/libzstd/common/huf.h +60 -54
  20. data/ext/zstdruby/libzstd/common/mem.h +87 -98
  21. data/ext/zstdruby/libzstd/common/pool.c +23 -17
  22. data/ext/zstdruby/libzstd/common/pool.h +3 -3
  23. data/ext/zstdruby/libzstd/common/portability_macros.h +131 -0
  24. data/ext/zstdruby/libzstd/common/threading.c +10 -8
  25. data/ext/zstdruby/libzstd/common/threading.h +4 -3
  26. data/ext/zstdruby/libzstd/common/xxhash.c +15 -873
  27. data/ext/zstdruby/libzstd/common/xxhash.h +5572 -191
  28. data/ext/zstdruby/libzstd/common/zstd_common.c +10 -10
  29. data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
  30. data/ext/zstdruby/libzstd/common/zstd_internal.h +252 -108
  31. data/ext/zstdruby/libzstd/common/zstd_trace.h +163 -0
  32. data/ext/zstdruby/libzstd/compress/clevels.h +134 -0
  33. data/ext/zstdruby/libzstd/compress/fse_compress.c +105 -85
  34. data/ext/zstdruby/libzstd/compress/hist.c +41 -63
  35. data/ext/zstdruby/libzstd/compress/hist.h +13 -33
  36. data/ext/zstdruby/libzstd/compress/huf_compress.c +831 -259
  37. data/ext/zstdruby/libzstd/compress/zstd_compress.c +3213 -1007
  38. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +493 -71
  39. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +21 -16
  40. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +4 -2
  41. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +51 -24
  42. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +10 -3
  43. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +573 -0
  44. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +32 -0
  45. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +208 -81
  46. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +315 -137
  47. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +2 -2
  48. data/ext/zstdruby/libzstd/compress/zstd_fast.c +319 -128
  49. data/ext/zstdruby/libzstd/compress/zstd_fast.h +2 -2
  50. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +1156 -171
  51. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +59 -1
  52. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +331 -206
  53. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +15 -3
  54. data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +106 -0
  55. data/ext/zstdruby/libzstd/compress/zstd_opt.c +403 -226
  56. data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
  57. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +188 -453
  58. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +32 -114
  59. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +1065 -410
  60. data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +571 -0
  61. data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +20 -16
  62. data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +3 -3
  63. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +691 -230
  64. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +1072 -323
  65. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +16 -7
  66. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +71 -10
  67. data/ext/zstdruby/libzstd/deprecated/zbuff.h +3 -3
  68. data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +2 -2
  69. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +24 -4
  70. data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +1 -1
  71. data/ext/zstdruby/libzstd/dictBuilder/cover.c +57 -40
  72. data/ext/zstdruby/libzstd/dictBuilder/cover.h +20 -9
  73. data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
  74. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +54 -35
  75. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +151 -57
  76. data/ext/zstdruby/libzstd/dll/example/Makefile +2 -1
  77. data/ext/zstdruby/libzstd/dll/example/README.md +16 -22
  78. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +4 -4
  79. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +25 -19
  80. data/ext/zstdruby/libzstd/legacy/zstd_v01.h +1 -1
  81. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +18 -14
  82. data/ext/zstdruby/libzstd/legacy/zstd_v02.h +1 -1
  83. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +18 -14
  84. data/ext/zstdruby/libzstd/legacy/zstd_v03.h +1 -1
  85. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +22 -16
  86. data/ext/zstdruby/libzstd/legacy/zstd_v04.h +1 -1
  87. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +29 -25
  88. data/ext/zstdruby/libzstd/legacy/zstd_v05.h +2 -2
  89. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +29 -25
  90. data/ext/zstdruby/libzstd/legacy/zstd_v06.h +1 -1
  91. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +34 -26
  92. data/ext/zstdruby/libzstd/legacy/zstd_v07.h +1 -1
  93. data/ext/zstdruby/libzstd/libzstd.mk +185 -0
  94. data/ext/zstdruby/libzstd/libzstd.pc.in +4 -3
  95. data/ext/zstdruby/libzstd/modulemap/module.modulemap +4 -0
  96. data/ext/zstdruby/libzstd/{dictBuilder/zdict.h → zdict.h} +201 -31
  97. data/ext/zstdruby/libzstd/zstd.h +760 -234
  98. data/ext/zstdruby/libzstd/{common/zstd_errors.h → zstd_errors.h} +3 -1
  99. data/ext/zstdruby/zstdruby.c +2 -2
  100. data/lib/zstd-ruby/version.rb +1 -1
  101. metadata +20 -9
  102. data/.travis.yml +0 -14
data/ext/zstdruby/libzstd/compress/zstd_double_fast.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -15,7 +15,7 @@
 extern "C" {
 #endif
 
-#include "mem.h"   /* U32 */
+#include "../common/mem.h"   /* U32 */
 #include "zstd_compress_internal.h"   /* ZSTD_CCtx, size_t */
 
 void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
data/ext/zstdruby/libzstd/compress/zstd_fast.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -29,148 +29,308 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
      * Insert the other positions if their hash entry is empty.
      */
     for ( ; ip + fastHashFillStep < iend + 2; ip += fastHashFillStep) {
-        U32 const current = (U32)(ip - base);
+        U32 const curr = (U32)(ip - base);
         size_t const hash0 = ZSTD_hashPtr(ip, hBits, mls);
-        hashTable[hash0] = current;
+        hashTable[hash0] = curr;
         if (dtlm == ZSTD_dtlm_fast) continue;
         /* Only load extra positions for ZSTD_dtlm_full */
         {   U32 p;
             for (p = 1; p < fastHashFillStep; ++p) {
                 size_t const hash = ZSTD_hashPtr(ip + p, hBits, mls);
                 if (hashTable[hash] == 0) {  /* not yet filled */
-                    hashTable[hash] = current + p;
+                    hashTable[hash] = curr + p;
    }   }   }   }
 }
 
 
+/**
+ * If you squint hard enough (and ignore repcodes), the search operation at any
+ * given position is broken into 4 stages:
+ *
+ * 1. Hash   (map position to hash value via input read)
+ * 2. Lookup (map hash val to index via hashtable read)
+ * 3. Load   (map index to value at that position via input read)
+ * 4. Compare
+ *
+ * Each of these steps involves a memory read at an address which is computed
+ * from the previous step. This means these steps must be sequenced and their
+ * latencies are cumulative.
+ *
+ * Rather than do 1->2->3->4 sequentially for a single position before moving
+ * onto the next, this implementation interleaves these operations across the
+ * next few positions:
+ *
+ * R = Repcode Read & Compare
+ * H = Hash
+ * T = Table Lookup
+ * M = Match Read & Compare
+ *
+ * Pos | Time -->
+ * ----+-------------------
+ * N   | ... M
+ * N+1 | ...   TM
+ * N+2 |    R H   T M
+ * N+3 |         H    TM
+ * N+4 |        R H   T M
+ * N+5 |         H   ...
+ * N+6 |        R ...
+ *
+ * This is very much analogous to the pipelining of execution in a CPU. And just
+ * like a CPU, we have to dump the pipeline when we find a match (i.e., take a
+ * branch).
+ *
+ * When this happens, we throw away our current state, and do the following prep
+ * to re-enter the loop:
+ *
+ * Pos | Time -->
+ * ----+-------------------
+ * N   | H T
+ * N+1 |   H
+ *
+ * This is also the work we do at the beginning to enter the loop initially.
+ */
 FORCE_INLINE_TEMPLATE size_t
-ZSTD_compressBlock_fast_generic(
+ZSTD_compressBlock_fast_noDict_generic(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize,
-        U32 const mls)
+        U32 const mls, U32 const hasStep)
 {
     const ZSTD_compressionParameters* const cParams = &ms->cParams;
     U32* const hashTable = ms->hashTable;
     U32 const hlog = cParams->hashLog;
     /* support stepSize of 0 */
-    size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1;
+    size_t const stepSize = hasStep ? (cParams->targetLength + !(cParams->targetLength) + 1) : 2;
     const BYTE* const base = ms->window.base;
     const BYTE* const istart = (const BYTE*)src;
-    /* We check ip0 (ip + 0) and ip1 (ip + 1) each loop */
-    const BYTE* ip0 = istart;
-    const BYTE* ip1;
-    const BYTE* anchor = istart;
     const U32   endIndex = (U32)((size_t)(istart - base) + srcSize);
-    const U32   maxDistance = 1U << cParams->windowLog;
-    const U32   validStartIndex = ms->window.dictLimit;
-    const U32   prefixStartIndex = (endIndex - validStartIndex > maxDistance) ? endIndex - maxDistance : validStartIndex;
+    const U32   prefixStartIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
     const BYTE* const prefixStart = base + prefixStartIndex;
     const BYTE* const iend = istart + srcSize;
     const BYTE* const ilimit = iend - HASH_READ_SIZE;
-    U32 offset_1=rep[0], offset_2=rep[1];
+
+    const BYTE* anchor = istart;
+    const BYTE* ip0 = istart;
+    const BYTE* ip1;
+    const BYTE* ip2;
+    const BYTE* ip3;
+    U32 current0;
+
+    U32 rep_offset1 = rep[0];
+    U32 rep_offset2 = rep[1];
     U32 offsetSaved = 0;
 
-    /* init */
+    size_t hash0; /* hash for ip0 */
+    size_t hash1; /* hash for ip1 */
+    U32 idx; /* match idx for ip0 */
+    U32 mval; /* src value at match idx */
+
+    U32 offcode;
+    const BYTE* match0;
+    size_t mLength;
+
+    /* ip0 and ip1 are always adjacent. The targetLength skipping and
+     * uncompressibility acceleration is applied to every other position,
+     * matching the behavior of #1562. step therefore represents the gap
+     * between pairs of positions, from ip0 to ip2 or ip1 to ip3. */
+    size_t step;
+    const BYTE* nextStep;
+    const size_t kStepIncr = (1 << (kSearchStrength - 1));
+
     DEBUGLOG(5, "ZSTD_compressBlock_fast_generic");
     ip0 += (ip0 == prefixStart);
+    {   U32 const curr = (U32)(ip0 - base);
+        U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog);
+        U32 const maxRep = curr - windowLow;
+        if (rep_offset2 > maxRep) offsetSaved = rep_offset2, rep_offset2 = 0;
+        if (rep_offset1 > maxRep) offsetSaved = rep_offset1, rep_offset1 = 0;
+    }
+
+    /* start each op */
+_start: /* Requires: ip0 */
+
+    step = stepSize;
+    nextStep = ip0 + kStepIncr;
+
+    /* calculate positions, ip0 - anchor == 0, so we skip step calc */
     ip1 = ip0 + 1;
-    {   U32 const maxRep = (U32)(ip0 - prefixStart);
-        if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
-        if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
+    ip2 = ip0 + step;
+    ip3 = ip2 + 1;
+
+    if (ip3 >= ilimit) {
+        goto _cleanup;
     }
 
-    /* Main Search Loop */
-    while (ip1 < ilimit) {   /* < instead of <=, because check at ip0+2 */
-        size_t mLength;
-        BYTE const* ip2 = ip0 + 2;
-        size_t const h0 = ZSTD_hashPtr(ip0, hlog, mls);
-        U32 const val0 = MEM_read32(ip0);
-        size_t const h1 = ZSTD_hashPtr(ip1, hlog, mls);
-        U32 const val1 = MEM_read32(ip1);
-        U32 const current0 = (U32)(ip0-base);
-        U32 const current1 = (U32)(ip1-base);
-        U32 const matchIndex0 = hashTable[h0];
-        U32 const matchIndex1 = hashTable[h1];
-        BYTE const* repMatch = ip2-offset_1;
-        const BYTE* match0 = base + matchIndex0;
-        const BYTE* match1 = base + matchIndex1;
-        U32 offcode;
-        hashTable[h0] = current0;   /* update hash table */
-        hashTable[h1] = current1;   /* update hash table */
-
-        assert(ip0 + 1 == ip1);
-
-        if ((offset_1 > 0) & (MEM_read32(repMatch) == MEM_read32(ip2))) {
-            mLength = ip2[-1] == repMatch[-1] ? 1 : 0;
-            ip0 = ip2 - mLength;
-            match0 = repMatch - mLength;
+    hash0 = ZSTD_hashPtr(ip0, hlog, mls);
+    hash1 = ZSTD_hashPtr(ip1, hlog, mls);
+
+    idx = hashTable[hash0];
+
+    do {
+        /* load repcode match for ip[2]*/
+        const U32 rval = MEM_read32(ip2 - rep_offset1);
+
+        /* write back hash table entry */
+        current0 = (U32)(ip0 - base);
+        hashTable[hash0] = current0;
+
+        /* check repcode at ip[2] */
+        if ((MEM_read32(ip2) == rval) & (rep_offset1 > 0)) {
+            ip0 = ip2;
+            match0 = ip0 - rep_offset1;
+            mLength = ip0[-1] == match0[-1];
+            ip0 -= mLength;
+            match0 -= mLength;
             offcode = 0;
+            mLength += 4;
             goto _match;
         }
-        if ((matchIndex0 > prefixStartIndex) && MEM_read32(match0) == val0) {
-            /* found a regular match */
+
+        /* load match for ip[0] */
+        if (idx >= prefixStartIndex) {
+            mval = MEM_read32(base + idx);
+        } else {
+            mval = MEM_read32(ip0) ^ 1; /* guaranteed to not match. */
+        }
+
+        /* check match at ip[0] */
+        if (MEM_read32(ip0) == mval) {
+            /* found a match! */
             goto _offset;
         }
-        if ((matchIndex1 > prefixStartIndex) && MEM_read32(match1) == val1) {
-            /* found a regular match after one literal */
-            ip0 = ip1;
-            match0 = match1;
+
+        /* lookup ip[1] */
+        idx = hashTable[hash1];
+
+        /* hash ip[2] */
+        hash0 = hash1;
+        hash1 = ZSTD_hashPtr(ip2, hlog, mls);
+
+        /* advance to next positions */
+        ip0 = ip1;
+        ip1 = ip2;
+        ip2 = ip3;
+
+        /* write back hash table entry */
+        current0 = (U32)(ip0 - base);
+        hashTable[hash0] = current0;
+
+        /* load match for ip[0] */
+        if (idx >= prefixStartIndex) {
+            mval = MEM_read32(base + idx);
+        } else {
+            mval = MEM_read32(ip0) ^ 1; /* guaranteed to not match. */
+        }
+
+        /* check match at ip[0] */
+        if (MEM_read32(ip0) == mval) {
+            /* found a match! */
             goto _offset;
         }
-        {   size_t const step = ((size_t)(ip0-anchor) >> (kSearchStrength - 1)) + stepSize;
-            assert(step >= 2);
-            ip0 += step;
-            ip1 += step;
-            continue;
+
+        /* lookup ip[1] */
+        idx = hashTable[hash1];
+
+        /* hash ip[2] */
+        hash0 = hash1;
+        hash1 = ZSTD_hashPtr(ip2, hlog, mls);
+
+        /* advance to next positions */
+        ip0 = ip1;
+        ip1 = ip2;
+        ip2 = ip0 + step;
+        ip3 = ip1 + step;
+
+        /* calculate step */
+        if (ip2 >= nextStep) {
+            step++;
+            PREFETCH_L1(ip1 + 64);
+            PREFETCH_L1(ip1 + 128);
+            nextStep += kStepIncr;
         }
-_offset: /* Requires: ip0, match0 */
-        /* Compute the offset code */
-        offset_2 = offset_1;
-        offset_1 = (U32)(ip0-match0);
-        offcode = offset_1 + ZSTD_REP_MOVE;
-        mLength = 0;
-        /* Count the backwards match length */
-        while (((ip0>anchor) & (match0>prefixStart))
-             && (ip0[-1] == match0[-1])) { ip0--; match0--; mLength++; } /* catch up */
+    } while (ip3 < ilimit);
+
+_cleanup:
+    /* Note that there are probably still a couple positions we could search.
+     * However, it seems to be a meaningful performance hit to try to search
+     * them. So let's not. */
+
+    /* save reps for next block */
+    rep[0] = rep_offset1 ? rep_offset1 : offsetSaved;
+    rep[1] = rep_offset2 ? rep_offset2 : offsetSaved;
+
+    /* Return the last literals size */
+    return (size_t)(iend - anchor);
+
+_offset: /* Requires: ip0, idx */
+
+    /* Compute the offset code. */
+    match0 = base + idx;
+    rep_offset2 = rep_offset1;
+    rep_offset1 = (U32)(ip0-match0);
+    offcode = rep_offset1 + ZSTD_REP_MOVE;
+    mLength = 4;
+
+    /* Count the backwards match length. */
+    while (((ip0>anchor) & (match0>prefixStart)) && (ip0[-1] == match0[-1])) {
+        ip0--;
+        match0--;
+        mLength++;
+    }
 
 _match: /* Requires: ip0, match0, offcode */
-        /* Count the forward length */
-        mLength += ZSTD_count(ip0+mLength+4, match0+mLength+4, iend) + 4;
-        ZSTD_storeSeq(seqStore, (size_t)(ip0-anchor), anchor, iend, offcode, mLength-MINMATCH);
-        /* match found */
-        ip0 += mLength;
-        anchor = ip0;
-        ip1 = ip0 + 1;
 
-        if (ip0 <= ilimit) {
-            /* Fill Table */
-            assert(base+current0+2 > istart);  /* check base overflow */
-            hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2;  /* here because current+2 could be > iend-8 */
-            hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base);
+    /* Count the forward length. */
+    mLength += ZSTD_count(ip0 + mLength, match0 + mLength, iend);
+
+    ZSTD_storeSeq(seqStore, (size_t)(ip0 - anchor), anchor, iend, offcode, mLength - MINMATCH);
+
+    ip0 += mLength;
+    anchor = ip0;
+
+    /* write next hash table entry */
+    if (ip1 < ip0) {
+        hashTable[hash1] = (U32)(ip1 - base);
+    }
 
-            while ( ((ip0 <= ilimit) & (offset_2>0))  /* offset_2==0 means offset_2 is invalidated */
-                 && (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) ) {
+    /* Fill table and check for immediate repcode. */
+    if (ip0 <= ilimit) {
+        /* Fill Table */
+        assert(base+current0+2 > istart);  /* check base overflow */
+        hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2;  /* here because current+2 could be > iend-8 */
+        hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base);
+
+        if (rep_offset2 > 0) { /* rep_offset2==0 means rep_offset2 is invalidated */
+            while ( (ip0 <= ilimit) && (MEM_read32(ip0) == MEM_read32(ip0 - rep_offset2)) ) {
                 /* store sequence */
-                size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4;
-                { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
+                size_t const rLength = ZSTD_count(ip0+4, ip0+4-rep_offset2, iend) + 4;
+                { U32 const tmpOff = rep_offset2; rep_offset2 = rep_offset1; rep_offset1 = tmpOff; } /* swap rep_offset2 <=> rep_offset1 */
                 hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
                 ip0 += rLength;
-                ip1 = ip0 + 1;
                 ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength-MINMATCH);
                 anchor = ip0;
                 continue;   /* faster when present (confirmed on gcc-8) ... (?) */
-            }
-        }
-    }
-
-    /* save reps for next block */
-    rep[0] = offset_1 ? offset_1 : offsetSaved;
-    rep[1] = offset_2 ? offset_2 : offsetSaved;
+    }   }   }
 
-    /* Return the last literals size */
-    return (size_t)(iend - anchor);
+    goto _start;
 }
 
+#define ZSTD_GEN_FAST_FN(dictMode, mls, step)                                                  \
+    static size_t ZSTD_compressBlock_fast_##dictMode##_##mls##_##step(                         \
+            ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],                \
+            void const* src, size_t srcSize)                                                   \
+    {                                                                                          \
+        return ZSTD_compressBlock_fast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls, step); \
+    }
+
+ZSTD_GEN_FAST_FN(noDict, 4, 1)
+ZSTD_GEN_FAST_FN(noDict, 5, 1)
+ZSTD_GEN_FAST_FN(noDict, 6, 1)
+ZSTD_GEN_FAST_FN(noDict, 7, 1)
+
+ZSTD_GEN_FAST_FN(noDict, 4, 0)
+ZSTD_GEN_FAST_FN(noDict, 5, 0)
+ZSTD_GEN_FAST_FN(noDict, 6, 0)
+ZSTD_GEN_FAST_FN(noDict, 7, 0)
 
 size_t ZSTD_compressBlock_fast(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
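
The block comment in the hunk above describes a software-pipelined hash/lookup/load/compare loop. The following toy sketch (hypothetical names — toy_hash, find_first_match — and a simplified 4-byte match test, not the zstd implementation) shows the same dataflow: the table lookup for the next position is issued while the current position is still being compared, so the dependent memory reads of consecutive positions overlap.

    /* Toy sketch of the pipelined search described above; the names and the
     * "empty slot == 0" convention are illustrative, not from libzstd. */
    #include <stdint.h>
    #include <string.h>
    #include <stddef.h>

    #define TOY_TABLE_LOG 12
    static uint32_t toyTable[1 << TOY_TABLE_LOG];

    static uint32_t toy_hash(const uint8_t* p)    /* stage 1: input read */
    {
        uint32_t v;
        memcpy(&v, p, 4);
        return (v * 2654435761u) >> (32 - TOY_TABLE_LOG);
    }

    static size_t find_first_match(const uint8_t* base, size_t n)
    {
        size_t i0 = 1, i1 = 2;                    /* skip 0: 0 marks "empty" */
        uint32_t h0, h1, idx;
        if (n < 12) return n;                     /* too short to search */
        h0  = toy_hash(base + i0);                /* prime the pipeline */
        h1  = toy_hash(base + i1);
        idx = toyTable[h0];                       /* stage 2 for i0 in flight */
        while (i1 + 5 < n) {
            uint32_t v0;
            memcpy(&v0, base + i0, 4);
            toyTable[h0] = (uint32_t)i0;          /* write back current pos */
            if (idx != 0 && memcmp(base + idx, &v0, 4) == 0)
                return i0;                        /* match: dump the pipeline */
            idx = toyTable[h1];                   /* stage 2 for the NEXT pos */
            h0  = h1;                             /* rotate pipeline state */
            h1  = toy_hash(base + i1 + 1);        /* stage 1, one pos ahead */
            i0  = i1;
            i1++;
        }
        return n;                                 /* no match found */
    }

The real loop above goes further: it handles two positions per iteration, widens step when no match has been found for a while (the if (ip2 >= nextStep) branch), and issues PREFETCH_L1 hints so incompressible regions are skipped at low cost.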
@@ -178,24 +338,40 @@ size_t ZSTD_compressBlock_fast(
 {
     U32 const mls = ms->cParams.minMatch;
     assert(ms->dictMatchState == NULL);
-    switch(mls)
-    {
-    default: /* includes case 3 */
-    case 4 :
-        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4);
-    case 5 :
-        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 5);
-    case 6 :
-        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6);
-    case 7 :
-        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7);
+    if (ms->cParams.targetLength > 1) {
+        switch(mls)
+        {
+        default: /* includes case 3 */
+        case 4 :
+            return ZSTD_compressBlock_fast_noDict_4_1(ms, seqStore, rep, src, srcSize);
+        case 5 :
+            return ZSTD_compressBlock_fast_noDict_5_1(ms, seqStore, rep, src, srcSize);
+        case 6 :
+            return ZSTD_compressBlock_fast_noDict_6_1(ms, seqStore, rep, src, srcSize);
+        case 7 :
+            return ZSTD_compressBlock_fast_noDict_7_1(ms, seqStore, rep, src, srcSize);
+        }
+    } else {
+        switch(mls)
+        {
+        default: /* includes case 3 */
+        case 4 :
+            return ZSTD_compressBlock_fast_noDict_4_0(ms, seqStore, rep, src, srcSize);
+        case 5 :
+            return ZSTD_compressBlock_fast_noDict_5_0(ms, seqStore, rep, src, srcSize);
+        case 6 :
+            return ZSTD_compressBlock_fast_noDict_6_0(ms, seqStore, rep, src, srcSize);
+        case 7 :
+            return ZSTD_compressBlock_fast_noDict_7_0(ms, seqStore, rep, src, srcSize);
+        }
+
     }
 }
 
 FORCE_INLINE_TEMPLATE
 size_t ZSTD_compressBlock_fast_dictMatchState_generic(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize, U32 const mls)
+        void const* src, size_t srcSize, U32 const mls, U32 const hasStep)
 {
     const ZSTD_compressionParameters* const cParams = &ms->cParams;
     U32* const hashTable = ms->hashTable;
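
ZSTD_GEN_FAST_FN is a token-pasting template: each instantiation bakes (dictMode, mls, step) into a dedicated wrapper, so the FORCE_INLINE_TEMPLATE generic body is compiled once per parameter combination with both trailing arguments constant. For reference, expanding ZSTD_GEN_FAST_FN(noDict, 4, 1) by hand (a mechanical expansion of the macro shown above, meaningful only inside zstd_fast.c where the zstd types are in scope) yields:

    /* Hand expansion of ZSTD_GEN_FAST_FN(noDict, 4, 1): the ## pasting builds
     * the name, and the constant mls=4 / step=1 arguments let the compiler
     * fold them through the inlined generic body. */
    static size_t ZSTD_compressBlock_fast_noDict_4_1(
            ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
            void const* src, size_t srcSize)
    {
        return ZSTD_compressBlock_fast_noDict_generic(ms, seqStore, rep, src, srcSize, 4, 1);
    }

ZSTD_compressBlock_fast then selects among the eight noDict instantiations at runtime: minMatch picks the mls specialization, and targetLength > 1 picks the step=1 (accelerated) variants over step=0.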
@@ -231,7 +407,9 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
     assert(endIndex - prefixStartIndex <= maxDistance);
     (void)maxDistance; (void)endIndex;   /* these variables are not used when assert() is disabled */
 
-    /* ensure there will be no no underflow
+    (void)hasStep; /* not currently specialized on whether it's accelerated */
+
+    /* ensure there will be no underflow
      * when translating a dict index into a local index */
     assert(prefixStartIndex >= (U32)(dictEnd - dictBase));
 
@@ -247,14 +425,14 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
     while (ip < ilimit) {   /* < instead of <=, because repcode check at (ip+1) */
         size_t mLength;
         size_t const h = ZSTD_hashPtr(ip, hlog, mls);
-        U32 const current = (U32)(ip-base);
+        U32 const curr = (U32)(ip-base);
         U32 const matchIndex = hashTable[h];
         const BYTE* match = base + matchIndex;
-        const U32 repIndex = current + 1 - offset_1;
+        const U32 repIndex = curr + 1 - offset_1;
         const BYTE* repMatch = (repIndex < prefixStartIndex) ?
                                dictBase + (repIndex - dictIndexDelta) :
                                base + repIndex;
-        hashTable[h] = current;   /* update hash table */
+        hashTable[h] = curr;   /* update hash table */
 
         if ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */
           && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
@@ -273,7 +451,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
             continue;
         } else {
             /* found a dict match */
-            U32 const offset = (U32)(current-dictMatchIndex-dictIndexDelta);
+            U32 const offset = (U32)(curr-dictMatchIndex-dictIndexDelta);
             mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4;
             while (((ip>anchor) & (dictMatch>dictStart))
                  && (ip[-1] == dictMatch[-1])) {
@@ -305,8 +483,8 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
 
         if (ip <= ilimit) {
             /* Fill Table */
-            assert(base+current+2 > istart);  /* check base overflow */
-            hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2;  /* here because current+2 could be > iend-8 */
+            assert(base+curr+2 > istart);  /* check base overflow */
+            hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2;  /* here because curr+2 could be > iend-8 */
             hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
 
             /* check immediate repcode */
@@ -340,6 +518,12 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
     return (size_t)(iend - anchor);
 }
 
+
+ZSTD_GEN_FAST_FN(dictMatchState, 4, 0)
+ZSTD_GEN_FAST_FN(dictMatchState, 5, 0)
+ZSTD_GEN_FAST_FN(dictMatchState, 6, 0)
+ZSTD_GEN_FAST_FN(dictMatchState, 7, 0)
+
 size_t ZSTD_compressBlock_fast_dictMatchState(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
@@ -350,20 +534,20 @@ size_t ZSTD_compressBlock_fast_dictMatchState(
     {
     default: /* includes case 3 */
     case 4 :
-        return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 4);
+        return ZSTD_compressBlock_fast_dictMatchState_4_0(ms, seqStore, rep, src, srcSize);
     case 5 :
-        return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 5);
+        return ZSTD_compressBlock_fast_dictMatchState_5_0(ms, seqStore, rep, src, srcSize);
     case 6 :
-        return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 6);
+        return ZSTD_compressBlock_fast_dictMatchState_6_0(ms, seqStore, rep, src, srcSize);
     case 7 :
-        return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 7);
+        return ZSTD_compressBlock_fast_dictMatchState_7_0(ms, seqStore, rep, src, srcSize);
     }
 }
 
 
 static size_t ZSTD_compressBlock_fast_extDict_generic(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize, U32 const mls)
+        void const* src, size_t srcSize, U32 const mls, U32 const hasStep)
 {
     const ZSTD_compressionParameters* const cParams = &ms->cParams;
     U32* const hashTable = ms->hashTable;
@@ -387,11 +571,13 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
     const BYTE* const ilimit = iend - 8;
     U32 offset_1=rep[0], offset_2=rep[1];
 
-    DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic");
+    (void)hasStep; /* not currently specialized on whether it's accelerated */
+
+    DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic (offset_1=%u)", offset_1);
 
     /* switch to "regular" variant if extDict is invalidated due to maxDistance */
     if (prefixStartIndex == dictStartIndex)
-        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, mls);
+        return ZSTD_compressBlock_fast(ms, seqStore, rep, src, srcSize);
 
     /* Search Loop */
     while (ip < ilimit) {  /* < instead of <=, because (ip+1) */
@@ -399,14 +585,15 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
         const U32 matchIndex = hashTable[h];
         const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
         const BYTE* match = matchBase + matchIndex;
-        const U32 current = (U32)(ip-base);
-        const U32 repIndex = current + 1 - offset_1;
+        const U32 curr = (U32)(ip-base);
+        const U32 repIndex = curr + 1 - offset_1;
         const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
         const BYTE* const repMatch = repBase + repIndex;
-        hashTable[h] = current;   /* update hash table */
-        assert(offset_1 <= current +1);   /* check repIndex */
+        hashTable[h] = curr;   /* update hash table */
+        DEBUGLOG(7, "offset_1 = %u , curr = %u", offset_1, curr);
 
-        if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex))
+        if ( ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */
+             & (offset_1 <= curr+1 - dictStartIndex) ) /* note: we are searching at curr+1 */
           && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
             const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
             size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4;
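
The reworked repcode guard above replaces the old strict repIndex > dictStartIndex test with a bound phrased in terms of offset_1. The two are related by simple unsigned algebra — with repIndex = curr + 1 - offset_1, requiring repIndex >= dictStartIndex is the same as requiring offset_1 <= curr + 1 - dictStartIndex — but the new form never relies on a repIndex value that may already have wrapped around when offset_1 > curr + 1. A hedged sketch of that equivalence (repOffsetInWindow is a hypothetical helper, not a libzstd function):

    typedef unsigned int U32;

    /* Hypothetical illustration: validity of a repcode offset for a search at
     * position curr + 1. Mathematically repIndex >= dictStartIndex, but stated
     * so that it stays correct even when curr + 1 - offset_1 would underflow. */
    static int repOffsetInWindow(U32 curr, U32 offset_1, U32 dictStartIndex)
    {
        /* repIndex = curr + 1 - offset_1
         * repIndex >= dictStartIndex  <=>  offset_1 <= curr + 1 - dictStartIndex */
        return offset_1 <= curr + 1 - dictStartIndex;
    }

A matching reformulation (offset_2 <= curr - dictStartIndex) appears in the immediate-repcode check later in the same function.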
@@ -423,7 +610,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
         }
         {   const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
             const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
-            U32 const offset = current - matchIndex;
+            U32 const offset = curr - matchIndex;
             size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
             while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; }   /* catch up */
             offset_2 = offset_1; offset_1 = offset;  /* update offset history */
@@ -434,14 +621,14 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
 
         if (ip <= ilimit) {
             /* Fill Table */
-            hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2;
+            hashTable[ZSTD_hashPtr(base+curr+2, hlog, mls)] = curr+2;
             hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
             /* check immediate repcode */
             while (ip <= ilimit) {
                 U32 const current2 = (U32)(ip-base);
                 U32 const repIndex2 = current2 - offset_2;
                 const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
-                if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (repIndex2 > dictStartIndex))  /* intentional overflow */
+                if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 <= curr - dictStartIndex))  /* intentional overflow */
                   && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                     const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                     size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
@@ -463,6 +650,10 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
     return (size_t)(iend - anchor);
 }
 
+ZSTD_GEN_FAST_FN(extDict, 4, 0)
+ZSTD_GEN_FAST_FN(extDict, 5, 0)
+ZSTD_GEN_FAST_FN(extDict, 6, 0)
+ZSTD_GEN_FAST_FN(extDict, 7, 0)
 
 size_t ZSTD_compressBlock_fast_extDict(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
@@ -473,12 +664,12 @@ size_t ZSTD_compressBlock_fast_extDict(
     {
     default: /* includes case 3 */
     case 4 :
-        return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 4);
+        return ZSTD_compressBlock_fast_extDict_4_0(ms, seqStore, rep, src, srcSize);
     case 5 :
-        return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 5);
+        return ZSTD_compressBlock_fast_extDict_5_0(ms, seqStore, rep, src, srcSize);
     case 6 :
-        return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 6);
+        return ZSTD_compressBlock_fast_extDict_6_0(ms, seqStore, rep, src, srcSize);
     case 7 :
-        return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 7);
+        return ZSTD_compressBlock_fast_extDict_7_0(ms, seqStore, rep, src, srcSize);
     }
 }
data/ext/zstdruby/libzstd/compress/zstd_fast.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
+ * Copyright (c) Yann Collet, Facebook, Inc.
  * All rights reserved.
  *
  * This source code is licensed under both the BSD-style license (found in the
@@ -15,7 +15,7 @@
 extern "C" {
 #endif
 
-#include "mem.h"   /* U32 */
+#include "../common/mem.h"   /* U32 */
 #include "zstd_compress_internal.h"
 
 void ZSTD_fillHashTable(ZSTD_matchState_t* ms,