zstd-ruby 1.4.5.0 → 1.5.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115)
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/README.md +78 -5
  4. data/Rakefile +8 -2
  5. data/ext/zstdruby/common.h +15 -0
  6. data/ext/zstdruby/extconf.rb +3 -2
  7. data/ext/zstdruby/libzstd/common/allocations.h +55 -0
  8. data/ext/zstdruby/libzstd/common/bits.h +200 -0
  9. data/ext/zstdruby/libzstd/common/bitstream.h +45 -62
  10. data/ext/zstdruby/libzstd/common/compiler.h +205 -22
  11. data/ext/zstdruby/libzstd/common/cpu.h +1 -3
  12. data/ext/zstdruby/libzstd/common/debug.c +1 -1
  13. data/ext/zstdruby/libzstd/common/debug.h +12 -19
  14. data/ext/zstdruby/libzstd/common/entropy_common.c +172 -48
  15. data/ext/zstdruby/libzstd/common/error_private.c +10 -2
  16. data/ext/zstdruby/libzstd/common/error_private.h +82 -3
  17. data/ext/zstdruby/libzstd/common/fse.h +37 -86
  18. data/ext/zstdruby/libzstd/common/fse_decompress.c +117 -92
  19. data/ext/zstdruby/libzstd/common/huf.h +99 -166
  20. data/ext/zstdruby/libzstd/common/mem.h +124 -142
  21. data/ext/zstdruby/libzstd/common/pool.c +54 -27
  22. data/ext/zstdruby/libzstd/common/pool.h +10 -4
  23. data/ext/zstdruby/libzstd/common/portability_macros.h +156 -0
  24. data/ext/zstdruby/libzstd/common/threading.c +74 -19
  25. data/ext/zstdruby/libzstd/common/threading.h +5 -10
  26. data/ext/zstdruby/libzstd/common/xxhash.c +7 -847
  27. data/ext/zstdruby/libzstd/common/xxhash.h +5568 -167
  28. data/ext/zstdruby/libzstd/common/zstd_common.c +2 -37
  29. data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
  30. data/ext/zstdruby/libzstd/common/zstd_internal.h +132 -187
  31. data/ext/zstdruby/libzstd/common/zstd_trace.h +163 -0
  32. data/ext/zstdruby/libzstd/compress/clevels.h +134 -0
  33. data/ext/zstdruby/libzstd/compress/fse_compress.c +83 -157
  34. data/ext/zstdruby/libzstd/compress/hist.c +27 -29
  35. data/ext/zstdruby/libzstd/compress/hist.h +2 -2
  36. data/ext/zstdruby/libzstd/compress/huf_compress.c +916 -279
  37. data/ext/zstdruby/libzstd/compress/zstd_compress.c +3773 -1019
  38. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +610 -203
  39. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +119 -42
  40. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +16 -6
  41. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +42 -19
  42. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +1 -1
  43. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +49 -317
  44. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +1 -1
  45. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +320 -103
  46. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +388 -151
  47. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +3 -2
  48. data/ext/zstdruby/libzstd/compress/zstd_fast.c +729 -265
  49. data/ext/zstdruby/libzstd/compress/zstd_fast.h +3 -2
  50. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +1270 -251
  51. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +61 -1
  52. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +324 -219
  53. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +9 -2
  54. data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +106 -0
  55. data/ext/zstdruby/libzstd/compress/zstd_opt.c +481 -209
  56. data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
  57. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +181 -457
  58. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +34 -113
  59. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +1199 -565
  60. data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +576 -0
  61. data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +12 -12
  62. data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +2 -2
  63. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +627 -157
  64. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +1086 -326
  65. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +19 -5
  66. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +62 -13
  67. data/ext/zstdruby/libzstd/dictBuilder/cover.c +73 -52
  68. data/ext/zstdruby/libzstd/dictBuilder/cover.h +7 -6
  69. data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
  70. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +44 -35
  71. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +103 -111
  72. data/ext/zstdruby/libzstd/{dictBuilder/zdict.h → zdict.h} +203 -34
  73. data/ext/zstdruby/libzstd/zstd.h +1217 -287
  74. data/ext/zstdruby/libzstd/{common/zstd_errors.h → zstd_errors.h} +28 -8
  75. data/ext/zstdruby/main.c +20 -0
  76. data/ext/zstdruby/skippable_frame.c +63 -0
  77. data/ext/zstdruby/streaming_compress.c +177 -0
  78. data/ext/zstdruby/streaming_compress.h +5 -0
  79. data/ext/zstdruby/streaming_decompress.c +123 -0
  80. data/ext/zstdruby/zstdruby.c +114 -32
  81. data/lib/zstd-ruby/version.rb +1 -1
  82. data/lib/zstd-ruby.rb +0 -1
  83. data/zstd-ruby.gemspec +1 -1
  84. metadata +19 -36
  85. data/.travis.yml +0 -14
  86. data/ext/zstdruby/libzstd/.gitignore +0 -3
  87. data/ext/zstdruby/libzstd/BUCK +0 -234
  88. data/ext/zstdruby/libzstd/Makefile +0 -354
  89. data/ext/zstdruby/libzstd/README.md +0 -179
  90. data/ext/zstdruby/libzstd/deprecated/zbuff.h +0 -214
  91. data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +0 -26
  92. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +0 -147
  93. data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +0 -75
  94. data/ext/zstdruby/libzstd/dll/example/Makefile +0 -48
  95. data/ext/zstdruby/libzstd/dll/example/README.md +0 -69
  96. data/ext/zstdruby/libzstd/dll/example/build_package.bat +0 -20
  97. data/ext/zstdruby/libzstd/dll/example/fullbench-dll.sln +0 -25
  98. data/ext/zstdruby/libzstd/dll/example/fullbench-dll.vcxproj +0 -181
  99. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +0 -415
  100. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +0 -2158
  101. data/ext/zstdruby/libzstd/legacy/zstd_v01.h +0 -94
  102. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +0 -3518
  103. data/ext/zstdruby/libzstd/legacy/zstd_v02.h +0 -93
  104. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +0 -3160
  105. data/ext/zstdruby/libzstd/legacy/zstd_v03.h +0 -93
  106. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +0 -3647
  107. data/ext/zstdruby/libzstd/legacy/zstd_v04.h +0 -142
  108. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +0 -4050
  109. data/ext/zstdruby/libzstd/legacy/zstd_v05.h +0 -162
  110. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +0 -4154
  111. data/ext/zstdruby/libzstd/legacy/zstd_v06.h +0 -172
  112. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +0 -4541
  113. data/ext/zstdruby/libzstd/legacy/zstd_v07.h +0 -187
  114. data/ext/zstdruby/libzstd/libzstd.pc.in +0 -15
  115. data/ext/zstdruby/zstdruby.h +0 -6
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -11,8 +11,43 @@
11
11
  #include "zstd_compress_internal.h"
12
12
  #include "zstd_double_fast.h"
13
13
 
14
+ static void ZSTD_fillDoubleHashTableForCDict(ZSTD_matchState_t* ms,
15
+ void const* end, ZSTD_dictTableLoadMethod_e dtlm)
16
+ {
17
+ const ZSTD_compressionParameters* const cParams = &ms->cParams;
18
+ U32* const hashLarge = ms->hashTable;
19
+ U32 const hBitsL = cParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
20
+ U32 const mls = cParams->minMatch;
21
+ U32* const hashSmall = ms->chainTable;
22
+ U32 const hBitsS = cParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS;
23
+ const BYTE* const base = ms->window.base;
24
+ const BYTE* ip = base + ms->nextToUpdate;
25
+ const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
26
+ const U32 fastHashFillStep = 3;
14
27
 
15
- void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
28
+ /* Always insert every fastHashFillStep position into the hash tables.
29
+ * Insert the other positions into the large hash table if their entry
30
+ * is empty.
31
+ */
32
+ for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
33
+ U32 const curr = (U32)(ip - base);
34
+ U32 i;
35
+ for (i = 0; i < fastHashFillStep; ++i) {
36
+ size_t const smHashAndTag = ZSTD_hashPtr(ip + i, hBitsS, mls);
37
+ size_t const lgHashAndTag = ZSTD_hashPtr(ip + i, hBitsL, 8);
38
+ if (i == 0) {
39
+ ZSTD_writeTaggedIndex(hashSmall, smHashAndTag, curr + i);
40
+ }
41
+ if (i == 0 || hashLarge[lgHashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS] == 0) {
42
+ ZSTD_writeTaggedIndex(hashLarge, lgHashAndTag, curr + i);
43
+ }
44
+ /* Only load extra positions for ZSTD_dtlm_full */
45
+ if (dtlm == ZSTD_dtlm_fast)
46
+ break;
47
+ } }
48
+ }
49
+
50
+ static void ZSTD_fillDoubleHashTableForCCtx(ZSTD_matchState_t* ms,
16
51
  void const* end, ZSTD_dictTableLoadMethod_e dtlm)
17
52
  {
18
53
  const ZSTD_compressionParameters* const cParams = &ms->cParams;
@@ -31,27 +66,249 @@ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
31
66
  * is empty.
32
67
  */
33
68
  for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
34
- U32 const current = (U32)(ip - base);
69
+ U32 const curr = (U32)(ip - base);
35
70
  U32 i;
36
71
  for (i = 0; i < fastHashFillStep; ++i) {
37
72
  size_t const smHash = ZSTD_hashPtr(ip + i, hBitsS, mls);
38
73
  size_t const lgHash = ZSTD_hashPtr(ip + i, hBitsL, 8);
39
74
  if (i == 0)
40
- hashSmall[smHash] = current + i;
75
+ hashSmall[smHash] = curr + i;
41
76
  if (i == 0 || hashLarge[lgHash] == 0)
42
- hashLarge[lgHash] = current + i;
77
+ hashLarge[lgHash] = curr + i;
43
78
  /* Only load extra positions for ZSTD_dtlm_full */
44
79
  if (dtlm == ZSTD_dtlm_fast)
45
80
  break;
46
- } }
81
+ } }
82
+ }
83
+
84
+ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
85
+ const void* const end,
86
+ ZSTD_dictTableLoadMethod_e dtlm,
87
+ ZSTD_tableFillPurpose_e tfp)
88
+ {
89
+ if (tfp == ZSTD_tfp_forCDict) {
90
+ ZSTD_fillDoubleHashTableForCDict(ms, end, dtlm);
91
+ } else {
92
+ ZSTD_fillDoubleHashTableForCCtx(ms, end, dtlm);
93
+ }
94
+ }
95
+
96
+
97
+ FORCE_INLINE_TEMPLATE
98
+ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
99
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
100
+ void const* src, size_t srcSize, U32 const mls /* template */)
101
+ {
102
+ ZSTD_compressionParameters const* cParams = &ms->cParams;
103
+ U32* const hashLong = ms->hashTable;
104
+ const U32 hBitsL = cParams->hashLog;
105
+ U32* const hashSmall = ms->chainTable;
106
+ const U32 hBitsS = cParams->chainLog;
107
+ const BYTE* const base = ms->window.base;
108
+ const BYTE* const istart = (const BYTE*)src;
109
+ const BYTE* anchor = istart;
110
+ const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
111
+ /* presumes that, if there is a dictionary, it must be using Attach mode */
112
+ const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
113
+ const BYTE* const prefixLowest = base + prefixLowestIndex;
114
+ const BYTE* const iend = istart + srcSize;
115
+ const BYTE* const ilimit = iend - HASH_READ_SIZE;
116
+ U32 offset_1=rep[0], offset_2=rep[1];
117
+ U32 offsetSaved1 = 0, offsetSaved2 = 0;
118
+
119
+ size_t mLength;
120
+ U32 offset;
121
+ U32 curr;
122
+
123
+ /* how many positions to search before increasing step size */
124
+ const size_t kStepIncr = 1 << kSearchStrength;
125
+ /* the position at which to increment the step size if no match is found */
126
+ const BYTE* nextStep;
127
+ size_t step; /* the current step size */
128
+
129
+ size_t hl0; /* the long hash at ip */
130
+ size_t hl1; /* the long hash at ip1 */
131
+
132
+ U32 idxl0; /* the long match index for ip */
133
+ U32 idxl1; /* the long match index for ip1 */
134
+
135
+ const BYTE* matchl0; /* the long match for ip */
136
+ const BYTE* matchs0; /* the short match for ip */
137
+ const BYTE* matchl1; /* the long match for ip1 */
138
+
139
+ const BYTE* ip = istart; /* the current position */
140
+ const BYTE* ip1; /* the next position */
141
+
142
+ DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_noDict_generic");
143
+
144
+ /* init */
145
+ ip += ((ip - prefixLowest) == 0);
146
+ {
147
+ U32 const current = (U32)(ip - base);
148
+ U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
149
+ U32 const maxRep = current - windowLow;
150
+ if (offset_2 > maxRep) offsetSaved2 = offset_2, offset_2 = 0;
151
+ if (offset_1 > maxRep) offsetSaved1 = offset_1, offset_1 = 0;
152
+ }
153
+
154
+ /* Outer Loop: one iteration per match found and stored */
155
+ while (1) {
156
+ step = 1;
157
+ nextStep = ip + kStepIncr;
158
+ ip1 = ip + step;
159
+
160
+ if (ip1 > ilimit) {
161
+ goto _cleanup;
162
+ }
163
+
164
+ hl0 = ZSTD_hashPtr(ip, hBitsL, 8);
165
+ idxl0 = hashLong[hl0];
166
+ matchl0 = base + idxl0;
167
+
168
+ /* Inner Loop: one iteration per search / position */
169
+ do {
170
+ const size_t hs0 = ZSTD_hashPtr(ip, hBitsS, mls);
171
+ const U32 idxs0 = hashSmall[hs0];
172
+ curr = (U32)(ip-base);
173
+ matchs0 = base + idxs0;
174
+
175
+ hashLong[hl0] = hashSmall[hs0] = curr; /* update hash tables */
176
+
177
+ /* check noDict repcode */
178
+ if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
179
+ mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
180
+ ip++;
181
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
182
+ goto _match_stored;
183
+ }
184
+
185
+ hl1 = ZSTD_hashPtr(ip1, hBitsL, 8);
186
+
187
+ if (idxl0 > prefixLowestIndex) {
188
+ /* check prefix long match */
189
+ if (MEM_read64(matchl0) == MEM_read64(ip)) {
190
+ mLength = ZSTD_count(ip+8, matchl0+8, iend) + 8;
191
+ offset = (U32)(ip-matchl0);
192
+ while (((ip>anchor) & (matchl0>prefixLowest)) && (ip[-1] == matchl0[-1])) { ip--; matchl0--; mLength++; } /* catch up */
193
+ goto _match_found;
194
+ }
195
+ }
196
+
197
+ idxl1 = hashLong[hl1];
198
+ matchl1 = base + idxl1;
199
+
200
+ if (idxs0 > prefixLowestIndex) {
201
+ /* check prefix short match */
202
+ if (MEM_read32(matchs0) == MEM_read32(ip)) {
203
+ goto _search_next_long;
204
+ }
205
+ }
206
+
207
+ if (ip1 >= nextStep) {
208
+ PREFETCH_L1(ip1 + 64);
209
+ PREFETCH_L1(ip1 + 128);
210
+ step++;
211
+ nextStep += kStepIncr;
212
+ }
213
+ ip = ip1;
214
+ ip1 += step;
215
+
216
+ hl0 = hl1;
217
+ idxl0 = idxl1;
218
+ matchl0 = matchl1;
219
+ #if defined(__aarch64__)
220
+ PREFETCH_L1(ip+256);
221
+ #endif
222
+ } while (ip1 <= ilimit);
223
+
224
+ _cleanup:
225
+ /* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0),
226
+ * rotate saved offsets. See comment in ZSTD_compressBlock_fast_noDict for more context. */
227
+ offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2;
228
+
229
+ /* save reps for next block */
230
+ rep[0] = offset_1 ? offset_1 : offsetSaved1;
231
+ rep[1] = offset_2 ? offset_2 : offsetSaved2;
232
+
233
+ /* Return the last literals size */
234
+ return (size_t)(iend - anchor);
235
+
236
+ _search_next_long:
237
+
238
+ /* check prefix long +1 match */
239
+ if (idxl1 > prefixLowestIndex) {
240
+ if (MEM_read64(matchl1) == MEM_read64(ip1)) {
241
+ ip = ip1;
242
+ mLength = ZSTD_count(ip+8, matchl1+8, iend) + 8;
243
+ offset = (U32)(ip-matchl1);
244
+ while (((ip>anchor) & (matchl1>prefixLowest)) && (ip[-1] == matchl1[-1])) { ip--; matchl1--; mLength++; } /* catch up */
245
+ goto _match_found;
246
+ }
247
+ }
248
+
249
+ /* if no long +1 match, explore the short match we found */
250
+ mLength = ZSTD_count(ip+4, matchs0+4, iend) + 4;
251
+ offset = (U32)(ip - matchs0);
252
+ while (((ip>anchor) & (matchs0>prefixLowest)) && (ip[-1] == matchs0[-1])) { ip--; matchs0--; mLength++; } /* catch up */
253
+
254
+ /* fall-through */
255
+
256
+ _match_found: /* requires ip, offset, mLength */
257
+ offset_2 = offset_1;
258
+ offset_1 = offset;
259
+
260
+ if (step < 4) {
261
+ /* It is unsafe to write this value back to the hashtable when ip1 is
262
+ * greater than or equal to the new ip we will have after we're done
263
+ * processing this match. Rather than perform that test directly
264
+ * (ip1 >= ip + mLength), which costs speed in practice, we do a simpler
265
+ * more predictable test. The minmatch even if we take a short match is
266
+ * 4 bytes, so as long as step, the distance between ip and ip1
267
+ * (initially) is less than 4, we know ip1 < new ip. */
268
+ hashLong[hl1] = (U32)(ip1 - base);
269
+ }
270
+
271
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
272
+
273
+ _match_stored:
274
+ /* match found */
275
+ ip += mLength;
276
+ anchor = ip;
277
+
278
+ if (ip <= ilimit) {
279
+ /* Complementary insertion */
280
+ /* done after iLimit test, as candidates could be > iend-8 */
281
+ { U32 const indexToInsert = curr+2;
282
+ hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
283
+ hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
284
+ hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
285
+ hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
286
+ }
287
+
288
+ /* check immediate repcode */
289
+ while ( (ip <= ilimit)
290
+ && ( (offset_2>0)
291
+ & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
292
+ /* store sequence */
293
+ size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
294
+ U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */
295
+ hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
296
+ hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
297
+ ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, rLength);
298
+ ip += rLength;
299
+ anchor = ip;
300
+ continue; /* faster when present ... (?) */
301
+ }
302
+ }
303
+ }
47
304
  }
48
305
 
49
306
 
50
307
  FORCE_INLINE_TEMPLATE
51
- size_t ZSTD_compressBlock_doubleFast_generic(
308
+ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
52
309
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
53
310
  void const* src, size_t srcSize,
54
- U32 const mls /* template */, ZSTD_dictMode_e const dictMode)
311
+ U32 const mls /* template */)
55
312
  {
56
313
  ZSTD_compressionParameters const* cParams = &ms->cParams;
57
314
  U32* const hashLong = ms->hashTable;
@@ -69,57 +326,39 @@ size_t ZSTD_compressBlock_doubleFast_generic(
69
326
  const BYTE* const iend = istart + srcSize;
70
327
  const BYTE* const ilimit = iend - HASH_READ_SIZE;
71
328
  U32 offset_1=rep[0], offset_2=rep[1];
72
- U32 offsetSaved = 0;
73
329
 
74
330
  const ZSTD_matchState_t* const dms = ms->dictMatchState;
75
- const ZSTD_compressionParameters* const dictCParams =
76
- dictMode == ZSTD_dictMatchState ?
77
- &dms->cParams : NULL;
78
- const U32* const dictHashLong = dictMode == ZSTD_dictMatchState ?
79
- dms->hashTable : NULL;
80
- const U32* const dictHashSmall = dictMode == ZSTD_dictMatchState ?
81
- dms->chainTable : NULL;
82
- const U32 dictStartIndex = dictMode == ZSTD_dictMatchState ?
83
- dms->window.dictLimit : 0;
84
- const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ?
85
- dms->window.base : NULL;
86
- const BYTE* const dictStart = dictMode == ZSTD_dictMatchState ?
87
- dictBase + dictStartIndex : NULL;
88
- const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ?
89
- dms->window.nextSrc : NULL;
90
- const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ?
91
- prefixLowestIndex - (U32)(dictEnd - dictBase) :
92
- 0;
93
- const U32 dictHBitsL = dictMode == ZSTD_dictMatchState ?
94
- dictCParams->hashLog : hBitsL;
95
- const U32 dictHBitsS = dictMode == ZSTD_dictMatchState ?
96
- dictCParams->chainLog : hBitsS;
331
+ const ZSTD_compressionParameters* const dictCParams = &dms->cParams;
332
+ const U32* const dictHashLong = dms->hashTable;
333
+ const U32* const dictHashSmall = dms->chainTable;
334
+ const U32 dictStartIndex = dms->window.dictLimit;
335
+ const BYTE* const dictBase = dms->window.base;
336
+ const BYTE* const dictStart = dictBase + dictStartIndex;
337
+ const BYTE* const dictEnd = dms->window.nextSrc;
338
+ const U32 dictIndexDelta = prefixLowestIndex - (U32)(dictEnd - dictBase);
339
+ const U32 dictHBitsL = dictCParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
340
+ const U32 dictHBitsS = dictCParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS;
97
341
  const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictStart));
98
342
 
99
- DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_generic");
100
-
101
- assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState);
343
+ DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_dictMatchState_generic");
102
344
 
103
345
  /* if a dictionary is attached, it must be within window range */
104
- if (dictMode == ZSTD_dictMatchState) {
105
- assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);
346
+ assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);
347
+
348
+ if (ms->prefetchCDictTables) {
349
+ size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32);
350
+ size_t const chainTableBytes = (((size_t)1) << dictCParams->chainLog) * sizeof(U32);
351
+ PREFETCH_AREA(dictHashLong, hashTableBytes)
352
+ PREFETCH_AREA(dictHashSmall, chainTableBytes)
106
353
  }
107
354
 
108
355
  /* init */
109
356
  ip += (dictAndPrefixLength == 0);
110
- if (dictMode == ZSTD_noDict) {
111
- U32 const current = (U32)(ip - base);
112
- U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
113
- U32 const maxRep = current - windowLow;
114
- if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
115
- if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
116
- }
117
- if (dictMode == ZSTD_dictMatchState) {
118
- /* dictMatchState repCode checks don't currently handle repCode == 0
119
- * disabling. */
120
- assert(offset_1 <= dictAndPrefixLength);
121
- assert(offset_2 <= dictAndPrefixLength);
122
- }
357
+
358
+ /* dictMatchState repCode checks don't currently handle repCode == 0
359
+ * disabling. */
360
+ assert(offset_1 <= dictAndPrefixLength);
361
+ assert(offset_2 <= dictAndPrefixLength);
123
362
 
124
363
  /* Main Search Loop */
125
364
  while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
@@ -127,37 +366,30 @@ size_t ZSTD_compressBlock_doubleFast_generic(
127
366
  U32 offset;
128
367
  size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8);
129
368
  size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
130
- size_t const dictHL = ZSTD_hashPtr(ip, dictHBitsL, 8);
131
- size_t const dictHS = ZSTD_hashPtr(ip, dictHBitsS, mls);
132
- U32 const current = (U32)(ip-base);
369
+ size_t const dictHashAndTagL = ZSTD_hashPtr(ip, dictHBitsL, 8);
370
+ size_t const dictHashAndTagS = ZSTD_hashPtr(ip, dictHBitsS, mls);
371
+ U32 const dictMatchIndexAndTagL = dictHashLong[dictHashAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS];
372
+ U32 const dictMatchIndexAndTagS = dictHashSmall[dictHashAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS];
373
+ int const dictTagsMatchL = ZSTD_comparePackedTags(dictMatchIndexAndTagL, dictHashAndTagL);
374
+ int const dictTagsMatchS = ZSTD_comparePackedTags(dictMatchIndexAndTagS, dictHashAndTagS);
375
+ U32 const curr = (U32)(ip-base);
133
376
  U32 const matchIndexL = hashLong[h2];
134
377
  U32 matchIndexS = hashSmall[h];
135
378
  const BYTE* matchLong = base + matchIndexL;
136
379
  const BYTE* match = base + matchIndexS;
137
- const U32 repIndex = current + 1 - offset_1;
138
- const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
139
- && repIndex < prefixLowestIndex) ?
380
+ const U32 repIndex = curr + 1 - offset_1;
381
+ const BYTE* repMatch = (repIndex < prefixLowestIndex) ?
140
382
  dictBase + (repIndex - dictIndexDelta) :
141
383
  base + repIndex;
142
- hashLong[h2] = hashSmall[h] = current; /* update hash tables */
384
+ hashLong[h2] = hashSmall[h] = curr; /* update hash tables */
143
385
 
144
- /* check dictMatchState repcode */
145
- if (dictMode == ZSTD_dictMatchState
146
- && ((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
386
+ /* check repcode */
387
+ if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
147
388
  && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
148
389
  const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
149
390
  mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
150
391
  ip++;
151
- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
152
- goto _match_stored;
153
- }
154
-
155
- /* check noDict repcode */
156
- if ( dictMode == ZSTD_noDict
157
- && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
158
- mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
159
- ip++;
160
- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
392
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
161
393
  goto _match_stored;
162
394
  }
163
395
 
@@ -169,15 +401,15 @@ size_t ZSTD_compressBlock_doubleFast_generic(
169
401
  while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
170
402
  goto _match_found;
171
403
  }
172
- } else if (dictMode == ZSTD_dictMatchState) {
404
+ } else if (dictTagsMatchL) {
173
405
  /* check dictMatchState long match */
174
- U32 const dictMatchIndexL = dictHashLong[dictHL];
406
+ U32 const dictMatchIndexL = dictMatchIndexAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS;
175
407
  const BYTE* dictMatchL = dictBase + dictMatchIndexL;
176
408
  assert(dictMatchL < dictEnd);
177
409
 
178
410
  if (dictMatchL > dictStart && MEM_read64(dictMatchL) == MEM_read64(ip)) {
179
411
  mLength = ZSTD_count_2segments(ip+8, dictMatchL+8, iend, dictEnd, prefixLowest) + 8;
180
- offset = (U32)(current - dictMatchIndexL - dictIndexDelta);
412
+ offset = (U32)(curr - dictMatchIndexL - dictIndexDelta);
181
413
  while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */
182
414
  goto _match_found;
183
415
  } }
@@ -187,9 +419,9 @@ size_t ZSTD_compressBlock_doubleFast_generic(
187
419
  if (MEM_read32(match) == MEM_read32(ip)) {
188
420
  goto _search_next_long;
189
421
  }
190
- } else if (dictMode == ZSTD_dictMatchState) {
422
+ } else if (dictTagsMatchS) {
191
423
  /* check dictMatchState short match */
192
- U32 const dictMatchIndexS = dictHashSmall[dictHS];
424
+ U32 const dictMatchIndexS = dictMatchIndexAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS;
193
425
  match = dictBase + dictMatchIndexS;
194
426
  matchIndexS = dictMatchIndexS + dictIndexDelta;
195
427
 
@@ -204,12 +436,13 @@ size_t ZSTD_compressBlock_doubleFast_generic(
204
436
  continue;
205
437
 
206
438
  _search_next_long:
207
-
208
439
  { size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
209
- size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
440
+ size_t const dictHashAndTagL3 = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
210
441
  U32 const matchIndexL3 = hashLong[hl3];
442
+ U32 const dictMatchIndexAndTagL3 = dictHashLong[dictHashAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS];
443
+ int const dictTagsMatchL3 = ZSTD_comparePackedTags(dictMatchIndexAndTagL3, dictHashAndTagL3);
211
444
  const BYTE* matchL3 = base + matchIndexL3;
212
- hashLong[hl3] = current + 1;
445
+ hashLong[hl3] = curr + 1;
213
446
 
214
447
  /* check prefix long +1 match */
215
448
  if (matchIndexL3 > prefixLowestIndex) {
@@ -220,23 +453,23 @@ _search_next_long:
220
453
  while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
221
454
  goto _match_found;
222
455
  }
223
- } else if (dictMode == ZSTD_dictMatchState) {
456
+ } else if (dictTagsMatchL3) {
224
457
  /* check dict long +1 match */
225
- U32 const dictMatchIndexL3 = dictHashLong[dictHLNext];
458
+ U32 const dictMatchIndexL3 = dictMatchIndexAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS;
226
459
  const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3;
227
460
  assert(dictMatchL3 < dictEnd);
228
461
  if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) {
229
462
  mLength = ZSTD_count_2segments(ip+1+8, dictMatchL3+8, iend, dictEnd, prefixLowest) + 8;
230
463
  ip++;
231
- offset = (U32)(current + 1 - dictMatchIndexL3 - dictIndexDelta);
464
+ offset = (U32)(curr + 1 - dictMatchIndexL3 - dictIndexDelta);
232
465
  while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */
233
466
  goto _match_found;
234
467
  } } }
235
468
 
236
469
  /* if no long +1 match, explore the short match we found */
237
- if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) {
470
+ if (matchIndexS < prefixLowestIndex) {
238
471
  mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4;
239
- offset = (U32)(current - matchIndexS);
472
+ offset = (U32)(curr - matchIndexS);
240
473
  while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
241
474
  } else {
242
475
  mLength = ZSTD_count(ip+4, match+4, iend) + 4;
@@ -244,13 +477,11 @@ _search_next_long:
244
477
  while (((ip>anchor) & (match>prefixLowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
245
478
  }
246
479
 
247
- /* fall-through */
248
-
249
480
  _match_found:
250
481
  offset_2 = offset_1;
251
482
  offset_1 = offset;
252
483
 
253
- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
484
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
254
485
 
255
486
  _match_stored:
256
487
  /* match found */
@@ -260,7 +491,7 @@ _match_stored:
260
491
  if (ip <= ilimit) {
261
492
  /* Complementary insertion */
262
493
  /* done after iLimit test, as candidates could be > iend-8 */
263
- { U32 const indexToInsert = current+2;
494
+ { U32 const indexToInsert = curr+2;
264
495
  hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
265
496
  hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
266
497
  hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
@@ -268,53 +499,55 @@ _match_stored:
268
499
  }
269
500
 
270
501
  /* check immediate repcode */
271
- if (dictMode == ZSTD_dictMatchState) {
272
- while (ip <= ilimit) {
273
- U32 const current2 = (U32)(ip-base);
274
- U32 const repIndex2 = current2 - offset_2;
275
- const BYTE* repMatch2 = dictMode == ZSTD_dictMatchState
276
- && repIndex2 < prefixLowestIndex ?
277
- dictBase + repIndex2 - dictIndexDelta :
278
- base + repIndex2;
279
- if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
280
- && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
281
- const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
282
- size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
283
- U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
284
- ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
285
- hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
286
- hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
287
- ip += repLength2;
288
- anchor = ip;
289
- continue;
290
- }
291
- break;
292
- } }
293
-
294
- if (dictMode == ZSTD_noDict) {
295
- while ( (ip <= ilimit)
296
- && ( (offset_2>0)
297
- & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
298
- /* store sequence */
299
- size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
300
- U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */
301
- hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
302
- hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
303
- ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength-MINMATCH);
304
- ip += rLength;
502
+ while (ip <= ilimit) {
503
+ U32 const current2 = (U32)(ip-base);
504
+ U32 const repIndex2 = current2 - offset_2;
505
+ const BYTE* repMatch2 = repIndex2 < prefixLowestIndex ?
506
+ dictBase + repIndex2 - dictIndexDelta :
507
+ base + repIndex2;
508
+ if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
509
+ && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
510
+ const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
511
+ size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
512
+ U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
513
+ ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
514
+ hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
515
+ hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
516
+ ip += repLength2;
305
517
  anchor = ip;
306
- continue; /* faster when present ... (?) */
307
- } } }
518
+ continue;
519
+ }
520
+ break;
521
+ }
522
+ }
308
523
  } /* while (ip < ilimit) */
309
524
 
310
525
  /* save reps for next block */
311
- rep[0] = offset_1 ? offset_1 : offsetSaved;
312
- rep[1] = offset_2 ? offset_2 : offsetSaved;
526
+ rep[0] = offset_1;
527
+ rep[1] = offset_2;
313
528
 
314
529
  /* Return the last literals size */
315
530
  return (size_t)(iend - anchor);
316
531
  }
317
532
 
533
+ #define ZSTD_GEN_DFAST_FN(dictMode, mls) \
534
+ static size_t ZSTD_compressBlock_doubleFast_##dictMode##_##mls( \
535
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], \
536
+ void const* src, size_t srcSize) \
537
+ { \
538
+ return ZSTD_compressBlock_doubleFast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls); \
539
+ }
540
+
541
+ ZSTD_GEN_DFAST_FN(noDict, 4)
542
+ ZSTD_GEN_DFAST_FN(noDict, 5)
543
+ ZSTD_GEN_DFAST_FN(noDict, 6)
544
+ ZSTD_GEN_DFAST_FN(noDict, 7)
545
+
546
+ ZSTD_GEN_DFAST_FN(dictMatchState, 4)
547
+ ZSTD_GEN_DFAST_FN(dictMatchState, 5)
548
+ ZSTD_GEN_DFAST_FN(dictMatchState, 6)
549
+ ZSTD_GEN_DFAST_FN(dictMatchState, 7)
550
+
318
551
 
319
552
  size_t ZSTD_compressBlock_doubleFast(
320
553
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
@@ -325,13 +558,13 @@ size_t ZSTD_compressBlock_doubleFast(
325
558
  {
326
559
  default: /* includes case 3 */
327
560
  case 4 :
328
- return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_noDict);
561
+ return ZSTD_compressBlock_doubleFast_noDict_4(ms, seqStore, rep, src, srcSize);
329
562
  case 5 :
330
- return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_noDict);
563
+ return ZSTD_compressBlock_doubleFast_noDict_5(ms, seqStore, rep, src, srcSize);
331
564
  case 6 :
332
- return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_noDict);
565
+ return ZSTD_compressBlock_doubleFast_noDict_6(ms, seqStore, rep, src, srcSize);
333
566
  case 7 :
334
- return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_noDict);
567
+ return ZSTD_compressBlock_doubleFast_noDict_7(ms, seqStore, rep, src, srcSize);
335
568
  }
336
569
  }
337
570
 
@@ -345,13 +578,13 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState(
345
578
  {
346
579
  default: /* includes case 3 */
347
580
  case 4 :
348
- return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_dictMatchState);
581
+ return ZSTD_compressBlock_doubleFast_dictMatchState_4(ms, seqStore, rep, src, srcSize);
349
582
  case 5 :
350
- return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_dictMatchState);
583
+ return ZSTD_compressBlock_doubleFast_dictMatchState_5(ms, seqStore, rep, src, srcSize);
351
584
  case 6 :
352
- return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_dictMatchState);
585
+ return ZSTD_compressBlock_doubleFast_dictMatchState_6(ms, seqStore, rep, src, srcSize);
353
586
  case 7 :
354
- return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_dictMatchState);
587
+ return ZSTD_compressBlock_doubleFast_dictMatchState_7(ms, seqStore, rep, src, srcSize);
355
588
  }
356
589
  }
357
590
 
@@ -387,7 +620,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
387
620
 
388
621
  /* if extDict is invalidated due to maxDistance, switch to "regular" variant */
389
622
  if (prefixStartIndex == dictStartIndex)
390
- return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, mls, ZSTD_noDict);
623
+ return ZSTD_compressBlock_doubleFast(ms, seqStore, rep, src, srcSize);
391
624
 
392
625
  /* Search Loop */
393
626
  while (ip < ilimit) { /* < instead of <=, because (ip+1) */
@@ -401,31 +634,31 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
401
634
  const BYTE* const matchLongBase = matchLongIndex < prefixStartIndex ? dictBase : base;
402
635
  const BYTE* matchLong = matchLongBase + matchLongIndex;
403
636
 
404
- const U32 current = (U32)(ip-base);
405
- const U32 repIndex = current + 1 - offset_1; /* offset_1 expected <= current +1 */
637
+ const U32 curr = (U32)(ip-base);
638
+ const U32 repIndex = curr + 1 - offset_1; /* offset_1 expected <= curr +1 */
406
639
  const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
407
640
  const BYTE* const repMatch = repBase + repIndex;
408
641
  size_t mLength;
409
- hashSmall[hSmall] = hashLong[hLong] = current; /* update hash table */
642
+ hashSmall[hSmall] = hashLong[hLong] = curr; /* update hash table */
410
643
 
411
644
  if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */
412
- & (repIndex > dictStartIndex))
645
+ & (offset_1 <= curr+1 - dictStartIndex)) /* note: we are searching at curr+1 */
413
646
  && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
414
647
  const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
415
648
  mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
416
649
  ip++;
417
- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
650
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
418
651
  } else {
419
652
  if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
420
653
  const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
421
654
  const BYTE* const lowMatchPtr = matchLongIndex < prefixStartIndex ? dictStart : prefixStart;
422
655
  U32 offset;
423
656
  mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, prefixStart) + 8;
424
- offset = current - matchLongIndex;
657
+ offset = curr - matchLongIndex;
425
658
  while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
426
659
  offset_2 = offset_1;
427
660
  offset_1 = offset;
428
- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
661
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
429
662
 
430
663
  } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
431
664
  size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
@@ -433,24 +666,24 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
433
666
  const BYTE* const match3Base = matchIndex3 < prefixStartIndex ? dictBase : base;
434
667
  const BYTE* match3 = match3Base + matchIndex3;
435
668
  U32 offset;
436
- hashLong[h3] = current + 1;
669
+ hashLong[h3] = curr + 1;
437
670
  if ( (matchIndex3 > dictStartIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) {
438
671
  const BYTE* const matchEnd = matchIndex3 < prefixStartIndex ? dictEnd : iend;
439
672
  const BYTE* const lowMatchPtr = matchIndex3 < prefixStartIndex ? dictStart : prefixStart;
440
673
  mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, prefixStart) + 8;
441
674
  ip++;
442
- offset = current+1 - matchIndex3;
675
+ offset = curr+1 - matchIndex3;
443
676
  while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */
444
677
  } else {
445
678
  const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
446
679
  const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
447
680
  mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
448
- offset = current - matchIndex;
681
+ offset = curr - matchIndex;
449
682
  while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
450
683
  }
451
684
  offset_2 = offset_1;
452
685
  offset_1 = offset;
453
- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
686
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
454
687
 
455
688
  } else {
456
689
  ip += ((ip-anchor) >> kSearchStrength) + 1;
@@ -464,7 +697,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
464
697
  if (ip <= ilimit) {
465
698
  /* Complementary insertion */
466
699
  /* done after iLimit test, as candidates could be > iend-8 */
467
- { U32 const indexToInsert = current+2;
700
+ { U32 const indexToInsert = curr+2;
468
701
  hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
469
702
  hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
470
703
  hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
@@ -477,12 +710,12 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
477
710
  U32 const repIndex2 = current2 - offset_2;
478
711
  const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
479
712
  if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */
480
- & (repIndex2 > dictStartIndex))
713
+ & (offset_2 <= current2 - dictStartIndex))
481
714
  && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
482
715
  const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
483
716
  size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
484
717
  U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
485
- ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
718
+ ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
486
719
  hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
487
720
  hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
488
721
  ip += repLength2;
@@ -500,6 +733,10 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
500
733
  return (size_t)(iend - anchor);
501
734
  }
502
735
 
736
+ ZSTD_GEN_DFAST_FN(extDict, 4)
737
+ ZSTD_GEN_DFAST_FN(extDict, 5)
738
+ ZSTD_GEN_DFAST_FN(extDict, 6)
739
+ ZSTD_GEN_DFAST_FN(extDict, 7)
503
740
 
504
741
  size_t ZSTD_compressBlock_doubleFast_extDict(
505
742
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
@@ -510,12 +747,12 @@ size_t ZSTD_compressBlock_doubleFast_extDict(
510
747
  {
511
748
  default: /* includes case 3 */
512
749
  case 4 :
513
- return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 4);
750
+ return ZSTD_compressBlock_doubleFast_extDict_4(ms, seqStore, rep, src, srcSize);
514
751
  case 5 :
515
- return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 5);
752
+ return ZSTD_compressBlock_doubleFast_extDict_5(ms, seqStore, rep, src, srcSize);
516
753
  case 6 :
517
- return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 6);
754
+ return ZSTD_compressBlock_doubleFast_extDict_6(ms, seqStore, rep, src, srcSize);
518
755
  case 7 :
519
- return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 7);
756
+ return ZSTD_compressBlock_doubleFast_extDict_7(ms, seqStore, rep, src, srcSize);
520
757
  }
521
758
  }