zstd-ruby 1.4.4.0 → 1.5.5.0

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (115)
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -0
  3. data/README.md +78 -5
  4. data/Rakefile +8 -2
  5. data/ext/zstdruby/common.h +15 -0
  6. data/ext/zstdruby/extconf.rb +3 -2
  7. data/ext/zstdruby/libzstd/common/allocations.h +55 -0
  8. data/ext/zstdruby/libzstd/common/bits.h +200 -0
  9. data/ext/zstdruby/libzstd/common/bitstream.h +74 -97
  10. data/ext/zstdruby/libzstd/common/compiler.h +219 -20
  11. data/ext/zstdruby/libzstd/common/cpu.h +1 -3
  12. data/ext/zstdruby/libzstd/common/debug.c +11 -31
  13. data/ext/zstdruby/libzstd/common/debug.h +22 -49
  14. data/ext/zstdruby/libzstd/common/entropy_common.c +184 -80
  15. data/ext/zstdruby/libzstd/common/error_private.c +11 -2
  16. data/ext/zstdruby/libzstd/common/error_private.h +87 -4
  17. data/ext/zstdruby/libzstd/common/fse.h +47 -116
  18. data/ext/zstdruby/libzstd/common/fse_decompress.c +127 -127
  19. data/ext/zstdruby/libzstd/common/huf.h +112 -197
  20. data/ext/zstdruby/libzstd/common/mem.h +124 -142
  21. data/ext/zstdruby/libzstd/common/pool.c +54 -27
  22. data/ext/zstdruby/libzstd/common/pool.h +11 -5
  23. data/ext/zstdruby/libzstd/common/portability_macros.h +156 -0
  24. data/ext/zstdruby/libzstd/common/threading.c +78 -22
  25. data/ext/zstdruby/libzstd/common/threading.h +9 -13
  26. data/ext/zstdruby/libzstd/common/xxhash.c +15 -873
  27. data/ext/zstdruby/libzstd/common/xxhash.h +5572 -191
  28. data/ext/zstdruby/libzstd/common/zstd_common.c +2 -37
  29. data/ext/zstdruby/libzstd/common/zstd_deps.h +111 -0
  30. data/ext/zstdruby/libzstd/common/zstd_internal.h +186 -144
  31. data/ext/zstdruby/libzstd/common/zstd_trace.h +163 -0
  32. data/ext/zstdruby/libzstd/compress/clevels.h +134 -0
  33. data/ext/zstdruby/libzstd/compress/fse_compress.c +99 -196
  34. data/ext/zstdruby/libzstd/compress/hist.c +41 -63
  35. data/ext/zstdruby/libzstd/compress/hist.h +13 -33
  36. data/ext/zstdruby/libzstd/compress/huf_compress.c +968 -331
  37. data/ext/zstdruby/libzstd/compress/zstd_compress.c +4120 -1191
  38. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +688 -159
  39. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.c +121 -40
  40. data/ext/zstdruby/libzstd/compress/zstd_compress_literals.h +16 -6
  41. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.c +62 -35
  42. data/ext/zstdruby/libzstd/compress/zstd_compress_sequences.h +10 -3
  43. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +577 -0
  44. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.h +32 -0
  45. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +322 -115
  46. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +394 -154
  47. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +4 -3
  48. data/ext/zstdruby/libzstd/compress/zstd_fast.c +729 -253
  49. data/ext/zstdruby/libzstd/compress/zstd_fast.h +4 -3
  50. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +1289 -247
  51. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +61 -1
  52. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +339 -212
  53. data/ext/zstdruby/libzstd/compress/zstd_ldm.h +15 -3
  54. data/ext/zstdruby/libzstd/compress/zstd_ldm_geartab.h +106 -0
  55. data/ext/zstdruby/libzstd/compress/zstd_opt.c +508 -282
  56. data/ext/zstdruby/libzstd/compress/zstd_opt.h +1 -1
  57. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +217 -466
  58. data/ext/zstdruby/libzstd/compress/zstdmt_compress.h +35 -114
  59. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +1220 -572
  60. data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +576 -0
  61. data/ext/zstdruby/libzstd/decompress/zstd_ddict.c +23 -19
  62. data/ext/zstdruby/libzstd/decompress/zstd_ddict.h +3 -3
  63. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +859 -273
  64. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +1244 -375
  65. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +21 -7
  66. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +74 -11
  67. data/ext/zstdruby/libzstd/dictBuilder/cover.c +75 -54
  68. data/ext/zstdruby/libzstd/dictBuilder/cover.h +20 -9
  69. data/ext/zstdruby/libzstd/dictBuilder/divsufsort.c +1 -1
  70. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +55 -36
  71. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +126 -110
  72. data/ext/zstdruby/libzstd/{dictBuilder/zdict.h → zdict.h} +248 -56
  73. data/ext/zstdruby/libzstd/zstd.h +1277 -306
  74. data/ext/zstdruby/libzstd/{common/zstd_errors.h → zstd_errors.h} +29 -8
  75. data/ext/zstdruby/main.c +20 -0
  76. data/ext/zstdruby/skippable_frame.c +63 -0
  77. data/ext/zstdruby/streaming_compress.c +177 -0
  78. data/ext/zstdruby/streaming_compress.h +5 -0
  79. data/ext/zstdruby/streaming_decompress.c +123 -0
  80. data/ext/zstdruby/zstdruby.c +114 -32
  81. data/lib/zstd-ruby/version.rb +1 -1
  82. data/lib/zstd-ruby.rb +0 -1
  83. data/zstd-ruby.gemspec +1 -1
  84. metadata +24 -39
  85. data/.travis.yml +0 -14
  86. data/ext/zstdruby/libzstd/.gitignore +0 -3
  87. data/ext/zstdruby/libzstd/BUCK +0 -234
  88. data/ext/zstdruby/libzstd/Makefile +0 -289
  89. data/ext/zstdruby/libzstd/README.md +0 -159
  90. data/ext/zstdruby/libzstd/deprecated/zbuff.h +0 -214
  91. data/ext/zstdruby/libzstd/deprecated/zbuff_common.c +0 -26
  92. data/ext/zstdruby/libzstd/deprecated/zbuff_compress.c +0 -147
  93. data/ext/zstdruby/libzstd/deprecated/zbuff_decompress.c +0 -75
  94. data/ext/zstdruby/libzstd/dll/example/Makefile +0 -47
  95. data/ext/zstdruby/libzstd/dll/example/README.md +0 -69
  96. data/ext/zstdruby/libzstd/dll/example/build_package.bat +0 -20
  97. data/ext/zstdruby/libzstd/dll/example/fullbench-dll.sln +0 -25
  98. data/ext/zstdruby/libzstd/dll/example/fullbench-dll.vcxproj +0 -181
  99. data/ext/zstdruby/libzstd/legacy/zstd_legacy.h +0 -415
  100. data/ext/zstdruby/libzstd/legacy/zstd_v01.c +0 -2152
  101. data/ext/zstdruby/libzstd/legacy/zstd_v01.h +0 -94
  102. data/ext/zstdruby/libzstd/legacy/zstd_v02.c +0 -3514
  103. data/ext/zstdruby/libzstd/legacy/zstd_v02.h +0 -93
  104. data/ext/zstdruby/libzstd/legacy/zstd_v03.c +0 -3156
  105. data/ext/zstdruby/libzstd/legacy/zstd_v03.h +0 -93
  106. data/ext/zstdruby/libzstd/legacy/zstd_v04.c +0 -3641
  107. data/ext/zstdruby/libzstd/legacy/zstd_v04.h +0 -142
  108. data/ext/zstdruby/libzstd/legacy/zstd_v05.c +0 -4046
  109. data/ext/zstdruby/libzstd/legacy/zstd_v05.h +0 -162
  110. data/ext/zstdruby/libzstd/legacy/zstd_v06.c +0 -4150
  111. data/ext/zstdruby/libzstd/legacy/zstd_v06.h +0 -172
  112. data/ext/zstdruby/libzstd/legacy/zstd_v07.c +0 -4533
  113. data/ext/zstdruby/libzstd/legacy/zstd_v07.h +0 -187
  114. data/ext/zstdruby/libzstd/libzstd.pc.in +0 -15
  115. data/ext/zstdruby/zstdruby.h +0 -6
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -11,8 +11,43 @@
11
11
  #include "zstd_compress_internal.h"
12
12
  #include "zstd_double_fast.h"
13
13
 
14
+ static void ZSTD_fillDoubleHashTableForCDict(ZSTD_matchState_t* ms,
15
+ void const* end, ZSTD_dictTableLoadMethod_e dtlm)
16
+ {
17
+ const ZSTD_compressionParameters* const cParams = &ms->cParams;
18
+ U32* const hashLarge = ms->hashTable;
19
+ U32 const hBitsL = cParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
20
+ U32 const mls = cParams->minMatch;
21
+ U32* const hashSmall = ms->chainTable;
22
+ U32 const hBitsS = cParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS;
23
+ const BYTE* const base = ms->window.base;
24
+ const BYTE* ip = base + ms->nextToUpdate;
25
+ const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
26
+ const U32 fastHashFillStep = 3;
14
27
 
15
- void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
28
+ /* Always insert every fastHashFillStep position into the hash tables.
29
+ * Insert the other positions into the large hash table if their entry
30
+ * is empty.
31
+ */
32
+ for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
33
+ U32 const curr = (U32)(ip - base);
34
+ U32 i;
35
+ for (i = 0; i < fastHashFillStep; ++i) {
36
+ size_t const smHashAndTag = ZSTD_hashPtr(ip + i, hBitsS, mls);
37
+ size_t const lgHashAndTag = ZSTD_hashPtr(ip + i, hBitsL, 8);
38
+ if (i == 0) {
39
+ ZSTD_writeTaggedIndex(hashSmall, smHashAndTag, curr + i);
40
+ }
41
+ if (i == 0 || hashLarge[lgHashAndTag >> ZSTD_SHORT_CACHE_TAG_BITS] == 0) {
42
+ ZSTD_writeTaggedIndex(hashLarge, lgHashAndTag, curr + i);
43
+ }
44
+ /* Only load extra positions for ZSTD_dtlm_full */
45
+ if (dtlm == ZSTD_dtlm_fast)
46
+ break;
47
+ } }
48
+ }
49
+
50
+ static void ZSTD_fillDoubleHashTableForCCtx(ZSTD_matchState_t* ms,
16
51
  void const* end, ZSTD_dictTableLoadMethod_e dtlm)
17
52
  {
18
53
  const ZSTD_compressionParameters* const cParams = &ms->cParams;
@@ -31,27 +66,249 @@ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
31
66
  * is empty.
32
67
  */
33
68
  for (; ip + fastHashFillStep - 1 <= iend; ip += fastHashFillStep) {
34
- U32 const current = (U32)(ip - base);
69
+ U32 const curr = (U32)(ip - base);
35
70
  U32 i;
36
71
  for (i = 0; i < fastHashFillStep; ++i) {
37
72
  size_t const smHash = ZSTD_hashPtr(ip + i, hBitsS, mls);
38
73
  size_t const lgHash = ZSTD_hashPtr(ip + i, hBitsL, 8);
39
74
  if (i == 0)
40
- hashSmall[smHash] = current + i;
75
+ hashSmall[smHash] = curr + i;
41
76
  if (i == 0 || hashLarge[lgHash] == 0)
42
- hashLarge[lgHash] = current + i;
77
+ hashLarge[lgHash] = curr + i;
43
78
  /* Only load extra positions for ZSTD_dtlm_full */
44
79
  if (dtlm == ZSTD_dtlm_fast)
45
80
  break;
46
- } }
81
+ } }
82
+ }
83
+
84
+ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
85
+ const void* const end,
86
+ ZSTD_dictTableLoadMethod_e dtlm,
87
+ ZSTD_tableFillPurpose_e tfp)
88
+ {
89
+ if (tfp == ZSTD_tfp_forCDict) {
90
+ ZSTD_fillDoubleHashTableForCDict(ms, end, dtlm);
91
+ } else {
92
+ ZSTD_fillDoubleHashTableForCCtx(ms, end, dtlm);
93
+ }
47
94
  }
48
95
 
49
96
 
50
97
  FORCE_INLINE_TEMPLATE
51
- size_t ZSTD_compressBlock_doubleFast_generic(
98
+ size_t ZSTD_compressBlock_doubleFast_noDict_generic(
99
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
100
+ void const* src, size_t srcSize, U32 const mls /* template */)
101
+ {
102
+ ZSTD_compressionParameters const* cParams = &ms->cParams;
103
+ U32* const hashLong = ms->hashTable;
104
+ const U32 hBitsL = cParams->hashLog;
105
+ U32* const hashSmall = ms->chainTable;
106
+ const U32 hBitsS = cParams->chainLog;
107
+ const BYTE* const base = ms->window.base;
108
+ const BYTE* const istart = (const BYTE*)src;
109
+ const BYTE* anchor = istart;
110
+ const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
111
+ /* presumes that, if there is a dictionary, it must be using Attach mode */
112
+ const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
113
+ const BYTE* const prefixLowest = base + prefixLowestIndex;
114
+ const BYTE* const iend = istart + srcSize;
115
+ const BYTE* const ilimit = iend - HASH_READ_SIZE;
116
+ U32 offset_1=rep[0], offset_2=rep[1];
117
+ U32 offsetSaved1 = 0, offsetSaved2 = 0;
118
+
119
+ size_t mLength;
120
+ U32 offset;
121
+ U32 curr;
122
+
123
+ /* how many positions to search before increasing step size */
124
+ const size_t kStepIncr = 1 << kSearchStrength;
125
+ /* the position at which to increment the step size if no match is found */
126
+ const BYTE* nextStep;
127
+ size_t step; /* the current step size */
128
+
129
+ size_t hl0; /* the long hash at ip */
130
+ size_t hl1; /* the long hash at ip1 */
131
+
132
+ U32 idxl0; /* the long match index for ip */
133
+ U32 idxl1; /* the long match index for ip1 */
134
+
135
+ const BYTE* matchl0; /* the long match for ip */
136
+ const BYTE* matchs0; /* the short match for ip */
137
+ const BYTE* matchl1; /* the long match for ip1 */
138
+
139
+ const BYTE* ip = istart; /* the current position */
140
+ const BYTE* ip1; /* the next position */
141
+
142
+ DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_noDict_generic");
143
+
144
+ /* init */
145
+ ip += ((ip - prefixLowest) == 0);
146
+ {
147
+ U32 const current = (U32)(ip - base);
148
+ U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
149
+ U32 const maxRep = current - windowLow;
150
+ if (offset_2 > maxRep) offsetSaved2 = offset_2, offset_2 = 0;
151
+ if (offset_1 > maxRep) offsetSaved1 = offset_1, offset_1 = 0;
152
+ }
153
+
154
+ /* Outer Loop: one iteration per match found and stored */
155
+ while (1) {
156
+ step = 1;
157
+ nextStep = ip + kStepIncr;
158
+ ip1 = ip + step;
159
+
160
+ if (ip1 > ilimit) {
161
+ goto _cleanup;
162
+ }
163
+
164
+ hl0 = ZSTD_hashPtr(ip, hBitsL, 8);
165
+ idxl0 = hashLong[hl0];
166
+ matchl0 = base + idxl0;
167
+
168
+ /* Inner Loop: one iteration per search / position */
169
+ do {
170
+ const size_t hs0 = ZSTD_hashPtr(ip, hBitsS, mls);
171
+ const U32 idxs0 = hashSmall[hs0];
172
+ curr = (U32)(ip-base);
173
+ matchs0 = base + idxs0;
174
+
175
+ hashLong[hl0] = hashSmall[hs0] = curr; /* update hash tables */
176
+
177
+ /* check noDict repcode */
178
+ if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
179
+ mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
180
+ ip++;
181
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
182
+ goto _match_stored;
183
+ }
184
+
185
+ hl1 = ZSTD_hashPtr(ip1, hBitsL, 8);
186
+
187
+ if (idxl0 > prefixLowestIndex) {
188
+ /* check prefix long match */
189
+ if (MEM_read64(matchl0) == MEM_read64(ip)) {
190
+ mLength = ZSTD_count(ip+8, matchl0+8, iend) + 8;
191
+ offset = (U32)(ip-matchl0);
192
+ while (((ip>anchor) & (matchl0>prefixLowest)) && (ip[-1] == matchl0[-1])) { ip--; matchl0--; mLength++; } /* catch up */
193
+ goto _match_found;
194
+ }
195
+ }
196
+
197
+ idxl1 = hashLong[hl1];
198
+ matchl1 = base + idxl1;
199
+
200
+ if (idxs0 > prefixLowestIndex) {
201
+ /* check prefix short match */
202
+ if (MEM_read32(matchs0) == MEM_read32(ip)) {
203
+ goto _search_next_long;
204
+ }
205
+ }
206
+
207
+ if (ip1 >= nextStep) {
208
+ PREFETCH_L1(ip1 + 64);
209
+ PREFETCH_L1(ip1 + 128);
210
+ step++;
211
+ nextStep += kStepIncr;
212
+ }
213
+ ip = ip1;
214
+ ip1 += step;
215
+
216
+ hl0 = hl1;
217
+ idxl0 = idxl1;
218
+ matchl0 = matchl1;
219
+ #if defined(__aarch64__)
220
+ PREFETCH_L1(ip+256);
221
+ #endif
222
+ } while (ip1 <= ilimit);
223
+
224
+ _cleanup:
225
+ /* If offset_1 started invalid (offsetSaved1 != 0) and became valid (offset_1 != 0),
226
+ * rotate saved offsets. See comment in ZSTD_compressBlock_fast_noDict for more context. */
227
+ offsetSaved2 = ((offsetSaved1 != 0) && (offset_1 != 0)) ? offsetSaved1 : offsetSaved2;
228
+
229
+ /* save reps for next block */
230
+ rep[0] = offset_1 ? offset_1 : offsetSaved1;
231
+ rep[1] = offset_2 ? offset_2 : offsetSaved2;
232
+
233
+ /* Return the last literals size */
234
+ return (size_t)(iend - anchor);
235
+
236
+ _search_next_long:
237
+
238
+ /* check prefix long +1 match */
239
+ if (idxl1 > prefixLowestIndex) {
240
+ if (MEM_read64(matchl1) == MEM_read64(ip1)) {
241
+ ip = ip1;
242
+ mLength = ZSTD_count(ip+8, matchl1+8, iend) + 8;
243
+ offset = (U32)(ip-matchl1);
244
+ while (((ip>anchor) & (matchl1>prefixLowest)) && (ip[-1] == matchl1[-1])) { ip--; matchl1--; mLength++; } /* catch up */
245
+ goto _match_found;
246
+ }
247
+ }
248
+
249
+ /* if no long +1 match, explore the short match we found */
250
+ mLength = ZSTD_count(ip+4, matchs0+4, iend) + 4;
251
+ offset = (U32)(ip - matchs0);
252
+ while (((ip>anchor) & (matchs0>prefixLowest)) && (ip[-1] == matchs0[-1])) { ip--; matchs0--; mLength++; } /* catch up */
253
+
254
+ /* fall-through */
255
+
256
+ _match_found: /* requires ip, offset, mLength */
257
+ offset_2 = offset_1;
258
+ offset_1 = offset;
259
+
260
+ if (step < 4) {
261
+ /* It is unsafe to write this value back to the hashtable when ip1 is
262
+ * greater than or equal to the new ip we will have after we're done
263
+ * processing this match. Rather than perform that test directly
264
+ * (ip1 >= ip + mLength), which costs speed in practice, we do a simpler
265
+ * more predictable test. The minmatch even if we take a short match is
266
+ * 4 bytes, so as long as step, the distance between ip and ip1
267
+ * (initially) is less than 4, we know ip1 < new ip. */
268
+ hashLong[hl1] = (U32)(ip1 - base);
269
+ }
270
+
271
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
272
+
273
+ _match_stored:
274
+ /* match found */
275
+ ip += mLength;
276
+ anchor = ip;
277
+
278
+ if (ip <= ilimit) {
279
+ /* Complementary insertion */
280
+ /* done after iLimit test, as candidates could be > iend-8 */
281
+ { U32 const indexToInsert = curr+2;
282
+ hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
283
+ hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
284
+ hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
285
+ hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
286
+ }
287
+
288
+ /* check immediate repcode */
289
+ while ( (ip <= ilimit)
290
+ && ( (offset_2>0)
291
+ & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
292
+ /* store sequence */
293
+ size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
294
+ U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */
295
+ hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
296
+ hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
297
+ ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, rLength);
298
+ ip += rLength;
299
+ anchor = ip;
300
+ continue; /* faster when present ... (?) */
301
+ }
302
+ }
303
+ }
304
+ }
305
+
306
+
307
+ FORCE_INLINE_TEMPLATE
308
+ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
52
309
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
53
310
  void const* src, size_t srcSize,
54
- U32 const mls /* template */, ZSTD_dictMode_e const dictMode)
311
+ U32 const mls /* template */)
55
312
  {
56
313
  ZSTD_compressionParameters const* cParams = &ms->cParams;
57
314
  U32* const hashLong = ms->hashTable;
@@ -63,63 +320,45 @@ size_t ZSTD_compressBlock_doubleFast_generic(
63
320
  const BYTE* ip = istart;
64
321
  const BYTE* anchor = istart;
65
322
  const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
66
- const U32 lowestValid = ms->window.dictLimit;
67
- const U32 maxDistance = 1U << cParams->windowLog;
68
323
  /* presumes that, if there is a dictionary, it must be using Attach mode */
69
- const U32 prefixLowestIndex = (endIndex - lowestValid > maxDistance) ? endIndex - maxDistance : lowestValid;
324
+ const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
70
325
  const BYTE* const prefixLowest = base + prefixLowestIndex;
71
326
  const BYTE* const iend = istart + srcSize;
72
327
  const BYTE* const ilimit = iend - HASH_READ_SIZE;
73
328
  U32 offset_1=rep[0], offset_2=rep[1];
74
- U32 offsetSaved = 0;
75
329
 
76
330
  const ZSTD_matchState_t* const dms = ms->dictMatchState;
77
- const ZSTD_compressionParameters* const dictCParams =
78
- dictMode == ZSTD_dictMatchState ?
79
- &dms->cParams : NULL;
80
- const U32* const dictHashLong = dictMode == ZSTD_dictMatchState ?
81
- dms->hashTable : NULL;
82
- const U32* const dictHashSmall = dictMode == ZSTD_dictMatchState ?
83
- dms->chainTable : NULL;
84
- const U32 dictStartIndex = dictMode == ZSTD_dictMatchState ?
85
- dms->window.dictLimit : 0;
86
- const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ?
87
- dms->window.base : NULL;
88
- const BYTE* const dictStart = dictMode == ZSTD_dictMatchState ?
89
- dictBase + dictStartIndex : NULL;
90
- const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ?
91
- dms->window.nextSrc : NULL;
92
- const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ?
93
- prefixLowestIndex - (U32)(dictEnd - dictBase) :
94
- 0;
95
- const U32 dictHBitsL = dictMode == ZSTD_dictMatchState ?
96
- dictCParams->hashLog : hBitsL;
97
- const U32 dictHBitsS = dictMode == ZSTD_dictMatchState ?
98
- dictCParams->chainLog : hBitsS;
99
- const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictStart);
100
-
101
- DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_generic");
102
-
103
- assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState);
331
+ const ZSTD_compressionParameters* const dictCParams = &dms->cParams;
332
+ const U32* const dictHashLong = dms->hashTable;
333
+ const U32* const dictHashSmall = dms->chainTable;
334
+ const U32 dictStartIndex = dms->window.dictLimit;
335
+ const BYTE* const dictBase = dms->window.base;
336
+ const BYTE* const dictStart = dictBase + dictStartIndex;
337
+ const BYTE* const dictEnd = dms->window.nextSrc;
338
+ const U32 dictIndexDelta = prefixLowestIndex - (U32)(dictEnd - dictBase);
339
+ const U32 dictHBitsL = dictCParams->hashLog + ZSTD_SHORT_CACHE_TAG_BITS;
340
+ const U32 dictHBitsS = dictCParams->chainLog + ZSTD_SHORT_CACHE_TAG_BITS;
341
+ const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictStart));
342
+
343
+ DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_dictMatchState_generic");
104
344
 
105
345
  /* if a dictionary is attached, it must be within window range */
106
- if (dictMode == ZSTD_dictMatchState) {
107
- assert(lowestValid + maxDistance >= endIndex);
346
+ assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);
347
+
348
+ if (ms->prefetchCDictTables) {
349
+ size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32);
350
+ size_t const chainTableBytes = (((size_t)1) << dictCParams->chainLog) * sizeof(U32);
351
+ PREFETCH_AREA(dictHashLong, hashTableBytes)
352
+ PREFETCH_AREA(dictHashSmall, chainTableBytes)
108
353
  }
109
354
 
110
355
  /* init */
111
356
  ip += (dictAndPrefixLength == 0);
112
- if (dictMode == ZSTD_noDict) {
113
- U32 const maxRep = (U32)(ip - prefixLowest);
114
- if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
115
- if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
116
- }
117
- if (dictMode == ZSTD_dictMatchState) {
118
- /* dictMatchState repCode checks don't currently handle repCode == 0
119
- * disabling. */
120
- assert(offset_1 <= dictAndPrefixLength);
121
- assert(offset_2 <= dictAndPrefixLength);
122
- }
357
+
358
+ /* dictMatchState repCode checks don't currently handle repCode == 0
359
+ * disabling. */
360
+ assert(offset_1 <= dictAndPrefixLength);
361
+ assert(offset_2 <= dictAndPrefixLength);
123
362
 
124
363
  /* Main Search Loop */
125
364
  while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
@@ -127,37 +366,30 @@ size_t ZSTD_compressBlock_doubleFast_generic(
127
366
  U32 offset;
128
367
  size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8);
129
368
  size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
130
- size_t const dictHL = ZSTD_hashPtr(ip, dictHBitsL, 8);
131
- size_t const dictHS = ZSTD_hashPtr(ip, dictHBitsS, mls);
132
- U32 const current = (U32)(ip-base);
369
+ size_t const dictHashAndTagL = ZSTD_hashPtr(ip, dictHBitsL, 8);
370
+ size_t const dictHashAndTagS = ZSTD_hashPtr(ip, dictHBitsS, mls);
371
+ U32 const dictMatchIndexAndTagL = dictHashLong[dictHashAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS];
372
+ U32 const dictMatchIndexAndTagS = dictHashSmall[dictHashAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS];
373
+ int const dictTagsMatchL = ZSTD_comparePackedTags(dictMatchIndexAndTagL, dictHashAndTagL);
374
+ int const dictTagsMatchS = ZSTD_comparePackedTags(dictMatchIndexAndTagS, dictHashAndTagS);
375
+ U32 const curr = (U32)(ip-base);
133
376
  U32 const matchIndexL = hashLong[h2];
134
377
  U32 matchIndexS = hashSmall[h];
135
378
  const BYTE* matchLong = base + matchIndexL;
136
379
  const BYTE* match = base + matchIndexS;
137
- const U32 repIndex = current + 1 - offset_1;
138
- const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
139
- && repIndex < prefixLowestIndex) ?
380
+ const U32 repIndex = curr + 1 - offset_1;
381
+ const BYTE* repMatch = (repIndex < prefixLowestIndex) ?
140
382
  dictBase + (repIndex - dictIndexDelta) :
141
383
  base + repIndex;
142
- hashLong[h2] = hashSmall[h] = current; /* update hash tables */
384
+ hashLong[h2] = hashSmall[h] = curr; /* update hash tables */
143
385
 
144
- /* check dictMatchState repcode */
145
- if (dictMode == ZSTD_dictMatchState
146
- && ((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
386
+ /* check repcode */
387
+ if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
147
388
  && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
148
389
  const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
149
390
  mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
150
391
  ip++;
151
- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
152
- goto _match_stored;
153
- }
154
-
155
- /* check noDict repcode */
156
- if ( dictMode == ZSTD_noDict
157
- && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
158
- mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
159
- ip++;
160
- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
392
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
161
393
  goto _match_stored;
162
394
  }
163
395
 
@@ -169,15 +401,15 @@ size_t ZSTD_compressBlock_doubleFast_generic(
169
401
  while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
170
402
  goto _match_found;
171
403
  }
172
- } else if (dictMode == ZSTD_dictMatchState) {
404
+ } else if (dictTagsMatchL) {
173
405
  /* check dictMatchState long match */
174
- U32 const dictMatchIndexL = dictHashLong[dictHL];
406
+ U32 const dictMatchIndexL = dictMatchIndexAndTagL >> ZSTD_SHORT_CACHE_TAG_BITS;
175
407
  const BYTE* dictMatchL = dictBase + dictMatchIndexL;
176
408
  assert(dictMatchL < dictEnd);
177
409
 
178
410
  if (dictMatchL > dictStart && MEM_read64(dictMatchL) == MEM_read64(ip)) {
179
411
  mLength = ZSTD_count_2segments(ip+8, dictMatchL+8, iend, dictEnd, prefixLowest) + 8;
180
- offset = (U32)(current - dictMatchIndexL - dictIndexDelta);
412
+ offset = (U32)(curr - dictMatchIndexL - dictIndexDelta);
181
413
  while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */
182
414
  goto _match_found;
183
415
  } }
@@ -187,9 +419,9 @@ size_t ZSTD_compressBlock_doubleFast_generic(
187
419
  if (MEM_read32(match) == MEM_read32(ip)) {
188
420
  goto _search_next_long;
189
421
  }
190
- } else if (dictMode == ZSTD_dictMatchState) {
422
+ } else if (dictTagsMatchS) {
191
423
  /* check dictMatchState short match */
192
- U32 const dictMatchIndexS = dictHashSmall[dictHS];
424
+ U32 const dictMatchIndexS = dictMatchIndexAndTagS >> ZSTD_SHORT_CACHE_TAG_BITS;
193
425
  match = dictBase + dictMatchIndexS;
194
426
  matchIndexS = dictMatchIndexS + dictIndexDelta;
195
427
 
@@ -198,15 +430,19 @@ size_t ZSTD_compressBlock_doubleFast_generic(
198
430
  } }
199
431
 
200
432
  ip += ((ip-anchor) >> kSearchStrength) + 1;
433
+ #if defined(__aarch64__)
434
+ PREFETCH_L1(ip+256);
435
+ #endif
201
436
  continue;
202
437
 
203
438
  _search_next_long:
204
-
205
439
  { size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
206
- size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
440
+ size_t const dictHashAndTagL3 = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
207
441
  U32 const matchIndexL3 = hashLong[hl3];
442
+ U32 const dictMatchIndexAndTagL3 = dictHashLong[dictHashAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS];
443
+ int const dictTagsMatchL3 = ZSTD_comparePackedTags(dictMatchIndexAndTagL3, dictHashAndTagL3);
208
444
  const BYTE* matchL3 = base + matchIndexL3;
209
- hashLong[hl3] = current + 1;
445
+ hashLong[hl3] = curr + 1;
210
446
 
211
447
  /* check prefix long +1 match */
212
448
  if (matchIndexL3 > prefixLowestIndex) {
@@ -217,23 +453,23 @@ _search_next_long:
217
453
  while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
218
454
  goto _match_found;
219
455
  }
220
- } else if (dictMode == ZSTD_dictMatchState) {
456
+ } else if (dictTagsMatchL3) {
221
457
  /* check dict long +1 match */
222
- U32 const dictMatchIndexL3 = dictHashLong[dictHLNext];
458
+ U32 const dictMatchIndexL3 = dictMatchIndexAndTagL3 >> ZSTD_SHORT_CACHE_TAG_BITS;
223
459
  const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3;
224
460
  assert(dictMatchL3 < dictEnd);
225
461
  if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) {
226
462
  mLength = ZSTD_count_2segments(ip+1+8, dictMatchL3+8, iend, dictEnd, prefixLowest) + 8;
227
463
  ip++;
228
- offset = (U32)(current + 1 - dictMatchIndexL3 - dictIndexDelta);
464
+ offset = (U32)(curr + 1 - dictMatchIndexL3 - dictIndexDelta);
229
465
  while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */
230
466
  goto _match_found;
231
467
  } } }
232
468
 
233
469
  /* if no long +1 match, explore the short match we found */
234
- if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) {
470
+ if (matchIndexS < prefixLowestIndex) {
235
471
  mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4;
236
- offset = (U32)(current - matchIndexS);
472
+ offset = (U32)(curr - matchIndexS);
237
473
  while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
238
474
  } else {
239
475
  mLength = ZSTD_count(ip+4, match+4, iend) + 4;
@@ -241,13 +477,11 @@ _search_next_long:
241
477
  while (((ip>anchor) & (match>prefixLowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
242
478
  }
243
479
 
244
- /* fall-through */
245
-
246
480
  _match_found:
247
481
  offset_2 = offset_1;
248
482
  offset_1 = offset;
249
483
 
250
- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
484
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
251
485
 
252
486
  _match_stored:
253
487
  /* match found */
@@ -257,7 +491,7 @@ _match_stored:
257
491
  if (ip <= ilimit) {
258
492
  /* Complementary insertion */
259
493
  /* done after iLimit test, as candidates could be > iend-8 */
260
- { U32 const indexToInsert = current+2;
494
+ { U32 const indexToInsert = curr+2;
261
495
  hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
262
496
  hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
263
497
  hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
@@ -265,53 +499,55 @@ _match_stored:
265
499
  }
266
500
 
267
501
  /* check immediate repcode */
268
- if (dictMode == ZSTD_dictMatchState) {
269
- while (ip <= ilimit) {
270
- U32 const current2 = (U32)(ip-base);
271
- U32 const repIndex2 = current2 - offset_2;
272
- const BYTE* repMatch2 = dictMode == ZSTD_dictMatchState
273
- && repIndex2 < prefixLowestIndex ?
274
- dictBase - dictIndexDelta + repIndex2 :
275
- base + repIndex2;
276
- if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
277
- && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
278
- const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
279
- size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
280
- U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
281
- ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
282
- hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
283
- hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
284
- ip += repLength2;
285
- anchor = ip;
286
- continue;
287
- }
288
- break;
289
- } }
290
-
291
- if (dictMode == ZSTD_noDict) {
292
- while ( (ip <= ilimit)
293
- && ( (offset_2>0)
294
- & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
295
- /* store sequence */
296
- size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
297
- U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */
298
- hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
299
- hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
300
- ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength-MINMATCH);
301
- ip += rLength;
502
+ while (ip <= ilimit) {
503
+ U32 const current2 = (U32)(ip-base);
504
+ U32 const repIndex2 = current2 - offset_2;
505
+ const BYTE* repMatch2 = repIndex2 < prefixLowestIndex ?
506
+ dictBase + repIndex2 - dictIndexDelta :
507
+ base + repIndex2;
508
+ if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
509
+ && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
510
+ const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
511
+ size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
512
+ U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
513
+ ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
514
+ hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
515
+ hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
516
+ ip += repLength2;
302
517
  anchor = ip;
303
- continue; /* faster when present ... (?) */
304
- } } }
518
+ continue;
519
+ }
520
+ break;
521
+ }
522
+ }
305
523
  } /* while (ip < ilimit) */
306
524
 
307
525
  /* save reps for next block */
308
- rep[0] = offset_1 ? offset_1 : offsetSaved;
309
- rep[1] = offset_2 ? offset_2 : offsetSaved;
526
+ rep[0] = offset_1;
527
+ rep[1] = offset_2;
310
528
 
311
529
  /* Return the last literals size */
312
530
  return (size_t)(iend - anchor);
313
531
  }
314
532
 
533
+ #define ZSTD_GEN_DFAST_FN(dictMode, mls) \
534
+ static size_t ZSTD_compressBlock_doubleFast_##dictMode##_##mls( \
535
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], \
536
+ void const* src, size_t srcSize) \
537
+ { \
538
+ return ZSTD_compressBlock_doubleFast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls); \
539
+ }
540
+
541
+ ZSTD_GEN_DFAST_FN(noDict, 4)
542
+ ZSTD_GEN_DFAST_FN(noDict, 5)
543
+ ZSTD_GEN_DFAST_FN(noDict, 6)
544
+ ZSTD_GEN_DFAST_FN(noDict, 7)
545
+
546
+ ZSTD_GEN_DFAST_FN(dictMatchState, 4)
547
+ ZSTD_GEN_DFAST_FN(dictMatchState, 5)
548
+ ZSTD_GEN_DFAST_FN(dictMatchState, 6)
549
+ ZSTD_GEN_DFAST_FN(dictMatchState, 7)
550
+
315
551
 
316
552
  size_t ZSTD_compressBlock_doubleFast(
317
553
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
@@ -322,13 +558,13 @@ size_t ZSTD_compressBlock_doubleFast(
322
558
  {
323
559
  default: /* includes case 3 */
324
560
  case 4 :
325
- return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_noDict);
561
+ return ZSTD_compressBlock_doubleFast_noDict_4(ms, seqStore, rep, src, srcSize);
326
562
  case 5 :
327
- return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_noDict);
563
+ return ZSTD_compressBlock_doubleFast_noDict_5(ms, seqStore, rep, src, srcSize);
328
564
  case 6 :
329
- return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_noDict);
565
+ return ZSTD_compressBlock_doubleFast_noDict_6(ms, seqStore, rep, src, srcSize);
330
566
  case 7 :
331
- return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_noDict);
567
+ return ZSTD_compressBlock_doubleFast_noDict_7(ms, seqStore, rep, src, srcSize);
332
568
  }
333
569
  }
334
570
 
@@ -342,13 +578,13 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState(
342
578
  {
343
579
  default: /* includes case 3 */
344
580
  case 4 :
345
- return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_dictMatchState);
581
+ return ZSTD_compressBlock_doubleFast_dictMatchState_4(ms, seqStore, rep, src, srcSize);
346
582
  case 5 :
347
- return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_dictMatchState);
583
+ return ZSTD_compressBlock_doubleFast_dictMatchState_5(ms, seqStore, rep, src, srcSize);
348
584
  case 6 :
349
- return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_dictMatchState);
585
+ return ZSTD_compressBlock_doubleFast_dictMatchState_6(ms, seqStore, rep, src, srcSize);
350
586
  case 7 :
351
- return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_dictMatchState);
587
+ return ZSTD_compressBlock_doubleFast_dictMatchState_7(ms, seqStore, rep, src, srcSize);
352
588
  }
353
589
  }
354
590
 
@@ -384,7 +620,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
384
620
 
385
621
  /* if extDict is invalidated due to maxDistance, switch to "regular" variant */
386
622
  if (prefixStartIndex == dictStartIndex)
387
- return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, mls, ZSTD_noDict);
623
+ return ZSTD_compressBlock_doubleFast(ms, seqStore, rep, src, srcSize);
388
624
 
389
625
  /* Search Loop */
390
626
  while (ip < ilimit) { /* < instead of <=, because (ip+1) */
@@ -398,31 +634,31 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
398
634
  const BYTE* const matchLongBase = matchLongIndex < prefixStartIndex ? dictBase : base;
399
635
  const BYTE* matchLong = matchLongBase + matchLongIndex;
400
636
 
401
- const U32 current = (U32)(ip-base);
402
- const U32 repIndex = current + 1 - offset_1; /* offset_1 expected <= current +1 */
637
+ const U32 curr = (U32)(ip-base);
638
+ const U32 repIndex = curr + 1 - offset_1; /* offset_1 expected <= curr +1 */
403
639
  const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
404
640
  const BYTE* const repMatch = repBase + repIndex;
405
641
  size_t mLength;
406
- hashSmall[hSmall] = hashLong[hLong] = current; /* update hash table */
642
+ hashSmall[hSmall] = hashLong[hLong] = curr; /* update hash table */
407
643
 
408
644
  if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */
409
- & (repIndex > dictStartIndex))
645
+ & (offset_1 <= curr+1 - dictStartIndex)) /* note: we are searching at curr+1 */
410
646
  && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
411
647
  const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
412
648
  mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
413
649
  ip++;
414
- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
650
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, REPCODE1_TO_OFFBASE, mLength);
415
651
  } else {
416
652
  if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
417
653
  const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
418
654
  const BYTE* const lowMatchPtr = matchLongIndex < prefixStartIndex ? dictStart : prefixStart;
419
655
  U32 offset;
420
656
  mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, prefixStart) + 8;
421
- offset = current - matchLongIndex;
657
+ offset = curr - matchLongIndex;
422
658
  while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
423
659
  offset_2 = offset_1;
424
660
  offset_1 = offset;
425
- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
661
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
426
662
 
427
663
  } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
428
664
  size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
@@ -430,24 +666,24 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
430
666
  const BYTE* const match3Base = matchIndex3 < prefixStartIndex ? dictBase : base;
431
667
  const BYTE* match3 = match3Base + matchIndex3;
432
668
  U32 offset;
433
- hashLong[h3] = current + 1;
669
+ hashLong[h3] = curr + 1;
434
670
  if ( (matchIndex3 > dictStartIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) {
435
671
  const BYTE* const matchEnd = matchIndex3 < prefixStartIndex ? dictEnd : iend;
436
672
  const BYTE* const lowMatchPtr = matchIndex3 < prefixStartIndex ? dictStart : prefixStart;
437
673
  mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, prefixStart) + 8;
438
674
  ip++;
439
- offset = current+1 - matchIndex3;
675
+ offset = curr+1 - matchIndex3;
440
676
  while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */
441
677
  } else {
442
678
  const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
443
679
  const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
444
680
  mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
445
- offset = current - matchIndex;
681
+ offset = curr - matchIndex;
446
682
  while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
447
683
  }
448
684
  offset_2 = offset_1;
449
685
  offset_1 = offset;
450
- ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
686
+ ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, OFFSET_TO_OFFBASE(offset), mLength);
451
687
 
452
688
  } else {
453
689
  ip += ((ip-anchor) >> kSearchStrength) + 1;
@@ -461,7 +697,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
461
697
  if (ip <= ilimit) {
462
698
  /* Complementary insertion */
463
699
  /* done after iLimit test, as candidates could be > iend-8 */
464
- { U32 const indexToInsert = current+2;
700
+ { U32 const indexToInsert = curr+2;
465
701
  hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
466
702
  hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
467
703
  hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
@@ -474,12 +710,12 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
474
710
  U32 const repIndex2 = current2 - offset_2;
475
711
  const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
476
712
  if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */
477
- & (repIndex2 > dictStartIndex))
713
+ & (offset_2 <= current2 - dictStartIndex))
478
714
  && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
479
715
  const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
480
716
  size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
481
717
  U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
482
- ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
718
+ ZSTD_storeSeq(seqStore, 0, anchor, iend, REPCODE1_TO_OFFBASE, repLength2);
483
719
  hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
484
720
  hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
485
721
  ip += repLength2;
@@ -497,6 +733,10 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
497
733
  return (size_t)(iend - anchor);
498
734
  }
499
735
 
736
+ ZSTD_GEN_DFAST_FN(extDict, 4)
737
+ ZSTD_GEN_DFAST_FN(extDict, 5)
738
+ ZSTD_GEN_DFAST_FN(extDict, 6)
739
+ ZSTD_GEN_DFAST_FN(extDict, 7)
500
740
 
501
741
  size_t ZSTD_compressBlock_doubleFast_extDict(
502
742
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
@@ -507,12 +747,12 @@ size_t ZSTD_compressBlock_doubleFast_extDict(
507
747
  {
508
748
  default: /* includes case 3 */
509
749
  case 4 :
510
- return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 4);
750
+ return ZSTD_compressBlock_doubleFast_extDict_4(ms, seqStore, rep, src, srcSize);
511
751
  case 5 :
512
- return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 5);
752
+ return ZSTD_compressBlock_doubleFast_extDict_5(ms, seqStore, rep, src, srcSize);
513
753
  case 6 :
514
- return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 6);
754
+ return ZSTD_compressBlock_doubleFast_extDict_6(ms, seqStore, rep, src, srcSize);
515
755
  case 7 :
516
- return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 7);
756
+ return ZSTD_compressBlock_doubleFast_extDict_7(ms, seqStore, rep, src, srcSize);
517
757
  }
518
758
  }