extzstd 0.1 → 0.3.2

Files changed (134)
  1. checksums.yaml +5 -5
  2. data/HISTORY.ja.md +39 -0
  3. data/README.md +38 -56
  4. data/contrib/zstd/CHANGELOG +613 -0
  5. data/contrib/zstd/CODE_OF_CONDUCT.md +5 -0
  6. data/contrib/zstd/CONTRIBUTING.md +406 -0
  7. data/contrib/zstd/COPYING +339 -0
  8. data/contrib/zstd/Makefile +420 -0
  9. data/contrib/zstd/README.md +179 -41
  10. data/contrib/zstd/TESTING.md +44 -0
  11. data/contrib/zstd/appveyor.yml +292 -0
  12. data/contrib/zstd/lib/BUCK +234 -0
  13. data/contrib/zstd/lib/Makefile +451 -0
  14. data/contrib/zstd/lib/README.md +207 -0
  15. data/contrib/zstd/{common → lib/common}/bitstream.h +187 -138
  16. data/contrib/zstd/lib/common/compiler.h +288 -0
  17. data/contrib/zstd/lib/common/cpu.h +213 -0
  18. data/contrib/zstd/lib/common/debug.c +24 -0
  19. data/contrib/zstd/lib/common/debug.h +107 -0
  20. data/contrib/zstd/lib/common/entropy_common.c +362 -0
  21. data/contrib/zstd/{common → lib/common}/error_private.c +25 -12
  22. data/contrib/zstd/{common → lib/common}/error_private.h +14 -10
  23. data/contrib/zstd/{common → lib/common}/fse.h +173 -92
  24. data/contrib/zstd/{common → lib/common}/fse_decompress.c +149 -85
  25. data/contrib/zstd/lib/common/huf.h +361 -0
  26. data/contrib/zstd/{common → lib/common}/mem.h +115 -59
  27. data/contrib/zstd/lib/common/pool.c +350 -0
  28. data/contrib/zstd/lib/common/pool.h +84 -0
  29. data/contrib/zstd/lib/common/threading.c +122 -0
  30. data/contrib/zstd/lib/common/threading.h +155 -0
  31. data/contrib/zstd/{common → lib/common}/xxhash.c +55 -96
  32. data/contrib/zstd/{common → lib/common}/xxhash.h +23 -47
  33. data/contrib/zstd/lib/common/zstd_common.c +83 -0
  34. data/contrib/zstd/lib/common/zstd_deps.h +111 -0
  35. data/contrib/zstd/lib/common/zstd_errors.h +95 -0
  36. data/contrib/zstd/lib/common/zstd_internal.h +478 -0
  37. data/contrib/zstd/{compress → lib/compress}/fse_compress.c +214 -319
  38. data/contrib/zstd/lib/compress/hist.c +181 -0
  39. data/contrib/zstd/lib/compress/hist.h +75 -0
  40. data/contrib/zstd/lib/compress/huf_compress.c +913 -0
  41. data/contrib/zstd/lib/compress/zstd_compress.c +5208 -0
  42. data/contrib/zstd/lib/compress/zstd_compress_internal.h +1203 -0
  43. data/contrib/zstd/lib/compress/zstd_compress_literals.c +158 -0
  44. data/contrib/zstd/lib/compress/zstd_compress_literals.h +29 -0
  45. data/contrib/zstd/lib/compress/zstd_compress_sequences.c +433 -0
  46. data/contrib/zstd/lib/compress/zstd_compress_sequences.h +54 -0
  47. data/contrib/zstd/lib/compress/zstd_compress_superblock.c +849 -0
  48. data/contrib/zstd/lib/compress/zstd_compress_superblock.h +32 -0
  49. data/contrib/zstd/lib/compress/zstd_cwksp.h +561 -0
  50. data/contrib/zstd/lib/compress/zstd_double_fast.c +521 -0
  51. data/contrib/zstd/lib/compress/zstd_double_fast.h +38 -0
  52. data/contrib/zstd/lib/compress/zstd_fast.c +496 -0
  53. data/contrib/zstd/lib/compress/zstd_fast.h +37 -0
  54. data/contrib/zstd/lib/compress/zstd_lazy.c +1412 -0
  55. data/contrib/zstd/lib/compress/zstd_lazy.h +87 -0
  56. data/contrib/zstd/lib/compress/zstd_ldm.c +660 -0
  57. data/contrib/zstd/lib/compress/zstd_ldm.h +116 -0
  58. data/contrib/zstd/lib/compress/zstd_opt.c +1345 -0
  59. data/contrib/zstd/lib/compress/zstd_opt.h +56 -0
  60. data/contrib/zstd/lib/compress/zstdmt_compress.c +1811 -0
  61. data/contrib/zstd/lib/compress/zstdmt_compress.h +110 -0
  62. data/contrib/zstd/lib/decompress/huf_decompress.c +1350 -0
  63. data/contrib/zstd/lib/decompress/zstd_ddict.c +244 -0
  64. data/contrib/zstd/lib/decompress/zstd_ddict.h +44 -0
  65. data/contrib/zstd/lib/decompress/zstd_decompress.c +1930 -0
  66. data/contrib/zstd/lib/decompress/zstd_decompress_block.c +1540 -0
  67. data/contrib/zstd/lib/decompress/zstd_decompress_block.h +62 -0
  68. data/contrib/zstd/lib/decompress/zstd_decompress_internal.h +190 -0
  69. data/contrib/zstd/{common → lib/deprecated}/zbuff.h +68 -45
  70. data/contrib/zstd/lib/deprecated/zbuff_common.c +26 -0
  71. data/contrib/zstd/lib/deprecated/zbuff_compress.c +147 -0
  72. data/contrib/zstd/lib/deprecated/zbuff_decompress.c +75 -0
  73. data/contrib/zstd/lib/dictBuilder/cover.c +1245 -0
  74. data/contrib/zstd/lib/dictBuilder/cover.h +157 -0
  75. data/contrib/zstd/{dictBuilder → lib/dictBuilder}/divsufsort.c +3 -3
  76. data/contrib/zstd/{dictBuilder → lib/dictBuilder}/divsufsort.h +0 -0
  77. data/contrib/zstd/lib/dictBuilder/fastcover.c +758 -0
  78. data/contrib/zstd/{dictBuilder → lib/dictBuilder}/zdict.c +318 -194
  79. data/contrib/zstd/lib/dictBuilder/zdict.h +305 -0
  80. data/contrib/zstd/{legacy → lib/legacy}/zstd_legacy.h +171 -15
  81. data/contrib/zstd/{legacy → lib/legacy}/zstd_v01.c +191 -124
  82. data/contrib/zstd/{legacy → lib/legacy}/zstd_v01.h +19 -5
  83. data/contrib/zstd/{legacy → lib/legacy}/zstd_v02.c +125 -125
  84. data/contrib/zstd/{legacy → lib/legacy}/zstd_v02.h +19 -5
  85. data/contrib/zstd/{legacy → lib/legacy}/zstd_v03.c +125 -124
  86. data/contrib/zstd/{legacy → lib/legacy}/zstd_v03.h +20 -6
  87. data/contrib/zstd/{legacy → lib/legacy}/zstd_v04.c +151 -299
  88. data/contrib/zstd/{legacy → lib/legacy}/zstd_v04.h +19 -5
  89. data/contrib/zstd/{legacy → lib/legacy}/zstd_v05.c +237 -243
  90. data/contrib/zstd/{legacy → lib/legacy}/zstd_v05.h +19 -6
  91. data/contrib/zstd/{legacy → lib/legacy}/zstd_v06.c +130 -143
  92. data/contrib/zstd/{legacy → lib/legacy}/zstd_v06.h +18 -5
  93. data/contrib/zstd/{legacy → lib/legacy}/zstd_v07.c +158 -157
  94. data/contrib/zstd/{legacy → lib/legacy}/zstd_v07.h +19 -5
  95. data/contrib/zstd/lib/libzstd.pc.in +15 -0
  96. data/contrib/zstd/lib/zstd.h +2391 -0
  97. data/ext/depend +2 -0
  98. data/ext/extconf.rb +15 -6
  99. data/ext/extzstd.c +76 -145
  100. data/ext/extzstd.h +80 -31
  101. data/ext/extzstd_stream.c +417 -142
  102. data/ext/libzstd_conf.h +8 -0
  103. data/ext/zstd_common.c +10 -7
  104. data/ext/zstd_compress.c +14 -5
  105. data/ext/zstd_decompress.c +5 -4
  106. data/ext/zstd_dictbuilder.c +9 -4
  107. data/ext/zstd_dictbuilder_fastcover.c +3 -0
  108. data/ext/zstd_legacy_v01.c +3 -1
  109. data/ext/zstd_legacy_v02.c +3 -1
  110. data/ext/zstd_legacy_v03.c +3 -1
  111. data/ext/zstd_legacy_v04.c +3 -1
  112. data/ext/zstd_legacy_v05.c +3 -1
  113. data/ext/zstd_legacy_v06.c +3 -1
  114. data/ext/zstd_legacy_v07.c +3 -1
  115. data/gemstub.rb +10 -24
  116. data/lib/extzstd.rb +64 -179
  117. data/lib/extzstd/version.rb +6 -1
  118. data/test/test_basic.rb +9 -6
  119. metadata +113 -57
  120. data/HISTORY.ja +0 -5
  121. data/contrib/zstd/common/entropy_common.c +0 -225
  122. data/contrib/zstd/common/huf.h +0 -228
  123. data/contrib/zstd/common/zstd_common.c +0 -83
  124. data/contrib/zstd/common/zstd_errors.h +0 -60
  125. data/contrib/zstd/common/zstd_internal.h +0 -267
  126. data/contrib/zstd/compress/huf_compress.c +0 -533
  127. data/contrib/zstd/compress/zbuff_compress.c +0 -319
  128. data/contrib/zstd/compress/zstd_compress.c +0 -3264
  129. data/contrib/zstd/compress/zstd_opt.h +0 -900
  130. data/contrib/zstd/decompress/huf_decompress.c +0 -883
  131. data/contrib/zstd/decompress/zbuff_decompress.c +0 -252
  132. data/contrib/zstd/decompress/zstd_decompress.c +0 -1842
  133. data/contrib/zstd/dictBuilder/zdict.h +0 -111
  134. data/contrib/zstd/zstd.h +0 -640
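
The bulk of this diff moves the vendored zstd sources from the old flat layout (common/, compress/, decompress/, dictBuilder/, legacy/) into upstream's lib/ tree and updates them to a much newer upstream snapshot. Two of the new files are reproduced below: lib/compress/zstd_fast.h (+37) and lib/compress/zstd_lazy.c (+1412, truncated). For orientation, the gem's C extension (data/ext/extzstd.c) ultimately builds against the stable public API in data/contrib/zstd/lib/zstd.h. A minimal sketch of that one-shot API — illustrative only, not part of the diff, with error handling trimmed:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <zstd.h>   /* the vendored public header */

    int main(void)
    {
        const char src[] = "an example payload, an example payload";
        size_t const srcSize = strlen(src) + 1;
        size_t const bound = ZSTD_compressBound(srcSize);  /* worst-case compressed size */
        void* const dst = malloc(bound);
        if (dst == NULL) return 1;

        size_t const cSize = ZSTD_compress(dst, bound, src, srcSize, 3 /* level */);
        if (ZSTD_isError(cSize)) {
            fprintf(stderr, "compress error: %s\n", ZSTD_getErrorName(cSize));
            free(dst);
            return 1;
        }
        printf("compressed %zu -> %zu bytes\n", srcSize, cSize);
        free(dst);
        return 0;
    }

Decompression is symmetric via ZSTD_getFrameContentSize() and ZSTD_decompress(); the gem roughly wraps this API (plus the streaming and dictionary APIs) from its data/ext/ sources.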
data/contrib/zstd/lib/compress/zstd_fast.h
@@ -0,0 +1,37 @@
+ /*
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+ #ifndef ZSTD_FAST_H
+ #define ZSTD_FAST_H
+
+ #if defined (__cplusplus)
+ extern "C" {
+ #endif
+
+ #include "../common/mem.h" /* U32 */
+ #include "zstd_compress_internal.h"
+
+ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
+ void const* end, ZSTD_dictTableLoadMethod_e dtlm);
+ size_t ZSTD_compressBlock_fast(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ void const* src, size_t srcSize);
+ size_t ZSTD_compressBlock_fast_dictMatchState(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ void const* src, size_t srcSize);
+ size_t ZSTD_compressBlock_fast_extDict(
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+ void const* src, size_t srcSize);
+
+ #if defined (__cplusplus)
+ }
+ #endif
+
+ #endif /* ZSTD_FAST_H */
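
The header above declares the fast block compressor in three variants, one per dictionary mode (no dictionary, an attached dictMatchState, and extDict for out-of-window data). Elsewhere in this update (lib/compress/zstd_compress.c, not excerpted here) the selected strategy and dictionary mode are mapped onto one of these entry points through a function-pointer table. A simplified, hypothetical sketch of that dispatch pattern, using only the declarations shown above:

    /* Illustrative only -- the real table lives in zstd_compress.c and
     * covers every strategy, not just "fast"; the helper name is invented. */
    typedef size_t (*ZSTD_blockCompressor_f)(
            ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
            void const* src, size_t srcSize);

    static ZSTD_blockCompressor_f selectFastCompressor(ZSTD_dictMode_e dictMode)
    {
        switch (dictMode) {
        case ZSTD_dictMatchState: return ZSTD_compressBlock_fast_dictMatchState;
        case ZSTD_extDict:        return ZSTD_compressBlock_fast_extDict;
        default:                  return ZSTD_compressBlock_fast;  /* ZSTD_noDict */
        }
    }

The zstd_lazy.c hunk that follows applies the same idea internally: its searchFuncs table picks a match finder by dictionary mode and search method.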
@@ -0,0 +1,1412 @@
1
+ /*
2
+ * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
3
+ * All rights reserved.
4
+ *
5
+ * This source code is licensed under both the BSD-style license (found in the
6
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7
+ * in the COPYING file in the root directory of this source tree).
8
+ * You may select, at your option, one of the above-listed licenses.
9
+ */
10
+
11
+ #include "zstd_compress_internal.h"
12
+ #include "zstd_lazy.h"
13
+
14
+
15
+ /*-*************************************
16
+ * Binary Tree search
17
+ ***************************************/
18
+
19
+ static void
20
+ ZSTD_updateDUBT(ZSTD_matchState_t* ms,
21
+ const BYTE* ip, const BYTE* iend,
22
+ U32 mls)
23
+ {
24
+ const ZSTD_compressionParameters* const cParams = &ms->cParams;
25
+ U32* const hashTable = ms->hashTable;
26
+ U32 const hashLog = cParams->hashLog;
27
+
28
+ U32* const bt = ms->chainTable;
29
+ U32 const btLog = cParams->chainLog - 1;
30
+ U32 const btMask = (1 << btLog) - 1;
31
+
32
+ const BYTE* const base = ms->window.base;
33
+ U32 const target = (U32)(ip - base);
34
+ U32 idx = ms->nextToUpdate;
35
+
36
+ if (idx != target)
37
+ DEBUGLOG(7, "ZSTD_updateDUBT, from %u to %u (dictLimit:%u)",
38
+ idx, target, ms->window.dictLimit);
39
+ assert(ip + 8 <= iend); /* condition for ZSTD_hashPtr */
40
+ (void)iend;
41
+
42
+ assert(idx >= ms->window.dictLimit); /* condition for valid base+idx */
43
+ for ( ; idx < target ; idx++) {
44
+ size_t const h = ZSTD_hashPtr(base + idx, hashLog, mls); /* assumption : ip + 8 <= iend */
45
+ U32 const matchIndex = hashTable[h];
46
+
47
+ U32* const nextCandidatePtr = bt + 2*(idx&btMask);
48
+ U32* const sortMarkPtr = nextCandidatePtr + 1;
49
+
50
+ DEBUGLOG(8, "ZSTD_updateDUBT: insert %u", idx);
51
+ hashTable[h] = idx; /* Update Hash Table */
52
+ *nextCandidatePtr = matchIndex; /* update BT like a chain */
53
+ *sortMarkPtr = ZSTD_DUBT_UNSORTED_MARK;
54
+ }
55
+ ms->nextToUpdate = target;
56
+ }
57
+
58
+
59
+ /** ZSTD_insertDUBT1() :
60
+ * sort one already inserted but unsorted position
61
+ * assumption : curr >= btlow == (curr - btmask)
62
+ * doesn't fail */
63
+ static void
64
+ ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
65
+ U32 curr, const BYTE* inputEnd,
66
+ U32 nbCompares, U32 btLow,
67
+ const ZSTD_dictMode_e dictMode)
68
+ {
69
+ const ZSTD_compressionParameters* const cParams = &ms->cParams;
70
+ U32* const bt = ms->chainTable;
71
+ U32 const btLog = cParams->chainLog - 1;
72
+ U32 const btMask = (1 << btLog) - 1;
73
+ size_t commonLengthSmaller=0, commonLengthLarger=0;
74
+ const BYTE* const base = ms->window.base;
75
+ const BYTE* const dictBase = ms->window.dictBase;
76
+ const U32 dictLimit = ms->window.dictLimit;
77
+ const BYTE* const ip = (curr>=dictLimit) ? base + curr : dictBase + curr;
78
+ const BYTE* const iend = (curr>=dictLimit) ? inputEnd : dictBase + dictLimit;
79
+ const BYTE* const dictEnd = dictBase + dictLimit;
80
+ const BYTE* const prefixStart = base + dictLimit;
81
+ const BYTE* match;
82
+ U32* smallerPtr = bt + 2*(curr&btMask);
83
+ U32* largerPtr = smallerPtr + 1;
84
+ U32 matchIndex = *smallerPtr; /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */
85
+ U32 dummy32; /* to be nullified at the end */
86
+ U32 const windowValid = ms->window.lowLimit;
87
+ U32 const maxDistance = 1U << cParams->windowLog;
88
+ U32 const windowLow = (curr - windowValid > maxDistance) ? curr - maxDistance : windowValid;
89
+
90
+
91
+ DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)",
92
+ curr, dictLimit, windowLow);
93
+ assert(curr >= btLow);
94
+ assert(ip < iend); /* condition for ZSTD_count */
95
+
96
+ while (nbCompares-- && (matchIndex > windowLow)) {
97
+ U32* const nextPtr = bt + 2*(matchIndex & btMask);
98
+ size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
99
+ assert(matchIndex < curr);
100
+ /* note : all candidates are now supposed sorted,
101
+ * but it's still possible to have nextPtr[1] == ZSTD_DUBT_UNSORTED_MARK
102
+ * when a real index has the same value as ZSTD_DUBT_UNSORTED_MARK */
103
+
104
+ if ( (dictMode != ZSTD_extDict)
105
+ || (matchIndex+matchLength >= dictLimit) /* both in current segment*/
106
+ || (curr < dictLimit) /* both in extDict */) {
107
+ const BYTE* const mBase = ( (dictMode != ZSTD_extDict)
108
+ || (matchIndex+matchLength >= dictLimit)) ?
109
+ base : dictBase;
110
+ assert( (matchIndex+matchLength >= dictLimit) /* might be wrong if extDict is incorrectly set to 0 */
111
+ || (curr < dictLimit) );
112
+ match = mBase + matchIndex;
113
+ matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
114
+ } else {
115
+ match = dictBase + matchIndex;
116
+ matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
117
+ if (matchIndex+matchLength >= dictLimit)
118
+ match = base + matchIndex; /* preparation for next read of match[matchLength] */
119
+ }
120
+
121
+ DEBUGLOG(8, "ZSTD_insertDUBT1: comparing %u with %u : found %u common bytes ",
122
+ curr, matchIndex, (U32)matchLength);
123
+
124
+ if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */
125
+ break; /* drop , to guarantee consistency ; miss a bit of compression, but other solutions can corrupt tree */
126
+ }
127
+
128
+ if (match[matchLength] < ip[matchLength]) { /* necessarily within buffer */
129
+ /* match is smaller than current */
130
+ *smallerPtr = matchIndex; /* update smaller idx */
131
+ commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
132
+ if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop searching */
133
+ DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is smaller : next => %u",
134
+ matchIndex, btLow, nextPtr[1]);
135
+ smallerPtr = nextPtr+1; /* new "candidate" => larger than match, which was smaller than target */
136
+ matchIndex = nextPtr[1]; /* new matchIndex, larger than previous and closer to current */
137
+ } else {
138
+ /* match is larger than current */
139
+ *largerPtr = matchIndex;
140
+ commonLengthLarger = matchLength;
141
+ if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop searching */
142
+ DEBUGLOG(8, "ZSTD_insertDUBT1: %u (>btLow=%u) is larger => %u",
143
+ matchIndex, btLow, nextPtr[0]);
144
+ largerPtr = nextPtr;
145
+ matchIndex = nextPtr[0];
146
+ } }
147
+
148
+ *smallerPtr = *largerPtr = 0;
149
+ }
150
+
151
+
152
+ static size_t
153
+ ZSTD_DUBT_findBetterDictMatch (
154
+ ZSTD_matchState_t* ms,
155
+ const BYTE* const ip, const BYTE* const iend,
156
+ size_t* offsetPtr,
157
+ size_t bestLength,
158
+ U32 nbCompares,
159
+ U32 const mls,
160
+ const ZSTD_dictMode_e dictMode)
161
+ {
162
+ const ZSTD_matchState_t * const dms = ms->dictMatchState;
163
+ const ZSTD_compressionParameters* const dmsCParams = &dms->cParams;
164
+ const U32 * const dictHashTable = dms->hashTable;
165
+ U32 const hashLog = dmsCParams->hashLog;
166
+ size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
167
+ U32 dictMatchIndex = dictHashTable[h];
168
+
169
+ const BYTE* const base = ms->window.base;
170
+ const BYTE* const prefixStart = base + ms->window.dictLimit;
171
+ U32 const curr = (U32)(ip-base);
172
+ const BYTE* const dictBase = dms->window.base;
173
+ const BYTE* const dictEnd = dms->window.nextSrc;
174
+ U32 const dictHighLimit = (U32)(dms->window.nextSrc - dms->window.base);
175
+ U32 const dictLowLimit = dms->window.lowLimit;
176
+ U32 const dictIndexDelta = ms->window.lowLimit - dictHighLimit;
177
+
178
+ U32* const dictBt = dms->chainTable;
179
+ U32 const btLog = dmsCParams->chainLog - 1;
180
+ U32 const btMask = (1 << btLog) - 1;
181
+ U32 const btLow = (btMask >= dictHighLimit - dictLowLimit) ? dictLowLimit : dictHighLimit - btMask;
182
+
183
+ size_t commonLengthSmaller=0, commonLengthLarger=0;
184
+
185
+ (void)dictMode;
186
+ assert(dictMode == ZSTD_dictMatchState);
187
+
188
+ while (nbCompares-- && (dictMatchIndex > dictLowLimit)) {
189
+ U32* const nextPtr = dictBt + 2*(dictMatchIndex & btMask);
190
+ size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
191
+ const BYTE* match = dictBase + dictMatchIndex;
192
+ matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
193
+ if (dictMatchIndex+matchLength >= dictHighLimit)
194
+ match = base + dictMatchIndex + dictIndexDelta; /* to prepare for next usage of match[matchLength] */
195
+
196
+ if (matchLength > bestLength) {
197
+ U32 matchIndex = dictMatchIndex + dictIndexDelta;
198
+ if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) {
199
+ DEBUGLOG(9, "ZSTD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)",
200
+ curr, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, ZSTD_REP_MOVE + curr - matchIndex, dictMatchIndex, matchIndex);
201
+ bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + curr - matchIndex;
202
+ }
203
+ if (ip+matchLength == iend) { /* reached end of input : ip[matchLength] is not valid, no way to know if it's larger or smaller than match */
204
+ break; /* drop, to guarantee consistency (miss a little bit of compression) */
205
+ }
206
+ }
207
+
208
+ if (match[matchLength] < ip[matchLength]) {
209
+ if (dictMatchIndex <= btLow) { break; } /* beyond tree size, stop the search */
210
+ commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
211
+ dictMatchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
212
+ } else {
213
+ /* match is larger than current */
214
+ if (dictMatchIndex <= btLow) { break; } /* beyond tree size, stop the search */
215
+ commonLengthLarger = matchLength;
216
+ dictMatchIndex = nextPtr[0];
217
+ }
218
+ }
219
+
220
+ if (bestLength >= MINMATCH) {
221
+ U32 const mIndex = curr - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
222
+ DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
223
+ curr, (U32)bestLength, (U32)*offsetPtr, mIndex);
224
+ }
225
+ return bestLength;
226
+
227
+ }
228
+
229
+
230
+ static size_t
231
+ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
232
+ const BYTE* const ip, const BYTE* const iend,
233
+ size_t* offsetPtr,
234
+ U32 const mls,
235
+ const ZSTD_dictMode_e dictMode)
236
+ {
237
+ const ZSTD_compressionParameters* const cParams = &ms->cParams;
238
+ U32* const hashTable = ms->hashTable;
239
+ U32 const hashLog = cParams->hashLog;
240
+ size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
241
+ U32 matchIndex = hashTable[h];
242
+
243
+ const BYTE* const base = ms->window.base;
244
+ U32 const curr = (U32)(ip-base);
245
+ U32 const windowLow = ZSTD_getLowestMatchIndex(ms, curr, cParams->windowLog);
246
+
247
+ U32* const bt = ms->chainTable;
248
+ U32 const btLog = cParams->chainLog - 1;
249
+ U32 const btMask = (1 << btLog) - 1;
250
+ U32 const btLow = (btMask >= curr) ? 0 : curr - btMask;
251
+ U32 const unsortLimit = MAX(btLow, windowLow);
252
+
253
+ U32* nextCandidate = bt + 2*(matchIndex&btMask);
254
+ U32* unsortedMark = bt + 2*(matchIndex&btMask) + 1;
255
+ U32 nbCompares = 1U << cParams->searchLog;
256
+ U32 nbCandidates = nbCompares;
257
+ U32 previousCandidate = 0;
258
+
259
+ DEBUGLOG(7, "ZSTD_DUBT_findBestMatch (%u) ", curr);
260
+ assert(ip <= iend-8); /* required for h calculation */
261
+ assert(dictMode != ZSTD_dedicatedDictSearch);
262
+
263
+ /* reach end of unsorted candidates list */
264
+ while ( (matchIndex > unsortLimit)
265
+ && (*unsortedMark == ZSTD_DUBT_UNSORTED_MARK)
266
+ && (nbCandidates > 1) ) {
267
+ DEBUGLOG(8, "ZSTD_DUBT_findBestMatch: candidate %u is unsorted",
268
+ matchIndex);
269
+ *unsortedMark = previousCandidate; /* the unsortedMark becomes a reversed chain, to move up back to original position */
270
+ previousCandidate = matchIndex;
271
+ matchIndex = *nextCandidate;
272
+ nextCandidate = bt + 2*(matchIndex&btMask);
273
+ unsortedMark = bt + 2*(matchIndex&btMask) + 1;
274
+ nbCandidates --;
275
+ }
276
+
277
+ /* nullify last candidate if it's still unsorted
278
+ * simplification, detrimental to compression ratio, beneficial for speed */
279
+ if ( (matchIndex > unsortLimit)
280
+ && (*unsortedMark==ZSTD_DUBT_UNSORTED_MARK) ) {
281
+ DEBUGLOG(7, "ZSTD_DUBT_findBestMatch: nullify last unsorted candidate %u",
282
+ matchIndex);
283
+ *nextCandidate = *unsortedMark = 0;
284
+ }
285
+
286
+ /* batch sort stacked candidates */
287
+ matchIndex = previousCandidate;
288
+ while (matchIndex) { /* will end on matchIndex == 0 */
289
+ U32* const nextCandidateIdxPtr = bt + 2*(matchIndex&btMask) + 1;
290
+ U32 const nextCandidateIdx = *nextCandidateIdxPtr;
291
+ ZSTD_insertDUBT1(ms, matchIndex, iend,
292
+ nbCandidates, unsortLimit, dictMode);
293
+ matchIndex = nextCandidateIdx;
294
+ nbCandidates++;
295
+ }
296
+
297
+ /* find longest match */
298
+ { size_t commonLengthSmaller = 0, commonLengthLarger = 0;
299
+ const BYTE* const dictBase = ms->window.dictBase;
300
+ const U32 dictLimit = ms->window.dictLimit;
301
+ const BYTE* const dictEnd = dictBase + dictLimit;
302
+ const BYTE* const prefixStart = base + dictLimit;
303
+ U32* smallerPtr = bt + 2*(curr&btMask);
304
+ U32* largerPtr = bt + 2*(curr&btMask) + 1;
305
+ U32 matchEndIdx = curr + 8 + 1;
306
+ U32 dummy32; /* to be nullified at the end */
307
+ size_t bestLength = 0;
308
+
309
+ matchIndex = hashTable[h];
310
+ hashTable[h] = curr; /* Update Hash Table */
311
+
312
+ while (nbCompares-- && (matchIndex > windowLow)) {
313
+ U32* const nextPtr = bt + 2*(matchIndex & btMask);
314
+ size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
315
+ const BYTE* match;
316
+
317
+ if ((dictMode != ZSTD_extDict) || (matchIndex+matchLength >= dictLimit)) {
318
+ match = base + matchIndex;
319
+ matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
320
+ } else {
321
+ match = dictBase + matchIndex;
322
+ matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
323
+ if (matchIndex+matchLength >= dictLimit)
324
+ match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
325
+ }
326
+
327
+ if (matchLength > bestLength) {
328
+ if (matchLength > matchEndIdx - matchIndex)
329
+ matchEndIdx = matchIndex + (U32)matchLength;
330
+ if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) )
331
+ bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + curr - matchIndex;
332
+ if (ip+matchLength == iend) { /* equal : no way to know if inf or sup */
333
+ if (dictMode == ZSTD_dictMatchState) {
334
+ nbCompares = 0; /* in addition to avoiding checking any
335
+ * further in this loop, make sure we
336
+ * skip checking in the dictionary. */
337
+ }
338
+ break; /* drop, to guarantee consistency (miss a little bit of compression) */
339
+ }
340
+ }
341
+
342
+ if (match[matchLength] < ip[matchLength]) {
343
+ /* match is smaller than current */
344
+ *smallerPtr = matchIndex; /* update smaller idx */
345
+ commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
346
+ if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
347
+ smallerPtr = nextPtr+1; /* new "smaller" => larger of match */
348
+ matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
349
+ } else {
350
+ /* match is larger than current */
351
+ *largerPtr = matchIndex;
352
+ commonLengthLarger = matchLength;
353
+ if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
354
+ largerPtr = nextPtr;
355
+ matchIndex = nextPtr[0];
356
+ } }
357
+
358
+ *smallerPtr = *largerPtr = 0;
359
+
360
+ if (dictMode == ZSTD_dictMatchState && nbCompares) {
361
+ bestLength = ZSTD_DUBT_findBetterDictMatch(
362
+ ms, ip, iend,
363
+ offsetPtr, bestLength, nbCompares,
364
+ mls, dictMode);
365
+ }
366
+
367
+ assert(matchEndIdx > curr+8); /* ensure nextToUpdate is increased */
368
+ ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
369
+ if (bestLength >= MINMATCH) {
370
+ U32 const mIndex = curr - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
371
+ DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
372
+ curr, (U32)bestLength, (U32)*offsetPtr, mIndex);
373
+ }
374
+ return bestLength;
375
+ }
376
+ }
377
+
378
+
379
+ /** ZSTD_BtFindBestMatch() : Tree updater, providing best match */
380
+ FORCE_INLINE_TEMPLATE size_t
381
+ ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms,
382
+ const BYTE* const ip, const BYTE* const iLimit,
383
+ size_t* offsetPtr,
384
+ const U32 mls /* template */,
385
+ const ZSTD_dictMode_e dictMode)
386
+ {
387
+ DEBUGLOG(7, "ZSTD_BtFindBestMatch");
388
+ if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */
389
+ ZSTD_updateDUBT(ms, ip, iLimit, mls);
390
+ return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, offsetPtr, mls, dictMode);
391
+ }
392
+
393
+
394
+ static size_t
395
+ ZSTD_BtFindBestMatch_selectMLS ( ZSTD_matchState_t* ms,
396
+ const BYTE* ip, const BYTE* const iLimit,
397
+ size_t* offsetPtr)
398
+ {
399
+ switch(ms->cParams.minMatch)
400
+ {
401
+ default : /* includes case 3 */
402
+ case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict);
403
+ case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict);
404
+ case 7 :
405
+ case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict);
406
+ }
407
+ }
408
+
409
+
410
+ static size_t ZSTD_BtFindBestMatch_dictMatchState_selectMLS (
411
+ ZSTD_matchState_t* ms,
412
+ const BYTE* ip, const BYTE* const iLimit,
413
+ size_t* offsetPtr)
414
+ {
415
+ switch(ms->cParams.minMatch)
416
+ {
417
+ default : /* includes case 3 */
418
+ case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState);
419
+ case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState);
420
+ case 7 :
421
+ case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState);
422
+ }
423
+ }
424
+
425
+
426
+ static size_t ZSTD_BtFindBestMatch_extDict_selectMLS (
427
+ ZSTD_matchState_t* ms,
428
+ const BYTE* ip, const BYTE* const iLimit,
429
+ size_t* offsetPtr)
430
+ {
431
+ switch(ms->cParams.minMatch)
432
+ {
433
+ default : /* includes case 3 */
434
+ case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict);
435
+ case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict);
436
+ case 7 :
437
+ case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict);
438
+ }
439
+ }
440
+
441
+
442
+
443
+ /* *********************************
444
+ * Hash Chain
445
+ ***********************************/
446
+ #define NEXT_IN_CHAIN(d, mask) chainTable[(d) & (mask)]
447
+
448
+ /* Update chains up to ip (excluded)
449
+ Assumption : always within prefix (i.e. not within extDict) */
450
+ FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal(
451
+ ZSTD_matchState_t* ms,
452
+ const ZSTD_compressionParameters* const cParams,
453
+ const BYTE* ip, U32 const mls)
454
+ {
455
+ U32* const hashTable = ms->hashTable;
456
+ const U32 hashLog = cParams->hashLog;
457
+ U32* const chainTable = ms->chainTable;
458
+ const U32 chainMask = (1 << cParams->chainLog) - 1;
459
+ const BYTE* const base = ms->window.base;
460
+ const U32 target = (U32)(ip - base);
461
+ U32 idx = ms->nextToUpdate;
462
+
463
+ while(idx < target) { /* catch up */
464
+ size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls);
465
+ NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
466
+ hashTable[h] = idx;
467
+ idx++;
468
+ }
469
+
470
+ ms->nextToUpdate = target;
471
+ return hashTable[ZSTD_hashPtr(ip, hashLog, mls)];
472
+ }
473
+
474
+ U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) {
475
+ const ZSTD_compressionParameters* const cParams = &ms->cParams;
476
+ return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch);
477
+ }
478
+
479
+ void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip)
480
+ {
481
+ const BYTE* const base = ms->window.base;
482
+ U32 const target = (U32)(ip - base);
483
+ U32* const hashTable = ms->hashTable;
484
+ U32* const chainTable = ms->chainTable;
485
+ U32 const chainSize = 1 << ms->cParams.chainLog;
486
+ U32 idx = ms->nextToUpdate;
487
+ U32 const minChain = chainSize < target ? target - chainSize : idx;
488
+ U32 const bucketSize = 1 << ZSTD_LAZY_DDSS_BUCKET_LOG;
489
+ U32 const cacheSize = bucketSize - 1;
490
+ U32 const chainAttempts = (1 << ms->cParams.searchLog) - cacheSize;
491
+ U32 const chainLimit = chainAttempts > 255 ? 255 : chainAttempts;
492
+
493
+ /* We know the hashtable is oversized by a factor of `bucketSize`.
494
+ * We are going to temporarily pretend `bucketSize == 1`, keeping only a
495
+ * single entry. We will use the rest of the space to construct a temporary
496
+ * chaintable.
497
+ */
498
+ U32 const hashLog = ms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG;
499
+ U32* const tmpHashTable = hashTable;
500
+ U32* const tmpChainTable = hashTable + ((size_t)1 << hashLog);
501
+ U32 const tmpChainSize = ((1 << ZSTD_LAZY_DDSS_BUCKET_LOG) - 1) << hashLog;
502
+ U32 const tmpMinChain = tmpChainSize < target ? target - tmpChainSize : idx;
503
+
504
+ U32 hashIdx;
505
+
506
+ assert(ms->cParams.chainLog <= 24);
507
+ assert(ms->cParams.hashLog >= ms->cParams.chainLog);
508
+ assert(idx != 0);
509
+ assert(tmpMinChain <= minChain);
510
+
511
+ /* fill conventional hash table and conventional chain table */
512
+ for ( ; idx < target; idx++) {
513
+ U32 const h = (U32)ZSTD_hashPtr(base + idx, hashLog, ms->cParams.minMatch);
514
+ if (idx >= tmpMinChain) {
515
+ tmpChainTable[idx - tmpMinChain] = hashTable[h];
516
+ }
517
+ tmpHashTable[h] = idx;
518
+ }
519
+
520
+ /* sort chains into ddss chain table */
521
+ {
522
+ U32 chainPos = 0;
523
+ for (hashIdx = 0; hashIdx < (1U << hashLog); hashIdx++) {
524
+ U32 count;
525
+ U32 countBeyondMinChain = 0;
526
+ U32 i = tmpHashTable[hashIdx];
527
+ for (count = 0; i >= tmpMinChain && count < cacheSize; count++) {
528
+ /* skip through the chain to the first position that won't be
529
+ * in the hash cache bucket */
530
+ if (i < minChain) {
531
+ countBeyondMinChain++;
532
+ }
533
+ i = tmpChainTable[i - tmpMinChain];
534
+ }
535
+ if (count == cacheSize) {
536
+ for (count = 0; count < chainLimit;) {
537
+ if (i < minChain) {
538
+ if (!i || countBeyondMinChain++ > cacheSize) {
539
+ /* only allow pulling `cacheSize` number of entries
540
+ * into the cache or chainTable beyond `minChain`,
541
+ * to replace the entries pulled out of the
542
+ * chainTable into the cache. This lets us reach
543
+ * back further without increasing the total number
544
+ * of entries in the chainTable, guaranteeing the
545
+ * DDSS chain table will fit into the space
546
+ * allocated for the regular one. */
547
+ break;
548
+ }
549
+ }
550
+ chainTable[chainPos++] = i;
551
+ count++;
552
+ if (i < tmpMinChain) {
553
+ break;
554
+ }
555
+ i = tmpChainTable[i - tmpMinChain];
556
+ }
557
+ } else {
558
+ count = 0;
559
+ }
560
+ if (count) {
561
+ tmpHashTable[hashIdx] = ((chainPos - count) << 8) + count;
562
+ } else {
563
+ tmpHashTable[hashIdx] = 0;
564
+ }
565
+ }
566
+ assert(chainPos <= chainSize); /* I believe this is guaranteed... */
567
+ }
568
+
569
+ /* move chain pointers into the last entry of each hash bucket */
570
+ for (hashIdx = (1 << hashLog); hashIdx; ) {
571
+ U32 const bucketIdx = --hashIdx << ZSTD_LAZY_DDSS_BUCKET_LOG;
572
+ U32 const chainPackedPointer = tmpHashTable[hashIdx];
573
+ U32 i;
574
+ for (i = 0; i < cacheSize; i++) {
575
+ hashTable[bucketIdx + i] = 0;
576
+ }
577
+ hashTable[bucketIdx + bucketSize - 1] = chainPackedPointer;
578
+ }
579
+
580
+ /* fill the buckets of the hash table */
581
+ for (idx = ms->nextToUpdate; idx < target; idx++) {
582
+ U32 const h = (U32)ZSTD_hashPtr(base + idx, hashLog, ms->cParams.minMatch)
583
+ << ZSTD_LAZY_DDSS_BUCKET_LOG;
584
+ U32 i;
585
+ /* Shift hash cache down 1. */
586
+ for (i = cacheSize - 1; i; i--)
587
+ hashTable[h + i] = hashTable[h + i - 1];
588
+ hashTable[h] = idx;
589
+ }
590
+
591
+ ms->nextToUpdate = target;
592
+ }
593
+
594
+
595
+ /* inlining is important to hardwire a hot branch (template emulation) */
596
+ FORCE_INLINE_TEMPLATE
597
+ size_t ZSTD_HcFindBestMatch_generic (
598
+ ZSTD_matchState_t* ms,
599
+ const BYTE* const ip, const BYTE* const iLimit,
600
+ size_t* offsetPtr,
601
+ const U32 mls, const ZSTD_dictMode_e dictMode)
602
+ {
603
+ const ZSTD_compressionParameters* const cParams = &ms->cParams;
604
+ U32* const chainTable = ms->chainTable;
605
+ const U32 chainSize = (1 << cParams->chainLog);
606
+ const U32 chainMask = chainSize-1;
607
+ const BYTE* const base = ms->window.base;
608
+ const BYTE* const dictBase = ms->window.dictBase;
609
+ const U32 dictLimit = ms->window.dictLimit;
610
+ const BYTE* const prefixStart = base + dictLimit;
611
+ const BYTE* const dictEnd = dictBase + dictLimit;
612
+ const U32 curr = (U32)(ip-base);
613
+ const U32 maxDistance = 1U << cParams->windowLog;
614
+ const U32 lowestValid = ms->window.lowLimit;
615
+ const U32 withinMaxDistance = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
616
+ const U32 isDictionary = (ms->loadedDictEnd != 0);
617
+ const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance;
618
+ const U32 minChain = curr > chainSize ? curr - chainSize : 0;
619
+ U32 nbAttempts = 1U << cParams->searchLog;
620
+ size_t ml=4-1;
621
+
622
+ const ZSTD_matchState_t* const dms = ms->dictMatchState;
623
+ const U32 ddsHashLog = dictMode == ZSTD_dedicatedDictSearch
624
+ ? dms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG : 0;
625
+ const size_t ddsIdx = dictMode == ZSTD_dedicatedDictSearch
626
+ ? ZSTD_hashPtr(ip, ddsHashLog, mls) << ZSTD_LAZY_DDSS_BUCKET_LOG : 0;
627
+
628
+ U32 matchIndex;
629
+
630
+ if (dictMode == ZSTD_dedicatedDictSearch) {
631
+ const U32* entry = &dms->hashTable[ddsIdx];
632
+ PREFETCH_L1(entry);
633
+ }
634
+
635
+ /* HC4 match finder */
636
+ matchIndex = ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, mls);
637
+
638
+ for ( ; (matchIndex>=lowLimit) & (nbAttempts>0) ; nbAttempts--) {
639
+ size_t currentMl=0;
640
+ if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
641
+ const BYTE* const match = base + matchIndex;
642
+ assert(matchIndex >= dictLimit); /* ensures this is true if dictMode != ZSTD_extDict */
643
+ if (match[ml] == ip[ml]) /* potentially better */
644
+ currentMl = ZSTD_count(ip, match, iLimit);
645
+ } else {
646
+ const BYTE* const match = dictBase + matchIndex;
647
+ assert(match+4 <= dictEnd);
648
+ if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */
649
+ currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4;
650
+ }
651
+
652
+ /* save best solution */
653
+ if (currentMl > ml) {
654
+ ml = currentMl;
655
+ *offsetPtr = curr - matchIndex + ZSTD_REP_MOVE;
656
+ if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
657
+ }
658
+
659
+ if (matchIndex <= minChain) break;
660
+ matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
661
+ }
662
+
663
+ if (dictMode == ZSTD_dedicatedDictSearch) {
664
+ const U32 ddsLowestIndex = dms->window.dictLimit;
665
+ const BYTE* const ddsBase = dms->window.base;
666
+ const BYTE* const ddsEnd = dms->window.nextSrc;
667
+ const U32 ddsSize = (U32)(ddsEnd - ddsBase);
668
+ const U32 ddsIndexDelta = dictLimit - ddsSize;
669
+ const U32 bucketSize = (1 << ZSTD_LAZY_DDSS_BUCKET_LOG);
670
+ const U32 bucketLimit = nbAttempts < bucketSize - 1 ? nbAttempts : bucketSize - 1;
671
+ U32 ddsAttempt;
672
+
673
+ for (ddsAttempt = 0; ddsAttempt < bucketSize - 1; ddsAttempt++) {
674
+ PREFETCH_L1(ddsBase + dms->hashTable[ddsIdx + ddsAttempt]);
675
+ }
676
+
677
+ {
678
+ U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1];
679
+ U32 const chainIndex = chainPackedPointer >> 8;
680
+
681
+ PREFETCH_L1(&dms->chainTable[chainIndex]);
682
+ }
683
+
684
+ for (ddsAttempt = 0; ddsAttempt < bucketLimit; ddsAttempt++) {
685
+ size_t currentMl=0;
686
+ const BYTE* match;
687
+ matchIndex = dms->hashTable[ddsIdx + ddsAttempt];
688
+ match = ddsBase + matchIndex;
689
+
690
+ if (!matchIndex) {
691
+ return ml;
692
+ }
693
+
694
+ /* guaranteed by table construction */
695
+ (void)ddsLowestIndex;
696
+ assert(matchIndex >= ddsLowestIndex);
697
+ assert(match+4 <= ddsEnd);
698
+ if (MEM_read32(match) == MEM_read32(ip)) {
699
+ /* assumption : matchIndex <= dictLimit-4 (by table construction) */
700
+ currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4;
701
+ }
702
+
703
+ /* save best solution */
704
+ if (currentMl > ml) {
705
+ ml = currentMl;
706
+ *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE;
707
+ if (ip+currentMl == iLimit) {
708
+ /* best possible, avoids read overflow on next attempt */
709
+ return ml;
710
+ }
711
+ }
712
+ }
713
+
714
+ {
715
+ U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1];
716
+ U32 chainIndex = chainPackedPointer >> 8;
717
+ U32 const chainLength = chainPackedPointer & 0xFF;
718
+ U32 const chainAttempts = nbAttempts - ddsAttempt;
719
+ U32 const chainLimit = chainAttempts > chainLength ? chainLength : chainAttempts;
720
+ U32 chainAttempt;
721
+
722
+ for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++) {
723
+ PREFETCH_L1(ddsBase + dms->chainTable[chainIndex + chainAttempt]);
724
+ }
725
+
726
+ for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++, chainIndex++) {
727
+ size_t currentMl=0;
728
+ const BYTE* match;
729
+ matchIndex = dms->chainTable[chainIndex];
730
+ match = ddsBase + matchIndex;
731
+
732
+ /* guaranteed by table construction */
733
+ assert(matchIndex >= ddsLowestIndex);
734
+ assert(match+4 <= ddsEnd);
735
+ if (MEM_read32(match) == MEM_read32(ip)) {
736
+ /* assumption : matchIndex <= dictLimit-4 (by table construction) */
737
+ currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4;
738
+ }
739
+
740
+ /* save best solution */
741
+ if (currentMl > ml) {
742
+ ml = currentMl;
743
+ *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE;
744
+ if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
745
+ }
746
+ }
747
+ }
748
+ } else if (dictMode == ZSTD_dictMatchState) {
749
+ const U32* const dmsChainTable = dms->chainTable;
750
+ const U32 dmsChainSize = (1 << dms->cParams.chainLog);
751
+ const U32 dmsChainMask = dmsChainSize - 1;
752
+ const U32 dmsLowestIndex = dms->window.dictLimit;
753
+ const BYTE* const dmsBase = dms->window.base;
754
+ const BYTE* const dmsEnd = dms->window.nextSrc;
755
+ const U32 dmsSize = (U32)(dmsEnd - dmsBase);
756
+ const U32 dmsIndexDelta = dictLimit - dmsSize;
757
+ const U32 dmsMinChain = dmsSize > dmsChainSize ? dmsSize - dmsChainSize : 0;
758
+
759
+ matchIndex = dms->hashTable[ZSTD_hashPtr(ip, dms->cParams.hashLog, mls)];
760
+
761
+ for ( ; (matchIndex>=dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) {
762
+ size_t currentMl=0;
763
+ const BYTE* const match = dmsBase + matchIndex;
764
+ assert(match+4 <= dmsEnd);
765
+ if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */
766
+ currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4;
767
+
768
+ /* save best solution */
769
+ if (currentMl > ml) {
770
+ ml = currentMl;
771
+ *offsetPtr = curr - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE;
772
+ if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
773
+ }
774
+
775
+ if (matchIndex <= dmsMinChain) break;
776
+
777
+ matchIndex = dmsChainTable[matchIndex & dmsChainMask];
778
+ }
779
+ }
780
+
781
+ return ml;
782
+ }
783
+
784
+
785
+ FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS (
786
+ ZSTD_matchState_t* ms,
787
+ const BYTE* ip, const BYTE* const iLimit,
788
+ size_t* offsetPtr)
789
+ {
790
+ switch(ms->cParams.minMatch)
791
+ {
792
+ default : /* includes case 3 */
793
+ case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict);
794
+ case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict);
795
+ case 7 :
796
+ case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict);
797
+ }
798
+ }
799
+
800
+
801
+ static size_t ZSTD_HcFindBestMatch_dictMatchState_selectMLS (
802
+ ZSTD_matchState_t* ms,
803
+ const BYTE* ip, const BYTE* const iLimit,
804
+ size_t* offsetPtr)
805
+ {
806
+ switch(ms->cParams.minMatch)
807
+ {
808
+ default : /* includes case 3 */
809
+ case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState);
810
+ case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState);
811
+ case 7 :
812
+ case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState);
813
+ }
814
+ }
815
+
816
+
817
+ static size_t ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS (
818
+ ZSTD_matchState_t* ms,
819
+ const BYTE* ip, const BYTE* const iLimit,
820
+ size_t* offsetPtr)
821
+ {
822
+ switch(ms->cParams.minMatch)
823
+ {
824
+ default : /* includes case 3 */
825
+ case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dedicatedDictSearch);
826
+ case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dedicatedDictSearch);
827
+ case 7 :
828
+ case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dedicatedDictSearch);
829
+ }
830
+ }
831
+
832
+
833
+ FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
834
+ ZSTD_matchState_t* ms,
835
+ const BYTE* ip, const BYTE* const iLimit,
836
+ size_t* offsetPtr)
837
+ {
838
+ switch(ms->cParams.minMatch)
839
+ {
840
+ default : /* includes case 3 */
841
+ case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict);
842
+ case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict);
843
+ case 7 :
844
+ case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict);
845
+ }
846
+ }
847
+
848
+
849
+ /* *******************************
850
+ * Common parser - lazy strategy
851
+ *********************************/
852
+ typedef enum { search_hashChain, search_binaryTree } searchMethod_e;
853
+
854
+ FORCE_INLINE_TEMPLATE size_t
855
+ ZSTD_compressBlock_lazy_generic(
856
+ ZSTD_matchState_t* ms, seqStore_t* seqStore,
857
+ U32 rep[ZSTD_REP_NUM],
858
+ const void* src, size_t srcSize,
859
+ const searchMethod_e searchMethod, const U32 depth,
860
+ ZSTD_dictMode_e const dictMode)
861
+ {
862
+ const BYTE* const istart = (const BYTE*)src;
863
+ const BYTE* ip = istart;
864
+ const BYTE* anchor = istart;
865
+ const BYTE* const iend = istart + srcSize;
866
+ const BYTE* const ilimit = iend - 8;
867
+ const BYTE* const base = ms->window.base;
868
+ const U32 prefixLowestIndex = ms->window.dictLimit;
869
+ const BYTE* const prefixLowest = base + prefixLowestIndex;
870
+
871
+ typedef size_t (*searchMax_f)(
872
+ ZSTD_matchState_t* ms,
873
+ const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
874
+
875
+ /**
876
+ * This table is indexed first by the four ZSTD_dictMode_e values, and then
877
+ * by the two searchMethod_e values. NULLs are placed for configurations
878
+ * that should never occur (extDict modes go to the other implementation
879
+ * below and there is no DDSS for binary tree search yet).
880
+ */
881
+ const searchMax_f searchFuncs[4][2] = {
882
+ {
883
+ ZSTD_HcFindBestMatch_selectMLS,
884
+ ZSTD_BtFindBestMatch_selectMLS
885
+ },
886
+ {
887
+ NULL,
888
+ NULL
889
+ },
890
+ {
891
+ ZSTD_HcFindBestMatch_dictMatchState_selectMLS,
892
+ ZSTD_BtFindBestMatch_dictMatchState_selectMLS
893
+ },
894
+ {
895
+ ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS,
896
+ NULL
897
+ }
898
+ };
899
+
900
+ searchMax_f const searchMax = searchFuncs[dictMode][searchMethod == search_binaryTree];
901
+ U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0;
902
+
903
+ const int isDMS = dictMode == ZSTD_dictMatchState;
904
+ const int isDDS = dictMode == ZSTD_dedicatedDictSearch;
905
+ const int isDxS = isDMS || isDDS;
906
+ const ZSTD_matchState_t* const dms = ms->dictMatchState;
907
+ const U32 dictLowestIndex = isDxS ? dms->window.dictLimit : 0;
908
+ const BYTE* const dictBase = isDxS ? dms->window.base : NULL;
909
+ const BYTE* const dictLowest = isDxS ? dictBase + dictLowestIndex : NULL;
910
+ const BYTE* const dictEnd = isDxS ? dms->window.nextSrc : NULL;
911
+ const U32 dictIndexDelta = isDxS ?
912
+ prefixLowestIndex - (U32)(dictEnd - dictBase) :
913
+ 0;
914
+ const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest));
915
+
916
+ assert(searchMax != NULL);
917
+
918
+ DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u)", (U32)dictMode);
919
+
920
+ /* init */
921
+ ip += (dictAndPrefixLength == 0);
922
+ if (dictMode == ZSTD_noDict) {
923
+ U32 const curr = (U32)(ip - base);
924
+ U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, ms->cParams.windowLog);
925
+ U32 const maxRep = curr - windowLow;
926
+ if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
927
+ if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
928
+ }
929
+ if (isDxS) {
930
+ /* dictMatchState repCode checks don't currently handle repCode == 0
931
+ * disabling. */
932
+ assert(offset_1 <= dictAndPrefixLength);
933
+ assert(offset_2 <= dictAndPrefixLength);
934
+ }
935
+
936
+ /* Match Loop */
937
+ #if defined(__GNUC__) && defined(__x86_64__)
938
+ /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the
939
+ * code alignment is perturbed. To fix the instability align the loop on 32-bytes.
940
+ */
941
+ __asm__(".p2align 5");
942
+ #endif
943
+ while (ip < ilimit) {
944
+ size_t matchLength=0;
945
+ size_t offset=0;
946
+ const BYTE* start=ip+1;
947
+
948
+ /* check repCode */
949
+ if (isDxS) {
950
+ const U32 repIndex = (U32)(ip - base) + 1 - offset_1;
951
+ const BYTE* repMatch = ((dictMode == ZSTD_dictMatchState || dictMode == ZSTD_dedicatedDictSearch)
952
+ && repIndex < prefixLowestIndex) ?
953
+ dictBase + (repIndex - dictIndexDelta) :
954
+ base + repIndex;
955
+ if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
956
+ && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
957
+ const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
958
+ matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
959
+ if (depth==0) goto _storeSequence;
960
+ }
961
+ }
962
+ if ( dictMode == ZSTD_noDict
963
+ && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
964
+ matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
965
+ if (depth==0) goto _storeSequence;
966
+ }
967
+
968
+ /* first search (depth 0) */
969
+ { size_t offsetFound = 999999999;
970
+ size_t const ml2 = searchMax(ms, ip, iend, &offsetFound);
971
+ if (ml2 > matchLength)
972
+ matchLength = ml2, start = ip, offset=offsetFound;
973
+ }
974
+
975
+ if (matchLength < 4) {
976
+ ip += ((ip-anchor) >> kSearchStrength) + 1; /* jump faster over incompressible sections */
977
+ continue;
978
+ }
979
+
980
+ /* let's try to find a better solution */
981
+ if (depth>=1)
982
+ while (ip<ilimit) {
983
+ ip ++;
984
+ if ( (dictMode == ZSTD_noDict)
985
+ && (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
986
+ size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
987
+ int const gain2 = (int)(mlRep * 3);
988
+ int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
989
+ if ((mlRep >= 4) && (gain2 > gain1))
990
+ matchLength = mlRep, offset = 0, start = ip;
991
+ }
992
+ if (isDxS) {
993
+ const U32 repIndex = (U32)(ip - base) - offset_1;
994
+ const BYTE* repMatch = repIndex < prefixLowestIndex ?
995
+ dictBase + (repIndex - dictIndexDelta) :
996
+ base + repIndex;
997
+ if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
998
+ && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
999
+ const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
1000
+ size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
1001
+ int const gain2 = (int)(mlRep * 3);
1002
+ int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
1003
+ if ((mlRep >= 4) && (gain2 > gain1))
1004
+ matchLength = mlRep, offset = 0, start = ip;
1005
+ }
1006
+ }
1007
+ { size_t offset2=999999999;
1008
+ size_t const ml2 = searchMax(ms, ip, iend, &offset2);
1009
+ int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
1010
+ int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
1011
+ if ((ml2 >= 4) && (gain2 > gain1)) {
1012
+ matchLength = ml2, offset = offset2, start = ip;
1013
+ continue; /* search a better one */
1014
+ } }
1015
+
1016
+ /* let's find an even better one */
1017
+ if ((depth==2) && (ip<ilimit)) {
1018
+ ip ++;
1019
+ if ( (dictMode == ZSTD_noDict)
1020
+ && (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
1021
+ size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
1022
+ int const gain2 = (int)(mlRep * 4);
1023
+ int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
1024
+ if ((mlRep >= 4) && (gain2 > gain1))
1025
+ matchLength = mlRep, offset = 0, start = ip;
1026
+ }
1027
+ if (isDxS) {
1028
+ const U32 repIndex = (U32)(ip - base) - offset_1;
1029
+ const BYTE* repMatch = repIndex < prefixLowestIndex ?
1030
+ dictBase + (repIndex - dictIndexDelta) :
1031
+ base + repIndex;
1032
+ if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
1033
+ && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
1034
+ const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
1035
+ size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
1036
+ int const gain2 = (int)(mlRep * 4);
1037
+ int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
1038
+ if ((mlRep >= 4) && (gain2 > gain1))
1039
+ matchLength = mlRep, offset = 0, start = ip;
1040
+ }
1041
+ }
1042
+ { size_t offset2=999999999;
1043
+ size_t const ml2 = searchMax(ms, ip, iend, &offset2);
1044
+ int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
1045
+ int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
1046
+ if ((ml2 >= 4) && (gain2 > gain1)) {
1047
+ matchLength = ml2, offset = offset2, start = ip;
1048
+ continue;
1049
+ } } }
1050
+ break; /* nothing found : store previous solution */
1051
+ }
1052
+
1053
+ /* NOTE:
1054
+ * start[-offset+ZSTD_REP_MOVE-1] is undefined behavior.
1055
+ * (-offset+ZSTD_REP_MOVE-1) is unsigned, and is added to start, which
1056
+ * overflows the pointer, which is undefined behavior.
1057
+ */
1058
+ /* catch up */
1059
+ if (offset) {
1060
+ if (dictMode == ZSTD_noDict) {
1061
+ while ( ((start > anchor) & (start - (offset-ZSTD_REP_MOVE) > prefixLowest))
1062
+ && (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) ) /* only search for offset within prefix */
1063
+ { start--; matchLength++; }
1064
+ }
1065
+ if (isDxS) {
1066
+ U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
1067
+ const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex;
1068
+ const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest;
1069
+ while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */
1070
+ }
1071
+ offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
1072
+ }
1073
+ /* store sequence */
1074
+ _storeSequence:
1075
+ { size_t const litLength = start - anchor;
1076
+ ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
1077
+ anchor = ip = start + matchLength;
1078
+ }
1079
+
1080
+ /* check immediate repcode */
1081
+ if (isDxS) {
1082
+ while (ip <= ilimit) {
1083
+ U32 const current2 = (U32)(ip-base);
1084
+ U32 const repIndex = current2 - offset_2;
1085
+ const BYTE* repMatch = repIndex < prefixLowestIndex ?
1086
+ dictBase - dictIndexDelta + repIndex :
1087
+ base + repIndex;
1088
+ if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex) >= 3 /* intentional overflow */)
1089
+ && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
1090
+ const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend;
1091
+ matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4;
1092
+ offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset_2 <=> offset_1 */
1093
+ ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
1094
+ ip += matchLength;
1095
+ anchor = ip;
1096
+ continue;
1097
+ }
1098
+ break;
1099
+ }
1100
+ }
1101
+
1102
+ if (dictMode == ZSTD_noDict) {
1103
+ while ( ((ip <= ilimit) & (offset_2>0))
1104
+ && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) {
1105
+ /* store sequence */
1106
+ matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
1107
+ offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
1108
+ ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
1109
+ ip += matchLength;
1110
+ anchor = ip;
1111
+ continue; /* faster when present ... (?) */
1112
+ } } }
1113
+
1114
+ /* Save reps for next block */
1115
+ rep[0] = offset_1 ? offset_1 : savedOffset;
1116
+ rep[1] = offset_2 ? offset_2 : savedOffset;
1117
+
1118
+ /* Return the last literals size */
1119
+ return (size_t)(iend - anchor);
1120
+ }
1121
+
1122
+
1123
+size_t ZSTD_compressBlock_btlazy2(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict);
+}
+
+size_t ZSTD_compressBlock_lazy2(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict);
+}
+
+size_t ZSTD_compressBlock_lazy(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict);
+}
+
+size_t ZSTD_compressBlock_greedy(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict);
+}
+
+size_t ZSTD_compressBlock_btlazy2_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState);
+}
+
+size_t ZSTD_compressBlock_lazy2_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState);
+}
+
+size_t ZSTD_compressBlock_lazy_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState);
+}
+
+size_t ZSTD_compressBlock_greedy_dictMatchState(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState);
+}
+
+
+size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dedicatedDictSearch);
+}
+
+size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dedicatedDictSearch);
+}
+
+size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dedicatedDictSearch);
+}
+
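These eleven entry points are deliberately thin. `ZSTD_compressBlock_lazy_generic` is declared `FORCE_INLINE_TEMPLATE`, so each wrapper compiles into its own specialization in which `searchMethod`, `depth`, and `dictMode` are compile-time constants and the per-position mode checks fold away. Here is the same idiom in miniature; every name in this sketch is invented for illustration and nothing below exists in zstd:

```c
#include <stddef.h>

#if defined(__GNUC__)
#  define FORCE_INLINE static inline __attribute__((always_inline))
#else
#  define FORCE_INLINE static inline
#endif

typedef enum { mode_fast, mode_thorough } mode_e;

/* Generic worker: every wrapper below passes compile-time constants,
 * so each call site specializes and the `mode` branch folds away. */
FORCE_INLINE size_t work_generic(const char* src, size_t srcSize,
                                 mode_e mode, unsigned depth)
{
    size_t cost = srcSize;
    (void)src;
    if (mode == mode_thorough) cost += (size_t)depth * srcSize;
    return cost;
}

size_t work_fast(const char* src, size_t srcSize)
{
    return work_generic(src, srcSize, mode_fast, 0);
}

size_t work_thorough(const char* src, size_t srcSize)
{
    return work_generic(src, srcSize, mode_thorough, 2);
}
```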
+
+FORCE_INLINE_TEMPLATE
+size_t ZSTD_compressBlock_lazy_extDict_generic(
+                        ZSTD_matchState_t* ms, seqStore_t* seqStore,
+                        U32 rep[ZSTD_REP_NUM],
+                        const void* src, size_t srcSize,
+                        const searchMethod_e searchMethod, const U32 depth)
+{
+    const BYTE* const istart = (const BYTE*)src;
+    const BYTE* ip = istart;
+    const BYTE* anchor = istart;
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* const ilimit = iend - 8;
+    const BYTE* const base = ms->window.base;
+    const U32 dictLimit = ms->window.dictLimit;
+    const BYTE* const prefixStart = base + dictLimit;
+    const BYTE* const dictBase = ms->window.dictBase;
+    const BYTE* const dictEnd = dictBase + dictLimit;
+    const BYTE* const dictStart = dictBase + ms->window.lowLimit;
+    const U32 windowLog = ms->cParams.windowLog;
+
+    typedef size_t (*searchMax_f)(
+                        ZSTD_matchState_t* ms,
+                        const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
+    searchMax_f searchMax = searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS;
+
+    U32 offset_1 = rep[0], offset_2 = rep[1];
+
+    DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic");
+
+    /* init */
+    ip += (ip == prefixStart);
+
+    /* Match Loop */
+#if defined(__GNUC__) && defined(__x86_64__)
+    /* I've measured a random 5% speed loss on levels 5 & 6 (greedy) when the
+     * code alignment is perturbed.  To fix the instability, align the loop on 32 bytes.
+     */
+    __asm__(".p2align 5");
+#endif
+    while (ip < ilimit) {
+        size_t matchLength=0;
+        size_t offset=0;
+        const BYTE* start=ip+1;
+        U32 curr = (U32)(ip-base);
+
+        /* check repCode */
+        {   const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr+1, windowLog);
+            const U32 repIndex = (U32)(curr+1 - offset_1);
+            const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
+            const BYTE* const repMatch = repBase + repIndex;
+            if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow))   /* intentional overflow */
+            if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
+                /* repcode detected: take it */
+                const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
+                matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repEnd, prefixStart) + 4;
+                if (depth==0) goto _storeSequence;
+        }   }
+
+        /* first search (depth 0) */
+        {   size_t offsetFound = 999999999;
+            size_t const ml2 = searchMax(ms, ip, iend, &offsetFound);
+            if (ml2 > matchLength)
+                matchLength = ml2, start = ip, offset=offsetFound;
+        }
+
+        if (matchLength < 4) {
+            ip += ((ip-anchor) >> kSearchStrength) + 1;   /* jump faster over incompressible sections */
+            continue;
+        }
+
+        /* let's try to find a better solution */
+        if (depth>=1)
+        while (ip<ilimit) {
+            ip ++;
+            curr++;
+            /* check repCode */
+            if (offset) {
+                const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog);
+                const U32 repIndex = (U32)(curr - offset_1);
+                const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
+                const BYTE* const repMatch = repBase + repIndex;
+                if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow))   /* intentional overflow */
+                if (MEM_read32(ip) == MEM_read32(repMatch)) {
+                    /* repcode detected */
+                    const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
+                    size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
+                    int const gain2 = (int)(repLength * 3);
+                    int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
+                    if ((repLength >= 4) && (gain2 > gain1))
+                        matchLength = repLength, offset = 0, start = ip;
+            }   }
+
+            /* search match, depth 1 */
+            {   size_t offset2=999999999;
+                size_t const ml2 = searchMax(ms, ip, iend, &offset2);
+                int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
+                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
+                if ((ml2 >= 4) && (gain2 > gain1)) {
+                    matchLength = ml2, offset = offset2, start = ip;
+                    continue;   /* search a better one */
+            }   }
+
+            /* let's find an even better one */
+            if ((depth==2) && (ip<ilimit)) {
+                ip ++;
+                curr++;
+                /* check repCode */
+                if (offset) {
+                    const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog);
+                    const U32 repIndex = (U32)(curr - offset_1);
+                    const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
+                    const BYTE* const repMatch = repBase + repIndex;
+                    if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow))   /* intentional overflow */
+                    if (MEM_read32(ip) == MEM_read32(repMatch)) {
+                        /* repcode detected */
+                        const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
+                        size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
+                        int const gain2 = (int)(repLength * 4);
+                        int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
+                        if ((repLength >= 4) && (gain2 > gain1))
+                            matchLength = repLength, offset = 0, start = ip;
+                }   }
+
+                /* search match, depth 2 */
+                {   size_t offset2=999999999;
+                    size_t const ml2 = searchMax(ms, ip, iend, &offset2);
+                    int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
+                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
+                    if ((ml2 >= 4) && (gain2 > gain1)) {
+                        matchLength = ml2, offset = offset2, start = ip;
+                        continue;
+            }   }   }
+            break;   /* nothing found : store previous solution */
+        }
+
+        /* catch up */
+        if (offset) {
+            U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
+            const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex;
+            const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart;
+            while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; }  /* catch up */
+            offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
+        }
+
+        /* store sequence */
+_storeSequence:
+        {   size_t const litLength = start - anchor;
+            ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
+            anchor = ip = start + matchLength;
+        }
+
+        /* check immediate repcode */
+        while (ip <= ilimit) {
+            const U32 repCurrent = (U32)(ip-base);
+            const U32 windowLow = ZSTD_getLowestMatchIndex(ms, repCurrent, windowLog);
+            const U32 repIndex = repCurrent - offset_2;
+            const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
+            const BYTE* const repMatch = repBase + repIndex;
+            if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow))   /* intentional overflow */
+            if (MEM_read32(ip) == MEM_read32(repMatch)) {
+                /* repcode detected: take it */
+                const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
+                matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
+                offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset;   /* swap offset history */
+                ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
+                ip += matchLength;
+                anchor = ip;
+                continue;   /* empirically faster when a repcode is present */
+            }
+            break;
+    }   }
+
+    /* Save reps for next block */
+    rep[0] = offset_1;
+    rep[1] = offset_2;
+
+    /* Return the last literals size */
+    return (size_t)(iend - anchor);
+}
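The candidate-selection arithmetic inside the function above (and in its prefix counterpart earlier in the file) is a crude cost model: a match earns four points per byte, pays roughly `log2(offset)` points to encode its offset, repcodes pay nothing, and the added constants (+1, +4, +7) act as hysteresis in favor of the match already in hand. A worked example of that comparison, with `highbit32` as a local stand-in for `ZSTD_highbit32`:

```c
#include <stdio.h>

/* Local stand-in for ZSTD_highbit32: index of the highest set bit. */
static int highbit32(unsigned v) { int n = -1; while (v) { v >>= 1; n++; } return n; }

int main(void)
{
    size_t const matchLength = 6;  unsigned const offset  = 1u << 16;  /* incumbent match */
    size_t const ml2         = 7;  unsigned const offset2 = 1u << 4;   /* candidate found at ip+1 */
    int const gain1 = (int)(matchLength*4 - highbit32(offset+1) + 4);  /* 24 - 16 + 4 = 12 */
    int const gain2 = (int)(ml2*4 - highbit32(offset2+1));             /* 28 -  4     = 24 */
    printf("adopt candidate? %s\n", ((ml2 >= 4) && (gain2 > gain1)) ? "yes" : "no");
    return 0;
}
```

Here the candidate one position later is a byte longer and far closer, so it wins despite the incumbent's hysteresis bonus.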
+
+
+size_t ZSTD_compressBlock_greedy_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0);
+}
+
+size_t ZSTD_compressBlock_lazy_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1);
+}
+
+size_t ZSTD_compressBlock_lazy2_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2);
+}
+
+size_t ZSTD_compressBlock_btlazy2_extDict(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2);
+}
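`ZSTD_count_2segments`, used by every repcode path in the extDict function above, measures a match that begins in the old (dictionary) segment and may run past `dictEnd`, continuing at `prefixStart` because the two segments are adjacent in index space. The following is a minimal sketch of that split count, not zstd's implementation; both helper names are invented:

```c
#include <stddef.h>

/* Illustrative stand-in: byte-by-byte match length, limited by aEnd. */
static size_t count_simple(const unsigned char* a, const unsigned char* b,
                           const unsigned char* aEnd)
{
    const unsigned char* const aStart = a;
    while ((a < aEnd) && (*a == *b)) { a++; b++; }
    return (size_t)(a - aStart);
}

/* Sketch of the two-segment count: match within the dict segment first;
 * if the whole tail of that segment matched, resume at the prefix start. */
static size_t count_2segments_sketch(const unsigned char* ip, const unsigned char* match,
                                     const unsigned char* iEnd, const unsigned char* mEnd,
                                     const unsigned char* prefixStart)
{
    const unsigned char* const vEnd =
        (ip + (mEnd - match) < iEnd) ? ip + (mEnd - match) : iEnd;
    size_t const matchLength = count_simple(ip, match, vEnd);
    if (match + matchLength != mEnd) return matchLength;   /* stopped inside the segment */
    return matchLength + count_simple(ip + matchLength, prefixStart, iEnd);
}
```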