zstdlib 0.1.0-x86-mingw32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGES.md +9 -0
  3. data/Gemfile +3 -0
  4. data/README.md +96 -0
  5. data/Rakefile +29 -0
  6. data/ext/zstdlib/extconf.rb +53 -0
  7. data/ext/zstdlib/ruby/zlib-2.2/zlib.c +4659 -0
  8. data/ext/zstdlib/ruby/zlib-2.3/zlib.c +4686 -0
  9. data/ext/zstdlib/ruby/zlib-2.4/zlib.c +4843 -0
  10. data/ext/zstdlib/ruby/zlib-2.5/zlib.c +4848 -0
  11. data/ext/zstdlib/ruby/zlib-2.6/zlib.c +4890 -0
  12. data/ext/zstdlib/zlib-1.2.11/adler32.c +186 -0
  13. data/ext/zstdlib/zlib-1.2.11/compress.c +86 -0
  14. data/ext/zstdlib/zlib-1.2.11/crc32.c +442 -0
  15. data/ext/zstdlib/zlib-1.2.11/crc32.h +441 -0
  16. data/ext/zstdlib/zlib-1.2.11/deflate.c +2163 -0
  17. data/ext/zstdlib/zlib-1.2.11/deflate.h +349 -0
  18. data/ext/zstdlib/zlib-1.2.11/gzclose.c +25 -0
  19. data/ext/zstdlib/zlib-1.2.11/gzguts.h +218 -0
  20. data/ext/zstdlib/zlib-1.2.11/gzlib.c +637 -0
  21. data/ext/zstdlib/zlib-1.2.11/gzread.c +654 -0
  22. data/ext/zstdlib/zlib-1.2.11/gzwrite.c +665 -0
  23. data/ext/zstdlib/zlib-1.2.11/infback.c +640 -0
  24. data/ext/zstdlib/zlib-1.2.11/inffast.c +323 -0
  25. data/ext/zstdlib/zlib-1.2.11/inffast.h +11 -0
  26. data/ext/zstdlib/zlib-1.2.11/inffixed.h +94 -0
  27. data/ext/zstdlib/zlib-1.2.11/inflate.c +1561 -0
  28. data/ext/zstdlib/zlib-1.2.11/inflate.h +125 -0
  29. data/ext/zstdlib/zlib-1.2.11/inftrees.c +304 -0
  30. data/ext/zstdlib/zlib-1.2.11/inftrees.h +62 -0
  31. data/ext/zstdlib/zlib-1.2.11/trees.c +1203 -0
  32. data/ext/zstdlib/zlib-1.2.11/trees.h +128 -0
  33. data/ext/zstdlib/zlib-1.2.11/uncompr.c +93 -0
  34. data/ext/zstdlib/zlib-1.2.11/zconf.h +534 -0
  35. data/ext/zstdlib/zlib-1.2.11/zlib.h +1912 -0
  36. data/ext/zstdlib/zlib-1.2.11/zutil.c +325 -0
  37. data/ext/zstdlib/zlib-1.2.11/zutil.h +271 -0
  38. data/ext/zstdlib/zlib.mk +14 -0
  39. data/ext/zstdlib/zlibwrapper/zlibwrapper.c +14 -0
  40. data/ext/zstdlib/zlibwrapper.mk +14 -0
  41. data/ext/zstdlib/zstd-1.3.8/lib/common/bitstream.h +455 -0
  42. data/ext/zstdlib/zstd-1.3.8/lib/common/compiler.h +140 -0
  43. data/ext/zstdlib/zstd-1.3.8/lib/common/cpu.h +215 -0
  44. data/ext/zstdlib/zstd-1.3.8/lib/common/debug.c +44 -0
  45. data/ext/zstdlib/zstd-1.3.8/lib/common/debug.h +134 -0
  46. data/ext/zstdlib/zstd-1.3.8/lib/common/entropy_common.c +236 -0
  47. data/ext/zstdlib/zstd-1.3.8/lib/common/error_private.c +54 -0
  48. data/ext/zstdlib/zstd-1.3.8/lib/common/error_private.h +76 -0
  49. data/ext/zstdlib/zstd-1.3.8/lib/common/fse.h +708 -0
  50. data/ext/zstdlib/zstd-1.3.8/lib/common/fse_decompress.c +309 -0
  51. data/ext/zstdlib/zstd-1.3.8/lib/common/huf.h +358 -0
  52. data/ext/zstdlib/zstd-1.3.8/lib/common/mem.h +380 -0
  53. data/ext/zstdlib/zstd-1.3.8/lib/common/pool.c +340 -0
  54. data/ext/zstdlib/zstd-1.3.8/lib/common/pool.h +84 -0
  55. data/ext/zstdlib/zstd-1.3.8/lib/common/threading.c +75 -0
  56. data/ext/zstdlib/zstd-1.3.8/lib/common/threading.h +123 -0
  57. data/ext/zstdlib/zstd-1.3.8/lib/common/xxhash.c +876 -0
  58. data/ext/zstdlib/zstd-1.3.8/lib/common/xxhash.h +305 -0
  59. data/ext/zstdlib/zstd-1.3.8/lib/common/zstd_common.c +83 -0
  60. data/ext/zstdlib/zstd-1.3.8/lib/common/zstd_errors.h +93 -0
  61. data/ext/zstdlib/zstd-1.3.8/lib/common/zstd_internal.h +266 -0
  62. data/ext/zstdlib/zstd-1.3.8/lib/compress/fse_compress.c +721 -0
  63. data/ext/zstdlib/zstd-1.3.8/lib/compress/hist.c +203 -0
  64. data/ext/zstdlib/zstd-1.3.8/lib/compress/hist.h +95 -0
  65. data/ext/zstdlib/zstd-1.3.8/lib/compress/huf_compress.c +798 -0
  66. data/ext/zstdlib/zstd-1.3.8/lib/compress/zstd_compress.c +4290 -0
  67. data/ext/zstdlib/zstd-1.3.8/lib/compress/zstd_compress_internal.h +860 -0
  68. data/ext/zstdlib/zstd-1.3.8/lib/compress/zstd_double_fast.c +499 -0
  69. data/ext/zstdlib/zstd-1.3.8/lib/compress/zstd_double_fast.h +38 -0
  70. data/ext/zstdlib/zstd-1.3.8/lib/compress/zstd_fast.c +391 -0
  71. data/ext/zstdlib/zstd-1.3.8/lib/compress/zstd_fast.h +37 -0
  72. data/ext/zstdlib/zstd-1.3.8/lib/compress/zstd_lazy.c +1106 -0
  73. data/ext/zstdlib/zstd-1.3.8/lib/compress/zstd_lazy.h +67 -0
  74. data/ext/zstdlib/zstd-1.3.8/lib/compress/zstd_ldm.c +597 -0
  75. data/ext/zstdlib/zstd-1.3.8/lib/compress/zstd_ldm.h +105 -0
  76. data/ext/zstdlib/zstd-1.3.8/lib/compress/zstd_opt.c +1217 -0
  77. data/ext/zstdlib/zstd-1.3.8/lib/compress/zstd_opt.h +56 -0
  78. data/ext/zstdlib/zstd-1.3.8/lib/compress/zstdmt_compress.c +2107 -0
  79. data/ext/zstdlib/zstd-1.3.8/lib/compress/zstdmt_compress.h +174 -0
  80. data/ext/zstdlib/zstd-1.3.8/lib/decompress/huf_decompress.c +1232 -0
  81. data/ext/zstdlib/zstd-1.3.8/lib/decompress/zstd_ddict.c +240 -0
  82. data/ext/zstdlib/zstd-1.3.8/lib/decompress/zstd_ddict.h +44 -0
  83. data/ext/zstdlib/zstd-1.3.8/lib/decompress/zstd_decompress.c +1672 -0
  84. data/ext/zstdlib/zstd-1.3.8/lib/decompress/zstd_decompress_block.c +1307 -0
  85. data/ext/zstdlib/zstd-1.3.8/lib/decompress/zstd_decompress_block.h +59 -0
  86. data/ext/zstdlib/zstd-1.3.8/lib/decompress/zstd_decompress_internal.h +168 -0
  87. data/ext/zstdlib/zstd-1.3.8/lib/zstd.h +1766 -0
  88. data/ext/zstdlib/zstd-1.3.8/zlibWrapper/gzclose.c +28 -0
  89. data/ext/zstdlib/zstd-1.3.8/zlibWrapper/gzcompatibility.h +68 -0
  90. data/ext/zstdlib/zstd-1.3.8/zlibWrapper/gzguts.h +227 -0
  91. data/ext/zstdlib/zstd-1.3.8/zlibWrapper/gzlib.c +640 -0
  92. data/ext/zstdlib/zstd-1.3.8/zlibWrapper/gzread.c +670 -0
  93. data/ext/zstdlib/zstd-1.3.8/zlibWrapper/gzwrite.c +671 -0
  94. data/ext/zstdlib/zstd-1.3.8/zlibWrapper/zstd_zlibwrapper.c +1105 -0
  95. data/ext/zstdlib/zstd-1.3.8/zlibWrapper/zstd_zlibwrapper.h +88 -0
  96. data/ext/zstdlib/zstd.mk +14 -0
  97. data/lib/2.2/zstdlib.so +0 -0
  98. data/lib/2.3/zstdlib.so +0 -0
  99. data/lib/2.4/zstdlib.so +0 -0
  100. data/lib/2.5/zstdlib.so +0 -0
  101. data/lib/2.6/zstdlib.so +0 -0
  102. data/lib/zstdlib.rb +6 -0
  103. data/test/zstdlib_test.rb +21 -0
  104. metadata +205 -0
@@ -0,0 +1,67 @@
1
+ /*
2
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3
+ * All rights reserved.
4
+ *
5
+ * This source code is licensed under both the BSD-style license (found in the
6
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7
+ * in the COPYING file in the root directory of this source tree).
8
+ * You may select, at your option, one of the above-listed licenses.
9
+ */
10
+
11
+ #ifndef ZSTD_LAZY_H
12
+ #define ZSTD_LAZY_H
13
+
14
+ #if defined (__cplusplus)
15
+ extern "C" {
16
+ #endif
17
+
18
+ #include "zstd_compress_internal.h"
19
+
20
+ U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip);
21
+
22
+ void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue); /*! used in ZSTD_reduceIndex(). pre-emptively increase value of ZSTD_DUBT_UNSORTED_MARK */
23
+
24
+ size_t ZSTD_compressBlock_btlazy2(
25
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
26
+ void const* src, size_t srcSize);
27
+ size_t ZSTD_compressBlock_lazy2(
28
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
29
+ void const* src, size_t srcSize);
30
+ size_t ZSTD_compressBlock_lazy(
31
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
32
+ void const* src, size_t srcSize);
33
+ size_t ZSTD_compressBlock_greedy(
34
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
35
+ void const* src, size_t srcSize);
36
+
37
+ size_t ZSTD_compressBlock_btlazy2_dictMatchState(
38
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
39
+ void const* src, size_t srcSize);
40
+ size_t ZSTD_compressBlock_lazy2_dictMatchState(
41
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
42
+ void const* src, size_t srcSize);
43
+ size_t ZSTD_compressBlock_lazy_dictMatchState(
44
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
45
+ void const* src, size_t srcSize);
46
+ size_t ZSTD_compressBlock_greedy_dictMatchState(
47
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
48
+ void const* src, size_t srcSize);
49
+
50
+ size_t ZSTD_compressBlock_greedy_extDict(
51
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
52
+ void const* src, size_t srcSize);
53
+ size_t ZSTD_compressBlock_lazy_extDict(
54
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
55
+ void const* src, size_t srcSize);
56
+ size_t ZSTD_compressBlock_lazy2_extDict(
57
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
58
+ void const* src, size_t srcSize);
59
+ size_t ZSTD_compressBlock_btlazy2_extDict(
60
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
61
+ void const* src, size_t srcSize);
62
+
63
+ #if defined (__cplusplus)
64
+ }
65
+ #endif
66
+
67
+ #endif /* ZSTD_LAZY_H */
@@ -0,0 +1,597 @@
1
+ /*
2
+ * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3
+ * All rights reserved.
4
+ *
5
+ * This source code is licensed under both the BSD-style license (found in the
6
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7
+ * in the COPYING file in the root directory of this source tree).
8
+ */
9
+
10
+ #include "zstd_ldm.h"
11
+
12
+ #include "debug.h"
13
+ #include "zstd_fast.h" /* ZSTD_fillHashTable() */
14
+ #include "zstd_double_fast.h" /* ZSTD_fillDoubleHashTable() */
15
+
16
+ #define LDM_BUCKET_SIZE_LOG 3
17
+ #define LDM_MIN_MATCH_LENGTH 64
18
+ #define LDM_HASH_RLOG 7
19
+ #define LDM_HASH_CHAR_OFFSET 10
20
+
21
+ void ZSTD_ldm_adjustParameters(ldmParams_t* params,
22
+ ZSTD_compressionParameters const* cParams)
23
+ {
24
+ params->windowLog = cParams->windowLog;
25
+ ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX);
26
+ DEBUGLOG(4, "ZSTD_ldm_adjustParameters");
27
+ if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG;
28
+ if (!params->minMatchLength) params->minMatchLength = LDM_MIN_MATCH_LENGTH;
29
+ if (cParams->strategy >= ZSTD_btopt) {
30
+ /* Get out of the way of the optimal parser */
31
+ U32 const minMatch = MAX(cParams->targetLength, params->minMatchLength);
32
+ assert(minMatch >= ZSTD_LDM_MINMATCH_MIN);
33
+ assert(minMatch <= ZSTD_LDM_MINMATCH_MAX);
34
+ params->minMatchLength = minMatch;
35
+ }
36
+ if (params->hashLog == 0) {
37
+ params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG);
38
+ assert(params->hashLog <= ZSTD_HASHLOG_MAX);
39
+ }
40
+ if (params->hashRateLog == 0) {
41
+ params->hashRateLog = params->windowLog < params->hashLog
42
+ ? 0
43
+ : params->windowLog - params->hashLog;
44
+ }
45
+ params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog);
46
+ }
47
+
48
+ size_t ZSTD_ldm_getTableSize(ldmParams_t params)
49
+ {
50
+ size_t const ldmHSize = ((size_t)1) << params.hashLog;
51
+ size_t const ldmBucketSizeLog = MIN(params.bucketSizeLog, params.hashLog);
52
+ size_t const ldmBucketSize =
53
+ ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
54
+ size_t const totalSize = ldmBucketSize + ldmHSize * sizeof(ldmEntry_t);
55
+ return params.enableLdm ? totalSize : 0;
56
+ }
57
+
58
+ size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize)
59
+ {
60
+ return params.enableLdm ? (maxChunkSize / params.minMatchLength) : 0;
61
+ }
62
+
63
+ /** ZSTD_ldm_getSmallHash() :
64
+ * numBits should be <= 32
65
+ * If numBits==0, returns 0.
66
+ * @return : the most significant numBits of value. */
67
+ static U32 ZSTD_ldm_getSmallHash(U64 value, U32 numBits)
68
+ {
69
+ assert(numBits <= 32);
70
+ return numBits == 0 ? 0 : (U32)(value >> (64 - numBits));
71
+ }
72
+
73
+ /** ZSTD_ldm_getChecksum() :
74
+ * numBitsToDiscard should be <= 32
75
+ * @return : the next most significant 32 bits after numBitsToDiscard */
76
+ static U32 ZSTD_ldm_getChecksum(U64 hash, U32 numBitsToDiscard)
77
+ {
78
+ assert(numBitsToDiscard <= 32);
79
+ return (hash >> (64 - 32 - numBitsToDiscard)) & 0xFFFFFFFF;
80
+ }
81
+
82
+ /** ZSTD_ldm_getTag() ;
83
+ * Given the hash, returns the most significant numTagBits bits
84
+ * after (32 + hbits) bits.
85
+ *
86
+ * If there are not enough bits remaining, return the last
87
+ * numTagBits bits. */
88
+ static U32 ZSTD_ldm_getTag(U64 hash, U32 hbits, U32 numTagBits)
89
+ {
90
+ assert(numTagBits < 32 && hbits <= 32);
91
+ if (32 - hbits < numTagBits) {
92
+ return hash & (((U32)1 << numTagBits) - 1);
93
+ } else {
94
+ return (hash >> (32 - hbits - numTagBits)) & (((U32)1 << numTagBits) - 1);
95
+ }
96
+ }
97
+
98
+ /** ZSTD_ldm_getBucket() :
99
+ * Returns a pointer to the start of the bucket associated with hash. */
100
+ static ldmEntry_t* ZSTD_ldm_getBucket(
101
+ ldmState_t* ldmState, size_t hash, ldmParams_t const ldmParams)
102
+ {
103
+ return ldmState->hashTable + (hash << ldmParams.bucketSizeLog);
104
+ }
105
+
106
+ /** ZSTD_ldm_insertEntry() :
107
+ * Insert the entry with corresponding hash into the hash table */
108
+ static void ZSTD_ldm_insertEntry(ldmState_t* ldmState,
109
+ size_t const hash, const ldmEntry_t entry,
110
+ ldmParams_t const ldmParams)
111
+ {
112
+ BYTE* const bucketOffsets = ldmState->bucketOffsets;
113
+ *(ZSTD_ldm_getBucket(ldmState, hash, ldmParams) + bucketOffsets[hash]) = entry;
114
+ bucketOffsets[hash]++;
115
+ bucketOffsets[hash] &= ((U32)1 << ldmParams.bucketSizeLog) - 1;
116
+ }
117
+
118
+ /** ZSTD_ldm_makeEntryAndInsertByTag() :
119
+ *
120
+ * Gets the small hash, checksum, and tag from the rollingHash.
121
+ *
122
+ * If the tag matches (1 << ldmParams.hashRateLog)-1, then
123
+ * creates an ldmEntry from the offset, and inserts it into the hash table.
124
+ *
125
+ * hBits is the length of the small hash, which is the most significant hBits
126
+ * of rollingHash. The checksum is the next 32 most significant bits, followed
127
+ * by ldmParams.hashRateLog bits that make up the tag. */
128
+ static void ZSTD_ldm_makeEntryAndInsertByTag(ldmState_t* ldmState,
129
+ U64 const rollingHash,
130
+ U32 const hBits,
131
+ U32 const offset,
132
+ ldmParams_t const ldmParams)
133
+ {
134
+ U32 const tag = ZSTD_ldm_getTag(rollingHash, hBits, ldmParams.hashRateLog);
135
+ U32 const tagMask = ((U32)1 << ldmParams.hashRateLog) - 1;
136
+ if (tag == tagMask) {
137
+ U32 const hash = ZSTD_ldm_getSmallHash(rollingHash, hBits);
138
+ U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
139
+ ldmEntry_t entry;
140
+ entry.offset = offset;
141
+ entry.checksum = checksum;
142
+ ZSTD_ldm_insertEntry(ldmState, hash, entry, ldmParams);
143
+ }
144
+ }
145
+
146
+ /** ZSTD_ldm_countBackwardsMatch() :
147
+ * Returns the number of bytes that match backwards before pIn and pMatch.
148
+ *
149
+ * We count only bytes where pMatch >= pBase and pIn >= pAnchor. */
150
+ static size_t ZSTD_ldm_countBackwardsMatch(
151
+ const BYTE* pIn, const BYTE* pAnchor,
152
+ const BYTE* pMatch, const BYTE* pBase)
153
+ {
154
+ size_t matchLength = 0;
155
+ while (pIn > pAnchor && pMatch > pBase && pIn[-1] == pMatch[-1]) {
156
+ pIn--;
157
+ pMatch--;
158
+ matchLength++;
159
+ }
160
+ return matchLength;
161
+ }
162
+
163
+ /** ZSTD_ldm_fillFastTables() :
164
+ *
165
+ * Fills the relevant tables for the ZSTD_fast and ZSTD_dfast strategies.
166
+ * This is similar to ZSTD_loadDictionaryContent.
167
+ *
168
+ * The tables for the other strategies are filled within their
169
+ * block compressors. */
170
+ static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
171
+ void const* end)
172
+ {
173
+ const BYTE* const iend = (const BYTE*)end;
174
+
175
+ switch(ms->cParams.strategy)
176
+ {
177
+ case ZSTD_fast:
178
+ ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast);
179
+ break;
180
+
181
+ case ZSTD_dfast:
182
+ ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast);
183
+ break;
184
+
185
+ case ZSTD_greedy:
186
+ case ZSTD_lazy:
187
+ case ZSTD_lazy2:
188
+ case ZSTD_btlazy2:
189
+ case ZSTD_btopt:
190
+ case ZSTD_btultra:
191
+ case ZSTD_btultra2:
192
+ break;
193
+ default:
194
+ assert(0); /* not possible : not a valid strategy id */
195
+ }
196
+
197
+ return 0;
198
+ }
199
+
200
+ /** ZSTD_ldm_fillLdmHashTable() :
201
+ *
202
+ * Fills hashTable from (lastHashed + 1) to iend (non-inclusive).
203
+ * lastHash is the rolling hash that corresponds to lastHashed.
204
+ *
205
+ * Returns the rolling hash corresponding to position iend-1. */
206
+ static U64 ZSTD_ldm_fillLdmHashTable(ldmState_t* state,
207
+ U64 lastHash, const BYTE* lastHashed,
208
+ const BYTE* iend, const BYTE* base,
209
+ U32 hBits, ldmParams_t const ldmParams)
210
+ {
211
+ U64 rollingHash = lastHash;
212
+ const BYTE* cur = lastHashed + 1;
213
+
214
+ while (cur < iend) {
215
+ rollingHash = ZSTD_rollingHash_rotate(rollingHash, cur[-1],
216
+ cur[ldmParams.minMatchLength-1],
217
+ state->hashPower);
218
+ ZSTD_ldm_makeEntryAndInsertByTag(state,
219
+ rollingHash, hBits,
220
+ (U32)(cur - base), ldmParams);
221
+ ++cur;
222
+ }
223
+ return rollingHash;
224
+ }
225
+
226
+
227
+ /** ZSTD_ldm_limitTableUpdate() :
228
+ *
229
+ * Sets cctx->nextToUpdate to a position corresponding closer to anchor
230
+ * if it is far way
231
+ * (after a long match, only update tables a limited amount). */
232
+ static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor)
233
+ {
234
+ U32 const current = (U32)(anchor - ms->window.base);
235
+ if (current > ms->nextToUpdate + 1024) {
236
+ ms->nextToUpdate =
237
+ current - MIN(512, current - ms->nextToUpdate - 1024);
238
+ }
239
+ }
240
+
241
+ static size_t ZSTD_ldm_generateSequences_internal(
242
+ ldmState_t* ldmState, rawSeqStore_t* rawSeqStore,
243
+ ldmParams_t const* params, void const* src, size_t srcSize)
244
+ {
245
+ /* LDM parameters */
246
+ int const extDict = ZSTD_window_hasExtDict(ldmState->window);
247
+ U32 const minMatchLength = params->minMatchLength;
248
+ U64 const hashPower = ldmState->hashPower;
249
+ U32 const hBits = params->hashLog - params->bucketSizeLog;
250
+ U32 const ldmBucketSize = 1U << params->bucketSizeLog;
251
+ U32 const hashRateLog = params->hashRateLog;
252
+ U32 const ldmTagMask = (1U << params->hashRateLog) - 1;
253
+ /* Prefix and extDict parameters */
254
+ U32 const dictLimit = ldmState->window.dictLimit;
255
+ U32 const lowestIndex = extDict ? ldmState->window.lowLimit : dictLimit;
256
+ BYTE const* const base = ldmState->window.base;
257
+ BYTE const* const dictBase = extDict ? ldmState->window.dictBase : NULL;
258
+ BYTE const* const dictStart = extDict ? dictBase + lowestIndex : NULL;
259
+ BYTE const* const dictEnd = extDict ? dictBase + dictLimit : NULL;
260
+ BYTE const* const lowPrefixPtr = base + dictLimit;
261
+ /* Input bounds */
262
+ BYTE const* const istart = (BYTE const*)src;
263
+ BYTE const* const iend = istart + srcSize;
264
+ BYTE const* const ilimit = iend - MAX(minMatchLength, HASH_READ_SIZE);
265
+ /* Input positions */
266
+ BYTE const* anchor = istart;
267
+ BYTE const* ip = istart;
268
+ /* Rolling hash */
269
+ BYTE const* lastHashed = NULL;
270
+ U64 rollingHash = 0;
271
+
272
+ while (ip <= ilimit) {
273
+ size_t mLength;
274
+ U32 const current = (U32)(ip - base);
275
+ size_t forwardMatchLength = 0, backwardMatchLength = 0;
276
+ ldmEntry_t* bestEntry = NULL;
277
+ if (ip != istart) {
278
+ rollingHash = ZSTD_rollingHash_rotate(rollingHash, lastHashed[0],
279
+ lastHashed[minMatchLength],
280
+ hashPower);
281
+ } else {
282
+ rollingHash = ZSTD_rollingHash_compute(ip, minMatchLength);
283
+ }
284
+ lastHashed = ip;
285
+
286
+ /* Do not insert and do not look for a match */
287
+ if (ZSTD_ldm_getTag(rollingHash, hBits, hashRateLog) != ldmTagMask) {
288
+ ip++;
289
+ continue;
290
+ }
291
+
292
+ /* Get the best entry and compute the match lengths */
293
+ {
294
+ ldmEntry_t* const bucket =
295
+ ZSTD_ldm_getBucket(ldmState,
296
+ ZSTD_ldm_getSmallHash(rollingHash, hBits),
297
+ *params);
298
+ ldmEntry_t* cur;
299
+ size_t bestMatchLength = 0;
300
+ U32 const checksum = ZSTD_ldm_getChecksum(rollingHash, hBits);
301
+
302
+ for (cur = bucket; cur < bucket + ldmBucketSize; ++cur) {
303
+ size_t curForwardMatchLength, curBackwardMatchLength,
304
+ curTotalMatchLength;
305
+ if (cur->checksum != checksum || cur->offset <= lowestIndex) {
306
+ continue;
307
+ }
308
+ if (extDict) {
309
+ BYTE const* const curMatchBase =
310
+ cur->offset < dictLimit ? dictBase : base;
311
+ BYTE const* const pMatch = curMatchBase + cur->offset;
312
+ BYTE const* const matchEnd =
313
+ cur->offset < dictLimit ? dictEnd : iend;
314
+ BYTE const* const lowMatchPtr =
315
+ cur->offset < dictLimit ? dictStart : lowPrefixPtr;
316
+
317
+ curForwardMatchLength = ZSTD_count_2segments(
318
+ ip, pMatch, iend,
319
+ matchEnd, lowPrefixPtr);
320
+ if (curForwardMatchLength < minMatchLength) {
321
+ continue;
322
+ }
323
+ curBackwardMatchLength =
324
+ ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch,
325
+ lowMatchPtr);
326
+ curTotalMatchLength = curForwardMatchLength +
327
+ curBackwardMatchLength;
328
+ } else { /* !extDict */
329
+ BYTE const* const pMatch = base + cur->offset;
330
+ curForwardMatchLength = ZSTD_count(ip, pMatch, iend);
331
+ if (curForwardMatchLength < minMatchLength) {
332
+ continue;
333
+ }
334
+ curBackwardMatchLength =
335
+ ZSTD_ldm_countBackwardsMatch(ip, anchor, pMatch,
336
+ lowPrefixPtr);
337
+ curTotalMatchLength = curForwardMatchLength +
338
+ curBackwardMatchLength;
339
+ }
340
+
341
+ if (curTotalMatchLength > bestMatchLength) {
342
+ bestMatchLength = curTotalMatchLength;
343
+ forwardMatchLength = curForwardMatchLength;
344
+ backwardMatchLength = curBackwardMatchLength;
345
+ bestEntry = cur;
346
+ }
347
+ }
348
+ }
349
+
350
+ /* No match found -- continue searching */
351
+ if (bestEntry == NULL) {
352
+ ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash,
353
+ hBits, current,
354
+ *params);
355
+ ip++;
356
+ continue;
357
+ }
358
+
359
+ /* Match found */
360
+ mLength = forwardMatchLength + backwardMatchLength;
361
+ ip -= backwardMatchLength;
362
+
363
+ {
364
+ /* Store the sequence:
365
+ * ip = current - backwardMatchLength
366
+ * The match is at (bestEntry->offset - backwardMatchLength)
367
+ */
368
+ U32 const matchIndex = bestEntry->offset;
369
+ U32 const offset = current - matchIndex;
370
+ rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size;
371
+
372
+ /* Out of sequence storage */
373
+ if (rawSeqStore->size == rawSeqStore->capacity)
374
+ return ERROR(dstSize_tooSmall);
375
+ seq->litLength = (U32)(ip - anchor);
376
+ seq->matchLength = (U32)mLength;
377
+ seq->offset = offset;
378
+ rawSeqStore->size++;
379
+ }
380
+
381
+ /* Insert the current entry into the hash table */
382
+ ZSTD_ldm_makeEntryAndInsertByTag(ldmState, rollingHash, hBits,
383
+ (U32)(lastHashed - base),
384
+ *params);
385
+
386
+ assert(ip + backwardMatchLength == lastHashed);
387
+
388
+ /* Fill the hash table from lastHashed+1 to ip+mLength*/
389
+ /* Heuristic: don't need to fill the entire table at end of block */
390
+ if (ip + mLength <= ilimit) {
391
+ rollingHash = ZSTD_ldm_fillLdmHashTable(
392
+ ldmState, rollingHash, lastHashed,
393
+ ip + mLength, base, hBits, *params);
394
+ lastHashed = ip + mLength - 1;
395
+ }
396
+ ip += mLength;
397
+ anchor = ip;
398
+ }
399
+ return iend - anchor;
400
+ }
401
+
402
+ /*! ZSTD_ldm_reduceTable() :
403
+ * reduce table indexes by `reducerValue` */
404
+ static void ZSTD_ldm_reduceTable(ldmEntry_t* const table, U32 const size,
405
+ U32 const reducerValue)
406
+ {
407
+ U32 u;
408
+ for (u = 0; u < size; u++) {
409
+ if (table[u].offset < reducerValue) table[u].offset = 0;
410
+ else table[u].offset -= reducerValue;
411
+ }
412
+ }
413
+
414
+ size_t ZSTD_ldm_generateSequences(
415
+ ldmState_t* ldmState, rawSeqStore_t* sequences,
416
+ ldmParams_t const* params, void const* src, size_t srcSize)
417
+ {
418
+ U32 const maxDist = 1U << params->windowLog;
419
+ BYTE const* const istart = (BYTE const*)src;
420
+ BYTE const* const iend = istart + srcSize;
421
+ size_t const kMaxChunkSize = 1 << 20;
422
+ size_t const nbChunks = (srcSize / kMaxChunkSize) + ((srcSize % kMaxChunkSize) != 0);
423
+ size_t chunk;
424
+ size_t leftoverSize = 0;
425
+
426
+ assert(ZSTD_CHUNKSIZE_MAX >= kMaxChunkSize);
427
+ /* Check that ZSTD_window_update() has been called for this chunk prior
428
+ * to passing it to this function.
429
+ */
430
+ assert(ldmState->window.nextSrc >= (BYTE const*)src + srcSize);
431
+ /* The input could be very large (in zstdmt), so it must be broken up into
432
+ * chunks to enforce the maximmum distance and handle overflow correction.
433
+ */
434
+ assert(sequences->pos <= sequences->size);
435
+ assert(sequences->size <= sequences->capacity);
436
+ for (chunk = 0; chunk < nbChunks && sequences->size < sequences->capacity; ++chunk) {
437
+ BYTE const* const chunkStart = istart + chunk * kMaxChunkSize;
438
+ size_t const remaining = (size_t)(iend - chunkStart);
439
+ BYTE const *const chunkEnd =
440
+ (remaining < kMaxChunkSize) ? iend : chunkStart + kMaxChunkSize;
441
+ size_t const chunkSize = chunkEnd - chunkStart;
442
+ size_t newLeftoverSize;
443
+ size_t const prevSize = sequences->size;
444
+
445
+ assert(chunkStart < iend);
446
+ /* 1. Perform overflow correction if necessary. */
447
+ if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) {
448
+ U32 const ldmHSize = 1U << params->hashLog;
449
+ U32 const correction = ZSTD_window_correctOverflow(
450
+ &ldmState->window, /* cycleLog */ 0, maxDist, src);
451
+ ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction);
452
+ }
453
+ /* 2. We enforce the maximum offset allowed.
454
+ *
455
+ * kMaxChunkSize should be small enough that we don't lose too much of
456
+ * the window through early invalidation.
457
+ * TODO: * Test the chunk size.
458
+ * * Try invalidation after the sequence generation and test the
459
+ * the offset against maxDist directly.
460
+ */
461
+ ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, NULL, NULL);
462
+ /* 3. Generate the sequences for the chunk, and get newLeftoverSize. */
463
+ newLeftoverSize = ZSTD_ldm_generateSequences_internal(
464
+ ldmState, sequences, params, chunkStart, chunkSize);
465
+ if (ZSTD_isError(newLeftoverSize))
466
+ return newLeftoverSize;
467
+ /* 4. We add the leftover literals from previous iterations to the first
468
+ * newly generated sequence, or add the `newLeftoverSize` if none are
469
+ * generated.
470
+ */
471
+ /* Prepend the leftover literals from the last call */
472
+ if (prevSize < sequences->size) {
473
+ sequences->seq[prevSize].litLength += (U32)leftoverSize;
474
+ leftoverSize = newLeftoverSize;
475
+ } else {
476
+ assert(newLeftoverSize == chunkSize);
477
+ leftoverSize += chunkSize;
478
+ }
479
+ }
480
+ return 0;
481
+ }
482
+
483
+ void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) {
484
+ while (srcSize > 0 && rawSeqStore->pos < rawSeqStore->size) {
485
+ rawSeq* seq = rawSeqStore->seq + rawSeqStore->pos;
486
+ if (srcSize <= seq->litLength) {
487
+ /* Skip past srcSize literals */
488
+ seq->litLength -= (U32)srcSize;
489
+ return;
490
+ }
491
+ srcSize -= seq->litLength;
492
+ seq->litLength = 0;
493
+ if (srcSize < seq->matchLength) {
494
+ /* Skip past the first srcSize of the match */
495
+ seq->matchLength -= (U32)srcSize;
496
+ if (seq->matchLength < minMatch) {
497
+ /* The match is too short, omit it */
498
+ if (rawSeqStore->pos + 1 < rawSeqStore->size) {
499
+ seq[1].litLength += seq[0].matchLength;
500
+ }
501
+ rawSeqStore->pos++;
502
+ }
503
+ return;
504
+ }
505
+ srcSize -= seq->matchLength;
506
+ seq->matchLength = 0;
507
+ rawSeqStore->pos++;
508
+ }
509
+ }
510
+
511
+ /**
512
+ * If the sequence length is longer than remaining then the sequence is split
513
+ * between this block and the next.
514
+ *
515
+ * Returns the current sequence to handle, or if the rest of the block should
516
+ * be literals, it returns a sequence with offset == 0.
517
+ */
518
+ static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore,
519
+ U32 const remaining, U32 const minMatch)
520
+ {
521
+ rawSeq sequence = rawSeqStore->seq[rawSeqStore->pos];
522
+ assert(sequence.offset > 0);
523
+ /* Likely: No partial sequence */
524
+ if (remaining >= sequence.litLength + sequence.matchLength) {
525
+ rawSeqStore->pos++;
526
+ return sequence;
527
+ }
528
+ /* Cut the sequence short (offset == 0 ==> rest is literals). */
529
+ if (remaining <= sequence.litLength) {
530
+ sequence.offset = 0;
531
+ } else if (remaining < sequence.litLength + sequence.matchLength) {
532
+ sequence.matchLength = remaining - sequence.litLength;
533
+ if (sequence.matchLength < minMatch) {
534
+ sequence.offset = 0;
535
+ }
536
+ }
537
+ /* Skip past `remaining` bytes for the future sequences. */
538
+ ZSTD_ldm_skipSequences(rawSeqStore, remaining, minMatch);
539
+ return sequence;
540
+ }
541
+
542
+ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
543
+ ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
544
+ void const* src, size_t srcSize)
545
+ {
546
+ const ZSTD_compressionParameters* const cParams = &ms->cParams;
547
+ unsigned const minMatch = cParams->minMatch;
548
+ ZSTD_blockCompressor const blockCompressor =
549
+ ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms));
550
+ /* Input bounds */
551
+ BYTE const* const istart = (BYTE const*)src;
552
+ BYTE const* const iend = istart + srcSize;
553
+ /* Input positions */
554
+ BYTE const* ip = istart;
555
+
556
+ DEBUGLOG(5, "ZSTD_ldm_blockCompress: srcSize=%zu", srcSize);
557
+ assert(rawSeqStore->pos <= rawSeqStore->size);
558
+ assert(rawSeqStore->size <= rawSeqStore->capacity);
559
+ /* Loop through each sequence and apply the block compressor to the lits */
560
+ while (rawSeqStore->pos < rawSeqStore->size && ip < iend) {
561
+ /* maybeSplitSequence updates rawSeqStore->pos */
562
+ rawSeq const sequence = maybeSplitSequence(rawSeqStore,
563
+ (U32)(iend - ip), minMatch);
564
+ int i;
565
+ /* End signal */
566
+ if (sequence.offset == 0)
567
+ break;
568
+
569
+ assert(sequence.offset <= (1U << cParams->windowLog));
570
+ assert(ip + sequence.litLength + sequence.matchLength <= iend);
571
+
572
+ /* Fill tables for block compressor */
573
+ ZSTD_ldm_limitTableUpdate(ms, ip);
574
+ ZSTD_ldm_fillFastTables(ms, ip);
575
+ /* Run the block compressor */
576
+ DEBUGLOG(5, "calling block compressor on segment of size %u", sequence.litLength);
577
+ {
578
+ size_t const newLitLength =
579
+ blockCompressor(ms, seqStore, rep, ip, sequence.litLength);
580
+ ip += sequence.litLength;
581
+ /* Update the repcodes */
582
+ for (i = ZSTD_REP_NUM - 1; i > 0; i--)
583
+ rep[i] = rep[i-1];
584
+ rep[0] = sequence.offset;
585
+ /* Store the sequence */
586
+ ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength,
587
+ sequence.offset + ZSTD_REP_MOVE,
588
+ sequence.matchLength - MINMATCH);
589
+ ip += sequence.matchLength;
590
+ }
591
+ }
592
+ /* Fill the tables for the block compressor */
593
+ ZSTD_ldm_limitTableUpdate(ms, ip);
594
+ ZSTD_ldm_fillFastTables(ms, ip);
595
+ /* Compress the last literals */
596
+ return blockCompressor(ms, seqStore, rep, ip, iend - ip);
597
+ }