zstd-ruby 1.5.5.0 → 1.5.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/README.md +2 -2
- data/ext/zstdruby/extconf.rb +2 -0
- data/ext/zstdruby/libzstd/common/allocations.h +1 -1
- data/ext/zstdruby/libzstd/common/bitstream.h +49 -29
- data/ext/zstdruby/libzstd/common/compiler.h +114 -22
- data/ext/zstdruby/libzstd/common/cpu.h +36 -0
- data/ext/zstdruby/libzstd/common/debug.c +6 -0
- data/ext/zstdruby/libzstd/common/debug.h +20 -11
- data/ext/zstdruby/libzstd/common/error_private.h +45 -36
- data/ext/zstdruby/libzstd/common/fse.h +3 -2
- data/ext/zstdruby/libzstd/common/fse_decompress.c +19 -17
- data/ext/zstdruby/libzstd/common/huf.h +14 -1
- data/ext/zstdruby/libzstd/common/mem.h +0 -9
- data/ext/zstdruby/libzstd/common/pool.c +1 -1
- data/ext/zstdruby/libzstd/common/pool.h +1 -1
- data/ext/zstdruby/libzstd/common/portability_macros.h +2 -0
- data/ext/zstdruby/libzstd/common/threading.c +8 -2
- data/ext/zstdruby/libzstd/common/xxhash.c +5 -11
- data/ext/zstdruby/libzstd/common/xxhash.h +2341 -1007
- data/ext/zstdruby/libzstd/common/zstd_internal.h +5 -5
- data/ext/zstdruby/libzstd/compress/fse_compress.c +8 -7
- data/ext/zstdruby/libzstd/compress/huf_compress.c +54 -25
- data/ext/zstdruby/libzstd/compress/zstd_compress.c +282 -161
- data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +29 -27
- data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +224 -113
- data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +19 -13
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +17 -5
- data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +11 -0
- data/ext/zstdruby/libzstd/compress/zstd_fast.c +14 -6
- data/ext/zstdruby/libzstd/compress/zstd_lazy.c +129 -87
- data/ext/zstdruby/libzstd/compress/zstd_lazy.h +103 -28
- data/ext/zstdruby/libzstd/compress/zstd_ldm.c +8 -2
- data/ext/zstdruby/libzstd/compress/zstd_opt.c +216 -112
- data/ext/zstdruby/libzstd/compress/zstd_opt.h +31 -7
- data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +94 -79
- data/ext/zstdruby/libzstd/decompress/huf_decompress.c +188 -126
- data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +38 -19
- data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +84 -32
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +231 -208
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +1 -1
- data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +2 -0
- data/ext/zstdruby/libzstd/dictBuilder/cover.c +16 -12
- data/ext/zstdruby/libzstd/dictBuilder/cover.h +2 -8
- data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +2 -2
- data/ext/zstdruby/libzstd/dictBuilder/zdict.c +12 -6
- data/ext/zstdruby/libzstd/zstd.h +129 -60
- data/ext/zstdruby/streaming_compress.c +23 -3
- data/ext/zstdruby/streaming_decompress.c +23 -3
- data/lib/zstd-ruby/version.rb +1 -1
- metadata +2 -2
@@ -12,6 +12,9 @@
|
|
12
12
|
#include "hist.h"
|
13
13
|
#include "zstd_opt.h"
|
14
14
|
|
15
|
+
#if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \
|
16
|
+
|| !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \
|
17
|
+
|| !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR)
|
15
18
|
|
16
19
|
#define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
|
17
20
|
#define ZSTD_MAX_PRICE (1<<30)
|
@@ -264,6 +267,7 @@ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
|
|
264
267
|
const optState_t* const optPtr,
|
265
268
|
int optLevel)
|
266
269
|
{
|
270
|
+
DEBUGLOG(8, "ZSTD_rawLiteralsCost (%u literals)", litLength);
|
267
271
|
if (litLength == 0) return 0;
|
268
272
|
|
269
273
|
if (!ZSTD_compressedLiterals(optPtr))
|
@@ -402,9 +406,11 @@ MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
|
|
402
406
|
|
403
407
|
/* Update hashTable3 up to ip (excluded)
|
404
408
|
Assumption : always within prefix (i.e. not within extDict) */
|
405
|
-
static
|
406
|
-
|
407
|
-
|
409
|
+
static
|
410
|
+
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
411
|
+
U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
|
412
|
+
U32* nextToUpdate3,
|
413
|
+
const BYTE* const ip)
|
408
414
|
{
|
409
415
|
U32* const hashTable3 = ms->hashTable3;
|
410
416
|
U32 const hashLog3 = ms->hashLog3;
|
@@ -431,7 +437,9 @@ static U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
|
|
431
437
|
* @param ip assumed <= iend-8 .
|
432
438
|
* @param target The target of ZSTD_updateTree_internal() - we are filling to this position
|
433
439
|
* @return : nb of positions added */
|
434
|
-
static
|
440
|
+
static
|
441
|
+
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
442
|
+
U32 ZSTD_insertBt1(
|
435
443
|
const ZSTD_matchState_t* ms,
|
436
444
|
const BYTE* const ip, const BYTE* const iend,
|
437
445
|
U32 const target,
|
@@ -550,6 +558,7 @@ static U32 ZSTD_insertBt1(
|
|
550
558
|
}
|
551
559
|
|
552
560
|
FORCE_INLINE_TEMPLATE
|
561
|
+
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
553
562
|
void ZSTD_updateTree_internal(
|
554
563
|
ZSTD_matchState_t* ms,
|
555
564
|
const BYTE* const ip, const BYTE* const iend,
|
@@ -558,7 +567,7 @@ void ZSTD_updateTree_internal(
|
|
558
567
|
const BYTE* const base = ms->window.base;
|
559
568
|
U32 const target = (U32)(ip - base);
|
560
569
|
U32 idx = ms->nextToUpdate;
|
561
|
-
DEBUGLOG(
|
570
|
+
DEBUGLOG(7, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)",
|
562
571
|
idx, target, dictMode);
|
563
572
|
|
564
573
|
while(idx < target) {
|
@@ -575,7 +584,9 @@ void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
|
|
575
584
|
ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict);
|
576
585
|
}
|
577
586
|
|
578
|
-
FORCE_INLINE_TEMPLATE
|
587
|
+
FORCE_INLINE_TEMPLATE
|
588
|
+
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
589
|
+
U32
|
579
590
|
ZSTD_insertBtAndGetAllMatches (
|
580
591
|
ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */
|
581
592
|
ZSTD_matchState_t* ms,
|
@@ -816,7 +827,9 @@ typedef U32 (*ZSTD_getAllMatchesFn)(
|
|
816
827
|
U32 const ll0,
|
817
828
|
U32 const lengthToBeat);
|
818
829
|
|
819
|
-
FORCE_INLINE_TEMPLATE
|
830
|
+
FORCE_INLINE_TEMPLATE
|
831
|
+
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
832
|
+
U32 ZSTD_btGetAllMatches_internal(
|
820
833
|
ZSTD_match_t* matches,
|
821
834
|
ZSTD_matchState_t* ms,
|
822
835
|
U32* nextToUpdate3,
|
@@ -1035,11 +1048,6 @@ ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm,
|
|
1035
1048
|
* Optimal parser
|
1036
1049
|
*********************************/
|
1037
1050
|
|
1038
|
-
static U32 ZSTD_totalLen(ZSTD_optimal_t sol)
|
1039
|
-
{
|
1040
|
-
return sol.litlen + sol.mlen;
|
1041
|
-
}
|
1042
|
-
|
1043
1051
|
#if 0 /* debug */
|
1044
1052
|
|
1045
1053
|
static void
|
@@ -1057,7 +1065,13 @@ listStats(const U32* table, int lastEltID)
|
|
1057
1065
|
|
1058
1066
|
#endif
|
1059
1067
|
|
1060
|
-
|
1068
|
+
#define LIT_PRICE(_p) (int)ZSTD_rawLiteralsCost(_p, 1, optStatePtr, optLevel)
|
1069
|
+
#define LL_PRICE(_l) (int)ZSTD_litLengthPrice(_l, optStatePtr, optLevel)
|
1070
|
+
#define LL_INCPRICE(_l) (LL_PRICE(_l) - LL_PRICE(_l-1))
|
1071
|
+
|
1072
|
+
FORCE_INLINE_TEMPLATE
|
1073
|
+
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
1074
|
+
size_t
|
1061
1075
|
ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
1062
1076
|
seqStore_t* seqStore,
|
1063
1077
|
U32 rep[ZSTD_REP_NUM],
|
@@ -1083,10 +1097,10 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
1083
1097
|
|
1084
1098
|
ZSTD_optimal_t* const opt = optStatePtr->priceTable;
|
1085
1099
|
ZSTD_match_t* const matches = optStatePtr->matchTable;
|
1086
|
-
ZSTD_optimal_t
|
1100
|
+
ZSTD_optimal_t lastStretch;
|
1087
1101
|
ZSTD_optLdm_t optLdm;
|
1088
1102
|
|
1089
|
-
ZSTD_memset(&
|
1103
|
+
ZSTD_memset(&lastStretch, 0, sizeof(ZSTD_optimal_t));
|
1090
1104
|
|
1091
1105
|
optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
|
1092
1106
|
optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
|
@@ -1108,19 +1122,31 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
1108
1122
|
U32 const ll0 = !litlen;
|
1109
1123
|
U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch);
|
1110
1124
|
ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
|
1111
|
-
(U32)(ip-istart), (U32)(iend
|
1112
|
-
if (!nbMatches) {
|
1125
|
+
(U32)(ip-istart), (U32)(iend-ip));
|
1126
|
+
if (!nbMatches) {
|
1127
|
+
DEBUGLOG(8, "no match found at cPos %u", (unsigned)(ip-istart));
|
1128
|
+
ip++;
|
1129
|
+
continue;
|
1130
|
+
}
|
1131
|
+
|
1132
|
+
/* Match found: let's store this solution, and eventually find more candidates.
|
1133
|
+
* During this forward pass, @opt is used to store stretches,
|
1134
|
+
* defined as "a match followed by N literals".
|
1135
|
+
* Note how this is different from a Sequence, which is "N literals followed by a match".
|
1136
|
+
* Storing stretches allows us to store different match predecessors
|
1137
|
+
* for each literal position that is part of a literals run. */
|
1113
1138
|
|
1114
1139
|
/* initialize opt[0] */
|
1115
|
-
|
1116
|
-
opt[0].mlen = 0; /* means is_a_literal */
|
1140
|
+
opt[0].mlen = 0; /* there are only literals so far */
|
1117
1141
|
opt[0].litlen = litlen;
|
1118
|
-
/*
|
1119
|
-
* it is static for the duration of the forward pass, and is included
|
1120
|
-
* in every price.
|
1121
|
-
*
|
1142
|
+
/* No need to include the actual price of the literals before the first match
|
1143
|
+
* because it is static for the duration of the forward pass, and is included
|
1144
|
+
* in every subsequent price. But, we include the literal length because
|
1145
|
+
* the cost variation of litlen depends on the value of litlen.
|
1122
1146
|
*/
|
1123
|
-
opt[0].price = (
|
1147
|
+
opt[0].price = LL_PRICE(litlen);
|
1148
|
+
ZSTD_STATIC_ASSERT(sizeof(opt[0].rep[0]) == sizeof(rep[0]));
|
1149
|
+
ZSTD_memcpy(&opt[0].rep, rep, sizeof(opt[0].rep));
|
1124
1150
|
|
1125
1151
|
/* large match -> immediate encoding */
|
1126
1152
|
{ U32 const maxML = matches[nbMatches-1].len;
|
@@ -1129,82 +1155,106 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
1129
1155
|
nbMatches, maxML, maxOffBase, (U32)(ip-prefixStart));
|
1130
1156
|
|
1131
1157
|
if (maxML > sufficient_len) {
|
1132
|
-
|
1133
|
-
|
1134
|
-
|
1135
|
-
DEBUGLOG(6, "large match (%u>%u)
|
1158
|
+
lastStretch.litlen = 0;
|
1159
|
+
lastStretch.mlen = maxML;
|
1160
|
+
lastStretch.off = maxOffBase;
|
1161
|
+
DEBUGLOG(6, "large match (%u>%u) => immediate encoding",
|
1136
1162
|
maxML, sufficient_len);
|
1137
1163
|
cur = 0;
|
1138
|
-
last_pos =
|
1164
|
+
last_pos = maxML;
|
1139
1165
|
goto _shortestPath;
|
1140
1166
|
} }
|
1141
1167
|
|
1142
1168
|
/* set prices for first matches starting position == 0 */
|
1143
1169
|
assert(opt[0].price >= 0);
|
1144
|
-
{ U32
|
1145
|
-
U32 pos;
|
1170
|
+
{ U32 pos;
|
1146
1171
|
U32 matchNb;
|
1147
1172
|
for (pos = 1; pos < minMatch; pos++) {
|
1148
|
-
opt[pos].price = ZSTD_MAX_PRICE;
|
1173
|
+
opt[pos].price = ZSTD_MAX_PRICE;
|
1174
|
+
opt[pos].mlen = 0;
|
1175
|
+
opt[pos].litlen = litlen + pos;
|
1149
1176
|
}
|
1150
1177
|
for (matchNb = 0; matchNb < nbMatches; matchNb++) {
|
1151
1178
|
U32 const offBase = matches[matchNb].off;
|
1152
1179
|
U32 const end = matches[matchNb].len;
|
1153
1180
|
for ( ; pos <= end ; pos++ ) {
|
1154
|
-
|
1155
|
-
|
1181
|
+
int const matchPrice = (int)ZSTD_getMatchPrice(offBase, pos, optStatePtr, optLevel);
|
1182
|
+
int const sequencePrice = opt[0].price + matchPrice;
|
1156
1183
|
DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
|
1157
|
-
pos, ZSTD_fCost(
|
1184
|
+
pos, ZSTD_fCost(sequencePrice));
|
1158
1185
|
opt[pos].mlen = pos;
|
1159
1186
|
opt[pos].off = offBase;
|
1160
|
-
opt[pos].litlen =
|
1161
|
-
opt[pos].price = (
|
1162
|
-
|
1187
|
+
opt[pos].litlen = 0; /* end of match */
|
1188
|
+
opt[pos].price = sequencePrice + LL_PRICE(0);
|
1189
|
+
}
|
1190
|
+
}
|
1163
1191
|
last_pos = pos-1;
|
1192
|
+
opt[pos].price = ZSTD_MAX_PRICE;
|
1164
1193
|
}
|
1165
1194
|
}
|
1166
1195
|
|
1167
1196
|
/* check further positions */
|
1168
1197
|
for (cur = 1; cur <= last_pos; cur++) {
|
1169
1198
|
const BYTE* const inr = ip + cur;
|
1170
|
-
assert(cur
|
1171
|
-
DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur)
|
1199
|
+
assert(cur <= ZSTD_OPT_NUM);
|
1200
|
+
DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur);
|
1172
1201
|
|
1173
1202
|
/* Fix current position with one literal if cheaper */
|
1174
|
-
{ U32 const litlen =
|
1203
|
+
{ U32 const litlen = opt[cur-1].litlen + 1;
|
1175
1204
|
int const price = opt[cur-1].price
|
1176
|
-
+ (
|
1177
|
-
+ (
|
1178
|
-
- (int)ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
|
1205
|
+
+ LIT_PRICE(ip+cur-1)
|
1206
|
+
+ LL_INCPRICE(litlen);
|
1179
1207
|
assert(price < 1000000000); /* overflow check */
|
1180
1208
|
if (price <= opt[cur].price) {
|
1209
|
+
ZSTD_optimal_t const prevMatch = opt[cur];
|
1181
1210
|
DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
|
1182
1211
|
inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen,
|
1183
1212
|
opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]);
|
1184
|
-
opt[cur]
|
1185
|
-
opt[cur].off = 0;
|
1213
|
+
opt[cur] = opt[cur-1];
|
1186
1214
|
opt[cur].litlen = litlen;
|
1187
1215
|
opt[cur].price = price;
|
1216
|
+
if ( (optLevel >= 1) /* additional check only for higher modes */
|
1217
|
+
&& (prevMatch.litlen == 0) /* replace a match */
|
1218
|
+
&& (LL_INCPRICE(1) < 0) /* ll1 is cheaper than ll0 */
|
1219
|
+
&& LIKELY(ip + cur < iend)
|
1220
|
+
) {
|
1221
|
+
/* check next position, in case it would be cheaper */
|
1222
|
+
int with1literal = prevMatch.price + LIT_PRICE(ip+cur) + LL_INCPRICE(1);
|
1223
|
+
int withMoreLiterals = price + LIT_PRICE(ip+cur) + LL_INCPRICE(litlen+1);
|
1224
|
+
DEBUGLOG(7, "then at next rPos %u : match+1lit %.2f vs %ulits %.2f",
|
1225
|
+
cur+1, ZSTD_fCost(with1literal), litlen+1, ZSTD_fCost(withMoreLiterals));
|
1226
|
+
if ( (with1literal < withMoreLiterals)
|
1227
|
+
&& (with1literal < opt[cur+1].price) ) {
|
1228
|
+
/* update offset history - before it disappears */
|
1229
|
+
U32 const prev = cur - prevMatch.mlen;
|
1230
|
+
repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, prevMatch.off, opt[prev].litlen==0);
|
1231
|
+
assert(cur >= prevMatch.mlen);
|
1232
|
+
DEBUGLOG(7, "==> match+1lit is cheaper (%.2f < %.2f) (hist:%u,%u,%u) !",
|
1233
|
+
ZSTD_fCost(with1literal), ZSTD_fCost(withMoreLiterals),
|
1234
|
+
newReps.rep[0], newReps.rep[1], newReps.rep[2] );
|
1235
|
+
opt[cur+1] = prevMatch; /* mlen & offbase */
|
1236
|
+
ZSTD_memcpy(opt[cur+1].rep, &newReps, sizeof(repcodes_t));
|
1237
|
+
opt[cur+1].litlen = 1;
|
1238
|
+
opt[cur+1].price = with1literal;
|
1239
|
+
if (last_pos < cur+1) last_pos = cur+1;
|
1240
|
+
}
|
1241
|
+
}
|
1188
1242
|
} else {
|
1189
|
-
DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f)
|
1190
|
-
inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price)
|
1191
|
-
opt[cur].rep[0], opt[cur].rep[1], opt[cur].rep[2]);
|
1243
|
+
DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f)",
|
1244
|
+
inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price));
|
1192
1245
|
}
|
1193
1246
|
}
|
1194
1247
|
|
1195
|
-
/*
|
1196
|
-
*
|
1197
|
-
* correct to set the next chunks repcodes during the backward
|
1198
|
-
* traversal.
|
1248
|
+
/* Offset history is not updated during match comparison.
|
1249
|
+
* Do it here, now that the match is selected and confirmed.
|
1199
1250
|
*/
|
1200
1251
|
ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t));
|
1201
1252
|
assert(cur >= opt[cur].mlen);
|
1202
|
-
if (opt[cur].
|
1253
|
+
if (opt[cur].litlen == 0) {
|
1254
|
+
/* just finished a match => alter offset history */
|
1203
1255
|
U32 const prev = cur - opt[cur].mlen;
|
1204
|
-
repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[
|
1256
|
+
repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[prev].litlen==0);
|
1205
1257
|
ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t));
|
1206
|
-
} else {
|
1207
|
-
ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t));
|
1208
1258
|
}
|
1209
1259
|
|
1210
1260
|
/* last match must start at a minimum distance of 8 from oend */
|
@@ -1214,15 +1264,14 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
1214
1264
|
|
1215
1265
|
if ( (optLevel==0) /*static_test*/
|
1216
1266
|
&& (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) {
|
1217
|
-
DEBUGLOG(7, "
|
1267
|
+
DEBUGLOG(7, "skip current position : next rPos(%u) price is cheaper", cur+1);
|
1218
1268
|
continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
|
1219
1269
|
}
|
1220
1270
|
|
1221
1271
|
assert(opt[cur].price >= 0);
|
1222
|
-
{ U32 const ll0 = (opt[cur].
|
1223
|
-
|
1224
|
-
|
1225
|
-
U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
|
1272
|
+
{ U32 const ll0 = (opt[cur].litlen == 0);
|
1273
|
+
int const previousPrice = opt[cur].price;
|
1274
|
+
int const basePrice = previousPrice + LL_PRICE(0);
|
1226
1275
|
U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch);
|
1227
1276
|
U32 matchNb;
|
1228
1277
|
|
@@ -1234,18 +1283,17 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
1234
1283
|
continue;
|
1235
1284
|
}
|
1236
1285
|
|
1237
|
-
{ U32 const
|
1238
|
-
DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of
|
1239
|
-
inr-istart, cur, nbMatches,
|
1240
|
-
|
1241
|
-
if ( (
|
1242
|
-
|| (cur +
|
1243
|
-
|
1244
|
-
|
1245
|
-
|
1246
|
-
|
1247
|
-
last_pos = cur +
|
1248
|
-
if (cur > ZSTD_OPT_NUM) cur = 0; /* underflow => first match */
|
1286
|
+
{ U32 const longestML = matches[nbMatches-1].len;
|
1287
|
+
DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of longest ML=%u",
|
1288
|
+
inr-istart, cur, nbMatches, longestML);
|
1289
|
+
|
1290
|
+
if ( (longestML > sufficient_len)
|
1291
|
+
|| (cur + longestML >= ZSTD_OPT_NUM)
|
1292
|
+
|| (ip + cur + longestML >= iend) ) {
|
1293
|
+
lastStretch.mlen = longestML;
|
1294
|
+
lastStretch.off = matches[nbMatches-1].off;
|
1295
|
+
lastStretch.litlen = 0;
|
1296
|
+
last_pos = cur + longestML;
|
1249
1297
|
goto _shortestPath;
|
1250
1298
|
} }
|
1251
1299
|
|
@@ -1257,19 +1305,24 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
1257
1305
|
U32 mlen;
|
1258
1306
|
|
1259
1307
|
DEBUGLOG(7, "testing match %u => offBase=%4u, mlen=%2u, llen=%2u",
|
1260
|
-
matchNb, matches[matchNb].off, lastML, litlen);
|
1308
|
+
matchNb, matches[matchNb].off, lastML, opt[cur].litlen);
|
1261
1309
|
|
1262
1310
|
for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */
|
1263
1311
|
U32 const pos = cur + mlen;
|
1264
|
-
int const price =
|
1312
|
+
int const price = basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
|
1265
1313
|
|
1266
1314
|
if ((pos > last_pos) || (price < opt[pos].price)) {
|
1267
1315
|
DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
|
1268
1316
|
pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
|
1269
|
-
while (last_pos < pos) {
|
1317
|
+
while (last_pos < pos) {
|
1318
|
+
/* fill empty positions, for future comparisons */
|
1319
|
+
last_pos++;
|
1320
|
+
opt[last_pos].price = ZSTD_MAX_PRICE;
|
1321
|
+
opt[last_pos].litlen = !0; /* just needs to be != 0, to mean "not an end of match" */
|
1322
|
+
}
|
1270
1323
|
opt[pos].mlen = mlen;
|
1271
1324
|
opt[pos].off = offset;
|
1272
|
-
opt[pos].litlen =
|
1325
|
+
opt[pos].litlen = 0;
|
1273
1326
|
opt[pos].price = price;
|
1274
1327
|
} else {
|
1275
1328
|
DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
|
@@ -1277,47 +1330,81 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
|
|
1277
1330
|
if (optLevel==0) break; /* early update abort; gets ~+10% speed for about -0.01 ratio loss */
|
1278
1331
|
}
|
1279
1332
|
} } }
|
1333
|
+
opt[last_pos+1].price = ZSTD_MAX_PRICE;
|
1280
1334
|
} /* for (cur = 1; cur <= last_pos; cur++) */
|
1281
1335
|
|
1282
|
-
|
1283
|
-
cur
|
1284
|
-
|
1336
|
+
lastStretch = opt[last_pos];
|
1337
|
+
assert(cur >= lastStretch.mlen);
|
1338
|
+
cur = last_pos - lastStretch.mlen;
|
1285
1339
|
|
1286
1340
|
_shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
|
1287
1341
|
assert(opt[0].mlen == 0);
|
1342
|
+
assert(last_pos >= lastStretch.mlen);
|
1343
|
+
assert(cur == last_pos - lastStretch.mlen);
|
1288
1344
|
|
1289
|
-
|
1290
|
-
|
1291
|
-
|
1292
|
-
|
1293
|
-
|
1294
|
-
|
1295
|
-
|
1345
|
+
if (lastStretch.mlen==0) {
|
1346
|
+
/* no solution : all matches have been converted into literals */
|
1347
|
+
assert(lastStretch.litlen == (ip - anchor) + last_pos);
|
1348
|
+
ip += last_pos;
|
1349
|
+
continue;
|
1350
|
+
}
|
1351
|
+
assert(lastStretch.off > 0);
|
1352
|
+
|
1353
|
+
/* Update offset history */
|
1354
|
+
if (lastStretch.litlen == 0) {
|
1355
|
+
/* finishing on a match : update offset history */
|
1356
|
+
repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastStretch.off, opt[cur].litlen==0);
|
1357
|
+
ZSTD_memcpy(rep, &reps, sizeof(repcodes_t));
|
1296
1358
|
} else {
|
1297
|
-
ZSTD_memcpy(rep,
|
1359
|
+
ZSTD_memcpy(rep, lastStretch.rep, sizeof(repcodes_t));
|
1360
|
+
assert(cur >= lastStretch.litlen);
|
1361
|
+
cur -= lastStretch.litlen;
|
1298
1362
|
}
|
1299
1363
|
|
1300
|
-
|
1364
|
+
/* Let's write the shortest path solution.
|
1365
|
+
* It is stored in @opt in reverse order,
|
1366
|
+
* starting from @storeEnd (==cur+2),
|
1367
|
+
* effectively partially overwriting @opt.
|
1368
|
+
* Content is changed too:
|
1369
|
+
* - So far, @opt stored stretches, aka a match followed by literals
|
1370
|
+
* - Now, it will store sequences, aka literals followed by a match
|
1371
|
+
*/
|
1372
|
+
{ U32 const storeEnd = cur + 2;
|
1301
1373
|
U32 storeStart = storeEnd;
|
1302
|
-
U32
|
1374
|
+
U32 stretchPos = cur;
|
1303
1375
|
|
1304
1376
|
DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)",
|
1305
1377
|
last_pos, cur); (void)last_pos;
|
1306
|
-
assert(storeEnd <
|
1307
|
-
DEBUGLOG(6, "last
|
1308
|
-
storeEnd,
|
1309
|
-
|
1310
|
-
|
1311
|
-
|
1378
|
+
assert(storeEnd < ZSTD_OPT_SIZE);
|
1379
|
+
DEBUGLOG(6, "last stretch copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
|
1380
|
+
storeEnd, lastStretch.litlen, lastStretch.mlen, lastStretch.off);
|
1381
|
+
if (lastStretch.litlen > 0) {
|
1382
|
+
/* last "sequence" is unfinished: just a bunch of literals */
|
1383
|
+
opt[storeEnd].litlen = lastStretch.litlen;
|
1384
|
+
opt[storeEnd].mlen = 0;
|
1385
|
+
storeStart = storeEnd-1;
|
1386
|
+
opt[storeStart] = lastStretch;
|
1387
|
+
} {
|
1388
|
+
opt[storeEnd] = lastStretch; /* note: litlen will be fixed */
|
1389
|
+
storeStart = storeEnd;
|
1390
|
+
}
|
1391
|
+
while (1) {
|
1392
|
+
ZSTD_optimal_t nextStretch = opt[stretchPos];
|
1393
|
+
opt[storeStart].litlen = nextStretch.litlen;
|
1394
|
+
DEBUGLOG(6, "selected sequence (llen=%u,mlen=%u,ofc=%u)",
|
1395
|
+
opt[storeStart].litlen, opt[storeStart].mlen, opt[storeStart].off);
|
1396
|
+
if (nextStretch.mlen == 0) {
|
1397
|
+
/* reaching beginning of segment */
|
1398
|
+
break;
|
1399
|
+
}
|
1312
1400
|
storeStart--;
|
1313
|
-
|
1314
|
-
|
1315
|
-
|
1316
|
-
seqPos = (seqPos > backDist) ? seqPos - backDist : 0;
|
1401
|
+
opt[storeStart] = nextStretch; /* note: litlen will be fixed */
|
1402
|
+
assert(nextStretch.litlen + nextStretch.mlen <= stretchPos);
|
1403
|
+
stretchPos -= nextStretch.litlen + nextStretch.mlen;
|
1317
1404
|
}
|
1318
1405
|
|
1319
1406
|
/* save sequences */
|
1320
|
-
DEBUGLOG(6, "sending selected sequences into seqStore")
|
1407
|
+
DEBUGLOG(6, "sending selected sequences into seqStore");
|
1321
1408
|
{ U32 storePos;
|
1322
1409
|
for (storePos=storeStart; storePos <= storeEnd; storePos++) {
|
1323
1410
|
U32 const llen = opt[storePos].litlen;
|
@@ -1339,6 +1426,9 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
|
|
1339
1426
|
anchor += advance;
|
1340
1427
|
ip = anchor;
|
1341
1428
|
} }
|
1429
|
+
DEBUGLOG(7, "new offset history : %u, %u, %u", rep[0], rep[1], rep[2]);
|
1430
|
+
|
1431
|
+
/* update all costs */
|
1342
1432
|
ZSTD_setBasePrices(optStatePtr, optLevel);
|
1343
1433
|
}
|
1344
1434
|
} /* while (ip < ilimit) */
|
@@ -1346,21 +1436,27 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
|
|
1346
1436
|
/* Return the last literals size */
|
1347
1437
|
return (size_t)(iend - anchor);
|
1348
1438
|
}
|
1439
|
+
#endif /* build exclusions */
|
1349
1440
|
|
1441
|
+
#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
|
1350
1442
|
static size_t ZSTD_compressBlock_opt0(
|
1351
1443
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
1352
1444
|
const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
|
1353
1445
|
{
|
1354
1446
|
return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /* optLevel */, dictMode);
|
1355
1447
|
}
|
1448
|
+
#endif
|
1356
1449
|
|
1450
|
+
#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
|
1357
1451
|
static size_t ZSTD_compressBlock_opt2(
|
1358
1452
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
1359
1453
|
const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
|
1360
1454
|
{
|
1361
1455
|
return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /* optLevel */, dictMode);
|
1362
1456
|
}
|
1457
|
+
#endif
|
1363
1458
|
|
1459
|
+
#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
|
1364
1460
|
size_t ZSTD_compressBlock_btopt(
|
1365
1461
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
1366
1462
|
const void* src, size_t srcSize)
|
@@ -1368,20 +1464,23 @@ size_t ZSTD_compressBlock_btopt(
|
|
1368
1464
|
DEBUGLOG(5, "ZSTD_compressBlock_btopt");
|
1369
1465
|
return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
|
1370
1466
|
}
|
1467
|
+
#endif
|
1371
1468
|
|
1372
1469
|
|
1373
1470
|
|
1374
1471
|
|
1472
|
+
#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
|
1375
1473
|
/* ZSTD_initStats_ultra():
|
1376
1474
|
* make a first compression pass, just to seed stats with more accurate starting values.
|
1377
1475
|
* only works on first block, with no dictionary and no ldm.
|
1378
1476
|
* this function cannot error out, its narrow contract must be respected.
|
1379
1477
|
*/
|
1380
|
-
static
|
1381
|
-
|
1382
|
-
|
1383
|
-
|
1384
|
-
|
1478
|
+
static
|
1479
|
+
ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
|
1480
|
+
void ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
|
1481
|
+
seqStore_t* seqStore,
|
1482
|
+
U32 rep[ZSTD_REP_NUM],
|
1483
|
+
const void* src, size_t srcSize)
|
1385
1484
|
{
|
1386
1485
|
U32 tmpRep[ZSTD_REP_NUM]; /* updated rep codes will sink here */
|
1387
1486
|
ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep));
|
@@ -1425,7 +1524,7 @@ size_t ZSTD_compressBlock_btultra2(
|
|
1425
1524
|
* Consequently, this can only work if no data has been previously loaded in tables,
|
1426
1525
|
* aka, no dictionary, no prefix, no ldm preprocessing.
|
1427
1526
|
* The compression ratio gain is generally small (~0.5% on first block),
|
1428
|
-
|
1527
|
+
* the cost is 2x cpu time on first block. */
|
1429
1528
|
assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
|
1430
1529
|
if ( (ms->opt.litLengthSum==0) /* first block */
|
1431
1530
|
&& (seqStore->sequences == seqStore->sequencesStart) /* no ldm */
|
@@ -1438,7 +1537,9 @@ size_t ZSTD_compressBlock_btultra2(
|
|
1438
1537
|
|
1439
1538
|
return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
|
1440
1539
|
}
|
1540
|
+
#endif
|
1441
1541
|
|
1542
|
+
#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
|
1442
1543
|
size_t ZSTD_compressBlock_btopt_dictMatchState(
|
1443
1544
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
1444
1545
|
const void* src, size_t srcSize)
|
@@ -1446,18 +1547,20 @@ size_t ZSTD_compressBlock_btopt_dictMatchState(
|
|
1446
1547
|
return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
|
1447
1548
|
}
|
1448
1549
|
|
1449
|
-
size_t
|
1550
|
+
size_t ZSTD_compressBlock_btopt_extDict(
|
1450
1551
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
1451
1552
|
const void* src, size_t srcSize)
|
1452
1553
|
{
|
1453
|
-
return
|
1554
|
+
return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
|
1454
1555
|
}
|
1556
|
+
#endif
|
1455
1557
|
|
1456
|
-
|
1558
|
+
#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
|
1559
|
+
size_t ZSTD_compressBlock_btultra_dictMatchState(
|
1457
1560
|
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
|
1458
1561
|
const void* src, size_t srcSize)
|
1459
1562
|
{
|
1460
|
-
return
|
1563
|
+
return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
|
1461
1564
|
}
|
1462
1565
|
|
1463
1566
|
size_t ZSTD_compressBlock_btultra_extDict(
|
@@ -1466,6 +1569,7 @@ size_t ZSTD_compressBlock_btultra_extDict(
|
|
1466
1569
|
{
|
1467
1570
|
return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
|
1468
1571
|
}
|
1572
|
+
#endif
|
1469
1573
|
|
1470
1574
|
/* note : no btultra2 variant for extDict nor dictMatchState,
|
1471
1575
|
* because btultra2 is not meant to work with dictionaries
|