zstd-ruby 1.5.5.1 → 1.5.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (48) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +1 -1
  3. data/ext/zstdruby/libzstd/common/allocations.h +1 -1
  4. data/ext/zstdruby/libzstd/common/bitstream.h +49 -29
  5. data/ext/zstdruby/libzstd/common/compiler.h +114 -22
  6. data/ext/zstdruby/libzstd/common/cpu.h +36 -0
  7. data/ext/zstdruby/libzstd/common/debug.c +6 -0
  8. data/ext/zstdruby/libzstd/common/debug.h +20 -11
  9. data/ext/zstdruby/libzstd/common/error_private.h +45 -36
  10. data/ext/zstdruby/libzstd/common/fse.h +3 -2
  11. data/ext/zstdruby/libzstd/common/fse_decompress.c +19 -17
  12. data/ext/zstdruby/libzstd/common/huf.h +14 -1
  13. data/ext/zstdruby/libzstd/common/mem.h +0 -9
  14. data/ext/zstdruby/libzstd/common/pool.c +1 -1
  15. data/ext/zstdruby/libzstd/common/pool.h +1 -1
  16. data/ext/zstdruby/libzstd/common/portability_macros.h +2 -0
  17. data/ext/zstdruby/libzstd/common/threading.c +8 -2
  18. data/ext/zstdruby/libzstd/common/xxhash.c +5 -11
  19. data/ext/zstdruby/libzstd/common/xxhash.h +2341 -1007
  20. data/ext/zstdruby/libzstd/common/zstd_internal.h +5 -5
  21. data/ext/zstdruby/libzstd/compress/fse_compress.c +8 -7
  22. data/ext/zstdruby/libzstd/compress/huf_compress.c +54 -25
  23. data/ext/zstdruby/libzstd/compress/zstd_compress.c +282 -161
  24. data/ext/zstdruby/libzstd/compress/zstd_compress_internal.h +29 -27
  25. data/ext/zstdruby/libzstd/compress/zstd_compress_superblock.c +224 -113
  26. data/ext/zstdruby/libzstd/compress/zstd_cwksp.h +19 -13
  27. data/ext/zstdruby/libzstd/compress/zstd_double_fast.c +17 -5
  28. data/ext/zstdruby/libzstd/compress/zstd_double_fast.h +11 -0
  29. data/ext/zstdruby/libzstd/compress/zstd_fast.c +14 -6
  30. data/ext/zstdruby/libzstd/compress/zstd_lazy.c +129 -87
  31. data/ext/zstdruby/libzstd/compress/zstd_lazy.h +103 -28
  32. data/ext/zstdruby/libzstd/compress/zstd_ldm.c +8 -2
  33. data/ext/zstdruby/libzstd/compress/zstd_opt.c +216 -112
  34. data/ext/zstdruby/libzstd/compress/zstd_opt.h +31 -7
  35. data/ext/zstdruby/libzstd/compress/zstdmt_compress.c +94 -79
  36. data/ext/zstdruby/libzstd/decompress/huf_decompress.c +188 -126
  37. data/ext/zstdruby/libzstd/decompress/huf_decompress_amd64.S +38 -19
  38. data/ext/zstdruby/libzstd/decompress/zstd_decompress.c +84 -32
  39. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.c +231 -208
  40. data/ext/zstdruby/libzstd/decompress/zstd_decompress_block.h +1 -1
  41. data/ext/zstdruby/libzstd/decompress/zstd_decompress_internal.h +2 -0
  42. data/ext/zstdruby/libzstd/dictBuilder/cover.c +16 -12
  43. data/ext/zstdruby/libzstd/dictBuilder/cover.h +2 -8
  44. data/ext/zstdruby/libzstd/dictBuilder/fastcover.c +2 -2
  45. data/ext/zstdruby/libzstd/dictBuilder/zdict.c +12 -6
  46. data/ext/zstdruby/libzstd/zstd.h +129 -60
  47. data/lib/zstd-ruby/version.rb +1 -1
  48. metadata +1 -1
@@ -12,6 +12,9 @@
12
12
  #include "hist.h"
13
13
  #include "zstd_opt.h"
14
14
 
15
+ #if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \
16
+ || !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \
17
+ || !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR)
15
18
 
16
19
  #define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
17
20
  #define ZSTD_MAX_PRICE (1<<30)
@@ -264,6 +267,7 @@ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
264
267
  const optState_t* const optPtr,
265
268
  int optLevel)
266
269
  {
270
+ DEBUGLOG(8, "ZSTD_rawLiteralsCost (%u literals)", litLength);
267
271
  if (litLength == 0) return 0;
268
272
 
269
273
  if (!ZSTD_compressedLiterals(optPtr))
@@ -402,9 +406,11 @@ MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
402
406
 
403
407
  /* Update hashTable3 up to ip (excluded)
404
408
  Assumption : always within prefix (i.e. not within extDict) */
405
- static U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
406
- U32* nextToUpdate3,
407
- const BYTE* const ip)
409
+ static
410
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
411
+ U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
412
+ U32* nextToUpdate3,
413
+ const BYTE* const ip)
408
414
  {
409
415
  U32* const hashTable3 = ms->hashTable3;
410
416
  U32 const hashLog3 = ms->hashLog3;
@@ -431,7 +437,9 @@ static U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
431
437
  * @param ip assumed <= iend-8 .
432
438
  * @param target The target of ZSTD_updateTree_internal() - we are filling to this position
433
439
  * @return : nb of positions added */
434
- static U32 ZSTD_insertBt1(
440
+ static
441
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
442
+ U32 ZSTD_insertBt1(
435
443
  const ZSTD_matchState_t* ms,
436
444
  const BYTE* const ip, const BYTE* const iend,
437
445
  U32 const target,
@@ -550,6 +558,7 @@ static U32 ZSTD_insertBt1(
550
558
  }
551
559
 
552
560
  FORCE_INLINE_TEMPLATE
561
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
553
562
  void ZSTD_updateTree_internal(
554
563
  ZSTD_matchState_t* ms,
555
564
  const BYTE* const ip, const BYTE* const iend,
@@ -558,7 +567,7 @@ void ZSTD_updateTree_internal(
558
567
  const BYTE* const base = ms->window.base;
559
568
  U32 const target = (U32)(ip - base);
560
569
  U32 idx = ms->nextToUpdate;
561
- DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)",
570
+ DEBUGLOG(7, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)",
562
571
  idx, target, dictMode);
563
572
 
564
573
  while(idx < target) {
@@ -575,7 +584,9 @@ void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
575
584
  ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict);
576
585
  }
577
586
 
578
- FORCE_INLINE_TEMPLATE U32
587
+ FORCE_INLINE_TEMPLATE
588
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
589
+ U32
579
590
  ZSTD_insertBtAndGetAllMatches (
580
591
  ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */
581
592
  ZSTD_matchState_t* ms,
@@ -816,7 +827,9 @@ typedef U32 (*ZSTD_getAllMatchesFn)(
816
827
  U32 const ll0,
817
828
  U32 const lengthToBeat);
818
829
 
819
- FORCE_INLINE_TEMPLATE U32 ZSTD_btGetAllMatches_internal(
830
+ FORCE_INLINE_TEMPLATE
831
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
832
+ U32 ZSTD_btGetAllMatches_internal(
820
833
  ZSTD_match_t* matches,
821
834
  ZSTD_matchState_t* ms,
822
835
  U32* nextToUpdate3,
@@ -1035,11 +1048,6 @@ ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm,
1035
1048
  * Optimal parser
1036
1049
  *********************************/
1037
1050
 
1038
- static U32 ZSTD_totalLen(ZSTD_optimal_t sol)
1039
- {
1040
- return sol.litlen + sol.mlen;
1041
- }
1042
-
1043
1051
  #if 0 /* debug */
1044
1052
 
1045
1053
  static void
@@ -1057,7 +1065,13 @@ listStats(const U32* table, int lastEltID)
1057
1065
 
1058
1066
  #endif
1059
1067
 
1060
- FORCE_INLINE_TEMPLATE size_t
1068
+ #define LIT_PRICE(_p) (int)ZSTD_rawLiteralsCost(_p, 1, optStatePtr, optLevel)
1069
+ #define LL_PRICE(_l) (int)ZSTD_litLengthPrice(_l, optStatePtr, optLevel)
1070
+ #define LL_INCPRICE(_l) (LL_PRICE(_l) - LL_PRICE(_l-1))
1071
+
1072
+ FORCE_INLINE_TEMPLATE
1073
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
1074
+ size_t
1061
1075
  ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
1062
1076
  seqStore_t* seqStore,
1063
1077
  U32 rep[ZSTD_REP_NUM],
@@ -1083,10 +1097,10 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
1083
1097
 
1084
1098
  ZSTD_optimal_t* const opt = optStatePtr->priceTable;
1085
1099
  ZSTD_match_t* const matches = optStatePtr->matchTable;
1086
- ZSTD_optimal_t lastSequence;
1100
+ ZSTD_optimal_t lastStretch;
1087
1101
  ZSTD_optLdm_t optLdm;
1088
1102
 
1089
- ZSTD_memset(&lastSequence, 0, sizeof(ZSTD_optimal_t));
1103
+ ZSTD_memset(&lastStretch, 0, sizeof(ZSTD_optimal_t));
1090
1104
 
1091
1105
  optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
1092
1106
  optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
@@ -1108,19 +1122,31 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
1108
1122
  U32 const ll0 = !litlen;
1109
1123
  U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch);
1110
1124
  ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
1111
- (U32)(ip-istart), (U32)(iend - ip));
1112
- if (!nbMatches) { ip++; continue; }
1125
+ (U32)(ip-istart), (U32)(iend-ip));
1126
+ if (!nbMatches) {
1127
+ DEBUGLOG(8, "no match found at cPos %u", (unsigned)(ip-istart));
1128
+ ip++;
1129
+ continue;
1130
+ }
1131
+
1132
+ /* Match found: let's store this solution, and eventually find more candidates.
1133
+ * During this forward pass, @opt is used to store stretches,
1134
+ * defined as "a match followed by N literals".
1135
+ * Note how this is different from a Sequence, which is "N literals followed by a match".
1136
+ * Storing stretches allows us to store different match predecessors
1137
+ * for each literal position part of a literals run. */
1113
1138
 
1114
1139
  /* initialize opt[0] */
1115
- { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
1116
- opt[0].mlen = 0; /* means is_a_literal */
1140
+ opt[0].mlen = 0; /* there are only literals so far */
1117
1141
  opt[0].litlen = litlen;
1118
- /* We don't need to include the actual price of the literals because
1119
- * it is static for the duration of the forward pass, and is included
1120
- * in every price. We include the literal length to avoid negative
1121
- * prices when we subtract the previous literal length.
1142
+ /* No need to include the actual price of the literals before the first match
1143
+ * because it is static for the duration of the forward pass, and is included
1144
+ * in every subsequent price. But, we include the literal length because
1145
+ * the cost variation of litlen depends on the value of litlen.
1122
1146
  */
1123
- opt[0].price = (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel);
1147
+ opt[0].price = LL_PRICE(litlen);
1148
+ ZSTD_STATIC_ASSERT(sizeof(opt[0].rep[0]) == sizeof(rep[0]));
1149
+ ZSTD_memcpy(&opt[0].rep, rep, sizeof(opt[0].rep));
1124
1150
 
1125
1151
  /* large match -> immediate encoding */
1126
1152
  { U32 const maxML = matches[nbMatches-1].len;
@@ -1129,82 +1155,106 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
1129
1155
  nbMatches, maxML, maxOffBase, (U32)(ip-prefixStart));
1130
1156
 
1131
1157
  if (maxML > sufficient_len) {
1132
- lastSequence.litlen = litlen;
1133
- lastSequence.mlen = maxML;
1134
- lastSequence.off = maxOffBase;
1135
- DEBUGLOG(6, "large match (%u>%u), immediate encoding",
1158
+ lastStretch.litlen = 0;
1159
+ lastStretch.mlen = maxML;
1160
+ lastStretch.off = maxOffBase;
1161
+ DEBUGLOG(6, "large match (%u>%u) => immediate encoding",
1136
1162
  maxML, sufficient_len);
1137
1163
  cur = 0;
1138
- last_pos = ZSTD_totalLen(lastSequence);
1164
+ last_pos = maxML;
1139
1165
  goto _shortestPath;
1140
1166
  } }
1141
1167
 
1142
1168
  /* set prices for first matches starting position == 0 */
1143
1169
  assert(opt[0].price >= 0);
1144
- { U32 const literalsPrice = (U32)opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
1145
- U32 pos;
1170
+ { U32 pos;
1146
1171
  U32 matchNb;
1147
1172
  for (pos = 1; pos < minMatch; pos++) {
1148
- opt[pos].price = ZSTD_MAX_PRICE; /* mlen, litlen and price will be fixed during forward scanning */
1173
+ opt[pos].price = ZSTD_MAX_PRICE;
1174
+ opt[pos].mlen = 0;
1175
+ opt[pos].litlen = litlen + pos;
1149
1176
  }
1150
1177
  for (matchNb = 0; matchNb < nbMatches; matchNb++) {
1151
1178
  U32 const offBase = matches[matchNb].off;
1152
1179
  U32 const end = matches[matchNb].len;
1153
1180
  for ( ; pos <= end ; pos++ ) {
1154
- U32 const matchPrice = ZSTD_getMatchPrice(offBase, pos, optStatePtr, optLevel);
1155
- U32 const sequencePrice = literalsPrice + matchPrice;
1181
+ int const matchPrice = (int)ZSTD_getMatchPrice(offBase, pos, optStatePtr, optLevel);
1182
+ int const sequencePrice = opt[0].price + matchPrice;
1156
1183
  DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
1157
- pos, ZSTD_fCost((int)sequencePrice));
1184
+ pos, ZSTD_fCost(sequencePrice));
1158
1185
  opt[pos].mlen = pos;
1159
1186
  opt[pos].off = offBase;
1160
- opt[pos].litlen = litlen;
1161
- opt[pos].price = (int)sequencePrice;
1162
- } }
1187
+ opt[pos].litlen = 0; /* end of match */
1188
+ opt[pos].price = sequencePrice + LL_PRICE(0);
1189
+ }
1190
+ }
1163
1191
  last_pos = pos-1;
1192
+ opt[pos].price = ZSTD_MAX_PRICE;
1164
1193
  }
1165
1194
  }
1166
1195
 
1167
1196
  /* check further positions */
1168
1197
  for (cur = 1; cur <= last_pos; cur++) {
1169
1198
  const BYTE* const inr = ip + cur;
1170
- assert(cur < ZSTD_OPT_NUM);
1171
- DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur)
1199
+ assert(cur <= ZSTD_OPT_NUM);
1200
+ DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur);
1172
1201
 
1173
1202
  /* Fix current position with one literal if cheaper */
1174
- { U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1;
1203
+ { U32 const litlen = opt[cur-1].litlen + 1;
1175
1204
  int const price = opt[cur-1].price
1176
- + (int)ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
1177
- + (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
1178
- - (int)ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
1205
+ + LIT_PRICE(ip+cur-1)
1206
+ + LL_INCPRICE(litlen);
1179
1207
  assert(price < 1000000000); /* overflow check */
1180
1208
  if (price <= opt[cur].price) {
1209
+ ZSTD_optimal_t const prevMatch = opt[cur];
1181
1210
  DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
1182
1211
  inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen,
1183
1212
  opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]);
1184
- opt[cur].mlen = 0;
1185
- opt[cur].off = 0;
1213
+ opt[cur] = opt[cur-1];
1186
1214
  opt[cur].litlen = litlen;
1187
1215
  opt[cur].price = price;
1216
+ if ( (optLevel >= 1) /* additional check only for higher modes */
1217
+ && (prevMatch.litlen == 0) /* replace a match */
1218
+ && (LL_INCPRICE(1) < 0) /* ll1 is cheaper than ll0 */
1219
+ && LIKELY(ip + cur < iend)
1220
+ ) {
1221
+ /* check next position, in case it would be cheaper */
1222
+ int with1literal = prevMatch.price + LIT_PRICE(ip+cur) + LL_INCPRICE(1);
1223
+ int withMoreLiterals = price + LIT_PRICE(ip+cur) + LL_INCPRICE(litlen+1);
1224
+ DEBUGLOG(7, "then at next rPos %u : match+1lit %.2f vs %ulits %.2f",
1225
+ cur+1, ZSTD_fCost(with1literal), litlen+1, ZSTD_fCost(withMoreLiterals));
1226
+ if ( (with1literal < withMoreLiterals)
1227
+ && (with1literal < opt[cur+1].price) ) {
1228
+ /* update offset history - before it disappears */
1229
+ U32 const prev = cur - prevMatch.mlen;
1230
+ repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, prevMatch.off, opt[prev].litlen==0);
1231
+ assert(cur >= prevMatch.mlen);
1232
+ DEBUGLOG(7, "==> match+1lit is cheaper (%.2f < %.2f) (hist:%u,%u,%u) !",
1233
+ ZSTD_fCost(with1literal), ZSTD_fCost(withMoreLiterals),
1234
+ newReps.rep[0], newReps.rep[1], newReps.rep[2] );
1235
+ opt[cur+1] = prevMatch; /* mlen & offbase */
1236
+ ZSTD_memcpy(opt[cur+1].rep, &newReps, sizeof(repcodes_t));
1237
+ opt[cur+1].litlen = 1;
1238
+ opt[cur+1].price = with1literal;
1239
+ if (last_pos < cur+1) last_pos = cur+1;
1240
+ }
1241
+ }
1188
1242
  } else {
1189
- DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)",
1190
- inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price),
1191
- opt[cur].rep[0], opt[cur].rep[1], opt[cur].rep[2]);
1243
+ DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f)",
1244
+ inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price));
1192
1245
  }
1193
1246
  }
1194
1247
 
1195
- /* Set the repcodes of the current position. We must do it here
1196
- * because we rely on the repcodes of the 2nd to last sequence being
1197
- * correct to set the next chunks repcodes during the backward
1198
- * traversal.
1248
+ /* Offset history is not updated during match comparison.
1249
+ * Do it here, now that the match is selected and confirmed.
1199
1250
  */
1200
1251
  ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t));
1201
1252
  assert(cur >= opt[cur].mlen);
1202
- if (opt[cur].mlen != 0) {
1253
+ if (opt[cur].litlen == 0) {
1254
+ /* just finished a match => alter offset history */
1203
1255
  U32 const prev = cur - opt[cur].mlen;
1204
- repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0);
1256
+ repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[prev].litlen==0);
1205
1257
  ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t));
1206
- } else {
1207
- ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t));
1208
1258
  }
1209
1259
 
1210
1260
  /* last match must start at a minimum distance of 8 from oend */
@@ -1214,15 +1264,14 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
1214
1264
 
1215
1265
  if ( (optLevel==0) /*static_test*/
1216
1266
  && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) {
1217
- DEBUGLOG(7, "move to next rPos:%u : price is <=", cur+1);
1267
+ DEBUGLOG(7, "skip current position : next rPos(%u) price is cheaper", cur+1);
1218
1268
  continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
1219
1269
  }
1220
1270
 
1221
1271
  assert(opt[cur].price >= 0);
1222
- { U32 const ll0 = (opt[cur].mlen != 0);
1223
- U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
1224
- U32 const previousPrice = (U32)opt[cur].price;
1225
- U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
1272
+ { U32 const ll0 = (opt[cur].litlen == 0);
1273
+ int const previousPrice = opt[cur].price;
1274
+ int const basePrice = previousPrice + LL_PRICE(0);
1226
1275
  U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch);
1227
1276
  U32 matchNb;
1228
1277
 
@@ -1234,18 +1283,17 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
1234
1283
  continue;
1235
1284
  }
1236
1285
 
1237
- { U32 const maxML = matches[nbMatches-1].len;
1238
- DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of maxLength=%u",
1239
- inr-istart, cur, nbMatches, maxML);
1240
-
1241
- if ( (maxML > sufficient_len)
1242
- || (cur + maxML >= ZSTD_OPT_NUM) ) {
1243
- lastSequence.mlen = maxML;
1244
- lastSequence.off = matches[nbMatches-1].off;
1245
- lastSequence.litlen = litlen;
1246
- cur -= (opt[cur].mlen==0) ? opt[cur].litlen : 0; /* last sequence is actually only literals, fix cur to last match - note : may underflow, in which case, it's first sequence, and it's okay */
1247
- last_pos = cur + ZSTD_totalLen(lastSequence);
1248
- if (cur > ZSTD_OPT_NUM) cur = 0; /* underflow => first match */
1286
+ { U32 const longestML = matches[nbMatches-1].len;
1287
+ DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of longest ML=%u",
1288
+ inr-istart, cur, nbMatches, longestML);
1289
+
1290
+ if ( (longestML > sufficient_len)
1291
+ || (cur + longestML >= ZSTD_OPT_NUM)
1292
+ || (ip + cur + longestML >= iend) ) {
1293
+ lastStretch.mlen = longestML;
1294
+ lastStretch.off = matches[nbMatches-1].off;
1295
+ lastStretch.litlen = 0;
1296
+ last_pos = cur + longestML;
1249
1297
  goto _shortestPath;
1250
1298
  } }
1251
1299
 
@@ -1257,19 +1305,24 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
1257
1305
  U32 mlen;
1258
1306
 
1259
1307
  DEBUGLOG(7, "testing match %u => offBase=%4u, mlen=%2u, llen=%2u",
1260
- matchNb, matches[matchNb].off, lastML, litlen);
1308
+ matchNb, matches[matchNb].off, lastML, opt[cur].litlen);
1261
1309
 
1262
1310
  for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */
1263
1311
  U32 const pos = cur + mlen;
1264
- int const price = (int)basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
1312
+ int const price = basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
1265
1313
 
1266
1314
  if ((pos > last_pos) || (price < opt[pos].price)) {
1267
1315
  DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
1268
1316
  pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
1269
- while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } /* fill empty positions */
1317
+ while (last_pos < pos) {
1318
+ /* fill empty positions, for future comparisons */
1319
+ last_pos++;
1320
+ opt[last_pos].price = ZSTD_MAX_PRICE;
1321
+ opt[last_pos].litlen = !0; /* just needs to be != 0, to mean "not an end of match" */
1322
+ }
1270
1323
  opt[pos].mlen = mlen;
1271
1324
  opt[pos].off = offset;
1272
- opt[pos].litlen = litlen;
1325
+ opt[pos].litlen = 0;
1273
1326
  opt[pos].price = price;
1274
1327
  } else {
1275
1328
  DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
@@ -1277,47 +1330,81 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
1277
1330
  if (optLevel==0) break; /* early update abort; gets ~+10% speed for about -0.01 ratio loss */
1278
1331
  }
1279
1332
  } } }
1333
+ opt[last_pos+1].price = ZSTD_MAX_PRICE;
1280
1334
  } /* for (cur = 1; cur <= last_pos; cur++) */
1281
1335
 
1282
- lastSequence = opt[last_pos];
1283
- cur = last_pos > ZSTD_totalLen(lastSequence) ? last_pos - ZSTD_totalLen(lastSequence) : 0; /* single sequence, and it starts before `ip` */
1284
- assert(cur < ZSTD_OPT_NUM); /* control overflow*/
1336
+ lastStretch = opt[last_pos];
1337
+ assert(cur >= lastStretch.mlen);
1338
+ cur = last_pos - lastStretch.mlen;
1285
1339
 
1286
1340
  _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
1287
1341
  assert(opt[0].mlen == 0);
1342
+ assert(last_pos >= lastStretch.mlen);
1343
+ assert(cur == last_pos - lastStretch.mlen);
1288
1344
 
1289
- /* Set the next chunk's repcodes based on the repcodes of the beginning
1290
- * of the last match, and the last sequence. This avoids us having to
1291
- * update them while traversing the sequences.
1292
- */
1293
- if (lastSequence.mlen != 0) {
1294
- repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0);
1295
- ZSTD_memcpy(rep, &reps, sizeof(reps));
1345
+ if (lastStretch.mlen==0) {
1346
+ /* no solution : all matches have been converted into literals */
1347
+ assert(lastStretch.litlen == (ip - anchor) + last_pos);
1348
+ ip += last_pos;
1349
+ continue;
1350
+ }
1351
+ assert(lastStretch.off > 0);
1352
+
1353
+ /* Update offset history */
1354
+ if (lastStretch.litlen == 0) {
1355
+ /* finishing on a match : update offset history */
1356
+ repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastStretch.off, opt[cur].litlen==0);
1357
+ ZSTD_memcpy(rep, &reps, sizeof(repcodes_t));
1296
1358
  } else {
1297
- ZSTD_memcpy(rep, opt[cur].rep, sizeof(repcodes_t));
1359
+ ZSTD_memcpy(rep, lastStretch.rep, sizeof(repcodes_t));
1360
+ assert(cur >= lastStretch.litlen);
1361
+ cur -= lastStretch.litlen;
1298
1362
  }
1299
1363
 
1300
- { U32 const storeEnd = cur + 1;
1364
+ /* Let's write the shortest path solution.
1365
+ * It is stored in @opt in reverse order,
1366
+ * starting from @storeEnd (==cur+2),
1367
+ * effectively partially @opt overwriting.
1368
+ * Content is changed too:
1369
+ * - So far, @opt stored stretches, aka a match followed by literals
1370
+ * - Now, it will store sequences, aka literals followed by a match
1371
+ */
1372
+ { U32 const storeEnd = cur + 2;
1301
1373
  U32 storeStart = storeEnd;
1302
- U32 seqPos = cur;
1374
+ U32 stretchPos = cur;
1303
1375
 
1304
1376
  DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)",
1305
1377
  last_pos, cur); (void)last_pos;
1306
- assert(storeEnd < ZSTD_OPT_NUM);
1307
- DEBUGLOG(6, "last sequence copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
1308
- storeEnd, lastSequence.litlen, lastSequence.mlen, lastSequence.off);
1309
- opt[storeEnd] = lastSequence;
1310
- while (seqPos > 0) {
1311
- U32 const backDist = ZSTD_totalLen(opt[seqPos]);
1378
+ assert(storeEnd < ZSTD_OPT_SIZE);
1379
+ DEBUGLOG(6, "last stretch copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
1380
+ storeEnd, lastStretch.litlen, lastStretch.mlen, lastStretch.off);
1381
+ if (lastStretch.litlen > 0) {
1382
+ /* last "sequence" is unfinished: just a bunch of literals */
1383
+ opt[storeEnd].litlen = lastStretch.litlen;
1384
+ opt[storeEnd].mlen = 0;
1385
+ storeStart = storeEnd-1;
1386
+ opt[storeStart] = lastStretch;
1387
+ } {
1388
+ opt[storeEnd] = lastStretch; /* note: litlen will be fixed */
1389
+ storeStart = storeEnd;
1390
+ }
1391
+ while (1) {
1392
+ ZSTD_optimal_t nextStretch = opt[stretchPos];
1393
+ opt[storeStart].litlen = nextStretch.litlen;
1394
+ DEBUGLOG(6, "selected sequence (llen=%u,mlen=%u,ofc=%u)",
1395
+ opt[storeStart].litlen, opt[storeStart].mlen, opt[storeStart].off);
1396
+ if (nextStretch.mlen == 0) {
1397
+ /* reaching beginning of segment */
1398
+ break;
1399
+ }
1312
1400
  storeStart--;
1313
- DEBUGLOG(6, "sequence from rPos=%u copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
1314
- seqPos, storeStart, opt[seqPos].litlen, opt[seqPos].mlen, opt[seqPos].off);
1315
- opt[storeStart] = opt[seqPos];
1316
- seqPos = (seqPos > backDist) ? seqPos - backDist : 0;
1401
+ opt[storeStart] = nextStretch; /* note: litlen will be fixed */
1402
+ assert(nextStretch.litlen + nextStretch.mlen <= stretchPos);
1403
+ stretchPos -= nextStretch.litlen + nextStretch.mlen;
1317
1404
  }
1318
1405
 
1319
1406
  /* save sequences */
1320
- DEBUGLOG(6, "sending selected sequences into seqStore")
1407
+ DEBUGLOG(6, "sending selected sequences into seqStore");
1321
1408
  { U32 storePos;
1322
1409
  for (storePos=storeStart; storePos <= storeEnd; storePos++) {
1323
1410
  U32 const llen = opt[storePos].litlen;
@@ -1339,6 +1426,9 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
1339
1426
  anchor += advance;
1340
1427
  ip = anchor;
1341
1428
  } }
1429
+ DEBUGLOG(7, "new offset history : %u, %u, %u", rep[0], rep[1], rep[2]);
1430
+
1431
+ /* update all costs */
1342
1432
  ZSTD_setBasePrices(optStatePtr, optLevel);
1343
1433
  }
1344
1434
  } /* while (ip < ilimit) */
@@ -1346,21 +1436,27 @@ _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
1346
1436
  /* Return the last literals size */
1347
1437
  return (size_t)(iend - anchor);
1348
1438
  }
1439
+ #endif /* build exclusions */
1349
1440
 
1441
+ #ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
1350
1442
  static size_t ZSTD_compressBlock_opt0(
1351
1443
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1352
1444
  const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
1353
1445
  {
1354
1446
  return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /* optLevel */, dictMode);
1355
1447
  }
1448
+ #endif
1356
1449
 
1450
+ #ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
1357
1451
  static size_t ZSTD_compressBlock_opt2(
1358
1452
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1359
1453
  const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
1360
1454
  {
1361
1455
  return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /* optLevel */, dictMode);
1362
1456
  }
1457
+ #endif
1363
1458
 
1459
+ #ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
1364
1460
  size_t ZSTD_compressBlock_btopt(
1365
1461
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1366
1462
  const void* src, size_t srcSize)
@@ -1368,20 +1464,23 @@ size_t ZSTD_compressBlock_btopt(
1368
1464
  DEBUGLOG(5, "ZSTD_compressBlock_btopt");
1369
1465
  return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
1370
1466
  }
1467
+ #endif
1371
1468
 
1372
1469
 
1373
1470
 
1374
1471
 
1472
+ #ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
1375
1473
  /* ZSTD_initStats_ultra():
1376
1474
  * make a first compression pass, just to seed stats with more accurate starting values.
1377
1475
  * only works on first block, with no dictionary and no ldm.
1378
1476
  * this function cannot error out, its narrow contract must be respected.
1379
1477
  */
1380
- static void
1381
- ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
1382
- seqStore_t* seqStore,
1383
- U32 rep[ZSTD_REP_NUM],
1384
- const void* src, size_t srcSize)
1478
+ static
1479
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
1480
+ void ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
1481
+ seqStore_t* seqStore,
1482
+ U32 rep[ZSTD_REP_NUM],
1483
+ const void* src, size_t srcSize)
1385
1484
  {
1386
1485
  U32 tmpRep[ZSTD_REP_NUM]; /* updated rep codes will sink here */
1387
1486
  ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep));
@@ -1425,7 +1524,7 @@ size_t ZSTD_compressBlock_btultra2(
1425
1524
  * Consequently, this can only work if no data has been previously loaded in tables,
1426
1525
  * aka, no dictionary, no prefix, no ldm preprocessing.
1427
1526
  * The compression ratio gain is generally small (~0.5% on first block),
1428
- ** the cost is 2x cpu time on first block. */
1527
+ * the cost is 2x cpu time on first block. */
1429
1528
  assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
1430
1529
  if ( (ms->opt.litLengthSum==0) /* first block */
1431
1530
  && (seqStore->sequences == seqStore->sequencesStart) /* no ldm */
@@ -1438,7 +1537,9 @@ size_t ZSTD_compressBlock_btultra2(
1438
1537
 
1439
1538
  return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
1440
1539
  }
1540
+ #endif
1441
1541
 
1542
+ #ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
1442
1543
  size_t ZSTD_compressBlock_btopt_dictMatchState(
1443
1544
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1444
1545
  const void* src, size_t srcSize)
@@ -1446,18 +1547,20 @@ size_t ZSTD_compressBlock_btopt_dictMatchState(
1446
1547
  return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
1447
1548
  }
1448
1549
 
1449
- size_t ZSTD_compressBlock_btultra_dictMatchState(
1550
+ size_t ZSTD_compressBlock_btopt_extDict(
1450
1551
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1451
1552
  const void* src, size_t srcSize)
1452
1553
  {
1453
- return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
1554
+ return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
1454
1555
  }
1556
+ #endif
1455
1557
 
1456
- size_t ZSTD_compressBlock_btopt_extDict(
1558
+ #ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
1559
+ size_t ZSTD_compressBlock_btultra_dictMatchState(
1457
1560
  ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1458
1561
  const void* src, size_t srcSize)
1459
1562
  {
1460
- return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
1563
+ return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
1461
1564
  }
1462
1565
 
1463
1566
  size_t ZSTD_compressBlock_btultra_extDict(
@@ -1466,6 +1569,7 @@ size_t ZSTD_compressBlock_btultra_extDict(
1466
1569
  {
1467
1570
  return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
1468
1571
  }
1572
+ #endif
1469
1573
 
1470
1574
  /* note : no btultra2 variant for extDict nor dictMatchState,
1471
1575
  * because btultra2 is not meant to work with dictionaries