multi_compress 0.3.2 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (212) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +17 -3
  3. data/GET_STARTED.md +3 -3
  4. data/README.md +75 -66
  5. data/THIRD_PARTY_NOTICES.md +24 -0
  6. data/ext/multi_compress/brotli_dec_static_init.c +3 -0
  7. data/ext/multi_compress/brotli_enc_static_init.c +3 -0
  8. data/ext/multi_compress/extconf.rb +79 -3
  9. data/ext/multi_compress/multi_compress.c +199 -120
  10. data/ext/multi_compress/vendor/.vendored +2 -2
  11. data/ext/multi_compress/vendor/brotli/LICENSE +19 -0
  12. data/ext/multi_compress/vendor/brotli/c/common/constants.c +7 -7
  13. data/ext/multi_compress/vendor/brotli/c/common/constants.h +2 -5
  14. data/ext/multi_compress/vendor/brotli/c/common/context.c +2 -2
  15. data/ext/multi_compress/vendor/brotli/c/common/context.h +1 -2
  16. data/ext/multi_compress/vendor/brotli/c/common/dictionary.c +4 -5856
  17. data/ext/multi_compress/vendor/brotli/c/common/dictionary.h +1 -2
  18. data/ext/multi_compress/vendor/brotli/c/common/dictionary_inc.h +5847 -0
  19. data/ext/multi_compress/vendor/brotli/c/common/platform.c +0 -4
  20. data/ext/multi_compress/vendor/brotli/c/common/platform.h +182 -43
  21. data/ext/multi_compress/vendor/brotli/c/common/shared_dictionary.c +3 -7
  22. data/ext/multi_compress/vendor/brotli/c/common/shared_dictionary_internal.h +1 -1
  23. data/ext/multi_compress/vendor/brotli/c/common/static_init.h +56 -0
  24. data/ext/multi_compress/vendor/brotli/c/common/transform.c +6 -4
  25. data/ext/multi_compress/vendor/brotli/c/common/transform.h +1 -2
  26. data/ext/multi_compress/vendor/brotli/c/common/version.h +3 -3
  27. data/ext/multi_compress/vendor/brotli/c/dec/bit_reader.c +2 -3
  28. data/ext/multi_compress/vendor/brotli/c/dec/bit_reader.h +0 -4
  29. data/ext/multi_compress/vendor/brotli/c/dec/decode.c +128 -39
  30. data/ext/multi_compress/vendor/brotli/c/dec/huffman.c +2 -5
  31. data/ext/multi_compress/vendor/brotli/c/dec/huffman.h +0 -2
  32. data/ext/multi_compress/vendor/brotli/c/dec/prefix.c +67 -0
  33. data/ext/multi_compress/vendor/brotli/c/dec/prefix.h +18 -708
  34. data/ext/multi_compress/vendor/brotli/c/dec/prefix_inc.h +707 -0
  35. data/ext/multi_compress/vendor/brotli/c/dec/state.c +18 -15
  36. data/ext/multi_compress/vendor/brotli/c/dec/state.h +2 -6
  37. data/ext/multi_compress/vendor/brotli/c/dec/static_init.c +53 -0
  38. data/ext/multi_compress/vendor/brotli/c/dec/static_init.h +30 -0
  39. data/ext/multi_compress/vendor/brotli/c/enc/backward_references.c +32 -8
  40. data/ext/multi_compress/vendor/brotli/c/enc/backward_references.h +1 -5
  41. data/ext/multi_compress/vendor/brotli/c/enc/backward_references_hq.c +15 -15
  42. data/ext/multi_compress/vendor/brotli/c/enc/backward_references_hq.h +1 -5
  43. data/ext/multi_compress/vendor/brotli/c/enc/bit_cost.c +28 -4
  44. data/ext/multi_compress/vendor/brotli/c/enc/bit_cost.h +8 -40
  45. data/ext/multi_compress/vendor/brotli/c/enc/bit_cost_inc.h +1 -1
  46. data/ext/multi_compress/vendor/brotli/c/enc/block_splitter.c +9 -12
  47. data/ext/multi_compress/vendor/brotli/c/enc/block_splitter.h +0 -3
  48. data/ext/multi_compress/vendor/brotli/c/enc/block_splitter_inc.h +14 -8
  49. data/ext/multi_compress/vendor/brotli/c/enc/brotli_bit_stream.c +10 -9
  50. data/ext/multi_compress/vendor/brotli/c/enc/brotli_bit_stream.h +0 -6
  51. data/ext/multi_compress/vendor/brotli/c/enc/cluster.c +0 -2
  52. data/ext/multi_compress/vendor/brotli/c/enc/cluster.h +0 -2
  53. data/ext/multi_compress/vendor/brotli/c/enc/command.c +1 -1
  54. data/ext/multi_compress/vendor/brotli/c/enc/command.h +8 -10
  55. data/ext/multi_compress/vendor/brotli/c/enc/compound_dictionary.c +3 -5
  56. data/ext/multi_compress/vendor/brotli/c/enc/compound_dictionary.h +1 -4
  57. data/ext/multi_compress/vendor/brotli/c/enc/compress_fragment.c +3 -13
  58. data/ext/multi_compress/vendor/brotli/c/enc/compress_fragment.h +0 -2
  59. data/ext/multi_compress/vendor/brotli/c/enc/compress_fragment_two_pass.c +5 -15
  60. data/ext/multi_compress/vendor/brotli/c/enc/compress_fragment_two_pass.h +0 -2
  61. data/ext/multi_compress/vendor/brotli/c/enc/dictionary_hash.c +127 -1830
  62. data/ext/multi_compress/vendor/brotli/c/enc/dictionary_hash.h +23 -3
  63. data/ext/multi_compress/vendor/brotli/c/enc/dictionary_hash_inc.h +1829 -0
  64. data/ext/multi_compress/vendor/brotli/c/enc/encode.c +77 -52
  65. data/ext/multi_compress/vendor/brotli/c/enc/encoder_dict.c +9 -7
  66. data/ext/multi_compress/vendor/brotli/c/enc/encoder_dict.h +2 -4
  67. data/ext/multi_compress/vendor/brotli/c/enc/entropy_encode.c +3 -6
  68. data/ext/multi_compress/vendor/brotli/c/enc/entropy_encode.h +2 -4
  69. data/ext/multi_compress/vendor/brotli/c/enc/entropy_encode_static.h +18 -12
  70. data/ext/multi_compress/vendor/brotli/c/enc/fast_log.c +1 -1
  71. data/ext/multi_compress/vendor/brotli/c/enc/fast_log.h +2 -3
  72. data/ext/multi_compress/vendor/brotli/c/enc/find_match_length.h +0 -2
  73. data/ext/multi_compress/vendor/brotli/c/enc/hash.h +38 -31
  74. data/ext/multi_compress/vendor/brotli/c/enc/hash_base.h +38 -0
  75. data/ext/multi_compress/vendor/brotli/c/enc/hash_forgetful_chain_inc.h +11 -1
  76. data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match64_inc.h +24 -7
  77. data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match64_simd_inc.h +304 -0
  78. data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match_inc.h +30 -11
  79. data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match_quickly_inc.h +4 -0
  80. data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match_simd_inc.h +278 -0
  81. data/ext/multi_compress/vendor/brotli/c/enc/histogram.c +1 -0
  82. data/ext/multi_compress/vendor/brotli/c/enc/histogram.h +0 -4
  83. data/ext/multi_compress/vendor/brotli/c/enc/literal_cost.c +4 -6
  84. data/ext/multi_compress/vendor/brotli/c/enc/literal_cost.h +0 -2
  85. data/ext/multi_compress/vendor/brotli/c/enc/matching_tag_mask.h +69 -0
  86. data/ext/multi_compress/vendor/brotli/c/enc/memory.c +0 -5
  87. data/ext/multi_compress/vendor/brotli/c/enc/memory.h +0 -4
  88. data/ext/multi_compress/vendor/brotli/c/enc/metablock.c +7 -9
  89. data/ext/multi_compress/vendor/brotli/c/enc/metablock.h +3 -3
  90. data/ext/multi_compress/vendor/brotli/c/enc/metablock_inc.h +4 -4
  91. data/ext/multi_compress/vendor/brotli/c/enc/params.h +0 -1
  92. data/ext/multi_compress/vendor/brotli/c/enc/prefix.h +0 -2
  93. data/ext/multi_compress/vendor/brotli/c/enc/quality.h +17 -10
  94. data/ext/multi_compress/vendor/brotli/c/enc/ringbuffer.h +1 -4
  95. data/ext/multi_compress/vendor/brotli/c/enc/state.h +2 -2
  96. data/ext/multi_compress/vendor/brotli/c/enc/static_dict.c +5 -11
  97. data/ext/multi_compress/vendor/brotli/c/enc/static_dict.h +1 -3
  98. data/ext/multi_compress/vendor/brotli/c/enc/static_dict_lut.c +224 -0
  99. data/ext/multi_compress/vendor/brotli/c/enc/static_dict_lut.h +20 -5837
  100. data/ext/multi_compress/vendor/brotli/c/enc/static_dict_lut_inc.h +5830 -0
  101. data/ext/multi_compress/vendor/brotli/c/enc/static_init.c +59 -0
  102. data/ext/multi_compress/vendor/brotli/c/enc/static_init.h +30 -0
  103. data/ext/multi_compress/vendor/brotli/c/enc/static_init_lazy.cc +26 -0
  104. data/ext/multi_compress/vendor/brotli/c/enc/utf8_util.c +1 -1
  105. data/ext/multi_compress/vendor/brotli/c/enc/utf8_util.h +0 -2
  106. data/ext/multi_compress/vendor/brotli/c/enc/write_bits.h +0 -2
  107. data/ext/multi_compress/vendor/brotli/c/include/brotli/decode.h +1 -1
  108. data/ext/multi_compress/vendor/brotli/c/include/brotli/encode.h +5 -1
  109. data/ext/multi_compress/vendor/brotli/c/include/brotli/port.h +4 -7
  110. data/ext/multi_compress/vendor/brotli/c/include/brotli/types.h +2 -2
  111. data/ext/multi_compress/vendor/lz4/LICENSE +12 -0
  112. data/ext/multi_compress/vendor/zstd/COPYING +339 -0
  113. data/ext/multi_compress/vendor/zstd/LICENSE +30 -0
  114. data/ext/multi_compress/vendor/zstd/lib/Makefile +67 -35
  115. data/ext/multi_compress/vendor/zstd/lib/README.md +33 -2
  116. data/ext/multi_compress/vendor/zstd/lib/common/allocations.h +55 -0
  117. data/ext/multi_compress/vendor/zstd/lib/common/bits.h +205 -0
  118. data/ext/multi_compress/vendor/zstd/lib/common/bitstream.h +84 -108
  119. data/ext/multi_compress/vendor/zstd/lib/common/compiler.h +170 -41
  120. data/ext/multi_compress/vendor/zstd/lib/common/cpu.h +37 -1
  121. data/ext/multi_compress/vendor/zstd/lib/common/debug.c +7 -1
  122. data/ext/multi_compress/vendor/zstd/lib/common/debug.h +21 -21
  123. data/ext/multi_compress/vendor/zstd/lib/common/entropy_common.c +12 -40
  124. data/ext/multi_compress/vendor/zstd/lib/common/error_private.c +10 -2
  125. data/ext/multi_compress/vendor/zstd/lib/common/error_private.h +46 -47
  126. data/ext/multi_compress/vendor/zstd/lib/common/fse.h +8 -100
  127. data/ext/multi_compress/vendor/zstd/lib/common/fse_decompress.c +28 -116
  128. data/ext/multi_compress/vendor/zstd/lib/common/huf.h +79 -166
  129. data/ext/multi_compress/vendor/zstd/lib/common/mem.h +46 -66
  130. data/ext/multi_compress/vendor/zstd/lib/common/pool.c +27 -11
  131. data/ext/multi_compress/vendor/zstd/lib/common/pool.h +8 -11
  132. data/ext/multi_compress/vendor/zstd/lib/common/portability_macros.h +45 -11
  133. data/ext/multi_compress/vendor/zstd/lib/common/threading.c +74 -14
  134. data/ext/multi_compress/vendor/zstd/lib/common/threading.h +5 -18
  135. data/ext/multi_compress/vendor/zstd/lib/common/xxhash.c +5 -11
  136. data/ext/multi_compress/vendor/zstd/lib/common/xxhash.h +2411 -1003
  137. data/ext/multi_compress/vendor/zstd/lib/common/zstd_common.c +1 -36
  138. data/ext/multi_compress/vendor/zstd/lib/common/zstd_deps.h +13 -1
  139. data/ext/multi_compress/vendor/zstd/lib/common/zstd_internal.h +13 -182
  140. data/ext/multi_compress/vendor/zstd/lib/common/zstd_trace.h +6 -13
  141. data/ext/multi_compress/vendor/zstd/lib/compress/clevels.h +1 -1
  142. data/ext/multi_compress/vendor/zstd/lib/compress/fse_compress.c +15 -131
  143. data/ext/multi_compress/vendor/zstd/lib/compress/hist.c +11 -1
  144. data/ext/multi_compress/vendor/zstd/lib/compress/hist.h +8 -1
  145. data/ext/multi_compress/vendor/zstd/lib/compress/huf_compress.c +283 -189
  146. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress.c +2419 -903
  147. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_internal.h +423 -245
  148. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_literals.c +116 -40
  149. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_literals.h +16 -8
  150. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_sequences.c +10 -10
  151. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_sequences.h +8 -7
  152. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_superblock.c +254 -139
  153. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_superblock.h +1 -1
  154. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_cwksp.h +184 -95
  155. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_double_fast.c +163 -81
  156. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_double_fast.h +18 -14
  157. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_fast.c +507 -197
  158. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_fast.h +7 -14
  159. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_lazy.c +579 -484
  160. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_lazy.h +133 -65
  161. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_ldm.c +61 -40
  162. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_ldm.h +7 -15
  163. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_ldm_geartab.h +1 -1
  164. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_opt.c +352 -218
  165. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_opt.h +37 -21
  166. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_preSplit.c +238 -0
  167. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_preSplit.h +33 -0
  168. data/ext/multi_compress/vendor/zstd/lib/compress/zstdmt_compress.c +239 -175
  169. data/ext/multi_compress/vendor/zstd/lib/compress/zstdmt_compress.h +5 -16
  170. data/ext/multi_compress/vendor/zstd/lib/decompress/huf_decompress.c +543 -488
  171. data/ext/multi_compress/vendor/zstd/lib/decompress/huf_decompress_amd64.S +78 -61
  172. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_ddict.c +4 -4
  173. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_ddict.h +1 -1
  174. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress.c +295 -115
  175. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress_block.c +430 -293
  176. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress_block.h +7 -2
  177. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress_internal.h +11 -7
  178. data/ext/multi_compress/vendor/zstd/lib/deprecated/zbuff.h +1 -1
  179. data/ext/multi_compress/vendor/zstd/lib/deprecated/zbuff_common.c +1 -1
  180. data/ext/multi_compress/vendor/zstd/lib/deprecated/zbuff_compress.c +1 -1
  181. data/ext/multi_compress/vendor/zstd/lib/deprecated/zbuff_decompress.c +3 -1
  182. data/ext/multi_compress/vendor/zstd/lib/dictBuilder/cover.c +95 -46
  183. data/ext/multi_compress/vendor/zstd/lib/dictBuilder/cover.h +3 -9
  184. data/ext/multi_compress/vendor/zstd/lib/dictBuilder/divsufsort.h +0 -10
  185. data/ext/multi_compress/vendor/zstd/lib/dictBuilder/fastcover.c +4 -4
  186. data/ext/multi_compress/vendor/zstd/lib/dictBuilder/zdict.c +25 -97
  187. data/ext/multi_compress/vendor/zstd/lib/dll/example/Makefile +1 -1
  188. data/ext/multi_compress/vendor/zstd/lib/dll/example/README.md +1 -1
  189. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_legacy.h +38 -1
  190. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v01.c +19 -50
  191. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v01.h +1 -1
  192. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v02.c +27 -80
  193. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v02.h +1 -1
  194. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v03.c +28 -83
  195. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v03.h +1 -1
  196. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v04.c +25 -74
  197. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v04.h +1 -1
  198. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v05.c +31 -76
  199. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v05.h +1 -1
  200. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v06.c +44 -88
  201. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v06.h +1 -1
  202. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v07.c +33 -84
  203. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v07.h +1 -1
  204. data/ext/multi_compress/vendor/zstd/lib/libzstd.mk +65 -33
  205. data/ext/multi_compress/vendor/zstd/lib/libzstd.pc.in +5 -5
  206. data/ext/multi_compress/vendor/zstd/lib/module.modulemap +13 -3
  207. data/ext/multi_compress/vendor/zstd/lib/zdict.h +65 -36
  208. data/ext/multi_compress/vendor/zstd/lib/zstd.h +890 -267
  209. data/ext/multi_compress/vendor/zstd/lib/zstd_errors.h +28 -16
  210. data/lib/multi_compress/version.rb +1 -1
  211. data/lib/multi_compress.rb +80 -41
  212. metadata +29 -2
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -36,13 +36,14 @@
36
36
  * If it is set_compressed, first sub-block's literals section will be Treeless_Literals_Block
37
37
  * and the following sub-blocks' literals sections will be Treeless_Literals_Block.
38
38
  * @return : compressed size of literals section of a sub-block
39
- * Or 0 if it unable to compress.
39
+ * Or 0 if unable to compress.
40
40
  * Or error code */
41
- static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
42
- const ZSTD_hufCTablesMetadata_t* hufMetadata,
43
- const BYTE* literals, size_t litSize,
44
- void* dst, size_t dstSize,
45
- const int bmi2, int writeEntropy, int* entropyWritten)
41
+ static size_t
42
+ ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
43
+ const ZSTD_hufCTablesMetadata_t* hufMetadata,
44
+ const BYTE* literals, size_t litSize,
45
+ void* dst, size_t dstSize,
46
+ const int bmi2, int writeEntropy, int* entropyWritten)
46
47
  {
47
48
  size_t const header = writeEntropy ? 200 : 0;
48
49
  size_t const lhSize = 3 + (litSize >= (1 KB - header)) + (litSize >= (16 KB - header));
@@ -50,11 +51,9 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
50
51
  BYTE* const oend = ostart + dstSize;
51
52
  BYTE* op = ostart + lhSize;
52
53
  U32 const singleStream = lhSize == 3;
53
- symbolEncodingType_e hType = writeEntropy ? hufMetadata->hType : set_repeat;
54
+ SymbolEncodingType_e hType = writeEntropy ? hufMetadata->hType : set_repeat;
54
55
  size_t cLitSize = 0;
55
56
 
56
- (void)bmi2; /* TODO bmi2... */
57
-
58
57
  DEBUGLOG(5, "ZSTD_compressSubBlock_literal (litSize=%zu, lhSize=%zu, writeEntropy=%d)", litSize, lhSize, writeEntropy);
59
58
 
60
59
  *entropyWritten = 0;
@@ -76,9 +75,9 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
76
75
  DEBUGLOG(5, "ZSTD_compressSubBlock_literal (hSize=%zu)", hufMetadata->hufDesSize);
77
76
  }
78
77
 
79
- /* TODO bmi2 */
80
- { const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, oend-op, literals, litSize, hufTable)
81
- : HUF_compress4X_usingCTable(op, oend-op, literals, litSize, hufTable);
78
+ { int const flags = bmi2 ? HUF_flags_bmi2 : 0;
79
+ const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, (size_t)(oend-op), literals, litSize, hufTable, flags)
80
+ : HUF_compress4X_usingCTable(op, (size_t)(oend-op), literals, litSize, hufTable, flags);
82
81
  op += cSize;
83
82
  cLitSize += cSize;
84
83
  if (cSize == 0 || ERR_isError(cSize)) {
@@ -103,7 +102,7 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
103
102
  switch(lhSize)
104
103
  {
105
104
  case 3: /* 2 - 2 - 10 - 10 */
106
- { U32 const lhc = hType + ((!singleStream) << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<14);
105
+ { U32 const lhc = hType + ((U32)(!singleStream) << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<14);
107
106
  MEM_writeLE24(ostart, lhc);
108
107
  break;
109
108
  }
@@ -123,26 +122,30 @@ static size_t ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
123
122
  }
124
123
  *entropyWritten = 1;
125
124
  DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)litSize, (U32)(op-ostart));
126
- return op-ostart;
125
+ return (size_t)(op-ostart);
127
126
  }
128
127
 
129
- static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef* sequences, size_t nbSeq, size_t litSize, int lastSequence) {
130
- const seqDef* const sstart = sequences;
131
- const seqDef* const send = sequences + nbSeq;
132
- const seqDef* sp = sstart;
128
+ static size_t
129
+ ZSTD_seqDecompressedSize(SeqStore_t const* seqStore,
130
+ const SeqDef* sequences, size_t nbSeqs,
131
+ size_t litSize, int lastSubBlock)
132
+ {
133
133
  size_t matchLengthSum = 0;
134
134
  size_t litLengthSum = 0;
135
- (void)(litLengthSum); /* suppress unused variable warning on some environments */
136
- while (send-sp > 0) {
137
- ZSTD_sequenceLength const seqLen = ZSTD_getSequenceLength(seqStore, sp);
135
+ size_t n;
136
+ for (n=0; n<nbSeqs; n++) {
137
+ const ZSTD_SequenceLength seqLen = ZSTD_getSequenceLength(seqStore, sequences+n);
138
138
  litLengthSum += seqLen.litLength;
139
139
  matchLengthSum += seqLen.matchLength;
140
- sp++;
141
140
  }
142
- assert(litLengthSum <= litSize);
143
- if (!lastSequence) {
141
+ DEBUGLOG(5, "ZSTD_seqDecompressedSize: %u sequences from %p: %u literals + %u matchlength",
142
+ (unsigned)nbSeqs, (const void*)sequences,
143
+ (unsigned)litLengthSum, (unsigned)matchLengthSum);
144
+ if (!lastSubBlock)
144
145
  assert(litLengthSum == litSize);
145
- }
146
+ else
147
+ assert(litLengthSum <= litSize);
148
+ (void)litLengthSum;
146
149
  return matchLengthSum + litSize;
147
150
  }
148
151
 
@@ -156,13 +159,14 @@ static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef*
156
159
  * @return : compressed size of sequences section of a sub-block
157
160
  * Or 0 if it is unable to compress
158
161
  * Or error code. */
159
- static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables,
160
- const ZSTD_fseCTablesMetadata_t* fseMetadata,
161
- const seqDef* sequences, size_t nbSeq,
162
- const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode,
163
- const ZSTD_CCtx_params* cctxParams,
164
- void* dst, size_t dstCapacity,
165
- const int bmi2, int writeEntropy, int* entropyWritten)
162
+ static size_t
163
+ ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables,
164
+ const ZSTD_fseCTablesMetadata_t* fseMetadata,
165
+ const SeqDef* sequences, size_t nbSeq,
166
+ const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode,
167
+ const ZSTD_CCtx_params* cctxParams,
168
+ void* dst, size_t dstCapacity,
169
+ const int bmi2, int writeEntropy, int* entropyWritten)
166
170
  {
167
171
  const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
168
172
  BYTE* const ostart = (BYTE*)dst;
@@ -176,14 +180,14 @@ static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables
176
180
  /* Sequences Header */
177
181
  RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/,
178
182
  dstSize_tooSmall, "");
179
- if (nbSeq < 0x7F)
183
+ if (nbSeq < 128)
180
184
  *op++ = (BYTE)nbSeq;
181
185
  else if (nbSeq < LONGNBSEQ)
182
186
  op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2;
183
187
  else
184
188
  op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
185
189
  if (nbSeq==0) {
186
- return op - ostart;
190
+ return (size_t)(op - ostart);
187
191
  }
188
192
 
189
193
  /* seqHead : flags for FSE encoding type */
@@ -205,7 +209,7 @@ static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables
205
209
  }
206
210
 
207
211
  { size_t const bitstreamSize = ZSTD_encodeSequences(
208
- op, oend - op,
212
+ op, (size_t)(oend - op),
209
213
  fseTables->matchlengthCTable, mlCode,
210
214
  fseTables->offcodeCTable, ofCode,
211
215
  fseTables->litlengthCTable, llCode,
@@ -249,7 +253,7 @@ static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables
249
253
  #endif
250
254
 
251
255
  *entropyWritten = 1;
252
- return op - ostart;
256
+ return (size_t)(op - ostart);
253
257
  }
254
258
 
255
259
  /** ZSTD_compressSubBlock() :
@@ -258,7 +262,7 @@ static size_t ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables
258
262
  * Or 0 if it failed to compress. */
259
263
  static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy,
260
264
  const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
261
- const seqDef* sequences, size_t nbSeq,
265
+ const SeqDef* sequences, size_t nbSeq,
262
266
  const BYTE* literals, size_t litSize,
263
267
  const BYTE* llCode, const BYTE* mlCode, const BYTE* ofCode,
264
268
  const ZSTD_CCtx_params* cctxParams,
@@ -275,7 +279,8 @@ static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy,
275
279
  litSize, nbSeq, writeLitEntropy, writeSeqEntropy, lastBlock);
276
280
  { size_t cLitSize = ZSTD_compressSubBlock_literal((const HUF_CElt*)entropy->huf.CTable,
277
281
  &entropyMetadata->hufMetadata, literals, litSize,
278
- op, oend-op, bmi2, writeLitEntropy, litEntropyWritten);
282
+ op, (size_t)(oend-op),
283
+ bmi2, writeLitEntropy, litEntropyWritten);
279
284
  FORWARD_IF_ERROR(cLitSize, "ZSTD_compressSubBlock_literal failed");
280
285
  if (cLitSize == 0) return 0;
281
286
  op += cLitSize;
@@ -285,18 +290,18 @@ static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy,
285
290
  sequences, nbSeq,
286
291
  llCode, mlCode, ofCode,
287
292
  cctxParams,
288
- op, oend-op,
293
+ op, (size_t)(oend-op),
289
294
  bmi2, writeSeqEntropy, seqEntropyWritten);
290
295
  FORWARD_IF_ERROR(cSeqSize, "ZSTD_compressSubBlock_sequences failed");
291
296
  if (cSeqSize == 0) return 0;
292
297
  op += cSeqSize;
293
298
  }
294
299
  /* Write block header */
295
- { size_t cSize = (op-ostart)-ZSTD_blockHeaderSize;
300
+ { size_t cSize = (size_t)(op-ostart) - ZSTD_blockHeaderSize;
296
301
  U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
297
302
  MEM_writeLE24(ostart, cBlockHeader24);
298
303
  }
299
- return op-ostart;
304
+ return (size_t)(op-ostart);
300
305
  }
301
306
 
302
307
  static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t litSize,
@@ -322,7 +327,7 @@ static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t lit
322
327
  return 0;
323
328
  }
324
329
 
325
- static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type,
330
+ static size_t ZSTD_estimateSubBlockSize_symbolType(SymbolEncodingType_e type,
326
331
  const BYTE* codeTable, unsigned maxCode,
327
332
  size_t nbSeq, const FSE_CTable* fseCTable,
328
333
  const U8* additionalBits,
@@ -385,7 +390,11 @@ static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable,
385
390
  return cSeqSizeEstimate + sequencesSectionHeaderSize;
386
391
  }
387
392
 
388
- static size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize,
393
+ typedef struct {
394
+ size_t estLitSize;
395
+ size_t estBlockSize;
396
+ } EstimatedBlockSize;
397
+ static EstimatedBlockSize ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize,
389
398
  const BYTE* ofCodeTable,
390
399
  const BYTE* llCodeTable,
391
400
  const BYTE* mlCodeTable,
@@ -393,15 +402,17 @@ static size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize,
393
402
  const ZSTD_entropyCTables_t* entropy,
394
403
  const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
395
404
  void* workspace, size_t wkspSize,
396
- int writeLitEntropy, int writeSeqEntropy) {
397
- size_t cSizeEstimate = 0;
398
- cSizeEstimate += ZSTD_estimateSubBlockSize_literal(literals, litSize,
399
- &entropy->huf, &entropyMetadata->hufMetadata,
400
- workspace, wkspSize, writeLitEntropy);
401
- cSizeEstimate += ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable,
405
+ int writeLitEntropy, int writeSeqEntropy)
406
+ {
407
+ EstimatedBlockSize ebs;
408
+ ebs.estLitSize = ZSTD_estimateSubBlockSize_literal(literals, litSize,
409
+ &entropy->huf, &entropyMetadata->hufMetadata,
410
+ workspace, wkspSize, writeLitEntropy);
411
+ ebs.estBlockSize = ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable,
402
412
  nbSeq, &entropy->fse, &entropyMetadata->fseMetadata,
403
413
  workspace, wkspSize, writeSeqEntropy);
404
- return cSizeEstimate + ZSTD_blockHeaderSize;
414
+ ebs.estBlockSize += ebs.estLitSize + ZSTD_blockHeaderSize;
415
+ return ebs;
405
416
  }
406
417
 
407
418
  static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMetadata)
@@ -415,14 +426,57 @@ static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMe
415
426
  return 0;
416
427
  }
417
428
 
429
+ static size_t countLiterals(SeqStore_t const* seqStore, const SeqDef* sp, size_t seqCount)
430
+ {
431
+ size_t n, total = 0;
432
+ assert(sp != NULL);
433
+ for (n=0; n<seqCount; n++) {
434
+ total += ZSTD_getSequenceLength(seqStore, sp+n).litLength;
435
+ }
436
+ DEBUGLOG(6, "countLiterals for %zu sequences from %p => %zu bytes", seqCount, (const void*)sp, total);
437
+ return total;
438
+ }
439
+
440
+ #define BYTESCALE 256
441
+
442
+ static size_t sizeBlockSequences(const SeqDef* sp, size_t nbSeqs,
443
+ size_t targetBudget, size_t avgLitCost, size_t avgSeqCost,
444
+ int firstSubBlock)
445
+ {
446
+ size_t n, budget = 0, inSize=0;
447
+ /* entropy headers */
448
+ size_t const headerSize = (size_t)firstSubBlock * 120 * BYTESCALE; /* generous estimate */
449
+ assert(firstSubBlock==0 || firstSubBlock==1);
450
+ budget += headerSize;
451
+
452
+ /* first sequence => at least one sequence*/
453
+ budget += sp[0].litLength * avgLitCost + avgSeqCost;
454
+ if (budget > targetBudget) return 1;
455
+ inSize = sp[0].litLength + (sp[0].mlBase+MINMATCH);
456
+
457
+ /* loop over sequences */
458
+ for (n=1; n<nbSeqs; n++) {
459
+ size_t currentCost = sp[n].litLength * avgLitCost + avgSeqCost;
460
+ budget += currentCost;
461
+ inSize += sp[n].litLength + (sp[n].mlBase+MINMATCH);
462
+ /* stop when sub-block budget is reached */
463
+ if ( (budget > targetBudget)
464
+ /* though continue to expand until the sub-block is deemed compressible */
465
+ && (budget < inSize * BYTESCALE) )
466
+ break;
467
+ }
468
+
469
+ return n;
470
+ }
471
+
418
472
  /** ZSTD_compressSubBlock_multi() :
419
473
  * Breaks super-block into multiple sub-blocks and compresses them.
420
- * Entropy will be written to the first block.
421
- * The following blocks will use repeat mode to compress.
422
- * All sub-blocks are compressed blocks (no raw or rle blocks).
423
- * @return : compressed size of the super block (which is multiple ZSTD blocks)
424
- * Or 0 if it failed to compress. */
425
- static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
474
+ * Entropy will be written into the first block.
475
+ * The following blocks use repeat_mode to compress.
476
+ * Sub-blocks are all compressed, except the last one when beneficial.
477
+ * @return : compressed size of the super block (which features multiple ZSTD blocks)
478
+ * or 0 if it failed to compress. */
479
+ static size_t ZSTD_compressSubBlock_multi(const SeqStore_t* seqStorePtr,
426
480
  const ZSTD_compressedBlockState_t* prevCBlock,
427
481
  ZSTD_compressedBlockState_t* nextCBlock,
428
482
  const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
@@ -432,12 +486,14 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
432
486
  const int bmi2, U32 lastBlock,
433
487
  void* workspace, size_t wkspSize)
434
488
  {
435
- const seqDef* const sstart = seqStorePtr->sequencesStart;
436
- const seqDef* const send = seqStorePtr->sequences;
437
- const seqDef* sp = sstart;
489
+ const SeqDef* const sstart = seqStorePtr->sequencesStart;
490
+ const SeqDef* const send = seqStorePtr->sequences;
491
+ const SeqDef* sp = sstart; /* tracks progresses within seqStorePtr->sequences */
492
+ size_t const nbSeqs = (size_t)(send - sstart);
438
493
  const BYTE* const lstart = seqStorePtr->litStart;
439
494
  const BYTE* const lend = seqStorePtr->lit;
440
495
  const BYTE* lp = lstart;
496
+ size_t const nbLiterals = (size_t)(lend - lstart);
441
497
  BYTE const* ip = (BYTE const*)src;
442
498
  BYTE const* const iend = ip + srcSize;
443
499
  BYTE* const ostart = (BYTE*)dst;
@@ -446,112 +502,171 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
446
502
  const BYTE* llCodePtr = seqStorePtr->llCode;
447
503
  const BYTE* mlCodePtr = seqStorePtr->mlCode;
448
504
  const BYTE* ofCodePtr = seqStorePtr->ofCode;
449
- size_t targetCBlockSize = cctxParams->targetCBlockSize;
450
- size_t litSize, seqCount;
451
- int writeLitEntropy = entropyMetadata->hufMetadata.hType == set_compressed;
505
+ size_t const minTarget = ZSTD_TARGETCBLOCKSIZE_MIN; /* enforce minimum size, to reduce undesirable side effects */
506
+ size_t const targetCBlockSize = MAX(minTarget, cctxParams->targetCBlockSize);
507
+ int writeLitEntropy = (entropyMetadata->hufMetadata.hType == set_compressed);
452
508
  int writeSeqEntropy = 1;
453
- int lastSequence = 0;
454
-
455
- DEBUGLOG(5, "ZSTD_compressSubBlock_multi (litSize=%u, nbSeq=%u)",
456
- (unsigned)(lend-lp), (unsigned)(send-sstart));
457
-
458
- litSize = 0;
459
- seqCount = 0;
460
- do {
461
- size_t cBlockSizeEstimate = 0;
462
- if (sstart == send) {
463
- lastSequence = 1;
464
- } else {
465
- const seqDef* const sequence = sp + seqCount;
466
- lastSequence = sequence == send - 1;
467
- litSize += ZSTD_getSequenceLength(seqStorePtr, sequence).litLength;
468
- seqCount++;
469
- }
470
- if (lastSequence) {
471
- assert(lp <= lend);
472
- assert(litSize <= (size_t)(lend - lp));
473
- litSize = (size_t)(lend - lp);
509
+
510
+ DEBUGLOG(5, "ZSTD_compressSubBlock_multi (srcSize=%u, litSize=%u, nbSeq=%u)",
511
+ (unsigned)srcSize, (unsigned)(lend-lstart), (unsigned)(send-sstart));
512
+
513
+ /* let's start by a general estimation for the full block */
514
+ if (nbSeqs > 0) {
515
+ EstimatedBlockSize const ebs =
516
+ ZSTD_estimateSubBlockSize(lp, nbLiterals,
517
+ ofCodePtr, llCodePtr, mlCodePtr, nbSeqs,
518
+ &nextCBlock->entropy, entropyMetadata,
519
+ workspace, wkspSize,
520
+ writeLitEntropy, writeSeqEntropy);
521
+ /* quick estimation */
522
+ size_t const avgLitCost = nbLiterals ? (ebs.estLitSize * BYTESCALE) / nbLiterals : BYTESCALE;
523
+ size_t const avgSeqCost = ((ebs.estBlockSize - ebs.estLitSize) * BYTESCALE) / nbSeqs;
524
+ const size_t nbSubBlocks = MAX((ebs.estBlockSize + (targetCBlockSize/2)) / targetCBlockSize, 1);
525
+ size_t n, avgBlockBudget, blockBudgetSupp=0;
526
+ avgBlockBudget = (ebs.estBlockSize * BYTESCALE) / nbSubBlocks;
527
+ DEBUGLOG(5, "estimated fullblock size=%u bytes ; avgLitCost=%.2f ; avgSeqCost=%.2f ; targetCBlockSize=%u, nbSubBlocks=%u ; avgBlockBudget=%.0f bytes",
528
+ (unsigned)ebs.estBlockSize, (double)avgLitCost/BYTESCALE, (double)avgSeqCost/BYTESCALE,
529
+ (unsigned)targetCBlockSize, (unsigned)nbSubBlocks, (double)avgBlockBudget/BYTESCALE);
530
+ /* simplification: if estimates states that the full superblock doesn't compress, just bail out immediately
531
+ * this will result in the production of a single uncompressed block covering @srcSize.*/
532
+ if (ebs.estBlockSize > srcSize) return 0;
533
+
534
+ /* compress and write sub-blocks */
535
+ assert(nbSubBlocks>0);
536
+ for (n=0; n < nbSubBlocks-1; n++) {
537
+ /* determine nb of sequences for current sub-block + nbLiterals from next sequence */
538
+ size_t const seqCount = sizeBlockSequences(sp, (size_t)(send-sp),
539
+ avgBlockBudget + blockBudgetSupp, avgLitCost, avgSeqCost, n==0);
540
+ /* if reached last sequence : break to last sub-block (simplification) */
541
+ assert(seqCount <= (size_t)(send-sp));
542
+ if (sp + seqCount == send) break;
543
+ assert(seqCount > 0);
544
+ /* compress sub-block */
545
+ { int litEntropyWritten = 0;
546
+ int seqEntropyWritten = 0;
547
+ size_t litSize = countLiterals(seqStorePtr, sp, seqCount);
548
+ const size_t decompressedSize =
549
+ ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, 0);
550
+ size_t const cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata,
551
+ sp, seqCount,
552
+ lp, litSize,
553
+ llCodePtr, mlCodePtr, ofCodePtr,
554
+ cctxParams,
555
+ op, (size_t)(oend-op),
556
+ bmi2, writeLitEntropy, writeSeqEntropy,
557
+ &litEntropyWritten, &seqEntropyWritten,
558
+ 0);
559
+ FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed");
560
+
561
+ /* check compressibility, update state components */
562
+ if (cSize > 0 && cSize < decompressedSize) {
563
+ DEBUGLOG(5, "Committed sub-block compressing %u bytes => %u bytes",
564
+ (unsigned)decompressedSize, (unsigned)cSize);
565
+ assert(ip + decompressedSize <= iend);
566
+ ip += decompressedSize;
567
+ lp += litSize;
568
+ op += cSize;
569
+ llCodePtr += seqCount;
570
+ mlCodePtr += seqCount;
571
+ ofCodePtr += seqCount;
572
+ /* Entropy only needs to be written once */
573
+ if (litEntropyWritten) {
574
+ writeLitEntropy = 0;
575
+ }
576
+ if (seqEntropyWritten) {
577
+ writeSeqEntropy = 0;
578
+ }
579
+ sp += seqCount;
580
+ blockBudgetSupp = 0;
581
+ } }
582
+ /* otherwise : do not compress yet, coalesce current sub-block with following one */
474
583
  }
475
- /* I think there is an optimization opportunity here.
476
- * Calling ZSTD_estimateSubBlockSize for every sequence can be wasteful
477
- * since it recalculates estimate from scratch.
478
- * For example, it would recount literal distribution and symbol codes every time.
479
- */
480
- cBlockSizeEstimate = ZSTD_estimateSubBlockSize(lp, litSize, ofCodePtr, llCodePtr, mlCodePtr, seqCount,
481
- &nextCBlock->entropy, entropyMetadata,
482
- workspace, wkspSize, writeLitEntropy, writeSeqEntropy);
483
- if (cBlockSizeEstimate > targetCBlockSize || lastSequence) {
484
- int litEntropyWritten = 0;
485
- int seqEntropyWritten = 0;
486
- const size_t decompressedSize = ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, lastSequence);
487
- const size_t cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata,
488
- sp, seqCount,
489
- lp, litSize,
490
- llCodePtr, mlCodePtr, ofCodePtr,
491
- cctxParams,
492
- op, oend-op,
493
- bmi2, writeLitEntropy, writeSeqEntropy,
494
- &litEntropyWritten, &seqEntropyWritten,
495
- lastBlock && lastSequence);
496
- FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed");
497
- if (cSize > 0 && cSize < decompressedSize) {
498
- DEBUGLOG(5, "Committed the sub-block");
499
- assert(ip + decompressedSize <= iend);
500
- ip += decompressedSize;
501
- sp += seqCount;
502
- lp += litSize;
503
- op += cSize;
504
- llCodePtr += seqCount;
505
- mlCodePtr += seqCount;
506
- ofCodePtr += seqCount;
507
- litSize = 0;
508
- seqCount = 0;
509
- /* Entropy only needs to be written once */
510
- if (litEntropyWritten) {
511
- writeLitEntropy = 0;
512
- }
513
- if (seqEntropyWritten) {
514
- writeSeqEntropy = 0;
515
- }
584
+ } /* if (nbSeqs > 0) */
585
+
586
+ /* write last block */
587
+ DEBUGLOG(5, "Generate last sub-block: %u sequences remaining", (unsigned)(send - sp));
588
+ { int litEntropyWritten = 0;
589
+ int seqEntropyWritten = 0;
590
+ size_t litSize = (size_t)(lend - lp);
591
+ size_t seqCount = (size_t)(send - sp);
592
+ const size_t decompressedSize =
593
+ ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, 1);
594
+ size_t const cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata,
595
+ sp, seqCount,
596
+ lp, litSize,
597
+ llCodePtr, mlCodePtr, ofCodePtr,
598
+ cctxParams,
599
+ op, (size_t)(oend-op),
600
+ bmi2, writeLitEntropy, writeSeqEntropy,
601
+ &litEntropyWritten, &seqEntropyWritten,
602
+ lastBlock);
603
+ FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed");
604
+
605
+ /* update pointers, the nb of literals borrowed from next sequence must be preserved */
606
+ if (cSize > 0 && cSize < decompressedSize) {
607
+ DEBUGLOG(5, "Last sub-block compressed %u bytes => %u bytes",
608
+ (unsigned)decompressedSize, (unsigned)cSize);
609
+ assert(ip + decompressedSize <= iend);
610
+ ip += decompressedSize;
611
+ lp += litSize;
612
+ op += cSize;
613
+ llCodePtr += seqCount;
614
+ mlCodePtr += seqCount;
615
+ ofCodePtr += seqCount;
616
+ /* Entropy only needs to be written once */
617
+ if (litEntropyWritten) {
618
+ writeLitEntropy = 0;
516
619
  }
620
+ if (seqEntropyWritten) {
621
+ writeSeqEntropy = 0;
622
+ }
623
+ sp += seqCount;
517
624
  }
518
- } while (!lastSequence);
625
+ }
626
+
627
+
519
628
  if (writeLitEntropy) {
520
- DEBUGLOG(5, "ZSTD_compressSubBlock_multi has literal entropy tables unwritten");
629
+ DEBUGLOG(5, "Literal entropy tables were never written");
521
630
  ZSTD_memcpy(&nextCBlock->entropy.huf, &prevCBlock->entropy.huf, sizeof(prevCBlock->entropy.huf));
522
631
  }
523
632
  if (writeSeqEntropy && ZSTD_needSequenceEntropyTables(&entropyMetadata->fseMetadata)) {
524
633
  /* If we haven't written our entropy tables, then we've violated our contract and
525
634
  * must emit an uncompressed block.
526
635
  */
527
- DEBUGLOG(5, "ZSTD_compressSubBlock_multi has sequence entropy tables unwritten");
636
+ DEBUGLOG(5, "Sequence entropy tables were never written => cancel, emit an uncompressed block");
528
637
  return 0;
529
638
  }
639
+
530
640
  if (ip < iend) {
531
- size_t const cSize = ZSTD_noCompressBlock(op, oend - op, ip, iend - ip, lastBlock);
532
- DEBUGLOG(5, "ZSTD_compressSubBlock_multi last sub-block uncompressed, %zu bytes", (size_t)(iend - ip));
641
+ /* some data left : last part of the block sent uncompressed */
642
+ size_t const rSize = (size_t)((iend - ip));
643
+ size_t const cSize = ZSTD_noCompressBlock(op, (size_t)(oend - op), ip, rSize, lastBlock);
644
+ DEBUGLOG(5, "Generate last uncompressed sub-block of %u bytes", (unsigned)(rSize));
533
645
  FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
534
646
  assert(cSize != 0);
535
647
  op += cSize;
536
648
  /* We have to regenerate the repcodes because we've skipped some sequences */
537
649
  if (sp < send) {
538
- seqDef const* seq;
539
- repcodes_t rep;
650
+ const SeqDef* seq;
651
+ Repcodes_t rep;
540
652
  ZSTD_memcpy(&rep, prevCBlock->rep, sizeof(rep));
541
653
  for (seq = sstart; seq < sp; ++seq) {
542
- ZSTD_updateRep(rep.rep, seq->offBase - 1, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0);
654
+ ZSTD_updateRep(rep.rep, seq->offBase, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0);
543
655
  }
544
656
  ZSTD_memcpy(nextCBlock->rep, &rep, sizeof(rep));
545
657
  }
546
658
  }
547
- DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed");
548
- return op-ostart;
659
+
660
+ DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed all subBlocks: total compressed size = %u",
661
+ (unsigned)(op-ostart));
662
+ return (size_t)(op-ostart);
549
663
  }
550
664
 
551
665
  size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
552
666
  void* dst, size_t dstCapacity,
553
- void const* src, size_t srcSize,
554
- unsigned lastBlock) {
667
+ const void* src, size_t srcSize,
668
+ unsigned lastBlock)
669
+ {
555
670
  ZSTD_entropyCTablesMetadata_t entropyMetadata;
556
671
 
557
672
  FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(&zc->seqStore,
@@ -559,7 +674,7 @@ size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
559
674
  &zc->blockState.nextCBlock->entropy,
560
675
  &zc->appliedParams,
561
676
  &entropyMetadata,
562
- zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), "");
677
+ zc->tmpWorkspace, zc->tmpWkspSize /* statically allocated in resetCCtx */), "");
563
678
 
564
679
  return ZSTD_compressSubBlock_multi(&zc->seqStore,
565
680
  zc->blockState.prevCBlock,
@@ -569,5 +684,5 @@ size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
569
684
  dst, dstCapacity,
570
685
  src, srcSize,
571
686
  zc->bmi2, lastBlock,
572
- zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */);
687
+ zc->tmpWorkspace, zc->tmpWkspSize /* statically allocated in resetCCtx */);
573
688
  }
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the