multi_compress 0.3.2 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (210) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -3
  3. data/GET_STARTED.md +3 -3
  4. data/README.md +75 -66
  5. data/THIRD_PARTY_NOTICES.md +24 -0
  6. data/ext/multi_compress/brotli_dec_static_init.c +3 -0
  7. data/ext/multi_compress/brotli_enc_static_init.c +3 -0
  8. data/ext/multi_compress/extconf.rb +22 -1
  9. data/ext/multi_compress/vendor/.vendored +2 -2
  10. data/ext/multi_compress/vendor/brotli/LICENSE +19 -0
  11. data/ext/multi_compress/vendor/brotli/c/common/constants.c +7 -7
  12. data/ext/multi_compress/vendor/brotli/c/common/constants.h +2 -5
  13. data/ext/multi_compress/vendor/brotli/c/common/context.c +2 -2
  14. data/ext/multi_compress/vendor/brotli/c/common/context.h +1 -2
  15. data/ext/multi_compress/vendor/brotli/c/common/dictionary.c +4 -5856
  16. data/ext/multi_compress/vendor/brotli/c/common/dictionary.h +1 -2
  17. data/ext/multi_compress/vendor/brotli/c/common/dictionary_inc.h +5847 -0
  18. data/ext/multi_compress/vendor/brotli/c/common/platform.c +0 -4
  19. data/ext/multi_compress/vendor/brotli/c/common/platform.h +182 -43
  20. data/ext/multi_compress/vendor/brotli/c/common/shared_dictionary.c +3 -7
  21. data/ext/multi_compress/vendor/brotli/c/common/shared_dictionary_internal.h +1 -1
  22. data/ext/multi_compress/vendor/brotli/c/common/static_init.h +56 -0
  23. data/ext/multi_compress/vendor/brotli/c/common/transform.c +6 -4
  24. data/ext/multi_compress/vendor/brotli/c/common/transform.h +1 -2
  25. data/ext/multi_compress/vendor/brotli/c/common/version.h +3 -3
  26. data/ext/multi_compress/vendor/brotli/c/dec/bit_reader.c +2 -3
  27. data/ext/multi_compress/vendor/brotli/c/dec/bit_reader.h +0 -4
  28. data/ext/multi_compress/vendor/brotli/c/dec/decode.c +128 -39
  29. data/ext/multi_compress/vendor/brotli/c/dec/huffman.c +2 -5
  30. data/ext/multi_compress/vendor/brotli/c/dec/huffman.h +0 -2
  31. data/ext/multi_compress/vendor/brotli/c/dec/prefix.c +67 -0
  32. data/ext/multi_compress/vendor/brotli/c/dec/prefix.h +18 -708
  33. data/ext/multi_compress/vendor/brotli/c/dec/prefix_inc.h +707 -0
  34. data/ext/multi_compress/vendor/brotli/c/dec/state.c +18 -15
  35. data/ext/multi_compress/vendor/brotli/c/dec/state.h +2 -6
  36. data/ext/multi_compress/vendor/brotli/c/dec/static_init.c +53 -0
  37. data/ext/multi_compress/vendor/brotli/c/dec/static_init.h +30 -0
  38. data/ext/multi_compress/vendor/brotli/c/enc/backward_references.c +32 -8
  39. data/ext/multi_compress/vendor/brotli/c/enc/backward_references.h +1 -5
  40. data/ext/multi_compress/vendor/brotli/c/enc/backward_references_hq.c +15 -15
  41. data/ext/multi_compress/vendor/brotli/c/enc/backward_references_hq.h +1 -5
  42. data/ext/multi_compress/vendor/brotli/c/enc/bit_cost.c +28 -4
  43. data/ext/multi_compress/vendor/brotli/c/enc/bit_cost.h +8 -40
  44. data/ext/multi_compress/vendor/brotli/c/enc/bit_cost_inc.h +1 -1
  45. data/ext/multi_compress/vendor/brotli/c/enc/block_splitter.c +9 -12
  46. data/ext/multi_compress/vendor/brotli/c/enc/block_splitter.h +0 -3
  47. data/ext/multi_compress/vendor/brotli/c/enc/block_splitter_inc.h +14 -8
  48. data/ext/multi_compress/vendor/brotli/c/enc/brotli_bit_stream.c +10 -9
  49. data/ext/multi_compress/vendor/brotli/c/enc/brotli_bit_stream.h +0 -6
  50. data/ext/multi_compress/vendor/brotli/c/enc/cluster.c +0 -2
  51. data/ext/multi_compress/vendor/brotli/c/enc/cluster.h +0 -2
  52. data/ext/multi_compress/vendor/brotli/c/enc/command.c +1 -1
  53. data/ext/multi_compress/vendor/brotli/c/enc/command.h +8 -10
  54. data/ext/multi_compress/vendor/brotli/c/enc/compound_dictionary.c +3 -5
  55. data/ext/multi_compress/vendor/brotli/c/enc/compound_dictionary.h +1 -4
  56. data/ext/multi_compress/vendor/brotli/c/enc/compress_fragment.c +3 -13
  57. data/ext/multi_compress/vendor/brotli/c/enc/compress_fragment.h +0 -2
  58. data/ext/multi_compress/vendor/brotli/c/enc/compress_fragment_two_pass.c +5 -15
  59. data/ext/multi_compress/vendor/brotli/c/enc/compress_fragment_two_pass.h +0 -2
  60. data/ext/multi_compress/vendor/brotli/c/enc/dictionary_hash.c +127 -1830
  61. data/ext/multi_compress/vendor/brotli/c/enc/dictionary_hash.h +23 -3
  62. data/ext/multi_compress/vendor/brotli/c/enc/dictionary_hash_inc.h +1829 -0
  63. data/ext/multi_compress/vendor/brotli/c/enc/encode.c +77 -52
  64. data/ext/multi_compress/vendor/brotli/c/enc/encoder_dict.c +9 -7
  65. data/ext/multi_compress/vendor/brotli/c/enc/encoder_dict.h +2 -4
  66. data/ext/multi_compress/vendor/brotli/c/enc/entropy_encode.c +3 -6
  67. data/ext/multi_compress/vendor/brotli/c/enc/entropy_encode.h +2 -4
  68. data/ext/multi_compress/vendor/brotli/c/enc/entropy_encode_static.h +18 -12
  69. data/ext/multi_compress/vendor/brotli/c/enc/fast_log.c +1 -1
  70. data/ext/multi_compress/vendor/brotli/c/enc/fast_log.h +2 -3
  71. data/ext/multi_compress/vendor/brotli/c/enc/find_match_length.h +0 -2
  72. data/ext/multi_compress/vendor/brotli/c/enc/hash.h +38 -31
  73. data/ext/multi_compress/vendor/brotli/c/enc/hash_base.h +38 -0
  74. data/ext/multi_compress/vendor/brotli/c/enc/hash_forgetful_chain_inc.h +11 -1
  75. data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match64_inc.h +24 -7
  76. data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match64_simd_inc.h +304 -0
  77. data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match_inc.h +30 -11
  78. data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match_quickly_inc.h +4 -0
  79. data/ext/multi_compress/vendor/brotli/c/enc/hash_longest_match_simd_inc.h +278 -0
  80. data/ext/multi_compress/vendor/brotli/c/enc/histogram.c +1 -0
  81. data/ext/multi_compress/vendor/brotli/c/enc/histogram.h +0 -4
  82. data/ext/multi_compress/vendor/brotli/c/enc/literal_cost.c +4 -6
  83. data/ext/multi_compress/vendor/brotli/c/enc/literal_cost.h +0 -2
  84. data/ext/multi_compress/vendor/brotli/c/enc/matching_tag_mask.h +69 -0
  85. data/ext/multi_compress/vendor/brotli/c/enc/memory.c +0 -5
  86. data/ext/multi_compress/vendor/brotli/c/enc/memory.h +0 -4
  87. data/ext/multi_compress/vendor/brotli/c/enc/metablock.c +7 -9
  88. data/ext/multi_compress/vendor/brotli/c/enc/metablock.h +3 -3
  89. data/ext/multi_compress/vendor/brotli/c/enc/metablock_inc.h +4 -4
  90. data/ext/multi_compress/vendor/brotli/c/enc/params.h +0 -1
  91. data/ext/multi_compress/vendor/brotli/c/enc/prefix.h +0 -2
  92. data/ext/multi_compress/vendor/brotli/c/enc/quality.h +17 -10
  93. data/ext/multi_compress/vendor/brotli/c/enc/ringbuffer.h +1 -4
  94. data/ext/multi_compress/vendor/brotli/c/enc/state.h +2 -2
  95. data/ext/multi_compress/vendor/brotli/c/enc/static_dict.c +5 -11
  96. data/ext/multi_compress/vendor/brotli/c/enc/static_dict.h +1 -3
  97. data/ext/multi_compress/vendor/brotli/c/enc/static_dict_lut.c +224 -0
  98. data/ext/multi_compress/vendor/brotli/c/enc/static_dict_lut.h +20 -5837
  99. data/ext/multi_compress/vendor/brotli/c/enc/static_dict_lut_inc.h +5830 -0
  100. data/ext/multi_compress/vendor/brotli/c/enc/static_init.c +59 -0
  101. data/ext/multi_compress/vendor/brotli/c/enc/static_init.h +30 -0
  102. data/ext/multi_compress/vendor/brotli/c/enc/static_init_lazy.cc +26 -0
  103. data/ext/multi_compress/vendor/brotli/c/enc/utf8_util.c +1 -1
  104. data/ext/multi_compress/vendor/brotli/c/enc/utf8_util.h +0 -2
  105. data/ext/multi_compress/vendor/brotli/c/enc/write_bits.h +0 -2
  106. data/ext/multi_compress/vendor/brotli/c/include/brotli/decode.h +1 -1
  107. data/ext/multi_compress/vendor/brotli/c/include/brotli/encode.h +5 -1
  108. data/ext/multi_compress/vendor/brotli/c/include/brotli/port.h +4 -7
  109. data/ext/multi_compress/vendor/brotli/c/include/brotli/types.h +2 -2
  110. data/ext/multi_compress/vendor/lz4/LICENSE +12 -0
  111. data/ext/multi_compress/vendor/zstd/COPYING +339 -0
  112. data/ext/multi_compress/vendor/zstd/LICENSE +30 -0
  113. data/ext/multi_compress/vendor/zstd/lib/Makefile +67 -35
  114. data/ext/multi_compress/vendor/zstd/lib/README.md +33 -2
  115. data/ext/multi_compress/vendor/zstd/lib/common/allocations.h +55 -0
  116. data/ext/multi_compress/vendor/zstd/lib/common/bits.h +205 -0
  117. data/ext/multi_compress/vendor/zstd/lib/common/bitstream.h +84 -108
  118. data/ext/multi_compress/vendor/zstd/lib/common/compiler.h +170 -41
  119. data/ext/multi_compress/vendor/zstd/lib/common/cpu.h +37 -1
  120. data/ext/multi_compress/vendor/zstd/lib/common/debug.c +7 -1
  121. data/ext/multi_compress/vendor/zstd/lib/common/debug.h +21 -21
  122. data/ext/multi_compress/vendor/zstd/lib/common/entropy_common.c +12 -40
  123. data/ext/multi_compress/vendor/zstd/lib/common/error_private.c +10 -2
  124. data/ext/multi_compress/vendor/zstd/lib/common/error_private.h +46 -47
  125. data/ext/multi_compress/vendor/zstd/lib/common/fse.h +8 -100
  126. data/ext/multi_compress/vendor/zstd/lib/common/fse_decompress.c +28 -116
  127. data/ext/multi_compress/vendor/zstd/lib/common/huf.h +79 -166
  128. data/ext/multi_compress/vendor/zstd/lib/common/mem.h +46 -66
  129. data/ext/multi_compress/vendor/zstd/lib/common/pool.c +27 -11
  130. data/ext/multi_compress/vendor/zstd/lib/common/pool.h +8 -11
  131. data/ext/multi_compress/vendor/zstd/lib/common/portability_macros.h +45 -11
  132. data/ext/multi_compress/vendor/zstd/lib/common/threading.c +74 -14
  133. data/ext/multi_compress/vendor/zstd/lib/common/threading.h +5 -18
  134. data/ext/multi_compress/vendor/zstd/lib/common/xxhash.c +5 -11
  135. data/ext/multi_compress/vendor/zstd/lib/common/xxhash.h +2411 -1003
  136. data/ext/multi_compress/vendor/zstd/lib/common/zstd_common.c +1 -36
  137. data/ext/multi_compress/vendor/zstd/lib/common/zstd_deps.h +13 -1
  138. data/ext/multi_compress/vendor/zstd/lib/common/zstd_internal.h +13 -182
  139. data/ext/multi_compress/vendor/zstd/lib/common/zstd_trace.h +6 -13
  140. data/ext/multi_compress/vendor/zstd/lib/compress/clevels.h +1 -1
  141. data/ext/multi_compress/vendor/zstd/lib/compress/fse_compress.c +15 -131
  142. data/ext/multi_compress/vendor/zstd/lib/compress/hist.c +11 -1
  143. data/ext/multi_compress/vendor/zstd/lib/compress/hist.h +8 -1
  144. data/ext/multi_compress/vendor/zstd/lib/compress/huf_compress.c +283 -189
  145. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress.c +2419 -903
  146. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_internal.h +423 -245
  147. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_literals.c +116 -40
  148. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_literals.h +16 -8
  149. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_sequences.c +10 -10
  150. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_sequences.h +8 -7
  151. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_superblock.c +254 -139
  152. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_compress_superblock.h +1 -1
  153. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_cwksp.h +184 -95
  154. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_double_fast.c +163 -81
  155. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_double_fast.h +18 -14
  156. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_fast.c +507 -197
  157. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_fast.h +7 -14
  158. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_lazy.c +579 -484
  159. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_lazy.h +133 -65
  160. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_ldm.c +61 -40
  161. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_ldm.h +7 -15
  162. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_ldm_geartab.h +1 -1
  163. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_opt.c +352 -218
  164. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_opt.h +37 -21
  165. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_preSplit.c +238 -0
  166. data/ext/multi_compress/vendor/zstd/lib/compress/zstd_preSplit.h +33 -0
  167. data/ext/multi_compress/vendor/zstd/lib/compress/zstdmt_compress.c +239 -175
  168. data/ext/multi_compress/vendor/zstd/lib/compress/zstdmt_compress.h +5 -16
  169. data/ext/multi_compress/vendor/zstd/lib/decompress/huf_decompress.c +543 -488
  170. data/ext/multi_compress/vendor/zstd/lib/decompress/huf_decompress_amd64.S +78 -61
  171. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_ddict.c +4 -4
  172. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_ddict.h +1 -1
  173. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress.c +295 -115
  174. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress_block.c +430 -293
  175. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress_block.h +7 -2
  176. data/ext/multi_compress/vendor/zstd/lib/decompress/zstd_decompress_internal.h +11 -7
  177. data/ext/multi_compress/vendor/zstd/lib/deprecated/zbuff.h +1 -1
  178. data/ext/multi_compress/vendor/zstd/lib/deprecated/zbuff_common.c +1 -1
  179. data/ext/multi_compress/vendor/zstd/lib/deprecated/zbuff_compress.c +1 -1
  180. data/ext/multi_compress/vendor/zstd/lib/deprecated/zbuff_decompress.c +3 -1
  181. data/ext/multi_compress/vendor/zstd/lib/dictBuilder/cover.c +95 -46
  182. data/ext/multi_compress/vendor/zstd/lib/dictBuilder/cover.h +3 -9
  183. data/ext/multi_compress/vendor/zstd/lib/dictBuilder/divsufsort.h +0 -10
  184. data/ext/multi_compress/vendor/zstd/lib/dictBuilder/fastcover.c +4 -4
  185. data/ext/multi_compress/vendor/zstd/lib/dictBuilder/zdict.c +25 -97
  186. data/ext/multi_compress/vendor/zstd/lib/dll/example/Makefile +1 -1
  187. data/ext/multi_compress/vendor/zstd/lib/dll/example/README.md +1 -1
  188. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_legacy.h +38 -1
  189. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v01.c +19 -50
  190. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v01.h +1 -1
  191. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v02.c +27 -80
  192. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v02.h +1 -1
  193. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v03.c +28 -83
  194. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v03.h +1 -1
  195. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v04.c +25 -74
  196. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v04.h +1 -1
  197. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v05.c +31 -76
  198. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v05.h +1 -1
  199. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v06.c +44 -88
  200. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v06.h +1 -1
  201. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v07.c +33 -84
  202. data/ext/multi_compress/vendor/zstd/lib/legacy/zstd_v07.h +1 -1
  203. data/ext/multi_compress/vendor/zstd/lib/libzstd.mk +65 -33
  204. data/ext/multi_compress/vendor/zstd/lib/libzstd.pc.in +5 -5
  205. data/ext/multi_compress/vendor/zstd/lib/module.modulemap +13 -3
  206. data/ext/multi_compress/vendor/zstd/lib/zdict.h +65 -36
  207. data/ext/multi_compress/vendor/zstd/lib/zstd.h +890 -267
  208. data/ext/multi_compress/vendor/zstd/lib/zstd_errors.h +28 -16
  209. data/lib/multi_compress/version.rb +1 -1
  210. metadata +29 -2
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -13,11 +13,36 @@
13
13
  ***************************************/
14
14
  #include "zstd_compress_literals.h"
15
15
 
16
+
17
+ /* **************************************************************
18
+ * Debug Traces
19
+ ****************************************************************/
20
+ #if DEBUGLEVEL >= 2
21
+
22
+ static size_t showHexa(const void* src, size_t srcSize)
23
+ {
24
+ const BYTE* const ip = (const BYTE*)src;
25
+ size_t u;
26
+ for (u=0; u<srcSize; u++) {
27
+ RAWLOG(5, " %02X", ip[u]); (void)ip;
28
+ }
29
+ RAWLOG(5, " \n");
30
+ return srcSize;
31
+ }
32
+
33
+ #endif
34
+
35
+
36
+ /* **************************************************************
37
+ * Literals compression - special cases
38
+ ****************************************************************/
16
39
  size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
17
40
  {
18
41
  BYTE* const ostart = (BYTE*)dst;
19
42
  U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
20
43
 
44
+ DEBUGLOG(5, "ZSTD_noCompressLiterals: srcSize=%zu, dstCapacity=%zu", srcSize, dstCapacity);
45
+
21
46
  RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall, "");
22
47
 
23
48
  switch(flSize)
@@ -36,16 +61,30 @@ size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src,
36
61
  }
37
62
 
38
63
  ZSTD_memcpy(ostart + flSize, src, srcSize);
39
- DEBUGLOG(5, "Raw literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize));
64
+ DEBUGLOG(5, "Raw (uncompressed) literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize));
40
65
  return srcSize + flSize;
41
66
  }
42
67
 
68
+ static int allBytesIdentical(const void* src, size_t srcSize)
69
+ {
70
+ assert(srcSize >= 1);
71
+ assert(src != NULL);
72
+ { const BYTE b = ((const BYTE*)src)[0];
73
+ size_t p;
74
+ for (p=1; p<srcSize; p++) {
75
+ if (((const BYTE*)src)[p] != b) return 0;
76
+ }
77
+ return 1;
78
+ }
79
+ }
80
+
43
81
  size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
44
82
  {
45
83
  BYTE* const ostart = (BYTE*)dst;
46
84
  U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
47
85
 
48
- (void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */
86
+ assert(dstCapacity >= 4); (void)dstCapacity;
87
+ assert(allBytesIdentical(src, srcSize));
49
88
 
50
89
  switch(flSize)
51
90
  {
@@ -63,28 +102,51 @@ size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void*
63
102
  }
64
103
 
65
104
  ostart[flSize] = *(const BYTE*)src;
66
- DEBUGLOG(5, "RLE literals: %u -> %u", (U32)srcSize, (U32)flSize + 1);
105
+ DEBUGLOG(5, "RLE : Repeated Literal (%02X: %u times) -> %u bytes encoded", ((const BYTE*)src)[0], (U32)srcSize, (U32)flSize + 1);
67
106
  return flSize+1;
68
107
  }
69
108
 
70
- size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
71
- ZSTD_hufCTables_t* nextHuf,
72
- ZSTD_strategy strategy, int disableLiteralCompression,
73
- void* dst, size_t dstCapacity,
74
- const void* src, size_t srcSize,
75
- void* entropyWorkspace, size_t entropyWorkspaceSize,
76
- const int bmi2,
77
- unsigned suspectUncompressible)
109
+ /* ZSTD_minLiteralsToCompress() :
110
+ * returns minimal amount of literals
111
+ * for literal compression to even be attempted.
112
+ * Minimum is made tighter as compression strategy increases.
113
+ */
114
+ static size_t
115
+ ZSTD_minLiteralsToCompress(ZSTD_strategy strategy, HUF_repeat huf_repeat)
116
+ {
117
+ assert((int)strategy >= 0);
118
+ assert((int)strategy <= 9);
119
+ /* btultra2 : min 8 bytes;
120
+ * then 2x larger for each successive compression strategy
121
+ * max threshold 64 bytes */
122
+ { int const shift = MIN(9-(int)strategy, 3);
123
+ size_t const mintc = (huf_repeat == HUF_repeat_valid) ? 6 : (size_t)8 << shift;
124
+ DEBUGLOG(7, "minLiteralsToCompress = %zu", mintc);
125
+ return mintc;
126
+ }
127
+ }
128
+
129
+ size_t ZSTD_compressLiterals (
130
+ void* dst, size_t dstCapacity,
131
+ const void* src, size_t srcSize,
132
+ void* entropyWorkspace, size_t entropyWorkspaceSize,
133
+ const ZSTD_hufCTables_t* prevHuf,
134
+ ZSTD_hufCTables_t* nextHuf,
135
+ ZSTD_strategy strategy,
136
+ int disableLiteralCompression,
137
+ int suspectUncompressible,
138
+ int bmi2)
78
139
  {
79
- size_t const minGain = ZSTD_minGain(srcSize, strategy);
80
140
  size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
81
141
  BYTE* const ostart = (BYTE*)dst;
82
142
  U32 singleStream = srcSize < 256;
83
- symbolEncodingType_e hType = set_compressed;
143
+ SymbolEncodingType_e hType = set_compressed;
84
144
  size_t cLitSize;
85
145
 
86
- DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i srcSize=%u)",
87
- disableLiteralCompression, (U32)srcSize);
146
+ DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i, srcSize=%u, dstCapacity=%zu)",
147
+ disableLiteralCompression, (U32)srcSize, dstCapacity);
148
+
149
+ DEBUGLOG(6, "Completed literals listing (%zu bytes)", showHexa(src, srcSize));
88
150
 
89
151
  /* Prepare nextEntropy assuming reusing the existing table */
90
152
  ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
@@ -92,40 +154,51 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
92
154
  if (disableLiteralCompression)
93
155
  return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
94
156
 
95
- /* small ? don't even attempt compression (speed opt) */
96
- # define COMPRESS_LITERALS_SIZE_MIN 63
97
- { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
98
- if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
99
- }
157
+ /* if too small, don't even attempt compression (speed opt) */
158
+ if (srcSize < ZSTD_minLiteralsToCompress(strategy, prevHuf->repeatMode))
159
+ return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
100
160
 
101
161
  RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression");
102
162
  { HUF_repeat repeat = prevHuf->repeatMode;
103
- int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
163
+ int const flags = 0
164
+ | (bmi2 ? HUF_flags_bmi2 : 0)
165
+ | (strategy < ZSTD_lazy && srcSize <= 1024 ? HUF_flags_preferRepeat : 0)
166
+ | (strategy >= HUF_OPTIMAL_DEPTH_THRESHOLD ? HUF_flags_optimalDepth : 0)
167
+ | (suspectUncompressible ? HUF_flags_suspectUncompressible : 0);
168
+
169
+ typedef size_t (*huf_compress_f)(void*, size_t, const void*, size_t, unsigned, unsigned, void*, size_t, HUF_CElt*, HUF_repeat*, int);
170
+ huf_compress_f huf_compress;
104
171
  if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
105
- cLitSize = singleStream ?
106
- HUF_compress1X_repeat(
107
- ostart+lhSize, dstCapacity-lhSize, src, srcSize,
108
- HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize,
109
- (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2, suspectUncompressible) :
110
- HUF_compress4X_repeat(
111
- ostart+lhSize, dstCapacity-lhSize, src, srcSize,
112
- HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize,
113
- (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2, suspectUncompressible);
172
+ huf_compress = singleStream ? HUF_compress1X_repeat : HUF_compress4X_repeat;
173
+ cLitSize = huf_compress(ostart+lhSize, dstCapacity-lhSize,
174
+ src, srcSize,
175
+ HUF_SYMBOLVALUE_MAX, LitHufLog,
176
+ entropyWorkspace, entropyWorkspaceSize,
177
+ (HUF_CElt*)nextHuf->CTable,
178
+ &repeat, flags);
179
+ DEBUGLOG(5, "%zu literals compressed into %zu bytes (before header)", srcSize, cLitSize);
114
180
  if (repeat != HUF_repeat_none) {
115
181
  /* reused the existing table */
116
- DEBUGLOG(5, "Reusing previous huffman table");
182
+ DEBUGLOG(5, "reusing statistics from previous huffman block");
117
183
  hType = set_repeat;
118
184
  }
119
185
  }
120
186
 
121
- if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) {
122
- ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
123
- return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
124
- }
187
+ { size_t const minGain = ZSTD_minGain(srcSize, strategy);
188
+ if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) {
189
+ ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
190
+ return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
191
+ } }
125
192
  if (cLitSize==1) {
126
- ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
127
- return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
128
- }
193
+ /* A return value of 1 signals that the alphabet consists of a single symbol.
194
+ * However, in some rare circumstances, it could be the compressed size (a single byte).
195
+ * For that outcome to have a chance to happen, it's necessary that `srcSize < 8`.
196
+ * (it's also necessary to not generate statistics).
197
+ * Therefore, in such a case, actively check that all bytes are identical. */
198
+ if ((srcSize >= 8) || allBytesIdentical(src, srcSize)) {
199
+ ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
200
+ return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
201
+ } }
129
202
 
130
203
  if (hType == set_compressed) {
131
204
  /* using a newly constructed table */
@@ -136,16 +209,19 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
136
209
  switch(lhSize)
137
210
  {
138
211
  case 3: /* 2 - 2 - 10 - 10 */
139
- { U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
212
+ if (!singleStream) assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
213
+ { U32 const lhc = hType + ((U32)(!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
140
214
  MEM_writeLE24(ostart, lhc);
141
215
  break;
142
216
  }
143
217
  case 4: /* 2 - 2 - 14 - 14 */
218
+ assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
144
219
  { U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18);
145
220
  MEM_writeLE32(ostart, lhc);
146
221
  break;
147
222
  }
148
223
  case 5: /* 2 - 2 - 18 - 18 */
224
+ assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
149
225
  { U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22);
150
226
  MEM_writeLE32(ostart, lhc);
151
227
  ostart[4] = (BYTE)(cLitSize >> 10);
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -16,16 +16,24 @@
16
16
 
17
17
  size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
18
18
 
19
+ /* ZSTD_compressRleLiteralsBlock() :
20
+ * Conditions :
21
+ * - All bytes in @src are identical
22
+ * - dstCapacity >= 4 */
19
23
  size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
20
24
 
21
- /* If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */
22
- size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
23
- ZSTD_hufCTables_t* nextHuf,
24
- ZSTD_strategy strategy, int disableLiteralCompression,
25
- void* dst, size_t dstCapacity,
25
+ /* ZSTD_compressLiterals():
26
+ * @entropyWorkspace: must be aligned on 4-bytes boundaries
27
+ * @entropyWorkspaceSize : must be >= HUF_WORKSPACE_SIZE
28
+ * @suspectUncompressible: sampling checks, to potentially skip huffman coding
29
+ */
30
+ size_t ZSTD_compressLiterals (void* dst, size_t dstCapacity,
26
31
  const void* src, size_t srcSize,
27
32
  void* entropyWorkspace, size_t entropyWorkspaceSize,
28
- const int bmi2,
29
- unsigned suspectUncompressible);
33
+ const ZSTD_hufCTables_t* prevHuf,
34
+ ZSTD_hufCTables_t* nextHuf,
35
+ ZSTD_strategy strategy, int disableLiteralCompression,
36
+ int suspectUncompressible,
37
+ int bmi2);
30
38
 
31
39
  #endif /* ZSTD_COMPRESS_LITERALS_H */
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -58,7 +58,7 @@ static unsigned ZSTD_useLowProbCount(size_t const nbSeq)
58
58
  {
59
59
  /* Heuristic: This should cover most blocks <= 16K and
60
60
  * start to fade out after 16K to about 32K depending on
61
- * comprssibility.
61
+ * compressibility.
62
62
  */
63
63
  return nbSeq >= 2048;
64
64
  }
@@ -153,20 +153,20 @@ size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog,
153
153
  return cost >> 8;
154
154
  }
155
155
 
156
- symbolEncodingType_e
156
+ SymbolEncodingType_e
157
157
  ZSTD_selectEncodingType(
158
158
  FSE_repeat* repeatMode, unsigned const* count, unsigned const max,
159
159
  size_t const mostFrequent, size_t nbSeq, unsigned const FSELog,
160
160
  FSE_CTable const* prevCTable,
161
161
  short const* defaultNorm, U32 defaultNormLog,
162
- ZSTD_defaultPolicy_e const isDefaultAllowed,
162
+ ZSTD_DefaultPolicy_e const isDefaultAllowed,
163
163
  ZSTD_strategy const strategy)
164
164
  {
165
165
  ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0);
166
166
  if (mostFrequent == nbSeq) {
167
167
  *repeatMode = FSE_repeat_none;
168
168
  if (isDefaultAllowed && nbSeq <= 2) {
169
- /* Prefer set_basic over set_rle when there are 2 or less symbols,
169
+ /* Prefer set_basic over set_rle when there are 2 or fewer symbols,
170
170
  * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol.
171
171
  * If basic encoding isn't possible, always choose RLE.
172
172
  */
@@ -241,7 +241,7 @@ typedef struct {
241
241
 
242
242
  size_t
243
243
  ZSTD_buildCTable(void* dst, size_t dstCapacity,
244
- FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
244
+ FSE_CTable* nextCTable, U32 FSELog, SymbolEncodingType_e type,
245
245
  unsigned* count, U32 max,
246
246
  const BYTE* codeTable, size_t nbSeq,
247
247
  const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
@@ -293,7 +293,7 @@ ZSTD_encodeSequences_body(
293
293
  FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
294
294
  FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
295
295
  FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
296
- seqDef const* sequences, size_t nbSeq, int longOffsets)
296
+ SeqDef const* sequences, size_t nbSeq, int longOffsets)
297
297
  {
298
298
  BIT_CStream_t blockStream;
299
299
  FSE_CState_t stateMatchLength;
@@ -387,7 +387,7 @@ ZSTD_encodeSequences_default(
387
387
  FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
388
388
  FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
389
389
  FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
390
- seqDef const* sequences, size_t nbSeq, int longOffsets)
390
+ SeqDef const* sequences, size_t nbSeq, int longOffsets)
391
391
  {
392
392
  return ZSTD_encodeSequences_body(dst, dstCapacity,
393
393
  CTable_MatchLength, mlCodeTable,
@@ -405,7 +405,7 @@ ZSTD_encodeSequences_bmi2(
405
405
  FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
406
406
  FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
407
407
  FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
408
- seqDef const* sequences, size_t nbSeq, int longOffsets)
408
+ SeqDef const* sequences, size_t nbSeq, int longOffsets)
409
409
  {
410
410
  return ZSTD_encodeSequences_body(dst, dstCapacity,
411
411
  CTable_MatchLength, mlCodeTable,
@@ -421,7 +421,7 @@ size_t ZSTD_encodeSequences(
421
421
  FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
422
422
  FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
423
423
  FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
424
- seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2)
424
+ SeqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2)
425
425
  {
426
426
  DEBUGLOG(5, "ZSTD_encodeSequences: dstCapacity = %u", (unsigned)dstCapacity);
427
427
  #if DYNAMIC_BMI2
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -11,26 +11,27 @@
11
11
  #ifndef ZSTD_COMPRESS_SEQUENCES_H
12
12
  #define ZSTD_COMPRESS_SEQUENCES_H
13
13
 
14
+ #include "zstd_compress_internal.h" /* SeqDef */
14
15
  #include "../common/fse.h" /* FSE_repeat, FSE_CTable */
15
- #include "../common/zstd_internal.h" /* symbolEncodingType_e, ZSTD_strategy */
16
+ #include "../common/zstd_internal.h" /* SymbolEncodingType_e, ZSTD_strategy */
16
17
 
17
18
  typedef enum {
18
19
  ZSTD_defaultDisallowed = 0,
19
20
  ZSTD_defaultAllowed = 1
20
- } ZSTD_defaultPolicy_e;
21
+ } ZSTD_DefaultPolicy_e;
21
22
 
22
- symbolEncodingType_e
23
+ SymbolEncodingType_e
23
24
  ZSTD_selectEncodingType(
24
25
  FSE_repeat* repeatMode, unsigned const* count, unsigned const max,
25
26
  size_t const mostFrequent, size_t nbSeq, unsigned const FSELog,
26
27
  FSE_CTable const* prevCTable,
27
28
  short const* defaultNorm, U32 defaultNormLog,
28
- ZSTD_defaultPolicy_e const isDefaultAllowed,
29
+ ZSTD_DefaultPolicy_e const isDefaultAllowed,
29
30
  ZSTD_strategy const strategy);
30
31
 
31
32
  size_t
32
33
  ZSTD_buildCTable(void* dst, size_t dstCapacity,
33
- FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
34
+ FSE_CTable* nextCTable, U32 FSELog, SymbolEncodingType_e type,
34
35
  unsigned* count, U32 max,
35
36
  const BYTE* codeTable, size_t nbSeq,
36
37
  const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
@@ -42,7 +43,7 @@ size_t ZSTD_encodeSequences(
42
43
  FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
43
44
  FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
44
45
  FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable,
45
- seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2);
46
+ SeqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2);
46
47
 
47
48
  size_t ZSTD_fseBitCost(
48
49
  FSE_CTable const* ctable,