clickhouse-native 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (260) hide show
  1. checksums.yaml +7 -0
  2. data/ext/clickhouse_native/client.cpp +847 -0
  3. data/ext/clickhouse_native/extconf.rb +101 -0
  4. data/ext/clickhouse_native/vendor/clickhouse-cpp/.clang-format +11 -0
  5. data/ext/clickhouse_native/vendor/clickhouse-cpp/.git +1 -0
  6. data/ext/clickhouse_native/vendor/clickhouse-cpp/.gitattributes +63 -0
  7. data/ext/clickhouse_native/vendor/clickhouse-cpp/.github/CODEOWNERS +1 -0
  8. data/ext/clickhouse_native/vendor/clickhouse-cpp/.github/workflows/linux.yml +139 -0
  9. data/ext/clickhouse_native/vendor/clickhouse-cpp/.github/workflows/macos.yml +87 -0
  10. data/ext/clickhouse_native/vendor/clickhouse-cpp/.github/workflows/windows_mingw.yml +102 -0
  11. data/ext/clickhouse_native/vendor/clickhouse-cpp/.github/workflows/windows_msvc.yml +74 -0
  12. data/ext/clickhouse_native/vendor/clickhouse-cpp/.gitignore +280 -0
  13. data/ext/clickhouse_native/vendor/clickhouse-cpp/.travis.yml +62 -0
  14. data/ext/clickhouse_native/vendor/clickhouse-cpp/CMakeLists.txt +157 -0
  15. data/ext/clickhouse_native/vendor/clickhouse-cpp/LICENSE +206 -0
  16. data/ext/clickhouse_native/vendor/clickhouse-cpp/README.md +260 -0
  17. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/CMakeLists.txt +246 -0
  18. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/base/buffer.h +10 -0
  19. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/base/compressed.cpp +258 -0
  20. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/base/compressed.h +48 -0
  21. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/base/endpoints_iterator.cpp +20 -0
  22. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/base/endpoints_iterator.h +34 -0
  23. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/base/input.cpp +96 -0
  24. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/base/input.h +103 -0
  25. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/base/open_telemetry.h +23 -0
  26. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/base/output.cpp +119 -0
  27. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/base/output.h +142 -0
  28. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/base/platform.cpp +1 -0
  29. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/base/platform.h +33 -0
  30. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/base/projected_iterator.h +55 -0
  31. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/base/singleton.h +11 -0
  32. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/base/socket.cpp +489 -0
  33. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/base/socket.h +177 -0
  34. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/base/sslsocket.cpp +307 -0
  35. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/base/sslsocket.h +111 -0
  36. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/base/string_utils.h +28 -0
  37. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/base/string_view.h +142 -0
  38. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/base/uuid.h +10 -0
  39. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/base/wire_format.cpp +177 -0
  40. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/base/wire_format.h +79 -0
  41. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/block.cpp +134 -0
  42. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/block.h +114 -0
  43. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/client.cpp +1269 -0
  44. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/client.h +320 -0
  45. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/columns/array.cpp +175 -0
  46. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/columns/array.h +321 -0
  47. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/columns/column.cpp +24 -0
  48. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/columns/column.h +107 -0
  49. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/columns/date.cpp +365 -0
  50. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/columns/date.h +245 -0
  51. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/columns/decimal.cpp +255 -0
  52. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/columns/decimal.h +50 -0
  53. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/columns/enum.cpp +123 -0
  54. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/columns/enum.h +65 -0
  55. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/columns/factory.cpp +285 -0
  56. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/columns/factory.h +14 -0
  57. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/columns/geo.cpp +108 -0
  58. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/columns/geo.h +79 -0
  59. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/columns/ip4.cpp +117 -0
  60. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/columns/ip4.h +71 -0
  61. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/columns/ip6.cpp +108 -0
  62. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/columns/ip6.h +66 -0
  63. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/columns/itemview.cpp +101 -0
  64. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/columns/itemview.h +86 -0
  65. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/columns/lowcardinality.cpp +527 -0
  66. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/columns/lowcardinality.h +221 -0
  67. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/columns/lowcardinalityadaptor.h +63 -0
  68. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/columns/map.cpp +87 -0
  69. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/columns/map.h +257 -0
  70. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/columns/nothing.h +87 -0
  71. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/columns/nullable.cpp +109 -0
  72. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/columns/nullable.h +153 -0
  73. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/columns/numeric.cpp +126 -0
  74. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/columns/numeric.h +88 -0
  75. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/columns/string.cpp +333 -0
  76. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/columns/string.h +145 -0
  77. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/columns/time.cpp +155 -0
  78. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/columns/time.h +118 -0
  79. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/columns/tuple.cpp +106 -0
  80. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/columns/tuple.h +178 -0
  81. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/columns/utils.h +41 -0
  82. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/columns/uuid.cpp +80 -0
  83. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/columns/uuid.h +58 -0
  84. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/error_codes.h +595 -0
  85. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/exceptions.h +66 -0
  86. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/protocol.h +54 -0
  87. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/query.cpp +25 -0
  88. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/query.h +246 -0
  89. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/server_exception.h +16 -0
  90. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/types/type_parser.cpp +314 -0
  91. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/types/type_parser.h +89 -0
  92. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/types/types.cpp +484 -0
  93. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/types/types.h +397 -0
  94. data/ext/clickhouse_native/vendor/clickhouse-cpp/clickhouse/version.h +14 -0
  95. data/ext/clickhouse_native/vendor/clickhouse-cpp/cmake/Findcityhash.cmake +26 -0
  96. data/ext/clickhouse_native/vendor/clickhouse-cpp/cmake/Findlz4.cmake +39 -0
  97. data/ext/clickhouse_native/vendor/clickhouse-cpp/cmake/Findzstd.cmake +39 -0
  98. data/ext/clickhouse_native/vendor/clickhouse-cpp/cmake/cpp17.cmake +8 -0
  99. data/ext/clickhouse_native/vendor/clickhouse-cpp/cmake/openssl.cmake +8 -0
  100. data/ext/clickhouse_native/vendor/clickhouse-cpp/cmake/subdirs.cmake +5 -0
  101. data/ext/clickhouse_native/vendor/clickhouse-cpp/cmake/version.cmake +87 -0
  102. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/absl/absl/CMakeLists.txt +9 -0
  103. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/absl/absl/base/attributes.h +682 -0
  104. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/absl/absl/base/config.h +714 -0
  105. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/absl/absl/base/internal/bits.h +219 -0
  106. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/absl/absl/base/macros.h +147 -0
  107. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/absl/absl/base/optimization.h +241 -0
  108. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/absl/absl/base/options.h +238 -0
  109. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/absl/absl/base/policy_checks.h +111 -0
  110. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/absl/absl/base/port.h +26 -0
  111. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/absl/absl/numeric/int128.cc +390 -0
  112. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/absl/absl/numeric/int128.h +1092 -0
  113. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/absl/absl/numeric/int128_have_intrinsic.inc +302 -0
  114. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/absl/absl/numeric/int128_no_intrinsic.inc +308 -0
  115. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/cityhash/cityhash/BUCK +13 -0
  116. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/cityhash/cityhash/CMakeLists.txt +7 -0
  117. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/cityhash/cityhash/COPYING +19 -0
  118. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/cityhash/cityhash/city.cc +469 -0
  119. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/cityhash/cityhash/city.h +90 -0
  120. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/cityhash/cityhash/citycrc.h +43 -0
  121. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/cityhash/cityhash/config.h +118 -0
  122. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/gtest/BUCK +14 -0
  123. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/gtest/CMakeLists.txt +4 -0
  124. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/gtest/LICENSE +28 -0
  125. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/gtest/README.md +1 -0
  126. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/gtest/include/gtest/gtest-death-test.h +346 -0
  127. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/gtest/include/gtest/gtest-matchers.h +930 -0
  128. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/gtest/include/gtest/gtest-message.h +219 -0
  129. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/gtest/include/gtest/gtest-param-test.h +507 -0
  130. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/gtest/include/gtest/gtest-printers.h +1029 -0
  131. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/gtest/include/gtest/gtest-spi.h +238 -0
  132. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/gtest/include/gtest/gtest-test-part.h +184 -0
  133. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/gtest/include/gtest/gtest-typed-test.h +329 -0
  134. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/gtest/include/gtest/gtest.h +2495 -0
  135. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/gtest/include/gtest/gtest_pred_impl.h +359 -0
  136. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/gtest/include/gtest/gtest_prod.h +61 -0
  137. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/gtest/include/gtest/internal/custom/README.md +56 -0
  138. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/gtest/include/gtest/internal/custom/gtest-port.h +37 -0
  139. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/gtest/include/gtest/internal/custom/gtest-printers.h +42 -0
  140. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/gtest/include/gtest/internal/custom/gtest.h +37 -0
  141. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/gtest/include/gtest/internal/gtest-death-test-internal.h +304 -0
  142. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/gtest/include/gtest/internal/gtest-filepath.h +211 -0
  143. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/gtest/include/gtest/internal/gtest-internal.h +1560 -0
  144. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/gtest/include/gtest/internal/gtest-param-util.h +947 -0
  145. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/gtest/include/gtest/internal/gtest-port-arch.h +114 -0
  146. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/gtest/include/gtest/internal/gtest-port.h +2389 -0
  147. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/gtest/include/gtest/internal/gtest-string.h +175 -0
  148. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/gtest/include/gtest/internal/gtest-type-util.h +183 -0
  149. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/gtest/src/gtest-all.cc +48 -0
  150. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/gtest/src/gtest-death-test.cc +1644 -0
  151. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/gtest/src/gtest-filepath.cc +369 -0
  152. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/gtest/src/gtest-internal-inl.h +1221 -0
  153. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/gtest/src/gtest-matchers.cc +97 -0
  154. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/gtest/src/gtest-port.cc +1433 -0
  155. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/gtest/src/gtest-printers.cc +533 -0
  156. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/gtest/src/gtest-test-part.cc +108 -0
  157. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/gtest/src/gtest-typed-test.cc +107 -0
  158. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/gtest/src/gtest.cc +6746 -0
  159. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/gtest/src/gtest_main.cc +54 -0
  160. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/lz4/lz4/BUCK +13 -0
  161. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/lz4/lz4/CMakeLists.txt +8 -0
  162. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/lz4/lz4/LICENSE +24 -0
  163. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/lz4/lz4/lz4.c +2402 -0
  164. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/lz4/lz4/lz4.h +764 -0
  165. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/lz4/lz4/lz4hc.c +1554 -0
  166. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/lz4/lz4/lz4hc.h +438 -0
  167. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/BUCK +232 -0
  168. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/CMakeLists.txt +115 -0
  169. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/LICENSE +30 -0
  170. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/common/allocations.h +55 -0
  171. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/common/bits.h +200 -0
  172. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/common/bitstream.h +437 -0
  173. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/common/compiler.h +358 -0
  174. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/common/cpu.h +213 -0
  175. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/common/debug.c +24 -0
  176. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/common/debug.h +107 -0
  177. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/common/entropy_common.c +340 -0
  178. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/common/error_private.c +63 -0
  179. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/common/error_private.h +159 -0
  180. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/common/fse.h +639 -0
  181. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/common/fse_decompress.c +311 -0
  182. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/common/huf.h +273 -0
  183. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/common/mem.h +435 -0
  184. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/common/pool.c +371 -0
  185. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/common/pool.h +90 -0
  186. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/common/portability_macros.h +156 -0
  187. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/common/threading.c +176 -0
  188. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/common/threading.h +150 -0
  189. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/common/xxhash.c +24 -0
  190. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/common/xxhash.h +5686 -0
  191. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/common/zstd_common.c +48 -0
  192. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/common/zstd_deps.h +111 -0
  193. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/common/zstd_internal.h +392 -0
  194. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/common/zstd_trace.h +163 -0
  195. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/compress/clevels.h +134 -0
  196. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/compress/fse_compress.c +624 -0
  197. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/compress/hist.c +181 -0
  198. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/compress/hist.h +75 -0
  199. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/compress/huf_compress.c +1435 -0
  200. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/compress/zstd_compress.c +7032 -0
  201. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/compress/zstd_compress_internal.h +1532 -0
  202. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/compress/zstd_compress_literals.c +235 -0
  203. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/compress/zstd_compress_literals.h +39 -0
  204. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/compress/zstd_compress_sequences.c +442 -0
  205. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/compress/zstd_compress_sequences.h +54 -0
  206. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/compress/zstd_compress_superblock.c +577 -0
  207. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/compress/zstd_compress_superblock.h +32 -0
  208. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/compress/zstd_cwksp.h +742 -0
  209. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/compress/zstd_double_fast.c +758 -0
  210. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/compress/zstd_double_fast.h +39 -0
  211. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/compress/zstd_fast.c +960 -0
  212. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/compress/zstd_fast.h +38 -0
  213. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/compress/zstd_lazy.c +2157 -0
  214. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/compress/zstd_lazy.h +127 -0
  215. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/compress/zstd_ldm.c +724 -0
  216. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/compress/zstd_ldm.h +117 -0
  217. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/compress/zstd_ldm_geartab.h +106 -0
  218. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/compress/zstd_opt.c +1472 -0
  219. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/compress/zstd_opt.h +56 -0
  220. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/compress/zstdmt_compress.c +1867 -0
  221. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/compress/zstdmt_compress.h +113 -0
  222. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/decompress/huf_decompress.c +1882 -0
  223. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/decompress/huf_decompress_amd64.S +576 -0
  224. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/decompress/zstd_ddict.c +244 -0
  225. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/decompress/zstd_ddict.h +44 -0
  226. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/decompress/zstd_decompress.c +2355 -0
  227. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/decompress/zstd_decompress_block.c +2192 -0
  228. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/decompress/zstd_decompress_block.h +73 -0
  229. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/decompress/zstd_decompress_internal.h +238 -0
  230. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/dictBuilder/cover.c +1257 -0
  231. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/dictBuilder/cover.h +158 -0
  232. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/dictBuilder/divsufsort.c +1913 -0
  233. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/dictBuilder/divsufsort.h +67 -0
  234. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/dictBuilder/fastcover.c +766 -0
  235. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/dictBuilder/zdict.c +1127 -0
  236. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/legacy/zstd_legacy.h +422 -0
  237. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/legacy/zstd_v01.c +2125 -0
  238. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/legacy/zstd_v01.h +94 -0
  239. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/legacy/zstd_v02.c +3477 -0
  240. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/legacy/zstd_v02.h +93 -0
  241. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/legacy/zstd_v03.c +3117 -0
  242. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/legacy/zstd_v03.h +93 -0
  243. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/legacy/zstd_v04.c +3605 -0
  244. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/legacy/zstd_v04.h +142 -0
  245. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/legacy/zstd_v05.c +4004 -0
  246. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/legacy/zstd_v05.h +162 -0
  247. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/legacy/zstd_v06.c +4113 -0
  248. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/legacy/zstd_v06.h +172 -0
  249. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/legacy/zstd_v07.c +4498 -0
  250. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/legacy/zstd_v07.h +187 -0
  251. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/zdict.h +474 -0
  252. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/zstd.h +3020 -0
  253. data/ext/clickhouse_native/vendor/clickhouse-cpp/contrib/zstd/zstd/zstd_errors.h +114 -0
  254. data/lib/clickhouse_native/clickhouse_native.bundle +0 -0
  255. data/lib/clickhouse_native/client.rb +50 -0
  256. data/lib/clickhouse_native/errors.rb +23 -0
  257. data/lib/clickhouse_native/pool.rb +49 -0
  258. data/lib/clickhouse_native/version.rb +3 -0
  259. data/lib/clickhouse_native.rb +8 -0
  260. metadata +369 -0
@@ -0,0 +1,2192 @@
1
+ /*
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
+ * All rights reserved.
4
+ *
5
+ * This source code is licensed under both the BSD-style license (found in the
6
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7
+ * in the COPYING file in the root directory of this source tree).
8
+ * You may select, at your option, one of the above-listed licenses.
9
+ */
10
+
11
+ /* zstd_decompress_block :
12
+ * this module takes care of decompressing _compressed_ block */
13
+
14
+ /*-*******************************************************
15
+ * Dependencies
16
+ *********************************************************/
17
+ #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
18
+ #include "../common/compiler.h" /* prefetch */
19
+ #include "../common/cpu.h" /* bmi2 */
20
+ #include "../common/mem.h" /* low level memory routines */
21
+ #define FSE_STATIC_LINKING_ONLY
22
+ #include "../common/fse.h"
23
+ #include "../common/huf.h"
24
+ #include "../common/zstd_internal.h"
25
+ #include "zstd_decompress_internal.h" /* ZSTD_DCtx */
26
+ #include "zstd_ddict.h" /* ZSTD_DDictDictContent */
27
+ #include "zstd_decompress_block.h"
28
+ #include "../common/bits.h" /* ZSTD_highbit32 */
29
+
30
+ /*_*******************************************************
31
+ * Macros
32
+ **********************************************************/
33
+
34
+ /* These two optional macros force the use one way or another of the two
35
+ * ZSTD_decompressSequences implementations. You can't force in both directions
36
+ * at the same time.
37
+ */
38
+ #if defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
39
+ defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
40
+ #error "Cannot force the use of the short and the long ZSTD_decompressSequences variants!"
41
+ #endif
42
+
43
+
44
+ /*_*******************************************************
45
+ * Memory operations
46
+ **********************************************************/
47
+ static void ZSTD_copy4(void* dst, const void* src) { ZSTD_memcpy(dst, src, 4); }
48
+
49
+
50
+ /*-*************************************************************
51
+ * Block decoding
52
+ ***************************************************************/
53
+
54
+ /*! ZSTD_getcBlockSize() :
55
+ * Provides the size of compressed block from block header `src` */
56
+ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
57
+ blockProperties_t* bpPtr)
58
+ {
59
+ RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong, "");
60
+
61
+ { U32 const cBlockHeader = MEM_readLE24(src);
62
+ U32 const cSize = cBlockHeader >> 3;
63
+ bpPtr->lastBlock = cBlockHeader & 1;
64
+ bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3);
65
+ bpPtr->origSize = cSize; /* only useful for RLE */
66
+ if (bpPtr->blockType == bt_rle) return 1;
67
+ RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected, "");
68
+ return cSize;
69
+ }
70
+ }
71
+
72
+ /* Allocate buffer for literals, either overlapping current dst, or split between dst and litExtraBuffer, or stored entirely within litExtraBuffer */
73
+ static void ZSTD_allocateLiteralsBuffer(ZSTD_DCtx* dctx, void* const dst, const size_t dstCapacity, const size_t litSize,
74
+ const streaming_operation streaming, const size_t expectedWriteSize, const unsigned splitImmediately)
75
+ {
76
+ if (streaming == not_streaming && dstCapacity > ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH + litSize + WILDCOPY_OVERLENGTH)
77
+ {
78
+ /* room for litbuffer to fit without read faulting */
79
+ dctx->litBuffer = (BYTE*)dst + ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH;
80
+ dctx->litBufferEnd = dctx->litBuffer + litSize;
81
+ dctx->litBufferLocation = ZSTD_in_dst;
82
+ }
83
+ else if (litSize > ZSTD_LITBUFFEREXTRASIZE)
84
+ {
85
+ /* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */
86
+ if (splitImmediately) {
87
+ /* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */
88
+ dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
89
+ dctx->litBufferEnd = dctx->litBuffer + litSize - ZSTD_LITBUFFEREXTRASIZE;
90
+ }
91
+ else {
92
+ /* initially this will be stored entirely in dst during huffman decoding, it will partially be shifted to litExtraBuffer after */
93
+ dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize;
94
+ dctx->litBufferEnd = (BYTE*)dst + expectedWriteSize;
95
+ }
96
+ dctx->litBufferLocation = ZSTD_split;
97
+ }
98
+ else
99
+ {
100
+ /* fits entirely within litExtraBuffer, so no split is necessary */
101
+ dctx->litBuffer = dctx->litExtraBuffer;
102
+ dctx->litBufferEnd = dctx->litBuffer + litSize;
103
+ dctx->litBufferLocation = ZSTD_not_in_dst;
104
+ }
105
+ }
106
+
107
+ /* Hidden declaration for fullbench */
108
+ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
109
+ const void* src, size_t srcSize,
110
+ void* dst, size_t dstCapacity, const streaming_operation streaming);
111
+ /*! ZSTD_decodeLiteralsBlock() :
112
+ * Where it is possible to do so without being stomped by the output during decompression, the literals block will be stored
113
+ * in the dstBuffer. If there is room to do so, it will be stored in full in the excess dst space after where the current
114
+ * block will be output. Otherwise it will be stored at the end of the current dst blockspace, with a small portion being
115
+ * stored in dctx->litExtraBuffer to help keep it "ahead" of the current output write.
116
+ *
117
+ * @return : nb of bytes read from src (< srcSize )
118
+ * note : symbol not declared but exposed for fullbench */
119
+ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
120
+ const void* src, size_t srcSize, /* note : srcSize < BLOCKSIZE */
121
+ void* dst, size_t dstCapacity, const streaming_operation streaming)
122
+ {
123
+ DEBUGLOG(5, "ZSTD_decodeLiteralsBlock");
124
+ RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, "");
125
+
126
+ { const BYTE* const istart = (const BYTE*) src;
127
+ symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3);
128
+
129
+ switch(litEncType)
130
+ {
131
+ case set_repeat:
132
+ DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block");
133
+ RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted, "");
134
+ ZSTD_FALLTHROUGH;
135
+
136
+ case set_compressed:
137
+ RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need up to 5 for case 3");
138
+ { size_t lhSize, litSize, litCSize;
139
+ U32 singleStream=0;
140
+ U32 const lhlCode = (istart[0] >> 2) & 3;
141
+ U32 const lhc = MEM_readLE32(istart);
142
+ size_t hufSuccess;
143
+ size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
144
+ int const flags = 0
145
+ | (ZSTD_DCtx_get_bmi2(dctx) ? HUF_flags_bmi2 : 0)
146
+ | (dctx->disableHufAsm ? HUF_flags_disableAsm : 0);
147
+ switch(lhlCode)
148
+ {
149
+ case 0: case 1: default: /* note : default is impossible, since lhlCode into [0..3] */
150
+ /* 2 - 2 - 10 - 10 */
151
+ singleStream = !lhlCode;
152
+ lhSize = 3;
153
+ litSize = (lhc >> 4) & 0x3FF;
154
+ litCSize = (lhc >> 14) & 0x3FF;
155
+ break;
156
+ case 2:
157
+ /* 2 - 2 - 14 - 14 */
158
+ lhSize = 4;
159
+ litSize = (lhc >> 4) & 0x3FFF;
160
+ litCSize = lhc >> 18;
161
+ break;
162
+ case 3:
163
+ /* 2 - 2 - 18 - 18 */
164
+ lhSize = 5;
165
+ litSize = (lhc >> 4) & 0x3FFFF;
166
+ litCSize = (lhc >> 22) + ((size_t)istart[4] << 10);
167
+ break;
168
+ }
169
+ RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
170
+ RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
171
+ if (!singleStream)
172
+ RETURN_ERROR_IF(litSize < MIN_LITERALS_FOR_4_STREAMS, literals_headerWrong,
173
+ "Not enough literals (%zu) for the 4-streams mode (min %u)",
174
+ litSize, MIN_LITERALS_FOR_4_STREAMS);
175
+ RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, "");
176
+ RETURN_ERROR_IF(expectedWriteSize < litSize , dstSize_tooSmall, "");
177
+ ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 0);
178
+
179
+ /* prefetch huffman table if cold */
180
+ if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) {
181
+ PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable));
182
+ }
183
+
184
+ if (litEncType==set_repeat) {
185
+ if (singleStream) {
186
+ hufSuccess = HUF_decompress1X_usingDTable(
187
+ dctx->litBuffer, litSize, istart+lhSize, litCSize,
188
+ dctx->HUFptr, flags);
189
+ } else {
190
+ assert(litSize >= MIN_LITERALS_FOR_4_STREAMS);
191
+ hufSuccess = HUF_decompress4X_usingDTable(
192
+ dctx->litBuffer, litSize, istart+lhSize, litCSize,
193
+ dctx->HUFptr, flags);
194
+ }
195
+ } else {
196
+ if (singleStream) {
197
+ #if defined(HUF_FORCE_DECOMPRESS_X2)
198
+ hufSuccess = HUF_decompress1X_DCtx_wksp(
199
+ dctx->entropy.hufTable, dctx->litBuffer, litSize,
200
+ istart+lhSize, litCSize, dctx->workspace,
201
+ sizeof(dctx->workspace), flags);
202
+ #else
203
+ hufSuccess = HUF_decompress1X1_DCtx_wksp(
204
+ dctx->entropy.hufTable, dctx->litBuffer, litSize,
205
+ istart+lhSize, litCSize, dctx->workspace,
206
+ sizeof(dctx->workspace), flags);
207
+ #endif
208
+ } else {
209
+ hufSuccess = HUF_decompress4X_hufOnly_wksp(
210
+ dctx->entropy.hufTable, dctx->litBuffer, litSize,
211
+ istart+lhSize, litCSize, dctx->workspace,
212
+ sizeof(dctx->workspace), flags);
213
+ }
214
+ }
215
+ if (dctx->litBufferLocation == ZSTD_split)
216
+ {
217
+ ZSTD_memcpy(dctx->litExtraBuffer, dctx->litBufferEnd - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
218
+ ZSTD_memmove(dctx->litBuffer + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH, dctx->litBuffer, litSize - ZSTD_LITBUFFEREXTRASIZE);
219
+ dctx->litBuffer += ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
220
+ dctx->litBufferEnd -= WILDCOPY_OVERLENGTH;
221
+ }
222
+
223
+ RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, "");
224
+
225
+ dctx->litPtr = dctx->litBuffer;
226
+ dctx->litSize = litSize;
227
+ dctx->litEntropy = 1;
228
+ if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable;
229
+ return litCSize + lhSize;
230
+ }
231
+
232
+ case set_basic:
233
+ { size_t litSize, lhSize;
234
+ U32 const lhlCode = ((istart[0]) >> 2) & 3;
235
+ size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
236
+ switch(lhlCode)
237
+ {
238
+ case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */
239
+ lhSize = 1;
240
+ litSize = istart[0] >> 3;
241
+ break;
242
+ case 1:
243
+ lhSize = 2;
244
+ litSize = MEM_readLE16(istart) >> 4;
245
+ break;
246
+ case 3:
247
+ lhSize = 3;
248
+ RETURN_ERROR_IF(srcSize<3, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize = 3");
249
+ litSize = MEM_readLE24(istart) >> 4;
250
+ break;
251
+ }
252
+
253
+ RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
254
+ RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
255
+ ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
256
+ if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */
257
+ RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, "");
258
+ if (dctx->litBufferLocation == ZSTD_split)
259
+ {
260
+ ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize - ZSTD_LITBUFFEREXTRASIZE);
261
+ ZSTD_memcpy(dctx->litExtraBuffer, istart + lhSize + litSize - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
262
+ }
263
+ else
264
+ {
265
+ ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize);
266
+ }
267
+ dctx->litPtr = dctx->litBuffer;
268
+ dctx->litSize = litSize;
269
+ return lhSize+litSize;
270
+ }
271
+ /* direct reference into compressed stream */
272
+ dctx->litPtr = istart+lhSize;
273
+ dctx->litSize = litSize;
274
+ dctx->litBufferEnd = dctx->litPtr + litSize;
275
+ dctx->litBufferLocation = ZSTD_not_in_dst;
276
+ return lhSize+litSize;
277
+ }
278
+
279
+ case set_rle:
280
+ { U32 const lhlCode = ((istart[0]) >> 2) & 3;
281
+ size_t litSize, lhSize;
282
+ size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
283
+ switch(lhlCode)
284
+ {
285
+ case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */
286
+ lhSize = 1;
287
+ litSize = istart[0] >> 3;
288
+ break;
289
+ case 1:
290
+ lhSize = 2;
291
+ RETURN_ERROR_IF(srcSize<3, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 3");
292
+ litSize = MEM_readLE16(istart) >> 4;
293
+ break;
294
+ case 3:
295
+ lhSize = 3;
296
+ RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 4");
297
+ litSize = MEM_readLE24(istart) >> 4;
298
+ break;
299
+ }
300
+ RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
301
+ RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
302
+ RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
303
+ ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
304
+ if (dctx->litBufferLocation == ZSTD_split)
305
+ {
306
+ ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize - ZSTD_LITBUFFEREXTRASIZE);
307
+ ZSTD_memset(dctx->litExtraBuffer, istart[lhSize], ZSTD_LITBUFFEREXTRASIZE);
308
+ }
309
+ else
310
+ {
311
+ ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize);
312
+ }
313
+ dctx->litPtr = dctx->litBuffer;
314
+ dctx->litSize = litSize;
315
+ return lhSize+1;
316
+ }
317
+ default:
318
+ RETURN_ERROR(corruption_detected, "impossible");
319
+ }
320
+ }
321
+ }
322
+
323
+ /* Default FSE distribution tables.
324
+ * These are pre-calculated FSE decoding tables using default distributions as defined in specification :
325
+ * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions
326
+ * They were generated programmatically with following method :
327
+ * - start from default distributions, present in /lib/common/zstd_internal.h
328
+ * - generate tables normally, using ZSTD_buildFSETable()
329
+ * - printout the content of tables
330
+ * - pretify output, report below, test with fuzzer to ensure it's correct */
331
+
332
+ /* Default FSE distribution table for Literal Lengths */
333
+ static const ZSTD_seqSymbol LL_defaultDTable[(1<<LL_DEFAULTNORMLOG)+1] = {
334
+ { 1, 1, 1, LL_DEFAULTNORMLOG}, /* header : fastMode, tableLog */
335
+ /* nextState, nbAddBits, nbBits, baseVal */
336
+ { 0, 0, 4, 0}, { 16, 0, 4, 0},
337
+ { 32, 0, 5, 1}, { 0, 0, 5, 3},
338
+ { 0, 0, 5, 4}, { 0, 0, 5, 6},
339
+ { 0, 0, 5, 7}, { 0, 0, 5, 9},
340
+ { 0, 0, 5, 10}, { 0, 0, 5, 12},
341
+ { 0, 0, 6, 14}, { 0, 1, 5, 16},
342
+ { 0, 1, 5, 20}, { 0, 1, 5, 22},
343
+ { 0, 2, 5, 28}, { 0, 3, 5, 32},
344
+ { 0, 4, 5, 48}, { 32, 6, 5, 64},
345
+ { 0, 7, 5, 128}, { 0, 8, 6, 256},
346
+ { 0, 10, 6, 1024}, { 0, 12, 6, 4096},
347
+ { 32, 0, 4, 0}, { 0, 0, 4, 1},
348
+ { 0, 0, 5, 2}, { 32, 0, 5, 4},
349
+ { 0, 0, 5, 5}, { 32, 0, 5, 7},
350
+ { 0, 0, 5, 8}, { 32, 0, 5, 10},
351
+ { 0, 0, 5, 11}, { 0, 0, 6, 13},
352
+ { 32, 1, 5, 16}, { 0, 1, 5, 18},
353
+ { 32, 1, 5, 22}, { 0, 2, 5, 24},
354
+ { 32, 3, 5, 32}, { 0, 3, 5, 40},
355
+ { 0, 6, 4, 64}, { 16, 6, 4, 64},
356
+ { 32, 7, 5, 128}, { 0, 9, 6, 512},
357
+ { 0, 11, 6, 2048}, { 48, 0, 4, 0},
358
+ { 16, 0, 4, 1}, { 32, 0, 5, 2},
359
+ { 32, 0, 5, 3}, { 32, 0, 5, 5},
360
+ { 32, 0, 5, 6}, { 32, 0, 5, 8},
361
+ { 32, 0, 5, 9}, { 32, 0, 5, 11},
362
+ { 32, 0, 5, 12}, { 0, 0, 6, 15},
363
+ { 32, 1, 5, 18}, { 32, 1, 5, 20},
364
+ { 32, 2, 5, 24}, { 32, 2, 5, 28},
365
+ { 32, 3, 5, 40}, { 32, 4, 5, 48},
366
+ { 0, 16, 6,65536}, { 0, 15, 6,32768},
367
+ { 0, 14, 6,16384}, { 0, 13, 6, 8192},
368
+ }; /* LL_defaultDTable */
369
+
370
+ /* Default FSE distribution table for Offset Codes */
371
+ static const ZSTD_seqSymbol OF_defaultDTable[(1<<OF_DEFAULTNORMLOG)+1] = {
372
+ { 1, 1, 1, OF_DEFAULTNORMLOG}, /* header : fastMode, tableLog */
373
+ /* nextState, nbAddBits, nbBits, baseVal */
374
+ { 0, 0, 5, 0}, { 0, 6, 4, 61},
375
+ { 0, 9, 5, 509}, { 0, 15, 5,32765},
376
+ { 0, 21, 5,2097149}, { 0, 3, 5, 5},
377
+ { 0, 7, 4, 125}, { 0, 12, 5, 4093},
378
+ { 0, 18, 5,262141}, { 0, 23, 5,8388605},
379
+ { 0, 5, 5, 29}, { 0, 8, 4, 253},
380
+ { 0, 14, 5,16381}, { 0, 20, 5,1048573},
381
+ { 0, 2, 5, 1}, { 16, 7, 4, 125},
382
+ { 0, 11, 5, 2045}, { 0, 17, 5,131069},
383
+ { 0, 22, 5,4194301}, { 0, 4, 5, 13},
384
+ { 16, 8, 4, 253}, { 0, 13, 5, 8189},
385
+ { 0, 19, 5,524285}, { 0, 1, 5, 1},
386
+ { 16, 6, 4, 61}, { 0, 10, 5, 1021},
387
+ { 0, 16, 5,65533}, { 0, 28, 5,268435453},
388
+ { 0, 27, 5,134217725}, { 0, 26, 5,67108861},
389
+ { 0, 25, 5,33554429}, { 0, 24, 5,16777213},
390
+ }; /* OF_defaultDTable */
391
+
392
+
393
+ /* Default FSE distribution table for Match Lengths */
394
+ static const ZSTD_seqSymbol ML_defaultDTable[(1<<ML_DEFAULTNORMLOG)+1] = {
395
+ { 1, 1, 1, ML_DEFAULTNORMLOG}, /* header : fastMode, tableLog */
396
+ /* nextState, nbAddBits, nbBits, baseVal */
397
+ { 0, 0, 6, 3}, { 0, 0, 4, 4},
398
+ { 32, 0, 5, 5}, { 0, 0, 5, 6},
399
+ { 0, 0, 5, 8}, { 0, 0, 5, 9},
400
+ { 0, 0, 5, 11}, { 0, 0, 6, 13},
401
+ { 0, 0, 6, 16}, { 0, 0, 6, 19},
402
+ { 0, 0, 6, 22}, { 0, 0, 6, 25},
403
+ { 0, 0, 6, 28}, { 0, 0, 6, 31},
404
+ { 0, 0, 6, 34}, { 0, 1, 6, 37},
405
+ { 0, 1, 6, 41}, { 0, 2, 6, 47},
406
+ { 0, 3, 6, 59}, { 0, 4, 6, 83},
407
+ { 0, 7, 6, 131}, { 0, 9, 6, 515},
408
+ { 16, 0, 4, 4}, { 0, 0, 4, 5},
409
+ { 32, 0, 5, 6}, { 0, 0, 5, 7},
410
+ { 32, 0, 5, 9}, { 0, 0, 5, 10},
411
+ { 0, 0, 6, 12}, { 0, 0, 6, 15},
412
+ { 0, 0, 6, 18}, { 0, 0, 6, 21},
413
+ { 0, 0, 6, 24}, { 0, 0, 6, 27},
414
+ { 0, 0, 6, 30}, { 0, 0, 6, 33},
415
+ { 0, 1, 6, 35}, { 0, 1, 6, 39},
416
+ { 0, 2, 6, 43}, { 0, 3, 6, 51},
417
+ { 0, 4, 6, 67}, { 0, 5, 6, 99},
418
+ { 0, 8, 6, 259}, { 32, 0, 4, 4},
419
+ { 48, 0, 4, 4}, { 16, 0, 4, 5},
420
+ { 32, 0, 5, 7}, { 32, 0, 5, 8},
421
+ { 32, 0, 5, 10}, { 32, 0, 5, 11},
422
+ { 0, 0, 6, 14}, { 0, 0, 6, 17},
423
+ { 0, 0, 6, 20}, { 0, 0, 6, 23},
424
+ { 0, 0, 6, 26}, { 0, 0, 6, 29},
425
+ { 0, 0, 6, 32}, { 0, 16, 6,65539},
426
+ { 0, 15, 6,32771}, { 0, 14, 6,16387},
427
+ { 0, 13, 6, 8195}, { 0, 12, 6, 4099},
428
+ { 0, 11, 6, 2051}, { 0, 10, 6, 1027},
429
+ }; /* ML_defaultDTable */
430
+
431
+
432
+ static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U8 nbAddBits)
433
+ {
434
+ void* ptr = dt;
435
+ ZSTD_seqSymbol_header* const DTableH = (ZSTD_seqSymbol_header*)ptr;
436
+ ZSTD_seqSymbol* const cell = dt + 1;
437
+
438
+ DTableH->tableLog = 0;
439
+ DTableH->fastMode = 0;
440
+
441
+ cell->nbBits = 0;
442
+ cell->nextState = 0;
443
+ assert(nbAddBits < 255);
444
+ cell->nbAdditionalBits = nbAddBits;
445
+ cell->baseValue = baseValue;
446
+ }
447
+
448
+
449
+ /* ZSTD_buildFSETable() :
450
+ * generate FSE decoding table for one symbol (ll, ml or off)
451
+ * cannot fail if input is valid =>
452
+ * all inputs are presumed validated at this stage */
453
+ FORCE_INLINE_TEMPLATE
454
+ void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
455
+ const short* normalizedCounter, unsigned maxSymbolValue,
456
+ const U32* baseValue, const U8* nbAdditionalBits,
457
+ unsigned tableLog, void* wksp, size_t wkspSize)
458
+ {
459
+ ZSTD_seqSymbol* const tableDecode = dt+1;
460
+ U32 const maxSV1 = maxSymbolValue + 1;
461
+ U32 const tableSize = 1 << tableLog;
462
+
463
+ U16* symbolNext = (U16*)wksp;
464
+ BYTE* spread = (BYTE*)(symbolNext + MaxSeq + 1);
465
+ U32 highThreshold = tableSize - 1;
466
+
467
+
468
+ /* Sanity Checks */
469
+ assert(maxSymbolValue <= MaxSeq);
470
+ assert(tableLog <= MaxFSELog);
471
+ assert(wkspSize >= ZSTD_BUILD_FSE_TABLE_WKSP_SIZE);
472
+ (void)wkspSize;
473
+ /* Init, lay down lowprob symbols */
474
+ { ZSTD_seqSymbol_header DTableH;
475
+ DTableH.tableLog = tableLog;
476
+ DTableH.fastMode = 1;
477
+ { S16 const largeLimit= (S16)(1 << (tableLog-1));
478
+ U32 s;
479
+ for (s=0; s<maxSV1; s++) {
480
+ if (normalizedCounter[s]==-1) {
481
+ tableDecode[highThreshold--].baseValue = s;
482
+ symbolNext[s] = 1;
483
+ } else {
484
+ if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
485
+ assert(normalizedCounter[s]>=0);
486
+ symbolNext[s] = (U16)normalizedCounter[s];
487
+ } } }
488
+ ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
489
+ }
490
+
491
+ /* Spread symbols */
492
+ assert(tableSize <= 512);
493
+ /* Specialized symbol spreading for the case when there are
494
+ * no low probability (-1 count) symbols. When compressing
495
+ * small blocks we avoid low probability symbols to hit this
496
+ * case, since header decoding speed matters more.
497
+ */
498
+ if (highThreshold == tableSize - 1) {
499
+ size_t const tableMask = tableSize-1;
500
+ size_t const step = FSE_TABLESTEP(tableSize);
501
+ /* First lay down the symbols in order.
502
+ * We use a uint64_t to lay down 8 bytes at a time. This reduces branch
503
+ * misses since small blocks generally have small table logs, so nearly
504
+ * all symbols have counts <= 8. We ensure we have 8 bytes at the end of
505
+ * our buffer to handle the over-write.
506
+ */
507
+ {
508
+ U64 const add = 0x0101010101010101ull;
509
+ size_t pos = 0;
510
+ U64 sv = 0;
511
+ U32 s;
512
+ for (s=0; s<maxSV1; ++s, sv += add) {
513
+ int i;
514
+ int const n = normalizedCounter[s];
515
+ MEM_write64(spread + pos, sv);
516
+ for (i = 8; i < n; i += 8) {
517
+ MEM_write64(spread + pos + i, sv);
518
+ }
519
+ assert(n>=0);
520
+ pos += (size_t)n;
521
+ }
522
+ }
523
+ /* Now we spread those positions across the table.
524
+ * The benefit of doing it in two stages is that we avoid the
525
+ * variable size inner loop, which caused lots of branch misses.
526
+ * Now we can run through all the positions without any branch misses.
527
+ * We unroll the loop twice, since that is what empirically worked best.
528
+ */
529
+ {
530
+ size_t position = 0;
531
+ size_t s;
532
+ size_t const unroll = 2;
533
+ assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
534
+ for (s = 0; s < (size_t)tableSize; s += unroll) {
535
+ size_t u;
536
+ for (u = 0; u < unroll; ++u) {
537
+ size_t const uPosition = (position + (u * step)) & tableMask;
538
+ tableDecode[uPosition].baseValue = spread[s + u];
539
+ }
540
+ position = (position + (unroll * step)) & tableMask;
541
+ }
542
+ assert(position == 0);
543
+ }
544
+ } else {
545
+ U32 const tableMask = tableSize-1;
546
+ U32 const step = FSE_TABLESTEP(tableSize);
547
+ U32 s, position = 0;
548
+ for (s=0; s<maxSV1; s++) {
549
+ int i;
550
+ int const n = normalizedCounter[s];
551
+ for (i=0; i<n; i++) {
552
+ tableDecode[position].baseValue = s;
553
+ position = (position + step) & tableMask;
554
+ while (UNLIKELY(position > highThreshold)) position = (position + step) & tableMask; /* lowprob area */
555
+ } }
556
+ assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
557
+ }
558
+
559
+ /* Build Decoding table */
560
+ {
561
+ U32 u;
562
+ for (u=0; u<tableSize; u++) {
563
+ U32 const symbol = tableDecode[u].baseValue;
564
+ U32 const nextState = symbolNext[symbol]++;
565
+ tableDecode[u].nbBits = (BYTE) (tableLog - ZSTD_highbit32(nextState) );
566
+ tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
567
+ assert(nbAdditionalBits[symbol] < 255);
568
+ tableDecode[u].nbAdditionalBits = nbAdditionalBits[symbol];
569
+ tableDecode[u].baseValue = baseValue[symbol];
570
+ }
571
+ }
572
+ }
573
+
574
+ /* Avoids the FORCE_INLINE of the _body() function. */
575
+ static void ZSTD_buildFSETable_body_default(ZSTD_seqSymbol* dt,
576
+ const short* normalizedCounter, unsigned maxSymbolValue,
577
+ const U32* baseValue, const U8* nbAdditionalBits,
578
+ unsigned tableLog, void* wksp, size_t wkspSize)
579
+ {
580
+ ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
581
+ baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
582
+ }
583
+
584
+ #if DYNAMIC_BMI2
585
+ BMI2_TARGET_ATTRIBUTE static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol* dt,
586
+ const short* normalizedCounter, unsigned maxSymbolValue,
587
+ const U32* baseValue, const U8* nbAdditionalBits,
588
+ unsigned tableLog, void* wksp, size_t wkspSize)
589
+ {
590
+ ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
591
+ baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
592
+ }
593
+ #endif
594
+
595
+ void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
596
+ const short* normalizedCounter, unsigned maxSymbolValue,
597
+ const U32* baseValue, const U8* nbAdditionalBits,
598
+ unsigned tableLog, void* wksp, size_t wkspSize, int bmi2)
599
+ {
600
+ #if DYNAMIC_BMI2
601
+ if (bmi2) {
602
+ ZSTD_buildFSETable_body_bmi2(dt, normalizedCounter, maxSymbolValue,
603
+ baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
604
+ return;
605
+ }
606
+ #endif
607
+ (void)bmi2;
608
+ ZSTD_buildFSETable_body_default(dt, normalizedCounter, maxSymbolValue,
609
+ baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
610
+ }
611
+
612
+
613
+ /*! ZSTD_buildSeqTable() :
614
+ * @return : nb bytes read from src,
615
+ * or an error code if it fails */
616
+ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr,
617
+ symbolEncodingType_e type, unsigned max, U32 maxLog,
618
+ const void* src, size_t srcSize,
619
+ const U32* baseValue, const U8* nbAdditionalBits,
620
+ const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable,
621
+ int ddictIsCold, int nbSeq, U32* wksp, size_t wkspSize,
622
+ int bmi2)
623
+ {
624
+ switch(type)
625
+ {
626
+ case set_rle :
627
+ RETURN_ERROR_IF(!srcSize, srcSize_wrong, "");
628
+ RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected, "");
629
+ { U32 const symbol = *(const BYTE*)src;
630
+ U32 const baseline = baseValue[symbol];
631
+ U8 const nbBits = nbAdditionalBits[symbol];
632
+ ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits);
633
+ }
634
+ *DTablePtr = DTableSpace;
635
+ return 1;
636
+ case set_basic :
637
+ *DTablePtr = defaultTable;
638
+ return 0;
639
+ case set_repeat:
640
+ RETURN_ERROR_IF(!flagRepeatTable, corruption_detected, "");
641
+ /* prefetch FSE table if used */
642
+ if (ddictIsCold && (nbSeq > 24 /* heuristic */)) {
643
+ const void* const pStart = *DTablePtr;
644
+ size_t const pSize = sizeof(ZSTD_seqSymbol) * (SEQSYMBOL_TABLE_SIZE(maxLog));
645
+ PREFETCH_AREA(pStart, pSize);
646
+ }
647
+ return 0;
648
+ case set_compressed :
649
+ { unsigned tableLog;
650
+ S16 norm[MaxSeq+1];
651
+ size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
652
+ RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, "");
653
+ RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, "");
654
+ ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog, wksp, wkspSize, bmi2);
655
+ *DTablePtr = DTableSpace;
656
+ return headerSize;
657
+ }
658
+ default :
659
+ assert(0);
660
+ RETURN_ERROR(GENERIC, "impossible");
661
+ }
662
+ }
663
+
664
+ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
665
+ const void* src, size_t srcSize)
666
+ {
667
+ const BYTE* const istart = (const BYTE*)src;
668
+ const BYTE* const iend = istart + srcSize;
669
+ const BYTE* ip = istart;
670
+ int nbSeq;
671
+ DEBUGLOG(5, "ZSTD_decodeSeqHeaders");
672
+
673
+ /* check */
674
+ RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong, "");
675
+
676
+ /* SeqHead */
677
+ nbSeq = *ip++;
678
+ if (!nbSeq) {
679
+ *nbSeqPtr=0;
680
+ RETURN_ERROR_IF(srcSize != 1, srcSize_wrong, "");
681
+ return 1;
682
+ }
683
+ if (nbSeq > 0x7F) {
684
+ if (nbSeq == 0xFF) {
685
+ RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, "");
686
+ nbSeq = MEM_readLE16(ip) + LONGNBSEQ;
687
+ ip+=2;
688
+ } else {
689
+ RETURN_ERROR_IF(ip >= iend, srcSize_wrong, "");
690
+ nbSeq = ((nbSeq-0x80)<<8) + *ip++;
691
+ }
692
+ }
693
+ *nbSeqPtr = nbSeq;
694
+
695
+ /* FSE table descriptors */
696
+ RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */
697
+ { symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
698
+ symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
699
+ symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
700
+ ip++;
701
+
702
+ /* Build DTables */
703
+ { size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr,
704
+ LLtype, MaxLL, LLFSELog,
705
+ ip, iend-ip,
706
+ LL_base, LL_bits,
707
+ LL_defaultDTable, dctx->fseEntropy,
708
+ dctx->ddictIsCold, nbSeq,
709
+ dctx->workspace, sizeof(dctx->workspace),
710
+ ZSTD_DCtx_get_bmi2(dctx));
711
+ RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed");
712
+ ip += llhSize;
713
+ }
714
+
715
+ { size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr,
716
+ OFtype, MaxOff, OffFSELog,
717
+ ip, iend-ip,
718
+ OF_base, OF_bits,
719
+ OF_defaultDTable, dctx->fseEntropy,
720
+ dctx->ddictIsCold, nbSeq,
721
+ dctx->workspace, sizeof(dctx->workspace),
722
+ ZSTD_DCtx_get_bmi2(dctx));
723
+ RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed");
724
+ ip += ofhSize;
725
+ }
726
+
727
+ { size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr,
728
+ MLtype, MaxML, MLFSELog,
729
+ ip, iend-ip,
730
+ ML_base, ML_bits,
731
+ ML_defaultDTable, dctx->fseEntropy,
732
+ dctx->ddictIsCold, nbSeq,
733
+ dctx->workspace, sizeof(dctx->workspace),
734
+ ZSTD_DCtx_get_bmi2(dctx));
735
+ RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed");
736
+ ip += mlhSize;
737
+ }
738
+ }
739
+
740
+ return ip-istart;
741
+ }
742
+
743
+
744
+ typedef struct {
745
+ size_t litLength;
746
+ size_t matchLength;
747
+ size_t offset;
748
+ } seq_t;
749
+
750
+ typedef struct {
751
+ size_t state;
752
+ const ZSTD_seqSymbol* table;
753
+ } ZSTD_fseState;
754
+
755
+ typedef struct {
756
+ BIT_DStream_t DStream;
757
+ ZSTD_fseState stateLL;
758
+ ZSTD_fseState stateOffb;
759
+ ZSTD_fseState stateML;
760
+ size_t prevOffset[ZSTD_REP_NUM];
761
+ } seqState_t;
762
+
763
+ /*! ZSTD_overlapCopy8() :
764
+ * Copies 8 bytes from ip to op and updates op and ip where ip <= op.
765
+ * If the offset is < 8 then the offset is spread to at least 8 bytes.
766
+ *
767
+ * Precondition: *ip <= *op
768
+ * Postcondition: *op - *op >= 8
769
+ */
770
+ HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
771
+ assert(*ip <= *op);
772
+ if (offset < 8) {
773
+ /* close range match, overlap */
774
+ static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */
775
+ static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* subtracted */
776
+ int const sub2 = dec64table[offset];
777
+ (*op)[0] = (*ip)[0];
778
+ (*op)[1] = (*ip)[1];
779
+ (*op)[2] = (*ip)[2];
780
+ (*op)[3] = (*ip)[3];
781
+ *ip += dec32table[offset];
782
+ ZSTD_copy4(*op+4, *ip);
783
+ *ip -= sub2;
784
+ } else {
785
+ ZSTD_copy8(*op, *ip);
786
+ }
787
+ *ip += 8;
788
+ *op += 8;
789
+ assert(*op - *ip >= 8);
790
+ }
791
+
792
+ /*! ZSTD_safecopy() :
793
+ * Specialized version of memcpy() that is allowed to READ up to WILDCOPY_OVERLENGTH past the input buffer
794
+ * and write up to 16 bytes past oend_w (op >= oend_w is allowed).
795
+ * This function is only called in the uncommon case where the sequence is near the end of the block. It
796
+ * should be fast for a single long sequence, but can be slow for several short sequences.
797
+ *
798
+ * @param ovtype controls the overlap detection
799
+ * - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
800
+ * - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart.
801
+ * The src buffer must be before the dst buffer.
802
+ */
803
+ static void ZSTD_safecopy(BYTE* op, const BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
804
+ ptrdiff_t const diff = op - ip;
805
+ BYTE* const oend = op + length;
806
+
807
+ assert((ovtype == ZSTD_no_overlap && (diff <= -8 || diff >= 8 || op >= oend_w)) ||
808
+ (ovtype == ZSTD_overlap_src_before_dst && diff >= 0));
809
+
810
+ if (length < 8) {
811
+ /* Handle short lengths. */
812
+ while (op < oend) *op++ = *ip++;
813
+ return;
814
+ }
815
+ if (ovtype == ZSTD_overlap_src_before_dst) {
816
+ /* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */
817
+ assert(length >= 8);
818
+ ZSTD_overlapCopy8(&op, &ip, diff);
819
+ length -= 8;
820
+ assert(op - ip >= 8);
821
+ assert(op <= oend);
822
+ }
823
+
824
+ if (oend <= oend_w) {
825
+ /* No risk of overwrite. */
826
+ ZSTD_wildcopy(op, ip, length, ovtype);
827
+ return;
828
+ }
829
+ if (op <= oend_w) {
830
+ /* Wildcopy until we get close to the end. */
831
+ assert(oend > oend_w);
832
+ ZSTD_wildcopy(op, ip, oend_w - op, ovtype);
833
+ ip += oend_w - op;
834
+ op += oend_w - op;
835
+ }
836
+ /* Handle the leftovers. */
837
+ while (op < oend) *op++ = *ip++;
838
+ }
839
+
840
+ /* ZSTD_safecopyDstBeforeSrc():
841
+ * This version allows overlap with dst before src, or handles the non-overlap case with dst after src
842
+ * Kept separate from more common ZSTD_safecopy case to avoid performance impact to the safecopy common case */
843
+ static void ZSTD_safecopyDstBeforeSrc(BYTE* op, BYTE const* ip, ptrdiff_t length) {
844
+ ptrdiff_t const diff = op - ip;
845
+ BYTE* const oend = op + length;
846
+
847
+ if (length < 8 || diff > -8) {
848
+ /* Handle short lengths, close overlaps, and dst not before src. */
849
+ while (op < oend) *op++ = *ip++;
850
+ return;
851
+ }
852
+
853
+ if (op <= oend - WILDCOPY_OVERLENGTH && diff < -WILDCOPY_VECLEN) {
854
+ ZSTD_wildcopy(op, ip, oend - WILDCOPY_OVERLENGTH - op, ZSTD_no_overlap);
855
+ ip += oend - WILDCOPY_OVERLENGTH - op;
856
+ op += oend - WILDCOPY_OVERLENGTH - op;
857
+ }
858
+
859
+ /* Handle the leftovers. */
860
+ while (op < oend) *op++ = *ip++;
861
+ }
862
+
863
+ /* ZSTD_execSequenceEnd():
864
+ * This version handles cases that are near the end of the output buffer. It requires
865
+ * more careful checks to make sure there is no overflow. By separating out these hard
866
+ * and unlikely cases, we can speed up the common cases.
867
+ *
868
+ * NOTE: This function needs to be fast for a single long sequence, but doesn't need
869
+ * to be optimized for many small sequences, since those fall into ZSTD_execSequence().
870
+ */
871
+ FORCE_NOINLINE
872
+ size_t ZSTD_execSequenceEnd(BYTE* op,
873
+ BYTE* const oend, seq_t sequence,
874
+ const BYTE** litPtr, const BYTE* const litLimit,
875
+ const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
876
+ {
877
+ BYTE* const oLitEnd = op + sequence.litLength;
878
+ size_t const sequenceLength = sequence.litLength + sequence.matchLength;
879
+ const BYTE* const iLitEnd = *litPtr + sequence.litLength;
880
+ const BYTE* match = oLitEnd - sequence.offset;
881
+ BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;
882
+
883
+ /* bounds checks : careful of address space overflow in 32-bit mode */
884
+ RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
885
+ RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
886
+ assert(op < op + sequenceLength);
887
+ assert(oLitEnd < op + sequenceLength);
888
+
889
+ /* copy literals */
890
+ ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap);
891
+ op = oLitEnd;
892
+ *litPtr = iLitEnd;
893
+
894
+ /* copy Match */
895
+ if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
896
+ /* offset beyond prefix */
897
+ RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
898
+ match = dictEnd - (prefixStart - match);
899
+ if (match + sequence.matchLength <= dictEnd) {
900
+ ZSTD_memmove(oLitEnd, match, sequence.matchLength);
901
+ return sequenceLength;
902
+ }
903
+ /* span extDict & currentPrefixSegment */
904
+ { size_t const length1 = dictEnd - match;
905
+ ZSTD_memmove(oLitEnd, match, length1);
906
+ op = oLitEnd + length1;
907
+ sequence.matchLength -= length1;
908
+ match = prefixStart;
909
+ }
910
+ }
911
+ ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
912
+ return sequenceLength;
913
+ }
914
+
915
+ /* ZSTD_execSequenceEndSplitLitBuffer():
916
+ * This version is intended to be used during instances where the litBuffer is still split. It is kept separate to avoid performance impact for the good case.
917
+ */
918
+ FORCE_NOINLINE
919
+ size_t ZSTD_execSequenceEndSplitLitBuffer(BYTE* op,
920
+ BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
921
+ const BYTE** litPtr, const BYTE* const litLimit,
922
+ const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
923
+ {
924
+ BYTE* const oLitEnd = op + sequence.litLength;
925
+ size_t const sequenceLength = sequence.litLength + sequence.matchLength;
926
+ const BYTE* const iLitEnd = *litPtr + sequence.litLength;
927
+ const BYTE* match = oLitEnd - sequence.offset;
928
+
929
+
930
+ /* bounds checks : careful of address space overflow in 32-bit mode */
931
+ RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
932
+ RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
933
+ assert(op < op + sequenceLength);
934
+ assert(oLitEnd < op + sequenceLength);
935
+
936
+ /* copy literals */
937
+ RETURN_ERROR_IF(op > *litPtr && op < *litPtr + sequence.litLength, dstSize_tooSmall, "output should not catch up to and overwrite literal buffer");
938
+ ZSTD_safecopyDstBeforeSrc(op, *litPtr, sequence.litLength);
939
+ op = oLitEnd;
940
+ *litPtr = iLitEnd;
941
+
942
+ /* copy Match */
943
+ if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
944
+ /* offset beyond prefix */
945
+ RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
946
+ match = dictEnd - (prefixStart - match);
947
+ if (match + sequence.matchLength <= dictEnd) {
948
+ ZSTD_memmove(oLitEnd, match, sequence.matchLength);
949
+ return sequenceLength;
950
+ }
951
+ /* span extDict & currentPrefixSegment */
952
+ { size_t const length1 = dictEnd - match;
953
+ ZSTD_memmove(oLitEnd, match, length1);
954
+ op = oLitEnd + length1;
955
+ sequence.matchLength -= length1;
956
+ match = prefixStart;
957
+ }
958
+ }
959
+ ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
960
+ return sequenceLength;
961
+ }
962
+
963
+ HINT_INLINE
964
+ size_t ZSTD_execSequence(BYTE* op,
965
+ BYTE* const oend, seq_t sequence,
966
+ const BYTE** litPtr, const BYTE* const litLimit,
967
+ const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
968
+ {
969
+ BYTE* const oLitEnd = op + sequence.litLength;
970
+ size_t const sequenceLength = sequence.litLength + sequence.matchLength;
971
+ BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
972
+ BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; /* risk : address space underflow on oend=NULL */
973
+ const BYTE* const iLitEnd = *litPtr + sequence.litLength;
974
+ const BYTE* match = oLitEnd - sequence.offset;
975
+
976
+ assert(op != NULL /* Precondition */);
977
+ assert(oend_w < oend /* No underflow */);
978
+
979
+ #if defined(__aarch64__)
980
+ /* prefetch sequence starting from match that will be used for copy later */
981
+ PREFETCH_L1(match);
982
+ #endif
983
+ /* Handle edge cases in a slow path:
984
+ * - Read beyond end of literals
985
+ * - Match end is within WILDCOPY_OVERLIMIT of oend
986
+ * - 32-bit mode and the match length overflows
987
+ */
988
+ if (UNLIKELY(
989
+ iLitEnd > litLimit ||
990
+ oMatchEnd > oend_w ||
991
+ (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
992
+ return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
993
+
994
+ /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
995
+ assert(op <= oLitEnd /* No overflow */);
996
+ assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
997
+ assert(oMatchEnd <= oend /* No underflow */);
998
+ assert(iLitEnd <= litLimit /* Literal length is in bounds */);
999
+ assert(oLitEnd <= oend_w /* Can wildcopy literals */);
1000
+ assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
1001
+
1002
+ /* Copy Literals:
1003
+ * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
1004
+ * We likely don't need the full 32-byte wildcopy.
1005
+ */
1006
+ assert(WILDCOPY_OVERLENGTH >= 16);
1007
+ ZSTD_copy16(op, (*litPtr));
1008
+ if (UNLIKELY(sequence.litLength > 16)) {
1009
+ ZSTD_wildcopy(op + 16, (*litPtr) + 16, sequence.litLength - 16, ZSTD_no_overlap);
1010
+ }
1011
+ op = oLitEnd;
1012
+ *litPtr = iLitEnd; /* update for next sequence */
1013
+
1014
+ /* Copy Match */
1015
+ if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
1016
+ /* offset beyond prefix -> go into extDict */
1017
+ RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
1018
+ match = dictEnd + (match - prefixStart);
1019
+ if (match + sequence.matchLength <= dictEnd) {
1020
+ ZSTD_memmove(oLitEnd, match, sequence.matchLength);
1021
+ return sequenceLength;
1022
+ }
1023
+ /* span extDict & currentPrefixSegment */
1024
+ { size_t const length1 = dictEnd - match;
1025
+ ZSTD_memmove(oLitEnd, match, length1);
1026
+ op = oLitEnd + length1;
1027
+ sequence.matchLength -= length1;
1028
+ match = prefixStart;
1029
+ }
1030
+ }
1031
+ /* Match within prefix of 1 or more bytes */
1032
+ assert(op <= oMatchEnd);
1033
+ assert(oMatchEnd <= oend_w);
1034
+ assert(match >= prefixStart);
1035
+ assert(sequence.matchLength >= 1);
1036
+
1037
+ /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
1038
+ * without overlap checking.
1039
+ */
1040
+ if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
1041
+ /* We bet on a full wildcopy for matches, since we expect matches to be
1042
+ * longer than literals (in general). In silesia, ~10% of matches are longer
1043
+ * than 16 bytes.
1044
+ */
1045
+ ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
1046
+ return sequenceLength;
1047
+ }
1048
+ assert(sequence.offset < WILDCOPY_VECLEN);
1049
+
1050
+ /* Copy 8 bytes and spread the offset to be >= 8. */
1051
+ ZSTD_overlapCopy8(&op, &match, sequence.offset);
1052
+
1053
+ /* If the match length is > 8 bytes, then continue with the wildcopy. */
1054
+ if (sequence.matchLength > 8) {
1055
+ assert(op < oMatchEnd);
1056
+ ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength - 8, ZSTD_overlap_src_before_dst);
1057
+ }
1058
+ return sequenceLength;
1059
+ }
1060
+
1061
+ HINT_INLINE
1062
+ size_t ZSTD_execSequenceSplitLitBuffer(BYTE* op,
1063
+ BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
1064
+ const BYTE** litPtr, const BYTE* const litLimit,
1065
+ const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
1066
+ {
1067
+ BYTE* const oLitEnd = op + sequence.litLength;
1068
+ size_t const sequenceLength = sequence.litLength + sequence.matchLength;
1069
+ BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
1070
+ const BYTE* const iLitEnd = *litPtr + sequence.litLength;
1071
+ const BYTE* match = oLitEnd - sequence.offset;
1072
+
1073
+ assert(op != NULL /* Precondition */);
1074
+ assert(oend_w < oend /* No underflow */);
1075
+ /* Handle edge cases in a slow path:
1076
+ * - Read beyond end of literals
1077
+ * - Match end is within WILDCOPY_OVERLIMIT of oend
1078
+ * - 32-bit mode and the match length overflows
1079
+ */
1080
+ if (UNLIKELY(
1081
+ iLitEnd > litLimit ||
1082
+ oMatchEnd > oend_w ||
1083
+ (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
1084
+ return ZSTD_execSequenceEndSplitLitBuffer(op, oend, oend_w, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
1085
+
1086
+ /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
1087
+ assert(op <= oLitEnd /* No overflow */);
1088
+ assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
1089
+ assert(oMatchEnd <= oend /* No underflow */);
1090
+ assert(iLitEnd <= litLimit /* Literal length is in bounds */);
1091
+ assert(oLitEnd <= oend_w /* Can wildcopy literals */);
1092
+ assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
1093
+
1094
+ /* Copy Literals:
1095
+ * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
1096
+ * We likely don't need the full 32-byte wildcopy.
1097
+ */
1098
+ assert(WILDCOPY_OVERLENGTH >= 16);
1099
+ ZSTD_copy16(op, (*litPtr));
1100
+ if (UNLIKELY(sequence.litLength > 16)) {
1101
+ ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap);
1102
+ }
1103
+ op = oLitEnd;
1104
+ *litPtr = iLitEnd; /* update for next sequence */
1105
+
1106
+ /* Copy Match */
1107
+ if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
1108
+ /* offset beyond prefix -> go into extDict */
1109
+ RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
1110
+ match = dictEnd + (match - prefixStart);
1111
+ if (match + sequence.matchLength <= dictEnd) {
1112
+ ZSTD_memmove(oLitEnd, match, sequence.matchLength);
1113
+ return sequenceLength;
1114
+ }
1115
+ /* span extDict & currentPrefixSegment */
1116
+ { size_t const length1 = dictEnd - match;
1117
+ ZSTD_memmove(oLitEnd, match, length1);
1118
+ op = oLitEnd + length1;
1119
+ sequence.matchLength -= length1;
1120
+ match = prefixStart;
1121
+ } }
1122
+ /* Match within prefix of 1 or more bytes */
1123
+ assert(op <= oMatchEnd);
1124
+ assert(oMatchEnd <= oend_w);
1125
+ assert(match >= prefixStart);
1126
+ assert(sequence.matchLength >= 1);
1127
+
1128
+ /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
1129
+ * without overlap checking.
1130
+ */
1131
+ if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
1132
+ /* We bet on a full wildcopy for matches, since we expect matches to be
1133
+ * longer than literals (in general). In silesia, ~10% of matches are longer
1134
+ * than 16 bytes.
1135
+ */
1136
+ ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
1137
+ return sequenceLength;
1138
+ }
1139
+ assert(sequence.offset < WILDCOPY_VECLEN);
1140
+
1141
+ /* Copy 8 bytes and spread the offset to be >= 8. */
1142
+ ZSTD_overlapCopy8(&op, &match, sequence.offset);
1143
+
1144
+ /* If the match length is > 8 bytes, then continue with the wildcopy. */
1145
+ if (sequence.matchLength > 8) {
1146
+ assert(op < oMatchEnd);
1147
+ ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst);
1148
+ }
1149
+ return sequenceLength;
1150
+ }
1151
+
1152
+
1153
+ static void
1154
+ ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt)
1155
+ {
1156
+ const void* ptr = dt;
1157
+ const ZSTD_seqSymbol_header* const DTableH = (const ZSTD_seqSymbol_header*)ptr;
1158
+ DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog);
1159
+ DEBUGLOG(6, "ZSTD_initFseState : val=%u using %u bits",
1160
+ (U32)DStatePtr->state, DTableH->tableLog);
1161
+ BIT_reloadDStream(bitD);
1162
+ DStatePtr->table = dt + 1;
1163
+ }
1164
+
1165
+ FORCE_INLINE_TEMPLATE void
1166
+ ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, U16 nextState, U32 nbBits)
1167
+ {
1168
+ size_t const lowBits = BIT_readBits(bitD, nbBits);
1169
+ DStatePtr->state = nextState + lowBits;
1170
+ }
1171
+
1172
+ /* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
1173
+ * offset bits. But we can only read at most STREAM_ACCUMULATOR_MIN_32
1174
+ * bits before reloading. This value is the maximum number of bytes we read
1175
+ * after reloading when we are decoding long offsets.
1176
+ */
1177
+ #define LONG_OFFSETS_MAX_EXTRA_BITS_32 \
1178
+ (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32 \
1179
+ ? ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32 \
1180
+ : 0)
1181
+
1182
+ typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
1183
+
1184
+ FORCE_INLINE_TEMPLATE seq_t
1185
+ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
1186
+ {
1187
+ seq_t seq;
1188
+ /*
1189
+ * ZSTD_seqSymbol is a structure with a total of 64 bits wide. So it can be
1190
+ * loaded in one operation and extracted its fields by simply shifting or
1191
+ * bit-extracting on aarch64.
1192
+ * GCC doesn't recognize this and generates more unnecessary ldr/ldrb/ldrh
1193
+ * operations that cause performance drop. This can be avoided by using this
1194
+ * ZSTD_memcpy hack.
1195
+ */
1196
+ #if defined(__aarch64__) && (defined(__GNUC__) && !defined(__clang__))
1197
+ ZSTD_seqSymbol llDInfoS, mlDInfoS, ofDInfoS;
1198
+ ZSTD_seqSymbol* const llDInfo = &llDInfoS;
1199
+ ZSTD_seqSymbol* const mlDInfo = &mlDInfoS;
1200
+ ZSTD_seqSymbol* const ofDInfo = &ofDInfoS;
1201
+ ZSTD_memcpy(llDInfo, seqState->stateLL.table + seqState->stateLL.state, sizeof(ZSTD_seqSymbol));
1202
+ ZSTD_memcpy(mlDInfo, seqState->stateML.table + seqState->stateML.state, sizeof(ZSTD_seqSymbol));
1203
+ ZSTD_memcpy(ofDInfo, seqState->stateOffb.table + seqState->stateOffb.state, sizeof(ZSTD_seqSymbol));
1204
+ #else
1205
+ const ZSTD_seqSymbol* const llDInfo = seqState->stateLL.table + seqState->stateLL.state;
1206
+ const ZSTD_seqSymbol* const mlDInfo = seqState->stateML.table + seqState->stateML.state;
1207
+ const ZSTD_seqSymbol* const ofDInfo = seqState->stateOffb.table + seqState->stateOffb.state;
1208
+ #endif
1209
+ seq.matchLength = mlDInfo->baseValue;
1210
+ seq.litLength = llDInfo->baseValue;
1211
+ { U32 const ofBase = ofDInfo->baseValue;
1212
+ BYTE const llBits = llDInfo->nbAdditionalBits;
1213
+ BYTE const mlBits = mlDInfo->nbAdditionalBits;
1214
+ BYTE const ofBits = ofDInfo->nbAdditionalBits;
1215
+ BYTE const totalBits = llBits+mlBits+ofBits;
1216
+
1217
+ U16 const llNext = llDInfo->nextState;
1218
+ U16 const mlNext = mlDInfo->nextState;
1219
+ U16 const ofNext = ofDInfo->nextState;
1220
+ U32 const llnbBits = llDInfo->nbBits;
1221
+ U32 const mlnbBits = mlDInfo->nbBits;
1222
+ U32 const ofnbBits = ofDInfo->nbBits;
1223
+
1224
+ assert(llBits <= MaxLLBits);
1225
+ assert(mlBits <= MaxMLBits);
1226
+ assert(ofBits <= MaxOff);
1227
+ /*
1228
+ * As gcc has better branch and block analyzers, sometimes it is only
1229
+ * valuable to mark likeliness for clang, it gives around 3-4% of
1230
+ * performance.
1231
+ */
1232
+
1233
+ /* sequence */
1234
+ { size_t offset;
1235
+ if (ofBits > 1) {
1236
+ ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
1237
+ ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
1238
+ ZSTD_STATIC_ASSERT(STREAM_ACCUMULATOR_MIN_32 > LONG_OFFSETS_MAX_EXTRA_BITS_32);
1239
+ ZSTD_STATIC_ASSERT(STREAM_ACCUMULATOR_MIN_32 - LONG_OFFSETS_MAX_EXTRA_BITS_32 >= MaxMLBits);
1240
+ if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {
1241
+ /* Always read extra bits, this keeps the logic simple,
1242
+ * avoids branches, and avoids accidentally reading 0 bits.
1243
+ */
1244
+ U32 const extraBits = LONG_OFFSETS_MAX_EXTRA_BITS_32;
1245
+ offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
1246
+ BIT_reloadDStream(&seqState->DStream);
1247
+ offset += BIT_readBitsFast(&seqState->DStream, extraBits);
1248
+ } else {
1249
+ offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
1250
+ if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
1251
+ }
1252
+ seqState->prevOffset[2] = seqState->prevOffset[1];
1253
+ seqState->prevOffset[1] = seqState->prevOffset[0];
1254
+ seqState->prevOffset[0] = offset;
1255
+ } else {
1256
+ U32 const ll0 = (llDInfo->baseValue == 0);
1257
+ if (LIKELY((ofBits == 0))) {
1258
+ offset = seqState->prevOffset[ll0];
1259
+ seqState->prevOffset[1] = seqState->prevOffset[!ll0];
1260
+ seqState->prevOffset[0] = offset;
1261
+ } else {
1262
+ offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1);
1263
+ { size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
1264
+ temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
1265
+ if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
1266
+ seqState->prevOffset[1] = seqState->prevOffset[0];
1267
+ seqState->prevOffset[0] = offset = temp;
1268
+ } } }
1269
+ seq.offset = offset;
1270
+ }
1271
+
1272
+ if (mlBits > 0)
1273
+ seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/);
1274
+
1275
+ if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
1276
+ BIT_reloadDStream(&seqState->DStream);
1277
+ if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
1278
+ BIT_reloadDStream(&seqState->DStream);
1279
+ /* Ensure there are enough bits to read the rest of data in 64-bit mode. */
1280
+ ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
1281
+
1282
+ if (llBits > 0)
1283
+ seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/);
1284
+
1285
+ if (MEM_32bits())
1286
+ BIT_reloadDStream(&seqState->DStream);
1287
+
1288
+ DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
1289
+ (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
1290
+
1291
+ ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llNext, llnbBits); /* <= 9 bits */
1292
+ ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlNext, mlnbBits); /* <= 9 bits */
1293
+ if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
1294
+ ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofNext, ofnbBits); /* <= 8 bits */
1295
+ }
1296
+
1297
+ return seq;
1298
+ }
1299
+
1300
+ #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
1301
+ MEM_STATIC int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd)
1302
+ {
1303
+ size_t const windowSize = dctx->fParams.windowSize;
1304
+ /* No dictionary used. */
1305
+ if (dctx->dictContentEndForFuzzing == NULL) return 0;
1306
+ /* Dictionary is our prefix. */
1307
+ if (prefixStart == dctx->dictContentBeginForFuzzing) return 1;
1308
+ /* Dictionary is not our ext-dict. */
1309
+ if (dctx->dictEnd != dctx->dictContentEndForFuzzing) return 0;
1310
+ /* Dictionary is not within our window size. */
1311
+ if ((size_t)(oLitEnd - prefixStart) >= windowSize) return 0;
1312
+ /* Dictionary is active. */
1313
+ return 1;
1314
+ }
1315
+
1316
+ MEM_STATIC void ZSTD_assertValidSequence(
1317
+ ZSTD_DCtx const* dctx,
1318
+ BYTE const* op, BYTE const* oend,
1319
+ seq_t const seq,
1320
+ BYTE const* prefixStart, BYTE const* virtualStart)
1321
+ {
1322
+ #if DEBUGLEVEL >= 1
1323
+ size_t const windowSize = dctx->fParams.windowSize;
1324
+ size_t const sequenceSize = seq.litLength + seq.matchLength;
1325
+ BYTE const* const oLitEnd = op + seq.litLength;
1326
+ DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u",
1327
+ (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
1328
+ assert(op <= oend);
1329
+ assert((size_t)(oend - op) >= sequenceSize);
1330
+ assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX);
1331
+ if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) {
1332
+ size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing);
1333
+ /* Offset must be within the dictionary. */
1334
+ assert(seq.offset <= (size_t)(oLitEnd - virtualStart));
1335
+ assert(seq.offset <= windowSize + dictSize);
1336
+ } else {
1337
+ /* Offset must be within our window. */
1338
+ assert(seq.offset <= windowSize);
1339
+ }
1340
+ #else
1341
+ (void)dctx, (void)op, (void)oend, (void)seq, (void)prefixStart, (void)virtualStart;
1342
+ #endif
1343
+ }
1344
+ #endif
1345
+
1346
+ #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
1347
+
1348
+
1349
+ FORCE_INLINE_TEMPLATE size_t
1350
+ DONT_VECTORIZE
1351
+ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,
1352
+ void* dst, size_t maxDstSize,
1353
+ const void* seqStart, size_t seqSize, int nbSeq,
1354
+ const ZSTD_longOffset_e isLongOffset,
1355
+ const int frame)
1356
+ {
1357
+ const BYTE* ip = (const BYTE*)seqStart;
1358
+ const BYTE* const iend = ip + seqSize;
1359
+ BYTE* const ostart = (BYTE*)dst;
1360
+ BYTE* const oend = ostart + maxDstSize;
1361
+ BYTE* op = ostart;
1362
+ const BYTE* litPtr = dctx->litPtr;
1363
+ const BYTE* litBufferEnd = dctx->litBufferEnd;
1364
+ const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
1365
+ const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);
1366
+ const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
1367
+ DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer");
1368
+ (void)frame;
1369
+
1370
+ /* Regen sequences */
1371
+ if (nbSeq) {
1372
+ seqState_t seqState;
1373
+ dctx->fseEntropy = 1;
1374
+ { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
1375
+ RETURN_ERROR_IF(
1376
+ ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
1377
+ corruption_detected, "");
1378
+ ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
1379
+ ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
1380
+ ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
1381
+ assert(dst != NULL);
1382
+
1383
+ ZSTD_STATIC_ASSERT(
1384
+ BIT_DStream_unfinished < BIT_DStream_completed &&
1385
+ BIT_DStream_endOfBuffer < BIT_DStream_completed &&
1386
+ BIT_DStream_completed < BIT_DStream_overflow);
1387
+
1388
+ /* decompress without overrunning litPtr begins */
1389
+ {
1390
+ seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
1391
+ /* Align the decompression loop to 32 + 16 bytes.
1392
+ *
1393
+ * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression
1394
+ * speed swings based on the alignment of the decompression loop. This
1395
+ * performance swing is caused by parts of the decompression loop falling
1396
+ * out of the DSB. The entire decompression loop should fit in the DSB,
1397
+ * when it can't we get much worse performance. You can measure if you've
1398
+ * hit the good case or the bad case with this perf command for some
1399
+ * compressed file test.zst:
1400
+ *
1401
+ * perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \
1402
+ * -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst
1403
+ *
1404
+ * If you see most cycles served out of the MITE you've hit the bad case.
1405
+ * If you see most cycles served out of the DSB you've hit the good case.
1406
+ * If it is pretty even then you may be in an okay case.
1407
+ *
1408
+ * This issue has been reproduced on the following CPUs:
1409
+ * - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9
1410
+ * Use Instruments->Counters to get DSB/MITE cycles.
1411
+ * I never got performance swings, but I was able to
1412
+ * go from the good case of mostly DSB to half of the
1413
+ * cycles served from MITE.
1414
+ * - Coffeelake: Intel i9-9900k
1415
+ * - Coffeelake: Intel i7-9700k
1416
+ *
1417
+ * I haven't been able to reproduce the instability or DSB misses on any
1418
+ * of the following CPUS:
1419
+ * - Haswell
1420
+ * - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GH
1421
+ * - Skylake
1422
+ *
1423
+ * Alignment is done for each of the three major decompression loops:
1424
+ * - ZSTD_decompressSequences_bodySplitLitBuffer - presplit section of the literal buffer
1425
+ * - ZSTD_decompressSequences_bodySplitLitBuffer - postsplit section of the literal buffer
1426
+ * - ZSTD_decompressSequences_body
1427
+ * Alignment choices are made to minimize large swings on bad cases and influence on performance
1428
+ * from changes external to this code, rather than to overoptimize on the current commit.
1429
+ *
1430
+ * If you are seeing performance stability this script can help test.
1431
+ * It tests on 4 commits in zstd where I saw performance change.
1432
+ *
1433
+ * https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4
1434
+ */
1435
+ #if defined(__GNUC__) && defined(__x86_64__)
1436
+ __asm__(".p2align 6");
1437
+ # if __GNUC__ >= 7
1438
+ /* good for gcc-7, gcc-9, and gcc-11 */
1439
+ __asm__("nop");
1440
+ __asm__(".p2align 5");
1441
+ __asm__("nop");
1442
+ __asm__(".p2align 4");
1443
+ # if __GNUC__ == 8 || __GNUC__ == 10
1444
+ /* good for gcc-8 and gcc-10 */
1445
+ __asm__("nop");
1446
+ __asm__(".p2align 3");
1447
+ # endif
1448
+ # endif
1449
+ #endif
1450
+
1451
+ /* Handle the initial state where litBuffer is currently split between dst and litExtraBuffer */
1452
+ for (; litPtr + sequence.litLength <= dctx->litBufferEnd; ) {
1453
+ size_t const oneSeqSize = ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence.litLength - WILDCOPY_OVERLENGTH, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
1454
+ #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1455
+ assert(!ZSTD_isError(oneSeqSize));
1456
+ if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
1457
+ #endif
1458
+ if (UNLIKELY(ZSTD_isError(oneSeqSize)))
1459
+ return oneSeqSize;
1460
+ DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
1461
+ op += oneSeqSize;
1462
+ if (UNLIKELY(!--nbSeq))
1463
+ break;
1464
+ BIT_reloadDStream(&(seqState.DStream));
1465
+ sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
1466
+ }
1467
+
1468
+ /* If there are more sequences, they will need to read literals from litExtraBuffer; copy over the remainder from dst and update litPtr and litEnd */
1469
+ if (nbSeq > 0) {
1470
+ const size_t leftoverLit = dctx->litBufferEnd - litPtr;
1471
+ if (leftoverLit)
1472
+ {
1473
+ RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
1474
+ ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
1475
+ sequence.litLength -= leftoverLit;
1476
+ op += leftoverLit;
1477
+ }
1478
+ litPtr = dctx->litExtraBuffer;
1479
+ litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
1480
+ dctx->litBufferLocation = ZSTD_not_in_dst;
1481
+ {
1482
+ size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
1483
+ #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1484
+ assert(!ZSTD_isError(oneSeqSize));
1485
+ if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
1486
+ #endif
1487
+ if (UNLIKELY(ZSTD_isError(oneSeqSize)))
1488
+ return oneSeqSize;
1489
+ DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
1490
+ op += oneSeqSize;
1491
+ if (--nbSeq)
1492
+ BIT_reloadDStream(&(seqState.DStream));
1493
+ }
1494
+ }
1495
+ }
1496
+
1497
+ if (nbSeq > 0) /* there is remaining lit from extra buffer */
1498
+ {
1499
+
1500
+ #if defined(__GNUC__) && defined(__x86_64__)
1501
+ __asm__(".p2align 6");
1502
+ __asm__("nop");
1503
+ # if __GNUC__ != 7
1504
+ /* worse for gcc-7 better for gcc-8, gcc-9, and gcc-10 and clang */
1505
+ __asm__(".p2align 4");
1506
+ __asm__("nop");
1507
+ __asm__(".p2align 3");
1508
+ # elif __GNUC__ >= 11
1509
+ __asm__(".p2align 3");
1510
+ # else
1511
+ __asm__(".p2align 5");
1512
+ __asm__("nop");
1513
+ __asm__(".p2align 3");
1514
+ # endif
1515
+ #endif
1516
+
1517
+ for (; ; ) {
1518
+ seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
1519
+ size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
1520
+ #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1521
+ assert(!ZSTD_isError(oneSeqSize));
1522
+ if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
1523
+ #endif
1524
+ if (UNLIKELY(ZSTD_isError(oneSeqSize)))
1525
+ return oneSeqSize;
1526
+ DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
1527
+ op += oneSeqSize;
1528
+ if (UNLIKELY(!--nbSeq))
1529
+ break;
1530
+ BIT_reloadDStream(&(seqState.DStream));
1531
+ }
1532
+ }
1533
+
1534
+ /* check if reached exact end */
1535
+ DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer: after decode loop, remaining nbSeq : %i", nbSeq);
1536
+ RETURN_ERROR_IF(nbSeq, corruption_detected, "");
1537
+ RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
1538
+ /* save reps for next block */
1539
+ { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
1540
+ }
1541
+
1542
+ /* last literal segment */
1543
+ if (dctx->litBufferLocation == ZSTD_split) /* split hasn't been reached yet, first get dst then copy litExtraBuffer */
1544
+ {
1545
+ size_t const lastLLSize = litBufferEnd - litPtr;
1546
+ RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
1547
+ if (op != NULL) {
1548
+ ZSTD_memmove(op, litPtr, lastLLSize);
1549
+ op += lastLLSize;
1550
+ }
1551
+ litPtr = dctx->litExtraBuffer;
1552
+ litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
1553
+ dctx->litBufferLocation = ZSTD_not_in_dst;
1554
+ }
1555
+ { size_t const lastLLSize = litBufferEnd - litPtr;
1556
+ RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
1557
+ if (op != NULL) {
1558
+ ZSTD_memcpy(op, litPtr, lastLLSize);
1559
+ op += lastLLSize;
1560
+ }
1561
+ }
1562
+
1563
+ return op-ostart;
1564
+ }
1565
+
1566
+ FORCE_INLINE_TEMPLATE size_t
1567
+ DONT_VECTORIZE
1568
+ ZSTD_decompressSequences_body(ZSTD_DCtx* dctx,
1569
+ void* dst, size_t maxDstSize,
1570
+ const void* seqStart, size_t seqSize, int nbSeq,
1571
+ const ZSTD_longOffset_e isLongOffset,
1572
+ const int frame)
1573
+ {
1574
+ const BYTE* ip = (const BYTE*)seqStart;
1575
+ const BYTE* const iend = ip + seqSize;
1576
+ BYTE* const ostart = (BYTE*)dst;
1577
+ BYTE* const oend = dctx->litBufferLocation == ZSTD_not_in_dst ? ostart + maxDstSize : dctx->litBuffer;
1578
+ BYTE* op = ostart;
1579
+ const BYTE* litPtr = dctx->litPtr;
1580
+ const BYTE* const litEnd = litPtr + dctx->litSize;
1581
+ const BYTE* const prefixStart = (const BYTE*)(dctx->prefixStart);
1582
+ const BYTE* const vBase = (const BYTE*)(dctx->virtualStart);
1583
+ const BYTE* const dictEnd = (const BYTE*)(dctx->dictEnd);
1584
+ DEBUGLOG(5, "ZSTD_decompressSequences_body: nbSeq = %d", nbSeq);
1585
+ (void)frame;
1586
+
1587
+ /* Regen sequences */
1588
+ if (nbSeq) {
1589
+ seqState_t seqState;
1590
+ dctx->fseEntropy = 1;
1591
+ { U32 i; for (i = 0; i < ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
1592
+ RETURN_ERROR_IF(
1593
+ ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend - ip)),
1594
+ corruption_detected, "");
1595
+ ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
1596
+ ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
1597
+ ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
1598
+ assert(dst != NULL);
1599
+
1600
+ ZSTD_STATIC_ASSERT(
1601
+ BIT_DStream_unfinished < BIT_DStream_completed &&
1602
+ BIT_DStream_endOfBuffer < BIT_DStream_completed &&
1603
+ BIT_DStream_completed < BIT_DStream_overflow);
1604
+
1605
+ #if defined(__GNUC__) && defined(__x86_64__)
1606
+ __asm__(".p2align 6");
1607
+ __asm__("nop");
1608
+ # if __GNUC__ >= 7
1609
+ __asm__(".p2align 5");
1610
+ __asm__("nop");
1611
+ __asm__(".p2align 3");
1612
+ # else
1613
+ __asm__(".p2align 4");
1614
+ __asm__("nop");
1615
+ __asm__(".p2align 3");
1616
+ # endif
1617
+ #endif
1618
+
1619
+ for ( ; ; ) {
1620
+ seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
1621
+ size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
1622
+ #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1623
+ assert(!ZSTD_isError(oneSeqSize));
1624
+ if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
1625
+ #endif
1626
+ if (UNLIKELY(ZSTD_isError(oneSeqSize)))
1627
+ return oneSeqSize;
1628
+ DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
1629
+ op += oneSeqSize;
1630
+ if (UNLIKELY(!--nbSeq))
1631
+ break;
1632
+ BIT_reloadDStream(&(seqState.DStream));
1633
+ }
1634
+
1635
+ /* check if reached exact end */
1636
+ DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
1637
+ RETURN_ERROR_IF(nbSeq, corruption_detected, "");
1638
+ RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
1639
+ /* save reps for next block */
1640
+ { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
1641
+ }
1642
+
1643
+ /* last literal segment */
1644
+ { size_t const lastLLSize = litEnd - litPtr;
1645
+ RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
1646
+ if (op != NULL) {
1647
+ ZSTD_memcpy(op, litPtr, lastLLSize);
1648
+ op += lastLLSize;
1649
+ }
1650
+ }
1651
+
1652
+ return op-ostart;
1653
+ }
1654
+
1655
+ static size_t
1656
+ ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
1657
+ void* dst, size_t maxDstSize,
1658
+ const void* seqStart, size_t seqSize, int nbSeq,
1659
+ const ZSTD_longOffset_e isLongOffset,
1660
+ const int frame)
1661
+ {
1662
+ return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1663
+ }
1664
+
1665
+ static size_t
1666
+ ZSTD_decompressSequencesSplitLitBuffer_default(ZSTD_DCtx* dctx,
1667
+ void* dst, size_t maxDstSize,
1668
+ const void* seqStart, size_t seqSize, int nbSeq,
1669
+ const ZSTD_longOffset_e isLongOffset,
1670
+ const int frame)
1671
+ {
1672
+ return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1673
+ }
1674
+ #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
1675
+
1676
+ #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
1677
+
1678
+ FORCE_INLINE_TEMPLATE size_t
1679
+ ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence,
1680
+ const BYTE* const prefixStart, const BYTE* const dictEnd)
1681
+ {
1682
+ prefetchPos += sequence.litLength;
1683
+ { const BYTE* const matchBase = (sequence.offset > prefetchPos) ? dictEnd : prefixStart;
1684
+ const BYTE* const match = matchBase + prefetchPos - sequence.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
1685
+ * No consequence though : memory address is only used for prefetching, not for dereferencing */
1686
+ PREFETCH_L1(match); PREFETCH_L1(match+CACHELINE_SIZE); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
1687
+ }
1688
+ return prefetchPos + sequence.matchLength;
1689
+ }
1690
+
1691
+ /* This decoding function employs prefetching
1692
+ * to reduce latency impact of cache misses.
1693
+ * It's generally employed when block contains a significant portion of long-distance matches
1694
+ * or when coupled with a "cold" dictionary */
1695
+ FORCE_INLINE_TEMPLATE size_t
1696
+ ZSTD_decompressSequencesLong_body(
1697
+ ZSTD_DCtx* dctx,
1698
+ void* dst, size_t maxDstSize,
1699
+ const void* seqStart, size_t seqSize, int nbSeq,
1700
+ const ZSTD_longOffset_e isLongOffset,
1701
+ const int frame)
1702
+ {
1703
+ const BYTE* ip = (const BYTE*)seqStart;
1704
+ const BYTE* const iend = ip + seqSize;
1705
+ BYTE* const ostart = (BYTE*)dst;
1706
+ BYTE* const oend = dctx->litBufferLocation == ZSTD_in_dst ? dctx->litBuffer : ostart + maxDstSize;
1707
+ BYTE* op = ostart;
1708
+ const BYTE* litPtr = dctx->litPtr;
1709
+ const BYTE* litBufferEnd = dctx->litBufferEnd;
1710
+ const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
1711
+ const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);
1712
+ const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
1713
+ (void)frame;
1714
+
1715
+ /* Regen sequences */
1716
+ if (nbSeq) {
1717
+ #define STORED_SEQS 8
1718
+ #define STORED_SEQS_MASK (STORED_SEQS-1)
1719
+ #define ADVANCED_SEQS STORED_SEQS
1720
+ seq_t sequences[STORED_SEQS];
1721
+ int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS);
1722
+ seqState_t seqState;
1723
+ int seqNb;
1724
+ size_t prefetchPos = (size_t)(op-prefixStart); /* track position relative to prefixStart */
1725
+
1726
+ dctx->fseEntropy = 1;
1727
+ { int i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
1728
+ assert(dst != NULL);
1729
+ assert(iend >= ip);
1730
+ RETURN_ERROR_IF(
1731
+ ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
1732
+ corruption_detected, "");
1733
+ ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
1734
+ ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
1735
+ ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
1736
+
1737
+ /* prepare in advance */
1738
+ for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
1739
+ seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
1740
+ prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
1741
+ sequences[seqNb] = sequence;
1742
+ }
1743
+ RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected, "");
1744
+
1745
+ /* decompress without stomping litBuffer */
1746
+ for (; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb < nbSeq); seqNb++) {
1747
+ seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
1748
+ size_t oneSeqSize;
1749
+
1750
+ if (dctx->litBufferLocation == ZSTD_split && litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength > dctx->litBufferEnd)
1751
+ {
1752
+ /* lit buffer is reaching split point, empty out the first buffer and transition to litExtraBuffer */
1753
+ const size_t leftoverLit = dctx->litBufferEnd - litPtr;
1754
+ if (leftoverLit)
1755
+ {
1756
+ RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
1757
+ ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
1758
+ sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength -= leftoverLit;
1759
+ op += leftoverLit;
1760
+ }
1761
+ litPtr = dctx->litExtraBuffer;
1762
+ litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
1763
+ dctx->litBufferLocation = ZSTD_not_in_dst;
1764
+ oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
1765
+ #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1766
+ assert(!ZSTD_isError(oneSeqSize));
1767
+ if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
1768
+ #endif
1769
+ if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1770
+
1771
+ prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
1772
+ sequences[seqNb & STORED_SEQS_MASK] = sequence;
1773
+ op += oneSeqSize;
1774
+ }
1775
+ else
1776
+ {
1777
+ /* lit buffer is either wholly contained in first or second split, or not split at all*/
1778
+ oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
1779
+ ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength - WILDCOPY_OVERLENGTH, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :
1780
+ ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
1781
+ #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1782
+ assert(!ZSTD_isError(oneSeqSize));
1783
+ if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
1784
+ #endif
1785
+ if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1786
+
1787
+ prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
1788
+ sequences[seqNb & STORED_SEQS_MASK] = sequence;
1789
+ op += oneSeqSize;
1790
+ }
1791
+ }
1792
+ RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected, "");
1793
+
1794
+ /* finish queue */
1795
+ seqNb -= seqAdvance;
1796
+ for ( ; seqNb<nbSeq ; seqNb++) {
1797
+ seq_t *sequence = &(sequences[seqNb&STORED_SEQS_MASK]);
1798
+ if (dctx->litBufferLocation == ZSTD_split && litPtr + sequence->litLength > dctx->litBufferEnd)
1799
+ {
1800
+ const size_t leftoverLit = dctx->litBufferEnd - litPtr;
1801
+ if (leftoverLit)
1802
+ {
1803
+ RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
1804
+ ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
1805
+ sequence->litLength -= leftoverLit;
1806
+ op += leftoverLit;
1807
+ }
1808
+ litPtr = dctx->litExtraBuffer;
1809
+ litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
1810
+ dctx->litBufferLocation = ZSTD_not_in_dst;
1811
+ {
1812
+ size_t const oneSeqSize = ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
1813
+ #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1814
+ assert(!ZSTD_isError(oneSeqSize));
1815
+ if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
1816
+ #endif
1817
+ if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1818
+ op += oneSeqSize;
1819
+ }
1820
+ }
1821
+ else
1822
+ {
1823
+ size_t const oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
1824
+ ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence->litLength - WILDCOPY_OVERLENGTH, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :
1825
+ ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
1826
+ #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1827
+ assert(!ZSTD_isError(oneSeqSize));
1828
+ if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
1829
+ #endif
1830
+ if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1831
+ op += oneSeqSize;
1832
+ }
1833
+ }
1834
+
1835
+ /* save reps for next block */
1836
+ { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
1837
+ }
1838
+
1839
+ /* last literal segment */
1840
+ if (dctx->litBufferLocation == ZSTD_split) /* first deplete literal buffer in dst, then copy litExtraBuffer */
1841
+ {
1842
+ size_t const lastLLSize = litBufferEnd - litPtr;
1843
+ RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
1844
+ if (op != NULL) {
1845
+ ZSTD_memmove(op, litPtr, lastLLSize);
1846
+ op += lastLLSize;
1847
+ }
1848
+ litPtr = dctx->litExtraBuffer;
1849
+ litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
1850
+ }
1851
+ { size_t const lastLLSize = litBufferEnd - litPtr;
1852
+ RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
1853
+ if (op != NULL) {
1854
+ ZSTD_memmove(op, litPtr, lastLLSize);
1855
+ op += lastLLSize;
1856
+ }
1857
+ }
1858
+
1859
+ return op-ostart;
1860
+ }
1861
+
1862
+ static size_t
1863
+ ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
1864
+ void* dst, size_t maxDstSize,
1865
+ const void* seqStart, size_t seqSize, int nbSeq,
1866
+ const ZSTD_longOffset_e isLongOffset,
1867
+ const int frame)
1868
+ {
1869
+ return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1870
+ }
1871
+ #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
1872
+
1873
+
1874
+
1875
+ #if DYNAMIC_BMI2
1876
+
1877
+ #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
1878
+ static BMI2_TARGET_ATTRIBUTE size_t
1879
+ DONT_VECTORIZE
1880
+ ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
1881
+ void* dst, size_t maxDstSize,
1882
+ const void* seqStart, size_t seqSize, int nbSeq,
1883
+ const ZSTD_longOffset_e isLongOffset,
1884
+ const int frame)
1885
+ {
1886
+ return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1887
+ }
1888
+ static BMI2_TARGET_ATTRIBUTE size_t
1889
+ DONT_VECTORIZE
1890
+ ZSTD_decompressSequencesSplitLitBuffer_bmi2(ZSTD_DCtx* dctx,
1891
+ void* dst, size_t maxDstSize,
1892
+ const void* seqStart, size_t seqSize, int nbSeq,
1893
+ const ZSTD_longOffset_e isLongOffset,
1894
+ const int frame)
1895
+ {
1896
+ return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1897
+ }
1898
+ #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
1899
+
1900
+ #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
1901
+ static BMI2_TARGET_ATTRIBUTE size_t
1902
+ ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
1903
+ void* dst, size_t maxDstSize,
1904
+ const void* seqStart, size_t seqSize, int nbSeq,
1905
+ const ZSTD_longOffset_e isLongOffset,
1906
+ const int frame)
1907
+ {
1908
+ return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1909
+ }
1910
+ #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
1911
+
1912
+ #endif /* DYNAMIC_BMI2 */
1913
+
1914
+ typedef size_t (*ZSTD_decompressSequences_t)(
1915
+ ZSTD_DCtx* dctx,
1916
+ void* dst, size_t maxDstSize,
1917
+ const void* seqStart, size_t seqSize, int nbSeq,
1918
+ const ZSTD_longOffset_e isLongOffset,
1919
+ const int frame);
1920
+
1921
+ #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
1922
+ static size_t
1923
+ ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
1924
+ const void* seqStart, size_t seqSize, int nbSeq,
1925
+ const ZSTD_longOffset_e isLongOffset,
1926
+ const int frame)
1927
+ {
1928
+ DEBUGLOG(5, "ZSTD_decompressSequences");
1929
+ #if DYNAMIC_BMI2
1930
+ if (ZSTD_DCtx_get_bmi2(dctx)) {
1931
+ return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1932
+ }
1933
+ #endif
1934
+ return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1935
+ }
1936
+ static size_t
1937
+ ZSTD_decompressSequencesSplitLitBuffer(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
1938
+ const void* seqStart, size_t seqSize, int nbSeq,
1939
+ const ZSTD_longOffset_e isLongOffset,
1940
+ const int frame)
1941
+ {
1942
+ DEBUGLOG(5, "ZSTD_decompressSequencesSplitLitBuffer");
1943
+ #if DYNAMIC_BMI2
1944
+ if (ZSTD_DCtx_get_bmi2(dctx)) {
1945
+ return ZSTD_decompressSequencesSplitLitBuffer_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1946
+ }
1947
+ #endif
1948
+ return ZSTD_decompressSequencesSplitLitBuffer_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1949
+ }
1950
+ #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
1951
+
1952
+
1953
+ #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
1954
+ /* ZSTD_decompressSequencesLong() :
1955
+ * decompression function triggered when a minimum share of offsets is considered "long",
1956
+ * aka out of cache.
1957
+ * note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes meaning "farther than memory cache distance".
1958
+ * This function will try to mitigate main memory latency through the use of prefetching */
1959
+ static size_t
1960
+ ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
1961
+ void* dst, size_t maxDstSize,
1962
+ const void* seqStart, size_t seqSize, int nbSeq,
1963
+ const ZSTD_longOffset_e isLongOffset,
1964
+ const int frame)
1965
+ {
1966
+ DEBUGLOG(5, "ZSTD_decompressSequencesLong");
1967
+ #if DYNAMIC_BMI2
1968
+ if (ZSTD_DCtx_get_bmi2(dctx)) {
1969
+ return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1970
+ }
1971
+ #endif
1972
+ return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1973
+ }
1974
+ #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
1975
+
1976
+
1977
+ /**
1978
+ * @returns The total size of the history referenceable by zstd, including
1979
+ * both the prefix and the extDict. At @p op any offset larger than this
1980
+ * is invalid.
1981
+ */
1982
+ static size_t ZSTD_totalHistorySize(BYTE* op, BYTE const* virtualStart)
1983
+ {
1984
+ return (size_t)(op - virtualStart);
1985
+ }
1986
+
1987
+ typedef struct {
1988
+ unsigned longOffsetShare;
1989
+ unsigned maxNbAdditionalBits;
1990
+ } ZSTD_OffsetInfo;
1991
+
1992
+ /* ZSTD_getOffsetInfo() :
1993
+ * condition : offTable must be valid
1994
+ * @return : "share" of long offsets (arbitrarily defined as > (1<<23))
1995
+ * compared to maximum possible of (1<<OffFSELog),
1996
+ * as well as the maximum number additional bits required.
1997
+ */
1998
+ static ZSTD_OffsetInfo
1999
+ ZSTD_getOffsetInfo(const ZSTD_seqSymbol* offTable, int nbSeq)
2000
+ {
2001
+ ZSTD_OffsetInfo info = {0, 0};
2002
+ /* If nbSeq == 0, then the offTable is uninitialized, but we have
2003
+ * no sequences, so both values should be 0.
2004
+ */
2005
+ if (nbSeq != 0) {
2006
+ const void* ptr = offTable;
2007
+ U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog;
2008
+ const ZSTD_seqSymbol* table = offTable + 1;
2009
+ U32 const max = 1 << tableLog;
2010
+ U32 u;
2011
+ DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog);
2012
+
2013
+ assert(max <= (1 << OffFSELog)); /* max not too large */
2014
+ for (u=0; u<max; u++) {
2015
+ info.maxNbAdditionalBits = MAX(info.maxNbAdditionalBits, table[u].nbAdditionalBits);
2016
+ if (table[u].nbAdditionalBits > 22) info.longOffsetShare += 1;
2017
+ }
2018
+
2019
+ assert(tableLog <= OffFSELog);
2020
+ info.longOffsetShare <<= (OffFSELog - tableLog); /* scale to OffFSELog */
2021
+ }
2022
+
2023
+ return info;
2024
+ }
2025
+
2026
+ /**
2027
+ * @returns The maximum offset we can decode in one read of our bitstream, without
2028
+ * reloading more bits in the middle of the offset bits read. Any offsets larger
2029
+ * than this must use the long offset decoder.
2030
+ */
2031
+ static size_t ZSTD_maxShortOffset(void)
2032
+ {
2033
+ if (MEM_64bits()) {
2034
+ /* We can decode any offset without reloading bits.
2035
+ * This might change if the max window size grows.
2036
+ */
2037
+ ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31);
2038
+ return (size_t)-1;
2039
+ } else {
2040
+ /* The maximum offBase is (1 << (STREAM_ACCUMULATOR_MIN + 1)) - 1.
2041
+ * This offBase would require STREAM_ACCUMULATOR_MIN extra bits.
2042
+ * Then we have to subtract ZSTD_REP_NUM to get the maximum possible offset.
2043
+ */
2044
+ size_t const maxOffbase = ((size_t)1 << (STREAM_ACCUMULATOR_MIN + 1)) - 1;
2045
+ size_t const maxOffset = maxOffbase - ZSTD_REP_NUM;
2046
+ assert(ZSTD_highbit32((U32)maxOffbase) == STREAM_ACCUMULATOR_MIN);
2047
+ return maxOffset;
2048
+ }
2049
+ }
2050
+
2051
+ size_t
2052
+ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
2053
+ void* dst, size_t dstCapacity,
2054
+ const void* src, size_t srcSize, const int frame, const streaming_operation streaming)
2055
+ { /* blockType == blockCompressed */
2056
+ const BYTE* ip = (const BYTE*)src;
2057
+ DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
2058
+
2059
+ /* Note : the wording of the specification
2060
+ * allows compressed block to be sized exactly ZSTD_BLOCKSIZE_MAX.
2061
+ * This generally does not happen, as it makes little sense,
2062
+ * since an uncompressed block would feature same size and have no decompression cost.
2063
+ * Also, note that decoder from reference libzstd before < v1.5.4
2064
+ * would consider this edge case as an error.
2065
+ * As a consequence, avoid generating compressed blocks of size ZSTD_BLOCKSIZE_MAX
2066
+ * for broader compatibility with the deployed ecosystem of zstd decoders */
2067
+ RETURN_ERROR_IF(srcSize > ZSTD_BLOCKSIZE_MAX, srcSize_wrong, "");
2068
+
2069
+ /* Decode literals section */
2070
+ { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, streaming);
2071
+ DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : cSize=%u, nbLiterals=%zu", (U32)litCSize, dctx->litSize);
2072
+ if (ZSTD_isError(litCSize)) return litCSize;
2073
+ ip += litCSize;
2074
+ srcSize -= litCSize;
2075
+ }
2076
+
2077
+ /* Build Decoding Tables */
2078
+ {
2079
+ /* Compute the maximum block size, which must also work when !frame and fParams are unset.
2080
+ * Additionally, take the min with dstCapacity to ensure that the totalHistorySize fits in a size_t.
2081
+ */
2082
+ size_t const blockSizeMax = MIN(dstCapacity, (frame ? dctx->fParams.blockSizeMax : ZSTD_BLOCKSIZE_MAX));
2083
+ size_t const totalHistorySize = ZSTD_totalHistorySize((BYTE*)dst + blockSizeMax, (BYTE const*)dctx->virtualStart);
2084
+ /* isLongOffset must be true if there are long offsets.
2085
+ * Offsets are long if they are larger than ZSTD_maxShortOffset().
2086
+ * We don't expect that to be the case in 64-bit mode.
2087
+ *
2088
+ * We check here to see if our history is large enough to allow long offsets.
2089
+ * If it isn't, then we can't possible have (valid) long offsets. If the offset
2090
+ * is invalid, then it is okay to read it incorrectly.
2091
+ *
2092
+ * If isLongOffsets is true, then we will later check our decoding table to see
2093
+ * if it is even possible to generate long offsets.
2094
+ */
2095
+ ZSTD_longOffset_e isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (totalHistorySize > ZSTD_maxShortOffset()));
2096
+ /* These macros control at build-time which decompressor implementation
2097
+ * we use. If neither is defined, we do some inspection and dispatch at
2098
+ * runtime.
2099
+ */
2100
+ #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
2101
+ !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
2102
+ int usePrefetchDecoder = dctx->ddictIsCold;
2103
+ #else
2104
+ /* Set to 1 to avoid computing offset info if we don't need to.
2105
+ * Otherwise this value is ignored.
2106
+ */
2107
+ int usePrefetchDecoder = 1;
2108
+ #endif
2109
+ int nbSeq;
2110
+ size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize);
2111
+ if (ZSTD_isError(seqHSize)) return seqHSize;
2112
+ ip += seqHSize;
2113
+ srcSize -= seqHSize;
2114
+
2115
+ RETURN_ERROR_IF((dst == NULL || dstCapacity == 0) && nbSeq > 0, dstSize_tooSmall, "NULL not handled");
2116
+ RETURN_ERROR_IF(MEM_64bits() && sizeof(size_t) == sizeof(void*) && (size_t)(-1) - (size_t)dst < (size_t)(1 << 20), dstSize_tooSmall,
2117
+ "invalid dst");
2118
+
2119
+ /* If we could potentially have long offsets, or we might want to use the prefetch decoder,
2120
+ * compute information about the share of long offsets, and the maximum nbAdditionalBits.
2121
+ * NOTE: could probably use a larger nbSeq limit
2122
+ */
2123
+ if (isLongOffset || (!usePrefetchDecoder && (totalHistorySize > (1u << 24)) && (nbSeq > 8))) {
2124
+ ZSTD_OffsetInfo const info = ZSTD_getOffsetInfo(dctx->OFTptr, nbSeq);
2125
+ if (isLongOffset && info.maxNbAdditionalBits <= STREAM_ACCUMULATOR_MIN) {
2126
+ /* If isLongOffset, but the maximum number of additional bits that we see in our table is small
2127
+ * enough, then we know it is impossible to have too long an offset in this block, so we can
2128
+ * use the regular offset decoder.
2129
+ */
2130
+ isLongOffset = ZSTD_lo_isRegularOffset;
2131
+ }
2132
+ if (!usePrefetchDecoder) {
2133
+ U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */
2134
+ usePrefetchDecoder = (info.longOffsetShare >= minShare);
2135
+ }
2136
+ }
2137
+
2138
+ dctx->ddictIsCold = 0;
2139
+
2140
+ #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
2141
+ !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
2142
+ if (usePrefetchDecoder) {
2143
+ #else
2144
+ (void)usePrefetchDecoder;
2145
+ {
2146
+ #endif
2147
+ #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
2148
+ return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
2149
+ #endif
2150
+ }
2151
+
2152
+ #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
2153
+ /* else */
2154
+ if (dctx->litBufferLocation == ZSTD_split)
2155
+ return ZSTD_decompressSequencesSplitLitBuffer(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
2156
+ else
2157
+ return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
2158
+ #endif
2159
+ }
2160
+ }
2161
+
2162
+
2163
+ void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize)
2164
+ {
2165
+ if (dst != dctx->previousDstEnd && dstSize > 0) { /* not contiguous */
2166
+ dctx->dictEnd = dctx->previousDstEnd;
2167
+ dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
2168
+ dctx->prefixStart = dst;
2169
+ dctx->previousDstEnd = dst;
2170
+ }
2171
+ }
2172
+
2173
+
2174
+ size_t ZSTD_decompressBlock_deprecated(ZSTD_DCtx* dctx,
2175
+ void* dst, size_t dstCapacity,
2176
+ const void* src, size_t srcSize)
2177
+ {
2178
+ size_t dSize;
2179
+ ZSTD_checkContinuity(dctx, dst, dstCapacity);
2180
+ dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0, not_streaming);
2181
+ dctx->previousDstEnd = (char*)dst + dSize;
2182
+ return dSize;
2183
+ }
2184
+
2185
+
2186
+ /* NOTE: Must just wrap ZSTD_decompressBlock_deprecated() */
2187
+ size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
2188
+ void* dst, size_t dstCapacity,
2189
+ const void* src, size_t srcSize)
2190
+ {
2191
+ return ZSTD_decompressBlock_deprecated(dctx, dst, dstCapacity, src, srcSize);
2192
+ }