duckdb 1.1.4-dev2.0 → 1.1.4-dev9.0

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (1747):
  1. package/.github/workflows/NodeJS.yml +5 -54
  2. package/binding.gyp +73 -52
  3. package/package.json +1 -1
  4. package/src/duckdb/{src → extension}/core_functions/aggregate/algebraic/avg.cpp +2 -2
  5. package/src/duckdb/{src → extension}/core_functions/aggregate/algebraic/corr.cpp +4 -4
  6. package/src/duckdb/{src → extension}/core_functions/aggregate/algebraic/covar.cpp +2 -2
  7. package/src/duckdb/{src → extension}/core_functions/aggregate/algebraic/stddev.cpp +2 -2
  8. package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/approx_count.cpp +1 -1
  9. package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/arg_min_max.cpp +66 -18
  10. package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/bitagg.cpp +1 -1
  11. package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/bitstring_agg.cpp +5 -7
  12. package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/bool.cpp +3 -1
  13. package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/kurtosis.cpp +1 -1
  14. package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/product.cpp +1 -1
  15. package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/skew.cpp +2 -2
  16. package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/string_agg.cpp +1 -1
  17. package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/sum.cpp +13 -2
  18. package/src/duckdb/{src → extension}/core_functions/aggregate/holistic/approx_top_k.cpp +3 -3
  19. package/src/duckdb/{src → extension}/core_functions/aggregate/holistic/approximate_quantile.cpp +51 -15
  20. package/src/duckdb/{src → extension}/core_functions/aggregate/holistic/mad.cpp +25 -10
  21. package/src/duckdb/{src → extension}/core_functions/aggregate/holistic/mode.cpp +215 -71
  22. package/src/duckdb/{src → extension}/core_functions/aggregate/holistic/quantile.cpp +58 -31
  23. package/src/duckdb/{src → extension}/core_functions/aggregate/holistic/reservoir_quantile.cpp +2 -2
  24. package/src/duckdb/{src → extension}/core_functions/aggregate/nested/binned_histogram.cpp +9 -4
  25. package/src/duckdb/{src → extension}/core_functions/aggregate/nested/histogram.cpp +4 -2
  26. package/src/duckdb/{src → extension}/core_functions/aggregate/nested/list.cpp +1 -1
  27. package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_avg.cpp +1 -1
  28. package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_count.cpp +2 -2
  29. package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_intercept.cpp +6 -2
  30. package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_r2.cpp +2 -2
  31. package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_slope.cpp +2 -2
  32. package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_sxx_syy.cpp +2 -2
  33. package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_sxy.cpp +3 -3
  34. package/src/duckdb/extension/core_functions/core_functions_extension.cpp +85 -0
  35. package/src/duckdb/{src → extension}/core_functions/function_list.cpp +30 -51
  36. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/algebraic/corr.hpp +3 -7
  37. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/algebraic_functions.hpp +1 -1
  38. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/distributive_functions.hpp +16 -21
  39. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/histogram_helpers.hpp +1 -1
  40. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/holistic_functions.hpp +1 -1
  41. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/nested_functions.hpp +1 -1
  42. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/quantile_helpers.hpp +2 -2
  43. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/quantile_sort_tree.hpp +140 -58
  44. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/quantile_state.hpp +50 -43
  45. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/regression/regr_count.hpp +2 -2
  46. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/regression/regr_slope.hpp +3 -7
  47. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/regression_functions.hpp +1 -1
  48. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/array_functions.hpp +1 -1
  49. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/bit_functions.hpp +1 -1
  50. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/blob_functions.hpp +1 -10
  51. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/date_functions.hpp +22 -55
  52. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/debug_functions.hpp +1 -1
  53. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/enum_functions.hpp +1 -1
  54. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/generic_functions.hpp +1 -10
  55. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/list_functions.hpp +4 -4
  56. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/map_functions.hpp +1 -10
  57. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/math_functions.hpp +1 -1
  58. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/operators_functions.hpp +1 -1
  59. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/random_functions.hpp +1 -1
  60. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/string_functions.hpp +10 -103
  61. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/struct_functions.hpp +1 -19
  62. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/union_functions.hpp +1 -1
  63. package/src/duckdb/extension/core_functions/include/core_functions_extension.hpp +22 -0
  64. package/src/duckdb/{src → extension}/core_functions/lambda_functions.cpp +1 -1
  65. package/src/duckdb/{src → extension}/core_functions/scalar/array/array_functions.cpp +11 -4
  66. package/src/duckdb/{src → extension}/core_functions/scalar/array/array_value.cpp +2 -2
  67. package/src/duckdb/{src → extension}/core_functions/scalar/bit/bitstring.cpp +12 -5
  68. package/src/duckdb/{src → extension}/core_functions/scalar/blob/base64.cpp +4 -2
  69. package/src/duckdb/{src → extension}/core_functions/scalar/blob/encode.cpp +4 -2
  70. package/src/duckdb/{src → extension}/core_functions/scalar/date/age.cpp +9 -3
  71. package/src/duckdb/extension/core_functions/scalar/date/current.cpp +29 -0
  72. package/src/duckdb/{src → extension}/core_functions/scalar/date/date_diff.cpp +1 -1
  73. package/src/duckdb/{src → extension}/core_functions/scalar/date/date_part.cpp +42 -9
  74. package/src/duckdb/{src → extension}/core_functions/scalar/date/date_sub.cpp +1 -1
  75. package/src/duckdb/{src → extension}/core_functions/scalar/date/date_trunc.cpp +4 -1
  76. package/src/duckdb/{src → extension}/core_functions/scalar/date/epoch.cpp +19 -3
  77. package/src/duckdb/{src → extension}/core_functions/scalar/date/make_date.cpp +40 -5
  78. package/src/duckdb/{src → extension}/core_functions/scalar/date/time_bucket.cpp +4 -1
  79. package/src/duckdb/{src → extension}/core_functions/scalar/date/to_interval.cpp +54 -28
  80. package/src/duckdb/{src → extension}/core_functions/scalar/debug/vector_type.cpp +1 -1
  81. package/src/duckdb/{src → extension}/core_functions/scalar/enum/enum_functions.cpp +2 -7
  82. package/src/duckdb/{src → extension}/core_functions/scalar/generic/alias.cpp +2 -2
  83. package/src/duckdb/{src/function → extension/core_functions}/scalar/generic/binning.cpp +4 -3
  84. package/src/duckdb/{src → extension}/core_functions/scalar/generic/can_implicitly_cast.cpp +1 -1
  85. package/src/duckdb/{src → extension}/core_functions/scalar/generic/current_setting.cpp +1 -1
  86. package/src/duckdb/{src → extension}/core_functions/scalar/generic/hash.cpp +1 -1
  87. package/src/duckdb/{src → extension}/core_functions/scalar/generic/least.cpp +30 -10
  88. package/src/duckdb/{src → extension}/core_functions/scalar/generic/stats.cpp +1 -1
  89. package/src/duckdb/{src → extension}/core_functions/scalar/generic/system_functions.cpp +1 -1
  90. package/src/duckdb/{src → extension}/core_functions/scalar/generic/typeof.cpp +1 -1
  91. package/src/duckdb/{src → extension}/core_functions/scalar/list/array_slice.cpp +93 -88
  92. package/src/duckdb/{src → extension}/core_functions/scalar/list/flatten.cpp +1 -1
  93. package/src/duckdb/{src → extension}/core_functions/scalar/list/list_aggregates.cpp +7 -3
  94. package/src/duckdb/{src → extension}/core_functions/scalar/list/list_distance.cpp +8 -2
  95. package/src/duckdb/{src → extension}/core_functions/scalar/list/list_filter.cpp +3 -3
  96. package/src/duckdb/{src → extension}/core_functions/scalar/list/list_has_any_or_all.cpp +3 -3
  97. package/src/duckdb/{src → extension}/core_functions/scalar/list/list_reduce.cpp +5 -5
  98. package/src/duckdb/{src → extension}/core_functions/scalar/list/list_sort.cpp +1 -1
  99. package/src/duckdb/{src → extension}/core_functions/scalar/list/list_transform.cpp +3 -3
  100. package/src/duckdb/{src → extension}/core_functions/scalar/list/list_value.cpp +3 -3
  101. package/src/duckdb/{src → extension}/core_functions/scalar/list/range.cpp +7 -1
  102. package/src/duckdb/{src → extension}/core_functions/scalar/map/cardinality.cpp +1 -1
  103. package/src/duckdb/{src → extension}/core_functions/scalar/map/map.cpp +5 -4
  104. package/src/duckdb/{src → extension}/core_functions/scalar/map/map_concat.cpp +1 -1
  105. package/src/duckdb/{src → extension}/core_functions/scalar/map/map_entries.cpp +1 -1
  106. package/src/duckdb/{src → extension}/core_functions/scalar/map/map_extract.cpp +13 -25
  107. package/src/duckdb/{src → extension}/core_functions/scalar/map/map_from_entries.cpp +2 -1
  108. package/src/duckdb/{src → extension}/core_functions/scalar/map/map_keys_values.cpp +11 -9
  109. package/src/duckdb/{src → extension}/core_functions/scalar/math/numeric.cpp +83 -37
  110. package/src/duckdb/{src → extension}/core_functions/scalar/operators/bitwise.cpp +19 -1
  111. package/src/duckdb/{src → extension}/core_functions/scalar/random/random.cpp +4 -3
  112. package/src/duckdb/{src → extension}/core_functions/scalar/random/setseed.cpp +2 -1
  113. package/src/duckdb/{src → extension}/core_functions/scalar/string/ascii.cpp +1 -1
  114. package/src/duckdb/{src → extension}/core_functions/scalar/string/bar.cpp +6 -4
  115. package/src/duckdb/{src → extension}/core_functions/scalar/string/chr.cpp +1 -1
  116. package/src/duckdb/{src → extension}/core_functions/scalar/string/damerau_levenshtein.cpp +1 -1
  117. package/src/duckdb/{src → extension}/core_functions/scalar/string/format_bytes.cpp +1 -1
  118. package/src/duckdb/{src → extension}/core_functions/scalar/string/hamming.cpp +1 -1
  119. package/src/duckdb/{src → extension}/core_functions/scalar/string/hex.cpp +7 -3
  120. package/src/duckdb/{src → extension}/core_functions/scalar/string/instr.cpp +4 -4
  121. package/src/duckdb/{src → extension}/core_functions/scalar/string/jaccard.cpp +1 -1
  122. package/src/duckdb/extension/core_functions/scalar/string/jaro_winkler.cpp +112 -0
  123. package/src/duckdb/{src → extension}/core_functions/scalar/string/left_right.cpp +6 -6
  124. package/src/duckdb/{src → extension}/core_functions/scalar/string/levenshtein.cpp +1 -1
  125. package/src/duckdb/{src → extension}/core_functions/scalar/string/pad.cpp +9 -5
  126. package/src/duckdb/{src → extension}/core_functions/scalar/string/parse_path.cpp +4 -4
  127. package/src/duckdb/{src → extension}/core_functions/scalar/string/printf.cpp +3 -1
  128. package/src/duckdb/{src → extension}/core_functions/scalar/string/repeat.cpp +4 -1
  129. package/src/duckdb/{src → extension}/core_functions/scalar/string/replace.cpp +1 -1
  130. package/src/duckdb/{src → extension}/core_functions/scalar/string/reverse.cpp +1 -1
  131. package/src/duckdb/{src → extension}/core_functions/scalar/string/starts_with.cpp +5 -3
  132. package/src/duckdb/{src → extension}/core_functions/scalar/string/to_base.cpp +1 -1
  133. package/src/duckdb/{src → extension}/core_functions/scalar/string/translate.cpp +1 -1
  134. package/src/duckdb/{src → extension}/core_functions/scalar/string/trim.cpp +1 -1
  135. package/src/duckdb/{src → extension}/core_functions/scalar/string/unicode.cpp +1 -1
  136. package/src/duckdb/{src → extension}/core_functions/scalar/string/url_encode.cpp +1 -1
  137. package/src/duckdb/{src → extension}/core_functions/scalar/struct/struct_insert.cpp +25 -31
  138. package/src/duckdb/{src → extension}/core_functions/scalar/union/union_extract.cpp +1 -1
  139. package/src/duckdb/{src → extension}/core_functions/scalar/union/union_tag.cpp +1 -1
  140. package/src/duckdb/{src → extension}/core_functions/scalar/union/union_value.cpp +3 -3
  141. package/src/duckdb/extension/icu/icu-dateadd.cpp +16 -11
  142. package/src/duckdb/extension/icu/icu-datefunc.cpp +2 -2
  143. package/src/duckdb/extension/icu/icu-datepart.cpp +8 -5
  144. package/src/duckdb/extension/icu/icu-datesub.cpp +2 -2
  145. package/src/duckdb/extension/icu/icu-datetrunc.cpp +8 -1
  146. package/src/duckdb/extension/icu/icu-list-range.cpp +2 -2
  147. package/src/duckdb/extension/icu/icu-makedate.cpp +18 -7
  148. package/src/duckdb/extension/icu/icu-strptime.cpp +9 -3
  149. package/src/duckdb/extension/icu/icu-table-range.cpp +2 -2
  150. package/src/duckdb/extension/icu/icu-timebucket.cpp +4 -1
  151. package/src/duckdb/extension/icu/icu-timezone.cpp +67 -1
  152. package/src/duckdb/extension/icu/icu_extension.cpp +60 -5
  153. package/src/duckdb/extension/icu/include/icu-datefunc.hpp +2 -1
  154. package/src/duckdb/extension/icu/third_party/icu/common/bytestriebuilder.cpp +1 -1
  155. package/src/duckdb/extension/icu/third_party/icu/common/dtintrv.cpp +1 -1
  156. package/src/duckdb/extension/icu/third_party/icu/common/filteredbrk.cpp +1 -1
  157. package/src/duckdb/extension/icu/third_party/icu/common/locid.cpp +1 -1
  158. package/src/duckdb/extension/icu/third_party/icu/common/lsr.cpp +1 -1
  159. package/src/duckdb/extension/icu/third_party/icu/common/lsr.h +2 -2
  160. package/src/duckdb/extension/icu/third_party/icu/common/messagepattern.cpp +2 -2
  161. package/src/duckdb/extension/icu/third_party/icu/common/normlzr.cpp +1 -1
  162. package/src/duckdb/extension/icu/third_party/icu/common/rbbinode.h +1 -1
  163. package/src/duckdb/extension/icu/third_party/icu/common/schriter.cpp +1 -1
  164. package/src/duckdb/extension/icu/third_party/icu/common/stringtriebuilder.cpp +8 -8
  165. package/src/duckdb/extension/icu/third_party/icu/common/ucharstriebuilder.cpp +1 -1
  166. package/src/duckdb/extension/icu/third_party/icu/common/uchriter.cpp +1 -1
  167. package/src/duckdb/extension/icu/third_party/icu/common/unicode/brkiter.h +2 -2
  168. package/src/duckdb/extension/icu/third_party/icu/common/unicode/bytestriebuilder.h +1 -1
  169. package/src/duckdb/extension/icu/third_party/icu/common/unicode/chariter.h +3 -3
  170. package/src/duckdb/extension/icu/third_party/icu/common/unicode/dtintrv.h +3 -3
  171. package/src/duckdb/extension/icu/third_party/icu/common/unicode/locid.h +3 -3
  172. package/src/duckdb/extension/icu/third_party/icu/common/unicode/messagepattern.h +4 -4
  173. package/src/duckdb/extension/icu/third_party/icu/common/unicode/normlzr.h +3 -3
  174. package/src/duckdb/extension/icu/third_party/icu/common/unicode/parsepos.h +4 -4
  175. package/src/duckdb/extension/icu/third_party/icu/common/unicode/rbbi.h +2 -2
  176. package/src/duckdb/extension/icu/third_party/icu/common/unicode/schriter.h +1 -1
  177. package/src/duckdb/extension/icu/third_party/icu/common/unicode/strenum.h +2 -2
  178. package/src/duckdb/extension/icu/third_party/icu/common/unicode/stringpiece.h +1 -1
  179. package/src/duckdb/extension/icu/third_party/icu/common/unicode/stringtriebuilder.h +9 -9
  180. package/src/duckdb/extension/icu/third_party/icu/common/unicode/ucharstriebuilder.h +1 -1
  181. package/src/duckdb/extension/icu/third_party/icu/common/unicode/uchriter.h +1 -1
  182. package/src/duckdb/extension/icu/third_party/icu/common/unicode/uniset.h +3 -3
  183. package/src/duckdb/extension/icu/third_party/icu/common/unicode/unistr.h +12 -12
  184. package/src/duckdb/extension/icu/third_party/icu/common/unicode/uobject.h +2 -2
  185. package/src/duckdb/extension/icu/third_party/icu/common/unifiedcache.h +4 -4
  186. package/src/duckdb/extension/icu/third_party/icu/common/uniset.cpp +1 -1
  187. package/src/duckdb/extension/icu/third_party/icu/common/ustr_titlecase_brkiter.cpp +1 -1
  188. package/src/duckdb/extension/icu/third_party/icu/common/ustrenum.cpp +2 -2
  189. package/src/duckdb/extension/icu/third_party/icu/common/uvector.cpp +1 -1
  190. package/src/duckdb/extension/icu/third_party/icu/common/uvector.h +3 -3
  191. package/src/duckdb/extension/icu/third_party/icu/common/uvectr32.cpp +1 -1
  192. package/src/duckdb/extension/icu/third_party/icu/common/uvectr32.h +3 -3
  193. package/src/duckdb/extension/icu/third_party/icu/common/uvectr64.cpp +1 -1
  194. package/src/duckdb/extension/icu/third_party/icu/common/uvectr64.h +3 -3
  195. package/src/duckdb/extension/icu/third_party/icu/i18n/alphaindex.cpp +2 -2
  196. package/src/duckdb/extension/icu/third_party/icu/i18n/calendar.cpp +1 -1
  197. package/src/duckdb/extension/icu/third_party/icu/i18n/choicfmt.cpp +1 -1
  198. package/src/duckdb/extension/icu/third_party/icu/i18n/coleitr.cpp +2 -2
  199. package/src/duckdb/extension/icu/third_party/icu/i18n/coll.cpp +2 -2
  200. package/src/duckdb/extension/icu/third_party/icu/i18n/collationiterator.cpp +1 -1
  201. package/src/duckdb/extension/icu/third_party/icu/i18n/collationiterator.h +2 -2
  202. package/src/duckdb/extension/icu/third_party/icu/i18n/collationsettings.cpp +1 -1
  203. package/src/duckdb/extension/icu/third_party/icu/i18n/collationsettings.h +2 -2
  204. package/src/duckdb/extension/icu/third_party/icu/i18n/currpinf.cpp +1 -1
  205. package/src/duckdb/extension/icu/third_party/icu/i18n/datefmt.cpp +2 -2
  206. package/src/duckdb/extension/icu/third_party/icu/i18n/dcfmtsym.cpp +1 -1
  207. package/src/duckdb/extension/icu/third_party/icu/i18n/decimfmt.cpp +1 -1
  208. package/src/duckdb/extension/icu/third_party/icu/i18n/dtfmtsym.cpp +1 -1
  209. package/src/duckdb/extension/icu/third_party/icu/i18n/dtitvfmt.cpp +1 -1
  210. package/src/duckdb/extension/icu/third_party/icu/i18n/dtitvinf.cpp +1 -1
  211. package/src/duckdb/extension/icu/third_party/icu/i18n/dtptngen.cpp +2 -2
  212. package/src/duckdb/extension/icu/third_party/icu/i18n/dtptngen_impl.h +4 -4
  213. package/src/duckdb/extension/icu/third_party/icu/i18n/dtrule.cpp +2 -2
  214. package/src/duckdb/extension/icu/third_party/icu/i18n/fmtable.cpp +1 -1
  215. package/src/duckdb/extension/icu/third_party/icu/i18n/format.cpp +1 -1
  216. package/src/duckdb/extension/icu/third_party/icu/i18n/fpositer.cpp +1 -1
  217. package/src/duckdb/extension/icu/third_party/icu/i18n/measfmt.cpp +1 -1
  218. package/src/duckdb/extension/icu/third_party/icu/i18n/measunit.cpp +1 -1
  219. package/src/duckdb/extension/icu/third_party/icu/i18n/measure.cpp +1 -1
  220. package/src/duckdb/extension/icu/third_party/icu/i18n/msgfmt.cpp +2 -2
  221. package/src/duckdb/extension/icu/third_party/icu/i18n/nfrs.cpp +1 -1
  222. package/src/duckdb/extension/icu/third_party/icu/i18n/nfrs.h +2 -2
  223. package/src/duckdb/extension/icu/third_party/icu/i18n/nfrule.cpp +1 -1
  224. package/src/duckdb/extension/icu/third_party/icu/i18n/nfrule.h +2 -2
  225. package/src/duckdb/extension/icu/third_party/icu/i18n/nfsubs.cpp +9 -9
  226. package/src/duckdb/extension/icu/third_party/icu/i18n/nfsubs.h +2 -2
  227. package/src/duckdb/extension/icu/third_party/icu/i18n/number_asformat.cpp +1 -1
  228. package/src/duckdb/extension/icu/third_party/icu/i18n/number_asformat.h +1 -1
  229. package/src/duckdb/extension/icu/third_party/icu/i18n/numfmt.cpp +1 -1
  230. package/src/duckdb/extension/icu/third_party/icu/i18n/olsontz.cpp +1 -1
  231. package/src/duckdb/extension/icu/third_party/icu/i18n/olsontz.h +1 -1
  232. package/src/duckdb/extension/icu/third_party/icu/i18n/plurfmt.cpp +2 -2
  233. package/src/duckdb/extension/icu/third_party/icu/i18n/plurrule.cpp +1 -1
  234. package/src/duckdb/extension/icu/third_party/icu/i18n/rbnf.cpp +4 -4
  235. package/src/duckdb/extension/icu/third_party/icu/i18n/rbtz.cpp +2 -2
  236. package/src/duckdb/extension/icu/third_party/icu/i18n/region.cpp +2 -2
  237. package/src/duckdb/extension/icu/third_party/icu/i18n/reldtfmt.cpp +1 -1
  238. package/src/duckdb/extension/icu/third_party/icu/i18n/reldtfmt.h +1 -1
  239. package/src/duckdb/extension/icu/third_party/icu/i18n/rulebasedcollator.cpp +1 -1
  240. package/src/duckdb/extension/icu/third_party/icu/i18n/selfmt.cpp +2 -2
  241. package/src/duckdb/extension/icu/third_party/icu/i18n/simpletz.cpp +1 -1
  242. package/src/duckdb/extension/icu/third_party/icu/i18n/smpdtfmt.cpp +1 -1
  243. package/src/duckdb/extension/icu/third_party/icu/i18n/sortkey.cpp +1 -1
  244. package/src/duckdb/extension/icu/third_party/icu/i18n/timezone.cpp +1 -1
  245. package/src/duckdb/extension/icu/third_party/icu/i18n/tmutamt.cpp +1 -1
  246. package/src/duckdb/extension/icu/third_party/icu/i18n/tzfmt.cpp +1 -1
  247. package/src/duckdb/extension/icu/third_party/icu/i18n/tzgnames.cpp +1 -1
  248. package/src/duckdb/extension/icu/third_party/icu/i18n/tzgnames.h +2 -2
  249. package/src/duckdb/extension/icu/third_party/icu/i18n/tznames.cpp +3 -3
  250. package/src/duckdb/extension/icu/third_party/icu/i18n/tznames_impl.cpp +2 -2
  251. package/src/duckdb/extension/icu/third_party/icu/i18n/tznames_impl.h +2 -2
  252. package/src/duckdb/extension/icu/third_party/icu/i18n/tzrule.cpp +8 -8
  253. package/src/duckdb/extension/icu/third_party/icu/i18n/tztrans.cpp +2 -2
  254. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/alphaindex.h +2 -2
  255. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/calendar.h +2 -2
  256. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/choicfmt.h +1 -1
  257. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/coleitr.h +2 -2
  258. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/coll.h +2 -2
  259. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/currpinf.h +3 -3
  260. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/datefmt.h +1 -1
  261. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/dcfmtsym.h +2 -2
  262. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/decimfmt.h +1 -1
  263. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/dtfmtsym.h +2 -2
  264. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/dtitvfmt.h +3 -3
  265. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/dtitvinf.h +3 -3
  266. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/dtptngen.h +2 -2
  267. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/dtrule.h +2 -2
  268. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/fieldpos.h +4 -4
  269. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/fmtable.h +2 -2
  270. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/format.h +2 -2
  271. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/fpositer.h +2 -2
  272. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/measfmt.h +1 -1
  273. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/measunit.h +2 -2
  274. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/measure.h +1 -1
  275. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/msgfmt.h +2 -2
  276. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/numfmt.h +1 -1
  277. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/plurfmt.h +2 -2
  278. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/plurrule.h +2 -2
  279. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/rbnf.h +1 -1
  280. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/rbtz.h +2 -2
  281. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/region.h +2 -2
  282. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/search.h +2 -2
  283. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/selfmt.h +2 -2
  284. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/simpletz.h +1 -1
  285. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/smpdtfmt.h +1 -1
  286. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/sortkey.h +3 -3
  287. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/stsearch.h +1 -1
  288. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tblcoll.h +1 -1
  289. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/timezone.h +2 -2
  290. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tmutamt.h +3 -3
  291. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tmutfmt.h +2 -2
  292. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tzfmt.h +1 -1
  293. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tznames.h +2 -2
  294. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tzrule.h +8 -8
  295. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tztrans.h +2 -2
  296. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/vtzone.h +2 -2
  297. package/src/duckdb/extension/icu/third_party/icu/i18n/utf16collationiterator.cpp +2 -2
  298. package/src/duckdb/extension/icu/third_party/icu/i18n/utf16collationiterator.h +2 -2
  299. package/src/duckdb/extension/icu/third_party/icu/i18n/vtzone.cpp +2 -2
  300. package/src/duckdb/extension/json/buffered_json_reader.cpp +6 -1
  301. package/src/duckdb/extension/json/include/buffered_json_reader.hpp +2 -0
  302. package/src/duckdb/extension/json/include/json_common.hpp +14 -10
  303. package/src/duckdb/extension/json/include/json_scan.hpp +48 -7
  304. package/src/duckdb/extension/json/include/json_structure.hpp +2 -1
  305. package/src/duckdb/extension/json/include/json_transform.hpp +5 -2
  306. package/src/duckdb/extension/json/json_functions/copy_json.cpp +1 -1
  307. package/src/duckdb/extension/json/json_functions/json_create.cpp +57 -20
  308. package/src/duckdb/extension/json/json_functions/json_serialize_plan.cpp +7 -6
  309. package/src/duckdb/extension/json/json_functions/json_serialize_sql.cpp +6 -5
  310. package/src/duckdb/extension/json/json_functions/json_structure.cpp +20 -17
  311. package/src/duckdb/extension/json/json_functions/json_transform.cpp +48 -17
  312. package/src/duckdb/extension/json/json_functions/read_json.cpp +83 -34
  313. package/src/duckdb/extension/json/json_functions/read_json_objects.cpp +3 -3
  314. package/src/duckdb/extension/json/json_functions.cpp +14 -16
  315. package/src/duckdb/extension/json/json_scan.cpp +36 -16
  316. package/src/duckdb/extension/json/json_serializer.cpp +1 -1
  317. package/src/duckdb/extension/json/serialize_json.cpp +2 -2
  318. package/src/duckdb/extension/parquet/column_reader.cpp +136 -116
  319. package/src/duckdb/extension/parquet/column_writer.cpp +870 -604
  320. package/src/duckdb/extension/parquet/geo_parquet.cpp +4 -5
  321. package/src/duckdb/extension/parquet/include/boolean_column_reader.hpp +0 -4
  322. package/src/duckdb/extension/parquet/include/column_reader.hpp +24 -19
  323. package/src/duckdb/extension/parquet/include/column_writer.hpp +7 -5
  324. package/src/duckdb/extension/parquet/include/decode_utils.hpp +138 -18
  325. package/src/duckdb/extension/parquet/include/geo_parquet.hpp +4 -3
  326. package/src/duckdb/extension/parquet/include/null_column_reader.hpp +1 -14
  327. package/src/duckdb/extension/parquet/include/parquet_bss_encoder.hpp +45 -0
  328. package/src/duckdb/extension/parquet/include/parquet_crypto.hpp +1 -1
  329. package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +101 -90
  330. package/src/duckdb/extension/parquet/include/parquet_dbp_encoder.hpp +179 -0
  331. package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +2 -3
  332. package/src/duckdb/extension/parquet/include/parquet_dlba_encoder.hpp +48 -0
  333. package/src/duckdb/extension/parquet/include/parquet_extension.hpp +8 -0
  334. package/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp +2 -2
  335. package/src/duckdb/extension/parquet/include/parquet_metadata.hpp +5 -0
  336. package/src/duckdb/extension/parquet/include/parquet_reader.hpp +22 -18
  337. package/src/duckdb/extension/parquet/include/parquet_rle_bp_decoder.hpp +1 -5
  338. package/src/duckdb/extension/parquet/include/parquet_statistics.hpp +87 -3
  339. package/src/duckdb/extension/parquet/include/parquet_writer.hpp +30 -16
  340. package/src/duckdb/extension/parquet/include/resizable_buffer.hpp +1 -0
  341. package/src/duckdb/extension/parquet/include/string_column_reader.hpp +0 -8
  342. package/src/duckdb/extension/parquet/include/struct_column_reader.hpp +1 -1
  343. package/src/duckdb/extension/parquet/include/templated_column_reader.hpp +1 -42
  344. package/src/duckdb/extension/parquet/include/thrift_tools.hpp +13 -1
  345. package/src/duckdb/extension/parquet/include/zstd_file_system.hpp +4 -0
  346. package/src/duckdb/extension/parquet/parquet_extension.cpp +240 -197
  347. package/src/duckdb/extension/parquet/parquet_metadata.cpp +138 -6
  348. package/src/duckdb/extension/parquet/parquet_reader.cpp +155 -79
  349. package/src/duckdb/extension/parquet/parquet_statistics.cpp +258 -38
  350. package/src/duckdb/extension/parquet/parquet_timestamp.cpp +17 -3
  351. package/src/duckdb/extension/parquet/parquet_writer.cpp +65 -34
  352. package/src/duckdb/extension/parquet/serialize_parquet.cpp +4 -0
  353. package/src/duckdb/extension/parquet/zstd_file_system.cpp +13 -0
  354. package/src/duckdb/src/catalog/catalog.cpp +272 -97
  355. package/src/duckdb/src/catalog/catalog_entry/duck_index_entry.cpp +9 -4
  356. package/src/duckdb/src/catalog/catalog_entry/duck_schema_entry.cpp +8 -0
  357. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +145 -95
  358. package/src/duckdb/src/catalog/catalog_entry/scalar_function_catalog_entry.cpp +9 -3
  359. package/src/duckdb/src/catalog/catalog_entry/schema_catalog_entry.cpp +15 -0
  360. package/src/duckdb/src/catalog/catalog_entry/table_catalog_entry.cpp +40 -24
  361. package/src/duckdb/src/catalog/catalog_entry/table_function_catalog_entry.cpp +1 -1
  362. package/src/duckdb/src/catalog/catalog_entry/type_catalog_entry.cpp +2 -2
  363. package/src/duckdb/src/catalog/catalog_entry.cpp +3 -0
  364. package/src/duckdb/src/catalog/catalog_entry_retriever.cpp +60 -5
  365. package/src/duckdb/src/catalog/catalog_search_path.cpp +27 -14
  366. package/src/duckdb/src/catalog/catalog_set.cpp +75 -31
  367. package/src/duckdb/src/catalog/default/default_functions.cpp +13 -8
  368. package/src/duckdb/src/catalog/default/default_views.cpp +1 -0
  369. package/src/duckdb/src/catalog/dependency_manager.cpp +133 -5
  370. package/src/duckdb/src/catalog/duck_catalog.cpp +17 -9
  371. package/src/duckdb/src/common/adbc/adbc.cpp +18 -0
  372. package/src/duckdb/src/common/allocator.cpp +3 -1
  373. package/src/duckdb/src/common/arrow/arrow_appender.cpp +30 -9
  374. package/src/duckdb/src/common/arrow/arrow_converter.cpp +63 -82
  375. package/src/duckdb/src/common/arrow/arrow_merge_event.cpp +4 -3
  376. package/src/duckdb/src/common/arrow/arrow_type_extension.cpp +361 -0
  377. package/src/duckdb/src/common/arrow/arrow_util.cpp +10 -6
  378. package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +6 -2
  379. package/src/duckdb/src/common/arrow/physical_arrow_collector.cpp +2 -1
  380. package/src/duckdb/src/common/arrow/schema_metadata.cpp +27 -14
  381. package/src/duckdb/src/common/assert.cpp +1 -2
  382. package/src/duckdb/src/common/bind_helpers.cpp +1 -1
  383. package/src/duckdb/src/common/box_renderer.cpp +316 -26
  384. package/src/duckdb/src/common/cgroups.cpp +7 -1
  385. package/src/duckdb/src/common/compressed_file_system.cpp +1 -1
  386. package/src/duckdb/src/common/enum_util.cpp +2865 -6882
  387. package/src/duckdb/src/common/enums/compression_type.cpp +12 -0
  388. package/src/duckdb/src/common/enums/metric_type.cpp +24 -0
  389. package/src/duckdb/src/common/enums/optimizer_type.cpp +4 -0
  390. package/src/duckdb/src/common/enums/physical_operator_type.cpp +2 -0
  391. package/src/duckdb/src/common/error_data.cpp +23 -6
  392. package/src/duckdb/src/common/exception/binder_exception.cpp +1 -1
  393. package/src/duckdb/src/common/exception.cpp +20 -28
  394. package/src/duckdb/src/common/extra_type_info.cpp +85 -20
  395. package/src/duckdb/src/common/file_buffer.cpp +5 -2
  396. package/src/duckdb/src/common/file_system.cpp +8 -3
  397. package/src/duckdb/src/common/fsst.cpp +3 -3
  398. package/src/duckdb/src/common/hive_partitioning.cpp +1 -1
  399. package/src/duckdb/src/common/local_file_system.cpp +169 -60
  400. package/src/duckdb/src/common/multi_file_list.cpp +4 -1
  401. package/src/duckdb/src/common/multi_file_reader.cpp +240 -63
  402. package/src/duckdb/src/common/opener_file_system.cpp +37 -0
  403. package/src/duckdb/src/common/operator/cast_operators.cpp +77 -11
  404. package/src/duckdb/src/common/operator/string_cast.cpp +6 -2
  405. package/src/duckdb/src/common/pipe_file_system.cpp +4 -4
  406. package/src/duckdb/src/common/progress_bar/progress_bar.cpp +25 -14
  407. package/src/duckdb/src/common/radix_partitioning.cpp +17 -16
  408. package/src/duckdb/src/common/random_engine.cpp +39 -3
  409. package/src/duckdb/src/common/render_tree.cpp +3 -19
  410. package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
  411. package/src/duckdb/src/common/row_operations/row_gather.cpp +2 -58
  412. package/src/duckdb/src/common/row_operations/row_matcher.cpp +2 -2
  413. package/src/duckdb/src/common/row_operations/row_radix_scatter.cpp +2 -0
  414. package/src/duckdb/src/common/row_operations/row_scatter.cpp +20 -19
  415. package/src/duckdb/src/common/serializer/buffered_file_writer.cpp +1 -1
  416. package/src/duckdb/src/common/serializer/memory_stream.cpp +36 -0
  417. package/src/duckdb/src/common/sort/comparators.cpp +7 -7
  418. package/src/duckdb/src/common/sort/partition_state.cpp +2 -2
  419. package/src/duckdb/src/common/stacktrace.cpp +127 -0
  420. package/src/duckdb/src/common/string_util.cpp +157 -32
  421. package/src/duckdb/src/common/tree_renderer/text_tree_renderer.cpp +15 -3
  422. package/src/duckdb/src/common/types/column/column_data_allocator.cpp +4 -0
  423. package/src/duckdb/src/common/types/column/column_data_collection.cpp +71 -8
  424. package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +27 -6
  425. package/src/duckdb/src/common/types/conflict_manager.cpp +21 -7
  426. package/src/duckdb/src/common/types/date.cpp +39 -25
  427. package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +4 -11
  428. package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +21 -7
  429. package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +10 -1
  430. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +17 -17
  431. package/src/duckdb/src/common/types/timestamp.cpp +70 -33
  432. package/src/duckdb/src/common/types/uuid.cpp +11 -0
  433. package/src/duckdb/src/common/types/validity_mask.cpp +16 -5
  434. package/src/duckdb/src/common/types/value.cpp +357 -199
  435. package/src/duckdb/src/common/types/varint.cpp +64 -18
  436. package/src/duckdb/src/common/types/vector.cpp +78 -38
  437. package/src/duckdb/src/common/types.cpp +199 -92
  438. package/src/duckdb/src/common/vector_operations/comparison_operators.cpp +2 -1
  439. package/src/duckdb/src/common/vector_operations/is_distinct_from.cpp +32 -5
  440. package/src/duckdb/src/common/vector_operations/vector_hash.cpp +3 -1
  441. package/src/duckdb/src/execution/adaptive_filter.cpp +6 -2
  442. package/src/duckdb/src/execution/aggregate_hashtable.cpp +410 -111
  443. package/src/duckdb/src/execution/column_binding_resolver.cpp +2 -2
  444. package/src/duckdb/src/execution/expression_executor/execute_between.cpp +6 -0
  445. package/src/duckdb/src/execution/expression_executor/execute_cast.cpp +4 -3
  446. package/src/duckdb/src/execution/expression_executor/execute_comparison.cpp +2 -2
  447. package/src/duckdb/src/execution/expression_executor/execute_conjunction.cpp +2 -2
  448. package/src/duckdb/src/execution/expression_executor/execute_function.cpp +1 -0
  449. package/src/duckdb/src/execution/expression_executor/execute_operator.cpp +5 -4
  450. package/src/duckdb/src/execution/expression_executor.cpp +5 -3
  451. package/src/duckdb/src/execution/index/art/art.cpp +208 -72
  452. package/src/duckdb/src/execution/index/art/base_leaf.cpp +1 -1
  453. package/src/duckdb/src/execution/index/art/leaf.cpp +12 -7
  454. package/src/duckdb/src/execution/index/art/node.cpp +2 -1
  455. package/src/duckdb/src/execution/index/art/node256_leaf.cpp +6 -6
  456. package/src/duckdb/src/execution/index/art/plan_art.cpp +50 -55
  457. package/src/duckdb/src/execution/index/art/prefix.cpp +7 -13
  458. package/src/duckdb/src/execution/index/bound_index.cpp +30 -5
  459. package/src/duckdb/src/execution/index/fixed_size_allocator.cpp +3 -5
  460. package/src/duckdb/src/execution/index/fixed_size_buffer.cpp +14 -9
  461. package/src/duckdb/src/execution/join_hashtable.cpp +254 -158
  462. package/src/duckdb/src/execution/operator/aggregate/grouped_aggregate_data.cpp +1 -1
  463. package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +7 -7
  464. package/src/duckdb/src/execution/operator/aggregate/physical_partitioned_aggregate.cpp +226 -0
  465. package/src/duckdb/src/execution/operator/aggregate/physical_perfecthash_aggregate.cpp +3 -3
  466. package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +3 -3
  467. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +77 -70
  468. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +114 -50
  469. package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp +2 -2
  470. package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer_manager.cpp +19 -10
  471. package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_file_handle.cpp +22 -15
  472. package/src/duckdb/src/execution/operator/csv_scanner/encode/csv_encoder.cpp +95 -0
  473. package/src/duckdb/src/execution/operator/csv_scanner/scanner/column_count_scanner.cpp +6 -1
  474. package/src/duckdb/src/execution/operator/csv_scanner/scanner/csv_schema.cpp +75 -2
  475. package/src/duckdb/src/execution/operator/csv_scanner/scanner/scanner_boundary.cpp +40 -12
  476. package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +395 -163
  477. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +20 -23
  478. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +115 -49
  479. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +66 -12
  480. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +20 -23
  481. package/src/duckdb/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp +220 -46
  482. package/src/duckdb/src/execution/operator/csv_scanner/table_function/csv_file_scanner.cpp +43 -32
  483. package/src/duckdb/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp +54 -119
  484. package/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp +184 -20
  485. package/src/duckdb/src/execution/operator/csv_scanner/util/csv_reader_options.cpp +83 -21
  486. package/src/duckdb/src/execution/operator/csv_scanner/util/csv_validator.cpp +63 -0
  487. package/src/duckdb/src/execution/operator/helper/physical_reservoir_sample.cpp +7 -4
  488. package/src/duckdb/src/execution/operator/helper/physical_set.cpp +1 -1
  489. package/src/duckdb/src/execution/operator/helper/physical_vacuum.cpp +3 -2
  490. package/src/duckdb/src/execution/operator/helper/physical_verify_vector.cpp +9 -1
  491. package/src/duckdb/src/execution/operator/join/perfect_hash_join_executor.cpp +132 -15
  492. package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +64 -55
  493. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +284 -154
  494. package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +40 -55
  495. package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +0 -1
  496. package/src/duckdb/src/execution/operator/order/physical_order.cpp +7 -3
  497. package/src/duckdb/src/execution/operator/order/physical_top_n.cpp +298 -227
  498. package/src/duckdb/src/execution/operator/persistent/csv_rejects_table.cpp +5 -2
  499. package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +3 -4
  500. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +32 -19
  501. package/src/duckdb/src/execution/operator/persistent/physical_copy_database.cpp +1 -0
  502. package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +6 -0
  503. package/src/duckdb/src/execution/operator/persistent/physical_delete.cpp +58 -19
  504. package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +11 -27
  505. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +308 -119
  506. package/src/duckdb/src/execution/operator/persistent/physical_update.cpp +105 -55
  507. package/src/duckdb/src/execution/operator/projection/physical_tableinout_function.cpp +6 -2
  508. package/src/duckdb/src/execution/operator/projection/physical_unnest.cpp +1 -1
  509. package/src/duckdb/src/execution/operator/scan/physical_positional_scan.cpp +15 -6
  510. package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +92 -50
  511. package/src/duckdb/src/execution/operator/schema/physical_alter.cpp +0 -1
  512. package/src/duckdb/src/execution/operator/schema/physical_attach.cpp +8 -4
  513. package/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp +54 -22
  514. package/src/duckdb/src/execution/operator/set/physical_union.cpp +5 -1
  515. package/src/duckdb/src/execution/physical_operator.cpp +15 -9
  516. package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +101 -12
  517. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +11 -140
  518. package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp +11 -13
  519. package/src/duckdb/src/execution/physical_plan/plan_cte.cpp +1 -1
  520. package/src/duckdb/src/execution/physical_plan/plan_delete.cpp +1 -1
  521. package/src/duckdb/src/execution/physical_plan/plan_delim_join.cpp +1 -1
  522. package/src/duckdb/src/execution/physical_plan/plan_distinct.cpp +6 -5
  523. package/src/duckdb/src/execution/physical_plan/plan_export.cpp +0 -4
  524. package/src/duckdb/src/execution/physical_plan/plan_filter.cpp +1 -1
  525. package/src/duckdb/src/execution/physical_plan/plan_get.cpp +16 -13
  526. package/src/duckdb/src/execution/physical_plan/plan_insert.cpp +1 -1
  527. package/src/duckdb/src/execution/physical_plan/plan_order.cpp +7 -7
  528. package/src/duckdb/src/execution/physical_plan/plan_prepare.cpp +2 -2
  529. package/src/duckdb/src/execution/physical_plan/plan_projection.cpp +1 -1
  530. package/src/duckdb/src/execution/physical_plan/plan_sample.cpp +8 -3
  531. package/src/duckdb/src/execution/physical_plan/plan_set_operation.cpp +1 -2
  532. package/src/duckdb/src/execution/physical_plan/plan_simple.cpp +1 -2
  533. package/src/duckdb/src/execution/physical_plan/plan_top_n.cpp +3 -2
  534. package/src/duckdb/src/execution/physical_plan_generator.cpp +0 -22
  535. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +136 -116
  536. package/src/duckdb/src/execution/sample/base_reservoir_sample.cpp +136 -0
  537. package/src/duckdb/src/execution/sample/reservoir_sample.cpp +930 -0
  538. package/src/duckdb/src/function/aggregate/distributive/count.cpp +6 -12
  539. package/src/duckdb/src/function/aggregate/distributive/{first.cpp → first_last_any.cpp} +37 -18
  540. package/src/duckdb/src/{core_functions → function}/aggregate/distributive/minmax.cpp +19 -12
  541. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +72 -13
  542. package/src/duckdb/src/function/built_in_functions.cpp +85 -2
  543. package/src/duckdb/src/function/cast/decimal_cast.cpp +1 -1
  544. package/src/duckdb/src/function/cast/string_cast.cpp +1 -1
  545. package/src/duckdb/src/function/cast/struct_cast.cpp +81 -49
  546. package/src/duckdb/src/function/cast/union/from_struct.cpp +7 -5
  547. package/src/duckdb/src/function/compression_config.cpp +6 -0
  548. package/src/duckdb/src/function/encoding_function.cpp +134 -0
  549. package/src/duckdb/src/function/function.cpp +8 -13
  550. package/src/duckdb/src/function/function_binder.cpp +100 -21
  551. package/src/duckdb/src/function/function_list.cpp +178 -0
  552. package/src/duckdb/src/function/macro_function.cpp +4 -4
  553. package/src/duckdb/src/function/pragma/pragma_functions.cpp +0 -2
  554. package/src/duckdb/src/function/pragma/pragma_queries.cpp +0 -4
  555. package/src/duckdb/src/{core_functions/core_functions.cpp → function/register_function_list.cpp} +12 -8
  556. package/src/duckdb/src/function/scalar/compressed_materialization/compress_integral.cpp +62 -23
  557. package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +33 -16
  558. package/src/duckdb/src/function/scalar/compressed_materialization_utils.cpp +21 -0
  559. package/src/duckdb/src/{core_functions/scalar/blob → function/scalar}/create_sort_key.cpp +86 -23
  560. package/src/duckdb/src/{core_functions → function}/scalar/date/strftime.cpp +6 -4
  561. package/src/duckdb/src/function/scalar/generic/constant_or_null.cpp +5 -7
  562. package/src/duckdb/src/{core_functions → function}/scalar/generic/error.cpp +3 -1
  563. package/src/duckdb/src/function/scalar/generic/getvariable.cpp +2 -2
  564. package/src/duckdb/src/function/scalar/list/contains_or_position.cpp +1 -7
  565. package/src/duckdb/src/function/scalar/list/list_extract.cpp +27 -21
  566. package/src/duckdb/src/function/scalar/list/list_resize.cpp +8 -12
  567. package/src/duckdb/src/function/scalar/list/list_select.cpp +1 -4
  568. package/src/duckdb/src/function/scalar/list/list_zip.cpp +6 -6
  569. package/src/duckdb/src/{core_functions → function}/scalar/map/map_contains.cpp +2 -2
  570. package/src/duckdb/src/function/scalar/nested_functions.cpp +0 -11
  571. package/src/duckdb/src/function/scalar/{operators → operator}/add.cpp +2 -1
  572. package/src/duckdb/src/function/scalar/{operators → operator}/arithmetic.cpp +195 -127
  573. package/src/duckdb/src/function/scalar/sequence/nextval.cpp +30 -21
  574. package/src/duckdb/src/function/scalar/strftime_format.cpp +10 -0
  575. package/src/duckdb/src/function/scalar/string/caseconvert.cpp +11 -41
  576. package/src/duckdb/src/function/scalar/string/concat.cpp +22 -20
  577. package/src/duckdb/src/function/scalar/string/concat_ws.cpp +2 -2
  578. package/src/duckdb/src/function/scalar/string/contains.cpp +16 -19
  579. package/src/duckdb/src/function/scalar/string/length.cpp +38 -24
  580. package/src/duckdb/src/function/scalar/string/like.cpp +80 -47
  581. package/src/duckdb/src/{core_functions → function}/scalar/string/md5.cpp +2 -2
  582. package/src/duckdb/src/function/scalar/string/nfc_normalize.cpp +2 -6
  583. package/src/duckdb/src/function/scalar/string/prefix.cpp +0 -4
  584. package/src/duckdb/src/function/scalar/string/regexp/regexp_extract_all.cpp +2 -1
  585. package/src/duckdb/src/function/scalar/string/regexp.cpp +17 -7
  586. package/src/duckdb/src/{core_functions → function}/scalar/string/regexp_escape.cpp +2 -2
  587. package/src/duckdb/src/{core_functions → function}/scalar/string/sha1.cpp +1 -1
  588. package/src/duckdb/src/{core_functions → function}/scalar/string/sha256.cpp +1 -1
  589. package/src/duckdb/src/{core_functions → function}/scalar/string/string_split.cpp +4 -5
  590. package/src/duckdb/src/function/scalar/string/strip_accents.cpp +3 -6
  591. package/src/duckdb/src/function/scalar/string/substring.cpp +14 -13
  592. package/src/duckdb/src/function/scalar/string/suffix.cpp +0 -4
  593. package/src/duckdb/src/function/scalar/struct/struct_concat.cpp +115 -0
  594. package/src/duckdb/src/function/scalar/struct/struct_extract.cpp +35 -31
  595. package/src/duckdb/src/{core_functions → function}/scalar/struct/struct_pack.cpp +7 -7
  596. package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +5 -8
  597. package/src/duckdb/src/function/scalar/system/write_log.cpp +170 -0
  598. package/src/duckdb/src/function/scalar_function.cpp +5 -5
  599. package/src/duckdb/src/function/table/arrow/arrow_array_scan_state.cpp +3 -2
  600. package/src/duckdb/src/function/table/arrow/arrow_duck_schema.cpp +287 -1
  601. package/src/duckdb/src/function/table/arrow/arrow_type_info.cpp +6 -6
  602. package/src/duckdb/src/function/table/arrow.cpp +32 -352
  603. package/src/duckdb/src/function/table/arrow_conversion.cpp +43 -7
  604. package/src/duckdb/src/function/table/copy_csv.cpp +38 -23
  605. package/src/duckdb/src/function/table/glob.cpp +1 -1
  606. package/src/duckdb/src/function/table/query_function.cpp +12 -7
  607. package/src/duckdb/src/function/table/read_csv.cpp +114 -46
  608. package/src/duckdb/src/function/table/read_file.cpp +26 -6
  609. package/src/duckdb/src/function/table/sniff_csv.cpp +25 -5
  610. package/src/duckdb/src/function/table/system/duckdb_columns.cpp +1 -1
  611. package/src/duckdb/src/function/table/system/duckdb_constraints.cpp +1 -1
  612. package/src/duckdb/src/function/table/system/duckdb_dependencies.cpp +6 -7
  613. package/src/duckdb/src/function/table/system/duckdb_extensions.cpp +1 -1
  614. package/src/duckdb/src/function/table/system/duckdb_functions.cpp +141 -16
  615. package/src/duckdb/src/function/table/system/duckdb_log.cpp +64 -0
  616. package/src/duckdb/src/function/table/system/duckdb_log_contexts.cpp +65 -0
  617. package/src/duckdb/src/function/table/system/duckdb_memory.cpp +0 -1
  618. package/src/duckdb/src/function/table/system/duckdb_settings.cpp +1 -1
  619. package/src/duckdb/src/function/table/system/duckdb_tables.cpp +1 -13
  620. package/src/duckdb/src/function/table/system/duckdb_types.cpp +1 -1
  621. package/src/duckdb/src/function/table/system/pragma_storage_info.cpp +17 -0
  622. package/src/duckdb/src/function/table/system/pragma_table_info.cpp +6 -0
  623. package/src/duckdb/src/function/table/system/pragma_table_sample.cpp +95 -0
  624. package/src/duckdb/src/function/table/system/test_all_types.cpp +56 -46
  625. package/src/duckdb/src/function/table/system_functions.cpp +3 -0
  626. package/src/duckdb/src/function/table/table_scan.cpp +487 -289
  627. package/src/duckdb/src/function/table/version/pragma_version.cpp +3 -3
  628. package/src/duckdb/src/function/table_function.cpp +10 -6
  629. package/src/duckdb/src/function/window/window_aggregate_function.cpp +248 -0
  630. package/src/duckdb/src/function/window/window_aggregate_states.cpp +48 -0
  631. package/src/duckdb/src/function/window/window_aggregator.cpp +88 -0
  632. package/src/duckdb/src/function/window/window_boundaries_state.cpp +854 -0
  633. package/src/duckdb/src/function/window/window_collection.cpp +146 -0
  634. package/src/duckdb/src/function/window/window_constant_aggregator.cpp +357 -0
  635. package/src/duckdb/src/function/window/window_custom_aggregator.cpp +146 -0
  636. package/src/duckdb/src/function/window/window_distinct_aggregator.cpp +758 -0
  637. package/src/duckdb/src/function/window/window_executor.cpp +99 -0
  638. package/src/duckdb/src/function/window/window_index_tree.cpp +63 -0
  639. package/src/duckdb/src/function/window/window_merge_sort_tree.cpp +275 -0
  640. package/src/duckdb/src/function/window/window_naive_aggregator.cpp +361 -0
  641. package/src/duckdb/src/function/window/window_rank_function.cpp +288 -0
  642. package/src/duckdb/src/function/window/window_rownumber_function.cpp +191 -0
  643. package/src/duckdb/src/function/window/window_segment_tree.cpp +594 -0
  644. package/src/duckdb/src/function/window/window_shared_expressions.cpp +50 -0
  645. package/src/duckdb/src/function/window/window_token_tree.cpp +142 -0
  646. package/src/duckdb/src/function/window/window_value_function.cpp +566 -0
  647. package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +74 -17
  648. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_index_entry.hpp +1 -1
  649. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_schema_entry.hpp +2 -0
  650. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_table_entry.hpp +9 -0
  651. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/function_entry.hpp +4 -10
  652. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/pragma_function_catalog_entry.hpp +1 -1
  653. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/scalar_function_catalog_entry.hpp +2 -2
  654. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/schema_catalog_entry.hpp +2 -0
  655. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/table_catalog_entry.hpp +18 -3
  656. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/table_function_catalog_entry.hpp +1 -1
  657. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/type_catalog_entry.hpp +2 -1
  658. package/src/duckdb/src/include/duckdb/catalog/catalog_entry.hpp +5 -2
  659. package/src/duckdb/src/include/duckdb/catalog/catalog_entry_retriever.hpp +21 -18
  660. package/src/duckdb/src/include/duckdb/catalog/catalog_search_path.hpp +3 -2
  661. package/src/duckdb/src/include/duckdb/catalog/catalog_set.hpp +10 -2
  662. package/src/duckdb/src/include/duckdb/catalog/dependency_manager.hpp +11 -0
  663. package/src/duckdb/src/include/duckdb/catalog/duck_catalog.hpp +9 -4
  664. package/src/duckdb/src/include/duckdb/common/allocator.hpp +3 -0
  665. package/src/duckdb/src/include/duckdb/common/array_ptr.hpp +8 -0
  666. package/src/duckdb/src/include/duckdb/common/arrow/appender/append_data.hpp +4 -1
  667. package/src/duckdb/src/include/duckdb/common/arrow/appender/list_data.hpp +3 -1
  668. package/src/duckdb/src/include/duckdb/common/arrow/appender/list_view_data.hpp +3 -1
  669. package/src/duckdb/src/include/duckdb/common/arrow/appender/varchar_data.hpp +2 -1
  670. package/src/duckdb/src/include/duckdb/common/arrow/arrow_appender.hpp +7 -3
  671. package/src/duckdb/src/include/duckdb/common/arrow/arrow_converter.hpp +26 -3
  672. package/src/duckdb/src/include/duckdb/common/arrow/arrow_query_result.hpp +1 -1
  673. package/src/duckdb/src/include/duckdb/common/arrow/arrow_type_extension.hpp +144 -0
  674. package/src/duckdb/src/include/duckdb/common/arrow/arrow_util.hpp +5 -2
  675. package/src/duckdb/src/include/duckdb/common/arrow/result_arrow_wrapper.hpp +2 -0
  676. package/src/duckdb/src/include/duckdb/common/arrow/schema_metadata.hpp +11 -4
  677. package/src/duckdb/src/include/duckdb/common/assert.hpp +12 -1
  678. package/src/duckdb/src/include/duckdb/common/atomic_ptr.hpp +102 -0
  679. package/src/duckdb/src/include/duckdb/common/box_renderer.hpp +65 -6
  680. package/src/duckdb/src/include/duckdb/common/chrono.hpp +1 -0
  681. package/src/duckdb/src/include/duckdb/common/column_index.hpp +72 -0
  682. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +120 -0
  683. package/src/duckdb/src/include/duckdb/{core_functions/core_functions.hpp → common/enums/collation_type.hpp} +2 -7
  684. package/src/duckdb/src/include/duckdb/common/enums/compression_type.hpp +5 -2
  685. package/src/duckdb/src/include/duckdb/common/enums/function_errors.hpp +18 -0
  686. package/src/duckdb/src/include/duckdb/common/enums/memory_tag.hpp +3 -2
  687. package/src/duckdb/src/include/duckdb/common/enums/metric_type.hpp +7 -2
  688. package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +4 -0
  689. package/src/duckdb/src/include/duckdb/common/enums/order_preservation_type.hpp +1 -1
  690. package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +1 -0
  691. package/src/duckdb/src/include/duckdb/common/enums/profiler_format.hpp +1 -1
  692. package/src/duckdb/src/include/duckdb/{core_functions/aggregate → common/enums}/quantile_enum.hpp +3 -1
  693. package/src/duckdb/src/include/duckdb/common/enums/scan_vector_type.hpp +2 -0
  694. package/src/duckdb/src/include/duckdb/common/error_data.hpp +1 -0
  695. package/src/duckdb/src/include/duckdb/common/exception/parser_exception.hpp +4 -0
  696. package/src/duckdb/src/include/duckdb/common/exception.hpp +1 -1
  697. package/src/duckdb/src/include/duckdb/common/extension_type_info.hpp +37 -0
  698. package/src/duckdb/src/include/duckdb/common/extra_operator_info.hpp +7 -2
  699. package/src/duckdb/src/include/duckdb/common/extra_type_info.hpp +9 -3
  700. package/src/duckdb/src/include/duckdb/common/fast_mem.hpp +6 -6
  701. package/src/duckdb/src/include/duckdb/common/file_buffer.hpp +19 -10
  702. package/src/duckdb/src/include/duckdb/common/file_opener.hpp +2 -0
  703. package/src/duckdb/src/include/duckdb/common/file_system.hpp +6 -1
  704. package/src/duckdb/src/include/duckdb/common/fsst.hpp +2 -2
  705. package/src/duckdb/src/include/duckdb/common/helper.hpp +6 -0
  706. package/src/duckdb/src/include/duckdb/common/hugeint.hpp +10 -0
  707. package/src/duckdb/src/include/duckdb/common/insertion_order_preserving_map.hpp +12 -2
  708. package/src/duckdb/src/include/duckdb/common/local_file_system.hpp +3 -0
  709. package/src/duckdb/src/include/duckdb/common/multi_file_list.hpp +2 -1
  710. package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +147 -27
  711. package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +4 -0
  712. package/src/duckdb/src/include/duckdb/common/numeric_utils.hpp +2 -7
  713. package/src/duckdb/src/include/duckdb/common/opener_file_system.hpp +16 -5
  714. package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +16 -0
  715. package/src/duckdb/src/include/duckdb/common/optional_idx.hpp +4 -0
  716. package/src/duckdb/src/include/duckdb/common/platform.hpp +34 -3
  717. package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +10 -13
  718. package/src/duckdb/src/include/duckdb/common/random_engine.hpp +8 -3
  719. package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +0 -2
  720. package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_writer.hpp +1 -1
  721. package/src/duckdb/src/include/duckdb/common/serializer/memory_stream.hpp +7 -0
  722. package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +1 -0
  723. package/src/duckdb/src/include/duckdb/common/serializer/serializer.hpp +1 -0
  724. package/src/duckdb/src/include/duckdb/common/stacktrace.hpp +25 -0
  725. package/src/duckdb/src/include/duckdb/common/string_util.hpp +30 -2
  726. package/src/duckdb/src/include/duckdb/common/tree_renderer/graphviz_tree_renderer.hpp +1 -1
  727. package/src/duckdb/src/include/duckdb/common/tree_renderer/html_tree_renderer.hpp +1 -1
  728. package/src/duckdb/src/include/duckdb/common/tree_renderer/json_tree_renderer.hpp +1 -1
  729. package/src/duckdb/src/include/duckdb/common/tree_renderer/text_tree_renderer.hpp +3 -2
  730. package/src/duckdb/src/include/duckdb/common/tree_renderer.hpp +2 -0
  731. package/src/duckdb/src/include/duckdb/common/type_util.hpp +8 -0
  732. package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +8 -0
  733. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +13 -2
  734. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp +2 -1
  735. package/src/duckdb/src/include/duckdb/common/types/conflict_manager.hpp +21 -4
  736. package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +4 -1
  737. package/src/duckdb/src/include/duckdb/common/types/date.hpp +9 -4
  738. package/src/duckdb/src/include/duckdb/common/types/date_lookup_cache.hpp +1 -1
  739. package/src/duckdb/src/include/duckdb/common/types/interval.hpp +58 -10
  740. package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +0 -4
  741. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_allocator.hpp +4 -0
  742. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +4 -0
  743. package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +10 -0
  744. package/src/duckdb/src/include/duckdb/common/types/timestamp.hpp +43 -16
  745. package/src/duckdb/src/include/duckdb/common/types/uuid.hpp +3 -1
  746. package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +63 -21
  747. package/src/duckdb/src/include/duckdb/common/types/value.hpp +62 -16
  748. package/src/duckdb/src/include/duckdb/common/types/varint.hpp +13 -0
  749. package/src/duckdb/src/include/duckdb/common/types/vector.hpp +34 -7
  750. package/src/duckdb/src/include/duckdb/common/types/vector_buffer.hpp +15 -0
  751. package/src/duckdb/src/include/duckdb/common/types.hpp +12 -7
  752. package/src/duckdb/src/include/duckdb/common/uhugeint.hpp +10 -0
  753. package/src/duckdb/src/include/duckdb/common/vector_operations/aggregate_executor.hpp +12 -13
  754. package/src/duckdb/src/include/duckdb/common/vector_operations/binary_executor.hpp +27 -0
  755. package/src/duckdb/src/include/duckdb/common/vector_operations/unary_executor.hpp +111 -4
  756. package/src/duckdb/src/include/duckdb/common/vector_operations/vector_operations.hpp +0 -1
  757. package/src/duckdb/src/include/duckdb/execution/adaptive_filter.hpp +2 -0
  758. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +48 -10
  759. package/src/duckdb/src/include/duckdb/execution/executor.hpp +2 -1
  760. package/src/duckdb/src/include/duckdb/execution/expression_executor.hpp +0 -1
  761. package/src/duckdb/src/include/duckdb/execution/ht_entry.hpp +25 -27
  762. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +28 -18
  763. package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +1 -0
  764. package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +2 -2
  765. package/src/duckdb/src/include/duckdb/execution/index/bound_index.hpp +23 -16
  766. package/src/duckdb/src/include/duckdb/execution/index/fixed_size_allocator.hpp +4 -0
  767. package/src/duckdb/src/include/duckdb/execution/index/fixed_size_buffer.hpp +2 -2
  768. package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +25 -16
  769. package/src/duckdb/src/include/duckdb/execution/merge_sort_tree.hpp +15 -10
  770. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp +1 -1
  771. package/src/duckdb/src/include/duckdb/execution/operator/{persistent/physical_fixed_batch_copy.hpp → aggregate/physical_partitioned_aggregate.hpp} +25 -27
  772. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_ungrouped_aggregate.hpp +1 -2
  773. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_window.hpp +5 -4
  774. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/ungrouped_aggregate_state.hpp +21 -1
  775. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/base_scanner.hpp +38 -9
  776. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_buffer.hpp +8 -9
  777. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_buffer_manager.hpp +7 -1
  778. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp +29 -23
  779. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_file_handle.hpp +15 -13
  780. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_file_scanner.hpp +13 -5
  781. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_option.hpp +2 -1
  782. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_reader_options.hpp +24 -10
  783. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_schema.hpp +36 -1
  784. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_state.hpp +21 -13
  785. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_state_machine.hpp +52 -22
  786. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_state_machine_cache.hpp +6 -6
  787. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_validator.hpp +58 -0
  788. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/encode/csv_encoder.hpp +62 -0
  789. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/global_csv_state.hpp +6 -3
  790. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner_boundary.hpp +16 -6
  791. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp +9 -4
  792. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/state_machine_options.hpp +8 -4
  793. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +55 -10
  794. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_batch_collector.hpp +2 -2
  795. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_buffered_batch_collector.hpp +2 -2
  796. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_limit.hpp +2 -2
  797. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_reservoir_sample.hpp +1 -1
  798. package/src/duckdb/src/include/duckdb/execution/operator/join/join_filter_pushdown.hpp +28 -7
  799. package/src/duckdb/src/include/duckdb/execution/operator/join/perfect_hash_join_executor.hpp +6 -9
  800. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_hash_join.hpp +17 -16
  801. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_iejoin.hpp +1 -1
  802. package/src/duckdb/src/include/duckdb/execution/operator/order/physical_order.hpp +7 -3
  803. package/src/duckdb/src/include/duckdb/execution/operator/order/physical_top_n.hpp +5 -1
  804. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_copy_to_file.hpp +2 -2
  805. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_insert.hpp +2 -2
  806. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_insert.hpp +55 -4
  807. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_update.hpp +2 -0
  808. package/src/duckdb/src/include/duckdb/execution/operator/projection/physical_tableinout_function.hpp +2 -2
  809. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_positional_scan.hpp +2 -1
  810. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp +10 -9
  811. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_art_index.hpp +16 -13
  812. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +0 -4
  813. package/src/duckdb/src/include/duckdb/execution/partition_info.hpp +79 -0
  814. package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +20 -9
  815. package/src/duckdb/src/include/duckdb/execution/physical_operator_states.hpp +1 -11
  816. package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +0 -2
  817. package/src/duckdb/src/include/duckdb/execution/progress_data.hpp +58 -0
  818. package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +2 -1
  819. package/src/duckdb/src/include/duckdb/execution/reservoir_sample.hpp +160 -31
  820. package/src/duckdb/src/include/duckdb/function/aggregate/distributive_function_utils.hpp +31 -0
  821. package/src/duckdb/src/include/duckdb/function/aggregate/distributive_functions.hpp +61 -10
  822. package/src/duckdb/src/include/duckdb/{core_functions → function}/aggregate/minmax_n_helpers.hpp +1 -1
  823. package/src/duckdb/src/include/duckdb/{core_functions → function}/aggregate/sort_key_helpers.hpp +2 -2
  824. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +47 -27
  825. package/src/duckdb/src/include/duckdb/function/aggregate_state.hpp +2 -0
  826. package/src/duckdb/src/include/duckdb/function/built_in_functions.hpp +3 -10
  827. package/src/duckdb/src/include/duckdb/function/cast/bound_cast_data.hpp +13 -6
  828. package/src/duckdb/src/include/duckdb/function/compression/compression.hpp +15 -0
  829. package/src/duckdb/src/include/duckdb/function/compression_function.hpp +29 -6
  830. package/src/duckdb/src/include/duckdb/{core_functions → function}/create_sort_key.hpp +4 -1
  831. package/src/duckdb/src/include/duckdb/function/encoding_function.hpp +78 -0
  832. package/src/duckdb/src/include/duckdb/function/function.hpp +22 -1
  833. package/src/duckdb/src/include/duckdb/function/function_binder.hpp +3 -0
  834. package/src/duckdb/src/include/duckdb/function/function_list.hpp +39 -0
  835. package/src/duckdb/src/include/duckdb/function/function_set.hpp +13 -7
  836. package/src/duckdb/src/include/duckdb/{core_functions → function}/lambda_functions.hpp +1 -1
  837. package/src/duckdb/src/include/duckdb/function/partition_stats.hpp +36 -0
  838. package/src/duckdb/src/include/duckdb/function/register_function_list_helper.hpp +69 -0
  839. package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp +154 -23
  840. package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_utils.hpp +45 -0
  841. package/src/duckdb/src/include/duckdb/function/scalar/date_functions.hpp +45 -0
  842. package/src/duckdb/src/include/duckdb/function/scalar/generic_common.hpp +36 -0
  843. package/src/duckdb/src/include/duckdb/function/scalar/generic_functions.hpp +32 -23
  844. package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +1 -1
  845. package/src/duckdb/src/include/duckdb/function/scalar/list_functions.hpp +156 -0
  846. package/src/duckdb/src/include/duckdb/function/scalar/map_functions.hpp +27 -0
  847. package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +4 -45
  848. package/src/duckdb/src/include/duckdb/function/scalar/operator_functions.hpp +102 -0
  849. package/src/duckdb/src/include/duckdb/function/scalar/operators.hpp +2 -16
  850. package/src/duckdb/src/include/duckdb/function/scalar/sequence_functions.hpp +16 -25
  851. package/src/duckdb/src/include/duckdb/function/scalar/sequence_utils.hpp +38 -0
  852. package/src/duckdb/src/include/duckdb/function/scalar/strftime_format.hpp +1 -0
  853. package/src/duckdb/src/include/duckdb/function/scalar/string_common.hpp +49 -0
  854. package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +401 -76
  855. package/src/duckdb/src/include/duckdb/function/scalar/struct_functions.hpp +63 -0
  856. package/src/duckdb/src/include/duckdb/function/scalar/struct_utils.hpp +33 -0
  857. package/src/duckdb/src/include/duckdb/function/scalar/system_functions.hpp +45 -0
  858. package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +17 -8
  859. package/src/duckdb/src/include/duckdb/function/table/arrow/arrow_duck_schema.hpp +59 -6
  860. package/src/duckdb/src/include/duckdb/function/table/arrow/arrow_type_info.hpp +12 -9
  861. package/src/duckdb/src/include/duckdb/function/table/arrow/enum/arrow_type_info_type.hpp +2 -0
  862. package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +18 -13
  863. package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +7 -4
  864. package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +14 -0
  865. package/src/duckdb/src/include/duckdb/function/table/table_scan.hpp +15 -10
  866. package/src/duckdb/src/include/duckdb/function/table_function.hpp +94 -18
  867. package/src/duckdb/src/include/duckdb/{core_functions → function}/to_interval.hpp +1 -1
  868. package/src/duckdb/src/include/duckdb/function/window/window_aggregate_function.hpp +44 -0
  869. package/src/duckdb/src/include/duckdb/function/window/window_aggregate_states.hpp +56 -0
  870. package/src/duckdb/src/include/duckdb/function/window/window_aggregator.hpp +194 -0
  871. package/src/duckdb/src/include/duckdb/function/window/window_boundaries_state.hpp +153 -0
  872. package/src/duckdb/src/include/duckdb/function/window/window_collection.hpp +146 -0
  873. package/src/duckdb/src/include/duckdb/function/window/window_constant_aggregator.hpp +38 -0
  874. package/src/duckdb/src/include/duckdb/function/window/window_custom_aggregator.hpp +32 -0
  875. package/src/duckdb/src/include/duckdb/function/window/window_distinct_aggregator.hpp +39 -0
  876. package/src/duckdb/src/include/duckdb/function/window/window_executor.hpp +122 -0
  877. package/src/duckdb/src/include/duckdb/function/window/window_index_tree.hpp +42 -0
  878. package/src/duckdb/src/include/duckdb/function/window/window_merge_sort_tree.hpp +108 -0
  879. package/src/duckdb/src/include/duckdb/function/window/window_naive_aggregator.hpp +33 -0
  880. package/src/duckdb/src/include/duckdb/function/window/window_rank_function.hpp +63 -0
  881. package/src/duckdb/src/include/duckdb/function/window/window_rownumber_function.hpp +43 -0
  882. package/src/duckdb/src/include/duckdb/function/window/window_segment_tree.hpp +31 -0
  883. package/src/duckdb/src/include/duckdb/function/window/window_shared_expressions.hpp +76 -0
  884. package/src/duckdb/src/include/duckdb/function/window/window_token_tree.hpp +46 -0
  885. package/src/duckdb/src/include/duckdb/function/window/window_value_function.hpp +79 -0
  886. package/src/duckdb/src/include/duckdb/logging/http_logger.hpp +2 -0
  887. package/src/duckdb/src/include/duckdb/logging/log_manager.hpp +81 -0
  888. package/src/duckdb/src/include/duckdb/logging/log_storage.hpp +127 -0
  889. package/src/duckdb/src/include/duckdb/logging/logger.hpp +287 -0
  890. package/src/duckdb/src/include/duckdb/logging/logging.hpp +83 -0
  891. package/src/duckdb/src/include/duckdb/main/appender.hpp +41 -18
  892. package/src/duckdb/src/include/duckdb/main/attached_database.hpp +6 -3
  893. package/src/duckdb/src/include/duckdb/main/capi/capi_internal.hpp +7 -2
  894. package/src/duckdb/src/include/duckdb/main/capi/extension_api.hpp +317 -231
  895. package/src/duckdb/src/include/duckdb/main/client_config.hpp +17 -1
  896. package/src/duckdb/src/include/duckdb/main/client_context.hpp +28 -6
  897. package/src/duckdb/src/include/duckdb/main/client_context_file_opener.hpp +1 -0
  898. package/src/duckdb/src/include/duckdb/main/client_context_wrapper.hpp +5 -0
  899. package/src/duckdb/src/include/duckdb/main/client_data.hpp +3 -2
  900. package/src/duckdb/src/include/duckdb/main/client_properties.hpp +8 -3
  901. package/src/duckdb/src/include/duckdb/main/config.hpp +52 -8
  902. package/src/duckdb/src/include/duckdb/main/connection.hpp +18 -3
  903. package/src/duckdb/src/include/duckdb/main/database.hpp +8 -7
  904. package/src/duckdb/src/include/duckdb/main/database_file_opener.hpp +5 -1
  905. package/src/duckdb/src/include/duckdb/main/database_manager.hpp +3 -0
  906. package/src/duckdb/src/include/duckdb/main/db_instance_cache.hpp +1 -0
  907. package/src/duckdb/src/include/duckdb/main/extension.hpp +8 -2
  908. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +548 -9
  909. package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +18 -0
  910. package/src/duckdb/src/include/duckdb/main/extension_util.hpp +12 -7
  911. package/src/duckdb/src/include/duckdb/main/prepared_statement.hpp +3 -3
  912. package/src/duckdb/src/include/duckdb/main/profiling_info.hpp +2 -2
  913. package/src/duckdb/src/include/duckdb/main/query_profiler.hpp +8 -4
  914. package/src/duckdb/src/include/duckdb/main/relation/create_table_relation.hpp +3 -1
  915. package/src/duckdb/src/include/duckdb/main/relation/delete_relation.hpp +2 -2
  916. package/src/duckdb/src/include/duckdb/main/relation/subquery_relation.hpp +1 -4
  917. package/src/duckdb/src/include/duckdb/main/relation/table_function_relation.hpp +3 -1
  918. package/src/duckdb/src/include/duckdb/main/relation/table_relation.hpp +3 -0
  919. package/src/duckdb/src/include/duckdb/main/relation/update_relation.hpp +3 -2
  920. package/src/duckdb/src/include/duckdb/main/relation/value_relation.hpp +7 -0
  921. package/src/duckdb/src/include/duckdb/main/relation/view_relation.hpp +1 -0
  922. package/src/duckdb/src/include/duckdb/main/relation/write_parquet_relation.hpp +1 -1
  923. package/src/duckdb/src/include/duckdb/main/relation.hpp +45 -9
  924. package/src/duckdb/src/include/duckdb/main/secret/secret_storage.hpp +20 -22
  925. package/src/duckdb/src/include/duckdb/main/settings.hpp +613 -378
  926. package/src/duckdb/src/include/duckdb/main/table_description.hpp +14 -4
  927. package/src/duckdb/src/include/duckdb/optimizer/build_probe_side_optimizer.hpp +1 -3
  928. package/src/duckdb/src/include/duckdb/optimizer/column_lifetime_analyzer.hpp +14 -7
  929. package/src/duckdb/src/include/duckdb/optimizer/common_aggregate_optimizer.hpp +2 -2
  930. package/src/duckdb/src/include/duckdb/optimizer/empty_result_pullup.hpp +27 -0
  931. package/src/duckdb/src/include/duckdb/optimizer/expression_heuristics.hpp +1 -1
  932. package/src/duckdb/src/include/duckdb/optimizer/filter_combiner.hpp +6 -1
  933. package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +2 -0
  934. package/src/duckdb/src/include/duckdb/optimizer/in_clause_rewriter.hpp +3 -0
  935. package/src/duckdb/src/include/duckdb/optimizer/join_filter_pushdown_optimizer.hpp +5 -0
  936. package/src/duckdb/src/include/duckdb/optimizer/join_order/plan_enumerator.hpp +2 -0
  937. package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_statistics_helper.hpp +2 -2
  938. package/src/duckdb/src/include/duckdb/optimizer/late_materialization.hpp +45 -0
  939. package/src/duckdb/src/include/duckdb/optimizer/matcher/expression_matcher.hpp +23 -0
  940. package/src/duckdb/src/include/duckdb/optimizer/matcher/type_matcher.hpp +18 -0
  941. package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +9 -0
  942. package/src/duckdb/src/include/duckdb/optimizer/remove_unused_columns.hpp +33 -11
  943. package/src/duckdb/src/include/duckdb/optimizer/rule/distinct_aggregate_optimizer.hpp +34 -0
  944. package/src/duckdb/src/include/duckdb/optimizer/sampling_pushdown.hpp +25 -0
  945. package/src/duckdb/src/include/duckdb/optimizer/statistics_propagator.hpp +3 -1
  946. package/src/duckdb/src/include/duckdb/optimizer/sum_rewriter.hpp +37 -0
  947. package/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp +4 -0
  948. package/src/duckdb/src/include/duckdb/parallel/event.hpp +3 -0
  949. package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +1 -1
  950. package/src/duckdb/src/include/duckdb/parallel/pipeline_executor.hpp +26 -8
  951. package/src/duckdb/src/include/duckdb/parallel/thread_context.hpp +3 -0
  952. package/src/duckdb/src/include/duckdb/parser/base_expression.hpp +51 -3
  953. package/src/duckdb/src/include/duckdb/parser/constraints/unique_constraint.hpp +28 -44
  954. package/src/duckdb/src/include/duckdb/parser/expression/columnref_expression.hpp +3 -0
  955. package/src/duckdb/src/include/duckdb/parser/expression/comparison_expression.hpp +2 -2
  956. package/src/duckdb/src/include/duckdb/parser/expression/conjunction_expression.hpp +1 -1
  957. package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +2 -2
  958. package/src/duckdb/src/include/duckdb/parser/expression/operator_expression.hpp +6 -6
  959. package/src/duckdb/src/include/duckdb/parser/expression/star_expression.hpp +11 -1
  960. package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +12 -0
  961. package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_info.hpp +1 -0
  962. package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_scalar_function_info.hpp +3 -2
  963. package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_table_info.hpp +22 -1
  964. package/src/duckdb/src/include/duckdb/parser/parsed_data/attach_info.hpp +3 -4
  965. package/src/duckdb/src/include/duckdb/parser/parsed_data/comment_on_column_info.hpp +1 -1
  966. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_function_info.hpp +16 -12
  967. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_index_info.hpp +3 -3
  968. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_type_info.hpp +5 -5
  969. package/src/duckdb/src/include/duckdb/parser/parsed_data/sample_options.hpp +12 -3
  970. package/src/duckdb/src/include/duckdb/parser/parser.hpp +3 -0
  971. package/src/duckdb/src/include/duckdb/parser/qualified_name.hpp +17 -57
  972. package/src/duckdb/src/include/duckdb/parser/qualified_name_set.hpp +19 -3
  973. package/src/duckdb/src/include/duckdb/parser/simplified_token.hpp +2 -1
  974. package/src/duckdb/src/include/duckdb/parser/tableref/basetableref.hpp +12 -9
  975. package/src/duckdb/src/include/duckdb/parser/tokens.hpp +1 -1
  976. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +2 -2
  977. package/src/duckdb/src/include/duckdb/planner/bind_context.hpp +45 -28
  978. package/src/duckdb/src/include/duckdb/planner/binder.hpp +23 -11
  979. package/src/duckdb/src/include/duckdb/planner/binding_alias.hpp +44 -0
  980. package/src/duckdb/src/include/duckdb/planner/bound_result_modifier.hpp +1 -0
  981. package/src/duckdb/src/include/duckdb/planner/collation_binding.hpp +4 -3
  982. package/src/duckdb/src/include/duckdb/planner/constraints/bound_unique_constraint.hpp +11 -10
  983. package/src/duckdb/src/include/duckdb/planner/expression/bound_cast_expression.hpp +2 -0
  984. package/src/duckdb/src/include/duckdb/planner/expression/bound_function_expression.hpp +1 -0
  985. package/src/duckdb/src/include/duckdb/planner/expression/bound_subquery_expression.hpp +4 -4
  986. package/src/duckdb/src/include/duckdb/planner/expression/bound_window_expression.hpp +6 -0
  987. package/src/duckdb/src/include/duckdb/planner/expression.hpp +2 -0
  988. package/src/duckdb/src/include/duckdb/planner/expression_binder/having_binder.hpp +2 -0
  989. package/src/duckdb/src/include/duckdb/planner/expression_binder/index_binder.hpp +9 -4
  990. package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +8 -2
  991. package/src/duckdb/src/include/duckdb/planner/filter/conjunction_filter.hpp +1 -2
  992. package/src/duckdb/src/include/duckdb/planner/filter/dynamic_filter.hpp +48 -0
  993. package/src/duckdb/src/include/duckdb/planner/filter/in_filter.hpp +37 -0
  994. package/src/duckdb/src/include/duckdb/planner/filter/optional_filter.hpp +35 -0
  995. package/src/duckdb/src/include/duckdb/planner/logical_operator.hpp +4 -0
  996. package/src/duckdb/src/include/duckdb/planner/logical_operator_visitor.hpp +3 -0
  997. package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +2 -0
  998. package/src/duckdb/src/include/duckdb/planner/operator/logical_create_index.hpp +9 -9
  999. package/src/duckdb/src/include/duckdb/planner/operator/logical_filter.hpp +4 -0
  1000. package/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp +16 -7
  1001. package/src/duckdb/src/include/duckdb/planner/operator/logical_insert.hpp +2 -0
  1002. package/src/duckdb/src/include/duckdb/planner/operator/logical_join.hpp +4 -0
  1003. package/src/duckdb/src/include/duckdb/planner/operator/logical_order.hpp +5 -1
  1004. package/src/duckdb/src/include/duckdb/planner/operator/logical_top_n.hpp +5 -3
  1005. package/src/duckdb/src/include/duckdb/planner/table_binding.hpp +14 -6
  1006. package/src/duckdb/src/include/duckdb/planner/table_filter.hpp +12 -8
  1007. package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +1 -0
  1008. package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +3 -0
  1009. package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +82 -26
  1010. package/src/duckdb/src/include/duckdb/storage/buffer/buffer_handle.hpp +1 -1
  1011. package/src/duckdb/src/include/duckdb/storage/buffer/buffer_pool.hpp +10 -3
  1012. package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +2 -1
  1013. package/src/duckdb/src/include/duckdb/storage/checkpoint/string_checkpoint_state.hpp +4 -13
  1014. package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_compress.hpp +14 -15
  1015. package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_constants.hpp +1 -1
  1016. package/src/duckdb/src/include/duckdb/storage/compression/alprd/alprd_compress.hpp +13 -15
  1017. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_compress.hpp +1 -1
  1018. package/src/duckdb/src/include/duckdb/storage/compression/dictionary/analyze.hpp +46 -0
  1019. package/src/duckdb/src/include/duckdb/storage/compression/dictionary/common.hpp +60 -0
  1020. package/src/duckdb/src/include/duckdb/storage/compression/dictionary/compression.hpp +61 -0
  1021. package/src/duckdb/src/include/duckdb/storage/compression/dictionary/decompression.hpp +50 -0
  1022. package/src/duckdb/src/include/duckdb/storage/compression/empty_validity.hpp +100 -0
  1023. package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_compress.hpp +1 -1
  1024. package/src/duckdb/src/include/duckdb/storage/compression/roaring/appender.hpp +150 -0
  1025. package/src/duckdb/src/include/duckdb/storage/compression/roaring/roaring.hpp +618 -0
  1026. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +53 -31
  1027. package/src/duckdb/src/include/duckdb/storage/index.hpp +2 -3
  1028. package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +0 -1
  1029. package/src/duckdb/src/include/duckdb/storage/segment/uncompressed.hpp +4 -1
  1030. package/src/duckdb/src/include/duckdb/storage/standard_buffer_manager.hpp +3 -3
  1031. package/src/duckdb/src/include/duckdb/storage/statistics/column_statistics.hpp +1 -1
  1032. package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +5 -4
  1033. package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +16 -1
  1034. package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +2 -1
  1035. package/src/duckdb/src/include/duckdb/storage/storage_index.hpp +70 -0
  1036. package/src/duckdb/src/include/duckdb/storage/storage_info.hpp +5 -7
  1037. package/src/duckdb/src/include/duckdb/storage/storage_manager.hpp +4 -3
  1038. package/src/duckdb/src/include/duckdb/storage/storage_options.hpp +23 -0
  1039. package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +34 -6
  1040. package/src/duckdb/src/include/duckdb/storage/table/append_state.hpp +2 -0
  1041. package/src/duckdb/src/include/duckdb/storage/table/array_column_data.hpp +2 -2
  1042. package/src/duckdb/src/include/duckdb/storage/table/column_checkpoint_state.hpp +2 -1
  1043. package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +39 -10
  1044. package/src/duckdb/src/include/duckdb/storage/table/column_data_checkpointer.hpp +56 -14
  1045. package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +35 -29
  1046. package/src/duckdb/src/include/duckdb/storage/table/delete_state.hpp +1 -1
  1047. package/src/duckdb/src/include/duckdb/storage/table/list_column_data.hpp +1 -1
  1048. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +7 -1
  1049. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +19 -6
  1050. package/src/duckdb/src/include/duckdb/storage/table/row_version_manager.hpp +2 -1
  1051. package/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp +29 -6
  1052. package/src/duckdb/src/include/duckdb/storage/table/segment_tree.hpp +10 -10
  1053. package/src/duckdb/src/include/duckdb/storage/table/standard_column_data.hpp +5 -0
  1054. package/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp +26 -19
  1055. package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +8 -1
  1056. package/src/duckdb/src/include/duckdb/storage/table/update_segment.hpp +16 -14
  1057. package/src/duckdb/src/include/duckdb/storage/table/validity_column_data.hpp +2 -0
  1058. package/src/duckdb/src/include/duckdb/storage/table_io_manager.hpp +3 -0
  1059. package/src/duckdb/src/include/duckdb/storage/table_storage_info.hpp +1 -0
  1060. package/src/duckdb/src/include/duckdb/storage/temporary_file_manager.hpp +228 -61
  1061. package/src/duckdb/src/include/duckdb/storage/write_ahead_log.hpp +14 -10
  1062. package/src/duckdb/src/include/duckdb/transaction/commit_state.hpp +3 -1
  1063. package/src/duckdb/src/include/duckdb/transaction/duck_transaction.hpp +3 -2
  1064. package/src/duckdb/src/include/duckdb/transaction/duck_transaction_manager.hpp +1 -0
  1065. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +19 -17
  1066. package/src/duckdb/src/include/duckdb/transaction/rollback_state.hpp +5 -2
  1067. package/src/duckdb/src/include/duckdb/transaction/transaction.hpp +1 -2
  1068. package/src/duckdb/src/include/duckdb/transaction/undo_buffer.hpp +13 -8
  1069. package/src/duckdb/src/include/duckdb/transaction/undo_buffer_allocator.hpp +79 -0
  1070. package/src/duckdb/src/include/duckdb/transaction/update_info.hpp +43 -13
  1071. package/src/duckdb/src/include/duckdb/transaction/wal_write_state.hpp +4 -1
  1072. package/src/duckdb/src/include/duckdb/verification/copied_statement_verifier.hpp +4 -2
  1073. package/src/duckdb/src/include/duckdb/verification/deserialized_statement_verifier.hpp +4 -2
  1074. package/src/duckdb/src/include/duckdb/verification/external_statement_verifier.hpp +4 -2
  1075. package/src/duckdb/src/include/duckdb/verification/fetch_row_verifier.hpp +4 -2
  1076. package/src/duckdb/src/include/duckdb/verification/no_operator_caching_verifier.hpp +4 -2
  1077. package/src/duckdb/src/include/duckdb/verification/parsed_statement_verifier.hpp +4 -2
  1078. package/src/duckdb/src/include/duckdb/verification/prepared_statement_verifier.hpp +7 -3
  1079. package/src/duckdb/src/include/duckdb/verification/statement_verifier.hpp +11 -5
  1080. package/src/duckdb/src/include/duckdb/verification/unoptimized_statement_verifier.hpp +4 -2
  1081. package/src/duckdb/src/include/duckdb.h +424 -41
  1082. package/src/duckdb/src/include/duckdb_extension.h +301 -195
  1083. package/src/duckdb/src/logging/log_manager.cpp +157 -0
  1084. package/src/duckdb/src/logging/log_storage.cpp +209 -0
  1085. package/src/duckdb/src/logging/logger.cpp +211 -0
  1086. package/src/duckdb/src/logging/logging.cpp +42 -0
  1087. package/src/duckdb/src/main/appender.cpp +187 -45
  1088. package/src/duckdb/src/main/attached_database.cpp +16 -8
  1089. package/src/duckdb/src/main/capi/appender-c.cpp +47 -4
  1090. package/src/duckdb/src/main/capi/arrow-c.cpp +9 -4
  1091. package/src/duckdb/src/main/capi/config-c.cpp +17 -4
  1092. package/src/duckdb/src/main/capi/datetime-c.cpp +15 -0
  1093. package/src/duckdb/src/main/capi/duckdb-c.cpp +54 -13
  1094. package/src/duckdb/src/main/capi/duckdb_value-c.cpp +212 -4
  1095. package/src/duckdb/src/main/capi/helper-c.cpp +3 -0
  1096. package/src/duckdb/src/main/capi/prepared-c.cpp +26 -7
  1097. package/src/duckdb/src/main/capi/replacement_scan-c.cpp +1 -1
  1098. package/src/duckdb/src/main/capi/result-c.cpp +3 -0
  1099. package/src/duckdb/src/main/capi/table_description-c.cpp +43 -10
  1100. package/src/duckdb/src/main/capi/threading-c.cpp +4 -4
  1101. package/src/duckdb/src/main/client_context.cpp +125 -51
  1102. package/src/duckdb/src/main/client_context_file_opener.cpp +4 -0
  1103. package/src/duckdb/src/main/client_context_wrapper.cpp +4 -0
  1104. package/src/duckdb/src/main/client_data.cpp +1 -1
  1105. package/src/duckdb/src/main/client_verify.cpp +39 -20
  1106. package/src/duckdb/src/main/config.cpp +266 -74
  1107. package/src/duckdb/src/main/connection.cpp +53 -13
  1108. package/src/duckdb/src/main/database.cpp +39 -18
  1109. package/src/duckdb/src/main/database_manager.cpp +12 -11
  1110. package/src/duckdb/src/main/db_instance_cache.cpp +14 -7
  1111. package/src/duckdb/src/main/extension/extension_helper.cpp +24 -23
  1112. package/src/duckdb/src/main/extension/extension_install.cpp +19 -7
  1113. package/src/duckdb/src/main/extension/extension_load.cpp +91 -41
  1114. package/src/duckdb/src/main/extension/extension_util.cpp +40 -19
  1115. package/src/duckdb/src/main/extension.cpp +20 -11
  1116. package/src/duckdb/src/main/profiling_info.cpp +19 -5
  1117. package/src/duckdb/src/main/query_profiler.cpp +135 -36
  1118. package/src/duckdb/src/main/query_result.cpp +2 -1
  1119. package/src/duckdb/src/main/relation/aggregate_relation.cpp +3 -3
  1120. package/src/duckdb/src/main/relation/create_table_relation.cpp +5 -4
  1121. package/src/duckdb/src/main/relation/create_view_relation.cpp +2 -2
  1122. package/src/duckdb/src/main/relation/cross_product_relation.cpp +2 -2
  1123. package/src/duckdb/src/main/relation/delete_relation.cpp +2 -2
  1124. package/src/duckdb/src/main/relation/delim_get_relation.cpp +1 -1
  1125. package/src/duckdb/src/main/relation/distinct_relation.cpp +1 -1
  1126. package/src/duckdb/src/main/relation/explain_relation.cpp +1 -1
  1127. package/src/duckdb/src/main/relation/filter_relation.cpp +1 -1
  1128. package/src/duckdb/src/main/relation/insert_relation.cpp +1 -1
  1129. package/src/duckdb/src/main/relation/join_relation.cpp +5 -5
  1130. package/src/duckdb/src/main/relation/order_relation.cpp +1 -1
  1131. package/src/duckdb/src/main/relation/projection_relation.cpp +3 -3
  1132. package/src/duckdb/src/main/relation/query_relation.cpp +1 -1
  1133. package/src/duckdb/src/main/relation/read_csv_relation.cpp +58 -20
  1134. package/src/duckdb/src/main/relation/setop_relation.cpp +2 -2
  1135. package/src/duckdb/src/main/relation/subquery_relation.cpp +3 -8
  1136. package/src/duckdb/src/main/relation/table_function_relation.cpp +10 -1
  1137. package/src/duckdb/src/main/relation/table_relation.cpp +19 -3
  1138. package/src/duckdb/src/main/relation/update_relation.cpp +2 -2
  1139. package/src/duckdb/src/main/relation/value_relation.cpp +42 -2
  1140. package/src/duckdb/src/main/relation/view_relation.cpp +8 -2
  1141. package/src/duckdb/src/main/relation/write_csv_relation.cpp +1 -1
  1142. package/src/duckdb/src/main/relation/write_parquet_relation.cpp +1 -1
  1143. package/src/duckdb/src/main/relation.cpp +49 -28
  1144. package/src/duckdb/src/main/secret/secret_manager.cpp +1 -1
  1145. package/src/duckdb/src/main/secret/secret_storage.cpp +6 -4
  1146. package/src/duckdb/src/main/settings/autogenerated_settings.cpp +1102 -0
  1147. package/src/duckdb/src/main/settings/custom_settings.cpp +1343 -0
  1148. package/src/duckdb/src/optimizer/build_probe_side_optimizer.cpp +60 -37
  1149. package/src/duckdb/src/optimizer/column_binding_replacer.cpp +1 -1
  1150. package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +126 -72
  1151. package/src/duckdb/src/optimizer/common_aggregate_optimizer.cpp +22 -6
  1152. package/src/duckdb/src/optimizer/compressed_materialization/compress_aggregate.cpp +3 -3
  1153. package/src/duckdb/src/optimizer/compressed_materialization/compress_distinct.cpp +2 -2
  1154. package/src/duckdb/src/optimizer/compressed_materialization.cpp +3 -3
  1155. package/src/duckdb/src/optimizer/cse_optimizer.cpp +7 -7
  1156. package/src/duckdb/src/optimizer/deliminator.cpp +6 -5
  1157. package/src/duckdb/src/optimizer/empty_result_pullup.cpp +96 -0
  1158. package/src/duckdb/src/optimizer/expression_heuristics.cpp +11 -3
  1159. package/src/duckdb/src/optimizer/expression_rewriter.cpp +9 -2
  1160. package/src/duckdb/src/optimizer/filter_combiner.cpp +190 -88
  1161. package/src/duckdb/src/optimizer/filter_pushdown.cpp +6 -5
  1162. package/src/duckdb/src/optimizer/in_clause_rewriter.cpp +25 -9
  1163. package/src/duckdb/src/optimizer/join_filter_pushdown_optimizer.cpp +170 -72
  1164. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +5 -4
  1165. package/src/duckdb/src/optimizer/join_order/plan_enumerator.cpp +3 -1
  1166. package/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp +7 -7
  1167. package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +15 -6
  1168. package/src/duckdb/src/optimizer/join_order/relation_statistics_helper.cpp +37 -22
  1169. package/src/duckdb/src/optimizer/late_materialization.cpp +414 -0
  1170. package/src/duckdb/src/optimizer/limit_pushdown.cpp +1 -0
  1171. package/src/duckdb/src/optimizer/matcher/expression_matcher.cpp +30 -2
  1172. package/src/duckdb/src/optimizer/optimizer.cpp +67 -7
  1173. package/src/duckdb/src/optimizer/pullup/pullup_filter.cpp +3 -3
  1174. package/src/duckdb/src/optimizer/pullup/pullup_projection.cpp +2 -2
  1175. package/src/duckdb/src/optimizer/pullup/pullup_set_operation.cpp +1 -1
  1176. package/src/duckdb/src/optimizer/pushdown/pushdown_aggregate.cpp +2 -2
  1177. package/src/duckdb/src/optimizer/pushdown/pushdown_filter.cpp +1 -1
  1178. package/src/duckdb/src/optimizer/pushdown/pushdown_left_join.cpp +1 -1
  1179. package/src/duckdb/src/optimizer/pushdown/pushdown_mark_join.cpp +3 -3
  1180. package/src/duckdb/src/optimizer/pushdown/pushdown_projection.cpp +5 -3
  1181. package/src/duckdb/src/optimizer/pushdown/pushdown_set_operation.cpp +1 -1
  1182. package/src/duckdb/src/optimizer/pushdown/pushdown_unnest.cpp +52 -0
  1183. package/src/duckdb/src/optimizer/pushdown/pushdown_window.cpp +2 -2
  1184. package/src/duckdb/src/optimizer/regex_range_filter.cpp +1 -1
  1185. package/src/duckdb/src/optimizer/remove_duplicate_groups.cpp +1 -1
  1186. package/src/duckdb/src/optimizer/remove_unused_columns.cpp +168 -38
  1187. package/src/duckdb/src/optimizer/rule/arithmetic_simplification.cpp +2 -1
  1188. package/src/duckdb/src/optimizer/rule/comparison_simplification.cpp +8 -5
  1189. package/src/duckdb/src/optimizer/rule/conjunction_simplification.cpp +2 -2
  1190. package/src/duckdb/src/optimizer/rule/constant_folding.cpp +2 -2
  1191. package/src/duckdb/src/optimizer/rule/distinct_aggregate_optimizer.cpp +65 -0
  1192. package/src/duckdb/src/optimizer/rule/distributivity.cpp +2 -2
  1193. package/src/duckdb/src/optimizer/rule/enum_comparison.cpp +2 -1
  1194. package/src/duckdb/src/optimizer/rule/equal_or_null_simplification.cpp +4 -3
  1195. package/src/duckdb/src/optimizer/rule/in_clause_simplification_rule.cpp +3 -3
  1196. package/src/duckdb/src/optimizer/rule/like_optimizations.cpp +3 -1
  1197. package/src/duckdb/src/optimizer/rule/move_constants.cpp +9 -9
  1198. package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +4 -3
  1199. package/src/duckdb/src/optimizer/rule/timestamp_comparison.cpp +1 -1
  1200. package/src/duckdb/src/optimizer/sampling_pushdown.cpp +24 -0
  1201. package/src/duckdb/src/optimizer/statistics/expression/propagate_comparison.cpp +1 -1
  1202. package/src/duckdb/src/optimizer/statistics/expression/propagate_conjunction.cpp +1 -1
  1203. package/src/duckdb/src/optimizer/statistics/expression/propagate_operator.cpp +1 -1
  1204. package/src/duckdb/src/optimizer/statistics/operator/propagate_aggregate.cpp +74 -0
  1205. package/src/duckdb/src/optimizer/statistics/operator/propagate_filter.cpp +10 -7
  1206. package/src/duckdb/src/optimizer/statistics/operator/propagate_get.cpp +3 -3
  1207. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +3 -3
  1208. package/src/duckdb/src/optimizer/statistics/operator/propagate_window.cpp +3 -0
  1209. package/src/duckdb/src/optimizer/sum_rewriter.cpp +174 -0
  1210. package/src/duckdb/src/optimizer/topn_optimizer.cpp +71 -0
  1211. package/src/duckdb/src/optimizer/unnest_rewriter.cpp +5 -5
  1212. package/src/duckdb/src/parallel/event.cpp +4 -0
  1213. package/src/duckdb/src/parallel/executor.cpp +11 -29
  1214. package/src/duckdb/src/parallel/executor_task.cpp +8 -3
  1215. package/src/duckdb/src/parallel/pipeline.cpp +15 -8
  1216. package/src/duckdb/src/parallel/pipeline_executor.cpp +67 -43
  1217. package/src/duckdb/src/parallel/thread_context.cpp +12 -1
  1218. package/src/duckdb/src/parser/column_definition.cpp +3 -3
  1219. package/src/duckdb/src/parser/constraints/unique_constraint.cpp +72 -9
  1220. package/src/duckdb/src/parser/expression/columnref_expression.cpp +15 -3
  1221. package/src/duckdb/src/parser/expression/conjunction_expression.cpp +1 -1
  1222. package/src/duckdb/src/parser/expression/function_expression.cpp +1 -1
  1223. package/src/duckdb/src/parser/expression/lambda_expression.cpp +3 -3
  1224. package/src/duckdb/src/parser/expression/lambdaref_expression.cpp +1 -1
  1225. package/src/duckdb/src/parser/expression/star_expression.cpp +46 -2
  1226. package/src/duckdb/src/parser/expression/window_expression.cpp +24 -1
  1227. package/src/duckdb/src/parser/parsed_data/alter_info.cpp +26 -2
  1228. package/src/duckdb/src/parser/parsed_data/alter_scalar_function_info.cpp +5 -3
  1229. package/src/duckdb/src/parser/parsed_data/alter_table_info.cpp +29 -1
  1230. package/src/duckdb/src/parser/parsed_data/attach_info.cpp +6 -6
  1231. package/src/duckdb/src/parser/parsed_data/create_aggregate_function_info.cpp +1 -1
  1232. package/src/duckdb/src/parser/parsed_data/create_function_info.cpp +17 -0
  1233. package/src/duckdb/src/parser/parsed_data/create_index_info.cpp +16 -15
  1234. package/src/duckdb/src/parser/parsed_data/create_macro_info.cpp +1 -1
  1235. package/src/duckdb/src/parser/parsed_data/create_pragma_function_info.cpp +1 -1
  1236. package/src/duckdb/src/parser/parsed_data/create_scalar_function_info.cpp +3 -2
  1237. package/src/duckdb/src/parser/parsed_data/create_schema_info.cpp +1 -1
  1238. package/src/duckdb/src/parser/parsed_data/create_table_function_info.cpp +1 -1
  1239. package/src/duckdb/src/parser/parsed_data/create_table_info.cpp +1 -0
  1240. package/src/duckdb/src/parser/parsed_data/create_type_info.cpp +4 -4
  1241. package/src/duckdb/src/parser/parsed_data/load_info.cpp +1 -0
  1242. package/src/duckdb/src/parser/parsed_data/sample_options.cpp +31 -1
  1243. package/src/duckdb/src/parser/parsed_expression.cpp +1 -1
  1244. package/src/duckdb/src/parser/parsed_expression_iterator.cpp +4 -1
  1245. package/src/duckdb/src/parser/parser.cpp +129 -0
  1246. package/src/duckdb/src/parser/qualified_name.cpp +99 -0
  1247. package/src/duckdb/src/parser/query_error_context.cpp +35 -6
  1248. package/src/duckdb/src/parser/query_node/select_node.cpp +4 -4
  1249. package/src/duckdb/src/parser/statement/delete_statement.cpp +6 -1
  1250. package/src/duckdb/src/parser/statement/insert_statement.cpp +4 -3
  1251. package/src/duckdb/src/parser/statement/update_statement.cpp +6 -1
  1252. package/src/duckdb/src/parser/tableref/pivotref.cpp +2 -2
  1253. package/src/duckdb/src/parser/tableref.cpp +2 -2
  1254. package/src/duckdb/src/parser/transform/constraint/transform_constraint.cpp +16 -24
  1255. package/src/duckdb/src/parser/transform/expression/transform_array_access.cpp +1 -1
  1256. package/src/duckdb/src/parser/transform/expression/transform_bool_expr.cpp +5 -5
  1257. package/src/duckdb/src/parser/transform/expression/transform_columnref.cpp +61 -13
  1258. package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +10 -4
  1259. package/src/duckdb/src/parser/transform/expression/transform_expression.cpp +2 -2
  1260. package/src/duckdb/src/parser/transform/expression/transform_function.cpp +30 -3
  1261. package/src/duckdb/src/parser/transform/expression/transform_operator.cpp +1 -1
  1262. package/src/duckdb/src/parser/transform/expression/transform_subquery.cpp +25 -6
  1263. package/src/duckdb/src/parser/transform/helpers/transform_groupby.cpp +1 -1
  1264. package/src/duckdb/src/parser/transform/helpers/transform_sample.cpp +10 -3
  1265. package/src/duckdb/src/parser/transform/helpers/transform_typename.cpp +4 -3
  1266. package/src/duckdb/src/parser/transform/statement/transform_alter_table.cpp +18 -3
  1267. package/src/duckdb/src/parser/transform/statement/transform_comment_on.cpp +1 -1
  1268. package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +0 -1
  1269. package/src/duckdb/src/parser/transform/statement/transform_create_function.cpp +5 -5
  1270. package/src/duckdb/src/parser/transform/statement/transform_create_table.cpp +26 -12
  1271. package/src/duckdb/src/parser/transform/statement/transform_create_table_as.cpp +11 -3
  1272. package/src/duckdb/src/parser/transform/statement/transform_create_view.cpp +1 -1
  1273. package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +2 -0
  1274. package/src/duckdb/src/parser/transform/statement/transform_pragma.cpp +3 -3
  1275. package/src/duckdb/src/parser/transform/statement/transform_prepare.cpp +4 -4
  1276. package/src/duckdb/src/parser/transform/statement/transform_set.cpp +2 -2
  1277. package/src/duckdb/src/parser/transform/statement/transform_show.cpp +21 -3
  1278. package/src/duckdb/src/parser/transform/tableref/transform_pivot.cpp +8 -6
  1279. package/src/duckdb/src/parser/transformer.cpp +2 -2
  1280. package/src/duckdb/src/planner/bind_context.cpp +308 -136
  1281. package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +30 -31
  1282. package/src/duckdb/src/planner/binder/expression/bind_between_expression.cpp +4 -2
  1283. package/src/duckdb/src/planner/binder/expression/bind_columnref_expression.cpp +102 -94
  1284. package/src/duckdb/src/planner/binder/expression/bind_comparison_expression.cpp +7 -5
  1285. package/src/duckdb/src/planner/binder/expression/bind_conjunction_expression.cpp +1 -1
  1286. package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +7 -7
  1287. package/src/duckdb/src/planner/binder/expression/bind_lambda.cpp +10 -10
  1288. package/src/duckdb/src/planner/binder/expression/bind_macro_expression.cpp +24 -6
  1289. package/src/duckdb/src/planner/binder/expression/bind_operator_expression.cpp +23 -15
  1290. package/src/duckdb/src/planner/binder/expression/bind_parameter_expression.cpp +1 -1
  1291. package/src/duckdb/src/planner/binder/expression/bind_star_expression.cpp +97 -19
  1292. package/src/duckdb/src/planner/binder/expression/bind_subquery_expression.cpp +74 -16
  1293. package/src/duckdb/src/planner/binder/expression/bind_unnest_expression.cpp +6 -6
  1294. package/src/duckdb/src/planner/binder/expression/bind_window_expression.cpp +49 -15
  1295. package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +32 -23
  1296. package/src/duckdb/src/planner/binder/query_node/bind_setop_node.cpp +20 -3
  1297. package/src/duckdb/src/planner/binder/query_node/bind_table_macro_node.cpp +2 -2
  1298. package/src/duckdb/src/planner/binder/query_node/plan_query_node.cpp +3 -0
  1299. package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +6 -5
  1300. package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +38 -19
  1301. package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +2 -12
  1302. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +117 -412
  1303. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +423 -144
  1304. package/src/duckdb/src/planner/binder/statement/bind_delete.cpp +1 -1
  1305. package/src/duckdb/src/planner/binder/statement/bind_drop.cpp +5 -0
  1306. package/src/duckdb/src/planner/binder/statement/bind_execute.cpp +1 -1
  1307. package/src/duckdb/src/planner/binder/statement/bind_export.cpp +0 -4
  1308. package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +31 -13
  1309. package/src/duckdb/src/planner/binder/statement/bind_pragma.cpp +1 -1
  1310. package/src/duckdb/src/planner/binder/statement/bind_simple.cpp +96 -27
  1311. package/src/duckdb/src/planner/binder/statement/bind_summarize.cpp +1 -1
  1312. package/src/duckdb/src/planner/binder/statement/bind_update.cpp +5 -3
  1313. package/src/duckdb/src/planner/binder/statement/bind_vacuum.cpp +7 -6
  1314. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +36 -9
  1315. package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +34 -34
  1316. package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +72 -35
  1317. package/src/duckdb/src/planner/binder/tableref/bind_showref.cpp +99 -18
  1318. package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +23 -11
  1319. package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +22 -19
  1320. package/src/duckdb/src/planner/binder.cpp +23 -45
  1321. package/src/duckdb/src/planner/binding_alias.cpp +69 -0
  1322. package/src/duckdb/src/planner/bound_parameter_map.cpp +1 -1
  1323. package/src/duckdb/src/planner/bound_result_modifier.cpp +6 -2
  1324. package/src/duckdb/src/planner/collation_binding.cpp +38 -4
  1325. package/src/duckdb/src/planner/expression/bound_cast_expression.cpp +17 -5
  1326. package/src/duckdb/src/planner/expression/bound_expression.cpp +1 -1
  1327. package/src/duckdb/src/planner/expression/bound_function_expression.cpp +8 -1
  1328. package/src/duckdb/src/planner/expression/bound_parameter_expression.cpp +2 -2
  1329. package/src/duckdb/src/planner/expression/bound_window_expression.cpp +24 -4
  1330. package/src/duckdb/src/planner/expression.cpp +7 -1
  1331. package/src/duckdb/src/planner/expression_binder/aggregate_binder.cpp +1 -1
  1332. package/src/duckdb/src/planner/expression_binder/base_select_binder.cpp +2 -2
  1333. package/src/duckdb/src/planner/expression_binder/group_binder.cpp +2 -2
  1334. package/src/duckdb/src/planner/expression_binder/having_binder.cpp +16 -0
  1335. package/src/duckdb/src/planner/expression_binder/index_binder.cpp +53 -1
  1336. package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +3 -3
  1337. package/src/duckdb/src/planner/expression_binder/order_binder.cpp +8 -8
  1338. package/src/duckdb/src/planner/expression_binder/relation_binder.cpp +1 -1
  1339. package/src/duckdb/src/planner/expression_binder/select_bind_state.cpp +2 -2
  1340. package/src/duckdb/src/planner/expression_binder/table_function_binder.cpp +1 -1
  1341. package/src/duckdb/src/planner/expression_binder/update_binder.cpp +1 -1
  1342. package/src/duckdb/src/planner/expression_binder.cpp +7 -7
  1343. package/src/duckdb/src/planner/expression_iterator.cpp +6 -3
  1344. package/src/duckdb/src/planner/filter/constant_filter.cpp +17 -2
  1345. package/src/duckdb/src/planner/filter/dynamic_filter.cpp +68 -0
  1346. package/src/duckdb/src/planner/filter/in_filter.cpp +84 -0
  1347. package/src/duckdb/src/planner/filter/null_filter.cpp +1 -2
  1348. package/src/duckdb/src/planner/filter/optional_filter.cpp +29 -0
  1349. package/src/duckdb/src/planner/filter/struct_filter.cpp +11 -6
  1350. package/src/duckdb/src/planner/joinside.cpp +6 -5
  1351. package/src/duckdb/src/planner/logical_operator.cpp +4 -1
  1352. package/src/duckdb/src/planner/logical_operator_visitor.cpp +68 -2
  1353. package/src/duckdb/src/planner/operator/logical_comparison_join.cpp +23 -0
  1354. package/src/duckdb/src/planner/operator/logical_create_index.cpp +16 -12
  1355. package/src/duckdb/src/planner/operator/logical_filter.cpp +1 -1
  1356. package/src/duckdb/src/planner/operator/logical_get.cpp +48 -25
  1357. package/src/duckdb/src/planner/operator/logical_insert.cpp +1 -1
  1358. package/src/duckdb/src/planner/operator/logical_join.cpp +1 -1
  1359. package/src/duckdb/src/planner/operator/logical_order.cpp +4 -11
  1360. package/src/duckdb/src/planner/operator/logical_top_n.cpp +7 -0
  1361. package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +33 -5
  1362. package/src/duckdb/src/planner/subquery/rewrite_correlated_expressions.cpp +2 -2
  1363. package/src/duckdb/src/planner/table_binding.cpp +74 -36
  1364. package/src/duckdb/src/planner/table_filter.cpp +5 -8
  1365. package/src/duckdb/src/storage/arena_allocator.cpp +5 -4
  1366. package/src/duckdb/src/storage/buffer/block_handle.cpp +88 -17
  1367. package/src/duckdb/src/storage/buffer/block_manager.cpp +34 -26
  1368. package/src/duckdb/src/storage/buffer/buffer_handle.cpp +2 -2
  1369. package/src/duckdb/src/storage/buffer/buffer_pool.cpp +70 -49
  1370. package/src/duckdb/src/storage/buffer_manager.cpp +4 -0
  1371. package/src/duckdb/src/storage/checkpoint_manager.cpp +24 -5
  1372. package/src/duckdb/src/storage/compression/bitpacking.cpp +14 -16
  1373. package/src/duckdb/src/storage/compression/dictionary/analyze.cpp +54 -0
  1374. package/src/duckdb/src/storage/compression/dictionary/common.cpp +90 -0
  1375. package/src/duckdb/src/storage/compression/dictionary/compression.cpp +174 -0
  1376. package/src/duckdb/src/storage/compression/dictionary/decompression.cpp +115 -0
  1377. package/src/duckdb/src/storage/compression/dictionary_compression.cpp +53 -545
  1378. package/src/duckdb/src/storage/compression/empty_validity.cpp +15 -0
  1379. package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +25 -16
  1380. package/src/duckdb/src/storage/compression/fsst.cpp +101 -47
  1381. package/src/duckdb/src/storage/compression/numeric_constant.cpp +92 -2
  1382. package/src/duckdb/src/storage/compression/rle.cpp +216 -46
  1383. package/src/duckdb/src/storage/compression/roaring/analyze.cpp +179 -0
  1384. package/src/duckdb/src/storage/compression/roaring/common.cpp +282 -0
  1385. package/src/duckdb/src/storage/compression/roaring/compress.cpp +481 -0
  1386. package/src/duckdb/src/storage/compression/roaring/metadata.cpp +262 -0
  1387. package/src/duckdb/src/storage/compression/roaring/scan.cpp +364 -0
  1388. package/src/duckdb/src/storage/compression/string_uncompressed.cpp +47 -65
  1389. package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +102 -39
  1390. package/src/duckdb/src/storage/compression/zstd.cpp +1049 -0
  1391. package/src/duckdb/src/storage/data_table.cpp +312 -172
  1392. package/src/duckdb/src/storage/local_storage.cpp +104 -46
  1393. package/src/duckdb/src/storage/metadata/metadata_manager.cpp +1 -1
  1394. package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +7 -3
  1395. package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +138 -58
  1396. package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +14 -0
  1397. package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +19 -8
  1398. package/src/duckdb/src/storage/serialization/serialize_statement.cpp +2 -0
  1399. package/src/duckdb/src/storage/serialization/serialize_table_filter.cpp +43 -0
  1400. package/src/duckdb/src/storage/serialization/serialize_types.cpp +32 -5
  1401. package/src/duckdb/src/storage/single_file_block_manager.cpp +6 -8
  1402. package/src/duckdb/src/storage/standard_buffer_manager.cpp +82 -71
  1403. package/src/duckdb/src/storage/statistics/column_statistics.cpp +3 -3
  1404. package/src/duckdb/src/storage/statistics/distinct_statistics.cpp +18 -17
  1405. package/src/duckdb/src/storage/statistics/numeric_stats.cpp +34 -22
  1406. package/src/duckdb/src/storage/statistics/string_stats.cpp +14 -3
  1407. package/src/duckdb/src/storage/storage_info.cpp +72 -10
  1408. package/src/duckdb/src/storage/storage_manager.cpp +41 -47
  1409. package/src/duckdb/src/storage/table/array_column_data.cpp +7 -1
  1410. package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +10 -9
  1411. package/src/duckdb/src/storage/table/column_data.cpp +105 -43
  1412. package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +307 -132
  1413. package/src/duckdb/src/storage/table/column_segment.cpp +36 -13
  1414. package/src/duckdb/src/storage/table/list_column_data.cpp +4 -4
  1415. package/src/duckdb/src/storage/table/row_group.cpp +159 -66
  1416. package/src/duckdb/src/storage/table/row_group_collection.cpp +157 -68
  1417. package/src/duckdb/src/storage/table/row_version_manager.cpp +33 -10
  1418. package/src/duckdb/src/storage/table/scan_state.cpp +21 -7
  1419. package/src/duckdb/src/storage/table/standard_column_data.cpp +68 -5
  1420. package/src/duckdb/src/storage/table/struct_column_data.cpp +42 -4
  1421. package/src/duckdb/src/storage/table/table_statistics.cpp +91 -5
  1422. package/src/duckdb/src/storage/table/update_segment.cpp +287 -210
  1423. package/src/duckdb/src/storage/table_index_list.cpp +55 -58
  1424. package/src/duckdb/src/storage/temporary_file_manager.cpp +412 -149
  1425. package/src/duckdb/src/storage/wal_replay.cpp +132 -48
  1426. package/src/duckdb/src/storage/write_ahead_log.cpp +75 -48
  1427. package/src/duckdb/src/transaction/cleanup_state.cpp +0 -1
  1428. package/src/duckdb/src/transaction/commit_state.cpp +23 -14
  1429. package/src/duckdb/src/transaction/duck_transaction.cpp +29 -25
  1430. package/src/duckdb/src/transaction/duck_transaction_manager.cpp +18 -6
  1431. package/src/duckdb/src/transaction/meta_transaction.cpp +3 -2
  1432. package/src/duckdb/src/transaction/rollback_state.cpp +5 -2
  1433. package/src/duckdb/src/transaction/transaction_context.cpp +9 -1
  1434. package/src/duckdb/src/transaction/undo_buffer.cpp +35 -27
  1435. package/src/duckdb/src/transaction/undo_buffer_allocator.cpp +72 -0
  1436. package/src/duckdb/src/transaction/wal_write_state.cpp +12 -10
  1437. package/src/duckdb/src/verification/copied_statement_verifier.cpp +7 -4
  1438. package/src/duckdb/src/verification/deserialized_statement_verifier.cpp +7 -5
  1439. package/src/duckdb/src/verification/external_statement_verifier.cpp +7 -4
  1440. package/src/duckdb/src/verification/fetch_row_verifier.cpp +7 -4
  1441. package/src/duckdb/src/verification/no_operator_caching_verifier.cpp +8 -4
  1442. package/src/duckdb/src/verification/parsed_statement_verifier.cpp +7 -4
  1443. package/src/duckdb/src/verification/prepared_statement_verifier.cpp +16 -12
  1444. package/src/duckdb/src/verification/statement_verifier.cpp +20 -15
  1445. package/src/duckdb/src/verification/unoptimized_statement_verifier.cpp +7 -4
  1446. package/src/duckdb/third_party/fsst/libfsst.hpp +1 -0
  1447. package/src/duckdb/third_party/httplib/httplib.hpp +15 -22
  1448. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +4 -2
  1449. package/src/duckdb/third_party/libpg_query/pg_functions.cpp +2 -4
  1450. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +14278 -13832
  1451. package/src/duckdb/third_party/parquet/parquet_types.cpp +3410 -1686
  1452. package/src/duckdb/third_party/parquet/parquet_types.h +1585 -1204
  1453. package/src/duckdb/third_party/skiplist/SkipList.h +0 -1
  1454. package/src/duckdb/third_party/snappy/snappy-stubs-internal.h +13 -15
  1455. package/src/duckdb/third_party/zstd/common/debug.cpp +36 -0
  1456. package/src/duckdb/third_party/zstd/common/entropy_common.cpp +173 -49
  1457. package/src/duckdb/third_party/zstd/common/error_private.cpp +11 -3
  1458. package/src/duckdb/third_party/zstd/common/fse_decompress.cpp +126 -97
  1459. package/src/duckdb/third_party/zstd/common/pool.cpp +376 -0
  1460. package/src/duckdb/third_party/zstd/common/threading.cpp +193 -0
  1461. package/src/duckdb/third_party/zstd/common/xxhash.cpp +18 -14
  1462. package/src/duckdb/third_party/zstd/common/zstd_common.cpp +3 -38
  1463. package/src/duckdb/third_party/zstd/compress/fse_compress.cpp +93 -165
  1464. package/src/duckdb/third_party/zstd/compress/hist.cpp +28 -31
  1465. package/src/duckdb/third_party/zstd/compress/huf_compress.cpp +957 -291
  1466. package/src/duckdb/third_party/zstd/compress/zstd_compress.cpp +3988 -1124
  1467. package/src/duckdb/third_party/zstd/compress/zstd_compress_literals.cpp +120 -43
  1468. package/src/duckdb/third_party/zstd/compress/zstd_compress_sequences.cpp +47 -23
  1469. package/src/duckdb/third_party/zstd/compress/zstd_compress_superblock.cpp +274 -424
  1470. package/src/duckdb/third_party/zstd/compress/zstd_double_fast.cpp +403 -153
  1471. package/src/duckdb/third_party/zstd/compress/zstd_fast.cpp +741 -268
  1472. package/src/duckdb/third_party/zstd/compress/zstd_lazy.cpp +1339 -278
  1473. package/src/duckdb/third_party/zstd/compress/zstd_ldm.cpp +334 -222
  1474. package/src/duckdb/third_party/zstd/compress/zstd_opt.cpp +674 -298
  1475. package/src/duckdb/third_party/zstd/compress/zstdmt_compress.cpp +1885 -0
  1476. package/src/duckdb/third_party/zstd/decompress/huf_decompress.cpp +1247 -586
  1477. package/src/duckdb/third_party/zstd/decompress/zstd_ddict.cpp +18 -17
  1478. package/src/duckdb/third_party/zstd/decompress/zstd_decompress.cpp +724 -270
  1479. package/src/duckdb/third_party/zstd/decompress/zstd_decompress_block.cpp +1193 -393
  1480. package/src/duckdb/third_party/zstd/deprecated/zbuff_common.cpp +30 -0
  1481. package/src/duckdb/third_party/zstd/deprecated/zbuff_compress.cpp +171 -0
  1482. package/src/duckdb/third_party/zstd/deprecated/zbuff_decompress.cpp +80 -0
  1483. package/src/duckdb/third_party/zstd/dict/cover.cpp +1271 -0
  1484. package/src/duckdb/third_party/zstd/dict/divsufsort.cpp +1916 -0
  1485. package/src/duckdb/third_party/zstd/dict/fastcover.cpp +775 -0
  1486. package/src/duckdb/third_party/zstd/dict/zdict.cpp +1139 -0
  1487. package/src/duckdb/third_party/zstd/include/zdict.h +473 -0
  1488. package/src/duckdb/third_party/zstd/include/zstd/common/allocations.h +58 -0
  1489. package/src/duckdb/third_party/zstd/include/zstd/common/bits.h +204 -0
  1490. package/src/duckdb/third_party/zstd/include/zstd/common/bitstream.h +88 -85
  1491. package/src/duckdb/third_party/zstd/include/zstd/common/compiler.h +243 -47
  1492. package/src/duckdb/third_party/zstd/include/zstd/common/cpu.h +253 -0
  1493. package/src/duckdb/third_party/zstd/include/zstd/common/debug.h +31 -31
  1494. package/src/duckdb/third_party/zstd/include/zstd/common/error_private.h +94 -6
  1495. package/src/duckdb/third_party/zstd/include/zstd/common/fse.h +424 -64
  1496. package/src/duckdb/third_party/zstd/include/zstd/common/huf.h +255 -70
  1497. package/src/duckdb/third_party/zstd/include/zstd/common/mem.h +125 -85
  1498. package/src/duckdb/third_party/zstd/include/zstd/common/pool.h +84 -0
  1499. package/src/duckdb/third_party/zstd/include/zstd/common/portability_macros.h +158 -0
  1500. package/src/duckdb/third_party/zstd/include/zstd/common/threading.h +152 -0
  1501. package/src/duckdb/third_party/zstd/include/zstd/common/{xxhash.h → xxhash.hpp} +0 -1
  1502. package/src/duckdb/third_party/zstd/include/zstd/common/{xxhash_static.h → xxhash_static.hpp} +1 -1
  1503. package/src/duckdb/third_party/zstd/include/zstd/common/zstd_deps.h +122 -0
  1504. package/src/duckdb/third_party/zstd/include/zstd/common/zstd_internal.h +143 -174
  1505. package/src/duckdb/third_party/zstd/include/zstd/common/zstd_trace.h +159 -0
  1506. package/src/duckdb/third_party/zstd/include/zstd/compress/clevels.h +136 -0
  1507. package/src/duckdb/third_party/zstd/include/zstd/compress/hist.h +4 -4
  1508. package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_compress_internal.h +631 -220
  1509. package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_compress_literals.h +17 -7
  1510. package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_compress_sequences.h +2 -2
  1511. package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_compress_superblock.h +3 -2
  1512. package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_cwksp.h +256 -153
  1513. package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_double_fast.h +16 -3
  1514. package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_fast.h +4 -3
  1515. package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_lazy.h +145 -11
  1516. package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_ldm.h +14 -6
  1517. package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_ldm_geartab.h +110 -0
  1518. package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_opt.h +33 -9
  1519. package/src/duckdb/third_party/zstd/include/zstd/compress/zstdmt_compress.h +107 -0
  1520. package/src/duckdb/third_party/zstd/include/zstd/decompress/zstd_ddict.h +4 -3
  1521. package/src/duckdb/third_party/zstd/include/zstd/decompress/zstd_decompress_block.h +20 -6
  1522. package/src/duckdb/third_party/zstd/include/zstd/decompress/zstd_decompress_internal.h +88 -16
  1523. package/src/duckdb/third_party/zstd/include/zstd/deprecated/zbuff.h +214 -0
  1524. package/src/duckdb/third_party/zstd/include/zstd/dict/cover.h +156 -0
  1525. package/src/duckdb/third_party/zstd/include/zstd/dict/divsufsort.h +62 -0
  1526. package/src/duckdb/third_party/zstd/include/zstd.h +2171 -93
  1527. package/src/duckdb/third_party/zstd/include/{zstd/common/zstd_errors.h → zstd_errors.h} +32 -10
  1528. package/src/duckdb/ub_extension_core_functions_aggregate_algebraic.cpp +8 -0
  1529. package/src/duckdb/ub_extension_core_functions_aggregate_distributive.cpp +20 -0
  1530. package/src/duckdb/ub_extension_core_functions_aggregate_holistic.cpp +12 -0
  1531. package/src/duckdb/ub_extension_core_functions_aggregate_nested.cpp +6 -0
  1532. package/src/duckdb/ub_extension_core_functions_aggregate_regression.cpp +14 -0
  1533. package/src/duckdb/ub_extension_core_functions_scalar_array.cpp +4 -0
  1534. package/src/duckdb/ub_extension_core_functions_scalar_bit.cpp +2 -0
  1535. package/src/duckdb/ub_extension_core_functions_scalar_blob.cpp +4 -0
  1536. package/src/duckdb/ub_extension_core_functions_scalar_date.cpp +20 -0
  1537. package/src/duckdb/ub_extension_core_functions_scalar_debug.cpp +2 -0
  1538. package/src/duckdb/ub_extension_core_functions_scalar_enum.cpp +2 -0
  1539. package/src/duckdb/ub_extension_core_functions_scalar_generic.cpp +18 -0
  1540. package/src/duckdb/ub_extension_core_functions_scalar_list.cpp +22 -0
  1541. package/src/duckdb/ub_extension_core_functions_scalar_map.cpp +14 -0
  1542. package/src/duckdb/ub_extension_core_functions_scalar_math.cpp +2 -0
  1543. package/src/duckdb/ub_extension_core_functions_scalar_operators.cpp +2 -0
  1544. package/src/duckdb/ub_extension_core_functions_scalar_random.cpp +4 -0
  1545. package/src/duckdb/ub_extension_core_functions_scalar_string.cpp +48 -0
  1546. package/src/duckdb/ub_extension_core_functions_scalar_struct.cpp +2 -0
  1547. package/src/duckdb/ub_extension_core_functions_scalar_union.cpp +6 -0
  1548. package/src/duckdb/ub_src_common.cpp +4 -0
  1549. package/src/duckdb/ub_src_common_arrow.cpp +3 -1
  1550. package/src/duckdb/ub_src_execution.cpp +0 -6
  1551. package/src/duckdb/ub_src_execution_operator_aggregate.cpp +2 -0
  1552. package/src/duckdb/ub_src_execution_operator_csv_scanner_encode.cpp +2 -0
  1553. package/src/duckdb/ub_src_execution_operator_csv_scanner_util.cpp +2 -0
  1554. package/src/duckdb/ub_src_execution_sample.cpp +4 -0
  1555. package/src/duckdb/ub_src_function.cpp +6 -0
  1556. package/src/duckdb/ub_src_function_aggregate.cpp +0 -2
  1557. package/src/duckdb/ub_src_function_aggregate_distributive.cpp +3 -1
  1558. package/src/duckdb/ub_src_function_scalar.cpp +2 -8
  1559. package/src/duckdb/ub_src_function_scalar_date.cpp +2 -0
  1560. package/src/duckdb/ub_src_function_scalar_generic.cpp +2 -2
  1561. package/src/duckdb/ub_src_function_scalar_map.cpp +2 -0
  1562. package/src/duckdb/ub_src_function_scalar_operator.cpp +8 -0
  1563. package/src/duckdb/ub_src_function_scalar_string.cpp +10 -0
  1564. package/src/duckdb/ub_src_function_scalar_struct.cpp +4 -0
  1565. package/src/duckdb/ub_src_function_scalar_system.cpp +2 -0
  1566. package/src/duckdb/ub_src_function_table_system.cpp +6 -0
  1567. package/src/duckdb/ub_src_function_window.cpp +36 -0
  1568. package/src/duckdb/ub_src_logging.cpp +8 -0
  1569. package/src/duckdb/ub_src_main_settings.cpp +3 -1
  1570. package/src/duckdb/ub_src_optimizer.cpp +8 -0
  1571. package/src/duckdb/ub_src_optimizer_pushdown.cpp +2 -0
  1572. package/src/duckdb/ub_src_optimizer_rule.cpp +2 -0
  1573. package/src/duckdb/ub_src_parser.cpp +2 -0
  1574. package/src/duckdb/ub_src_parser_parsed_data.cpp +2 -0
  1575. package/src/duckdb/ub_src_planner.cpp +2 -0
  1576. package/src/duckdb/ub_src_planner_filter.cpp +6 -0
  1577. package/src/duckdb/ub_src_storage_compression.cpp +4 -0
  1578. package/src/duckdb/ub_src_storage_compression_dictionary.cpp +8 -0
  1579. package/src/duckdb/ub_src_storage_compression_roaring.cpp +10 -0
  1580. package/src/duckdb/ub_src_transaction.cpp +2 -0
  1581. package/vendor.py +1 -1
  1582. package/src/duckdb/extension/json/yyjson/include/yyjson.hpp +0 -6003
  1583. package/src/duckdb/extension/json/yyjson/yyjson.cpp +0 -8218
  1584. package/src/duckdb/src/common/arrow/appender/list_data.cpp +0 -78
  1585. package/src/duckdb/src/common/arrow/appender/map_data.cpp +0 -91
  1586. package/src/duckdb/src/common/cycle_counter.cpp +0 -76
  1587. package/src/duckdb/src/common/field_writer.cpp +0 -97
  1588. package/src/duckdb/src/common/http_state.cpp +0 -95
  1589. package/src/duckdb/src/common/preserved_error.cpp +0 -87
  1590. package/src/duckdb/src/common/row_operations/row_match.cpp +0 -359
  1591. package/src/duckdb/src/common/serializer/buffered_deserializer.cpp +0 -27
  1592. package/src/duckdb/src/common/serializer/buffered_serializer.cpp +0 -36
  1593. package/src/duckdb/src/common/serializer/format_serializer.cpp +0 -15
  1594. package/src/duckdb/src/common/serializer.cpp +0 -24
  1595. package/src/duckdb/src/common/types/chunk_collection.cpp +0 -190
  1596. package/src/duckdb/src/core_functions/aggregate/distributive/entropy.cpp +0 -183
  1597. package/src/duckdb/src/core_functions/scalar/date/current.cpp +0 -54
  1598. package/src/duckdb/src/core_functions/scalar/list/list_cosine_similarity.cpp +0 -78
  1599. package/src/duckdb/src/core_functions/scalar/list/list_inner_product.cpp +0 -70
  1600. package/src/duckdb/src/core_functions/scalar/list/list_lambdas.cpp +0 -412
  1601. package/src/duckdb/src/core_functions/scalar/secret/which_secret.cpp +0 -28
  1602. package/src/duckdb/src/core_functions/scalar/string/jaro_winkler.cpp +0 -71
  1603. package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +0 -238
  1604. package/src/duckdb/src/execution/index/art/node16.cpp +0 -196
  1605. package/src/duckdb/src/execution/index/art/node4.cpp +0 -189
  1606. package/src/duckdb/src/execution/index/unknown_index.cpp +0 -65
  1607. package/src/duckdb/src/execution/operator/csv_scanner/base_csv_reader.cpp +0 -595
  1608. package/src/duckdb/src/execution/operator/csv_scanner/buffered_csv_reader.cpp +0 -434
  1609. package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer.cpp +0 -89
  1610. package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer_manager.cpp +0 -90
  1611. package/src/duckdb/src/execution/operator/csv_scanner/csv_file_handle.cpp +0 -95
  1612. package/src/duckdb/src/execution/operator/csv_scanner/csv_reader_options.cpp +0 -494
  1613. package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine.cpp +0 -35
  1614. package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine_cache.cpp +0 -99
  1615. package/src/duckdb/src/execution/operator/csv_scanner/parallel_csv_reader.cpp +0 -689
  1616. package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +0 -242
  1617. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +0 -695
  1618. package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +0 -1487
  1619. package/src/duckdb/src/execution/operator/persistent/csv_buffer.cpp +0 -72
  1620. package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +0 -158
  1621. package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +0 -280
  1622. package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +0 -666
  1623. package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +0 -499
  1624. package/src/duckdb/src/execution/operator/schema/physical_create_index.cpp +0 -207
  1625. package/src/duckdb/src/execution/partitionable_hashtable.cpp +0 -207
  1626. package/src/duckdb/src/execution/physical_plan/plan_limit_percent.cpp +0 -18
  1627. package/src/duckdb/src/execution/physical_plan/plan_show_select.cpp +0 -47
  1628. package/src/duckdb/src/execution/reservoir_sample.cpp +0 -324
  1629. package/src/duckdb/src/execution/window_executor.cpp +0 -1830
  1630. package/src/duckdb/src/execution/window_segment_tree.cpp +0 -2073
  1631. package/src/duckdb/src/extension_forward_decl/icu.cpp +0 -59
  1632. package/src/duckdb/src/function/aggregate/distributive_functions.cpp +0 -15
  1633. package/src/duckdb/src/function/scalar/compressed_materialization_functions.cpp +0 -29
  1634. package/src/duckdb/src/function/scalar/generic_functions.cpp +0 -11
  1635. package/src/duckdb/src/function/scalar/list/list_concat.cpp +0 -143
  1636. package/src/duckdb/src/function/scalar/operators.cpp +0 -14
  1637. package/src/duckdb/src/function/scalar/sequence_functions.cpp +0 -10
  1638. package/src/duckdb/src/function/scalar/string_functions.cpp +0 -22
  1639. package/src/duckdb/src/function/table/pragma_detailed_profiling_output.cpp +0 -173
  1640. package/src/duckdb/src/function/table/pragma_last_profiling_output.cpp +0 -101
  1641. package/src/duckdb/src/include/duckdb/catalog/mapping_value.hpp +0 -92
  1642. package/src/duckdb/src/include/duckdb/common/arrow/arrow_types_extension.hpp +0 -42
  1643. package/src/duckdb/src/include/duckdb/common/cycle_counter.hpp +0 -68
  1644. package/src/duckdb/src/include/duckdb/common/enums/index_type.hpp +0 -34
  1645. package/src/duckdb/src/include/duckdb/common/http_state.hpp +0 -113
  1646. package/src/duckdb/src/include/duckdb/common/platform.h +0 -58
  1647. package/src/duckdb/src/include/duckdb/common/preserved_error.hpp +0 -59
  1648. package/src/duckdb/src/include/duckdb/common/serializer/deserialization_data.hpp +0 -192
  1649. package/src/duckdb/src/include/duckdb/common/types/chunk_collection.hpp +0 -137
  1650. package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +0 -65
  1651. package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +0 -63
  1652. package/src/duckdb/src/include/duckdb/execution/index/unknown_index.hpp +0 -65
  1653. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/buffer_manager/csv_buffer.hpp +0 -103
  1654. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/buffer_manager/csv_buffer_manager.hpp +0 -74
  1655. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/buffer_manager/csv_file_handle.hpp +0 -60
  1656. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_sniffer.hpp +0 -253
  1657. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/options/csv_option.hpp +0 -155
  1658. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/options/csv_reader_options.hpp +0 -163
  1659. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/options/state_machine_options.hpp +0 -35
  1660. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner/base_scanner.hpp +0 -228
  1661. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner/column_count_scanner.hpp +0 -70
  1662. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner/scanner_boundary.hpp +0 -93
  1663. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner/skip_scanner.hpp +0 -60
  1664. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner/string_value_scanner.hpp +0 -197
  1665. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/quote_rules.hpp +0 -21
  1666. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/state_machine/csv_state.hpp +0 -30
  1667. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/state_machine/csv_state_machine.hpp +0 -99
  1668. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.hpp +0 -87
  1669. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/table_function/csv_file_scanner.hpp +0 -70
  1670. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/table_function/global_csv_state.hpp +0 -80
  1671. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/util/csv_casting.hpp +0 -137
  1672. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/util/csv_error.hpp +0 -104
  1673. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_index_join.hpp +0 -79
  1674. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/base_csv_reader.hpp +0 -119
  1675. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/buffered_csv_reader.hpp +0 -72
  1676. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer.hpp +0 -110
  1677. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer_manager.hpp +0 -103
  1678. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_file_handle.hpp +0 -59
  1679. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_line_info.hpp +0 -46
  1680. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_reader_options.hpp +0 -210
  1681. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_sniffer.hpp +0 -131
  1682. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state.hpp +0 -28
  1683. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine.hpp +0 -70
  1684. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine_cache.hpp +0 -65
  1685. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/parallel_csv_reader.hpp +0 -167
  1686. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/quote_rules.hpp +0 -21
  1687. package/src/duckdb/src/include/duckdb/execution/window_executor.hpp +0 -343
  1688. package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +0 -165
  1689. package/src/duckdb/src/include/duckdb/optimizer/column_lifetime_optimizer.hpp +0 -45
  1690. package/src/duckdb/src/include/duckdb/optimizer/join_order/estimated_properties.hpp +0 -57
  1691. package/src/duckdb/src/include/duckdb/parser/parsed_data/comment_on_info.hpp +0 -45
  1692. package/src/duckdb/src/include/duckdb/parser/statement/show_statement.hpp +0 -32
  1693. package/src/duckdb/src/include/duckdb/planner/operator/logical_limit_percent.hpp +0 -49
  1694. package/src/duckdb/src/include/duckdb/planner/operator/logical_show.hpp +0 -42
  1695. package/src/duckdb/src/main/settings/settings.cpp +0 -2056
  1696. package/src/duckdb/src/optimizer/join_order/estimated_properties.cpp +0 -36
  1697. package/src/duckdb/src/parser/parsed_data/comment_on_info.cpp +0 -19
  1698. package/src/duckdb/src/parser/statement/show_statement.cpp +0 -15
  1699. package/src/duckdb/src/planner/binder/statement/bind_show.cpp +0 -30
  1700. package/src/duckdb/src/planner/operator/logical_limit_percent.cpp +0 -14
  1701. package/src/duckdb/src/storage/statistics/numeric_stats_union.cpp +0 -70
  1702. package/src/duckdb/third_party/fsst/fsst_avx512.cpp +0 -140
  1703. package/src/duckdb/third_party/fsst/fsst_avx512.inc +0 -57
  1704. package/src/duckdb/third_party/fsst/fsst_avx512_unroll1.inc +0 -57
  1705. package/src/duckdb/third_party/fsst/fsst_avx512_unroll2.inc +0 -114
  1706. package/src/duckdb/third_party/fsst/fsst_avx512_unroll3.inc +0 -171
  1707. package/src/duckdb/third_party/fsst/fsst_avx512_unroll4.inc +0 -228
  1708. package/src/duckdb/third_party/parquet/parquet_constants.cpp +0 -17
  1709. package/src/duckdb/third_party/parquet/parquet_constants.h +0 -24
  1710. package/src/duckdb/third_party/re2/util/pod_array.h +0 -55
  1711. package/src/duckdb/third_party/re2/util/sparse_array.h +0 -392
  1712. package/src/duckdb/third_party/re2/util/sparse_set.h +0 -264
  1713. package/src/duckdb/third_party/zstd/include/zstd/common/fse_static.h +0 -421
  1714. package/src/duckdb/third_party/zstd/include/zstd/common/huf_static.h +0 -238
  1715. package/src/duckdb/third_party/zstd/include/zstd_static.h +0 -1070
  1716. package/src/duckdb/ub_src_core_functions.cpp +0 -6
  1717. package/src/duckdb/ub_src_core_functions_aggregate_algebraic.cpp +0 -8
  1718. package/src/duckdb/ub_src_core_functions_aggregate_distributive.cpp +0 -24
  1719. package/src/duckdb/ub_src_core_functions_aggregate_holistic.cpp +0 -12
  1720. package/src/duckdb/ub_src_core_functions_aggregate_nested.cpp +0 -6
  1721. package/src/duckdb/ub_src_core_functions_aggregate_regression.cpp +0 -14
  1722. package/src/duckdb/ub_src_core_functions_scalar_array.cpp +0 -4
  1723. package/src/duckdb/ub_src_core_functions_scalar_bit.cpp +0 -2
  1724. package/src/duckdb/ub_src_core_functions_scalar_blob.cpp +0 -6
  1725. package/src/duckdb/ub_src_core_functions_scalar_date.cpp +0 -22
  1726. package/src/duckdb/ub_src_core_functions_scalar_debug.cpp +0 -2
  1727. package/src/duckdb/ub_src_core_functions_scalar_enum.cpp +0 -2
  1728. package/src/duckdb/ub_src_core_functions_scalar_generic.cpp +0 -18
  1729. package/src/duckdb/ub_src_core_functions_scalar_list.cpp +0 -22
  1730. package/src/duckdb/ub_src_core_functions_scalar_map.cpp +0 -16
  1731. package/src/duckdb/ub_src_core_functions_scalar_math.cpp +0 -2
  1732. package/src/duckdb/ub_src_core_functions_scalar_operators.cpp +0 -2
  1733. package/src/duckdb/ub_src_core_functions_scalar_random.cpp +0 -4
  1734. package/src/duckdb/ub_src_core_functions_scalar_secret.cpp +0 -2
  1735. package/src/duckdb/ub_src_core_functions_scalar_string.cpp +0 -58
  1736. package/src/duckdb/ub_src_core_functions_scalar_struct.cpp +0 -4
  1737. package/src/duckdb/ub_src_core_functions_scalar_union.cpp +0 -6
  1738. package/src/duckdb/ub_src_execution_operator_csv_scanner.cpp +0 -18
  1739. package/src/duckdb/ub_src_function_scalar_operators.cpp +0 -8
  1740. /package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/algebraic/covar.hpp +0 -0
  1741. /package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/algebraic/stddev.hpp +0 -0
  1742. /package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/sum_helpers.hpp +0 -0
  1743. /package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/array_kernels.hpp +0 -0
  1744. /package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/function_list.hpp +0 -0
  1745. /package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/secret_functions.hpp +0 -0
  1746. /package/src/duckdb/src/function/scalar/{operators → operator}/multiply.cpp +0 -0
  1747. /package/src/duckdb/src/function/scalar/{operators → operator}/subtract.cpp +0 -0
@@ -1,5 +1,5 @@
1
1
  /*
2
- * Copyright (c) 2016-2020, Yann Collet, Facebook, Inc.
2
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
3
3
  * All rights reserved.
4
4
  *
5
5
  * This source code is licensed under both the BSD-style license (found in the
@@ -14,18 +14,19 @@
14
14
  /*-*******************************************************
15
15
  * Dependencies
16
16
  *********************************************************/
17
- #include <string.h> /* memcpy, memmove, memset */
17
+ #include "zstd/common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
18
18
  #include "zstd/common/compiler.h" /* prefetch */
19
+ #include "zstd/common/cpu.h" /* bmi2 */
19
20
  #include "zstd/common/mem.h" /* low level memory routines */
21
+ #define FSE_STATIC_LINKING_ONLY
20
22
  #include "zstd/common/fse.h"
21
- #include "zstd/common/fse_static.h"
22
23
  #include "zstd/common/huf.h"
23
- #include "zstd/common/huf_static.h"
24
24
  #include "zstd/common/zstd_internal.h"
25
25
  #include "zstd/decompress/zstd_decompress_internal.h" /* ZSTD_DCtx */
26
26
  #include "zstd/decompress/zstd_ddict.h" /* ZSTD_DDictDictContent */
27
27
  #include "zstd/decompress/zstd_decompress_block.h"
28
- namespace duckdb_zstd {
28
+ #include "zstd/common/bits.h" /* ZSTD_highbit32 */
29
+
29
30
  /*_*******************************************************
30
31
  * Macros
31
32
  **********************************************************/
@@ -39,23 +40,31 @@ namespace duckdb_zstd {
39
40
  #error "Cannot force the use of the short and the long ZSTD_decompressSequences variants!"
40
41
  #endif
41
42
 
43
+ namespace duckdb_zstd {
42
44
 
43
45
  /*_*******************************************************
44
46
  * Memory operations
45
47
  **********************************************************/
46
- static void ZSTD_copy4(void* dst, const void* src) { memcpy(dst, src, 4); }
48
+ static void ZSTD_copy4(void* dst, const void* src) { ZSTD_memcpy(dst, src, 4); }
47
49
 
48
50
 
49
51
  /*-*************************************************************
50
52
  * Block decoding
51
53
  ***************************************************************/
52
54
 
55
+ static size_t ZSTD_blockSizeMax(ZSTD_DCtx const* dctx)
56
+ {
57
+ size_t const blockSizeMax = dctx->isFrameDecompression ? dctx->fParams.blockSizeMax : ZSTD_BLOCKSIZE_MAX;
58
+ assert(blockSizeMax <= ZSTD_BLOCKSIZE_MAX);
59
+ return blockSizeMax;
60
+ }
61
+
53
62
  /*! ZSTD_getcBlockSize() :
54
63
  * Provides the size of compressed block from block header `src` */
55
64
  size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
56
65
  blockProperties_t* bpPtr)
57
66
  {
58
- RETURN_ERROR_IF(srcSize < ZSTDInternalConstants::ZSTD_blockHeaderSize, srcSize_wrong, "");
67
+ RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong, "");
59
68
 
60
69
  { U32 const cBlockHeader = MEM_readLE24(src);
61
70
  U32 const cSize = cBlockHeader >> 3;
@@ -68,36 +77,90 @@ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
68
77
  }
69
78
  }
70
79
 
80
+ /* Allocate buffer for literals, either overlapping current dst, or split between dst and litExtraBuffer, or stored entirely within litExtraBuffer */
81
+ static void ZSTD_allocateLiteralsBuffer(ZSTD_DCtx* dctx, void* const dst, const size_t dstCapacity, const size_t litSize,
82
+ const streaming_operation streaming, const size_t expectedWriteSize, const unsigned splitImmediately)
83
+ {
84
+ size_t const blockSizeMax = ZSTD_blockSizeMax(dctx);
85
+ assert(litSize <= blockSizeMax);
86
+ assert(dctx->isFrameDecompression || streaming == not_streaming);
87
+ assert(expectedWriteSize <= blockSizeMax);
88
+ if (streaming == not_streaming && dstCapacity > blockSizeMax + WILDCOPY_OVERLENGTH + litSize + WILDCOPY_OVERLENGTH) {
89
+ /* If we aren't streaming, we can just put the literals after the output
90
+ * of the current block. We don't need to worry about overwriting the
91
+ * extDict of our window, because it doesn't exist.
92
+ * So if we have space after the end of the block, just put it there.
93
+ */
94
+ dctx->litBuffer = (BYTE*)dst + blockSizeMax + WILDCOPY_OVERLENGTH;
95
+ dctx->litBufferEnd = dctx->litBuffer + litSize;
96
+ dctx->litBufferLocation = ZSTD_in_dst;
97
+ } else if (litSize <= ZSTD_LITBUFFEREXTRASIZE) {
98
+ /* Literals fit entirely within the extra buffer, put them there to avoid
99
+ * having to split the literals.
100
+ */
101
+ dctx->litBuffer = dctx->litExtraBuffer;
102
+ dctx->litBufferEnd = dctx->litBuffer + litSize;
103
+ dctx->litBufferLocation = ZSTD_not_in_dst;
104
+ } else {
105
+ assert(blockSizeMax > ZSTD_LITBUFFEREXTRASIZE);
106
+ /* Literals must be split between the output block and the extra lit
107
+ * buffer. We fill the extra lit buffer with the tail of the literals,
108
+ * and put the rest of the literals at the end of the block, with
109
+ * WILDCOPY_OVERLENGTH of buffer room to allow for overreads.
110
+ * This MUST not write more than our maxBlockSize beyond dst, because in
111
+ * streaming mode, that could overwrite part of our extDict window.
112
+ */
113
+ if (splitImmediately) {
114
+ /* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */
115
+ dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
116
+ dctx->litBufferEnd = dctx->litBuffer + litSize - ZSTD_LITBUFFEREXTRASIZE;
117
+ } else {
118
+ /* initially this will be stored entirely in dst during huffman decoding, it will partially be shifted to litExtraBuffer after */
119
+ dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize;
120
+ dctx->litBufferEnd = (BYTE*)dst + expectedWriteSize;
121
+ }
122
+ dctx->litBufferLocation = ZSTD_split;
123
+ assert(dctx->litBufferEnd <= (BYTE*)dst + expectedWriteSize);
124
+ }
125
+ }
71
126
 
72
- /* Hidden declaration for fullbench */
73
- size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
74
- const void* src, size_t srcSize);
75
127
  /*! ZSTD_decodeLiteralsBlock() :
128
+ * Where it is possible to do so without being stomped by the output during decompression, the literals block will be stored
129
+ * in the dstBuffer. If there is room to do so, it will be stored in full in the excess dst space after where the current
130
+ * block will be output. Otherwise it will be stored at the end of the current dst blockspace, with a small portion being
131
+ * stored in dctx->litExtraBuffer to help keep it "ahead" of the current output write.
132
+ *
76
133
  * @return : nb of bytes read from src (< srcSize )
77
134
  * note : symbol not declared but exposed for fullbench */
78
- size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
79
- const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */
135
+ static size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
136
+ const void* src, size_t srcSize, /* note : srcSize < BLOCKSIZE */
137
+ void* dst, size_t dstCapacity, const streaming_operation streaming)
80
138
  {
81
139
  DEBUGLOG(5, "ZSTD_decodeLiteralsBlock");
82
140
  RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, "");
83
141
 
84
142
  { const BYTE* const istart = (const BYTE*) src;
85
143
  symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3);
144
+ size_t const blockSizeMax = ZSTD_blockSizeMax(dctx);
86
145
 
87
146
  switch(litEncType)
88
147
  {
89
148
  case set_repeat:
90
149
  DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block");
91
150
  RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted, "");
92
- /* fall-through */
151
+ ZSTD_FALLTHROUGH;
93
152
 
94
153
  case set_compressed:
95
- RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3");
154
+ RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need up to 5 for case 3");
96
155
  { size_t lhSize, litSize, litCSize;
97
156
  U32 singleStream=0;
98
157
  U32 const lhlCode = (istart[0] >> 2) & 3;
99
158
  U32 const lhc = MEM_readLE32(istart);
100
159
  size_t hufSuccess;
160
+ size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity);
161
+ int const flags = 0
162
+ | (ZSTD_DCtx_get_bmi2(dctx) ? HUF_flags_bmi2 : 0)
163
+ | (dctx->disableHufAsm ? HUF_flags_disableAsm : 0);
101
164
  switch(lhlCode)
102
165
  {
103
166
  case 0: case 1: default: /* note : default is impossible, since lhlCode into [0..3] */
@@ -120,8 +183,15 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
120
183
  litCSize = (lhc >> 22) + ((size_t)istart[4] << 10);
121
184
  break;
122
185
  }
123
- RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
186
+ RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
187
+ RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, "");
188
+ if (!singleStream)
189
+ RETURN_ERROR_IF(litSize < MIN_LITERALS_FOR_4_STREAMS, literals_headerWrong,
190
+ "Not enough literals (%zu) for the 4-streams mode (min %u)",
191
+ litSize, MIN_LITERALS_FOR_4_STREAMS);
124
192
  RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, "");
193
+ RETURN_ERROR_IF(expectedWriteSize < litSize , dstSize_tooSmall, "");
194
+ ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 0);
125
195
 
126
196
  /* prefetch huffman table if cold */
127
197
  if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) {
@@ -130,13 +200,14 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
130
200
 
131
201
  if (litEncType==set_repeat) {
132
202
  if (singleStream) {
133
- hufSuccess = HUF_decompress1X_usingDTable_bmi2(
203
+ hufSuccess = HUF_decompress1X_usingDTable(
134
204
  dctx->litBuffer, litSize, istart+lhSize, litCSize,
135
- dctx->HUFptr, dctx->bmi2);
205
+ dctx->HUFptr, flags);
136
206
  } else {
137
- hufSuccess = HUF_decompress4X_usingDTable_bmi2(
207
+ assert(litSize >= MIN_LITERALS_FOR_4_STREAMS);
208
+ hufSuccess = HUF_decompress4X_usingDTable(
138
209
  dctx->litBuffer, litSize, istart+lhSize, litCSize,
139
- dctx->HUFptr, dctx->bmi2);
210
+ dctx->HUFptr, flags);
140
211
  }
141
212
  } else {
142
213
  if (singleStream) {
@@ -144,20 +215,29 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
144
215
  hufSuccess = HUF_decompress1X_DCtx_wksp(
145
216
  dctx->entropy.hufTable, dctx->litBuffer, litSize,
146
217
  istart+lhSize, litCSize, dctx->workspace,
147
- sizeof(dctx->workspace));
218
+ sizeof(dctx->workspace), flags);
148
219
  #else
149
- hufSuccess = HUF_decompress1X1_DCtx_wksp_bmi2(
220
+ hufSuccess = HUF_decompress1X1_DCtx_wksp(
150
221
  dctx->entropy.hufTable, dctx->litBuffer, litSize,
151
222
  istart+lhSize, litCSize, dctx->workspace,
152
- sizeof(dctx->workspace), dctx->bmi2);
223
+ sizeof(dctx->workspace), flags);
153
224
  #endif
154
225
  } else {
155
- hufSuccess = HUF_decompress4X_hufOnly_wksp_bmi2(
226
+ hufSuccess = HUF_decompress4X_hufOnly_wksp(
156
227
  dctx->entropy.hufTable, dctx->litBuffer, litSize,
157
228
  istart+lhSize, litCSize, dctx->workspace,
158
- sizeof(dctx->workspace), dctx->bmi2);
229
+ sizeof(dctx->workspace), flags);
159
230
  }
160
231
  }
232
+ if (dctx->litBufferLocation == ZSTD_split)
233
+ {
234
+ assert(litSize > ZSTD_LITBUFFEREXTRASIZE);
235
+ ZSTD_memcpy(dctx->litExtraBuffer, dctx->litBufferEnd - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
236
+ ZSTD_memmove(dctx->litBuffer + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH, dctx->litBuffer, litSize - ZSTD_LITBUFFEREXTRASIZE);
237
+ dctx->litBuffer += ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
238
+ dctx->litBufferEnd -= WILDCOPY_OVERLENGTH;
239
+ assert(dctx->litBufferEnd <= (BYTE*)dst + blockSizeMax);
240
+ }
161
241
 
162
242
  RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, "");
163
243
 
@@ -165,13 +245,13 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
165
245
  dctx->litSize = litSize;
166
246
  dctx->litEntropy = 1;
167
247
  if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable;
168
- memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
169
248
  return litCSize + lhSize;
170
249
  }
171
250
 
172
251
  case set_basic:
173
252
  { size_t litSize, lhSize;
174
253
  U32 const lhlCode = ((istart[0]) >> 2) & 3;
254
+ size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity);
175
255
  switch(lhlCode)
176
256
  {
177
257
  case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */
@@ -184,27 +264,42 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
184
264
  break;
185
265
  case 3:
186
266
  lhSize = 3;
267
+ RETURN_ERROR_IF(srcSize<3, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize = 3");
187
268
  litSize = MEM_readLE24(istart) >> 4;
188
269
  break;
189
270
  }
190
271
 
272
+ RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
273
+ RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, "");
274
+ RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
275
+ ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
191
276
  if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */
192
277
  RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, "");
193
- memcpy(dctx->litBuffer, istart+lhSize, litSize);
278
+ if (dctx->litBufferLocation == ZSTD_split)
279
+ {
280
+ ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize - ZSTD_LITBUFFEREXTRASIZE);
281
+ ZSTD_memcpy(dctx->litExtraBuffer, istart + lhSize + litSize - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
282
+ }
283
+ else
284
+ {
285
+ ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize);
286
+ }
194
287
  dctx->litPtr = dctx->litBuffer;
195
288
  dctx->litSize = litSize;
196
- memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
197
289
  return lhSize+litSize;
198
290
  }
199
291
  /* direct reference into compressed stream */
200
292
  dctx->litPtr = istart+lhSize;
201
293
  dctx->litSize = litSize;
294
+ dctx->litBufferEnd = dctx->litPtr + litSize;
295
+ dctx->litBufferLocation = ZSTD_not_in_dst;
202
296
  return lhSize+litSize;
203
297
  }
204
298
 
205
299
  case set_rle:
206
300
  { U32 const lhlCode = ((istart[0]) >> 2) & 3;
207
301
  size_t litSize, lhSize;
302
+ size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity);
208
303
  switch(lhlCode)
209
304
  {
210
305
  case 0: case 2: default: /* note : default is impossible, since lhlCode into [0..3] */
@@ -213,16 +308,28 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
213
308
  break;
214
309
  case 1:
215
310
  lhSize = 2;
311
+ RETURN_ERROR_IF(srcSize<3, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 3");
216
312
  litSize = MEM_readLE16(istart) >> 4;
217
313
  break;
218
314
  case 3:
219
315
  lhSize = 3;
316
+ RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 2; here we need lhSize+1 = 4");
220
317
  litSize = MEM_readLE24(istart) >> 4;
221
- RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4");
222
318
  break;
223
319
  }
224
- RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
225
- memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH);
320
+ RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
321
+ RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, "");
322
+ RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
323
+ ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
324
+ if (dctx->litBufferLocation == ZSTD_split)
325
+ {
326
+ ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize - ZSTD_LITBUFFEREXTRASIZE);
327
+ ZSTD_memset(dctx->litExtraBuffer, istart[lhSize], ZSTD_LITBUFFEREXTRASIZE);
328
+ }
329
+ else
330
+ {
331
+ ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize);
332
+ }
226
333
  dctx->litPtr = dctx->litBuffer;
227
334
  dctx->litSize = litSize;
228
335
  return lhSize+1;
@@ -233,9 +340,21 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
233
340
  }
234
341
  }
235
342
 
343
+ /* Hidden declaration for fullbench */
344
+ size_t ZSTD_decodeLiteralsBlock_wrapper(ZSTD_DCtx* dctx,
345
+ const void* src, size_t srcSize,
346
+ void* dst, size_t dstCapacity);
347
+ size_t ZSTD_decodeLiteralsBlock_wrapper(ZSTD_DCtx* dctx,
348
+ const void* src, size_t srcSize,
349
+ void* dst, size_t dstCapacity)
350
+ {
351
+ dctx->isFrameDecompression = 0;
352
+ return ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, not_streaming);
353
+ }
354
+
236
355
  /* Default FSE distribution tables.
237
356
  * These are pre-calculated FSE decoding tables using default distributions as defined in specification :
238
- * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#default-distributions
357
+ * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions
239
358
  * They were generated programmatically with following method :
240
359
  * - start from default distributions, present in /lib/common/zstd_internal.h
241
360
  * - generate tables normally, using ZSTD_buildFSETable()
@@ -342,7 +461,7 @@ static const ZSTD_seqSymbol ML_defaultDTable[(1<<ML_DEFAULTNORMLOG)+1] = {
342
461
  }; /* ML_defaultDTable */
343
462
 
344
463
 
345
- static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U32 nbAddBits)
464
+ static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U8 nbAddBits)
346
465
  {
347
466
  void* ptr = dt;
348
467
  ZSTD_seqSymbol_header* const DTableH = (ZSTD_seqSymbol_header*)ptr;
@@ -354,7 +473,7 @@ static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U32 nbAddB
354
473
  cell->nbBits = 0;
355
474
  cell->nextState = 0;
356
475
  assert(nbAddBits < 255);
357
- cell->nbAdditionalBits = (BYTE)nbAddBits;
476
+ cell->nbAdditionalBits = nbAddBits;
358
477
  cell->baseValue = baseValue;
359
478
  }
360
479
 
@@ -363,23 +482,26 @@ static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U32 nbAddB
363
482
  * generate FSE decoding table for one symbol (ll, ml or off)
364
483
  * cannot fail if input is valid =>
365
484
  * all inputs are presumed validated at this stage */
366
- void
367
- ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
485
+ FORCE_INLINE_TEMPLATE
486
+ void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
368
487
  const short* normalizedCounter, unsigned maxSymbolValue,
369
- const U32* baseValue, const U32* nbAdditionalBits,
370
- unsigned tableLog)
488
+ const U32* baseValue, const U8* nbAdditionalBits,
489
+ unsigned tableLog, void* wksp, size_t wkspSize)
371
490
  {
372
491
  ZSTD_seqSymbol* const tableDecode = dt+1;
373
- U16 symbolNext[MaxSeq+1];
374
-
375
492
  U32 const maxSV1 = maxSymbolValue + 1;
376
493
  U32 const tableSize = 1 << tableLog;
377
- U32 highThreshold = tableSize-1;
494
+
495
+ U16* symbolNext = (U16*)wksp;
496
+ BYTE* spread = (BYTE*)(symbolNext + MaxSeq + 1);
497
+ U32 highThreshold = tableSize - 1;
498
+
378
499
 
379
500
  /* Sanity Checks */
380
501
  assert(maxSymbolValue <= MaxSeq);
381
502
  assert(tableLog <= MaxFSELog);
382
-
503
+ assert(wkspSize >= ZSTD_BUILD_FSE_TABLE_WKSP_SIZE);
504
+ (void)wkspSize;
383
505
  /* Init, lay down lowprob symbols */
384
506
  { ZSTD_seqSymbol_header DTableH;
385
507
  DTableH.tableLog = tableLog;
@@ -395,34 +517,128 @@ ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
395
517
  assert(normalizedCounter[s]>=0);
396
518
  symbolNext[s] = (U16)normalizedCounter[s];
397
519
  } } }
398
- memcpy(dt, &DTableH, sizeof(DTableH));
520
+ ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
399
521
  }
400
522
 
401
523
  /* Spread symbols */
402
- { U32 const tableMask = tableSize-1;
524
+ assert(tableSize <= 512);
525
+ /* Specialized symbol spreading for the case when there are
526
+ * no low probability (-1 count) symbols. When compressing
527
+ * small blocks we avoid low probability symbols to hit this
528
+ * case, since header decoding speed matters more.
529
+ */
530
+ if (highThreshold == tableSize - 1) {
531
+ size_t const tableMask = tableSize-1;
532
+ size_t const step = FSE_TABLESTEP(tableSize);
533
+ /* First lay down the symbols in order.
534
+ * We use a uint64_t to lay down 8 bytes at a time. This reduces branch
535
+ * misses since small blocks generally have small table logs, so nearly
536
+ * all symbols have counts <= 8. We ensure we have 8 bytes at the end of
537
+ * our buffer to handle the over-write.
538
+ */
539
+ {
540
+ U64 const add = 0x0101010101010101ull;
541
+ size_t pos = 0;
542
+ U64 sv = 0;
543
+ U32 s;
544
+ for (s=0; s<maxSV1; ++s, sv += add) {
545
+ int i;
546
+ int const n = normalizedCounter[s];
547
+ MEM_write64(spread + pos, sv);
548
+ for (i = 8; i < n; i += 8) {
549
+ MEM_write64(spread + pos + i, sv);
550
+ }
551
+ assert(n>=0);
552
+ pos += (size_t)n;
553
+ }
554
+ }
555
+ /* Now we spread those positions across the table.
556
+ * The benefit of doing it in two stages is that we avoid the
557
+ * variable size inner loop, which caused lots of branch misses.
558
+ * Now we can run through all the positions without any branch misses.
559
+ * We unroll the loop twice, since that is what empirically worked best.
560
+ */
561
+ {
562
+ size_t position = 0;
563
+ size_t s;
564
+ size_t const unroll = 2;
565
+ assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
566
+ for (s = 0; s < (size_t)tableSize; s += unroll) {
567
+ size_t u;
568
+ for (u = 0; u < unroll; ++u) {
569
+ size_t const uPosition = (position + (u * step)) & tableMask;
570
+ tableDecode[uPosition].baseValue = spread[s + u];
571
+ }
572
+ position = (position + (unroll * step)) & tableMask;
573
+ }
574
+ assert(position == 0);
575
+ }
576
+ } else {
577
+ U32 const tableMask = tableSize-1;
403
578
  U32 const step = FSE_TABLESTEP(tableSize);
404
579
  U32 s, position = 0;
405
580
  for (s=0; s<maxSV1; s++) {
406
581
  int i;
407
- for (i=0; i<normalizedCounter[s]; i++) {
582
+ int const n = normalizedCounter[s];
583
+ for (i=0; i<n; i++) {
408
584
  tableDecode[position].baseValue = s;
409
585
  position = (position + step) & tableMask;
410
- while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */
586
+ while (UNLIKELY(position > highThreshold)) position = (position + step) & tableMask; /* lowprob area */
411
587
  } }
412
588
  assert(position == 0); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
413
589
  }
414
590
 
415
591
  /* Build Decoding table */
416
- { U32 u;
592
+ {
593
+ U32 u;
417
594
  for (u=0; u<tableSize; u++) {
418
595
  U32 const symbol = tableDecode[u].baseValue;
419
596
  U32 const nextState = symbolNext[symbol]++;
420
- tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
597
+ tableDecode[u].nbBits = (BYTE) (tableLog - ZSTD_highbit32(nextState) );
421
598
  tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
422
599
  assert(nbAdditionalBits[symbol] < 255);
423
- tableDecode[u].nbAdditionalBits = (BYTE)nbAdditionalBits[symbol];
600
+ tableDecode[u].nbAdditionalBits = nbAdditionalBits[symbol];
424
601
  tableDecode[u].baseValue = baseValue[symbol];
425
- } }
602
+ }
603
+ }
604
+ }
605
+
606
+ /* Avoids the FORCE_INLINE of the _body() function. */
607
+ static void ZSTD_buildFSETable_body_default(ZSTD_seqSymbol* dt,
608
+ const short* normalizedCounter, unsigned maxSymbolValue,
609
+ const U32* baseValue, const U8* nbAdditionalBits,
610
+ unsigned tableLog, void* wksp, size_t wkspSize)
611
+ {
612
+ ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
613
+ baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
614
+ }
615
+
616
+ #if DYNAMIC_BMI2
617
+ BMI2_TARGET_ATTRIBUTE static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol* dt,
618
+ const short* normalizedCounter, unsigned maxSymbolValue,
619
+ const U32* baseValue, const U8* nbAdditionalBits,
620
+ unsigned tableLog, void* wksp, size_t wkspSize)
621
+ {
622
+ ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
623
+ baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
624
+ }
625
+ #endif
626
+
627
+ void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
628
+ const short* normalizedCounter, unsigned maxSymbolValue,
629
+ const U32* baseValue, const U8* nbAdditionalBits,
630
+ unsigned tableLog, void* wksp, size_t wkspSize, int bmi2)
631
+ {
632
+ #if DYNAMIC_BMI2
633
+ if (bmi2) {
634
+ ZSTD_buildFSETable_body_bmi2(dt, normalizedCounter, maxSymbolValue,
635
+ baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
636
+ return;
637
+ }
638
+ #endif
639
+ (void)bmi2;
640
+ ZSTD_buildFSETable_body_default(dt, normalizedCounter, maxSymbolValue,
641
+ baseValue, nbAdditionalBits, tableLog, wksp, wkspSize);
426
642
  }
427
643
 
428
644
 
@@ -432,9 +648,10 @@ ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
432
648
  static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr,
433
649
  symbolEncodingType_e type, unsigned max, U32 maxLog,
434
650
  const void* src, size_t srcSize,
435
- const U32* baseValue, const U32* nbAdditionalBits,
651
+ const U32* baseValue, const U8* nbAdditionalBits,
436
652
  const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable,
437
- int ddictIsCold, int nbSeq)
653
+ int ddictIsCold, int nbSeq, U32* wksp, size_t wkspSize,
654
+ int bmi2)
438
655
  {
439
656
  switch(type)
440
657
  {
@@ -443,7 +660,7 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
443
660
  RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected, "");
444
661
  { U32 const symbol = *(const BYTE*)src;
445
662
  U32 const baseline = baseValue[symbol];
446
- U32 const nbBits = nbAdditionalBits[symbol];
663
+ U8 const nbBits = nbAdditionalBits[symbol];
447
664
  ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits);
448
665
  }
449
666
  *DTablePtr = DTableSpace;
@@ -466,7 +683,7 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
466
683
  size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
467
684
  RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, "");
468
685
  RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, "");
469
- ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog);
686
+ ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog, wksp, wkspSize, bmi2);
470
687
  *DTablePtr = DTableSpace;
471
688
  return headerSize;
472
689
  }
@@ -479,7 +696,7 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
479
696
  size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
480
697
  const void* src, size_t srcSize)
481
698
  {
482
- const BYTE* const istart = (const BYTE* const)src;
699
+ const BYTE* const istart = (const BYTE*)src;
483
700
  const BYTE* const iend = istart + srcSize;
484
701
  const BYTE* ip = istart;
485
702
  int nbSeq;
@@ -490,15 +707,11 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
490
707
 
491
708
  /* SeqHead */
492
709
  nbSeq = *ip++;
493
- if (!nbSeq) {
494
- *nbSeqPtr=0;
495
- RETURN_ERROR_IF(srcSize != 1, srcSize_wrong, "");
496
- return 1;
497
- }
498
710
  if (nbSeq > 0x7F) {
499
711
  if (nbSeq == 0xFF) {
500
712
  RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, "");
501
- nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2;
713
+ nbSeq = MEM_readLE16(ip) + LONGNBSEQ;
714
+ ip+=2;
502
715
  } else {
503
716
  RETURN_ERROR_IF(ip >= iend, srcSize_wrong, "");
504
717
  nbSeq = ((nbSeq-0x80)<<8) + *ip++;
@@ -506,8 +719,16 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
506
719
  }
507
720
  *nbSeqPtr = nbSeq;
508
721
 
722
+ if (nbSeq == 0) {
723
+ /* No sequence : section ends immediately */
724
+ RETURN_ERROR_IF(ip != iend, corruption_detected,
725
+ "extraneous data present in the Sequences section");
726
+ return (size_t)(ip - istart);
727
+ }
728
+
509
729
  /* FSE table descriptors */
510
730
  RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */
731
+ RETURN_ERROR_IF(*ip & 3, corruption_detected, ""); /* The last field, Reserved, must be all-zeroes. */
511
732
  { symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
512
733
  symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
513
734
  symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
@@ -517,9 +738,11 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
517
738
  { size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr,
518
739
  LLtype, MaxLL, LLFSELog,
519
740
  ip, iend-ip,
520
- ZSTDConstants::LL_base, ZSTDInternalConstants::LL_bits,
741
+ LL_base, LL_bits,
521
742
  LL_defaultDTable, dctx->fseEntropy,
522
- dctx->ddictIsCold, nbSeq);
743
+ dctx->ddictIsCold, nbSeq,
744
+ dctx->workspace, sizeof(dctx->workspace),
745
+ ZSTD_DCtx_get_bmi2(dctx));
523
746
  RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed");
524
747
  ip += llhSize;
525
748
  }
@@ -527,9 +750,11 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
527
750
  { size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr,
528
751
  OFtype, MaxOff, OffFSELog,
529
752
  ip, iend-ip,
530
- ZSTDConstants::OF_base, ZSTDConstants::OF_bits,
753
+ OF_base, OF_bits,
531
754
  OF_defaultDTable, dctx->fseEntropy,
532
- dctx->ddictIsCold, nbSeq);
755
+ dctx->ddictIsCold, nbSeq,
756
+ dctx->workspace, sizeof(dctx->workspace),
757
+ ZSTD_DCtx_get_bmi2(dctx));
533
758
  RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed");
534
759
  ip += ofhSize;
535
760
  }
@@ -537,9 +762,11 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
537
762
  { size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr,
538
763
  MLtype, MaxML, MLFSELog,
539
764
  ip, iend-ip,
540
- ZSTDConstants::ML_base, ZSTDInternalConstants::ML_bits,
765
+ ML_base, ML_bits,
541
766
  ML_defaultDTable, dctx->fseEntropy,
542
- dctx->ddictIsCold, nbSeq);
767
+ dctx->ddictIsCold, nbSeq,
768
+ dctx->workspace, sizeof(dctx->workspace),
769
+ ZSTD_DCtx_get_bmi2(dctx));
543
770
  RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed");
544
771
  ip += mlhSize;
545
772
  }
@@ -553,7 +780,6 @@ typedef struct {
553
780
  size_t litLength;
554
781
  size_t matchLength;
555
782
  size_t offset;
556
- const BYTE* match;
557
783
  } seq_t;
558
784
 
559
785
  typedef struct {
@@ -567,9 +793,6 @@ typedef struct {
567
793
  ZSTD_fseState stateOffb;
568
794
  ZSTD_fseState stateML;
569
795
  size_t prevOffset[ZSTD_REP_NUM];
570
- const BYTE* prefixStart;
571
- const BYTE* dictEnd;
572
- size_t pos;
573
796
  } seqState_t;
574
797
 
575
798
  /*! ZSTD_overlapCopy8() :
@@ -612,7 +835,7 @@ HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
612
835
  * - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart.
613
836
  * The src buffer must be before the dst buffer.
614
837
  */
615
- static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
838
+ static void ZSTD_safecopy(BYTE* op, const BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
616
839
  ptrdiff_t const diff = op - ip;
617
840
  BYTE* const oend = op + length;
618
841
 
@@ -628,6 +851,7 @@ static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_
628
851
  /* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */
629
852
  assert(length >= 8);
630
853
  ZSTD_overlapCopy8(&op, &ip, diff);
854
+ length -= 8;
631
855
  assert(op - ip >= 8);
632
856
  assert(op <= oend);
633
857
  }
@@ -642,12 +866,35 @@ static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_
642
866
  assert(oend > oend_w);
643
867
  ZSTD_wildcopy(op, ip, oend_w - op, ovtype);
644
868
  ip += oend_w - op;
645
- op = oend_w;
869
+ op += oend_w - op;
646
870
  }
647
871
  /* Handle the leftovers. */
648
872
  while (op < oend) *op++ = *ip++;
649
873
  }
650
874
 
875
+ /* ZSTD_safecopyDstBeforeSrc():
876
+ * This version allows overlap with dst before src, or handles the non-overlap case with dst after src
877
+ * Kept separate from more common ZSTD_safecopy case to avoid performance impact to the safecopy common case */
878
+ static void ZSTD_safecopyDstBeforeSrc(BYTE* op, const BYTE* ip, ptrdiff_t length) {
879
+ ptrdiff_t const diff = op - ip;
880
+ BYTE* const oend = op + length;
881
+
882
+ if (length < 8 || diff > -8) {
883
+ /* Handle short lengths, close overlaps, and dst not before src. */
884
+ while (op < oend) *op++ = *ip++;
885
+ return;
886
+ }
887
+
888
+ if (op <= oend - WILDCOPY_OVERLENGTH && diff < -WILDCOPY_VECLEN) {
889
+ ZSTD_wildcopy(op, ip, oend - WILDCOPY_OVERLENGTH - op, ZSTD_no_overlap);
890
+ ip += oend - WILDCOPY_OVERLENGTH - op;
891
+ op += oend - WILDCOPY_OVERLENGTH - op;
892
+ }
893
+
894
+ /* Handle the leftovers. */
895
+ while (op < oend) *op++ = *ip++;
896
+ }
897
+
651
898
  /* ZSTD_execSequenceEnd():
652
899
  * This version handles cases that are near the end of the output buffer. It requires
653
900
  * more careful checks to make sure there is no overflow. By separating out these hard
@@ -657,10 +904,11 @@ static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_
657
904
  * to be optimized for many small sequences, since those fall into ZSTD_execSequence().
658
905
  */
659
906
  FORCE_NOINLINE
907
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
660
908
  size_t ZSTD_execSequenceEnd(BYTE* op,
661
- BYTE* const oend, seq_t sequence,
662
- const BYTE** litPtr, const BYTE* const litLimit,
663
- const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
909
+ BYTE* const oend, seq_t sequence,
910
+ const BYTE** litPtr, const BYTE* const litLimit,
911
+ const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
664
912
  {
665
913
  BYTE* const oLitEnd = op + sequence.litLength;
666
914
  size_t const sequenceLength = sequence.litLength + sequence.matchLength;
@@ -683,27 +931,78 @@ size_t ZSTD_execSequenceEnd(BYTE* op,
683
931
  if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
684
932
  /* offset beyond prefix */
685
933
  RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
686
- match = dictEnd - (prefixStart-match);
934
+ match = dictEnd - (prefixStart - match);
687
935
  if (match + sequence.matchLength <= dictEnd) {
688
- memmove(oLitEnd, match, sequence.matchLength);
936
+ ZSTD_memmove(oLitEnd, match, sequence.matchLength);
689
937
  return sequenceLength;
690
938
  }
691
939
  /* span extDict & currentPrefixSegment */
692
940
  { size_t const length1 = dictEnd - match;
693
- memmove(oLitEnd, match, length1);
694
- op = oLitEnd + length1;
695
- sequence.matchLength -= length1;
696
- match = prefixStart;
697
- } }
941
+ ZSTD_memmove(oLitEnd, match, length1);
942
+ op = oLitEnd + length1;
943
+ sequence.matchLength -= length1;
944
+ match = prefixStart;
945
+ }
946
+ }
947
+ ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
948
+ return sequenceLength;
949
+ }
950
+
951
+ /* ZSTD_execSequenceEndSplitLitBuffer():
952
+ * This version is intended to be used during instances where the litBuffer is still split. It is kept separate to avoid performance impact for the good case.
953
+ */
954
+ FORCE_NOINLINE
955
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
956
+ size_t ZSTD_execSequenceEndSplitLitBuffer(BYTE* op,
957
+ BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
958
+ const BYTE** litPtr, const BYTE* const litLimit,
959
+ const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
960
+ {
961
+ BYTE* const oLitEnd = op + sequence.litLength;
962
+ size_t const sequenceLength = sequence.litLength + sequence.matchLength;
963
+ const BYTE* const iLitEnd = *litPtr + sequence.litLength;
964
+ const BYTE* match = oLitEnd - sequence.offset;
965
+
966
+
967
+ /* bounds checks : careful of address space overflow in 32-bit mode */
968
+ RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
969
+ RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
970
+ assert(op < op + sequenceLength);
971
+ assert(oLitEnd < op + sequenceLength);
972
+
973
+ /* copy literals */
974
+ RETURN_ERROR_IF(op > *litPtr && op < *litPtr + sequence.litLength, dstSize_tooSmall, "output should not catch up to and overwrite literal buffer");
975
+ ZSTD_safecopyDstBeforeSrc(op, *litPtr, sequence.litLength);
976
+ op = oLitEnd;
977
+ *litPtr = iLitEnd;
978
+
979
+ /* copy Match */
980
+ if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
981
+ /* offset beyond prefix */
982
+ RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
983
+ match = dictEnd - (prefixStart - match);
984
+ if (match + sequence.matchLength <= dictEnd) {
985
+ ZSTD_memmove(oLitEnd, match, sequence.matchLength);
986
+ return sequenceLength;
987
+ }
988
+ /* span extDict & currentPrefixSegment */
989
+ { size_t const length1 = dictEnd - match;
990
+ ZSTD_memmove(oLitEnd, match, length1);
991
+ op = oLitEnd + length1;
992
+ sequence.matchLength -= length1;
993
+ match = prefixStart;
994
+ }
995
+ }
698
996
  ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
699
997
  return sequenceLength;
700
998
  }
701
999
 
702
1000
  HINT_INLINE
1001
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
703
1002
  size_t ZSTD_execSequence(BYTE* op,
704
- BYTE* const oend, seq_t sequence,
705
- const BYTE** litPtr, const BYTE* const litLimit,
706
- const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
1003
+ BYTE* const oend, seq_t sequence,
1004
+ const BYTE** litPtr, const BYTE* const litLimit,
1005
+ const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
707
1006
  {
708
1007
  BYTE* const oLitEnd = op + sequence.litLength;
709
1008
  size_t const sequenceLength = sequence.litLength + sequence.matchLength;
@@ -714,6 +1013,104 @@ size_t ZSTD_execSequence(BYTE* op,
714
1013
 
715
1014
  assert(op != NULL /* Precondition */);
716
1015
  assert(oend_w < oend /* No underflow */);
1016
+
1017
+ #if defined(__aarch64__)
1018
+ /* prefetch sequence starting from match that will be used for copy later */
1019
+ PREFETCH_L1(match);
1020
+ #endif
1021
+ /* Handle edge cases in a slow path:
1022
+ * - Read beyond end of literals
1023
+ * - Match end is within WILDCOPY_OVERLIMIT of oend
1024
+ * - 32-bit mode and the match length overflows
1025
+ */
1026
+ if (UNLIKELY(
1027
+ iLitEnd > litLimit ||
1028
+ oMatchEnd > oend_w ||
1029
+ (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
1030
+ return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
1031
+
1032
+ /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
1033
+ assert(op <= oLitEnd /* No overflow */);
1034
+ assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
1035
+ assert(oMatchEnd <= oend /* No underflow */);
1036
+ assert(iLitEnd <= litLimit /* Literal length is in bounds */);
1037
+ assert(oLitEnd <= oend_w /* Can wildcopy literals */);
1038
+ assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
1039
+
1040
+ /* Copy Literals:
1041
+ * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
1042
+ * We likely don't need the full 32-byte wildcopy.
1043
+ */
1044
+ assert(WILDCOPY_OVERLENGTH >= 16);
1045
+ ZSTD_copy16(op, (*litPtr));
1046
+ if (UNLIKELY(sequence.litLength > 16)) {
1047
+ ZSTD_wildcopy(op + 16, (*litPtr) + 16, sequence.litLength - 16, ZSTD_no_overlap);
1048
+ }
1049
+ op = oLitEnd;
1050
+ *litPtr = iLitEnd; /* update for next sequence */
1051
+
1052
+ /* Copy Match */
1053
+ if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
1054
+ /* offset beyond prefix -> go into extDict */
1055
+ RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
1056
+ match = dictEnd + (match - prefixStart);
1057
+ if (match + sequence.matchLength <= dictEnd) {
1058
+ ZSTD_memmove(oLitEnd, match, sequence.matchLength);
1059
+ return sequenceLength;
1060
+ }
1061
+ /* span extDict & currentPrefixSegment */
1062
+ { size_t const length1 = dictEnd - match;
1063
+ ZSTD_memmove(oLitEnd, match, length1);
1064
+ op = oLitEnd + length1;
1065
+ sequence.matchLength -= length1;
1066
+ match = prefixStart;
1067
+ }
1068
+ }
1069
+ /* Match within prefix of 1 or more bytes */
1070
+ assert(op <= oMatchEnd);
1071
+ assert(oMatchEnd <= oend_w);
1072
+ assert(match >= prefixStart);
1073
+ assert(sequence.matchLength >= 1);
1074
+
1075
+ /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
1076
+ * without overlap checking.
1077
+ */
1078
+ if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
1079
+ /* We bet on a full wildcopy for matches, since we expect matches to be
1080
+ * longer than literals (in general). In silesia, ~10% of matches are longer
1081
+ * than 16 bytes.
1082
+ */
1083
+ ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
1084
+ return sequenceLength;
1085
+ }
1086
+ assert(sequence.offset < WILDCOPY_VECLEN);
1087
+
1088
+ /* Copy 8 bytes and spread the offset to be >= 8. */
1089
+ ZSTD_overlapCopy8(&op, &match, sequence.offset);
1090
+
1091
+ /* If the match length is > 8 bytes, then continue with the wildcopy. */
1092
+ if (sequence.matchLength > 8) {
1093
+ assert(op < oMatchEnd);
1094
+ ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength - 8, ZSTD_overlap_src_before_dst);
1095
+ }
1096
+ return sequenceLength;
1097
+ }
1098
+
1099
+ HINT_INLINE
1100
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
1101
+ size_t ZSTD_execSequenceSplitLitBuffer(BYTE* op,
1102
+ BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
1103
+ const BYTE** litPtr, const BYTE* const litLimit,
1104
+ const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
1105
+ {
1106
+ BYTE* const oLitEnd = op + sequence.litLength;
1107
+ size_t const sequenceLength = sequence.litLength + sequence.matchLength;
1108
+ BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */
1109
+ const BYTE* const iLitEnd = *litPtr + sequence.litLength;
1110
+ const BYTE* match = oLitEnd - sequence.offset;
1111
+
1112
+ assert(op != NULL /* Precondition */);
1113
+ assert(oend_w < oend /* No underflow */);
717
1114
  /* Handle edge cases in a slow path:
718
1115
  * - Read beyond end of literals
719
1116
  * - Match end is within WILDCOPY_OVERLIMIT of oend
@@ -723,7 +1120,7 @@ size_t ZSTD_execSequence(BYTE* op,
723
1120
  iLitEnd > litLimit ||
724
1121
  oMatchEnd > oend_w ||
725
1122
  (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
726
- return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
1123
+ return ZSTD_execSequenceEndSplitLitBuffer(op, oend, oend_w, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
727
1124
 
728
1125
  /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
729
1126
  assert(op <= oLitEnd /* No overflow */);
@@ -751,12 +1148,12 @@ size_t ZSTD_execSequence(BYTE* op,
751
1148
  RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
752
1149
  match = dictEnd + (match - prefixStart);
753
1150
  if (match + sequence.matchLength <= dictEnd) {
754
- memmove(oLitEnd, match, sequence.matchLength);
1151
+ ZSTD_memmove(oLitEnd, match, sequence.matchLength);
755
1152
  return sequenceLength;
756
1153
  }
757
1154
  /* span extDict & currentPrefixSegment */
758
1155
  { size_t const length1 = dictEnd - match;
759
- memmove(oLitEnd, match, length1);
1156
+ ZSTD_memmove(oLitEnd, match, length1);
760
1157
  op = oLitEnd + length1;
761
1158
  sequence.matchLength -= length1;
762
1159
  match = prefixStart;
@@ -791,6 +1188,7 @@ size_t ZSTD_execSequence(BYTE* op,
791
1188
  return sequenceLength;
792
1189
  }
793
1190
 
1191
+
794
1192
  static void
795
1193
  ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt)
796
1194
  {
@@ -804,24 +1202,14 @@ ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqS
804
1202
  }
805
1203
 
806
1204
  FORCE_INLINE_TEMPLATE void
807
- ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD)
1205
+ ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, U16 nextState, U32 nbBits)
808
1206
  {
809
- ZSTD_seqSymbol const DInfo = DStatePtr->table[DStatePtr->state];
810
- U32 const nbBits = DInfo.nbBits;
811
1207
  size_t const lowBits = BIT_readBits(bitD, nbBits);
812
- DStatePtr->state = DInfo.nextState + lowBits;
813
- }
814
-
815
- FORCE_INLINE_TEMPLATE void
816
- ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, ZSTD_seqSymbol const DInfo)
817
- {
818
- U32 const nbBits = DInfo.nbBits;
819
- size_t const lowBits = BIT_readBits(bitD, nbBits);
820
- DStatePtr->state = DInfo.nextState + lowBits;
1208
+ DStatePtr->state = nextState + lowBits;
821
1209
  }
822
1210
 
823
1211
  /* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
824
- * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1)
1212
+ * offset bits. But we can only read at most STREAM_ACCUMULATOR_MIN_32
825
1213
  * bits before reloading. This value is the maximum number of bytes we read
826
1214
  * after reloading when we are decoding long offsets.
827
1215
  */
@@ -831,122 +1219,135 @@ ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, ZSTD
831
1219
  : 0)
832
1220
 
833
1221
  typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
834
- typedef enum { ZSTD_p_noPrefetch=0, ZSTD_p_prefetch=1 } ZSTD_prefetch_e;
835
1222
 
1223
+ /**
1224
+ * ZSTD_decodeSequence():
1225
+ * @p longOffsets : tells the decoder to reload more bit while decoding large offsets
1226
+ * only used in 32-bit mode
1227
+ * @return : Sequence (litL + matchL + offset)
1228
+ */
836
1229
  FORCE_INLINE_TEMPLATE seq_t
837
- ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const ZSTD_prefetch_e prefetch)
1230
+ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const int isLastSeq)
838
1231
  {
839
1232
  seq_t seq;
840
- ZSTD_seqSymbol const llDInfo = seqState->stateLL.table[seqState->stateLL.state];
841
- ZSTD_seqSymbol const mlDInfo = seqState->stateML.table[seqState->stateML.state];
842
- ZSTD_seqSymbol const ofDInfo = seqState->stateOffb.table[seqState->stateOffb.state];
843
- U32 const llBase = llDInfo.baseValue;
844
- U32 const mlBase = mlDInfo.baseValue;
845
- U32 const ofBase = ofDInfo.baseValue;
846
- BYTE const llBits = llDInfo.nbAdditionalBits;
847
- BYTE const mlBits = mlDInfo.nbAdditionalBits;
848
- BYTE const ofBits = ofDInfo.nbAdditionalBits;
849
- BYTE const totalBits = llBits+mlBits+ofBits;
850
-
851
- /* sequence */
852
- { size_t offset;
853
- if (ofBits > 1) {
854
- ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
855
- ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
856
- assert(ofBits <= MaxOff);
857
- if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {
858
- U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed);
859
- offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
860
- BIT_reloadDStream(&seqState->DStream);
861
- if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
862
- assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32); /* to avoid another reload */
863
- } else {
864
- offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
865
- if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
866
- }
867
- seqState->prevOffset[2] = seqState->prevOffset[1];
868
- seqState->prevOffset[1] = seqState->prevOffset[0];
869
- seqState->prevOffset[0] = offset;
870
- } else {
871
- U32 const ll0 = (llBase == 0);
872
- if (LIKELY((ofBits == 0))) {
873
- if (LIKELY(!ll0))
874
- offset = seqState->prevOffset[0];
875
- else {
876
- offset = seqState->prevOffset[1];
877
- seqState->prevOffset[1] = seqState->prevOffset[0];
878
- seqState->prevOffset[0] = offset;
1233
+ /*
1234
+ * ZSTD_seqSymbol is a 64 bits wide structure.
1235
+ * It can be loaded in one operation
1236
+ * and its fields extracted by simply shifting or bit-extracting on aarch64.
1237
+ * GCC doesn't recognize this and generates more unnecessary ldr/ldrb/ldrh
1238
+ * operations that cause performance drop. This can be avoided by using this
1239
+ * ZSTD_memcpy hack.
1240
+ */
1241
+ #if defined(__aarch64__) && (defined(__GNUC__) && !defined(__clang__))
1242
+ ZSTD_seqSymbol llDInfoS, mlDInfoS, ofDInfoS;
1243
+ ZSTD_seqSymbol* const llDInfo = &llDInfoS;
1244
+ ZSTD_seqSymbol* const mlDInfo = &mlDInfoS;
1245
+ ZSTD_seqSymbol* const ofDInfo = &ofDInfoS;
1246
+ ZSTD_memcpy(llDInfo, seqState->stateLL.table + seqState->stateLL.state, sizeof(ZSTD_seqSymbol));
1247
+ ZSTD_memcpy(mlDInfo, seqState->stateML.table + seqState->stateML.state, sizeof(ZSTD_seqSymbol));
1248
+ ZSTD_memcpy(ofDInfo, seqState->stateOffb.table + seqState->stateOffb.state, sizeof(ZSTD_seqSymbol));
1249
+ #else
1250
+ const ZSTD_seqSymbol* const llDInfo = seqState->stateLL.table + seqState->stateLL.state;
1251
+ const ZSTD_seqSymbol* const mlDInfo = seqState->stateML.table + seqState->stateML.state;
1252
+ const ZSTD_seqSymbol* const ofDInfo = seqState->stateOffb.table + seqState->stateOffb.state;
1253
+ #endif
1254
+ seq.matchLength = mlDInfo->baseValue;
1255
+ seq.litLength = llDInfo->baseValue;
1256
+ { U32 const ofBase = ofDInfo->baseValue;
1257
+ BYTE const llBits = llDInfo->nbAdditionalBits;
1258
+ BYTE const mlBits = mlDInfo->nbAdditionalBits;
1259
+ BYTE const ofBits = ofDInfo->nbAdditionalBits;
1260
+ BYTE const totalBits = llBits+mlBits+ofBits;
1261
+
1262
+ U16 const llNext = llDInfo->nextState;
1263
+ U16 const mlNext = mlDInfo->nextState;
1264
+ U16 const ofNext = ofDInfo->nextState;
1265
+ U32 const llnbBits = llDInfo->nbBits;
1266
+ U32 const mlnbBits = mlDInfo->nbBits;
1267
+ U32 const ofnbBits = ofDInfo->nbBits;
1268
+
1269
+ assert(llBits <= MaxLLBits);
1270
+ assert(mlBits <= MaxMLBits);
1271
+ assert(ofBits <= MaxOff);
1272
+ /*
1273
+ * As gcc has better branch and block analyzers, sometimes it is only
1274
+ * valuable to mark likeliness for clang, it gives around 3-4% of
1275
+ * performance.
1276
+ */
1277
+
1278
+ /* sequence */
1279
+ { size_t offset;
1280
+ if (ofBits > 1) {
1281
+ ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
1282
+ ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
1283
+ ZSTD_STATIC_ASSERT(STREAM_ACCUMULATOR_MIN_32 > LONG_OFFSETS_MAX_EXTRA_BITS_32);
1284
+ ZSTD_STATIC_ASSERT(STREAM_ACCUMULATOR_MIN_32 - LONG_OFFSETS_MAX_EXTRA_BITS_32 >= MaxMLBits);
1285
+ if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {
1286
+ /* Always read extra bits, this keeps the logic simple,
1287
+ * avoids branches, and avoids accidentally reading 0 bits.
1288
+ */
1289
+ U32 const extraBits = LONG_OFFSETS_MAX_EXTRA_BITS_32;
1290
+ offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
1291
+ BIT_reloadDStream(&seqState->DStream);
1292
+ offset += BIT_readBitsFast(&seqState->DStream, extraBits);
1293
+ } else {
1294
+ offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
1295
+ if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
879
1296
  }
1297
+ seqState->prevOffset[2] = seqState->prevOffset[1];
1298
+ seqState->prevOffset[1] = seqState->prevOffset[0];
1299
+ seqState->prevOffset[0] = offset;
880
1300
  } else {
881
- offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1);
882
- { size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
883
- temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */
884
- if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
885
- seqState->prevOffset[1] = seqState->prevOffset[0];
886
- seqState->prevOffset[0] = offset = temp;
887
- } } }
888
- seq.offset = offset;
889
- }
1301
+ U32 const ll0 = (llDInfo->baseValue == 0);
1302
+ if (LIKELY((ofBits == 0))) {
1303
+ offset = seqState->prevOffset[ll0];
1304
+ seqState->prevOffset[1] = seqState->prevOffset[!ll0];
1305
+ seqState->prevOffset[0] = offset;
1306
+ } else {
1307
+ offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1);
1308
+ { size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
1309
+ temp -= !temp; /* 0 is not valid: input corrupted => force offset to -1 => corruption detected at execSequence */
1310
+ if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
1311
+ seqState->prevOffset[1] = seqState->prevOffset[0];
1312
+ seqState->prevOffset[0] = offset = temp;
1313
+ } } }
1314
+ seq.offset = offset;
1315
+ }
890
1316
 
891
- seq.matchLength = mlBase;
892
- if (mlBits > 0)
893
- seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/);
1317
+ if (mlBits > 0)
1318
+ seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/);
894
1319
 
895
- if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
896
- BIT_reloadDStream(&seqState->DStream);
897
- if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
898
- BIT_reloadDStream(&seqState->DStream);
899
- /* Ensure there are enough bits to read the rest of data in 64-bit mode. */
900
- ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
1320
+ if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
1321
+ BIT_reloadDStream(&seqState->DStream);
1322
+ if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
1323
+ BIT_reloadDStream(&seqState->DStream);
1324
+ /* Ensure there are enough bits to read the rest of data in 64-bit mode. */
1325
+ ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
901
1326
 
902
- seq.litLength = llBase;
903
- if (llBits > 0)
904
- seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/);
1327
+ if (llBits > 0)
1328
+ seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/);
905
1329
 
906
- if (MEM_32bits())
907
- BIT_reloadDStream(&seqState->DStream);
1330
+ if (MEM_32bits())
1331
+ BIT_reloadDStream(&seqState->DStream);
908
1332
 
909
- DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
910
- (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
1333
+ DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
1334
+ (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
911
1335
 
912
- if (prefetch == ZSTD_p_prefetch) {
913
- size_t const pos = seqState->pos + seq.litLength;
914
- const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart;
915
- seq.match = matchBase + pos - seq.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
916
- * No consequence though : no memory access will occur, offset is only used for prefetching */
917
- seqState->pos = pos + seq.matchLength;
918
- }
919
-
920
- /* ANS state update
921
- * gcc-9.0.0 does 2.5% worse with ZSTD_updateFseStateWithDInfo().
922
- * clang-9.2.0 does 7% worse with ZSTD_updateFseState().
923
- * Naturally it seems like ZSTD_updateFseStateWithDInfo() should be the
924
- * better option, so it is the default for other compilers. But, if you
925
- * measure that it is worse, please put up a pull request.
926
- */
927
- {
928
- #if defined(__GNUC__) && !defined(__clang__)
929
- const int kUseUpdateFseState = 1;
930
- #else
931
- const int kUseUpdateFseState = 0;
932
- #endif
933
- if (kUseUpdateFseState) {
934
- ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */
935
- ZSTD_updateFseState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */
1336
+ if (!isLastSeq) {
1337
+ /* don't update FSE state for last Sequence */
1338
+ ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llNext, llnbBits); /* <= 9 bits */
1339
+ ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlNext, mlnbBits); /* <= 9 bits */
936
1340
  if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
937
- ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */
938
- } else {
939
- ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llDInfo); /* <= 9 bits */
940
- ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlDInfo); /* <= 9 bits */
941
- if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */
942
- ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofDInfo); /* <= 8 bits */
1341
+ ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofNext, ofnbBits); /* <= 8 bits */
1342
+ BIT_reloadDStream(&seqState->DStream);
943
1343
  }
944
1344
  }
945
1345
 
946
1346
  return seq;
947
1347
  }
948
1348
 
949
- #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
1349
+ #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1350
+ #if DEBUGLEVEL >= 1
950
1351
  static int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd)
951
1352
  {
952
1353
  size_t const windowSize = dctx->fParams.windowSize;
@@ -961,59 +1362,65 @@ static int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStar
961
1362
  /* Dictionary is active. */
962
1363
  return 1;
963
1364
  }
1365
+ #endif
964
1366
 
965
- MEM_STATIC void ZSTD_assertValidSequence(
1367
+ static void ZSTD_assertValidSequence(
966
1368
  ZSTD_DCtx const* dctx,
967
1369
  BYTE const* op, BYTE const* oend,
968
1370
  seq_t const seq,
969
1371
  BYTE const* prefixStart, BYTE const* virtualStart)
970
1372
  {
971
- size_t const windowSize = dctx->fParams.windowSize;
972
- size_t const sequenceSize = seq.litLength + seq.matchLength;
973
- BYTE const* const oLitEnd = op + seq.litLength;
974
- DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u",
975
- (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
976
- assert(op <= oend);
977
- assert((size_t)(oend - op) >= sequenceSize);
978
- assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX);
979
- if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) {
980
- size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing);
981
- /* Offset must be within the dictionary. */
982
- assert(seq.offset <= (size_t)(oLitEnd - virtualStart));
983
- assert(seq.offset <= windowSize + dictSize);
984
- } else {
985
- /* Offset must be within our window. */
986
- assert(seq.offset <= windowSize);
1373
+ #if DEBUGLEVEL >= 1
1374
+ if (dctx->isFrameDecompression) {
1375
+ size_t const windowSize = dctx->fParams.windowSize;
1376
+ size_t const sequenceSize = seq.litLength + seq.matchLength;
1377
+ BYTE const* const oLitEnd = op + seq.litLength;
1378
+ DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u",
1379
+ (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
1380
+ assert(op <= oend);
1381
+ assert((size_t)(oend - op) >= sequenceSize);
1382
+ assert(sequenceSize <= ZSTD_blockSizeMax(dctx));
1383
+ if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) {
1384
+ size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing);
1385
+ /* Offset must be within the dictionary. */
1386
+ assert(seq.offset <= (size_t)(oLitEnd - virtualStart));
1387
+ assert(seq.offset <= windowSize + dictSize);
1388
+ } else {
1389
+ /* Offset must be within our window. */
1390
+ assert(seq.offset <= windowSize);
1391
+ }
987
1392
  }
1393
+ #else
1394
+ (void)dctx, (void)op, (void)oend, (void)seq, (void)prefixStart, (void)virtualStart;
1395
+ #endif
988
1396
  }
989
1397
  #endif
990
1398
 
991
1399
  #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
1400
+
1401
+
992
1402
  FORCE_INLINE_TEMPLATE size_t
993
1403
  DONT_VECTORIZE
994
- ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
1404
+ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,
995
1405
  void* dst, size_t maxDstSize,
996
1406
  const void* seqStart, size_t seqSize, int nbSeq,
997
- const ZSTD_longOffset_e isLongOffset,
998
- const int frame)
1407
+ const ZSTD_longOffset_e isLongOffset)
999
1408
  {
1000
1409
  const BYTE* ip = (const BYTE*)seqStart;
1001
1410
  const BYTE* const iend = ip + seqSize;
1002
- BYTE* const ostart = (BYTE* const)dst;
1003
- BYTE* const oend = ostart + maxDstSize;
1411
+ BYTE* const ostart = (BYTE*)dst;
1412
+ BYTE* const oend = ZSTD_maybeNullPtrAdd(ostart, maxDstSize);
1004
1413
  BYTE* op = ostart;
1005
1414
  const BYTE* litPtr = dctx->litPtr;
1006
- const BYTE* const litEnd = litPtr + dctx->litSize;
1415
+ const BYTE* litBufferEnd = dctx->litBufferEnd;
1007
1416
  const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
1008
1417
  const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);
1009
1418
  const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
1010
- DEBUGLOG(5, "ZSTD_decompressSequences_body");
1011
- (void)frame;
1419
+ DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer (%i seqs)", nbSeq);
1012
1420
 
1013
- /* Regen sequences */
1421
+ /* Literals are split between internal buffer & output buffer */
1014
1422
  if (nbSeq) {
1015
1423
  seqState_t seqState;
1016
- size_t error = 0;
1017
1424
  dctx->fseEntropy = 1;
1018
1425
  { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
1019
1426
  RETURN_ERROR_IF(
@@ -1029,134 +1436,331 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
1029
1436
  BIT_DStream_endOfBuffer < BIT_DStream_completed &&
1030
1437
  BIT_DStream_completed < BIT_DStream_overflow);
1031
1438
 
1439
+ /* decompress without overrunning litPtr begins */
1440
+ { seq_t sequence = {0,0,0}; /* some static analyzer believe that @sequence is not initialized (it necessarily is, since for(;;) loop as at least one iteration) */
1441
+ /* Align the decompression loop to 32 + 16 bytes.
1442
+ *
1443
+ * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression
1444
+ * speed swings based on the alignment of the decompression loop. This
1445
+ * performance swing is caused by parts of the decompression loop falling
1446
+ * out of the DSB. The entire decompression loop should fit in the DSB,
1447
+ * when it can't we get much worse performance. You can measure if you've
1448
+ * hit the good case or the bad case with this perf command for some
1449
+ * compressed file test.zst:
1450
+ *
1451
+ * perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \
1452
+ * -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst
1453
+ *
1454
+ * If you see most cycles served out of the MITE you've hit the bad case.
1455
+ * If you see most cycles served out of the DSB you've hit the good case.
1456
+ * If it is pretty even then you may be in an okay case.
1457
+ *
1458
+ * This issue has been reproduced on the following CPUs:
1459
+ * - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9
1460
+ * Use Instruments->Counters to get DSB/MITE cycles.
1461
+ * I never got performance swings, but I was able to
1462
+ * go from the good case of mostly DSB to half of the
1463
+ * cycles served from MITE.
1464
+ * - Coffeelake: Intel i9-9900k
1465
+ * - Coffeelake: Intel i7-9700k
1466
+ *
1467
+ * I haven't been able to reproduce the instability or DSB misses on any
1468
+ * of the following CPUS:
1469
+ * - Haswell
1470
+ * - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GH
1471
+ * - Skylake
1472
+ *
1473
+ * Alignment is done for each of the three major decompression loops:
1474
+ * - ZSTD_decompressSequences_bodySplitLitBuffer - presplit section of the literal buffer
1475
+ * - ZSTD_decompressSequences_bodySplitLitBuffer - postsplit section of the literal buffer
1476
+ * - ZSTD_decompressSequences_body
1477
+ * Alignment choices are made to minimize large swings on bad cases and influence on performance
1478
+ * from changes external to this code, rather than to overoptimize on the current commit.
1479
+ *
1480
+ * If you are seeing performance stability this script can help test.
1481
+ * It tests on 4 commits in zstd where I saw performance change.
1482
+ *
1483
+ * https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4
1484
+ */
1032
1485
  #if defined(__GNUC__) && defined(__x86_64__)
1033
- /* Align the decompression loop to 32 + 16 bytes.
1034
- *
1035
- * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression
1036
- * speed swings based on the alignment of the decompression loop. This
1037
- * performance swing is caused by parts of the decompression loop falling
1038
- * out of the DSB. The entire decompression loop should fit in the DSB,
1039
- * when it can't we get much worse performance. You can measure if you've
1040
- * hit the good case or the bad case with this perf command for some
1041
- * compressed file test.zst:
1042
- *
1043
- * perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \
1044
- * -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst
1045
- *
1046
- * If you see most cycles served out of the MITE you've hit the bad case.
1047
- * If you see most cycles served out of the DSB you've hit the good case.
1048
- * If it is pretty even then you may be in an okay case.
1049
- *
1050
- * I've been able to reproduce this issue on the following CPUs:
1051
- * - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9
1052
- * Use Instruments->Counters to get DSB/MITE cycles.
1053
- * I never got performance swings, but I was able to
1054
- * go from the good case of mostly DSB to half of the
1055
- * cycles served from MITE.
1056
- * - Coffeelake: Intel i9-9900k
1057
- *
1058
- * I haven't been able to reproduce the instability or DSB misses on any
1059
- * of the following CPUS:
1060
- * - Haswell
1061
- * - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GH
1062
- * - Skylake
1063
- *
1064
- * If you are seeing performance stability this script can help test.
1065
- * It tests on 4 commits in zstd where I saw performance change.
1066
- *
1067
- * https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4
1068
- */
1069
- __asm__(".p2align 5");
1070
- __asm__("nop");
1071
- __asm__(".p2align 4");
1486
+ __asm__(".p2align 6");
1487
+ # if __GNUC__ >= 7
1488
+ /* good for gcc-7, gcc-9, and gcc-11 */
1489
+ __asm__("nop");
1490
+ __asm__(".p2align 5");
1491
+ __asm__("nop");
1492
+ __asm__(".p2align 4");
1493
+ # if __GNUC__ == 8 || __GNUC__ == 10
1494
+ /* good for gcc-8 and gcc-10 */
1495
+ __asm__("nop");
1496
+ __asm__(".p2align 3");
1497
+ # endif
1498
+ # endif
1072
1499
  #endif
1073
- for ( ; ; ) {
1074
- seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_noPrefetch);
1075
- size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
1500
+
1501
+ /* Handle the initial state where litBuffer is currently split between dst and litExtraBuffer */
1502
+ for ( ; nbSeq; nbSeq--) {
1503
+ sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1);
1504
+ if (litPtr + sequence.litLength > dctx->litBufferEnd) break;
1505
+ { size_t const oneSeqSize = ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence.litLength - WILDCOPY_OVERLENGTH, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
1076
1506
  #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1077
- assert(!ZSTD_isError(oneSeqSize));
1078
- if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
1507
+ assert(!ZSTD_isError(oneSeqSize));
1508
+ ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
1079
1509
  #endif
1080
- DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
1081
- BIT_reloadDStream(&(seqState.DStream));
1082
- /* gcc and clang both don't like early returns in this loop.
1083
- * gcc doesn't like early breaks either.
1084
- * Instead save an error and report it at the end.
1085
- * When there is an error, don't increment op, so we don't
1086
- * overwrite.
1087
- */
1088
- if (UNLIKELY(ZSTD_isError(oneSeqSize))) error = oneSeqSize;
1089
- else op += oneSeqSize;
1090
- if (UNLIKELY(!--nbSeq)) break;
1510
+ if (UNLIKELY(ZSTD_isError(oneSeqSize)))
1511
+ return oneSeqSize;
1512
+ DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
1513
+ op += oneSeqSize;
1514
+ } }
1515
+ DEBUGLOG(6, "reached: (litPtr + sequence.litLength > dctx->litBufferEnd)");
1516
+
1517
+ /* If there are more sequences, they will need to read literals from litExtraBuffer; copy over the remainder from dst and update litPtr and litEnd */
1518
+ if (nbSeq > 0) {
1519
+ const size_t leftoverLit = dctx->litBufferEnd - litPtr;
1520
+ DEBUGLOG(6, "There are %i sequences left, and %zu/%zu literals left in buffer", nbSeq, leftoverLit, sequence.litLength);
1521
+ if (leftoverLit) {
1522
+ RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
1523
+ ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
1524
+ sequence.litLength -= leftoverLit;
1525
+ op += leftoverLit;
1526
+ }
1527
+ litPtr = dctx->litExtraBuffer;
1528
+ litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
1529
+ dctx->litBufferLocation = ZSTD_not_in_dst;
1530
+ { size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
1531
+ #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1532
+ assert(!ZSTD_isError(oneSeqSize));
1533
+ ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
1534
+ #endif
1535
+ if (UNLIKELY(ZSTD_isError(oneSeqSize)))
1536
+ return oneSeqSize;
1537
+ DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
1538
+ op += oneSeqSize;
1539
+ }
1540
+ nbSeq--;
1541
+ }
1542
+ }
1543
+
1544
+ if (nbSeq > 0) {
1545
+ /* there is remaining lit from extra buffer */
1546
+
1547
+ #if defined(__GNUC__) && defined(__x86_64__)
1548
+ __asm__(".p2align 6");
1549
+ __asm__("nop");
1550
+ # if __GNUC__ != 7
1551
+ /* worse for gcc-7 better for gcc-8, gcc-9, and gcc-10 and clang */
1552
+ __asm__(".p2align 4");
1553
+ __asm__("nop");
1554
+ __asm__(".p2align 3");
1555
+ # elif __GNUC__ >= 11
1556
+ __asm__(".p2align 3");
1557
+ # else
1558
+ __asm__(".p2align 5");
1559
+ __asm__("nop");
1560
+ __asm__(".p2align 3");
1561
+ # endif
1562
+ #endif
1563
+
1564
+ for ( ; nbSeq ; nbSeq--) {
1565
+ seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1);
1566
+ size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
1567
+ #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1568
+ assert(!ZSTD_isError(oneSeqSize));
1569
+ ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
1570
+ #endif
1571
+ if (UNLIKELY(ZSTD_isError(oneSeqSize)))
1572
+ return oneSeqSize;
1573
+ DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
1574
+ op += oneSeqSize;
1575
+ }
1091
1576
  }
1092
1577
 
1093
1578
  /* check if reached exact end */
1094
- DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
1095
- if (ZSTD_isError(error)) return error;
1579
+ DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer: after decode loop, remaining nbSeq : %i", nbSeq);
1096
1580
  RETURN_ERROR_IF(nbSeq, corruption_detected, "");
1097
- RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
1581
+ DEBUGLOG(5, "bitStream : start=%p, ptr=%p, bitsConsumed=%u", seqState.DStream.start, seqState.DStream.ptr, seqState.DStream.bitsConsumed);
1582
+ RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, "");
1098
1583
  /* save reps for next block */
1099
1584
  { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
1100
1585
  }
1101
1586
 
1102
1587
  /* last literal segment */
1103
- { size_t const lastLLSize = litEnd - litPtr;
1588
+ if (dctx->litBufferLocation == ZSTD_split) {
1589
+ /* split hasn't been reached yet, first get dst then copy litExtraBuffer */
1590
+ size_t const lastLLSize = (size_t)(litBufferEnd - litPtr);
1591
+ DEBUGLOG(6, "copy last literals from segment : %u", (U32)lastLLSize);
1592
+ RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
1593
+ if (op != NULL) {
1594
+ ZSTD_memmove(op, litPtr, lastLLSize);
1595
+ op += lastLLSize;
1596
+ }
1597
+ litPtr = dctx->litExtraBuffer;
1598
+ litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
1599
+ dctx->litBufferLocation = ZSTD_not_in_dst;
1600
+ }
1601
+ /* copy last literals from internal buffer */
1602
+ { size_t const lastLLSize = (size_t)(litBufferEnd - litPtr);
1603
+ DEBUGLOG(6, "copy last literals from internal buffer : %u", (U32)lastLLSize);
1104
1604
  RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
1105
1605
  if (op != NULL) {
1106
- memcpy(op, litPtr, lastLLSize);
1606
+ ZSTD_memcpy(op, litPtr, lastLLSize);
1107
1607
  op += lastLLSize;
1608
+ } }
1609
+
1610
+ DEBUGLOG(6, "decoded block of size %u bytes", (U32)(op - ostart));
1611
+ return (size_t)(op - ostart);
1612
+ }
1613
+
1614
+ FORCE_INLINE_TEMPLATE size_t
1615
+ DONT_VECTORIZE
1616
+ ZSTD_decompressSequences_body(ZSTD_DCtx* dctx,
1617
+ void* dst, size_t maxDstSize,
1618
+ const void* seqStart, size_t seqSize, int nbSeq,
1619
+ const ZSTD_longOffset_e isLongOffset)
1620
+ {
1621
+ const BYTE* ip = (const BYTE*)seqStart;
1622
+ const BYTE* const iend = ip + seqSize;
1623
+ BYTE* const ostart = (BYTE*)dst;
1624
+ BYTE* const oend = dctx->litBufferLocation == ZSTD_not_in_dst ? ZSTD_maybeNullPtrAdd(ostart, maxDstSize) : dctx->litBuffer;
1625
+ BYTE* op = ostart;
1626
+ const BYTE* litPtr = dctx->litPtr;
1627
+ const BYTE* const litEnd = litPtr + dctx->litSize;
1628
+ const BYTE* const prefixStart = (const BYTE*)(dctx->prefixStart);
1629
+ const BYTE* const vBase = (const BYTE*)(dctx->virtualStart);
1630
+ const BYTE* const dictEnd = (const BYTE*)(dctx->dictEnd);
1631
+ DEBUGLOG(5, "ZSTD_decompressSequences_body: nbSeq = %d", nbSeq);
1632
+
1633
+ /* Regen sequences */
1634
+ if (nbSeq) {
1635
+ seqState_t seqState;
1636
+ dctx->fseEntropy = 1;
1637
+ { U32 i; for (i = 0; i < ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
1638
+ RETURN_ERROR_IF(
1639
+ ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend - ip)),
1640
+ corruption_detected, "");
1641
+ ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
1642
+ ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
1643
+ ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
1644
+ assert(dst != NULL);
1645
+
1646
+ #if defined(__GNUC__) && defined(__x86_64__)
1647
+ __asm__(".p2align 6");
1648
+ __asm__("nop");
1649
+ # if __GNUC__ >= 7
1650
+ __asm__(".p2align 5");
1651
+ __asm__("nop");
1652
+ __asm__(".p2align 3");
1653
+ # else
1654
+ __asm__(".p2align 4");
1655
+ __asm__("nop");
1656
+ __asm__(".p2align 3");
1657
+ # endif
1658
+ #endif
1659
+
1660
+ for ( ; nbSeq ; nbSeq--) {
1661
+ seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1);
1662
+ size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
1663
+ #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1664
+ assert(!ZSTD_isError(oneSeqSize));
1665
+ ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
1666
+ #endif
1667
+ if (UNLIKELY(ZSTD_isError(oneSeqSize)))
1668
+ return oneSeqSize;
1669
+ DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
1670
+ op += oneSeqSize;
1108
1671
  }
1672
+
1673
+ /* check if reached exact end */
1674
+ assert(nbSeq == 0);
1675
+ RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, "");
1676
+ /* save reps for next block */
1677
+ { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
1109
1678
  }
1110
1679
 
1111
- return op-ostart;
1680
+ /* last literal segment */
1681
+ { size_t const lastLLSize = (size_t)(litEnd - litPtr);
1682
+ DEBUGLOG(6, "copy last literals : %u", (U32)lastLLSize);
1683
+ RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
1684
+ if (op != NULL) {
1685
+ ZSTD_memcpy(op, litPtr, lastLLSize);
1686
+ op += lastLLSize;
1687
+ } }
1688
+
1689
+ DEBUGLOG(6, "decoded block of size %u bytes", (U32)(op - ostart));
1690
+ return (size_t)(op - ostart);
1112
1691
  }
1113
1692
 
1114
1693
  static size_t
1115
1694
  ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
1116
1695
  void* dst, size_t maxDstSize,
1117
1696
  const void* seqStart, size_t seqSize, int nbSeq,
1118
- const ZSTD_longOffset_e isLongOffset,
1119
- const int frame)
1697
+ const ZSTD_longOffset_e isLongOffset)
1120
1698
  {
1121
- return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1699
+ return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1700
+ }
1701
+
1702
+ static size_t
1703
+ ZSTD_decompressSequencesSplitLitBuffer_default(ZSTD_DCtx* dctx,
1704
+ void* dst, size_t maxDstSize,
1705
+ const void* seqStart, size_t seqSize, int nbSeq,
1706
+ const ZSTD_longOffset_e isLongOffset)
1707
+ {
1708
+ return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1122
1709
  }
1123
1710
  #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
1124
1711
 
1125
1712
  #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
1713
+
1714
+ FORCE_INLINE_TEMPLATE
1715
+
1716
+ size_t ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence,
1717
+ const BYTE* const prefixStart, const BYTE* const dictEnd)
1718
+ {
1719
+ prefetchPos += sequence.litLength;
1720
+ { const BYTE* const matchBase = (sequence.offset > prefetchPos) ? dictEnd : prefixStart;
1721
+ /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
1722
+ * No consequence though : memory address is only used for prefetching, not for dereferencing */
1723
+ const BYTE* const match = ZSTD_wrappedPtrSub(ZSTD_wrappedPtrAdd(matchBase, prefetchPos), sequence.offset);
1724
+ PREFETCH_L1(match); PREFETCH_L1(match+CACHELINE_SIZE); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
1725
+ }
1726
+ return prefetchPos + sequence.matchLength;
1727
+ }
1728
+
1729
+ /* This decoding function employs prefetching
1730
+ * to reduce latency impact of cache misses.
1731
+ * It's generally employed when block contains a significant portion of long-distance matches
1732
+ * or when coupled with a "cold" dictionary */
1126
1733
  FORCE_INLINE_TEMPLATE size_t
1127
1734
  ZSTD_decompressSequencesLong_body(
1128
1735
  ZSTD_DCtx* dctx,
1129
1736
  void* dst, size_t maxDstSize,
1130
1737
  const void* seqStart, size_t seqSize, int nbSeq,
1131
- const ZSTD_longOffset_e isLongOffset,
1132
- const int frame)
1738
+ const ZSTD_longOffset_e isLongOffset)
1133
1739
  {
1134
1740
  const BYTE* ip = (const BYTE*)seqStart;
1135
1741
  const BYTE* const iend = ip + seqSize;
1136
- BYTE* const ostart = (BYTE* const)dst;
1137
- BYTE* const oend = ostart + maxDstSize;
1742
+ BYTE* const ostart = (BYTE*)dst;
1743
+ BYTE* const oend = dctx->litBufferLocation == ZSTD_in_dst ? dctx->litBuffer : ZSTD_maybeNullPtrAdd(ostart, maxDstSize);
1138
1744
  BYTE* op = ostart;
1139
1745
  const BYTE* litPtr = dctx->litPtr;
1140
- const BYTE* const litEnd = litPtr + dctx->litSize;
1746
+ const BYTE* litBufferEnd = dctx->litBufferEnd;
1141
1747
  const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
1142
1748
  const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);
1143
1749
  const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
1144
- (void)frame;
1145
1750
 
1146
1751
  /* Regen sequences */
1147
1752
  if (nbSeq) {
1148
- #define STORED_SEQS 4
1753
+ #define STORED_SEQS 8
1149
1754
  #define STORED_SEQS_MASK (STORED_SEQS-1)
1150
- #define ADVANCED_SEQS 4
1755
+ #define ADVANCED_SEQS STORED_SEQS
1151
1756
  seq_t sequences[STORED_SEQS];
1152
1757
  int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS);
1153
1758
  seqState_t seqState;
1154
1759
  int seqNb;
1760
+ size_t prefetchPos = (size_t)(op-prefixStart); /* track position relative to prefixStart */
1761
+
1155
1762
  dctx->fseEntropy = 1;
1156
1763
  { int i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
1157
- seqState.prefixStart = prefixStart;
1158
- seqState.pos = (size_t)(op-prefixStart);
1159
- seqState.dictEnd = dictEnd;
1160
1764
  assert(dst != NULL);
1161
1765
  assert(iend >= ip);
1162
1766
  RETURN_ERROR_IF(
@@ -1167,37 +1771,95 @@ ZSTD_decompressSequencesLong_body(
1167
1771
  ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
1168
1772
 
1169
1773
  /* prepare in advance */
1170
- for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
1171
- sequences[seqNb] = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_prefetch);
1172
- PREFETCH_L1(sequences[seqNb].match); PREFETCH_L1(sequences[seqNb].match + sequences[seqNb].matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
1774
+ for (seqNb=0; seqNb<seqAdvance; seqNb++) {
1775
+ seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, seqNb == nbSeq-1);
1776
+ prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
1777
+ sequences[seqNb] = sequence;
1173
1778
  }
1174
- RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected, "");
1175
1779
 
1176
- /* decode and decompress */
1177
- for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) {
1178
- seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_prefetch);
1179
- size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
1780
+ /* decompress without stomping litBuffer */
1781
+ for (; seqNb < nbSeq; seqNb++) {
1782
+ seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset, seqNb == nbSeq-1);
1783
+
1784
+ if (dctx->litBufferLocation == ZSTD_split && litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength > dctx->litBufferEnd) {
1785
+ /* lit buffer is reaching split point, empty out the first buffer and transition to litExtraBuffer */
1786
+ const size_t leftoverLit = dctx->litBufferEnd - litPtr;
1787
+ if (leftoverLit)
1788
+ {
1789
+ RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
1790
+ ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
1791
+ sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength -= leftoverLit;
1792
+ op += leftoverLit;
1793
+ }
1794
+ litPtr = dctx->litExtraBuffer;
1795
+ litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
1796
+ dctx->litBufferLocation = ZSTD_not_in_dst;
1797
+ { size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
1180
1798
  #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1181
- assert(!ZSTD_isError(oneSeqSize));
1182
- if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
1799
+ assert(!ZSTD_isError(oneSeqSize));
1800
+ ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
1183
1801
  #endif
1184
- if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1185
- PREFETCH_L1(sequence.match); PREFETCH_L1(sequence.match + sequence.matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
1186
- sequences[seqNb & STORED_SEQS_MASK] = sequence;
1187
- op += oneSeqSize;
1802
+ if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1803
+
1804
+ prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
1805
+ sequences[seqNb & STORED_SEQS_MASK] = sequence;
1806
+ op += oneSeqSize;
1807
+ } }
1808
+ else
1809
+ {
1810
+ /* lit buffer is either wholly contained in first or second split, or not split at all*/
1811
+ size_t const oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
1812
+ ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength - WILDCOPY_OVERLENGTH, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :
1813
+ ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
1814
+ #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1815
+ assert(!ZSTD_isError(oneSeqSize));
1816
+ ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
1817
+ #endif
1818
+ if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1819
+
1820
+ prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
1821
+ sequences[seqNb & STORED_SEQS_MASK] = sequence;
1822
+ op += oneSeqSize;
1823
+ }
1188
1824
  }
1189
- RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected, "");
1825
+ RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, "");
1190
1826
 
1191
1827
  /* finish queue */
1192
1828
  seqNb -= seqAdvance;
1193
1829
  for ( ; seqNb<nbSeq ; seqNb++) {
1194
- size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[seqNb&STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
1830
+ seq_t *sequence = &(sequences[seqNb&STORED_SEQS_MASK]);
1831
+ if (dctx->litBufferLocation == ZSTD_split && litPtr + sequence->litLength > dctx->litBufferEnd) {
1832
+ const size_t leftoverLit = dctx->litBufferEnd - litPtr;
1833
+ if (leftoverLit) {
1834
+ RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
1835
+ ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
1836
+ sequence->litLength -= leftoverLit;
1837
+ op += leftoverLit;
1838
+ }
1839
+ litPtr = dctx->litExtraBuffer;
1840
+ litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
1841
+ dctx->litBufferLocation = ZSTD_not_in_dst;
1842
+ { size_t const oneSeqSize = ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
1195
1843
  #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1196
- assert(!ZSTD_isError(oneSeqSize));
1197
- if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
1844
+ assert(!ZSTD_isError(oneSeqSize));
1845
+ ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
1198
1846
  #endif
1199
- if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1200
- op += oneSeqSize;
1847
+ if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1848
+ op += oneSeqSize;
1849
+ }
1850
+ }
1851
+ else
1852
+ {
1853
+ size_t const oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
1854
+ ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence->litLength - WILDCOPY_OVERLENGTH, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :
1855
+ ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
1856
+ #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
1857
+ assert(!ZSTD_isError(oneSeqSize));
1858
+ ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
1859
+ #endif
1860
+ if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1861
+ op += oneSeqSize;
1862
+ }
1201
1863
  }
1202
1864
 
1203
1865
  /* save reps for next block */
@@ -1205,25 +1867,34 @@ ZSTD_decompressSequencesLong_body(
1205
1867
  }
1206
1868
 
1207
1869
  /* last literal segment */
1208
- { size_t const lastLLSize = litEnd - litPtr;
1870
+ if (dctx->litBufferLocation == ZSTD_split) { /* first deplete literal buffer in dst, then copy litExtraBuffer */
1871
+ size_t const lastLLSize = litBufferEnd - litPtr;
1872
+ RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
1873
+ if (op != NULL) {
1874
+ ZSTD_memmove(op, litPtr, lastLLSize);
1875
+ op += lastLLSize;
1876
+ }
1877
+ litPtr = dctx->litExtraBuffer;
1878
+ litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
1879
+ }
1880
+ { size_t const lastLLSize = litBufferEnd - litPtr;
1209
1881
  RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
1210
1882
  if (op != NULL) {
1211
- memcpy(op, litPtr, lastLLSize);
1883
+ ZSTD_memmove(op, litPtr, lastLLSize);
1212
1884
  op += lastLLSize;
1213
1885
  }
1214
1886
  }
1215
1887
 
1216
- return op-ostart;
1888
+ return (size_t)(op - ostart);
1217
1889
  }
1218
1890
 
1219
1891
  static size_t
1220
1892
  ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
1221
1893
  void* dst, size_t maxDstSize,
1222
1894
  const void* seqStart, size_t seqSize, int nbSeq,
1223
- const ZSTD_longOffset_e isLongOffset,
1224
- const int frame)
1895
+ const ZSTD_longOffset_e isLongOffset)
1225
1896
  {
1226
- return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1897
+ return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1227
1898
  }
1228
1899
  #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
1229
1900
 
@@ -1232,17 +1903,34 @@ ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
1232
1903
  #if DYNAMIC_BMI2
1233
1904
 
1234
1905
  #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
1906
+ static BMI2_TARGET_ATTRIBUTE size_t
1907
+ DONT_VECTORIZE
1908
+ ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
1909
+ void* dst, size_t maxDstSize,
1910
+ const void* seqStart, size_t seqSize, int nbSeq,
1911
+ const ZSTD_longOffset_e isLongOffset)
1912
+ {
1913
+ return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1914
+ }
1915
+ static BMI2_TARGET_ATTRIBUTE size_t
1916
+ DONT_VECTORIZE
1917
+ ZSTD_decompressSequencesSplitLitBuffer_bmi2(ZSTD_DCtx* dctx,
1918
+ void* dst, size_t maxDstSize,
1919
+ const void* seqStart, size_t seqSize, int nbSeq,
1920
+ const ZSTD_longOffset_e isLongOffset)
1921
+ {
1922
+ return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1923
+ }
1235
1924
  #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
1236
1925
 
1237
1926
  #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
1238
- static TARGET_ATTRIBUTE("bmi2") size_t
1927
+ static BMI2_TARGET_ATTRIBUTE size_t
1239
1928
  ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
1240
1929
  void* dst, size_t maxDstSize,
1241
1930
  const void* seqStart, size_t seqSize, int nbSeq,
1242
- const ZSTD_longOffset_e isLongOffset,
1243
- const int frame)
1931
+ const ZSTD_longOffset_e isLongOffset)
1244
1932
  {
1245
- return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1933
+ return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1246
1934
  }
1247
1935
  #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
1248
1936
 
@@ -1252,18 +1940,34 @@ typedef size_t (*ZSTD_decompressSequences_t)(
1252
1940
  ZSTD_DCtx* dctx,
1253
1941
  void* dst, size_t maxDstSize,
1254
1942
  const void* seqStart, size_t seqSize, int nbSeq,
1255
- const ZSTD_longOffset_e isLongOffset,
1256
- const int frame);
1943
+ const ZSTD_longOffset_e isLongOffset);
1257
1944
 
1258
1945
  #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
1259
1946
  static size_t
1260
1947
  ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
1261
1948
  const void* seqStart, size_t seqSize, int nbSeq,
1262
- const ZSTD_longOffset_e isLongOffset,
1263
- const int frame)
1949
+ const ZSTD_longOffset_e isLongOffset)
1264
1950
  {
1265
1951
  DEBUGLOG(5, "ZSTD_decompressSequences");
1266
- return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1952
+ #if DYNAMIC_BMI2
1953
+ if (ZSTD_DCtx_get_bmi2(dctx)) {
1954
+ return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1955
+ }
1956
+ #endif
1957
+ return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1958
+ }
1959
+ static size_t
1960
+ ZSTD_decompressSequencesSplitLitBuffer(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
1961
+ const void* seqStart, size_t seqSize, int nbSeq,
1962
+ const ZSTD_longOffset_e isLongOffset)
1963
+ {
1964
+ DEBUGLOG(5, "ZSTD_decompressSequencesSplitLitBuffer");
1965
+ #if DYNAMIC_BMI2
1966
+ if (ZSTD_DCtx_get_bmi2(dctx)) {
1967
+ return ZSTD_decompressSequencesSplitLitBuffer_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1968
+ }
1969
+ #endif
1970
+ return ZSTD_decompressSequencesSplitLitBuffer_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1267
1971
  }
1268
1972
  #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
1269
1973
 
@@ -1278,69 +1982,114 @@ static size_t
1278
1982
  ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
1279
1983
  void* dst, size_t maxDstSize,
1280
1984
  const void* seqStart, size_t seqSize, int nbSeq,
1281
- const ZSTD_longOffset_e isLongOffset,
1282
- const int frame)
1985
+ const ZSTD_longOffset_e isLongOffset)
1283
1986
  {
1284
1987
  DEBUGLOG(5, "ZSTD_decompressSequencesLong");
1285
1988
  #if DYNAMIC_BMI2
1286
- if (dctx->bmi2) {
1287
- return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1989
+ if (ZSTD_DCtx_get_bmi2(dctx)) {
1990
+ return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1288
1991
  }
1289
1992
  #endif
1290
- return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
1993
+ return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
1291
1994
  }
1292
1995
  #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
1293
1996
 
1294
1997
 
1998
+ /**
1999
+ * @returns The total size of the history referenceable by zstd, including
2000
+ * both the prefix and the extDict. At @p op any offset larger than this
2001
+ * is invalid.
2002
+ */
2003
+ static size_t ZSTD_totalHistorySize(BYTE* op, BYTE const* virtualStart)
2004
+ {
2005
+ return (size_t)(op - virtualStart);
2006
+ }
1295
2007
 
1296
- #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
1297
- !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
1298
- /* ZSTD_getLongOffsetsShare() :
2008
+ typedef struct {
2009
+ unsigned longOffsetShare;
2010
+ unsigned maxNbAdditionalBits;
2011
+ } ZSTD_OffsetInfo;
2012
+
2013
+ /* ZSTD_getOffsetInfo() :
1299
2014
  * condition : offTable must be valid
1300
2015
  * @return : "share" of long offsets (arbitrarily defined as > (1<<23))
1301
- * compared to maximum possible of (1<<OffFSELog) */
1302
- static unsigned
1303
- ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable)
2016
+ * compared to maximum possible of (1<<OffFSELog),
2017
+ * as well as the maximum number of additional bits required.
2018
+ */
2019
+ static ZSTD_OffsetInfo
2020
+ ZSTD_getOffsetInfo(const ZSTD_seqSymbol* offTable, int nbSeq)
1304
2021
  {
1305
- const void* ptr = offTable;
1306
- U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog;
1307
- const ZSTD_seqSymbol* table = offTable + 1;
1308
- U32 const max = 1 << tableLog;
1309
- U32 u, total = 0;
1310
- DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog);
1311
-
1312
- assert(max <= (1 << OffFSELog)); /* max not too large */
1313
- for (u=0; u<max; u++) {
1314
- if (table[u].nbAdditionalBits > 22) total += 1;
2022
+ ZSTD_OffsetInfo info = {0, 0};
2023
+ /* If nbSeq == 0, then the offTable is uninitialized, but we have
2024
+ * no sequences, so both values should be 0.
2025
+ */
2026
+ if (nbSeq != 0) {
2027
+ const void* ptr = offTable;
2028
+ U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog;
2029
+ const ZSTD_seqSymbol* table = offTable + 1;
2030
+ U32 const max = 1 << tableLog;
2031
+ U32 u;
2032
+ DEBUGLOG(5, "ZSTD_getLongOffsetsShare: (tableLog=%u)", tableLog);
2033
+
2034
+ assert(max <= (1 << OffFSELog)); /* max not too large */
2035
+ for (u=0; u<max; u++) {
2036
+ info.maxNbAdditionalBits = MAX(info.maxNbAdditionalBits, table[u].nbAdditionalBits);
2037
+ if (table[u].nbAdditionalBits > 22) info.longOffsetShare += 1;
2038
+ }
2039
+
2040
+ assert(tableLog <= OffFSELog);
2041
+ info.longOffsetShare <<= (OffFSELog - tableLog); /* scale to OffFSELog */
1315
2042
  }
1316
2043
 
1317
- assert(tableLog <= OffFSELog);
1318
- total <<= (OffFSELog - tableLog); /* scale to OffFSELog */
2044
+ return info;
2045
+ }
1319
2046
 
1320
- return total;
2047
+ /**
2048
+ * @returns The maximum offset we can decode in one read of our bitstream, without
2049
+ * reloading more bits in the middle of the offset bits read. Any offsets larger
2050
+ * than this must use the long offset decoder.
2051
+ */
2052
+ static size_t ZSTD_maxShortOffset(void)
2053
+ {
2054
+ if (MEM_64bits()) {
2055
+ /* We can decode any offset without reloading bits.
2056
+ * This might change if the max window size grows.
2057
+ */
2058
+ ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31);
2059
+ return (size_t)-1;
2060
+ } else {
2061
+ /* The maximum offBase is (1 << (STREAM_ACCUMULATOR_MIN + 1)) - 1.
2062
+ * This offBase would require STREAM_ACCUMULATOR_MIN extra bits.
2063
+ * Then we have to subtract ZSTD_REP_NUM to get the maximum possible offset.
2064
+ */
2065
+ size_t const maxOffbase = ((size_t)1 << (STREAM_ACCUMULATOR_MIN + 1)) - 1;
2066
+ size_t const maxOffset = maxOffbase - ZSTD_REP_NUM;
2067
+ assert(ZSTD_highbit32((U32)maxOffbase) == STREAM_ACCUMULATOR_MIN);
2068
+ return maxOffset;
2069
+ }
1321
2070
  }
1322
- #endif
1323
2071
 
1324
2072
  size_t
1325
2073
  ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
1326
2074
  void* dst, size_t dstCapacity,
1327
- const void* src, size_t srcSize, const int frame)
2075
+ const void* src, size_t srcSize, const streaming_operation streaming)
1328
2076
  { /* blockType == blockCompressed */
1329
2077
  const BYTE* ip = (const BYTE*)src;
1330
- /* isLongOffset must be true if there are long offsets.
1331
- * Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN.
1332
- * We don't expect that to be the case in 64-bit mode.
1333
- * In block mode, window size is not known, so we have to be conservative.
1334
- * (note: but it could be evaluated from current-lowLimit)
1335
- */
1336
- ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN))));
1337
- DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
1338
-
1339
- RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, "");
2078
+ DEBUGLOG(5, "ZSTD_decompressBlock_internal (cSize : %u)", (unsigned)srcSize);
2079
+
2080
+ /* Note : the wording of the specification
2081
+ * allows compressed block to be sized exactly ZSTD_blockSizeMax(dctx).
2082
+ * This generally does not happen, as it makes little sense,
2083
+ * since an uncompressed block would feature same size and have no decompression cost.
2084
+ * Also, note that decoder from reference libzstd before < v1.5.4
2085
+ * would consider this edge case as an error.
2086
+ * As a consequence, avoid generating compressed blocks of size ZSTD_blockSizeMax(dctx)
2087
+ * for broader compatibility with the deployed ecosystem of zstd decoders */
2088
+ RETURN_ERROR_IF(srcSize > ZSTD_blockSizeMax(dctx), srcSize_wrong, "");
1340
2089
 
1341
2090
  /* Decode literals section */
1342
- { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
1343
- DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : %u", (U32)litCSize);
2091
+ { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, streaming);
2092
+ DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : cSize=%u, nbLiterals=%zu", (U32)litCSize, dctx->litSize);
1344
2093
  if (ZSTD_isError(litCSize)) return litCSize;
1345
2094
  ip += litCSize;
1346
2095
  srcSize -= litCSize;
@@ -1348,6 +2097,23 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
1348
2097
 
1349
2098
  /* Build Decoding Tables */
1350
2099
  {
2100
+ /* Compute the maximum block size, which must also work when !frame and fParams are unset.
2101
+ * Additionally, take the min with dstCapacity to ensure that the totalHistorySize fits in a size_t.
2102
+ */
2103
+ size_t const blockSizeMax = MIN(dstCapacity, ZSTD_blockSizeMax(dctx));
2104
+ size_t const totalHistorySize = ZSTD_totalHistorySize(ZSTD_maybeNullPtrAdd((BYTE*)dst, blockSizeMax), (BYTE const*)dctx->virtualStart);
2105
+ /* isLongOffset must be true if there are long offsets.
2106
+ * Offsets are long if they are larger than ZSTD_maxShortOffset().
2107
+ * We don't expect that to be the case in 64-bit mode.
2108
+ *
2109
+ * We check here to see if our history is large enough to allow long offsets.
2110
+ * If it isn't, then we can't possibly have (valid) long offsets. If the offset
2111
+ * is invalid, then it is okay to read it incorrectly.
2112
+ *
2113
+ * If isLongOffset is true, then we will later check our decoding table to see
2114
+ * if it is even possible to generate long offsets.
2115
+ */
2116
+ ZSTD_longOffset_e isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (totalHistorySize > ZSTD_maxShortOffset()));
1351
2117
  /* These macros control at build-time which decompressor implementation
1352
2118
  * we use. If neither is defined, we do some inspection and dispatch at
1353
2119
  * runtime.
@@ -1355,6 +2121,11 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
1355
2121
  #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
1356
2122
  !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
1357
2123
  int usePrefetchDecoder = dctx->ddictIsCold;
2124
+ #else
2125
+ /* Set to 1 to avoid computing offset info if we don't need to.
2126
+ * Otherwise this value is ignored.
2127
+ */
2128
+ int usePrefetchDecoder = 1;
1358
2129
  #endif
1359
2130
  int nbSeq;
1360
2131
  size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize);
@@ -1362,40 +2133,58 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
1362
2133
  ip += seqHSize;
1363
2134
  srcSize -= seqHSize;
1364
2135
 
1365
- RETURN_ERROR_IF(dst == NULL && nbSeq > 0, dstSize_tooSmall, "NULL not handled");
2136
+ RETURN_ERROR_IF((dst == NULL || dstCapacity == 0) && nbSeq > 0, dstSize_tooSmall, "NULL not handled");
2137
+ RETURN_ERROR_IF(MEM_64bits() && sizeof(size_t) == sizeof(void*) && (size_t)(-1) - (size_t)dst < (size_t)(1 << 20), dstSize_tooSmall,
2138
+ "invalid dst");
1366
2139
 
1367
- #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
1368
- !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
1369
- if ( !usePrefetchDecoder
1370
- && (!frame || (dctx->fParams.windowSize > (1<<24)))
1371
- && (nbSeq>ADVANCED_SEQS) ) { /* could probably use a larger nbSeq limit */
1372
- U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(dctx->OFTptr);
1373
- U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */
1374
- usePrefetchDecoder = (shareLongOffsets >= minShare);
2140
+ /* If we could potentially have long offsets, or we might want to use the prefetch decoder,
2141
+ * compute information about the share of long offsets, and the maximum nbAdditionalBits.
2142
+ * NOTE: could probably use a larger nbSeq limit
2143
+ */
2144
+ if (isLongOffset || (!usePrefetchDecoder && (totalHistorySize > (1u << 24)) && (nbSeq > 8))) {
2145
+ ZSTD_OffsetInfo const info = ZSTD_getOffsetInfo(dctx->OFTptr, nbSeq);
2146
+ if (isLongOffset && info.maxNbAdditionalBits <= STREAM_ACCUMULATOR_MIN) {
2147
+ /* If isLongOffset, but the maximum number of additional bits that we see in our table is small
2148
+ * enough, then we know it is impossible to have too long an offset in this block, so we can
2149
+ * use the regular offset decoder.
2150
+ */
2151
+ isLongOffset = ZSTD_lo_isRegularOffset;
2152
+ }
2153
+ if (!usePrefetchDecoder) {
2154
+ U32 const minShare = MEM_64bits() ? 7 : 20; /* heuristic values, correspond to 2.73% and 7.81% */
2155
+ usePrefetchDecoder = (info.longOffsetShare >= minShare);
2156
+ }
1375
2157
  }
1376
- #endif
1377
2158
 
1378
2159
  dctx->ddictIsCold = 0;
1379
2160
 
1380
2161
  #if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
1381
2162
  !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
1382
- if (usePrefetchDecoder)
2163
+ if (usePrefetchDecoder) {
2164
+ #else
2165
+ (void)usePrefetchDecoder;
2166
+ {
1383
2167
  #endif
1384
2168
  #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
1385
- return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
2169
+ return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
1386
2170
  #endif
2171
+ }
1387
2172
 
1388
2173
  #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
1389
2174
  /* else */
1390
- return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
2175
+ if (dctx->litBufferLocation == ZSTD_split)
2176
+ return ZSTD_decompressSequencesSplitLitBuffer(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
2177
+ else
2178
+ return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
1391
2179
  #endif
1392
2180
  }
1393
2181
  }
1394
2182
 
1395
2183
 
1396
- void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst)
2184
+ ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
2185
+ void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize)
1397
2186
  {
1398
- if (dst != dctx->previousDstEnd) { /* not contiguous */
2187
+ if (dst != dctx->previousDstEnd && dstSize > 0) { /* not contiguous */
1399
2188
  dctx->dictEnd = dctx->previousDstEnd;
1400
2189
  dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
1401
2190
  dctx->prefixStart = dst;
@@ -1404,15 +2193,26 @@ void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst)
1404
2193
  }
1405
2194
 
1406
2195
 
1407
- size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
1408
- void* dst, size_t dstCapacity,
1409
- const void* src, size_t srcSize)
2196
+ size_t ZSTD_decompressBlock_deprecated(ZSTD_DCtx* dctx,
2197
+ void* dst, size_t dstCapacity,
2198
+ const void* src, size_t srcSize)
1410
2199
  {
1411
2200
  size_t dSize;
1412
- ZSTD_checkContinuity(dctx, dst);
1413
- dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0);
2201
+ dctx->isFrameDecompression = 0;
2202
+ ZSTD_checkContinuity(dctx, dst, dstCapacity);
2203
+ dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, not_streaming);
2204
+ FORWARD_IF_ERROR(dSize, "");
1414
2205
  dctx->previousDstEnd = (char*)dst + dSize;
1415
2206
  return dSize;
1416
2207
  }
1417
2208
 
2209
+
2210
+ /* NOTE: Must just wrap ZSTD_decompressBlock_deprecated() */
2211
+ size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
2212
+ void* dst, size_t dstCapacity,
2213
+ const void* src, size_t srcSize)
2214
+ {
2215
+ return ZSTD_decompressBlock_deprecated(dctx, dst, dstCapacity, src, srcSize);
1418
2216
  }
2217
+
2218
+ } // namespace duckdb_zstd