duckdb 1.1.2-dev6.0 → 1.1.4-dev11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (1747) hide show
  1. package/.github/workflows/NodeJS.yml +5 -54
  2. package/binding.gyp +73 -52
  3. package/package.json +2 -2
  4. package/src/duckdb/{src → extension}/core_functions/aggregate/algebraic/avg.cpp +2 -2
  5. package/src/duckdb/{src → extension}/core_functions/aggregate/algebraic/corr.cpp +4 -4
  6. package/src/duckdb/{src → extension}/core_functions/aggregate/algebraic/covar.cpp +2 -2
  7. package/src/duckdb/{src → extension}/core_functions/aggregate/algebraic/stddev.cpp +2 -2
  8. package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/approx_count.cpp +1 -1
  9. package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/arg_min_max.cpp +66 -18
  10. package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/bitagg.cpp +1 -1
  11. package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/bitstring_agg.cpp +5 -7
  12. package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/bool.cpp +3 -1
  13. package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/kurtosis.cpp +1 -1
  14. package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/product.cpp +1 -1
  15. package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/skew.cpp +2 -2
  16. package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/string_agg.cpp +1 -1
  17. package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/sum.cpp +13 -2
  18. package/src/duckdb/{src → extension}/core_functions/aggregate/holistic/approx_top_k.cpp +3 -3
  19. package/src/duckdb/{src → extension}/core_functions/aggregate/holistic/approximate_quantile.cpp +51 -15
  20. package/src/duckdb/{src → extension}/core_functions/aggregate/holistic/mad.cpp +25 -10
  21. package/src/duckdb/{src → extension}/core_functions/aggregate/holistic/mode.cpp +215 -71
  22. package/src/duckdb/{src → extension}/core_functions/aggregate/holistic/quantile.cpp +58 -31
  23. package/src/duckdb/{src → extension}/core_functions/aggregate/holistic/reservoir_quantile.cpp +2 -2
  24. package/src/duckdb/{src → extension}/core_functions/aggregate/nested/binned_histogram.cpp +9 -4
  25. package/src/duckdb/{src → extension}/core_functions/aggregate/nested/histogram.cpp +4 -2
  26. package/src/duckdb/{src → extension}/core_functions/aggregate/nested/list.cpp +1 -1
  27. package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_avg.cpp +1 -1
  28. package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_count.cpp +2 -2
  29. package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_intercept.cpp +6 -2
  30. package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_r2.cpp +2 -2
  31. package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_slope.cpp +2 -2
  32. package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_sxx_syy.cpp +2 -2
  33. package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_sxy.cpp +3 -3
  34. package/src/duckdb/extension/core_functions/core_functions_extension.cpp +85 -0
  35. package/src/duckdb/{src → extension}/core_functions/function_list.cpp +30 -51
  36. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/algebraic/corr.hpp +3 -7
  37. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/algebraic_functions.hpp +1 -1
  38. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/distributive_functions.hpp +16 -21
  39. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/histogram_helpers.hpp +1 -1
  40. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/holistic_functions.hpp +1 -1
  41. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/nested_functions.hpp +1 -1
  42. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/quantile_helpers.hpp +2 -2
  43. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/quantile_sort_tree.hpp +140 -58
  44. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/quantile_state.hpp +50 -43
  45. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/regression/regr_count.hpp +2 -2
  46. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/regression/regr_slope.hpp +3 -7
  47. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/regression_functions.hpp +1 -1
  48. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/array_functions.hpp +1 -1
  49. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/bit_functions.hpp +1 -1
  50. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/blob_functions.hpp +1 -10
  51. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/date_functions.hpp +22 -55
  52. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/debug_functions.hpp +1 -1
  53. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/enum_functions.hpp +1 -1
  54. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/generic_functions.hpp +1 -10
  55. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/list_functions.hpp +4 -4
  56. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/map_functions.hpp +1 -10
  57. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/math_functions.hpp +1 -1
  58. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/operators_functions.hpp +1 -1
  59. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/random_functions.hpp +1 -1
  60. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/string_functions.hpp +10 -103
  61. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/struct_functions.hpp +1 -19
  62. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/union_functions.hpp +1 -1
  63. package/src/duckdb/extension/core_functions/include/core_functions_extension.hpp +22 -0
  64. package/src/duckdb/{src → extension}/core_functions/lambda_functions.cpp +1 -1
  65. package/src/duckdb/{src → extension}/core_functions/scalar/array/array_functions.cpp +11 -4
  66. package/src/duckdb/{src → extension}/core_functions/scalar/array/array_value.cpp +2 -2
  67. package/src/duckdb/{src → extension}/core_functions/scalar/bit/bitstring.cpp +12 -5
  68. package/src/duckdb/{src → extension}/core_functions/scalar/blob/base64.cpp +4 -2
  69. package/src/duckdb/{src → extension}/core_functions/scalar/blob/encode.cpp +4 -2
  70. package/src/duckdb/{src → extension}/core_functions/scalar/date/age.cpp +9 -3
  71. package/src/duckdb/extension/core_functions/scalar/date/current.cpp +29 -0
  72. package/src/duckdb/{src → extension}/core_functions/scalar/date/date_diff.cpp +1 -1
  73. package/src/duckdb/{src → extension}/core_functions/scalar/date/date_part.cpp +42 -9
  74. package/src/duckdb/{src → extension}/core_functions/scalar/date/date_sub.cpp +1 -1
  75. package/src/duckdb/{src → extension}/core_functions/scalar/date/date_trunc.cpp +4 -1
  76. package/src/duckdb/{src → extension}/core_functions/scalar/date/epoch.cpp +19 -3
  77. package/src/duckdb/{src → extension}/core_functions/scalar/date/make_date.cpp +40 -5
  78. package/src/duckdb/{src → extension}/core_functions/scalar/date/time_bucket.cpp +4 -1
  79. package/src/duckdb/{src → extension}/core_functions/scalar/date/to_interval.cpp +54 -28
  80. package/src/duckdb/{src → extension}/core_functions/scalar/debug/vector_type.cpp +1 -1
  81. package/src/duckdb/{src → extension}/core_functions/scalar/enum/enum_functions.cpp +2 -7
  82. package/src/duckdb/{src → extension}/core_functions/scalar/generic/alias.cpp +2 -2
  83. package/src/duckdb/{src/function → extension/core_functions}/scalar/generic/binning.cpp +4 -3
  84. package/src/duckdb/{src → extension}/core_functions/scalar/generic/can_implicitly_cast.cpp +1 -1
  85. package/src/duckdb/{src → extension}/core_functions/scalar/generic/current_setting.cpp +1 -1
  86. package/src/duckdb/{src → extension}/core_functions/scalar/generic/hash.cpp +1 -1
  87. package/src/duckdb/{src → extension}/core_functions/scalar/generic/least.cpp +30 -10
  88. package/src/duckdb/{src → extension}/core_functions/scalar/generic/stats.cpp +1 -1
  89. package/src/duckdb/{src → extension}/core_functions/scalar/generic/system_functions.cpp +1 -1
  90. package/src/duckdb/{src → extension}/core_functions/scalar/generic/typeof.cpp +1 -1
  91. package/src/duckdb/{src → extension}/core_functions/scalar/list/array_slice.cpp +93 -88
  92. package/src/duckdb/{src → extension}/core_functions/scalar/list/flatten.cpp +1 -1
  93. package/src/duckdb/{src → extension}/core_functions/scalar/list/list_aggregates.cpp +7 -3
  94. package/src/duckdb/{src → extension}/core_functions/scalar/list/list_distance.cpp +8 -2
  95. package/src/duckdb/{src → extension}/core_functions/scalar/list/list_filter.cpp +3 -3
  96. package/src/duckdb/{src → extension}/core_functions/scalar/list/list_has_any_or_all.cpp +3 -3
  97. package/src/duckdb/{src → extension}/core_functions/scalar/list/list_reduce.cpp +5 -5
  98. package/src/duckdb/{src → extension}/core_functions/scalar/list/list_sort.cpp +1 -1
  99. package/src/duckdb/{src → extension}/core_functions/scalar/list/list_transform.cpp +3 -3
  100. package/src/duckdb/{src → extension}/core_functions/scalar/list/list_value.cpp +3 -3
  101. package/src/duckdb/{src → extension}/core_functions/scalar/list/range.cpp +7 -1
  102. package/src/duckdb/{src → extension}/core_functions/scalar/map/cardinality.cpp +1 -1
  103. package/src/duckdb/{src → extension}/core_functions/scalar/map/map.cpp +5 -4
  104. package/src/duckdb/{src → extension}/core_functions/scalar/map/map_concat.cpp +1 -1
  105. package/src/duckdb/{src → extension}/core_functions/scalar/map/map_entries.cpp +1 -1
  106. package/src/duckdb/{src → extension}/core_functions/scalar/map/map_extract.cpp +13 -25
  107. package/src/duckdb/{src → extension}/core_functions/scalar/map/map_from_entries.cpp +2 -1
  108. package/src/duckdb/{src → extension}/core_functions/scalar/map/map_keys_values.cpp +11 -9
  109. package/src/duckdb/{src → extension}/core_functions/scalar/math/numeric.cpp +83 -37
  110. package/src/duckdb/{src → extension}/core_functions/scalar/operators/bitwise.cpp +19 -1
  111. package/src/duckdb/{src → extension}/core_functions/scalar/random/random.cpp +4 -3
  112. package/src/duckdb/{src → extension}/core_functions/scalar/random/setseed.cpp +2 -1
  113. package/src/duckdb/{src → extension}/core_functions/scalar/string/ascii.cpp +1 -1
  114. package/src/duckdb/{src → extension}/core_functions/scalar/string/bar.cpp +6 -4
  115. package/src/duckdb/{src → extension}/core_functions/scalar/string/chr.cpp +1 -1
  116. package/src/duckdb/{src → extension}/core_functions/scalar/string/damerau_levenshtein.cpp +1 -1
  117. package/src/duckdb/{src → extension}/core_functions/scalar/string/format_bytes.cpp +1 -1
  118. package/src/duckdb/{src → extension}/core_functions/scalar/string/hamming.cpp +1 -1
  119. package/src/duckdb/{src → extension}/core_functions/scalar/string/hex.cpp +7 -3
  120. package/src/duckdb/{src → extension}/core_functions/scalar/string/instr.cpp +4 -4
  121. package/src/duckdb/{src → extension}/core_functions/scalar/string/jaccard.cpp +1 -1
  122. package/src/duckdb/extension/core_functions/scalar/string/jaro_winkler.cpp +112 -0
  123. package/src/duckdb/{src → extension}/core_functions/scalar/string/left_right.cpp +6 -6
  124. package/src/duckdb/{src → extension}/core_functions/scalar/string/levenshtein.cpp +1 -1
  125. package/src/duckdb/{src → extension}/core_functions/scalar/string/pad.cpp +9 -5
  126. package/src/duckdb/{src → extension}/core_functions/scalar/string/parse_path.cpp +4 -4
  127. package/src/duckdb/{src → extension}/core_functions/scalar/string/printf.cpp +3 -1
  128. package/src/duckdb/{src → extension}/core_functions/scalar/string/repeat.cpp +4 -1
  129. package/src/duckdb/{src → extension}/core_functions/scalar/string/replace.cpp +1 -1
  130. package/src/duckdb/{src → extension}/core_functions/scalar/string/reverse.cpp +1 -1
  131. package/src/duckdb/{src → extension}/core_functions/scalar/string/starts_with.cpp +5 -3
  132. package/src/duckdb/{src → extension}/core_functions/scalar/string/to_base.cpp +1 -1
  133. package/src/duckdb/{src → extension}/core_functions/scalar/string/translate.cpp +1 -1
  134. package/src/duckdb/{src → extension}/core_functions/scalar/string/trim.cpp +1 -1
  135. package/src/duckdb/{src → extension}/core_functions/scalar/string/unicode.cpp +1 -1
  136. package/src/duckdb/{src → extension}/core_functions/scalar/string/url_encode.cpp +1 -1
  137. package/src/duckdb/{src → extension}/core_functions/scalar/struct/struct_insert.cpp +25 -31
  138. package/src/duckdb/{src → extension}/core_functions/scalar/union/union_extract.cpp +1 -1
  139. package/src/duckdb/{src → extension}/core_functions/scalar/union/union_tag.cpp +1 -1
  140. package/src/duckdb/{src → extension}/core_functions/scalar/union/union_value.cpp +3 -3
  141. package/src/duckdb/extension/icu/icu-dateadd.cpp +16 -11
  142. package/src/duckdb/extension/icu/icu-datefunc.cpp +2 -2
  143. package/src/duckdb/extension/icu/icu-datepart.cpp +8 -5
  144. package/src/duckdb/extension/icu/icu-datesub.cpp +2 -2
  145. package/src/duckdb/extension/icu/icu-datetrunc.cpp +8 -1
  146. package/src/duckdb/extension/icu/icu-list-range.cpp +2 -2
  147. package/src/duckdb/extension/icu/icu-makedate.cpp +18 -7
  148. package/src/duckdb/extension/icu/icu-strptime.cpp +9 -3
  149. package/src/duckdb/extension/icu/icu-table-range.cpp +2 -2
  150. package/src/duckdb/extension/icu/icu-timebucket.cpp +4 -1
  151. package/src/duckdb/extension/icu/icu-timezone.cpp +67 -1
  152. package/src/duckdb/extension/icu/icu_extension.cpp +60 -5
  153. package/src/duckdb/extension/icu/include/icu-datefunc.hpp +2 -1
  154. package/src/duckdb/extension/icu/third_party/icu/common/bytestriebuilder.cpp +1 -1
  155. package/src/duckdb/extension/icu/third_party/icu/common/dtintrv.cpp +1 -1
  156. package/src/duckdb/extension/icu/third_party/icu/common/filteredbrk.cpp +1 -1
  157. package/src/duckdb/extension/icu/third_party/icu/common/locid.cpp +1 -1
  158. package/src/duckdb/extension/icu/third_party/icu/common/lsr.cpp +1 -1
  159. package/src/duckdb/extension/icu/third_party/icu/common/lsr.h +2 -2
  160. package/src/duckdb/extension/icu/third_party/icu/common/messagepattern.cpp +2 -2
  161. package/src/duckdb/extension/icu/third_party/icu/common/normlzr.cpp +1 -1
  162. package/src/duckdb/extension/icu/third_party/icu/common/rbbinode.h +1 -1
  163. package/src/duckdb/extension/icu/third_party/icu/common/schriter.cpp +1 -1
  164. package/src/duckdb/extension/icu/third_party/icu/common/stringtriebuilder.cpp +8 -8
  165. package/src/duckdb/extension/icu/third_party/icu/common/ucharstriebuilder.cpp +1 -1
  166. package/src/duckdb/extension/icu/third_party/icu/common/uchriter.cpp +1 -1
  167. package/src/duckdb/extension/icu/third_party/icu/common/unicode/brkiter.h +2 -2
  168. package/src/duckdb/extension/icu/third_party/icu/common/unicode/bytestriebuilder.h +1 -1
  169. package/src/duckdb/extension/icu/third_party/icu/common/unicode/chariter.h +3 -3
  170. package/src/duckdb/extension/icu/third_party/icu/common/unicode/dtintrv.h +3 -3
  171. package/src/duckdb/extension/icu/third_party/icu/common/unicode/locid.h +3 -3
  172. package/src/duckdb/extension/icu/third_party/icu/common/unicode/messagepattern.h +4 -4
  173. package/src/duckdb/extension/icu/third_party/icu/common/unicode/normlzr.h +3 -3
  174. package/src/duckdb/extension/icu/third_party/icu/common/unicode/parsepos.h +4 -4
  175. package/src/duckdb/extension/icu/third_party/icu/common/unicode/rbbi.h +2 -2
  176. package/src/duckdb/extension/icu/third_party/icu/common/unicode/schriter.h +1 -1
  177. package/src/duckdb/extension/icu/third_party/icu/common/unicode/strenum.h +2 -2
  178. package/src/duckdb/extension/icu/third_party/icu/common/unicode/stringpiece.h +1 -1
  179. package/src/duckdb/extension/icu/third_party/icu/common/unicode/stringtriebuilder.h +9 -9
  180. package/src/duckdb/extension/icu/third_party/icu/common/unicode/ucharstriebuilder.h +1 -1
  181. package/src/duckdb/extension/icu/third_party/icu/common/unicode/uchriter.h +1 -1
  182. package/src/duckdb/extension/icu/third_party/icu/common/unicode/uniset.h +3 -3
  183. package/src/duckdb/extension/icu/third_party/icu/common/unicode/unistr.h +12 -12
  184. package/src/duckdb/extension/icu/third_party/icu/common/unicode/uobject.h +2 -2
  185. package/src/duckdb/extension/icu/third_party/icu/common/unifiedcache.h +4 -4
  186. package/src/duckdb/extension/icu/third_party/icu/common/uniset.cpp +1 -1
  187. package/src/duckdb/extension/icu/third_party/icu/common/ustr_titlecase_brkiter.cpp +1 -1
  188. package/src/duckdb/extension/icu/third_party/icu/common/ustrenum.cpp +2 -2
  189. package/src/duckdb/extension/icu/third_party/icu/common/uvector.cpp +1 -1
  190. package/src/duckdb/extension/icu/third_party/icu/common/uvector.h +3 -3
  191. package/src/duckdb/extension/icu/third_party/icu/common/uvectr32.cpp +1 -1
  192. package/src/duckdb/extension/icu/third_party/icu/common/uvectr32.h +3 -3
  193. package/src/duckdb/extension/icu/third_party/icu/common/uvectr64.cpp +1 -1
  194. package/src/duckdb/extension/icu/third_party/icu/common/uvectr64.h +3 -3
  195. package/src/duckdb/extension/icu/third_party/icu/i18n/alphaindex.cpp +2 -2
  196. package/src/duckdb/extension/icu/third_party/icu/i18n/calendar.cpp +1 -1
  197. package/src/duckdb/extension/icu/third_party/icu/i18n/choicfmt.cpp +1 -1
  198. package/src/duckdb/extension/icu/third_party/icu/i18n/coleitr.cpp +2 -2
  199. package/src/duckdb/extension/icu/third_party/icu/i18n/coll.cpp +2 -2
  200. package/src/duckdb/extension/icu/third_party/icu/i18n/collationiterator.cpp +1 -1
  201. package/src/duckdb/extension/icu/third_party/icu/i18n/collationiterator.h +2 -2
  202. package/src/duckdb/extension/icu/third_party/icu/i18n/collationsettings.cpp +1 -1
  203. package/src/duckdb/extension/icu/third_party/icu/i18n/collationsettings.h +2 -2
  204. package/src/duckdb/extension/icu/third_party/icu/i18n/currpinf.cpp +1 -1
  205. package/src/duckdb/extension/icu/third_party/icu/i18n/datefmt.cpp +2 -2
  206. package/src/duckdb/extension/icu/third_party/icu/i18n/dcfmtsym.cpp +1 -1
  207. package/src/duckdb/extension/icu/third_party/icu/i18n/decimfmt.cpp +1 -1
  208. package/src/duckdb/extension/icu/third_party/icu/i18n/dtfmtsym.cpp +1 -1
  209. package/src/duckdb/extension/icu/third_party/icu/i18n/dtitvfmt.cpp +1 -1
  210. package/src/duckdb/extension/icu/third_party/icu/i18n/dtitvinf.cpp +1 -1
  211. package/src/duckdb/extension/icu/third_party/icu/i18n/dtptngen.cpp +2 -2
  212. package/src/duckdb/extension/icu/third_party/icu/i18n/dtptngen_impl.h +4 -4
  213. package/src/duckdb/extension/icu/third_party/icu/i18n/dtrule.cpp +2 -2
  214. package/src/duckdb/extension/icu/third_party/icu/i18n/fmtable.cpp +1 -1
  215. package/src/duckdb/extension/icu/third_party/icu/i18n/format.cpp +1 -1
  216. package/src/duckdb/extension/icu/third_party/icu/i18n/fpositer.cpp +1 -1
  217. package/src/duckdb/extension/icu/third_party/icu/i18n/measfmt.cpp +1 -1
  218. package/src/duckdb/extension/icu/third_party/icu/i18n/measunit.cpp +1 -1
  219. package/src/duckdb/extension/icu/third_party/icu/i18n/measure.cpp +1 -1
  220. package/src/duckdb/extension/icu/third_party/icu/i18n/msgfmt.cpp +2 -2
  221. package/src/duckdb/extension/icu/third_party/icu/i18n/nfrs.cpp +1 -1
  222. package/src/duckdb/extension/icu/third_party/icu/i18n/nfrs.h +2 -2
  223. package/src/duckdb/extension/icu/third_party/icu/i18n/nfrule.cpp +1 -1
  224. package/src/duckdb/extension/icu/third_party/icu/i18n/nfrule.h +2 -2
  225. package/src/duckdb/extension/icu/third_party/icu/i18n/nfsubs.cpp +9 -9
  226. package/src/duckdb/extension/icu/third_party/icu/i18n/nfsubs.h +2 -2
  227. package/src/duckdb/extension/icu/third_party/icu/i18n/number_asformat.cpp +1 -1
  228. package/src/duckdb/extension/icu/third_party/icu/i18n/number_asformat.h +1 -1
  229. package/src/duckdb/extension/icu/third_party/icu/i18n/numfmt.cpp +1 -1
  230. package/src/duckdb/extension/icu/third_party/icu/i18n/olsontz.cpp +1 -1
  231. package/src/duckdb/extension/icu/third_party/icu/i18n/olsontz.h +1 -1
  232. package/src/duckdb/extension/icu/third_party/icu/i18n/plurfmt.cpp +2 -2
  233. package/src/duckdb/extension/icu/third_party/icu/i18n/plurrule.cpp +1 -1
  234. package/src/duckdb/extension/icu/third_party/icu/i18n/rbnf.cpp +4 -4
  235. package/src/duckdb/extension/icu/third_party/icu/i18n/rbtz.cpp +2 -2
  236. package/src/duckdb/extension/icu/third_party/icu/i18n/region.cpp +2 -2
  237. package/src/duckdb/extension/icu/third_party/icu/i18n/reldtfmt.cpp +1 -1
  238. package/src/duckdb/extension/icu/third_party/icu/i18n/reldtfmt.h +1 -1
  239. package/src/duckdb/extension/icu/third_party/icu/i18n/rulebasedcollator.cpp +1 -1
  240. package/src/duckdb/extension/icu/third_party/icu/i18n/selfmt.cpp +2 -2
  241. package/src/duckdb/extension/icu/third_party/icu/i18n/simpletz.cpp +1 -1
  242. package/src/duckdb/extension/icu/third_party/icu/i18n/smpdtfmt.cpp +1 -1
  243. package/src/duckdb/extension/icu/third_party/icu/i18n/sortkey.cpp +1 -1
  244. package/src/duckdb/extension/icu/third_party/icu/i18n/timezone.cpp +1 -1
  245. package/src/duckdb/extension/icu/third_party/icu/i18n/tmutamt.cpp +1 -1
  246. package/src/duckdb/extension/icu/third_party/icu/i18n/tzfmt.cpp +1 -1
  247. package/src/duckdb/extension/icu/third_party/icu/i18n/tzgnames.cpp +1 -1
  248. package/src/duckdb/extension/icu/third_party/icu/i18n/tzgnames.h +2 -2
  249. package/src/duckdb/extension/icu/third_party/icu/i18n/tznames.cpp +3 -3
  250. package/src/duckdb/extension/icu/third_party/icu/i18n/tznames_impl.cpp +2 -2
  251. package/src/duckdb/extension/icu/third_party/icu/i18n/tznames_impl.h +2 -2
  252. package/src/duckdb/extension/icu/third_party/icu/i18n/tzrule.cpp +8 -8
  253. package/src/duckdb/extension/icu/third_party/icu/i18n/tztrans.cpp +2 -2
  254. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/alphaindex.h +2 -2
  255. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/calendar.h +2 -2
  256. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/choicfmt.h +1 -1
  257. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/coleitr.h +2 -2
  258. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/coll.h +2 -2
  259. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/currpinf.h +3 -3
  260. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/datefmt.h +1 -1
  261. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/dcfmtsym.h +2 -2
  262. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/decimfmt.h +1 -1
  263. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/dtfmtsym.h +2 -2
  264. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/dtitvfmt.h +3 -3
  265. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/dtitvinf.h +3 -3
  266. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/dtptngen.h +2 -2
  267. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/dtrule.h +2 -2
  268. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/fieldpos.h +4 -4
  269. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/fmtable.h +2 -2
  270. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/format.h +2 -2
  271. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/fpositer.h +2 -2
  272. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/measfmt.h +1 -1
  273. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/measunit.h +2 -2
  274. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/measure.h +1 -1
  275. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/msgfmt.h +2 -2
  276. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/numfmt.h +1 -1
  277. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/plurfmt.h +2 -2
  278. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/plurrule.h +2 -2
  279. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/rbnf.h +1 -1
  280. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/rbtz.h +2 -2
  281. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/region.h +2 -2
  282. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/search.h +2 -2
  283. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/selfmt.h +2 -2
  284. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/simpletz.h +1 -1
  285. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/smpdtfmt.h +1 -1
  286. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/sortkey.h +3 -3
  287. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/stsearch.h +1 -1
  288. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tblcoll.h +1 -1
  289. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/timezone.h +2 -2
  290. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tmutamt.h +3 -3
  291. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tmutfmt.h +2 -2
  292. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tzfmt.h +1 -1
  293. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tznames.h +2 -2
  294. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tzrule.h +8 -8
  295. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tztrans.h +2 -2
  296. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/vtzone.h +2 -2
  297. package/src/duckdb/extension/icu/third_party/icu/i18n/utf16collationiterator.cpp +2 -2
  298. package/src/duckdb/extension/icu/third_party/icu/i18n/utf16collationiterator.h +2 -2
  299. package/src/duckdb/extension/icu/third_party/icu/i18n/vtzone.cpp +2 -2
  300. package/src/duckdb/extension/json/buffered_json_reader.cpp +6 -1
  301. package/src/duckdb/extension/json/include/buffered_json_reader.hpp +2 -0
  302. package/src/duckdb/extension/json/include/json_common.hpp +14 -10
  303. package/src/duckdb/extension/json/include/json_scan.hpp +48 -7
  304. package/src/duckdb/extension/json/include/json_structure.hpp +2 -1
  305. package/src/duckdb/extension/json/include/json_transform.hpp +5 -2
  306. package/src/duckdb/extension/json/json_functions/copy_json.cpp +1 -1
  307. package/src/duckdb/extension/json/json_functions/json_create.cpp +57 -20
  308. package/src/duckdb/extension/json/json_functions/json_serialize_plan.cpp +7 -6
  309. package/src/duckdb/extension/json/json_functions/json_serialize_sql.cpp +6 -5
  310. package/src/duckdb/extension/json/json_functions/json_structure.cpp +20 -17
  311. package/src/duckdb/extension/json/json_functions/json_transform.cpp +48 -17
  312. package/src/duckdb/extension/json/json_functions/read_json.cpp +83 -34
  313. package/src/duckdb/extension/json/json_functions/read_json_objects.cpp +3 -3
  314. package/src/duckdb/extension/json/json_functions.cpp +14 -16
  315. package/src/duckdb/extension/json/json_scan.cpp +36 -16
  316. package/src/duckdb/extension/json/json_serializer.cpp +1 -1
  317. package/src/duckdb/extension/json/serialize_json.cpp +2 -2
  318. package/src/duckdb/extension/parquet/column_reader.cpp +136 -116
  319. package/src/duckdb/extension/parquet/column_writer.cpp +870 -604
  320. package/src/duckdb/extension/parquet/geo_parquet.cpp +4 -5
  321. package/src/duckdb/extension/parquet/include/boolean_column_reader.hpp +0 -4
  322. package/src/duckdb/extension/parquet/include/column_reader.hpp +24 -19
  323. package/src/duckdb/extension/parquet/include/column_writer.hpp +7 -5
  324. package/src/duckdb/extension/parquet/include/decode_utils.hpp +138 -18
  325. package/src/duckdb/extension/parquet/include/geo_parquet.hpp +4 -3
  326. package/src/duckdb/extension/parquet/include/null_column_reader.hpp +1 -14
  327. package/src/duckdb/extension/parquet/include/parquet_bss_encoder.hpp +45 -0
  328. package/src/duckdb/extension/parquet/include/parquet_crypto.hpp +1 -1
  329. package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +101 -90
  330. package/src/duckdb/extension/parquet/include/parquet_dbp_encoder.hpp +179 -0
  331. package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +2 -3
  332. package/src/duckdb/extension/parquet/include/parquet_dlba_encoder.hpp +48 -0
  333. package/src/duckdb/extension/parquet/include/parquet_extension.hpp +8 -0
  334. package/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp +2 -2
  335. package/src/duckdb/extension/parquet/include/parquet_metadata.hpp +5 -0
  336. package/src/duckdb/extension/parquet/include/parquet_reader.hpp +22 -18
  337. package/src/duckdb/extension/parquet/include/parquet_rle_bp_decoder.hpp +1 -5
  338. package/src/duckdb/extension/parquet/include/parquet_statistics.hpp +87 -3
  339. package/src/duckdb/extension/parquet/include/parquet_writer.hpp +30 -16
  340. package/src/duckdb/extension/parquet/include/resizable_buffer.hpp +1 -0
  341. package/src/duckdb/extension/parquet/include/string_column_reader.hpp +0 -8
  342. package/src/duckdb/extension/parquet/include/struct_column_reader.hpp +1 -1
  343. package/src/duckdb/extension/parquet/include/templated_column_reader.hpp +1 -42
  344. package/src/duckdb/extension/parquet/include/thrift_tools.hpp +13 -1
  345. package/src/duckdb/extension/parquet/include/zstd_file_system.hpp +4 -0
  346. package/src/duckdb/extension/parquet/parquet_extension.cpp +240 -197
  347. package/src/duckdb/extension/parquet/parquet_metadata.cpp +138 -6
  348. package/src/duckdb/extension/parquet/parquet_reader.cpp +155 -79
  349. package/src/duckdb/extension/parquet/parquet_statistics.cpp +258 -38
  350. package/src/duckdb/extension/parquet/parquet_timestamp.cpp +17 -3
  351. package/src/duckdb/extension/parquet/parquet_writer.cpp +65 -34
  352. package/src/duckdb/extension/parquet/serialize_parquet.cpp +4 -0
  353. package/src/duckdb/extension/parquet/zstd_file_system.cpp +13 -0
  354. package/src/duckdb/src/catalog/catalog.cpp +272 -97
  355. package/src/duckdb/src/catalog/catalog_entry/duck_index_entry.cpp +9 -4
  356. package/src/duckdb/src/catalog/catalog_entry/duck_schema_entry.cpp +8 -0
  357. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +145 -95
  358. package/src/duckdb/src/catalog/catalog_entry/scalar_function_catalog_entry.cpp +9 -3
  359. package/src/duckdb/src/catalog/catalog_entry/schema_catalog_entry.cpp +15 -0
  360. package/src/duckdb/src/catalog/catalog_entry/table_catalog_entry.cpp +40 -24
  361. package/src/duckdb/src/catalog/catalog_entry/table_function_catalog_entry.cpp +1 -1
  362. package/src/duckdb/src/catalog/catalog_entry/type_catalog_entry.cpp +2 -2
  363. package/src/duckdb/src/catalog/catalog_entry.cpp +3 -0
  364. package/src/duckdb/src/catalog/catalog_entry_retriever.cpp +60 -5
  365. package/src/duckdb/src/catalog/catalog_search_path.cpp +27 -14
  366. package/src/duckdb/src/catalog/catalog_set.cpp +75 -31
  367. package/src/duckdb/src/catalog/default/default_functions.cpp +13 -8
  368. package/src/duckdb/src/catalog/default/default_views.cpp +1 -0
  369. package/src/duckdb/src/catalog/dependency_manager.cpp +133 -5
  370. package/src/duckdb/src/catalog/duck_catalog.cpp +17 -9
  371. package/src/duckdb/src/common/adbc/adbc.cpp +18 -0
  372. package/src/duckdb/src/common/allocator.cpp +3 -1
  373. package/src/duckdb/src/common/arrow/arrow_appender.cpp +30 -9
  374. package/src/duckdb/src/common/arrow/arrow_converter.cpp +63 -82
  375. package/src/duckdb/src/common/arrow/arrow_merge_event.cpp +4 -3
  376. package/src/duckdb/src/common/arrow/arrow_type_extension.cpp +361 -0
  377. package/src/duckdb/src/common/arrow/arrow_util.cpp +10 -6
  378. package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +6 -2
  379. package/src/duckdb/src/common/arrow/physical_arrow_collector.cpp +2 -1
  380. package/src/duckdb/src/common/arrow/schema_metadata.cpp +27 -14
  381. package/src/duckdb/src/common/assert.cpp +1 -2
  382. package/src/duckdb/src/common/bind_helpers.cpp +1 -1
  383. package/src/duckdb/src/common/box_renderer.cpp +316 -26
  384. package/src/duckdb/src/common/cgroups.cpp +7 -1
  385. package/src/duckdb/src/common/compressed_file_system.cpp +1 -1
  386. package/src/duckdb/src/common/enum_util.cpp +2865 -6882
  387. package/src/duckdb/src/common/enums/compression_type.cpp +12 -0
  388. package/src/duckdb/src/common/enums/metric_type.cpp +24 -0
  389. package/src/duckdb/src/common/enums/optimizer_type.cpp +4 -0
  390. package/src/duckdb/src/common/enums/physical_operator_type.cpp +2 -0
  391. package/src/duckdb/src/common/error_data.cpp +23 -6
  392. package/src/duckdb/src/common/exception/binder_exception.cpp +1 -1
  393. package/src/duckdb/src/common/exception.cpp +20 -28
  394. package/src/duckdb/src/common/extra_type_info.cpp +85 -20
  395. package/src/duckdb/src/common/file_buffer.cpp +5 -2
  396. package/src/duckdb/src/common/file_system.cpp +8 -3
  397. package/src/duckdb/src/common/fsst.cpp +3 -3
  398. package/src/duckdb/src/common/hive_partitioning.cpp +1 -1
  399. package/src/duckdb/src/common/local_file_system.cpp +169 -60
  400. package/src/duckdb/src/common/multi_file_list.cpp +4 -1
  401. package/src/duckdb/src/common/multi_file_reader.cpp +240 -63
  402. package/src/duckdb/src/common/opener_file_system.cpp +37 -0
  403. package/src/duckdb/src/common/operator/cast_operators.cpp +77 -11
  404. package/src/duckdb/src/common/operator/string_cast.cpp +6 -2
  405. package/src/duckdb/src/common/pipe_file_system.cpp +4 -4
  406. package/src/duckdb/src/common/progress_bar/progress_bar.cpp +25 -14
  407. package/src/duckdb/src/common/radix_partitioning.cpp +17 -16
  408. package/src/duckdb/src/common/random_engine.cpp +39 -3
  409. package/src/duckdb/src/common/render_tree.cpp +3 -19
  410. package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
  411. package/src/duckdb/src/common/row_operations/row_gather.cpp +2 -58
  412. package/src/duckdb/src/common/row_operations/row_matcher.cpp +2 -2
  413. package/src/duckdb/src/common/row_operations/row_radix_scatter.cpp +2 -0
  414. package/src/duckdb/src/common/row_operations/row_scatter.cpp +20 -19
  415. package/src/duckdb/src/common/serializer/buffered_file_writer.cpp +1 -1
  416. package/src/duckdb/src/common/serializer/memory_stream.cpp +36 -0
  417. package/src/duckdb/src/common/sort/comparators.cpp +7 -7
  418. package/src/duckdb/src/common/sort/partition_state.cpp +2 -2
  419. package/src/duckdb/src/common/stacktrace.cpp +127 -0
  420. package/src/duckdb/src/common/string_util.cpp +157 -32
  421. package/src/duckdb/src/common/tree_renderer/text_tree_renderer.cpp +15 -3
  422. package/src/duckdb/src/common/types/column/column_data_allocator.cpp +4 -0
  423. package/src/duckdb/src/common/types/column/column_data_collection.cpp +71 -8
  424. package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +27 -6
  425. package/src/duckdb/src/common/types/conflict_manager.cpp +21 -7
  426. package/src/duckdb/src/common/types/date.cpp +39 -25
  427. package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +4 -11
  428. package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +21 -7
  429. package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +10 -1
  430. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +17 -17
  431. package/src/duckdb/src/common/types/timestamp.cpp +70 -33
  432. package/src/duckdb/src/common/types/uuid.cpp +11 -0
  433. package/src/duckdb/src/common/types/validity_mask.cpp +16 -5
  434. package/src/duckdb/src/common/types/value.cpp +357 -199
  435. package/src/duckdb/src/common/types/varint.cpp +64 -18
  436. package/src/duckdb/src/common/types/vector.cpp +78 -38
  437. package/src/duckdb/src/common/types.cpp +199 -92
  438. package/src/duckdb/src/common/vector_operations/comparison_operators.cpp +2 -1
  439. package/src/duckdb/src/common/vector_operations/is_distinct_from.cpp +32 -5
  440. package/src/duckdb/src/common/vector_operations/vector_hash.cpp +3 -1
  441. package/src/duckdb/src/execution/adaptive_filter.cpp +6 -2
  442. package/src/duckdb/src/execution/aggregate_hashtable.cpp +410 -111
  443. package/src/duckdb/src/execution/column_binding_resolver.cpp +2 -2
  444. package/src/duckdb/src/execution/expression_executor/execute_between.cpp +6 -0
  445. package/src/duckdb/src/execution/expression_executor/execute_cast.cpp +4 -3
  446. package/src/duckdb/src/execution/expression_executor/execute_comparison.cpp +2 -2
  447. package/src/duckdb/src/execution/expression_executor/execute_conjunction.cpp +2 -2
  448. package/src/duckdb/src/execution/expression_executor/execute_function.cpp +1 -0
  449. package/src/duckdb/src/execution/expression_executor/execute_operator.cpp +5 -4
  450. package/src/duckdb/src/execution/expression_executor.cpp +5 -3
  451. package/src/duckdb/src/execution/index/art/art.cpp +208 -72
  452. package/src/duckdb/src/execution/index/art/base_leaf.cpp +1 -1
  453. package/src/duckdb/src/execution/index/art/leaf.cpp +12 -7
  454. package/src/duckdb/src/execution/index/art/node.cpp +2 -1
  455. package/src/duckdb/src/execution/index/art/node256_leaf.cpp +6 -6
  456. package/src/duckdb/src/execution/index/art/plan_art.cpp +50 -55
  457. package/src/duckdb/src/execution/index/art/prefix.cpp +7 -13
  458. package/src/duckdb/src/execution/index/bound_index.cpp +30 -5
  459. package/src/duckdb/src/execution/index/fixed_size_allocator.cpp +3 -5
  460. package/src/duckdb/src/execution/index/fixed_size_buffer.cpp +14 -9
  461. package/src/duckdb/src/execution/join_hashtable.cpp +254 -158
  462. package/src/duckdb/src/execution/operator/aggregate/grouped_aggregate_data.cpp +1 -1
  463. package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +7 -7
  464. package/src/duckdb/src/execution/operator/aggregate/physical_partitioned_aggregate.cpp +226 -0
  465. package/src/duckdb/src/execution/operator/aggregate/physical_perfecthash_aggregate.cpp +3 -3
  466. package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +3 -3
  467. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +77 -70
  468. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +114 -50
  469. package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp +2 -2
  470. package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer_manager.cpp +19 -10
  471. package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_file_handle.cpp +22 -15
  472. package/src/duckdb/src/execution/operator/csv_scanner/encode/csv_encoder.cpp +95 -0
  473. package/src/duckdb/src/execution/operator/csv_scanner/scanner/column_count_scanner.cpp +6 -1
  474. package/src/duckdb/src/execution/operator/csv_scanner/scanner/csv_schema.cpp +75 -2
  475. package/src/duckdb/src/execution/operator/csv_scanner/scanner/scanner_boundary.cpp +40 -12
  476. package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +395 -163
  477. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +20 -23
  478. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +115 -49
  479. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +66 -12
  480. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +20 -23
  481. package/src/duckdb/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp +220 -46
  482. package/src/duckdb/src/execution/operator/csv_scanner/table_function/csv_file_scanner.cpp +43 -32
  483. package/src/duckdb/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp +54 -119
  484. package/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp +184 -20
  485. package/src/duckdb/src/execution/operator/csv_scanner/util/csv_reader_options.cpp +83 -21
  486. package/src/duckdb/src/execution/operator/csv_scanner/util/csv_validator.cpp +63 -0
  487. package/src/duckdb/src/execution/operator/helper/physical_reservoir_sample.cpp +7 -4
  488. package/src/duckdb/src/execution/operator/helper/physical_set.cpp +1 -1
  489. package/src/duckdb/src/execution/operator/helper/physical_vacuum.cpp +3 -2
  490. package/src/duckdb/src/execution/operator/helper/physical_verify_vector.cpp +9 -1
  491. package/src/duckdb/src/execution/operator/join/perfect_hash_join_executor.cpp +132 -15
  492. package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +64 -55
  493. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +284 -154
  494. package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +40 -55
  495. package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +0 -1
  496. package/src/duckdb/src/execution/operator/order/physical_order.cpp +7 -3
  497. package/src/duckdb/src/execution/operator/order/physical_top_n.cpp +298 -227
  498. package/src/duckdb/src/execution/operator/persistent/csv_rejects_table.cpp +5 -2
  499. package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +3 -4
  500. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +32 -19
  501. package/src/duckdb/src/execution/operator/persistent/physical_copy_database.cpp +1 -0
  502. package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +6 -0
  503. package/src/duckdb/src/execution/operator/persistent/physical_delete.cpp +58 -19
  504. package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +11 -27
  505. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +308 -119
  506. package/src/duckdb/src/execution/operator/persistent/physical_update.cpp +105 -55
  507. package/src/duckdb/src/execution/operator/projection/physical_tableinout_function.cpp +6 -2
  508. package/src/duckdb/src/execution/operator/projection/physical_unnest.cpp +1 -1
  509. package/src/duckdb/src/execution/operator/scan/physical_positional_scan.cpp +15 -6
  510. package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +92 -50
  511. package/src/duckdb/src/execution/operator/schema/physical_alter.cpp +0 -1
  512. package/src/duckdb/src/execution/operator/schema/physical_attach.cpp +8 -4
  513. package/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp +54 -22
  514. package/src/duckdb/src/execution/operator/set/physical_union.cpp +5 -1
  515. package/src/duckdb/src/execution/physical_operator.cpp +15 -9
  516. package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +101 -12
  517. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +11 -140
  518. package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp +11 -13
  519. package/src/duckdb/src/execution/physical_plan/plan_cte.cpp +1 -1
  520. package/src/duckdb/src/execution/physical_plan/plan_delete.cpp +1 -1
  521. package/src/duckdb/src/execution/physical_plan/plan_delim_join.cpp +1 -1
  522. package/src/duckdb/src/execution/physical_plan/plan_distinct.cpp +6 -5
  523. package/src/duckdb/src/execution/physical_plan/plan_export.cpp +0 -4
  524. package/src/duckdb/src/execution/physical_plan/plan_filter.cpp +1 -1
  525. package/src/duckdb/src/execution/physical_plan/plan_get.cpp +16 -13
  526. package/src/duckdb/src/execution/physical_plan/plan_insert.cpp +1 -1
  527. package/src/duckdb/src/execution/physical_plan/plan_order.cpp +7 -7
  528. package/src/duckdb/src/execution/physical_plan/plan_prepare.cpp +2 -2
  529. package/src/duckdb/src/execution/physical_plan/plan_projection.cpp +1 -1
  530. package/src/duckdb/src/execution/physical_plan/plan_sample.cpp +8 -3
  531. package/src/duckdb/src/execution/physical_plan/plan_set_operation.cpp +1 -2
  532. package/src/duckdb/src/execution/physical_plan/plan_simple.cpp +1 -2
  533. package/src/duckdb/src/execution/physical_plan/plan_top_n.cpp +3 -2
  534. package/src/duckdb/src/execution/physical_plan_generator.cpp +0 -22
  535. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +136 -116
  536. package/src/duckdb/src/execution/sample/base_reservoir_sample.cpp +136 -0
  537. package/src/duckdb/src/execution/sample/reservoir_sample.cpp +930 -0
  538. package/src/duckdb/src/function/aggregate/distributive/count.cpp +6 -12
  539. package/src/duckdb/src/function/aggregate/distributive/{first.cpp → first_last_any.cpp} +37 -18
  540. package/src/duckdb/src/{core_functions → function}/aggregate/distributive/minmax.cpp +19 -12
  541. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +72 -13
  542. package/src/duckdb/src/function/built_in_functions.cpp +85 -2
  543. package/src/duckdb/src/function/cast/decimal_cast.cpp +1 -1
  544. package/src/duckdb/src/function/cast/string_cast.cpp +1 -1
  545. package/src/duckdb/src/function/cast/struct_cast.cpp +81 -49
  546. package/src/duckdb/src/function/cast/union/from_struct.cpp +7 -5
  547. package/src/duckdb/src/function/compression_config.cpp +6 -0
  548. package/src/duckdb/src/function/encoding_function.cpp +134 -0
  549. package/src/duckdb/src/function/function.cpp +8 -13
  550. package/src/duckdb/src/function/function_binder.cpp +100 -21
  551. package/src/duckdb/src/function/function_list.cpp +178 -0
  552. package/src/duckdb/src/function/macro_function.cpp +4 -4
  553. package/src/duckdb/src/function/pragma/pragma_functions.cpp +0 -2
  554. package/src/duckdb/src/function/pragma/pragma_queries.cpp +0 -4
  555. package/src/duckdb/src/{core_functions/core_functions.cpp → function/register_function_list.cpp} +12 -8
  556. package/src/duckdb/src/function/scalar/compressed_materialization/compress_integral.cpp +62 -23
  557. package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +33 -16
  558. package/src/duckdb/src/function/scalar/compressed_materialization_utils.cpp +21 -0
  559. package/src/duckdb/src/{core_functions/scalar/blob → function/scalar}/create_sort_key.cpp +86 -23
  560. package/src/duckdb/src/{core_functions → function}/scalar/date/strftime.cpp +6 -4
  561. package/src/duckdb/src/function/scalar/generic/constant_or_null.cpp +5 -7
  562. package/src/duckdb/src/{core_functions → function}/scalar/generic/error.cpp +3 -1
  563. package/src/duckdb/src/function/scalar/generic/getvariable.cpp +2 -2
  564. package/src/duckdb/src/function/scalar/list/contains_or_position.cpp +1 -7
  565. package/src/duckdb/src/function/scalar/list/list_extract.cpp +27 -21
  566. package/src/duckdb/src/function/scalar/list/list_resize.cpp +8 -12
  567. package/src/duckdb/src/function/scalar/list/list_select.cpp +1 -4
  568. package/src/duckdb/src/function/scalar/list/list_zip.cpp +6 -6
  569. package/src/duckdb/src/{core_functions → function}/scalar/map/map_contains.cpp +2 -2
  570. package/src/duckdb/src/function/scalar/nested_functions.cpp +0 -11
  571. package/src/duckdb/src/function/scalar/{operators → operator}/add.cpp +2 -1
  572. package/src/duckdb/src/function/scalar/{operators → operator}/arithmetic.cpp +195 -127
  573. package/src/duckdb/src/function/scalar/sequence/nextval.cpp +30 -21
  574. package/src/duckdb/src/function/scalar/strftime_format.cpp +10 -0
  575. package/src/duckdb/src/function/scalar/string/caseconvert.cpp +11 -41
  576. package/src/duckdb/src/function/scalar/string/concat.cpp +22 -20
  577. package/src/duckdb/src/function/scalar/string/concat_ws.cpp +2 -2
  578. package/src/duckdb/src/function/scalar/string/contains.cpp +16 -19
  579. package/src/duckdb/src/function/scalar/string/length.cpp +38 -24
  580. package/src/duckdb/src/function/scalar/string/like.cpp +80 -47
  581. package/src/duckdb/src/{core_functions → function}/scalar/string/md5.cpp +2 -2
  582. package/src/duckdb/src/function/scalar/string/nfc_normalize.cpp +2 -6
  583. package/src/duckdb/src/function/scalar/string/prefix.cpp +0 -4
  584. package/src/duckdb/src/function/scalar/string/regexp/regexp_extract_all.cpp +2 -1
  585. package/src/duckdb/src/function/scalar/string/regexp.cpp +17 -7
  586. package/src/duckdb/src/{core_functions → function}/scalar/string/regexp_escape.cpp +2 -2
  587. package/src/duckdb/src/{core_functions → function}/scalar/string/sha1.cpp +1 -1
  588. package/src/duckdb/src/{core_functions → function}/scalar/string/sha256.cpp +1 -1
  589. package/src/duckdb/src/{core_functions → function}/scalar/string/string_split.cpp +4 -5
  590. package/src/duckdb/src/function/scalar/string/strip_accents.cpp +3 -6
  591. package/src/duckdb/src/function/scalar/string/substring.cpp +14 -13
  592. package/src/duckdb/src/function/scalar/string/suffix.cpp +0 -4
  593. package/src/duckdb/src/function/scalar/struct/struct_concat.cpp +115 -0
  594. package/src/duckdb/src/function/scalar/struct/struct_extract.cpp +35 -31
  595. package/src/duckdb/src/{core_functions → function}/scalar/struct/struct_pack.cpp +7 -7
  596. package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +5 -8
  597. package/src/duckdb/src/function/scalar/system/write_log.cpp +170 -0
  598. package/src/duckdb/src/function/scalar_function.cpp +5 -5
  599. package/src/duckdb/src/function/table/arrow/arrow_array_scan_state.cpp +3 -2
  600. package/src/duckdb/src/function/table/arrow/arrow_duck_schema.cpp +287 -1
  601. package/src/duckdb/src/function/table/arrow/arrow_type_info.cpp +6 -6
  602. package/src/duckdb/src/function/table/arrow.cpp +32 -352
  603. package/src/duckdb/src/function/table/arrow_conversion.cpp +43 -7
  604. package/src/duckdb/src/function/table/copy_csv.cpp +38 -23
  605. package/src/duckdb/src/function/table/glob.cpp +1 -1
  606. package/src/duckdb/src/function/table/query_function.cpp +12 -7
  607. package/src/duckdb/src/function/table/read_csv.cpp +114 -46
  608. package/src/duckdb/src/function/table/read_file.cpp +26 -6
  609. package/src/duckdb/src/function/table/sniff_csv.cpp +25 -5
  610. package/src/duckdb/src/function/table/system/duckdb_columns.cpp +1 -1
  611. package/src/duckdb/src/function/table/system/duckdb_constraints.cpp +1 -1
  612. package/src/duckdb/src/function/table/system/duckdb_dependencies.cpp +6 -7
  613. package/src/duckdb/src/function/table/system/duckdb_extensions.cpp +1 -1
  614. package/src/duckdb/src/function/table/system/duckdb_functions.cpp +141 -16
  615. package/src/duckdb/src/function/table/system/duckdb_log.cpp +64 -0
  616. package/src/duckdb/src/function/table/system/duckdb_log_contexts.cpp +65 -0
  617. package/src/duckdb/src/function/table/system/duckdb_memory.cpp +0 -1
  618. package/src/duckdb/src/function/table/system/duckdb_settings.cpp +1 -1
  619. package/src/duckdb/src/function/table/system/duckdb_tables.cpp +1 -13
  620. package/src/duckdb/src/function/table/system/duckdb_types.cpp +1 -1
  621. package/src/duckdb/src/function/table/system/pragma_storage_info.cpp +17 -0
  622. package/src/duckdb/src/function/table/system/pragma_table_info.cpp +6 -0
  623. package/src/duckdb/src/function/table/system/pragma_table_sample.cpp +95 -0
  624. package/src/duckdb/src/function/table/system/test_all_types.cpp +56 -46
  625. package/src/duckdb/src/function/table/system_functions.cpp +3 -0
  626. package/src/duckdb/src/function/table/table_scan.cpp +487 -289
  627. package/src/duckdb/src/function/table/version/pragma_version.cpp +3 -3
  628. package/src/duckdb/src/function/table_function.cpp +10 -6
  629. package/src/duckdb/src/function/window/window_aggregate_function.cpp +248 -0
  630. package/src/duckdb/src/function/window/window_aggregate_states.cpp +48 -0
  631. package/src/duckdb/src/function/window/window_aggregator.cpp +88 -0
  632. package/src/duckdb/src/function/window/window_boundaries_state.cpp +854 -0
  633. package/src/duckdb/src/function/window/window_collection.cpp +146 -0
  634. package/src/duckdb/src/function/window/window_constant_aggregator.cpp +357 -0
  635. package/src/duckdb/src/function/window/window_custom_aggregator.cpp +146 -0
  636. package/src/duckdb/src/function/window/window_distinct_aggregator.cpp +758 -0
  637. package/src/duckdb/src/function/window/window_executor.cpp +99 -0
  638. package/src/duckdb/src/function/window/window_index_tree.cpp +63 -0
  639. package/src/duckdb/src/function/window/window_merge_sort_tree.cpp +275 -0
  640. package/src/duckdb/src/function/window/window_naive_aggregator.cpp +361 -0
  641. package/src/duckdb/src/function/window/window_rank_function.cpp +288 -0
  642. package/src/duckdb/src/function/window/window_rownumber_function.cpp +191 -0
  643. package/src/duckdb/src/function/window/window_segment_tree.cpp +594 -0
  644. package/src/duckdb/src/function/window/window_shared_expressions.cpp +50 -0
  645. package/src/duckdb/src/function/window/window_token_tree.cpp +142 -0
  646. package/src/duckdb/src/function/window/window_value_function.cpp +566 -0
  647. package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +74 -17
  648. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_index_entry.hpp +1 -1
  649. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_schema_entry.hpp +2 -0
  650. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_table_entry.hpp +9 -0
  651. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/function_entry.hpp +4 -10
  652. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/pragma_function_catalog_entry.hpp +1 -1
  653. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/scalar_function_catalog_entry.hpp +2 -2
  654. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/schema_catalog_entry.hpp +2 -0
  655. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/table_catalog_entry.hpp +18 -3
  656. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/table_function_catalog_entry.hpp +1 -1
  657. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/type_catalog_entry.hpp +2 -1
  658. package/src/duckdb/src/include/duckdb/catalog/catalog_entry.hpp +5 -2
  659. package/src/duckdb/src/include/duckdb/catalog/catalog_entry_retriever.hpp +21 -18
  660. package/src/duckdb/src/include/duckdb/catalog/catalog_search_path.hpp +3 -2
  661. package/src/duckdb/src/include/duckdb/catalog/catalog_set.hpp +10 -2
  662. package/src/duckdb/src/include/duckdb/catalog/dependency_manager.hpp +11 -0
  663. package/src/duckdb/src/include/duckdb/catalog/duck_catalog.hpp +9 -4
  664. package/src/duckdb/src/include/duckdb/common/allocator.hpp +3 -0
  665. package/src/duckdb/src/include/duckdb/common/array_ptr.hpp +8 -0
  666. package/src/duckdb/src/include/duckdb/common/arrow/appender/append_data.hpp +4 -1
  667. package/src/duckdb/src/include/duckdb/common/arrow/appender/list_data.hpp +3 -1
  668. package/src/duckdb/src/include/duckdb/common/arrow/appender/list_view_data.hpp +3 -1
  669. package/src/duckdb/src/include/duckdb/common/arrow/appender/varchar_data.hpp +2 -1
  670. package/src/duckdb/src/include/duckdb/common/arrow/arrow_appender.hpp +7 -3
  671. package/src/duckdb/src/include/duckdb/common/arrow/arrow_converter.hpp +26 -3
  672. package/src/duckdb/src/include/duckdb/common/arrow/arrow_query_result.hpp +1 -1
  673. package/src/duckdb/src/include/duckdb/common/arrow/arrow_type_extension.hpp +144 -0
  674. package/src/duckdb/src/include/duckdb/common/arrow/arrow_util.hpp +5 -2
  675. package/src/duckdb/src/include/duckdb/common/arrow/result_arrow_wrapper.hpp +2 -0
  676. package/src/duckdb/src/include/duckdb/common/arrow/schema_metadata.hpp +11 -4
  677. package/src/duckdb/src/include/duckdb/common/assert.hpp +12 -1
  678. package/src/duckdb/src/include/duckdb/common/atomic_ptr.hpp +102 -0
  679. package/src/duckdb/src/include/duckdb/common/box_renderer.hpp +65 -6
  680. package/src/duckdb/src/include/duckdb/common/chrono.hpp +1 -0
  681. package/src/duckdb/src/include/duckdb/common/column_index.hpp +72 -0
  682. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +120 -0
  683. package/src/duckdb/src/include/duckdb/{core_functions/core_functions.hpp → common/enums/collation_type.hpp} +2 -7
  684. package/src/duckdb/src/include/duckdb/common/enums/compression_type.hpp +5 -2
  685. package/src/duckdb/src/include/duckdb/common/enums/function_errors.hpp +18 -0
  686. package/src/duckdb/src/include/duckdb/common/enums/memory_tag.hpp +3 -2
  687. package/src/duckdb/src/include/duckdb/common/enums/metric_type.hpp +7 -2
  688. package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +4 -0
  689. package/src/duckdb/src/include/duckdb/common/enums/order_preservation_type.hpp +1 -1
  690. package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +1 -0
  691. package/src/duckdb/src/include/duckdb/common/enums/profiler_format.hpp +1 -1
  692. package/src/duckdb/src/include/duckdb/{core_functions/aggregate → common/enums}/quantile_enum.hpp +3 -1
  693. package/src/duckdb/src/include/duckdb/common/enums/scan_vector_type.hpp +2 -0
  694. package/src/duckdb/src/include/duckdb/common/error_data.hpp +1 -0
  695. package/src/duckdb/src/include/duckdb/common/exception/parser_exception.hpp +4 -0
  696. package/src/duckdb/src/include/duckdb/common/exception.hpp +1 -1
  697. package/src/duckdb/src/include/duckdb/common/extension_type_info.hpp +37 -0
  698. package/src/duckdb/src/include/duckdb/common/extra_operator_info.hpp +7 -2
  699. package/src/duckdb/src/include/duckdb/common/extra_type_info.hpp +9 -3
  700. package/src/duckdb/src/include/duckdb/common/fast_mem.hpp +6 -6
  701. package/src/duckdb/src/include/duckdb/common/file_buffer.hpp +19 -10
  702. package/src/duckdb/src/include/duckdb/common/file_opener.hpp +2 -0
  703. package/src/duckdb/src/include/duckdb/common/file_system.hpp +6 -1
  704. package/src/duckdb/src/include/duckdb/common/fsst.hpp +2 -2
  705. package/src/duckdb/src/include/duckdb/common/helper.hpp +6 -0
  706. package/src/duckdb/src/include/duckdb/common/hugeint.hpp +10 -0
  707. package/src/duckdb/src/include/duckdb/common/insertion_order_preserving_map.hpp +12 -2
  708. package/src/duckdb/src/include/duckdb/common/local_file_system.hpp +3 -0
  709. package/src/duckdb/src/include/duckdb/common/multi_file_list.hpp +2 -1
  710. package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +147 -27
  711. package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +4 -0
  712. package/src/duckdb/src/include/duckdb/common/numeric_utils.hpp +2 -7
  713. package/src/duckdb/src/include/duckdb/common/opener_file_system.hpp +16 -5
  714. package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +16 -0
  715. package/src/duckdb/src/include/duckdb/common/optional_idx.hpp +4 -0
  716. package/src/duckdb/src/include/duckdb/common/platform.hpp +34 -3
  717. package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +10 -13
  718. package/src/duckdb/src/include/duckdb/common/random_engine.hpp +8 -3
  719. package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +0 -2
  720. package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_writer.hpp +1 -1
  721. package/src/duckdb/src/include/duckdb/common/serializer/memory_stream.hpp +7 -0
  722. package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +1 -0
  723. package/src/duckdb/src/include/duckdb/common/serializer/serializer.hpp +1 -0
  724. package/src/duckdb/src/include/duckdb/common/stacktrace.hpp +25 -0
  725. package/src/duckdb/src/include/duckdb/common/string_util.hpp +30 -2
  726. package/src/duckdb/src/include/duckdb/common/tree_renderer/graphviz_tree_renderer.hpp +1 -1
  727. package/src/duckdb/src/include/duckdb/common/tree_renderer/html_tree_renderer.hpp +1 -1
  728. package/src/duckdb/src/include/duckdb/common/tree_renderer/json_tree_renderer.hpp +1 -1
  729. package/src/duckdb/src/include/duckdb/common/tree_renderer/text_tree_renderer.hpp +3 -2
  730. package/src/duckdb/src/include/duckdb/common/tree_renderer.hpp +2 -0
  731. package/src/duckdb/src/include/duckdb/common/type_util.hpp +8 -0
  732. package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +8 -0
  733. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +13 -2
  734. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp +2 -1
  735. package/src/duckdb/src/include/duckdb/common/types/conflict_manager.hpp +21 -4
  736. package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +4 -1
  737. package/src/duckdb/src/include/duckdb/common/types/date.hpp +9 -4
  738. package/src/duckdb/src/include/duckdb/common/types/date_lookup_cache.hpp +1 -1
  739. package/src/duckdb/src/include/duckdb/common/types/interval.hpp +58 -10
  740. package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +0 -4
  741. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_allocator.hpp +4 -0
  742. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +4 -0
  743. package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +10 -0
  744. package/src/duckdb/src/include/duckdb/common/types/timestamp.hpp +43 -16
  745. package/src/duckdb/src/include/duckdb/common/types/uuid.hpp +3 -1
  746. package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +63 -21
  747. package/src/duckdb/src/include/duckdb/common/types/value.hpp +62 -16
  748. package/src/duckdb/src/include/duckdb/common/types/varint.hpp +13 -0
  749. package/src/duckdb/src/include/duckdb/common/types/vector.hpp +34 -7
  750. package/src/duckdb/src/include/duckdb/common/types/vector_buffer.hpp +15 -0
  751. package/src/duckdb/src/include/duckdb/common/types.hpp +12 -7
  752. package/src/duckdb/src/include/duckdb/common/uhugeint.hpp +10 -0
  753. package/src/duckdb/src/include/duckdb/common/vector_operations/aggregate_executor.hpp +12 -13
  754. package/src/duckdb/src/include/duckdb/common/vector_operations/binary_executor.hpp +27 -0
  755. package/src/duckdb/src/include/duckdb/common/vector_operations/unary_executor.hpp +111 -4
  756. package/src/duckdb/src/include/duckdb/common/vector_operations/vector_operations.hpp +0 -1
  757. package/src/duckdb/src/include/duckdb/execution/adaptive_filter.hpp +2 -0
  758. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +48 -10
  759. package/src/duckdb/src/include/duckdb/execution/executor.hpp +2 -1
  760. package/src/duckdb/src/include/duckdb/execution/expression_executor.hpp +0 -1
  761. package/src/duckdb/src/include/duckdb/execution/ht_entry.hpp +25 -27
  762. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +28 -18
  763. package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +1 -0
  764. package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +2 -2
  765. package/src/duckdb/src/include/duckdb/execution/index/bound_index.hpp +23 -16
  766. package/src/duckdb/src/include/duckdb/execution/index/fixed_size_allocator.hpp +4 -0
  767. package/src/duckdb/src/include/duckdb/execution/index/fixed_size_buffer.hpp +2 -2
  768. package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +25 -16
  769. package/src/duckdb/src/include/duckdb/execution/merge_sort_tree.hpp +15 -10
  770. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp +1 -1
  771. package/src/duckdb/src/include/duckdb/execution/operator/{persistent/physical_fixed_batch_copy.hpp → aggregate/physical_partitioned_aggregate.hpp} +25 -27
  772. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_ungrouped_aggregate.hpp +1 -2
  773. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_window.hpp +5 -4
  774. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/ungrouped_aggregate_state.hpp +21 -1
  775. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/base_scanner.hpp +38 -9
  776. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_buffer.hpp +8 -9
  777. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_buffer_manager.hpp +7 -1
  778. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp +29 -23
  779. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_file_handle.hpp +15 -13
  780. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_file_scanner.hpp +13 -5
  781. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_option.hpp +2 -1
  782. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_reader_options.hpp +24 -10
  783. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_schema.hpp +36 -1
  784. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_state.hpp +21 -13
  785. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_state_machine.hpp +52 -22
  786. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_state_machine_cache.hpp +6 -6
  787. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_validator.hpp +58 -0
  788. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/encode/csv_encoder.hpp +62 -0
  789. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/global_csv_state.hpp +6 -3
  790. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner_boundary.hpp +16 -6
  791. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp +9 -4
  792. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/state_machine_options.hpp +8 -4
  793. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +55 -10
  794. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_batch_collector.hpp +2 -2
  795. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_buffered_batch_collector.hpp +2 -2
  796. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_limit.hpp +2 -2
  797. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_reservoir_sample.hpp +1 -1
  798. package/src/duckdb/src/include/duckdb/execution/operator/join/join_filter_pushdown.hpp +28 -7
  799. package/src/duckdb/src/include/duckdb/execution/operator/join/perfect_hash_join_executor.hpp +6 -9
  800. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_hash_join.hpp +17 -16
  801. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_iejoin.hpp +1 -1
  802. package/src/duckdb/src/include/duckdb/execution/operator/order/physical_order.hpp +7 -3
  803. package/src/duckdb/src/include/duckdb/execution/operator/order/physical_top_n.hpp +5 -1
  804. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_copy_to_file.hpp +2 -2
  805. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_insert.hpp +2 -2
  806. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_insert.hpp +55 -4
  807. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_update.hpp +2 -0
  808. package/src/duckdb/src/include/duckdb/execution/operator/projection/physical_tableinout_function.hpp +2 -2
  809. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_positional_scan.hpp +2 -1
  810. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp +10 -9
  811. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_art_index.hpp +16 -13
  812. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +0 -4
  813. package/src/duckdb/src/include/duckdb/execution/partition_info.hpp +79 -0
  814. package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +20 -9
  815. package/src/duckdb/src/include/duckdb/execution/physical_operator_states.hpp +1 -11
  816. package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +0 -2
  817. package/src/duckdb/src/include/duckdb/execution/progress_data.hpp +58 -0
  818. package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +2 -1
  819. package/src/duckdb/src/include/duckdb/execution/reservoir_sample.hpp +160 -31
  820. package/src/duckdb/src/include/duckdb/function/aggregate/distributive_function_utils.hpp +31 -0
  821. package/src/duckdb/src/include/duckdb/function/aggregate/distributive_functions.hpp +61 -10
  822. package/src/duckdb/src/include/duckdb/{core_functions → function}/aggregate/minmax_n_helpers.hpp +1 -1
  823. package/src/duckdb/src/include/duckdb/{core_functions → function}/aggregate/sort_key_helpers.hpp +2 -2
  824. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +47 -27
  825. package/src/duckdb/src/include/duckdb/function/aggregate_state.hpp +2 -0
  826. package/src/duckdb/src/include/duckdb/function/built_in_functions.hpp +3 -10
  827. package/src/duckdb/src/include/duckdb/function/cast/bound_cast_data.hpp +13 -6
  828. package/src/duckdb/src/include/duckdb/function/compression/compression.hpp +15 -0
  829. package/src/duckdb/src/include/duckdb/function/compression_function.hpp +29 -6
  830. package/src/duckdb/src/include/duckdb/{core_functions → function}/create_sort_key.hpp +4 -1
  831. package/src/duckdb/src/include/duckdb/function/encoding_function.hpp +78 -0
  832. package/src/duckdb/src/include/duckdb/function/function.hpp +22 -1
  833. package/src/duckdb/src/include/duckdb/function/function_binder.hpp +3 -0
  834. package/src/duckdb/src/include/duckdb/function/function_list.hpp +39 -0
  835. package/src/duckdb/src/include/duckdb/function/function_set.hpp +13 -7
  836. package/src/duckdb/src/include/duckdb/{core_functions → function}/lambda_functions.hpp +1 -1
  837. package/src/duckdb/src/include/duckdb/function/partition_stats.hpp +36 -0
  838. package/src/duckdb/src/include/duckdb/function/register_function_list_helper.hpp +69 -0
  839. package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp +154 -23
  840. package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_utils.hpp +45 -0
  841. package/src/duckdb/src/include/duckdb/function/scalar/date_functions.hpp +45 -0
  842. package/src/duckdb/src/include/duckdb/function/scalar/generic_common.hpp +36 -0
  843. package/src/duckdb/src/include/duckdb/function/scalar/generic_functions.hpp +32 -23
  844. package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +1 -1
  845. package/src/duckdb/src/include/duckdb/function/scalar/list_functions.hpp +156 -0
  846. package/src/duckdb/src/include/duckdb/function/scalar/map_functions.hpp +27 -0
  847. package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +4 -45
  848. package/src/duckdb/src/include/duckdb/function/scalar/operator_functions.hpp +102 -0
  849. package/src/duckdb/src/include/duckdb/function/scalar/operators.hpp +2 -16
  850. package/src/duckdb/src/include/duckdb/function/scalar/sequence_functions.hpp +16 -25
  851. package/src/duckdb/src/include/duckdb/function/scalar/sequence_utils.hpp +38 -0
  852. package/src/duckdb/src/include/duckdb/function/scalar/strftime_format.hpp +1 -0
  853. package/src/duckdb/src/include/duckdb/function/scalar/string_common.hpp +49 -0
  854. package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +401 -76
  855. package/src/duckdb/src/include/duckdb/function/scalar/struct_functions.hpp +63 -0
  856. package/src/duckdb/src/include/duckdb/function/scalar/struct_utils.hpp +33 -0
  857. package/src/duckdb/src/include/duckdb/function/scalar/system_functions.hpp +45 -0
  858. package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +17 -8
  859. package/src/duckdb/src/include/duckdb/function/table/arrow/arrow_duck_schema.hpp +59 -6
  860. package/src/duckdb/src/include/duckdb/function/table/arrow/arrow_type_info.hpp +12 -9
  861. package/src/duckdb/src/include/duckdb/function/table/arrow/enum/arrow_type_info_type.hpp +2 -0
  862. package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +18 -13
  863. package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +7 -4
  864. package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +14 -0
  865. package/src/duckdb/src/include/duckdb/function/table/table_scan.hpp +15 -10
  866. package/src/duckdb/src/include/duckdb/function/table_function.hpp +94 -18
  867. package/src/duckdb/src/include/duckdb/{core_functions → function}/to_interval.hpp +1 -1
  868. package/src/duckdb/src/include/duckdb/function/window/window_aggregate_function.hpp +44 -0
  869. package/src/duckdb/src/include/duckdb/function/window/window_aggregate_states.hpp +56 -0
  870. package/src/duckdb/src/include/duckdb/function/window/window_aggregator.hpp +194 -0
  871. package/src/duckdb/src/include/duckdb/function/window/window_boundaries_state.hpp +153 -0
  872. package/src/duckdb/src/include/duckdb/function/window/window_collection.hpp +146 -0
  873. package/src/duckdb/src/include/duckdb/function/window/window_constant_aggregator.hpp +38 -0
  874. package/src/duckdb/src/include/duckdb/function/window/window_custom_aggregator.hpp +32 -0
  875. package/src/duckdb/src/include/duckdb/function/window/window_distinct_aggregator.hpp +39 -0
  876. package/src/duckdb/src/include/duckdb/function/window/window_executor.hpp +122 -0
  877. package/src/duckdb/src/include/duckdb/function/window/window_index_tree.hpp +42 -0
  878. package/src/duckdb/src/include/duckdb/function/window/window_merge_sort_tree.hpp +108 -0
  879. package/src/duckdb/src/include/duckdb/function/window/window_naive_aggregator.hpp +33 -0
  880. package/src/duckdb/src/include/duckdb/function/window/window_rank_function.hpp +63 -0
  881. package/src/duckdb/src/include/duckdb/function/window/window_rownumber_function.hpp +43 -0
  882. package/src/duckdb/src/include/duckdb/function/window/window_segment_tree.hpp +31 -0
  883. package/src/duckdb/src/include/duckdb/function/window/window_shared_expressions.hpp +76 -0
  884. package/src/duckdb/src/include/duckdb/function/window/window_token_tree.hpp +46 -0
  885. package/src/duckdb/src/include/duckdb/function/window/window_value_function.hpp +79 -0
  886. package/src/duckdb/src/include/duckdb/logging/http_logger.hpp +2 -0
  887. package/src/duckdb/src/include/duckdb/logging/log_manager.hpp +81 -0
  888. package/src/duckdb/src/include/duckdb/logging/log_storage.hpp +127 -0
  889. package/src/duckdb/src/include/duckdb/logging/logger.hpp +287 -0
  890. package/src/duckdb/src/include/duckdb/logging/logging.hpp +83 -0
  891. package/src/duckdb/src/include/duckdb/main/appender.hpp +41 -18
  892. package/src/duckdb/src/include/duckdb/main/attached_database.hpp +6 -3
  893. package/src/duckdb/src/include/duckdb/main/capi/capi_internal.hpp +7 -2
  894. package/src/duckdb/src/include/duckdb/main/capi/extension_api.hpp +317 -231
  895. package/src/duckdb/src/include/duckdb/main/client_config.hpp +17 -1
  896. package/src/duckdb/src/include/duckdb/main/client_context.hpp +28 -6
  897. package/src/duckdb/src/include/duckdb/main/client_context_file_opener.hpp +1 -0
  898. package/src/duckdb/src/include/duckdb/main/client_context_wrapper.hpp +5 -0
  899. package/src/duckdb/src/include/duckdb/main/client_data.hpp +3 -2
  900. package/src/duckdb/src/include/duckdb/main/client_properties.hpp +8 -3
  901. package/src/duckdb/src/include/duckdb/main/config.hpp +52 -8
  902. package/src/duckdb/src/include/duckdb/main/connection.hpp +18 -3
  903. package/src/duckdb/src/include/duckdb/main/database.hpp +8 -7
  904. package/src/duckdb/src/include/duckdb/main/database_file_opener.hpp +5 -1
  905. package/src/duckdb/src/include/duckdb/main/database_manager.hpp +3 -0
  906. package/src/duckdb/src/include/duckdb/main/db_instance_cache.hpp +1 -0
  907. package/src/duckdb/src/include/duckdb/main/extension.hpp +8 -2
  908. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +548 -9
  909. package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +18 -0
  910. package/src/duckdb/src/include/duckdb/main/extension_util.hpp +12 -7
  911. package/src/duckdb/src/include/duckdb/main/prepared_statement.hpp +3 -3
  912. package/src/duckdb/src/include/duckdb/main/profiling_info.hpp +2 -2
  913. package/src/duckdb/src/include/duckdb/main/query_profiler.hpp +8 -4
  914. package/src/duckdb/src/include/duckdb/main/relation/create_table_relation.hpp +3 -1
  915. package/src/duckdb/src/include/duckdb/main/relation/delete_relation.hpp +2 -2
  916. package/src/duckdb/src/include/duckdb/main/relation/subquery_relation.hpp +1 -4
  917. package/src/duckdb/src/include/duckdb/main/relation/table_function_relation.hpp +3 -1
  918. package/src/duckdb/src/include/duckdb/main/relation/table_relation.hpp +3 -0
  919. package/src/duckdb/src/include/duckdb/main/relation/update_relation.hpp +3 -2
  920. package/src/duckdb/src/include/duckdb/main/relation/value_relation.hpp +7 -0
  921. package/src/duckdb/src/include/duckdb/main/relation/view_relation.hpp +1 -0
  922. package/src/duckdb/src/include/duckdb/main/relation/write_parquet_relation.hpp +1 -1
  923. package/src/duckdb/src/include/duckdb/main/relation.hpp +45 -9
  924. package/src/duckdb/src/include/duckdb/main/secret/secret_storage.hpp +20 -22
  925. package/src/duckdb/src/include/duckdb/main/settings.hpp +613 -378
  926. package/src/duckdb/src/include/duckdb/main/table_description.hpp +14 -4
  927. package/src/duckdb/src/include/duckdb/optimizer/build_probe_side_optimizer.hpp +1 -3
  928. package/src/duckdb/src/include/duckdb/optimizer/column_lifetime_analyzer.hpp +14 -7
  929. package/src/duckdb/src/include/duckdb/optimizer/common_aggregate_optimizer.hpp +2 -2
  930. package/src/duckdb/src/include/duckdb/optimizer/empty_result_pullup.hpp +27 -0
  931. package/src/duckdb/src/include/duckdb/optimizer/expression_heuristics.hpp +1 -1
  932. package/src/duckdb/src/include/duckdb/optimizer/filter_combiner.hpp +6 -1
  933. package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +2 -0
  934. package/src/duckdb/src/include/duckdb/optimizer/in_clause_rewriter.hpp +3 -0
  935. package/src/duckdb/src/include/duckdb/optimizer/join_filter_pushdown_optimizer.hpp +5 -0
  936. package/src/duckdb/src/include/duckdb/optimizer/join_order/plan_enumerator.hpp +2 -0
  937. package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_statistics_helper.hpp +2 -2
  938. package/src/duckdb/src/include/duckdb/optimizer/late_materialization.hpp +45 -0
  939. package/src/duckdb/src/include/duckdb/optimizer/matcher/expression_matcher.hpp +23 -0
  940. package/src/duckdb/src/include/duckdb/optimizer/matcher/type_matcher.hpp +18 -0
  941. package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +9 -0
  942. package/src/duckdb/src/include/duckdb/optimizer/remove_unused_columns.hpp +33 -11
  943. package/src/duckdb/src/include/duckdb/optimizer/rule/distinct_aggregate_optimizer.hpp +34 -0
  944. package/src/duckdb/src/include/duckdb/optimizer/sampling_pushdown.hpp +25 -0
  945. package/src/duckdb/src/include/duckdb/optimizer/statistics_propagator.hpp +3 -1
  946. package/src/duckdb/src/include/duckdb/optimizer/sum_rewriter.hpp +37 -0
  947. package/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp +4 -0
  948. package/src/duckdb/src/include/duckdb/parallel/event.hpp +3 -0
  949. package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +1 -1
  950. package/src/duckdb/src/include/duckdb/parallel/pipeline_executor.hpp +26 -8
  951. package/src/duckdb/src/include/duckdb/parallel/thread_context.hpp +3 -0
  952. package/src/duckdb/src/include/duckdb/parser/base_expression.hpp +51 -3
  953. package/src/duckdb/src/include/duckdb/parser/constraints/unique_constraint.hpp +28 -44
  954. package/src/duckdb/src/include/duckdb/parser/expression/columnref_expression.hpp +3 -0
  955. package/src/duckdb/src/include/duckdb/parser/expression/comparison_expression.hpp +2 -2
  956. package/src/duckdb/src/include/duckdb/parser/expression/conjunction_expression.hpp +1 -1
  957. package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +2 -2
  958. package/src/duckdb/src/include/duckdb/parser/expression/operator_expression.hpp +6 -6
  959. package/src/duckdb/src/include/duckdb/parser/expression/star_expression.hpp +11 -1
  960. package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +12 -0
  961. package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_info.hpp +1 -0
  962. package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_scalar_function_info.hpp +3 -2
  963. package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_table_info.hpp +22 -1
  964. package/src/duckdb/src/include/duckdb/parser/parsed_data/attach_info.hpp +3 -4
  965. package/src/duckdb/src/include/duckdb/parser/parsed_data/comment_on_column_info.hpp +1 -1
  966. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_function_info.hpp +16 -12
  967. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_index_info.hpp +3 -3
  968. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_type_info.hpp +5 -5
  969. package/src/duckdb/src/include/duckdb/parser/parsed_data/sample_options.hpp +12 -3
  970. package/src/duckdb/src/include/duckdb/parser/parser.hpp +3 -0
  971. package/src/duckdb/src/include/duckdb/parser/qualified_name.hpp +17 -57
  972. package/src/duckdb/src/include/duckdb/parser/qualified_name_set.hpp +19 -3
  973. package/src/duckdb/src/include/duckdb/parser/simplified_token.hpp +2 -1
  974. package/src/duckdb/src/include/duckdb/parser/tableref/basetableref.hpp +12 -9
  975. package/src/duckdb/src/include/duckdb/parser/tokens.hpp +1 -1
  976. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +2 -2
  977. package/src/duckdb/src/include/duckdb/planner/bind_context.hpp +45 -28
  978. package/src/duckdb/src/include/duckdb/planner/binder.hpp +23 -11
  979. package/src/duckdb/src/include/duckdb/planner/binding_alias.hpp +44 -0
  980. package/src/duckdb/src/include/duckdb/planner/bound_result_modifier.hpp +1 -0
  981. package/src/duckdb/src/include/duckdb/planner/collation_binding.hpp +4 -3
  982. package/src/duckdb/src/include/duckdb/planner/constraints/bound_unique_constraint.hpp +11 -10
  983. package/src/duckdb/src/include/duckdb/planner/expression/bound_cast_expression.hpp +2 -0
  984. package/src/duckdb/src/include/duckdb/planner/expression/bound_function_expression.hpp +1 -0
  985. package/src/duckdb/src/include/duckdb/planner/expression/bound_subquery_expression.hpp +4 -4
  986. package/src/duckdb/src/include/duckdb/planner/expression/bound_window_expression.hpp +6 -0
  987. package/src/duckdb/src/include/duckdb/planner/expression.hpp +2 -0
  988. package/src/duckdb/src/include/duckdb/planner/expression_binder/having_binder.hpp +2 -0
  989. package/src/duckdb/src/include/duckdb/planner/expression_binder/index_binder.hpp +9 -4
  990. package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +8 -2
  991. package/src/duckdb/src/include/duckdb/planner/filter/conjunction_filter.hpp +1 -2
  992. package/src/duckdb/src/include/duckdb/planner/filter/dynamic_filter.hpp +48 -0
  993. package/src/duckdb/src/include/duckdb/planner/filter/in_filter.hpp +37 -0
  994. package/src/duckdb/src/include/duckdb/planner/filter/optional_filter.hpp +35 -0
  995. package/src/duckdb/src/include/duckdb/planner/logical_operator.hpp +4 -0
  996. package/src/duckdb/src/include/duckdb/planner/logical_operator_visitor.hpp +3 -0
  997. package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +2 -0
  998. package/src/duckdb/src/include/duckdb/planner/operator/logical_create_index.hpp +9 -9
  999. package/src/duckdb/src/include/duckdb/planner/operator/logical_filter.hpp +4 -0
  1000. package/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp +16 -7
  1001. package/src/duckdb/src/include/duckdb/planner/operator/logical_insert.hpp +2 -0
  1002. package/src/duckdb/src/include/duckdb/planner/operator/logical_join.hpp +4 -0
  1003. package/src/duckdb/src/include/duckdb/planner/operator/logical_order.hpp +5 -1
  1004. package/src/duckdb/src/include/duckdb/planner/operator/logical_top_n.hpp +5 -3
  1005. package/src/duckdb/src/include/duckdb/planner/table_binding.hpp +14 -6
  1006. package/src/duckdb/src/include/duckdb/planner/table_filter.hpp +12 -8
  1007. package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +1 -0
  1008. package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +3 -0
  1009. package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +82 -26
  1010. package/src/duckdb/src/include/duckdb/storage/buffer/buffer_handle.hpp +1 -1
  1011. package/src/duckdb/src/include/duckdb/storage/buffer/buffer_pool.hpp +10 -3
  1012. package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +2 -1
  1013. package/src/duckdb/src/include/duckdb/storage/checkpoint/string_checkpoint_state.hpp +4 -13
  1014. package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_compress.hpp +14 -15
  1015. package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_constants.hpp +1 -1
  1016. package/src/duckdb/src/include/duckdb/storage/compression/alprd/alprd_compress.hpp +13 -15
  1017. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_compress.hpp +1 -1
  1018. package/src/duckdb/src/include/duckdb/storage/compression/dictionary/analyze.hpp +46 -0
  1019. package/src/duckdb/src/include/duckdb/storage/compression/dictionary/common.hpp +60 -0
  1020. package/src/duckdb/src/include/duckdb/storage/compression/dictionary/compression.hpp +61 -0
  1021. package/src/duckdb/src/include/duckdb/storage/compression/dictionary/decompression.hpp +50 -0
  1022. package/src/duckdb/src/include/duckdb/storage/compression/empty_validity.hpp +100 -0
  1023. package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_compress.hpp +1 -1
  1024. package/src/duckdb/src/include/duckdb/storage/compression/roaring/appender.hpp +150 -0
  1025. package/src/duckdb/src/include/duckdb/storage/compression/roaring/roaring.hpp +618 -0
  1026. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +53 -31
  1027. package/src/duckdb/src/include/duckdb/storage/index.hpp +2 -3
  1028. package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +0 -1
  1029. package/src/duckdb/src/include/duckdb/storage/segment/uncompressed.hpp +4 -1
  1030. package/src/duckdb/src/include/duckdb/storage/standard_buffer_manager.hpp +3 -3
  1031. package/src/duckdb/src/include/duckdb/storage/statistics/column_statistics.hpp +1 -1
  1032. package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +5 -4
  1033. package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +16 -1
  1034. package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +2 -1
  1035. package/src/duckdb/src/include/duckdb/storage/storage_index.hpp +70 -0
  1036. package/src/duckdb/src/include/duckdb/storage/storage_info.hpp +5 -7
  1037. package/src/duckdb/src/include/duckdb/storage/storage_manager.hpp +4 -3
  1038. package/src/duckdb/src/include/duckdb/storage/storage_options.hpp +23 -0
  1039. package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +34 -6
  1040. package/src/duckdb/src/include/duckdb/storage/table/append_state.hpp +2 -0
  1041. package/src/duckdb/src/include/duckdb/storage/table/array_column_data.hpp +2 -2
  1042. package/src/duckdb/src/include/duckdb/storage/table/column_checkpoint_state.hpp +2 -1
  1043. package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +39 -10
  1044. package/src/duckdb/src/include/duckdb/storage/table/column_data_checkpointer.hpp +56 -14
  1045. package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +35 -29
  1046. package/src/duckdb/src/include/duckdb/storage/table/delete_state.hpp +1 -1
  1047. package/src/duckdb/src/include/duckdb/storage/table/list_column_data.hpp +1 -1
  1048. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +7 -1
  1049. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +19 -6
  1050. package/src/duckdb/src/include/duckdb/storage/table/row_version_manager.hpp +2 -1
  1051. package/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp +29 -6
  1052. package/src/duckdb/src/include/duckdb/storage/table/segment_tree.hpp +10 -10
  1053. package/src/duckdb/src/include/duckdb/storage/table/standard_column_data.hpp +5 -0
  1054. package/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp +26 -19
  1055. package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +8 -1
  1056. package/src/duckdb/src/include/duckdb/storage/table/update_segment.hpp +16 -14
  1057. package/src/duckdb/src/include/duckdb/storage/table/validity_column_data.hpp +2 -0
  1058. package/src/duckdb/src/include/duckdb/storage/table_io_manager.hpp +3 -0
  1059. package/src/duckdb/src/include/duckdb/storage/table_storage_info.hpp +1 -0
  1060. package/src/duckdb/src/include/duckdb/storage/temporary_file_manager.hpp +228 -61
  1061. package/src/duckdb/src/include/duckdb/storage/write_ahead_log.hpp +14 -10
  1062. package/src/duckdb/src/include/duckdb/transaction/commit_state.hpp +3 -1
  1063. package/src/duckdb/src/include/duckdb/transaction/duck_transaction.hpp +3 -2
  1064. package/src/duckdb/src/include/duckdb/transaction/duck_transaction_manager.hpp +1 -0
  1065. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +19 -17
  1066. package/src/duckdb/src/include/duckdb/transaction/rollback_state.hpp +5 -2
  1067. package/src/duckdb/src/include/duckdb/transaction/transaction.hpp +1 -2
  1068. package/src/duckdb/src/include/duckdb/transaction/undo_buffer.hpp +13 -8
  1069. package/src/duckdb/src/include/duckdb/transaction/undo_buffer_allocator.hpp +79 -0
  1070. package/src/duckdb/src/include/duckdb/transaction/update_info.hpp +43 -13
  1071. package/src/duckdb/src/include/duckdb/transaction/wal_write_state.hpp +4 -1
  1072. package/src/duckdb/src/include/duckdb/verification/copied_statement_verifier.hpp +4 -2
  1073. package/src/duckdb/src/include/duckdb/verification/deserialized_statement_verifier.hpp +4 -2
  1074. package/src/duckdb/src/include/duckdb/verification/external_statement_verifier.hpp +4 -2
  1075. package/src/duckdb/src/include/duckdb/verification/fetch_row_verifier.hpp +4 -2
  1076. package/src/duckdb/src/include/duckdb/verification/no_operator_caching_verifier.hpp +4 -2
  1077. package/src/duckdb/src/include/duckdb/verification/parsed_statement_verifier.hpp +4 -2
  1078. package/src/duckdb/src/include/duckdb/verification/prepared_statement_verifier.hpp +7 -3
  1079. package/src/duckdb/src/include/duckdb/verification/statement_verifier.hpp +11 -5
  1080. package/src/duckdb/src/include/duckdb/verification/unoptimized_statement_verifier.hpp +4 -2
  1081. package/src/duckdb/src/include/duckdb.h +424 -41
  1082. package/src/duckdb/src/include/duckdb_extension.h +301 -195
  1083. package/src/duckdb/src/logging/log_manager.cpp +157 -0
  1084. package/src/duckdb/src/logging/log_storage.cpp +209 -0
  1085. package/src/duckdb/src/logging/logger.cpp +211 -0
  1086. package/src/duckdb/src/logging/logging.cpp +42 -0
  1087. package/src/duckdb/src/main/appender.cpp +187 -45
  1088. package/src/duckdb/src/main/attached_database.cpp +16 -8
  1089. package/src/duckdb/src/main/capi/appender-c.cpp +47 -4
  1090. package/src/duckdb/src/main/capi/arrow-c.cpp +9 -4
  1091. package/src/duckdb/src/main/capi/config-c.cpp +17 -4
  1092. package/src/duckdb/src/main/capi/datetime-c.cpp +15 -0
  1093. package/src/duckdb/src/main/capi/duckdb-c.cpp +54 -13
  1094. package/src/duckdb/src/main/capi/duckdb_value-c.cpp +212 -4
  1095. package/src/duckdb/src/main/capi/helper-c.cpp +3 -0
  1096. package/src/duckdb/src/main/capi/prepared-c.cpp +26 -7
  1097. package/src/duckdb/src/main/capi/replacement_scan-c.cpp +1 -1
  1098. package/src/duckdb/src/main/capi/result-c.cpp +3 -0
  1099. package/src/duckdb/src/main/capi/table_description-c.cpp +43 -10
  1100. package/src/duckdb/src/main/capi/threading-c.cpp +4 -4
  1101. package/src/duckdb/src/main/client_context.cpp +125 -51
  1102. package/src/duckdb/src/main/client_context_file_opener.cpp +4 -0
  1103. package/src/duckdb/src/main/client_context_wrapper.cpp +4 -0
  1104. package/src/duckdb/src/main/client_data.cpp +1 -1
  1105. package/src/duckdb/src/main/client_verify.cpp +39 -20
  1106. package/src/duckdb/src/main/config.cpp +266 -74
  1107. package/src/duckdb/src/main/connection.cpp +53 -13
  1108. package/src/duckdb/src/main/database.cpp +39 -18
  1109. package/src/duckdb/src/main/database_manager.cpp +12 -11
  1110. package/src/duckdb/src/main/db_instance_cache.cpp +14 -7
  1111. package/src/duckdb/src/main/extension/extension_helper.cpp +24 -23
  1112. package/src/duckdb/src/main/extension/extension_install.cpp +19 -7
  1113. package/src/duckdb/src/main/extension/extension_load.cpp +91 -41
  1114. package/src/duckdb/src/main/extension/extension_util.cpp +40 -19
  1115. package/src/duckdb/src/main/extension.cpp +20 -11
  1116. package/src/duckdb/src/main/profiling_info.cpp +19 -5
  1117. package/src/duckdb/src/main/query_profiler.cpp +135 -36
  1118. package/src/duckdb/src/main/query_result.cpp +2 -1
  1119. package/src/duckdb/src/main/relation/aggregate_relation.cpp +3 -3
  1120. package/src/duckdb/src/main/relation/create_table_relation.cpp +5 -4
  1121. package/src/duckdb/src/main/relation/create_view_relation.cpp +2 -2
  1122. package/src/duckdb/src/main/relation/cross_product_relation.cpp +2 -2
  1123. package/src/duckdb/src/main/relation/delete_relation.cpp +2 -2
  1124. package/src/duckdb/src/main/relation/delim_get_relation.cpp +1 -1
  1125. package/src/duckdb/src/main/relation/distinct_relation.cpp +1 -1
  1126. package/src/duckdb/src/main/relation/explain_relation.cpp +1 -1
  1127. package/src/duckdb/src/main/relation/filter_relation.cpp +1 -1
  1128. package/src/duckdb/src/main/relation/insert_relation.cpp +1 -1
  1129. package/src/duckdb/src/main/relation/join_relation.cpp +5 -5
  1130. package/src/duckdb/src/main/relation/order_relation.cpp +1 -1
  1131. package/src/duckdb/src/main/relation/projection_relation.cpp +3 -3
  1132. package/src/duckdb/src/main/relation/query_relation.cpp +1 -1
  1133. package/src/duckdb/src/main/relation/read_csv_relation.cpp +58 -20
  1134. package/src/duckdb/src/main/relation/setop_relation.cpp +2 -2
  1135. package/src/duckdb/src/main/relation/subquery_relation.cpp +3 -8
  1136. package/src/duckdb/src/main/relation/table_function_relation.cpp +10 -1
  1137. package/src/duckdb/src/main/relation/table_relation.cpp +19 -3
  1138. package/src/duckdb/src/main/relation/update_relation.cpp +2 -2
  1139. package/src/duckdb/src/main/relation/value_relation.cpp +42 -2
  1140. package/src/duckdb/src/main/relation/view_relation.cpp +8 -2
  1141. package/src/duckdb/src/main/relation/write_csv_relation.cpp +1 -1
  1142. package/src/duckdb/src/main/relation/write_parquet_relation.cpp +1 -1
  1143. package/src/duckdb/src/main/relation.cpp +49 -28
  1144. package/src/duckdb/src/main/secret/secret_manager.cpp +1 -1
  1145. package/src/duckdb/src/main/secret/secret_storage.cpp +6 -4
  1146. package/src/duckdb/src/main/settings/autogenerated_settings.cpp +1102 -0
  1147. package/src/duckdb/src/main/settings/custom_settings.cpp +1343 -0
  1148. package/src/duckdb/src/optimizer/build_probe_side_optimizer.cpp +60 -37
  1149. package/src/duckdb/src/optimizer/column_binding_replacer.cpp +1 -1
  1150. package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +126 -72
  1151. package/src/duckdb/src/optimizer/common_aggregate_optimizer.cpp +22 -6
  1152. package/src/duckdb/src/optimizer/compressed_materialization/compress_aggregate.cpp +3 -3
  1153. package/src/duckdb/src/optimizer/compressed_materialization/compress_distinct.cpp +2 -2
  1154. package/src/duckdb/src/optimizer/compressed_materialization.cpp +3 -3
  1155. package/src/duckdb/src/optimizer/cse_optimizer.cpp +7 -7
  1156. package/src/duckdb/src/optimizer/deliminator.cpp +6 -5
  1157. package/src/duckdb/src/optimizer/empty_result_pullup.cpp +96 -0
  1158. package/src/duckdb/src/optimizer/expression_heuristics.cpp +11 -3
  1159. package/src/duckdb/src/optimizer/expression_rewriter.cpp +9 -2
  1160. package/src/duckdb/src/optimizer/filter_combiner.cpp +190 -88
  1161. package/src/duckdb/src/optimizer/filter_pushdown.cpp +6 -5
  1162. package/src/duckdb/src/optimizer/in_clause_rewriter.cpp +25 -9
  1163. package/src/duckdb/src/optimizer/join_filter_pushdown_optimizer.cpp +170 -72
  1164. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +5 -4
  1165. package/src/duckdb/src/optimizer/join_order/plan_enumerator.cpp +3 -1
  1166. package/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp +7 -7
  1167. package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +15 -6
  1168. package/src/duckdb/src/optimizer/join_order/relation_statistics_helper.cpp +37 -22
  1169. package/src/duckdb/src/optimizer/late_materialization.cpp +414 -0
  1170. package/src/duckdb/src/optimizer/limit_pushdown.cpp +1 -0
  1171. package/src/duckdb/src/optimizer/matcher/expression_matcher.cpp +30 -2
  1172. package/src/duckdb/src/optimizer/optimizer.cpp +67 -7
  1173. package/src/duckdb/src/optimizer/pullup/pullup_filter.cpp +3 -3
  1174. package/src/duckdb/src/optimizer/pullup/pullup_projection.cpp +2 -2
  1175. package/src/duckdb/src/optimizer/pullup/pullup_set_operation.cpp +1 -1
  1176. package/src/duckdb/src/optimizer/pushdown/pushdown_aggregate.cpp +2 -2
  1177. package/src/duckdb/src/optimizer/pushdown/pushdown_filter.cpp +1 -1
  1178. package/src/duckdb/src/optimizer/pushdown/pushdown_left_join.cpp +1 -1
  1179. package/src/duckdb/src/optimizer/pushdown/pushdown_mark_join.cpp +3 -3
  1180. package/src/duckdb/src/optimizer/pushdown/pushdown_projection.cpp +5 -3
  1181. package/src/duckdb/src/optimizer/pushdown/pushdown_set_operation.cpp +1 -1
  1182. package/src/duckdb/src/optimizer/pushdown/pushdown_unnest.cpp +52 -0
  1183. package/src/duckdb/src/optimizer/pushdown/pushdown_window.cpp +2 -2
  1184. package/src/duckdb/src/optimizer/regex_range_filter.cpp +1 -1
  1185. package/src/duckdb/src/optimizer/remove_duplicate_groups.cpp +1 -1
  1186. package/src/duckdb/src/optimizer/remove_unused_columns.cpp +168 -38
  1187. package/src/duckdb/src/optimizer/rule/arithmetic_simplification.cpp +2 -1
  1188. package/src/duckdb/src/optimizer/rule/comparison_simplification.cpp +8 -5
  1189. package/src/duckdb/src/optimizer/rule/conjunction_simplification.cpp +2 -2
  1190. package/src/duckdb/src/optimizer/rule/constant_folding.cpp +2 -2
  1191. package/src/duckdb/src/optimizer/rule/distinct_aggregate_optimizer.cpp +65 -0
  1192. package/src/duckdb/src/optimizer/rule/distributivity.cpp +2 -2
  1193. package/src/duckdb/src/optimizer/rule/enum_comparison.cpp +2 -1
  1194. package/src/duckdb/src/optimizer/rule/equal_or_null_simplification.cpp +4 -3
  1195. package/src/duckdb/src/optimizer/rule/in_clause_simplification_rule.cpp +3 -3
  1196. package/src/duckdb/src/optimizer/rule/like_optimizations.cpp +3 -1
  1197. package/src/duckdb/src/optimizer/rule/move_constants.cpp +9 -9
  1198. package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +4 -3
  1199. package/src/duckdb/src/optimizer/rule/timestamp_comparison.cpp +1 -1
  1200. package/src/duckdb/src/optimizer/sampling_pushdown.cpp +24 -0
  1201. package/src/duckdb/src/optimizer/statistics/expression/propagate_comparison.cpp +1 -1
  1202. package/src/duckdb/src/optimizer/statistics/expression/propagate_conjunction.cpp +1 -1
  1203. package/src/duckdb/src/optimizer/statistics/expression/propagate_operator.cpp +1 -1
  1204. package/src/duckdb/src/optimizer/statistics/operator/propagate_aggregate.cpp +74 -0
  1205. package/src/duckdb/src/optimizer/statistics/operator/propagate_filter.cpp +10 -7
  1206. package/src/duckdb/src/optimizer/statistics/operator/propagate_get.cpp +3 -3
  1207. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +3 -3
  1208. package/src/duckdb/src/optimizer/statistics/operator/propagate_window.cpp +3 -0
  1209. package/src/duckdb/src/optimizer/sum_rewriter.cpp +174 -0
  1210. package/src/duckdb/src/optimizer/topn_optimizer.cpp +71 -0
  1211. package/src/duckdb/src/optimizer/unnest_rewriter.cpp +5 -5
  1212. package/src/duckdb/src/parallel/event.cpp +4 -0
  1213. package/src/duckdb/src/parallel/executor.cpp +11 -29
  1214. package/src/duckdb/src/parallel/executor_task.cpp +8 -3
  1215. package/src/duckdb/src/parallel/pipeline.cpp +15 -8
  1216. package/src/duckdb/src/parallel/pipeline_executor.cpp +67 -43
  1217. package/src/duckdb/src/parallel/thread_context.cpp +12 -1
  1218. package/src/duckdb/src/parser/column_definition.cpp +3 -3
  1219. package/src/duckdb/src/parser/constraints/unique_constraint.cpp +72 -9
  1220. package/src/duckdb/src/parser/expression/columnref_expression.cpp +15 -3
  1221. package/src/duckdb/src/parser/expression/conjunction_expression.cpp +1 -1
  1222. package/src/duckdb/src/parser/expression/function_expression.cpp +1 -1
  1223. package/src/duckdb/src/parser/expression/lambda_expression.cpp +3 -3
  1224. package/src/duckdb/src/parser/expression/lambdaref_expression.cpp +1 -1
  1225. package/src/duckdb/src/parser/expression/star_expression.cpp +46 -2
  1226. package/src/duckdb/src/parser/expression/window_expression.cpp +24 -1
  1227. package/src/duckdb/src/parser/parsed_data/alter_info.cpp +26 -2
  1228. package/src/duckdb/src/parser/parsed_data/alter_scalar_function_info.cpp +5 -3
  1229. package/src/duckdb/src/parser/parsed_data/alter_table_info.cpp +29 -1
  1230. package/src/duckdb/src/parser/parsed_data/attach_info.cpp +6 -6
  1231. package/src/duckdb/src/parser/parsed_data/create_aggregate_function_info.cpp +1 -1
  1232. package/src/duckdb/src/parser/parsed_data/create_function_info.cpp +17 -0
  1233. package/src/duckdb/src/parser/parsed_data/create_index_info.cpp +16 -15
  1234. package/src/duckdb/src/parser/parsed_data/create_macro_info.cpp +1 -1
  1235. package/src/duckdb/src/parser/parsed_data/create_pragma_function_info.cpp +1 -1
  1236. package/src/duckdb/src/parser/parsed_data/create_scalar_function_info.cpp +3 -2
  1237. package/src/duckdb/src/parser/parsed_data/create_schema_info.cpp +1 -1
  1238. package/src/duckdb/src/parser/parsed_data/create_table_function_info.cpp +1 -1
  1239. package/src/duckdb/src/parser/parsed_data/create_table_info.cpp +1 -0
  1240. package/src/duckdb/src/parser/parsed_data/create_type_info.cpp +4 -4
  1241. package/src/duckdb/src/parser/parsed_data/load_info.cpp +1 -0
  1242. package/src/duckdb/src/parser/parsed_data/sample_options.cpp +31 -1
  1243. package/src/duckdb/src/parser/parsed_expression.cpp +1 -1
  1244. package/src/duckdb/src/parser/parsed_expression_iterator.cpp +4 -1
  1245. package/src/duckdb/src/parser/parser.cpp +129 -0
  1246. package/src/duckdb/src/parser/qualified_name.cpp +99 -0
  1247. package/src/duckdb/src/parser/query_error_context.cpp +35 -6
  1248. package/src/duckdb/src/parser/query_node/select_node.cpp +4 -4
  1249. package/src/duckdb/src/parser/statement/delete_statement.cpp +6 -1
  1250. package/src/duckdb/src/parser/statement/insert_statement.cpp +4 -3
  1251. package/src/duckdb/src/parser/statement/update_statement.cpp +6 -1
  1252. package/src/duckdb/src/parser/tableref/pivotref.cpp +2 -2
  1253. package/src/duckdb/src/parser/tableref.cpp +2 -2
  1254. package/src/duckdb/src/parser/transform/constraint/transform_constraint.cpp +16 -24
  1255. package/src/duckdb/src/parser/transform/expression/transform_array_access.cpp +1 -1
  1256. package/src/duckdb/src/parser/transform/expression/transform_bool_expr.cpp +5 -5
  1257. package/src/duckdb/src/parser/transform/expression/transform_columnref.cpp +61 -13
  1258. package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +10 -4
  1259. package/src/duckdb/src/parser/transform/expression/transform_expression.cpp +2 -2
  1260. package/src/duckdb/src/parser/transform/expression/transform_function.cpp +30 -3
  1261. package/src/duckdb/src/parser/transform/expression/transform_operator.cpp +1 -1
  1262. package/src/duckdb/src/parser/transform/expression/transform_subquery.cpp +25 -6
  1263. package/src/duckdb/src/parser/transform/helpers/transform_groupby.cpp +1 -1
  1264. package/src/duckdb/src/parser/transform/helpers/transform_sample.cpp +10 -3
  1265. package/src/duckdb/src/parser/transform/helpers/transform_typename.cpp +4 -3
  1266. package/src/duckdb/src/parser/transform/statement/transform_alter_table.cpp +18 -3
  1267. package/src/duckdb/src/parser/transform/statement/transform_comment_on.cpp +1 -1
  1268. package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +0 -1
  1269. package/src/duckdb/src/parser/transform/statement/transform_create_function.cpp +5 -5
  1270. package/src/duckdb/src/parser/transform/statement/transform_create_table.cpp +26 -12
  1271. package/src/duckdb/src/parser/transform/statement/transform_create_table_as.cpp +11 -3
  1272. package/src/duckdb/src/parser/transform/statement/transform_create_view.cpp +1 -1
  1273. package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +2 -0
  1274. package/src/duckdb/src/parser/transform/statement/transform_pragma.cpp +3 -3
  1275. package/src/duckdb/src/parser/transform/statement/transform_prepare.cpp +4 -4
  1276. package/src/duckdb/src/parser/transform/statement/transform_set.cpp +2 -2
  1277. package/src/duckdb/src/parser/transform/statement/transform_show.cpp +21 -3
  1278. package/src/duckdb/src/parser/transform/tableref/transform_pivot.cpp +8 -6
  1279. package/src/duckdb/src/parser/transformer.cpp +2 -2
  1280. package/src/duckdb/src/planner/bind_context.cpp +308 -136
  1281. package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +30 -31
  1282. package/src/duckdb/src/planner/binder/expression/bind_between_expression.cpp +4 -2
  1283. package/src/duckdb/src/planner/binder/expression/bind_columnref_expression.cpp +102 -94
  1284. package/src/duckdb/src/planner/binder/expression/bind_comparison_expression.cpp +7 -5
  1285. package/src/duckdb/src/planner/binder/expression/bind_conjunction_expression.cpp +1 -1
  1286. package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +7 -7
  1287. package/src/duckdb/src/planner/binder/expression/bind_lambda.cpp +10 -10
  1288. package/src/duckdb/src/planner/binder/expression/bind_macro_expression.cpp +24 -6
  1289. package/src/duckdb/src/planner/binder/expression/bind_operator_expression.cpp +23 -15
  1290. package/src/duckdb/src/planner/binder/expression/bind_parameter_expression.cpp +1 -1
  1291. package/src/duckdb/src/planner/binder/expression/bind_star_expression.cpp +97 -19
  1292. package/src/duckdb/src/planner/binder/expression/bind_subquery_expression.cpp +74 -16
  1293. package/src/duckdb/src/planner/binder/expression/bind_unnest_expression.cpp +6 -6
  1294. package/src/duckdb/src/planner/binder/expression/bind_window_expression.cpp +49 -15
  1295. package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +32 -23
  1296. package/src/duckdb/src/planner/binder/query_node/bind_setop_node.cpp +20 -3
  1297. package/src/duckdb/src/planner/binder/query_node/bind_table_macro_node.cpp +2 -2
  1298. package/src/duckdb/src/planner/binder/query_node/plan_query_node.cpp +3 -0
  1299. package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +6 -5
  1300. package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +38 -19
  1301. package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +2 -12
  1302. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +117 -412
  1303. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +423 -144
  1304. package/src/duckdb/src/planner/binder/statement/bind_delete.cpp +1 -1
  1305. package/src/duckdb/src/planner/binder/statement/bind_drop.cpp +5 -0
  1306. package/src/duckdb/src/planner/binder/statement/bind_execute.cpp +1 -1
  1307. package/src/duckdb/src/planner/binder/statement/bind_export.cpp +0 -4
  1308. package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +31 -13
  1309. package/src/duckdb/src/planner/binder/statement/bind_pragma.cpp +1 -1
  1310. package/src/duckdb/src/planner/binder/statement/bind_simple.cpp +96 -27
  1311. package/src/duckdb/src/planner/binder/statement/bind_summarize.cpp +1 -1
  1312. package/src/duckdb/src/planner/binder/statement/bind_update.cpp +5 -3
  1313. package/src/duckdb/src/planner/binder/statement/bind_vacuum.cpp +7 -6
  1314. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +36 -9
  1315. package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +34 -34
  1316. package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +72 -35
  1317. package/src/duckdb/src/planner/binder/tableref/bind_showref.cpp +99 -18
  1318. package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +23 -11
  1319. package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +22 -19
  1320. package/src/duckdb/src/planner/binder.cpp +23 -45
  1321. package/src/duckdb/src/planner/binding_alias.cpp +69 -0
  1322. package/src/duckdb/src/planner/bound_parameter_map.cpp +1 -1
  1323. package/src/duckdb/src/planner/bound_result_modifier.cpp +6 -2
  1324. package/src/duckdb/src/planner/collation_binding.cpp +38 -4
  1325. package/src/duckdb/src/planner/expression/bound_cast_expression.cpp +17 -5
  1326. package/src/duckdb/src/planner/expression/bound_expression.cpp +1 -1
  1327. package/src/duckdb/src/planner/expression/bound_function_expression.cpp +8 -1
  1328. package/src/duckdb/src/planner/expression/bound_parameter_expression.cpp +2 -2
  1329. package/src/duckdb/src/planner/expression/bound_window_expression.cpp +24 -4
  1330. package/src/duckdb/src/planner/expression.cpp +7 -1
  1331. package/src/duckdb/src/planner/expression_binder/aggregate_binder.cpp +1 -1
  1332. package/src/duckdb/src/planner/expression_binder/base_select_binder.cpp +2 -2
  1333. package/src/duckdb/src/planner/expression_binder/group_binder.cpp +2 -2
  1334. package/src/duckdb/src/planner/expression_binder/having_binder.cpp +16 -0
  1335. package/src/duckdb/src/planner/expression_binder/index_binder.cpp +53 -1
  1336. package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +3 -3
  1337. package/src/duckdb/src/planner/expression_binder/order_binder.cpp +8 -8
  1338. package/src/duckdb/src/planner/expression_binder/relation_binder.cpp +1 -1
  1339. package/src/duckdb/src/planner/expression_binder/select_bind_state.cpp +2 -2
  1340. package/src/duckdb/src/planner/expression_binder/table_function_binder.cpp +1 -1
  1341. package/src/duckdb/src/planner/expression_binder/update_binder.cpp +1 -1
  1342. package/src/duckdb/src/planner/expression_binder.cpp +7 -7
  1343. package/src/duckdb/src/planner/expression_iterator.cpp +6 -3
  1344. package/src/duckdb/src/planner/filter/constant_filter.cpp +17 -2
  1345. package/src/duckdb/src/planner/filter/dynamic_filter.cpp +68 -0
  1346. package/src/duckdb/src/planner/filter/in_filter.cpp +84 -0
  1347. package/src/duckdb/src/planner/filter/null_filter.cpp +1 -2
  1348. package/src/duckdb/src/planner/filter/optional_filter.cpp +29 -0
  1349. package/src/duckdb/src/planner/filter/struct_filter.cpp +11 -6
  1350. package/src/duckdb/src/planner/joinside.cpp +6 -5
  1351. package/src/duckdb/src/planner/logical_operator.cpp +4 -1
  1352. package/src/duckdb/src/planner/logical_operator_visitor.cpp +68 -2
  1353. package/src/duckdb/src/planner/operator/logical_comparison_join.cpp +23 -0
  1354. package/src/duckdb/src/planner/operator/logical_create_index.cpp +16 -12
  1355. package/src/duckdb/src/planner/operator/logical_filter.cpp +1 -1
  1356. package/src/duckdb/src/planner/operator/logical_get.cpp +48 -25
  1357. package/src/duckdb/src/planner/operator/logical_insert.cpp +1 -1
  1358. package/src/duckdb/src/planner/operator/logical_join.cpp +1 -1
  1359. package/src/duckdb/src/planner/operator/logical_order.cpp +4 -11
  1360. package/src/duckdb/src/planner/operator/logical_top_n.cpp +7 -0
  1361. package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +33 -5
  1362. package/src/duckdb/src/planner/subquery/rewrite_correlated_expressions.cpp +2 -2
  1363. package/src/duckdb/src/planner/table_binding.cpp +74 -36
  1364. package/src/duckdb/src/planner/table_filter.cpp +5 -8
  1365. package/src/duckdb/src/storage/arena_allocator.cpp +5 -4
  1366. package/src/duckdb/src/storage/buffer/block_handle.cpp +88 -17
  1367. package/src/duckdb/src/storage/buffer/block_manager.cpp +34 -26
  1368. package/src/duckdb/src/storage/buffer/buffer_handle.cpp +2 -2
  1369. package/src/duckdb/src/storage/buffer/buffer_pool.cpp +70 -49
  1370. package/src/duckdb/src/storage/buffer_manager.cpp +4 -0
  1371. package/src/duckdb/src/storage/checkpoint_manager.cpp +24 -5
  1372. package/src/duckdb/src/storage/compression/bitpacking.cpp +14 -16
  1373. package/src/duckdb/src/storage/compression/dictionary/analyze.cpp +54 -0
  1374. package/src/duckdb/src/storage/compression/dictionary/common.cpp +90 -0
  1375. package/src/duckdb/src/storage/compression/dictionary/compression.cpp +174 -0
  1376. package/src/duckdb/src/storage/compression/dictionary/decompression.cpp +115 -0
  1377. package/src/duckdb/src/storage/compression/dictionary_compression.cpp +53 -545
  1378. package/src/duckdb/src/storage/compression/empty_validity.cpp +15 -0
  1379. package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +25 -16
  1380. package/src/duckdb/src/storage/compression/fsst.cpp +101 -47
  1381. package/src/duckdb/src/storage/compression/numeric_constant.cpp +92 -2
  1382. package/src/duckdb/src/storage/compression/rle.cpp +216 -46
  1383. package/src/duckdb/src/storage/compression/roaring/analyze.cpp +179 -0
  1384. package/src/duckdb/src/storage/compression/roaring/common.cpp +282 -0
  1385. package/src/duckdb/src/storage/compression/roaring/compress.cpp +481 -0
  1386. package/src/duckdb/src/storage/compression/roaring/metadata.cpp +262 -0
  1387. package/src/duckdb/src/storage/compression/roaring/scan.cpp +364 -0
  1388. package/src/duckdb/src/storage/compression/string_uncompressed.cpp +47 -65
  1389. package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +102 -39
  1390. package/src/duckdb/src/storage/compression/zstd.cpp +1049 -0
  1391. package/src/duckdb/src/storage/data_table.cpp +312 -172
  1392. package/src/duckdb/src/storage/local_storage.cpp +104 -46
  1393. package/src/duckdb/src/storage/metadata/metadata_manager.cpp +1 -1
  1394. package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +7 -3
  1395. package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +138 -58
  1396. package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +14 -0
  1397. package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +19 -8
  1398. package/src/duckdb/src/storage/serialization/serialize_statement.cpp +2 -0
  1399. package/src/duckdb/src/storage/serialization/serialize_table_filter.cpp +43 -0
  1400. package/src/duckdb/src/storage/serialization/serialize_types.cpp +32 -5
  1401. package/src/duckdb/src/storage/single_file_block_manager.cpp +6 -8
  1402. package/src/duckdb/src/storage/standard_buffer_manager.cpp +82 -71
  1403. package/src/duckdb/src/storage/statistics/column_statistics.cpp +3 -3
  1404. package/src/duckdb/src/storage/statistics/distinct_statistics.cpp +18 -17
  1405. package/src/duckdb/src/storage/statistics/numeric_stats.cpp +34 -22
  1406. package/src/duckdb/src/storage/statistics/string_stats.cpp +14 -3
  1407. package/src/duckdb/src/storage/storage_info.cpp +72 -10
  1408. package/src/duckdb/src/storage/storage_manager.cpp +41 -47
  1409. package/src/duckdb/src/storage/table/array_column_data.cpp +7 -1
  1410. package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +10 -9
  1411. package/src/duckdb/src/storage/table/column_data.cpp +105 -43
  1412. package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +307 -132
  1413. package/src/duckdb/src/storage/table/column_segment.cpp +36 -13
  1414. package/src/duckdb/src/storage/table/list_column_data.cpp +4 -4
  1415. package/src/duckdb/src/storage/table/row_group.cpp +159 -66
  1416. package/src/duckdb/src/storage/table/row_group_collection.cpp +157 -68
  1417. package/src/duckdb/src/storage/table/row_version_manager.cpp +33 -10
  1418. package/src/duckdb/src/storage/table/scan_state.cpp +21 -7
  1419. package/src/duckdb/src/storage/table/standard_column_data.cpp +68 -5
  1420. package/src/duckdb/src/storage/table/struct_column_data.cpp +42 -4
  1421. package/src/duckdb/src/storage/table/table_statistics.cpp +91 -5
  1422. package/src/duckdb/src/storage/table/update_segment.cpp +287 -210
  1423. package/src/duckdb/src/storage/table_index_list.cpp +55 -58
  1424. package/src/duckdb/src/storage/temporary_file_manager.cpp +412 -149
  1425. package/src/duckdb/src/storage/wal_replay.cpp +132 -48
  1426. package/src/duckdb/src/storage/write_ahead_log.cpp +75 -48
  1427. package/src/duckdb/src/transaction/cleanup_state.cpp +0 -1
  1428. package/src/duckdb/src/transaction/commit_state.cpp +23 -14
  1429. package/src/duckdb/src/transaction/duck_transaction.cpp +29 -25
  1430. package/src/duckdb/src/transaction/duck_transaction_manager.cpp +18 -6
  1431. package/src/duckdb/src/transaction/meta_transaction.cpp +3 -2
  1432. package/src/duckdb/src/transaction/rollback_state.cpp +5 -2
  1433. package/src/duckdb/src/transaction/transaction_context.cpp +9 -1
  1434. package/src/duckdb/src/transaction/undo_buffer.cpp +35 -27
  1435. package/src/duckdb/src/transaction/undo_buffer_allocator.cpp +72 -0
  1436. package/src/duckdb/src/transaction/wal_write_state.cpp +12 -10
  1437. package/src/duckdb/src/verification/copied_statement_verifier.cpp +7 -4
  1438. package/src/duckdb/src/verification/deserialized_statement_verifier.cpp +7 -5
  1439. package/src/duckdb/src/verification/external_statement_verifier.cpp +7 -4
  1440. package/src/duckdb/src/verification/fetch_row_verifier.cpp +7 -4
  1441. package/src/duckdb/src/verification/no_operator_caching_verifier.cpp +8 -4
  1442. package/src/duckdb/src/verification/parsed_statement_verifier.cpp +7 -4
  1443. package/src/duckdb/src/verification/prepared_statement_verifier.cpp +16 -12
  1444. package/src/duckdb/src/verification/statement_verifier.cpp +20 -15
  1445. package/src/duckdb/src/verification/unoptimized_statement_verifier.cpp +7 -4
  1446. package/src/duckdb/third_party/fsst/libfsst.hpp +1 -0
  1447. package/src/duckdb/third_party/httplib/httplib.hpp +15 -22
  1448. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +4 -2
  1449. package/src/duckdb/third_party/libpg_query/pg_functions.cpp +2 -4
  1450. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +14278 -13832
  1451. package/src/duckdb/third_party/parquet/parquet_types.cpp +3410 -1686
  1452. package/src/duckdb/third_party/parquet/parquet_types.h +1585 -1204
  1453. package/src/duckdb/third_party/skiplist/SkipList.h +0 -1
  1454. package/src/duckdb/third_party/snappy/snappy-stubs-internal.h +13 -15
  1455. package/src/duckdb/third_party/zstd/common/debug.cpp +36 -0
  1456. package/src/duckdb/third_party/zstd/common/entropy_common.cpp +173 -49
  1457. package/src/duckdb/third_party/zstd/common/error_private.cpp +11 -3
  1458. package/src/duckdb/third_party/zstd/common/fse_decompress.cpp +126 -97
  1459. package/src/duckdb/third_party/zstd/common/pool.cpp +376 -0
  1460. package/src/duckdb/third_party/zstd/common/threading.cpp +193 -0
  1461. package/src/duckdb/third_party/zstd/common/xxhash.cpp +18 -14
  1462. package/src/duckdb/third_party/zstd/common/zstd_common.cpp +3 -38
  1463. package/src/duckdb/third_party/zstd/compress/fse_compress.cpp +93 -165
  1464. package/src/duckdb/third_party/zstd/compress/hist.cpp +28 -31
  1465. package/src/duckdb/third_party/zstd/compress/huf_compress.cpp +957 -291
  1466. package/src/duckdb/third_party/zstd/compress/zstd_compress.cpp +3988 -1124
  1467. package/src/duckdb/third_party/zstd/compress/zstd_compress_literals.cpp +120 -43
  1468. package/src/duckdb/third_party/zstd/compress/zstd_compress_sequences.cpp +47 -23
  1469. package/src/duckdb/third_party/zstd/compress/zstd_compress_superblock.cpp +274 -424
  1470. package/src/duckdb/third_party/zstd/compress/zstd_double_fast.cpp +403 -153
  1471. package/src/duckdb/third_party/zstd/compress/zstd_fast.cpp +741 -268
  1472. package/src/duckdb/third_party/zstd/compress/zstd_lazy.cpp +1339 -278
  1473. package/src/duckdb/third_party/zstd/compress/zstd_ldm.cpp +334 -222
  1474. package/src/duckdb/third_party/zstd/compress/zstd_opt.cpp +674 -298
  1475. package/src/duckdb/third_party/zstd/compress/zstdmt_compress.cpp +1885 -0
  1476. package/src/duckdb/third_party/zstd/decompress/huf_decompress.cpp +1247 -586
  1477. package/src/duckdb/third_party/zstd/decompress/zstd_ddict.cpp +18 -17
  1478. package/src/duckdb/third_party/zstd/decompress/zstd_decompress.cpp +724 -270
  1479. package/src/duckdb/third_party/zstd/decompress/zstd_decompress_block.cpp +1193 -393
  1480. package/src/duckdb/third_party/zstd/deprecated/zbuff_common.cpp +30 -0
  1481. package/src/duckdb/third_party/zstd/deprecated/zbuff_compress.cpp +171 -0
  1482. package/src/duckdb/third_party/zstd/deprecated/zbuff_decompress.cpp +80 -0
  1483. package/src/duckdb/third_party/zstd/dict/cover.cpp +1271 -0
  1484. package/src/duckdb/third_party/zstd/dict/divsufsort.cpp +1916 -0
  1485. package/src/duckdb/third_party/zstd/dict/fastcover.cpp +775 -0
  1486. package/src/duckdb/third_party/zstd/dict/zdict.cpp +1139 -0
  1487. package/src/duckdb/third_party/zstd/include/zdict.h +473 -0
  1488. package/src/duckdb/third_party/zstd/include/zstd/common/allocations.h +58 -0
  1489. package/src/duckdb/third_party/zstd/include/zstd/common/bits.h +204 -0
  1490. package/src/duckdb/third_party/zstd/include/zstd/common/bitstream.h +88 -85
  1491. package/src/duckdb/third_party/zstd/include/zstd/common/compiler.h +243 -47
  1492. package/src/duckdb/third_party/zstd/include/zstd/common/cpu.h +253 -0
  1493. package/src/duckdb/third_party/zstd/include/zstd/common/debug.h +31 -31
  1494. package/src/duckdb/third_party/zstd/include/zstd/common/error_private.h +94 -6
  1495. package/src/duckdb/third_party/zstd/include/zstd/common/fse.h +424 -64
  1496. package/src/duckdb/third_party/zstd/include/zstd/common/huf.h +255 -70
  1497. package/src/duckdb/third_party/zstd/include/zstd/common/mem.h +125 -85
  1498. package/src/duckdb/third_party/zstd/include/zstd/common/pool.h +84 -0
  1499. package/src/duckdb/third_party/zstd/include/zstd/common/portability_macros.h +158 -0
  1500. package/src/duckdb/third_party/zstd/include/zstd/common/threading.h +152 -0
  1501. package/src/duckdb/third_party/zstd/include/zstd/common/{xxhash.h → xxhash.hpp} +0 -1
  1502. package/src/duckdb/third_party/zstd/include/zstd/common/{xxhash_static.h → xxhash_static.hpp} +1 -1
  1503. package/src/duckdb/third_party/zstd/include/zstd/common/zstd_deps.h +122 -0
  1504. package/src/duckdb/third_party/zstd/include/zstd/common/zstd_internal.h +143 -174
  1505. package/src/duckdb/third_party/zstd/include/zstd/common/zstd_trace.h +159 -0
  1506. package/src/duckdb/third_party/zstd/include/zstd/compress/clevels.h +136 -0
  1507. package/src/duckdb/third_party/zstd/include/zstd/compress/hist.h +4 -4
  1508. package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_compress_internal.h +631 -220
  1509. package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_compress_literals.h +17 -7
  1510. package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_compress_sequences.h +2 -2
  1511. package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_compress_superblock.h +3 -2
  1512. package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_cwksp.h +256 -153
  1513. package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_double_fast.h +16 -3
  1514. package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_fast.h +4 -3
  1515. package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_lazy.h +145 -11
  1516. package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_ldm.h +14 -6
  1517. package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_ldm_geartab.h +110 -0
  1518. package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_opt.h +33 -9
  1519. package/src/duckdb/third_party/zstd/include/zstd/compress/zstdmt_compress.h +107 -0
  1520. package/src/duckdb/third_party/zstd/include/zstd/decompress/zstd_ddict.h +4 -3
  1521. package/src/duckdb/third_party/zstd/include/zstd/decompress/zstd_decompress_block.h +20 -6
  1522. package/src/duckdb/third_party/zstd/include/zstd/decompress/zstd_decompress_internal.h +88 -16
  1523. package/src/duckdb/third_party/zstd/include/zstd/deprecated/zbuff.h +214 -0
  1524. package/src/duckdb/third_party/zstd/include/zstd/dict/cover.h +156 -0
  1525. package/src/duckdb/third_party/zstd/include/zstd/dict/divsufsort.h +62 -0
  1526. package/src/duckdb/third_party/zstd/include/zstd.h +2171 -93
  1527. package/src/duckdb/third_party/zstd/include/{zstd/common/zstd_errors.h → zstd_errors.h} +32 -10
  1528. package/src/duckdb/ub_extension_core_functions_aggregate_algebraic.cpp +8 -0
  1529. package/src/duckdb/ub_extension_core_functions_aggregate_distributive.cpp +20 -0
  1530. package/src/duckdb/ub_extension_core_functions_aggregate_holistic.cpp +12 -0
  1531. package/src/duckdb/ub_extension_core_functions_aggregate_nested.cpp +6 -0
  1532. package/src/duckdb/ub_extension_core_functions_aggregate_regression.cpp +14 -0
  1533. package/src/duckdb/ub_extension_core_functions_scalar_array.cpp +4 -0
  1534. package/src/duckdb/ub_extension_core_functions_scalar_bit.cpp +2 -0
  1535. package/src/duckdb/ub_extension_core_functions_scalar_blob.cpp +4 -0
  1536. package/src/duckdb/ub_extension_core_functions_scalar_date.cpp +20 -0
  1537. package/src/duckdb/ub_extension_core_functions_scalar_debug.cpp +2 -0
  1538. package/src/duckdb/ub_extension_core_functions_scalar_enum.cpp +2 -0
  1539. package/src/duckdb/ub_extension_core_functions_scalar_generic.cpp +18 -0
  1540. package/src/duckdb/ub_extension_core_functions_scalar_list.cpp +22 -0
  1541. package/src/duckdb/ub_extension_core_functions_scalar_map.cpp +14 -0
  1542. package/src/duckdb/ub_extension_core_functions_scalar_math.cpp +2 -0
  1543. package/src/duckdb/ub_extension_core_functions_scalar_operators.cpp +2 -0
  1544. package/src/duckdb/ub_extension_core_functions_scalar_random.cpp +4 -0
  1545. package/src/duckdb/ub_extension_core_functions_scalar_string.cpp +48 -0
  1546. package/src/duckdb/ub_extension_core_functions_scalar_struct.cpp +2 -0
  1547. package/src/duckdb/ub_extension_core_functions_scalar_union.cpp +6 -0
  1548. package/src/duckdb/ub_src_common.cpp +4 -0
  1549. package/src/duckdb/ub_src_common_arrow.cpp +3 -1
  1550. package/src/duckdb/ub_src_execution.cpp +0 -6
  1551. package/src/duckdb/ub_src_execution_operator_aggregate.cpp +2 -0
  1552. package/src/duckdb/ub_src_execution_operator_csv_scanner_encode.cpp +2 -0
  1553. package/src/duckdb/ub_src_execution_operator_csv_scanner_util.cpp +2 -0
  1554. package/src/duckdb/ub_src_execution_sample.cpp +4 -0
  1555. package/src/duckdb/ub_src_function.cpp +6 -0
  1556. package/src/duckdb/ub_src_function_aggregate.cpp +0 -2
  1557. package/src/duckdb/ub_src_function_aggregate_distributive.cpp +3 -1
  1558. package/src/duckdb/ub_src_function_scalar.cpp +2 -8
  1559. package/src/duckdb/ub_src_function_scalar_date.cpp +2 -0
  1560. package/src/duckdb/ub_src_function_scalar_generic.cpp +2 -2
  1561. package/src/duckdb/ub_src_function_scalar_map.cpp +2 -0
  1562. package/src/duckdb/ub_src_function_scalar_operator.cpp +8 -0
  1563. package/src/duckdb/ub_src_function_scalar_string.cpp +10 -0
  1564. package/src/duckdb/ub_src_function_scalar_struct.cpp +4 -0
  1565. package/src/duckdb/ub_src_function_scalar_system.cpp +2 -0
  1566. package/src/duckdb/ub_src_function_table_system.cpp +6 -0
  1567. package/src/duckdb/ub_src_function_window.cpp +36 -0
  1568. package/src/duckdb/ub_src_logging.cpp +8 -0
  1569. package/src/duckdb/ub_src_main_settings.cpp +3 -1
  1570. package/src/duckdb/ub_src_optimizer.cpp +8 -0
  1571. package/src/duckdb/ub_src_optimizer_pushdown.cpp +2 -0
  1572. package/src/duckdb/ub_src_optimizer_rule.cpp +2 -0
  1573. package/src/duckdb/ub_src_parser.cpp +2 -0
  1574. package/src/duckdb/ub_src_parser_parsed_data.cpp +2 -0
  1575. package/src/duckdb/ub_src_planner.cpp +2 -0
  1576. package/src/duckdb/ub_src_planner_filter.cpp +6 -0
  1577. package/src/duckdb/ub_src_storage_compression.cpp +4 -0
  1578. package/src/duckdb/ub_src_storage_compression_dictionary.cpp +8 -0
  1579. package/src/duckdb/ub_src_storage_compression_roaring.cpp +10 -0
  1580. package/src/duckdb/ub_src_transaction.cpp +2 -0
  1581. package/vendor.py +1 -1
  1582. package/src/duckdb/extension/json/yyjson/include/yyjson.hpp +0 -6003
  1583. package/src/duckdb/extension/json/yyjson/yyjson.cpp +0 -8218
  1584. package/src/duckdb/src/common/arrow/appender/list_data.cpp +0 -78
  1585. package/src/duckdb/src/common/arrow/appender/map_data.cpp +0 -91
  1586. package/src/duckdb/src/common/cycle_counter.cpp +0 -76
  1587. package/src/duckdb/src/common/field_writer.cpp +0 -97
  1588. package/src/duckdb/src/common/http_state.cpp +0 -95
  1589. package/src/duckdb/src/common/preserved_error.cpp +0 -87
  1590. package/src/duckdb/src/common/row_operations/row_match.cpp +0 -359
  1591. package/src/duckdb/src/common/serializer/buffered_deserializer.cpp +0 -27
  1592. package/src/duckdb/src/common/serializer/buffered_serializer.cpp +0 -36
  1593. package/src/duckdb/src/common/serializer/format_serializer.cpp +0 -15
  1594. package/src/duckdb/src/common/serializer.cpp +0 -24
  1595. package/src/duckdb/src/common/types/chunk_collection.cpp +0 -190
  1596. package/src/duckdb/src/core_functions/aggregate/distributive/entropy.cpp +0 -183
  1597. package/src/duckdb/src/core_functions/scalar/date/current.cpp +0 -54
  1598. package/src/duckdb/src/core_functions/scalar/list/list_cosine_similarity.cpp +0 -78
  1599. package/src/duckdb/src/core_functions/scalar/list/list_inner_product.cpp +0 -70
  1600. package/src/duckdb/src/core_functions/scalar/list/list_lambdas.cpp +0 -412
  1601. package/src/duckdb/src/core_functions/scalar/secret/which_secret.cpp +0 -28
  1602. package/src/duckdb/src/core_functions/scalar/string/jaro_winkler.cpp +0 -71
  1603. package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +0 -238
  1604. package/src/duckdb/src/execution/index/art/node16.cpp +0 -196
  1605. package/src/duckdb/src/execution/index/art/node4.cpp +0 -189
  1606. package/src/duckdb/src/execution/index/unknown_index.cpp +0 -65
  1607. package/src/duckdb/src/execution/operator/csv_scanner/base_csv_reader.cpp +0 -595
  1608. package/src/duckdb/src/execution/operator/csv_scanner/buffered_csv_reader.cpp +0 -434
  1609. package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer.cpp +0 -89
  1610. package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer_manager.cpp +0 -90
  1611. package/src/duckdb/src/execution/operator/csv_scanner/csv_file_handle.cpp +0 -95
  1612. package/src/duckdb/src/execution/operator/csv_scanner/csv_reader_options.cpp +0 -494
  1613. package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine.cpp +0 -35
  1614. package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine_cache.cpp +0 -99
  1615. package/src/duckdb/src/execution/operator/csv_scanner/parallel_csv_reader.cpp +0 -689
  1616. package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +0 -242
  1617. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +0 -695
  1618. package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +0 -1487
  1619. package/src/duckdb/src/execution/operator/persistent/csv_buffer.cpp +0 -72
  1620. package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +0 -158
  1621. package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +0 -280
  1622. package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +0 -666
  1623. package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +0 -499
  1624. package/src/duckdb/src/execution/operator/schema/physical_create_index.cpp +0 -207
  1625. package/src/duckdb/src/execution/partitionable_hashtable.cpp +0 -207
  1626. package/src/duckdb/src/execution/physical_plan/plan_limit_percent.cpp +0 -18
  1627. package/src/duckdb/src/execution/physical_plan/plan_show_select.cpp +0 -47
  1628. package/src/duckdb/src/execution/reservoir_sample.cpp +0 -324
  1629. package/src/duckdb/src/execution/window_executor.cpp +0 -1830
  1630. package/src/duckdb/src/execution/window_segment_tree.cpp +0 -2073
  1631. package/src/duckdb/src/extension_forward_decl/icu.cpp +0 -59
  1632. package/src/duckdb/src/function/aggregate/distributive_functions.cpp +0 -15
  1633. package/src/duckdb/src/function/scalar/compressed_materialization_functions.cpp +0 -29
  1634. package/src/duckdb/src/function/scalar/generic_functions.cpp +0 -11
  1635. package/src/duckdb/src/function/scalar/list/list_concat.cpp +0 -143
  1636. package/src/duckdb/src/function/scalar/operators.cpp +0 -14
  1637. package/src/duckdb/src/function/scalar/sequence_functions.cpp +0 -10
  1638. package/src/duckdb/src/function/scalar/string_functions.cpp +0 -22
  1639. package/src/duckdb/src/function/table/pragma_detailed_profiling_output.cpp +0 -173
  1640. package/src/duckdb/src/function/table/pragma_last_profiling_output.cpp +0 -101
  1641. package/src/duckdb/src/include/duckdb/catalog/mapping_value.hpp +0 -92
  1642. package/src/duckdb/src/include/duckdb/common/arrow/arrow_types_extension.hpp +0 -42
  1643. package/src/duckdb/src/include/duckdb/common/cycle_counter.hpp +0 -68
  1644. package/src/duckdb/src/include/duckdb/common/enums/index_type.hpp +0 -34
  1645. package/src/duckdb/src/include/duckdb/common/http_state.hpp +0 -113
  1646. package/src/duckdb/src/include/duckdb/common/platform.h +0 -58
  1647. package/src/duckdb/src/include/duckdb/common/preserved_error.hpp +0 -59
  1648. package/src/duckdb/src/include/duckdb/common/serializer/deserialization_data.hpp +0 -192
  1649. package/src/duckdb/src/include/duckdb/common/types/chunk_collection.hpp +0 -137
  1650. package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +0 -65
  1651. package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +0 -63
  1652. package/src/duckdb/src/include/duckdb/execution/index/unknown_index.hpp +0 -65
  1653. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/buffer_manager/csv_buffer.hpp +0 -103
  1654. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/buffer_manager/csv_buffer_manager.hpp +0 -74
  1655. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/buffer_manager/csv_file_handle.hpp +0 -60
  1656. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_sniffer.hpp +0 -253
  1657. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/options/csv_option.hpp +0 -155
  1658. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/options/csv_reader_options.hpp +0 -163
  1659. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/options/state_machine_options.hpp +0 -35
  1660. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner/base_scanner.hpp +0 -228
  1661. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner/column_count_scanner.hpp +0 -70
  1662. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner/scanner_boundary.hpp +0 -93
  1663. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner/skip_scanner.hpp +0 -60
  1664. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner/string_value_scanner.hpp +0 -197
  1665. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/quote_rules.hpp +0 -21
  1666. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/state_machine/csv_state.hpp +0 -30
  1667. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/state_machine/csv_state_machine.hpp +0 -99
  1668. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.hpp +0 -87
  1669. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/table_function/csv_file_scanner.hpp +0 -70
  1670. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/table_function/global_csv_state.hpp +0 -80
  1671. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/util/csv_casting.hpp +0 -137
  1672. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/util/csv_error.hpp +0 -104
  1673. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_index_join.hpp +0 -79
  1674. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/base_csv_reader.hpp +0 -119
  1675. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/buffered_csv_reader.hpp +0 -72
  1676. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer.hpp +0 -110
  1677. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer_manager.hpp +0 -103
  1678. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_file_handle.hpp +0 -59
  1679. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_line_info.hpp +0 -46
  1680. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_reader_options.hpp +0 -210
  1681. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_sniffer.hpp +0 -131
  1682. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state.hpp +0 -28
  1683. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine.hpp +0 -70
  1684. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine_cache.hpp +0 -65
  1685. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/parallel_csv_reader.hpp +0 -167
  1686. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/quote_rules.hpp +0 -21
  1687. package/src/duckdb/src/include/duckdb/execution/window_executor.hpp +0 -343
  1688. package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +0 -165
  1689. package/src/duckdb/src/include/duckdb/optimizer/column_lifetime_optimizer.hpp +0 -45
  1690. package/src/duckdb/src/include/duckdb/optimizer/join_order/estimated_properties.hpp +0 -57
  1691. package/src/duckdb/src/include/duckdb/parser/parsed_data/comment_on_info.hpp +0 -45
  1692. package/src/duckdb/src/include/duckdb/parser/statement/show_statement.hpp +0 -32
  1693. package/src/duckdb/src/include/duckdb/planner/operator/logical_limit_percent.hpp +0 -49
  1694. package/src/duckdb/src/include/duckdb/planner/operator/logical_show.hpp +0 -42
  1695. package/src/duckdb/src/main/settings/settings.cpp +0 -2056
  1696. package/src/duckdb/src/optimizer/join_order/estimated_properties.cpp +0 -36
  1697. package/src/duckdb/src/parser/parsed_data/comment_on_info.cpp +0 -19
  1698. package/src/duckdb/src/parser/statement/show_statement.cpp +0 -15
  1699. package/src/duckdb/src/planner/binder/statement/bind_show.cpp +0 -30
  1700. package/src/duckdb/src/planner/operator/logical_limit_percent.cpp +0 -14
  1701. package/src/duckdb/src/storage/statistics/numeric_stats_union.cpp +0 -70
  1702. package/src/duckdb/third_party/fsst/fsst_avx512.cpp +0 -140
  1703. package/src/duckdb/third_party/fsst/fsst_avx512.inc +0 -57
  1704. package/src/duckdb/third_party/fsst/fsst_avx512_unroll1.inc +0 -57
  1705. package/src/duckdb/third_party/fsst/fsst_avx512_unroll2.inc +0 -114
  1706. package/src/duckdb/third_party/fsst/fsst_avx512_unroll3.inc +0 -171
  1707. package/src/duckdb/third_party/fsst/fsst_avx512_unroll4.inc +0 -228
  1708. package/src/duckdb/third_party/parquet/parquet_constants.cpp +0 -17
  1709. package/src/duckdb/third_party/parquet/parquet_constants.h +0 -24
  1710. package/src/duckdb/third_party/re2/util/pod_array.h +0 -55
  1711. package/src/duckdb/third_party/re2/util/sparse_array.h +0 -392
  1712. package/src/duckdb/third_party/re2/util/sparse_set.h +0 -264
  1713. package/src/duckdb/third_party/zstd/include/zstd/common/fse_static.h +0 -421
  1714. package/src/duckdb/third_party/zstd/include/zstd/common/huf_static.h +0 -238
  1715. package/src/duckdb/third_party/zstd/include/zstd_static.h +0 -1070
  1716. package/src/duckdb/ub_src_core_functions.cpp +0 -6
  1717. package/src/duckdb/ub_src_core_functions_aggregate_algebraic.cpp +0 -8
  1718. package/src/duckdb/ub_src_core_functions_aggregate_distributive.cpp +0 -24
  1719. package/src/duckdb/ub_src_core_functions_aggregate_holistic.cpp +0 -12
  1720. package/src/duckdb/ub_src_core_functions_aggregate_nested.cpp +0 -6
  1721. package/src/duckdb/ub_src_core_functions_aggregate_regression.cpp +0 -14
  1722. package/src/duckdb/ub_src_core_functions_scalar_array.cpp +0 -4
  1723. package/src/duckdb/ub_src_core_functions_scalar_bit.cpp +0 -2
  1724. package/src/duckdb/ub_src_core_functions_scalar_blob.cpp +0 -6
  1725. package/src/duckdb/ub_src_core_functions_scalar_date.cpp +0 -22
  1726. package/src/duckdb/ub_src_core_functions_scalar_debug.cpp +0 -2
  1727. package/src/duckdb/ub_src_core_functions_scalar_enum.cpp +0 -2
  1728. package/src/duckdb/ub_src_core_functions_scalar_generic.cpp +0 -18
  1729. package/src/duckdb/ub_src_core_functions_scalar_list.cpp +0 -22
  1730. package/src/duckdb/ub_src_core_functions_scalar_map.cpp +0 -16
  1731. package/src/duckdb/ub_src_core_functions_scalar_math.cpp +0 -2
  1732. package/src/duckdb/ub_src_core_functions_scalar_operators.cpp +0 -2
  1733. package/src/duckdb/ub_src_core_functions_scalar_random.cpp +0 -4
  1734. package/src/duckdb/ub_src_core_functions_scalar_secret.cpp +0 -2
  1735. package/src/duckdb/ub_src_core_functions_scalar_string.cpp +0 -58
  1736. package/src/duckdb/ub_src_core_functions_scalar_struct.cpp +0 -4
  1737. package/src/duckdb/ub_src_core_functions_scalar_union.cpp +0 -6
  1738. package/src/duckdb/ub_src_execution_operator_csv_scanner.cpp +0 -18
  1739. package/src/duckdb/ub_src_function_scalar_operators.cpp +0 -8
  1740. /package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/algebraic/covar.hpp +0 -0
  1741. /package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/algebraic/stddev.hpp +0 -0
  1742. /package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/sum_helpers.hpp +0 -0
  1743. /package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/array_kernels.hpp +0 -0
  1744. /package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/function_list.hpp +0 -0
  1745. /package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/secret_functions.hpp +0 -0
  1746. /package/src/duckdb/src/function/scalar/{operators → operator}/multiply.cpp +0 -0
  1747. /package/src/duckdb/src/function/scalar/{operators → operator}/subtract.cpp +0 -0
@@ -1,6 +1,6 @@
1
1
  /* ******************************************************************
2
2
  * Huffman encoder, part of New Generation Entropy library
3
- * Copyright (c) 2013-2020, Yann Collet, Facebook, Inc.
3
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
4
4
  *
5
5
  * You can contact the author at :
6
6
  * - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
@@ -23,45 +23,131 @@
23
23
  /* **************************************************************
24
24
  * Includes
25
25
  ****************************************************************/
26
- #include <string.h> /* memcpy, memset */
27
- #include <stdio.h> /* printf (debug) */
26
+ #include "zstd/common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memset */
28
27
  #include "zstd/common/compiler.h"
29
28
  #include "zstd/common/bitstream.h"
30
29
  #include "zstd/compress/hist.h"
30
+ #define FSE_STATIC_LINKING_ONLY /* FSE_optimalTableLog_internal */
31
31
  #include "zstd/common/fse.h" /* header compression */
32
- #include "zstd/common/fse_static.h"
33
32
  #include "zstd/common/huf.h"
34
- #include "zstd/common/huf_static.h"
35
33
  #include "zstd/common/error_private.h"
34
+ #include "zstd/common/bits.h" /* ZSTD_highbit32 */
36
35
 
36
+ namespace duckdb_zstd {
37
37
 
38
38
  /* **************************************************************
39
39
  * Error Management
40
40
  ****************************************************************/
41
- // #define HUF_isError ERR_isError
41
+ #define HUF_isError ERR_isError
42
42
  #define HUF_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */
43
43
 
44
44
 
45
- namespace duckdb_zstd {
46
45
  /* **************************************************************
47
- * Utils
46
+ * Required declarations
47
+ ****************************************************************/
48
+ typedef struct nodeElt_s {
49
+ U32 count;
50
+ U16 parent;
51
+ BYTE byte;
52
+ BYTE nbBits;
53
+ } nodeElt;
54
+
55
+
56
+ /* **************************************************************
57
+ * Debug Traces
48
58
  ****************************************************************/
49
- unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
59
+
60
+ #if DEBUGLEVEL >= 2
61
+
62
+ static size_t showU32(const U32* arr, size_t size)
50
63
  {
51
- return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
64
+ size_t u;
65
+ for (u=0; u<size; u++) {
66
+ RAWLOG(6, " %u", arr[u]); (void)arr;
67
+ }
68
+ RAWLOG(6, " \n");
69
+ return size;
52
70
  }
53
71
 
72
+ static size_t HUF_getNbBits(HUF_CElt elt);
73
+
74
+ static size_t showCTableBits(const HUF_CElt* ctable, size_t size)
75
+ {
76
+ size_t u;
77
+ for (u=0; u<size; u++) {
78
+ RAWLOG(6, " %zu", HUF_getNbBits(ctable[u])); (void)ctable;
79
+ }
80
+ RAWLOG(6, " \n");
81
+ return size;
82
+
83
+ }
84
+
85
+ static size_t showHNodeSymbols(const nodeElt* hnode, size_t size)
86
+ {
87
+ size_t u;
88
+ for (u=0; u<size; u++) {
89
+ RAWLOG(6, " %u", hnode[u].byte); (void)hnode;
90
+ }
91
+ RAWLOG(6, " \n");
92
+ return size;
93
+ }
94
+
95
+ static size_t showHNodeBits(const nodeElt* hnode, size_t size)
96
+ {
97
+ size_t u;
98
+ for (u=0; u<size; u++) {
99
+ RAWLOG(6, " %u", hnode[u].nbBits); (void)hnode;
100
+ }
101
+ RAWLOG(6, " \n");
102
+ return size;
103
+ }
104
+
105
+ #endif
106
+
54
107
 
55
108
  /* *******************************************************
56
109
  * HUF : Huffman block compression
57
110
  *********************************************************/
111
+ #define HUF_WORKSPACE_MAX_ALIGNMENT 8
112
+
113
+ static void* HUF_alignUpWorkspace(void* workspace, size_t* workspaceSizePtr, size_t align)
114
+ {
115
+ size_t const mask = align - 1;
116
+ size_t const rem = (size_t)workspace & mask;
117
+ size_t const add = (align - rem) & mask;
118
+ BYTE* const aligned = (BYTE*)workspace + add;
119
+ assert((align & (align - 1)) == 0); /* pow 2 */
120
+ assert(align <= HUF_WORKSPACE_MAX_ALIGNMENT);
121
+ if (*workspaceSizePtr >= add) {
122
+ assert(add < align);
123
+ assert(((size_t)aligned & mask) == 0);
124
+ *workspaceSizePtr -= add;
125
+ return aligned;
126
+ } else {
127
+ *workspaceSizePtr = 0;
128
+ return NULL;
129
+ }
130
+ }
131
+
132
+
58
133
  /* HUF_compressWeights() :
59
134
  * Same as FSE_compress(), but dedicated to huff0's weights compression.
60
135
  * The use case needs much less stack memory.
61
136
  * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.
62
137
  */
63
138
  #define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6
64
- static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable, size_t wtSize)
139
+
140
+ typedef struct {
141
+ FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
142
+ U32 scratchBuffer[FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(HUF_TABLELOG_MAX, MAX_FSE_TABLELOG_FOR_HUFF_HEADER)];
143
+ unsigned count[HUF_TABLELOG_MAX+1];
144
+ S16 norm[HUF_TABLELOG_MAX+1];
145
+ } HUF_CompressWeightsWksp;
146
+
147
+ static size_t
148
+ HUF_compressWeights(void* dst, size_t dstSize,
149
+ const void* weightTable, size_t wtSize,
150
+ void* workspace, size_t workspaceSize)
65
151
  {
66
152
  BYTE* const ostart = (BYTE*) dst;
67
153
  BYTE* op = ostart;
@@ -69,33 +155,30 @@ static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weight
69
155
 
70
156
  unsigned maxSymbolValue = HUF_TABLELOG_MAX;
71
157
  U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
158
+ HUF_CompressWeightsWksp* wksp = (HUF_CompressWeightsWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32));
72
159
 
73
- FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)];
74
- BYTE scratchBuffer[1<<MAX_FSE_TABLELOG_FOR_HUFF_HEADER];
75
-
76
- unsigned count[HUF_TABLELOG_MAX+1];
77
- S16 norm[HUF_TABLELOG_MAX+1];
160
+ if (workspaceSize < sizeof(HUF_CompressWeightsWksp)) return ERROR(GENERIC);
78
161
 
79
162
  /* init conditions */
80
163
  if (wtSize <= 1) return 0; /* Not compressible */
81
164
 
82
165
  /* Scan input and build symbol stats */
83
- { unsigned const maxCount = HIST_count_simple(count, &maxSymbolValue, weightTable, wtSize); /* never fails */
166
+ { unsigned const maxCount = HIST_count_simple(wksp->count, &maxSymbolValue, weightTable, wtSize); /* never fails */
84
167
  if (maxCount == wtSize) return 1; /* only a single symbol in src : rle */
85
168
  if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */
86
169
  }
87
170
 
88
171
  tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
89
- CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue) );
172
+ CHECK_F( FSE_normalizeCount(wksp->norm, tableLog, wksp->count, wtSize, maxSymbolValue, /* useLowProbCount */ 0) );
90
173
 
91
174
  /* Write table description header */
92
- { CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), norm, maxSymbolValue, tableLog) );
175
+ { CHECK_V_F(hSize, FSE_writeNCount(op, (size_t)(oend-op), wksp->norm, maxSymbolValue, tableLog) );
93
176
  op += hSize;
94
177
  }
95
178
 
96
179
  /* Compress */
97
- CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, sizeof(scratchBuffer)) );
98
- { CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, CTable) );
180
+ CHECK_F( FSE_buildCTable_wksp(wksp->CTable, wksp->norm, maxSymbolValue, tableLog, wksp->scratchBuffer, sizeof(wksp->scratchBuffer)) );
181
+ { CHECK_V_F(cSize, FSE_compress_usingCTable(op, (size_t)(oend - op), weightTable, wtSize, wksp->CTable) );
99
182
  if (cSize == 0) return 0; /* not enough space for compressed data */
100
183
  op += cSize;
101
184
  }
@@ -103,35 +186,94 @@ static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weight
103
186
  return (size_t)(op-ostart);
104
187
  }
105
188
 
189
+ static size_t HUF_getNbBits(HUF_CElt elt)
190
+ {
191
+ return elt & 0xFF;
192
+ }
193
+
194
+ static size_t HUF_getNbBitsFast(HUF_CElt elt)
195
+ {
196
+ return elt;
197
+ }
198
+
199
+ static size_t HUF_getValue(HUF_CElt elt)
200
+ {
201
+ return elt & ~(size_t)0xFF;
202
+ }
203
+
204
+ static size_t HUF_getValueFast(HUF_CElt elt)
205
+ {
206
+ return elt;
207
+ }
208
+
209
+ static void HUF_setNbBits(HUF_CElt* elt, size_t nbBits)
210
+ {
211
+ assert(nbBits <= HUF_TABLELOG_ABSOLUTEMAX);
212
+ *elt = nbBits;
213
+ }
214
+
215
+ static void HUF_setValue(HUF_CElt* elt, size_t value)
216
+ {
217
+ size_t const nbBits = HUF_getNbBits(*elt);
218
+ if (nbBits > 0) {
219
+ assert((value >> nbBits) == 0);
220
+ *elt |= value << (sizeof(HUF_CElt) * 8 - nbBits);
221
+ }
222
+ }
106
223
 
107
- struct HUF_CElt_s {
108
- U16 val;
109
- BYTE nbBits;
110
- }; /* typedef'd to HUF_CElt within "zstd/common/huf.h" */
224
+ HUF_CTableHeader HUF_readCTableHeader(HUF_CElt const* ctable)
225
+ {
226
+ HUF_CTableHeader header;
227
+ ZSTD_memcpy(&header, ctable, sizeof(header));
228
+ return header;
229
+ }
111
230
 
112
- /*! HUF_writeCTable() :
113
- `CTable` : Huffman tree to save, using huf representation.
114
- @return : size of saved CTable */
115
- size_t HUF_writeCTable (void* dst, size_t maxDstSize,
116
- const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog)
231
+ static void HUF_writeCTableHeader(HUF_CElt* ctable, U32 tableLog, U32 maxSymbolValue)
117
232
  {
233
+ HUF_CTableHeader header;
234
+ HUF_STATIC_ASSERT(sizeof(ctable[0]) == sizeof(header));
235
+ ZSTD_memset(&header, 0, sizeof(header));
236
+ assert(tableLog < 256);
237
+ header.tableLog = (BYTE)tableLog;
238
+ assert(maxSymbolValue < 256);
239
+ header.maxSymbolValue = (BYTE)maxSymbolValue;
240
+ ZSTD_memcpy(ctable, &header, sizeof(header));
241
+ }
242
+
243
+ typedef struct {
244
+ HUF_CompressWeightsWksp wksp;
118
245
  BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; /* precomputed conversion table */
119
246
  BYTE huffWeight[HUF_SYMBOLVALUE_MAX];
247
+ } HUF_WriteCTableWksp;
248
+
249
+ size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
250
+ const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog,
251
+ void* workspace, size_t workspaceSize)
252
+ {
253
+ HUF_CElt const* const ct = CTable + 1;
120
254
  BYTE* op = (BYTE*)dst;
121
255
  U32 n;
256
+ HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32));
122
257
 
123
- /* check conditions */
258
+ HUF_STATIC_ASSERT(HUF_CTABLE_WORKSPACE_SIZE >= sizeof(HUF_WriteCTableWksp));
259
+
260
+ assert(HUF_readCTableHeader(CTable).maxSymbolValue == maxSymbolValue);
261
+ assert(HUF_readCTableHeader(CTable).tableLog == huffLog);
262
+
263
+ /* check conditions */
264
+ if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC);
124
265
  if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
125
266
 
126
267
  /* convert to weight */
127
- bitsToWeight[0] = 0;
268
+ wksp->bitsToWeight[0] = 0;
128
269
  for (n=1; n<huffLog+1; n++)
129
- bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
270
+ wksp->bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
130
271
  for (n=0; n<maxSymbolValue; n++)
131
- huffWeight[n] = bitsToWeight[CTable[n].nbBits];
272
+ wksp->huffWeight[n] = wksp->bitsToWeight[HUF_getNbBits(ct[n])];
132
273
 
133
274
  /* attempt weights compression by FSE */
134
- { CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, huffWeight, maxSymbolValue) );
275
+ if (maxDstSize < 1) return ERROR(dstSize_tooSmall);
276
+ { CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, wksp->huffWeight, maxSymbolValue, &wksp->wksp, sizeof(wksp->wksp)) );
135
277
  if ((hSize>1) & (hSize < maxSymbolValue/2)) { /* FSE compressed */
136
278
  op[0] = (BYTE)hSize;
137
279
  return hSize+1;
@@ -141,9 +283,9 @@ size_t HUF_writeCTable (void* dst, size_t maxDstSize,
141
283
  if (maxSymbolValue > (256-128)) return ERROR(GENERIC); /* should not happen : likely means source cannot be compressed */
142
284
  if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall); /* not enough space within dst buffer */
143
285
  op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1));
144
- huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */
286
+ wksp->huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */
145
287
  for (n=0; n<maxSymbolValue; n+=2)
146
- op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]);
288
+ op[(n/2)+1] = (BYTE)((wksp->huffWeight[n] << 4) + wksp->huffWeight[n+1]);
147
289
  return ((maxSymbolValue+1)/2) + 1;
148
290
  }
149
291
 
@@ -154,34 +296,38 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
154
296
  U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */
155
297
  U32 tableLog = 0;
156
298
  U32 nbSymbols = 0;
299
+ HUF_CElt* const ct = CTable + 1;
157
300
 
158
301
  /* get symbol weights */
159
302
  CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize));
303
+ *hasZeroWeights = (rankVal[0] > 0);
160
304
 
161
305
  /* check result */
162
306
  if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
163
307
  if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall);
164
308
 
309
+ *maxSymbolValuePtr = nbSymbols - 1;
310
+
311
+ HUF_writeCTableHeader(CTable, tableLog, *maxSymbolValuePtr);
312
+
165
313
  /* Prepare base value per rank */
166
314
  { U32 n, nextRankStart = 0;
167
315
  for (n=1; n<=tableLog; n++) {
168
- U32 current = nextRankStart;
316
+ U32 curr = nextRankStart;
169
317
  nextRankStart += (rankVal[n] << (n-1));
170
- rankVal[n] = current;
318
+ rankVal[n] = curr;
171
319
  } }
172
320
 
173
321
  /* fill nbBits */
174
- *hasZeroWeights = 0;
175
322
  { U32 n; for (n=0; n<nbSymbols; n++) {
176
323
  const U32 w = huffWeight[n];
177
- *hasZeroWeights |= (w == 0);
178
- CTable[n].nbBits = (BYTE)(tableLog + 1 - w) & -(w != 0);
324
+ HUF_setNbBits(ct + n, (BYTE)(tableLog + 1 - w) & -(w != 0));
179
325
  } }
180
326
 
181
327
  /* fill val */
182
328
  { U16 nbPerRank[HUF_TABLELOG_MAX+2] = {0}; /* support w=0=>n=tableLog+1 */
183
329
  U16 valPerRank[HUF_TABLELOG_MAX+2] = {0};
184
- { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[CTable[n].nbBits]++; }
330
+ { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[HUF_getNbBits(ct[n])]++; }
185
331
  /* determine stating value per rank */
186
332
  valPerRank[tableLog+1] = 0; /* for w==0 */
187
333
  { U16 min = 0;
@@ -191,92 +337,151 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
191
337
  min >>= 1;
192
338
  } }
193
339
  /* assign value within rank, symbol order */
194
- { U32 n; for (n=0; n<nbSymbols; n++) CTable[n].val = valPerRank[CTable[n].nbBits]++; }
340
+ { U32 n; for (n=0; n<nbSymbols; n++) HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++); }
195
341
  }
196
342
 
197
- *maxSymbolValuePtr = nbSymbols - 1;
198
343
  return readSize;
199
344
  }
200
345
 
201
- U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue)
346
+ U32 HUF_getNbBitsFromCTable(HUF_CElt const* CTable, U32 symbolValue)
202
347
  {
203
- const HUF_CElt* table = (const HUF_CElt*)symbolTable;
348
+ const HUF_CElt* const ct = CTable + 1;
204
349
  assert(symbolValue <= HUF_SYMBOLVALUE_MAX);
205
- return table[symbolValue].nbBits;
350
+ if (symbolValue > HUF_readCTableHeader(CTable).maxSymbolValue)
351
+ return 0;
352
+ return (U32)HUF_getNbBits(ct[symbolValue]);
206
353
  }
207
354
 
208
355
 
209
- typedef struct nodeElt_s {
210
- U32 count;
211
- U16 parent;
212
- BYTE byte;
213
- BYTE nbBits;
214
- } nodeElt;
215
-
216
- static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
356
+ /**
357
+ * HUF_setMaxHeight():
358
+ * Try to enforce @targetNbBits on the Huffman tree described in @huffNode.
359
+ *
360
+ * It attempts to convert all nodes with nbBits > @targetNbBits
361
+ * to employ @targetNbBits instead. Then it adjusts the tree
362
+ * so that it remains a valid canonical Huffman tree.
363
+ *
364
+ * @pre The sum of the ranks of each symbol == 2^largestBits,
365
+ * where largestBits == huffNode[lastNonNull].nbBits.
366
+ * @post The sum of the ranks of each symbol == 2^largestBits,
367
+ * where largestBits is the return value (expected <= targetNbBits).
368
+ *
369
+ * @param huffNode The Huffman tree modified in place to enforce targetNbBits.
370
+ * It's presumed sorted, from most frequent to rarest symbol.
371
+ * @param lastNonNull The symbol with the lowest count in the Huffman tree.
372
+ * @param targetNbBits The allowed number of bits, which the Huffman tree
373
+ * may not respect. After this function the Huffman tree will
374
+ * respect targetNbBits.
375
+ * @return The maximum number of bits of the Huffman tree after adjustment.
376
+ */
377
+ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 targetNbBits)
217
378
  {
218
379
  const U32 largestBits = huffNode[lastNonNull].nbBits;
219
- if (largestBits <= maxNbBits) return largestBits; /* early exit : no elt > maxNbBits */
380
+ /* early exit : no elt > targetNbBits, so the tree is already valid. */
381
+ if (largestBits <= targetNbBits) return largestBits;
382
+
383
+ DEBUGLOG(5, "HUF_setMaxHeight (targetNbBits = %u)", targetNbBits);
220
384
 
221
385
  /* there are several too large elements (at least >= 2) */
222
386
  { int totalCost = 0;
223
- const U32 baseCost = 1 << (largestBits - maxNbBits);
387
+ const U32 baseCost = 1 << (largestBits - targetNbBits);
224
388
  int n = (int)lastNonNull;
225
389
 
226
- while (huffNode[n].nbBits > maxNbBits) {
390
+ /* Adjust any ranks > targetNbBits to targetNbBits.
391
+ * Compute totalCost, which is how far the sum of the ranks is
392
+ * we are over 2^largestBits after adjust the offending ranks.
393
+ */
394
+ while (huffNode[n].nbBits > targetNbBits) {
227
395
  totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
228
- huffNode[n].nbBits = (BYTE)maxNbBits;
229
- n --;
230
- } /* n stops at huffNode[n].nbBits <= maxNbBits */
231
- while (huffNode[n].nbBits == maxNbBits) n--; /* n end at index of smallest symbol using < maxNbBits */
396
+ huffNode[n].nbBits = (BYTE)targetNbBits;
397
+ n--;
398
+ }
399
+ /* n stops at huffNode[n].nbBits <= targetNbBits */
400
+ assert(huffNode[n].nbBits <= targetNbBits);
401
+ /* n end at index of smallest symbol using < targetNbBits */
402
+ while (huffNode[n].nbBits == targetNbBits) --n;
232
403
 
233
- /* renorm totalCost */
234
- totalCost >>= (largestBits - maxNbBits); /* note : totalCost is necessarily a multiple of baseCost */
404
+ /* renorm totalCost from 2^largestBits to 2^targetNbBits
405
+ * note : totalCost is necessarily a multiple of baseCost */
406
+ assert(((U32)totalCost & (baseCost - 1)) == 0);
407
+ totalCost >>= (largestBits - targetNbBits);
408
+ assert(totalCost > 0);
235
409
 
236
410
  /* repay normalized cost */
237
411
  { U32 const noSymbol = 0xF0F0F0F0;
238
412
  U32 rankLast[HUF_TABLELOG_MAX+2];
239
413
 
240
- /* Get pos of last (smallest) symbol per rank */
241
- memset(rankLast, 0xF0, sizeof(rankLast));
242
- { U32 currentNbBits = maxNbBits;
414
+ /* Get pos of last (smallest = lowest cum. count) symbol per rank */
415
+ ZSTD_memset(rankLast, 0xF0, sizeof(rankLast));
416
+ { U32 currentNbBits = targetNbBits;
243
417
  int pos;
244
418
  for (pos=n ; pos >= 0; pos--) {
245
419
  if (huffNode[pos].nbBits >= currentNbBits) continue;
246
- currentNbBits = huffNode[pos].nbBits; /* < maxNbBits */
247
- rankLast[maxNbBits-currentNbBits] = (U32)pos;
420
+ currentNbBits = huffNode[pos].nbBits; /* < targetNbBits */
421
+ rankLast[targetNbBits-currentNbBits] = (U32)pos;
248
422
  } }
249
423
 
250
424
  while (totalCost > 0) {
251
- U32 nBitsToDecrease = BIT_highbit32((U32)totalCost) + 1;
425
+ /* Try to reduce the next power of 2 above totalCost because we
426
+ * gain back half the rank.
427
+ */
428
+ U32 nBitsToDecrease = ZSTD_highbit32((U32)totalCost) + 1;
252
429
  for ( ; nBitsToDecrease > 1; nBitsToDecrease--) {
253
430
  U32 const highPos = rankLast[nBitsToDecrease];
254
431
  U32 const lowPos = rankLast[nBitsToDecrease-1];
255
432
  if (highPos == noSymbol) continue;
433
+ /* Decrease highPos if no symbols of lowPos or if it is
434
+ * not cheaper to remove 2 lowPos than highPos.
435
+ */
256
436
  if (lowPos == noSymbol) break;
257
437
  { U32 const highTotal = huffNode[highPos].count;
258
438
  U32 const lowTotal = 2 * huffNode[lowPos].count;
259
439
  if (highTotal <= lowTotal) break;
260
440
  } }
261
441
  /* only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) */
442
+ assert(rankLast[nBitsToDecrease] != noSymbol || nBitsToDecrease == 1);
262
443
  /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */
263
444
  while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol))
264
- nBitsToDecrease ++;
445
+ nBitsToDecrease++;
446
+ assert(rankLast[nBitsToDecrease] != noSymbol);
447
+ /* Increase the number of bits to gain back half the rank cost. */
265
448
  totalCost -= 1 << (nBitsToDecrease-1);
449
+ huffNode[rankLast[nBitsToDecrease]].nbBits++;
450
+
451
+ /* Fix up the new rank.
452
+ * If the new rank was empty, this symbol is now its smallest.
453
+ * Otherwise, this symbol will be the largest in the new rank so no adjustment.
454
+ */
266
455
  if (rankLast[nBitsToDecrease-1] == noSymbol)
267
- rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease]; /* this rank is no longer empty */
268
- huffNode[rankLast[nBitsToDecrease]].nbBits ++;
456
+ rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease];
457
+ /* Fix up the old rank.
458
+ * If the symbol was at position 0, meaning it was the highest weight symbol in the tree,
459
+ * it must be the only symbol in its rank, so the old rank now has no symbols.
460
+ * Otherwise, since the Huffman nodes are sorted by count, the previous position is now
461
+ * the smallest node in the rank. If the previous position belongs to a different rank,
462
+ * then the rank is now empty.
463
+ */
269
464
  if (rankLast[nBitsToDecrease] == 0) /* special case, reached largest symbol */
270
465
  rankLast[nBitsToDecrease] = noSymbol;
271
466
  else {
272
467
  rankLast[nBitsToDecrease]--;
273
- if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease)
468
+ if (huffNode[rankLast[nBitsToDecrease]].nbBits != targetNbBits-nBitsToDecrease)
274
469
  rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */
275
- } } /* while (totalCost > 0) */
276
-
470
+ }
471
+ } /* while (totalCost > 0) */
472
+
473
+ /* If we've removed too much weight, then we have to add it back.
474
+ * To avoid overshooting again, we only adjust the smallest rank.
475
+ * We take the largest nodes from the lowest rank 0 and move them
476
+ * to rank 1. There's guaranteed to be enough rank 0 symbols because
477
+ * TODO.
478
+ */
277
479
  while (totalCost < 0) { /* Sometimes, cost correction overshoot */
278
- if (rankLast[1] == noSymbol) { /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */
279
- while (huffNode[n].nbBits == maxNbBits) n--;
480
+ /* special case : no rank 1 symbol (using targetNbBits-1);
481
+ * let's create one from largest rank 0 (using targetNbBits).
482
+ */
483
+ if (rankLast[1] == noSymbol) {
484
+ while (huffNode[n].nbBits == targetNbBits) n--;
280
485
  huffNode[n+1].nbBits--;
281
486
  assert(n >= 0);
282
487
  rankLast[1] = (U32)(n+1);
@@ -286,47 +491,178 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
286
491
  huffNode[ rankLast[1] + 1 ].nbBits--;
287
492
  rankLast[1]++;
288
493
  totalCost ++;
289
- } } } /* there are several too large elements (at least >= 2) */
494
+ }
495
+ } /* repay normalized cost */
496
+ } /* there are several too large elements (at least >= 2) */
290
497
 
291
- return maxNbBits;
498
+ return targetNbBits;
292
499
  }
293
500
 
294
501
  typedef struct {
295
- U32 base;
296
- U32 current;
502
+ U16 base;
503
+ U16 curr;
297
504
  } rankPos;
298
505
 
299
- typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];
506
+ typedef nodeElt huffNodeTable[2 * (HUF_SYMBOLVALUE_MAX + 1)];
300
507
 
301
- #define RANK_POSITION_TABLE_SIZE 32
508
+ /* Number of buckets available for HUF_sort() */
509
+ #define RANK_POSITION_TABLE_SIZE 192
302
510
 
303
511
  typedef struct {
304
512
  huffNodeTable huffNodeTbl;
305
513
  rankPos rankPosition[RANK_POSITION_TABLE_SIZE];
306
514
  } HUF_buildCTable_wksp_tables;
307
515
 
308
- static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValue, rankPos* rankPosition)
309
- {
516
+ /* RANK_POSITION_DISTINCT_COUNT_CUTOFF == Cutoff point in HUF_sort() buckets for which we use log2 bucketing.
517
+ * Strategy is to use as many buckets as possible for representing distinct
518
+ * counts while using the remainder to represent all "large" counts.
519
+ *
520
+ * To satisfy this requirement for 192 buckets, we can do the following:
521
+ * Let buckets 0-166 represent distinct counts of [0, 166]
522
+ * Let buckets 166 to 192 represent all remaining counts up to RANK_POSITION_MAX_COUNT_LOG using log2 bucketing.
523
+ */
524
+ #define RANK_POSITION_MAX_COUNT_LOG 32
525
+ #define RANK_POSITION_LOG_BUCKETS_BEGIN ((RANK_POSITION_TABLE_SIZE - 1) - RANK_POSITION_MAX_COUNT_LOG - 1 /* == 158 */)
526
+ #define RANK_POSITION_DISTINCT_COUNT_CUTOFF (RANK_POSITION_LOG_BUCKETS_BEGIN + ZSTD_highbit32(RANK_POSITION_LOG_BUCKETS_BEGIN) /* == 166 */)
527
+
528
+ /* Return the appropriate bucket index for a given count. See definition of
529
+ * RANK_POSITION_DISTINCT_COUNT_CUTOFF for explanation of bucketing strategy.
530
+ */
531
+ static U32 HUF_getIndex(U32 const count) {
532
+ return (count < RANK_POSITION_DISTINCT_COUNT_CUTOFF)
533
+ ? count
534
+ : ZSTD_highbit32(count) + RANK_POSITION_LOG_BUCKETS_BEGIN;
535
+ }
536
+
537
+ /* Helper swap function for HUF_quickSortPartition() */
538
+ static void HUF_swapNodes(nodeElt* a, nodeElt* b) {
539
+ nodeElt tmp = *a;
540
+ *a = *b;
541
+ *b = tmp;
542
+ }
543
+
544
+ /* Returns 0 if the huffNode array is not sorted by descending count */
545
+ MEM_STATIC int HUF_isSorted(nodeElt huffNode[], U32 const maxSymbolValue1) {
546
+ U32 i;
547
+ for (i = 1; i < maxSymbolValue1; ++i) {
548
+ if (huffNode[i].count > huffNode[i-1].count) {
549
+ return 0;
550
+ }
551
+ }
552
+ return 1;
553
+ }
554
+
555
+ /* Insertion sort by descending order */
556
+ HINT_INLINE void HUF_insertionSort(nodeElt huffNode[], int const low, int const high) {
557
+ int i;
558
+ int const size = high-low+1;
559
+ huffNode += low;
560
+ for (i = 1; i < size; ++i) {
561
+ nodeElt const key = huffNode[i];
562
+ int j = i - 1;
563
+ while (j >= 0 && huffNode[j].count < key.count) {
564
+ huffNode[j + 1] = huffNode[j];
565
+ j--;
566
+ }
567
+ huffNode[j + 1] = key;
568
+ }
569
+ }
570
+
571
+ /* Pivot helper function for quicksort. */
572
+ static int HUF_quickSortPartition(nodeElt arr[], int const low, int const high) {
573
+ /* Simply select rightmost element as pivot. "Better" selectors like
574
+ * median-of-three don't experimentally appear to have any benefit.
575
+ */
576
+ U32 const pivot = arr[high].count;
577
+ int i = low - 1;
578
+ int j = low;
579
+ for ( ; j < high; j++) {
580
+ if (arr[j].count > pivot) {
581
+ i++;
582
+ HUF_swapNodes(&arr[i], &arr[j]);
583
+ }
584
+ }
585
+ HUF_swapNodes(&arr[i + 1], &arr[high]);
586
+ return i + 1;
587
+ }
588
+
589
+ /* Classic quicksort by descending with partially iterative calls
590
+ * to reduce worst case callstack size.
591
+ */
592
+ static void HUF_simpleQuickSort(nodeElt arr[], int low, int high) {
593
+ int const kInsertionSortThreshold = 8;
594
+ if (high - low < kInsertionSortThreshold) {
595
+ HUF_insertionSort(arr, low, high);
596
+ return;
597
+ }
598
+ while (low < high) {
599
+ int const idx = HUF_quickSortPartition(arr, low, high);
600
+ if (idx - low < high - idx) {
601
+ HUF_simpleQuickSort(arr, low, idx - 1);
602
+ low = idx + 1;
603
+ } else {
604
+ HUF_simpleQuickSort(arr, idx + 1, high);
605
+ high = idx - 1;
606
+ }
607
+ }
608
+ }
609
+
610
+ /**
611
+ * HUF_sort():
612
+ * Sorts the symbols [0, maxSymbolValue] by count[symbol] in decreasing order.
613
+ * This is a typical bucket sorting strategy that uses either quicksort or insertion sort to sort each bucket.
614
+ *
615
+ * @param[out] huffNode Sorted symbols by decreasing count. Only members `.count` and `.byte` are filled.
616
+ * Must have (maxSymbolValue + 1) entries.
617
+ * @param[in] count Histogram of the symbols.
618
+ * @param[in] maxSymbolValue Maximum symbol value.
619
+ * @param rankPosition This is a scratch workspace. Must have RANK_POSITION_TABLE_SIZE entries.
620
+ */
621
+ static void HUF_sort(nodeElt huffNode[], const unsigned count[], U32 const maxSymbolValue, rankPos rankPosition[]) {
310
622
  U32 n;
623
+ U32 const maxSymbolValue1 = maxSymbolValue+1;
624
+
625
+ /* Compute base and set curr to base.
626
+ * For symbol s let lowerRank = HUF_getIndex(count[n]) and rank = lowerRank + 1.
627
+ * See HUF_getIndex to see bucketing strategy.
628
+ * We attribute each symbol to lowerRank's base value, because we want to know where
629
+ * each rank begins in the output, so for rank R we want to count ranks R+1 and above.
630
+ */
631
+ ZSTD_memset(rankPosition, 0, sizeof(*rankPosition) * RANK_POSITION_TABLE_SIZE);
632
+ for (n = 0; n < maxSymbolValue1; ++n) {
633
+ U32 lowerRank = HUF_getIndex(count[n]);
634
+ assert(lowerRank < RANK_POSITION_TABLE_SIZE - 1);
635
+ rankPosition[lowerRank].base++;
636
+ }
311
637
 
312
- memset(rankPosition, 0, sizeof(*rankPosition) * RANK_POSITION_TABLE_SIZE);
313
- for (n=0; n<=maxSymbolValue; n++) {
314
- U32 r = BIT_highbit32(count[n] + 1);
315
- rankPosition[r].base ++;
638
+ assert(rankPosition[RANK_POSITION_TABLE_SIZE - 1].base == 0);
639
+ /* Set up the rankPosition table */
640
+ for (n = RANK_POSITION_TABLE_SIZE - 1; n > 0; --n) {
641
+ rankPosition[n-1].base += rankPosition[n].base;
642
+ rankPosition[n-1].curr = rankPosition[n-1].base;
316
643
  }
317
- for (n=30; n>0; n--) rankPosition[n-1].base += rankPosition[n].base;
318
- for (n=0; n<32; n++) rankPosition[n].current = rankPosition[n].base;
319
- for (n=0; n<=maxSymbolValue; n++) {
644
+
645
+ /* Insert each symbol into their appropriate bucket, setting up rankPosition table. */
646
+ for (n = 0; n < maxSymbolValue1; ++n) {
320
647
  U32 const c = count[n];
321
- U32 const r = BIT_highbit32(c+1) + 1;
322
- U32 pos = rankPosition[r].current++;
323
- while ((pos > rankPosition[r].base) && (c > huffNode[pos-1].count)) {
324
- huffNode[pos] = huffNode[pos-1];
325
- pos--;
326
- }
648
+ U32 const r = HUF_getIndex(c) + 1;
649
+ U32 const pos = rankPosition[r].curr++;
650
+ assert(pos < maxSymbolValue1);
327
651
  huffNode[pos].count = c;
328
652
  huffNode[pos].byte = (BYTE)n;
329
653
  }
654
+
655
+ /* Sort each bucket. */
656
+ for (n = RANK_POSITION_DISTINCT_COUNT_CUTOFF; n < RANK_POSITION_TABLE_SIZE - 1; ++n) {
657
+ int const bucketSize = rankPosition[n].curr - rankPosition[n].base;
658
+ U32 const bucketStartIdx = rankPosition[n].base;
659
+ if (bucketSize > 1) {
660
+ assert(bucketStartIdx < maxSymbolValue1);
661
+ HUF_simpleQuickSort(huffNode + bucketStartIdx, 0, bucketSize-1);
662
+ }
663
+ }
664
+
665
+ assert(HUF_isSorted(huffNode, maxSymbolValue1));
330
666
  }
331
667
 
332
668
 
@@ -336,28 +672,21 @@ static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValu
336
672
  */
337
673
  #define STARTNODE (HUF_SYMBOLVALUE_MAX+1)
338
674
 
339
- size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
675
+ /* HUF_buildTree():
676
+ * Takes the huffNode array sorted by HUF_sort() and builds an unlimited-depth Huffman tree.
677
+ *
678
+ * @param huffNode The array sorted by HUF_sort(). Builds the Huffman tree in this array.
679
+ * @param maxSymbolValue The maximum symbol value.
680
+ * @return The smallest node in the Huffman tree (by count).
681
+ */
682
+ static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue)
340
683
  {
341
- HUF_buildCTable_wksp_tables* const wksp_tables = (HUF_buildCTable_wksp_tables*)workSpace;
342
- nodeElt* const huffNode0 = wksp_tables->huffNodeTbl;
343
- nodeElt* const huffNode = huffNode0+1;
684
+ nodeElt* const huffNode0 = huffNode - 1;
344
685
  int nonNullRank;
345
686
  int lowS, lowN;
346
687
  int nodeNb = STARTNODE;
347
688
  int n, nodeRoot;
348
-
349
- /* safety checks */
350
- if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
351
- if (wkspSize < sizeof(HUF_buildCTable_wksp_tables))
352
- return ERROR(workSpace_tooSmall);
353
- if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
354
- if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
355
- return ERROR(maxSymbolValue_tooLarge);
356
- memset(huffNode0, 0, sizeof(huffNodeTable));
357
-
358
- /* sort, decreasing order */
359
- HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition);
360
-
689
+ DEBUGLOG(5, "HUF_buildTree (alphabet size = %u)", maxSymbolValue + 1);
361
690
  /* init for parents */
362
691
  nonNullRank = (int)maxSymbolValue;
363
692
  while(huffNode[nonNullRank].count == 0) nonNullRank--;
@@ -384,127 +713,414 @@ size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbo
384
713
  for (n=0; n<=nonNullRank; n++)
385
714
  huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
386
715
 
387
- /* enforce maxTableLog */
388
- maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits);
716
+ DEBUGLOG(6, "Initial distribution of bits completed (%zu sorted symbols)", showHNodeBits(huffNode, maxSymbolValue+1));
389
717
 
390
- /* fill result into tree (val, nbBits) */
391
- { U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
392
- U16 valPerRank[HUF_TABLELOG_MAX+1] = {0};
393
- int const alphabetSize = (int)(maxSymbolValue + 1);
394
- if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */
395
- for (n=0; n<=nonNullRank; n++)
396
- nbPerRank[huffNode[n].nbBits]++;
397
- /* determine stating value per rank */
398
- { U16 min = 0;
399
- for (n=(int)maxNbBits; n>0; n--) {
400
- valPerRank[n] = min; /* get starting value within each rank */
401
- min += nbPerRank[n];
402
- min >>= 1;
403
- } }
404
- for (n=0; n<alphabetSize; n++)
405
- tree[huffNode[n].byte].nbBits = huffNode[n].nbBits; /* push nbBits per symbol, symbol order */
406
- for (n=0; n<alphabetSize; n++)
407
- tree[n].val = valPerRank[tree[n].nbBits]++; /* assign value within rank, symbol order */
408
- }
409
-
410
- return maxNbBits;
718
+ return nonNullRank;
411
719
  }
412
720
 
413
- /** HUF_buildCTable() :
414
- * @return : maxNbBits
415
- * Note : count is used before tree is written, so they can safely overlap
721
+ /**
722
+ * HUF_buildCTableFromTree():
723
+ * Build the CTable given the Huffman tree in huffNode.
724
+ *
725
+ * @param[out] CTable The output Huffman CTable.
726
+ * @param huffNode The Huffman tree.
727
+ * @param nonNullRank The last and smallest node in the Huffman tree.
728
+ * @param maxSymbolValue The maximum symbol value.
729
+ * @param maxNbBits The exact maximum number of bits used in the Huffman tree.
416
730
  */
417
- size_t HUF_buildCTable (HUF_CElt* tree, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits)
731
+ static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, int nonNullRank, U32 maxSymbolValue, U32 maxNbBits)
418
732
  {
419
- HUF_buildCTable_wksp_tables workspace;
420
- return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, &workspace, sizeof(workspace));
733
+ HUF_CElt* const ct = CTable + 1;
734
+ /* fill result into ctable (val, nbBits) */
735
+ int n;
736
+ U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
737
+ U16 valPerRank[HUF_TABLELOG_MAX+1] = {0};
738
+ int const alphabetSize = (int)(maxSymbolValue + 1);
739
+ for (n=0; n<=nonNullRank; n++)
740
+ nbPerRank[huffNode[n].nbBits]++;
741
+ /* determine starting value per rank */
742
+ { U16 min = 0;
743
+ for (n=(int)maxNbBits; n>0; n--) {
744
+ valPerRank[n] = min; /* get starting value within each rank */
745
+ min += nbPerRank[n];
746
+ min >>= 1;
747
+ } }
748
+ for (n=0; n<alphabetSize; n++)
749
+ HUF_setNbBits(ct + huffNode[n].byte, huffNode[n].nbBits); /* push nbBits per symbol, symbol order */
750
+ for (n=0; n<alphabetSize; n++)
751
+ HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++); /* assign value within rank, symbol order */
752
+
753
+ HUF_writeCTableHeader(CTable, maxNbBits, maxSymbolValue);
754
+ }
755
+
756
+ size_t
757
+ HUF_buildCTable_wksp(HUF_CElt* CTable, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits,
758
+ void* workSpace, size_t wkspSize)
759
+ {
760
+ HUF_buildCTable_wksp_tables* const wksp_tables =
761
+ (HUF_buildCTable_wksp_tables*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(U32));
762
+ nodeElt* const huffNode0 = wksp_tables->huffNodeTbl;
763
+ nodeElt* const huffNode = huffNode0+1;
764
+ int nonNullRank;
765
+
766
+ HUF_STATIC_ASSERT(HUF_CTABLE_WORKSPACE_SIZE == sizeof(HUF_buildCTable_wksp_tables));
767
+
768
+ DEBUGLOG(5, "HUF_buildCTable_wksp (alphabet size = %u)", maxSymbolValue+1);
769
+
770
+ /* safety checks */
771
+ if (wkspSize < sizeof(HUF_buildCTable_wksp_tables))
772
+ return ERROR(workSpace_tooSmall);
773
+ if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
774
+ if (maxSymbolValue > HUF_SYMBOLVALUE_MAX)
775
+ return ERROR(maxSymbolValue_tooLarge);
776
+ ZSTD_memset(huffNode0, 0, sizeof(huffNodeTable));
777
+
778
+ /* sort, decreasing order */
779
+ HUF_sort(huffNode, count, maxSymbolValue, wksp_tables->rankPosition);
780
+ DEBUGLOG(6, "sorted symbols completed (%zu symbols)", showHNodeSymbols(huffNode, maxSymbolValue+1));
781
+
782
+ /* build tree */
783
+ nonNullRank = HUF_buildTree(huffNode, maxSymbolValue);
784
+
785
+ /* determine and enforce maxTableLog */
786
+ maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits);
787
+ if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */
788
+
789
+ HUF_buildCTableFromTree(CTable, huffNode, nonNullRank, maxSymbolValue, maxNbBits);
790
+
791
+ return maxNbBits;
421
792
  }
422
793
 
423
794
  size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
424
795
  {
796
+ HUF_CElt const* ct = CTable + 1;
425
797
  size_t nbBits = 0;
426
798
  int s;
427
799
  for (s = 0; s <= (int)maxSymbolValue; ++s) {
428
- nbBits += CTable[s].nbBits * count[s];
800
+ nbBits += HUF_getNbBits(ct[s]) * count[s];
429
801
  }
430
802
  return nbBits >> 3;
431
803
  }
432
804
 
433
805
  int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
434
- int bad = 0;
435
- int s;
436
- for (s = 0; s <= (int)maxSymbolValue; ++s) {
437
- bad |= (count[s] != 0) & (CTable[s].nbBits == 0);
438
- }
439
- return !bad;
806
+ HUF_CTableHeader header = HUF_readCTableHeader(CTable);
807
+ HUF_CElt const* ct = CTable + 1;
808
+ int bad = 0;
809
+ int s;
810
+
811
+ assert(header.tableLog <= HUF_TABLELOG_ABSOLUTEMAX);
812
+
813
+ if (header.maxSymbolValue < maxSymbolValue)
814
+ return 0;
815
+
816
+ for (s = 0; s <= (int)maxSymbolValue; ++s) {
817
+ bad |= (count[s] != 0) & (HUF_getNbBits(ct[s]) == 0);
818
+ }
819
+ return !bad;
440
820
  }
441
821
 
442
822
  size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
443
823
 
824
+ /** HUF_CStream_t:
825
+ * Huffman uses its own BIT_CStream_t implementation.
826
+ * There are three major differences from BIT_CStream_t:
827
+ * 1. HUF_addBits() takes a HUF_CElt (size_t) which is
828
+ * the pair (nbBits, value) in the format:
829
+ * format:
830
+ * - Bits [0, 4) = nbBits
831
+ * - Bits [4, 64 - nbBits) = 0
832
+ * - Bits [64 - nbBits, 64) = value
833
+ * 2. The bitContainer is built from the upper bits and
834
+ * right shifted. E.g. to add a new value of N bits
835
+ * you right shift the bitContainer by N, then or in
836
+ * the new value into the N upper bits.
837
+ * 3. The bitstream has two bit containers. You can add
838
+ * bits to the second container and merge them into
839
+ * the first container.
840
+ */
841
+
842
+ #define HUF_BITS_IN_CONTAINER (sizeof(size_t) * 8)
843
+
844
+ typedef struct {
845
+ size_t bitContainer[2];
846
+ size_t bitPos[2];
847
+
848
+ BYTE* startPtr;
849
+ BYTE* ptr;
850
+ BYTE* endPtr;
851
+ } HUF_CStream_t;
852
+
853
+ /**! HUF_initCStream():
854
+ * Initializes the bitstream.
855
+ * @returns 0 or an error code.
856
+ */
857
+ static size_t HUF_initCStream(HUF_CStream_t* bitC,
858
+ void* startPtr, size_t dstCapacity)
859
+ {
860
+ ZSTD_memset(bitC, 0, sizeof(*bitC));
861
+ bitC->startPtr = (BYTE*)startPtr;
862
+ bitC->ptr = bitC->startPtr;
863
+ bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer[0]);
864
+ if (dstCapacity <= sizeof(bitC->bitContainer[0])) return ERROR(dstSize_tooSmall);
865
+ return 0;
866
+ }
867
+
868
+ /*! HUF_addBits():
869
+ * Adds the symbol stored in HUF_CElt elt to the bitstream.
870
+ *
871
+ * @param elt The element we're adding. This is a (nbBits, value) pair.
872
+ * See the HUF_CStream_t docs for the format.
873
+ * @param idx Insert into the bitstream at this idx.
874
+ * @param kFast This is a template parameter. If the bitstream is guaranteed
875
+ * to have at least 4 unused bits after this call it may be 1,
876
+ * otherwise it must be 0. HUF_addBits() is faster when fast is set.
877
+ */
878
+ FORCE_INLINE_TEMPLATE void HUF_addBits(HUF_CStream_t* bitC, HUF_CElt elt, int idx, int kFast)
879
+ {
880
+ assert(idx <= 1);
881
+ assert(HUF_getNbBits(elt) <= HUF_TABLELOG_ABSOLUTEMAX);
882
+ /* This is efficient on x86-64 with BMI2 because shrx
883
+ * only reads the low 6 bits of the register. The compiler
884
+ * knows this and elides the mask. When fast is set,
885
+ * every operation can use the same value loaded from elt.
886
+ */
887
+ bitC->bitContainer[idx] >>= HUF_getNbBits(elt);
888
+ bitC->bitContainer[idx] |= kFast ? HUF_getValueFast(elt) : HUF_getValue(elt);
889
+ /* We only read the low 8 bits of bitC->bitPos[idx] so it
890
+ * doesn't matter that the high bits have noise from the value.
891
+ */
892
+ bitC->bitPos[idx] += HUF_getNbBitsFast(elt);
893
+ assert((bitC->bitPos[idx] & 0xFF) <= HUF_BITS_IN_CONTAINER);
894
+ /* The last 4-bits of elt are dirty if fast is set,
895
+ * so we must not be overwriting bits that have already been
896
+ * inserted into the bit container.
897
+ */
898
+ #if DEBUGLEVEL >= 1
899
+ {
900
+ size_t const nbBits = HUF_getNbBits(elt);
901
+ size_t const dirtyBits = nbBits == 0 ? 0 : ZSTD_highbit32((U32)nbBits) + 1;
902
+ (void)dirtyBits;
903
+ /* Middle bits are 0. */
904
+ assert(((elt >> dirtyBits) << (dirtyBits + nbBits)) == 0);
905
+ /* We didn't overwrite any bits in the bit container. */
906
+ assert(!kFast || (bitC->bitPos[idx] & 0xFF) <= HUF_BITS_IN_CONTAINER);
907
+ (void)dirtyBits;
908
+ }
909
+ #endif
910
+ }
911
+
912
+ FORCE_INLINE_TEMPLATE void HUF_zeroIndex1(HUF_CStream_t* bitC)
913
+ {
914
+ bitC->bitContainer[1] = 0;
915
+ bitC->bitPos[1] = 0;
916
+ }
917
+
918
+ /*! HUF_mergeIndex1() :
919
+ * Merges the bit container @ index 1 into the bit container @ index 0
920
+ * and zeros the bit container @ index 1.
921
+ */
922
+ FORCE_INLINE_TEMPLATE void HUF_mergeIndex1(HUF_CStream_t* bitC)
923
+ {
924
+ assert((bitC->bitPos[1] & 0xFF) < HUF_BITS_IN_CONTAINER);
925
+ bitC->bitContainer[0] >>= (bitC->bitPos[1] & 0xFF);
926
+ bitC->bitContainer[0] |= bitC->bitContainer[1];
927
+ bitC->bitPos[0] += bitC->bitPos[1];
928
+ assert((bitC->bitPos[0] & 0xFF) <= HUF_BITS_IN_CONTAINER);
929
+ }
930
+
931
+ /*! HUF_flushBits() :
932
+ * Flushes the bits in the bit container @ index 0.
933
+ *
934
+ * @post bitPos will be < 8.
935
+ * @param kFast If kFast is set then we must know a-priori that
936
+ * the bit container will not overflow.
937
+ */
938
+ FORCE_INLINE_TEMPLATE void HUF_flushBits(HUF_CStream_t* bitC, int kFast)
939
+ {
940
+ /* The upper bits of bitPos are noisy, so we must mask by 0xFF. */
941
+ size_t const nbBits = bitC->bitPos[0] & 0xFF;
942
+ size_t const nbBytes = nbBits >> 3;
943
+ /* The top nbBits bits of bitContainer are the ones we need. */
944
+ size_t const bitContainer = bitC->bitContainer[0] >> (HUF_BITS_IN_CONTAINER - nbBits);
945
+ /* Mask bitPos to account for the bytes we consumed. */
946
+ bitC->bitPos[0] &= 7;
947
+ assert(nbBits > 0);
948
+ assert(nbBits <= sizeof(bitC->bitContainer[0]) * 8);
949
+ assert(bitC->ptr <= bitC->endPtr);
950
+ MEM_writeLEST(bitC->ptr, bitContainer);
951
+ bitC->ptr += nbBytes;
952
+ assert(!kFast || bitC->ptr <= bitC->endPtr);
953
+ if (!kFast && bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
954
+ /* bitContainer doesn't need to be modified because the leftover
955
+ * bits are already the top bitPos bits. And we don't care about
956
+ * noise in the lower values.
957
+ */
958
+ }
959
+
960
+ /*! HUF_endMark()
961
+ * @returns The Huffman stream end mark: A 1-bit value = 1.
962
+ */
963
+ static HUF_CElt HUF_endMark(void)
964
+ {
965
+ HUF_CElt endMark;
966
+ HUF_setNbBits(&endMark, 1);
967
+ HUF_setValue(&endMark, 1);
968
+ return endMark;
969
+ }
970
+
971
+ /*! HUF_closeCStream() :
972
+ * @return Size of CStream, in bytes,
973
+ * or 0 if it could not fit into dstBuffer */
974
+ static size_t HUF_closeCStream(HUF_CStream_t* bitC)
975
+ {
976
+ HUF_addBits(bitC, HUF_endMark(), /* idx */ 0, /* kFast */ 0);
977
+ HUF_flushBits(bitC, /* kFast */ 0);
978
+ {
979
+ size_t const nbBits = bitC->bitPos[0] & 0xFF;
980
+ if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */
981
+ return (size_t)(bitC->ptr - bitC->startPtr) + (nbBits > 0);
982
+ }
983
+ }
984
+
444
985
  FORCE_INLINE_TEMPLATE void
445
- HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable)
986
+ HUF_encodeSymbol(HUF_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable, int idx, int fast)
446
987
  {
447
- BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
988
+ HUF_addBits(bitCPtr, CTable[symbol], idx, fast);
448
989
  }
449
990
 
450
- #define HUF_FLUSHBITS(s) BIT_flushBits(s)
991
+ FORCE_INLINE_TEMPLATE void
992
+ HUF_compress1X_usingCTable_internal_body_loop(HUF_CStream_t* bitC,
993
+ const BYTE* ip, size_t srcSize,
994
+ const HUF_CElt* ct,
995
+ int kUnroll, int kFastFlush, int kLastFast)
996
+ {
997
+ /* Join to kUnroll */
998
+ int n = (int)srcSize;
999
+ int rem = n % kUnroll;
1000
+ if (rem > 0) {
1001
+ for (; rem > 0; --rem) {
1002
+ HUF_encodeSymbol(bitC, ip[--n], ct, 0, /* fast */ 0);
1003
+ }
1004
+ HUF_flushBits(bitC, kFastFlush);
1005
+ }
1006
+ assert(n % kUnroll == 0);
451
1007
 
452
- #define HUF_FLUSHBITS_1(stream) \
453
- if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream)
1008
+ /* Join to 2 * kUnroll */
1009
+ if (n % (2 * kUnroll)) {
1010
+ int u;
1011
+ for (u = 1; u < kUnroll; ++u) {
1012
+ HUF_encodeSymbol(bitC, ip[n - u], ct, 0, 1);
1013
+ }
1014
+ HUF_encodeSymbol(bitC, ip[n - kUnroll], ct, 0, kLastFast);
1015
+ HUF_flushBits(bitC, kFastFlush);
1016
+ n -= kUnroll;
1017
+ }
1018
+ assert(n % (2 * kUnroll) == 0);
1019
+
1020
+ for (; n>0; n-= 2 * kUnroll) {
1021
+ /* Encode kUnroll symbols into the bitstream @ index 0. */
1022
+ int u;
1023
+ for (u = 1; u < kUnroll; ++u) {
1024
+ HUF_encodeSymbol(bitC, ip[n - u], ct, /* idx */ 0, /* fast */ 1);
1025
+ }
1026
+ HUF_encodeSymbol(bitC, ip[n - kUnroll], ct, /* idx */ 0, /* fast */ kLastFast);
1027
+ HUF_flushBits(bitC, kFastFlush);
1028
+ /* Encode kUnroll symbols into the bitstream @ index 1.
1029
+ * This allows us to start filling the bit container
1030
+ * without any data dependencies.
1031
+ */
1032
+ HUF_zeroIndex1(bitC);
1033
+ for (u = 1; u < kUnroll; ++u) {
1034
+ HUF_encodeSymbol(bitC, ip[n - kUnroll - u], ct, /* idx */ 1, /* fast */ 1);
1035
+ }
1036
+ HUF_encodeSymbol(bitC, ip[n - kUnroll - kUnroll], ct, /* idx */ 1, /* fast */ kLastFast);
1037
+ /* Merge bitstream @ index 1 into the bitstream @ index 0 */
1038
+ HUF_mergeIndex1(bitC);
1039
+ HUF_flushBits(bitC, kFastFlush);
1040
+ }
1041
+ assert(n == 0);
1042
+
1043
+ }
1044
+
1045
+ /**
1046
+ * Returns a tight upper bound on the output space needed by Huffman
1047
+ * with 8 bytes buffer to handle over-writes. If the output is at least
1048
+ * this large we don't need to do bounds checks during Huffman encoding.
1049
+ */
1050
+ static size_t HUF_tightCompressBound(size_t srcSize, size_t tableLog)
1051
+ {
1052
+ return ((srcSize * tableLog) >> 3) + 8;
1053
+ }
454
1054
 
455
- #define HUF_FLUSHBITS_2(stream) \
456
- if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream)
457
1055
 
458
1056
  FORCE_INLINE_TEMPLATE size_t
459
1057
  HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
460
1058
  const void* src, size_t srcSize,
461
1059
  const HUF_CElt* CTable)
462
1060
  {
1061
+ U32 const tableLog = HUF_readCTableHeader(CTable).tableLog;
1062
+ HUF_CElt const* ct = CTable + 1;
463
1063
  const BYTE* ip = (const BYTE*) src;
464
1064
  BYTE* const ostart = (BYTE*)dst;
465
1065
  BYTE* const oend = ostart + dstSize;
466
- BYTE* op = ostart;
467
- size_t n;
468
- BIT_CStream_t bitC;
1066
+ HUF_CStream_t bitC;
469
1067
 
470
1068
  /* init */
471
1069
  if (dstSize < 8) return 0; /* not enough space to compress */
472
- { size_t const initErr = BIT_initCStream(&bitC, op, (size_t)(oend-op));
1070
+ { BYTE* op = ostart;
1071
+ size_t const initErr = HUF_initCStream(&bitC, op, (size_t)(oend-op));
473
1072
  if (HUF_isError(initErr)) return 0; }
474
1073
 
475
- n = srcSize & ~3; /* join to mod 4 */
476
- switch (srcSize & 3)
477
- {
478
- case 3 : HUF_encodeSymbol(&bitC, ip[n+ 2], CTable);
479
- HUF_FLUSHBITS_2(&bitC);
480
- /* fall-through */
481
- case 2 : HUF_encodeSymbol(&bitC, ip[n+ 1], CTable);
482
- HUF_FLUSHBITS_1(&bitC);
483
- /* fall-through */
484
- case 1 : HUF_encodeSymbol(&bitC, ip[n+ 0], CTable);
485
- HUF_FLUSHBITS(&bitC);
486
- /* fall-through */
487
- case 0 : /* fall-through */
488
- default: break;
489
- }
490
-
491
- for (; n>0; n-=4) { /* note : n&3==0 at this stage */
492
- HUF_encodeSymbol(&bitC, ip[n- 1], CTable);
493
- HUF_FLUSHBITS_1(&bitC);
494
- HUF_encodeSymbol(&bitC, ip[n- 2], CTable);
495
- HUF_FLUSHBITS_2(&bitC);
496
- HUF_encodeSymbol(&bitC, ip[n- 3], CTable);
497
- HUF_FLUSHBITS_1(&bitC);
498
- HUF_encodeSymbol(&bitC, ip[n- 4], CTable);
499
- HUF_FLUSHBITS(&bitC);
1074
+ if (dstSize < HUF_tightCompressBound(srcSize, (size_t)tableLog) || tableLog > 11)
1075
+ HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ MEM_32bits() ? 2 : 4, /* kFast */ 0, /* kLastFast */ 0);
1076
+ else {
1077
+ if (MEM_32bits()) {
1078
+ switch (tableLog) {
1079
+ case 11:
1080
+ HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 2, /* kFastFlush */ 1, /* kLastFast */ 0);
1081
+ break;
1082
+ case 10: ZSTD_FALLTHROUGH;
1083
+ case 9: ZSTD_FALLTHROUGH;
1084
+ case 8:
1085
+ HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 2, /* kFastFlush */ 1, /* kLastFast */ 1);
1086
+ break;
1087
+ case 7: ZSTD_FALLTHROUGH;
1088
+ default:
1089
+ HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 3, /* kFastFlush */ 1, /* kLastFast */ 1);
1090
+ break;
1091
+ }
1092
+ } else {
1093
+ switch (tableLog) {
1094
+ case 11:
1095
+ HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 5, /* kFastFlush */ 1, /* kLastFast */ 0);
1096
+ break;
1097
+ case 10:
1098
+ HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 5, /* kFastFlush */ 1, /* kLastFast */ 1);
1099
+ break;
1100
+ case 9:
1101
+ HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 6, /* kFastFlush */ 1, /* kLastFast */ 0);
1102
+ break;
1103
+ case 8:
1104
+ HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 7, /* kFastFlush */ 1, /* kLastFast */ 0);
1105
+ break;
1106
+ case 7:
1107
+ HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 8, /* kFastFlush */ 1, /* kLastFast */ 0);
1108
+ break;
1109
+ case 6: ZSTD_FALLTHROUGH;
1110
+ default:
1111
+ HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 9, /* kFastFlush */ 1, /* kLastFast */ 1);
1112
+ break;
1113
+ }
1114
+ }
500
1115
  }
1116
+ assert(bitC.ptr <= bitC.endPtr);
501
1117
 
502
- return BIT_closeCStream(&bitC);
1118
+ return HUF_closeCStream(&bitC);
503
1119
  }
504
1120
 
505
1121
  #if DYNAMIC_BMI2
506
1122
 
507
- static TARGET_ATTRIBUTE("bmi2") size_t
1123
+ static BMI2_TARGET_ATTRIBUTE size_t
508
1124
  HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize,
509
1125
  const void* src, size_t srcSize,
510
1126
  const HUF_CElt* CTable)
@@ -523,9 +1139,9 @@ HUF_compress1X_usingCTable_internal_default(void* dst, size_t dstSize,
523
1139
  static size_t
524
1140
  HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
525
1141
  const void* src, size_t srcSize,
526
- const HUF_CElt* CTable, const int bmi2)
1142
+ const HUF_CElt* CTable, const int flags)
527
1143
  {
528
- if (bmi2) {
1144
+ if (flags & HUF_flags_bmi2) {
529
1145
  return HUF_compress1X_usingCTable_internal_bmi2(dst, dstSize, src, srcSize, CTable);
530
1146
  }
531
1147
  return HUF_compress1X_usingCTable_internal_default(dst, dstSize, src, srcSize, CTable);
@@ -536,24 +1152,23 @@ HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
536
1152
  static size_t
537
1153
  HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
538
1154
  const void* src, size_t srcSize,
539
- const HUF_CElt* CTable, const int bmi2)
1155
+ const HUF_CElt* CTable, const int flags)
540
1156
  {
541
- (void)bmi2;
1157
+ (void)flags;
542
1158
  return HUF_compress1X_usingCTable_internal_body(dst, dstSize, src, srcSize, CTable);
543
1159
  }
544
1160
 
545
1161
  #endif
546
1162
 
547
- size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
1163
+ size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags)
548
1164
  {
549
- return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
1165
+ return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, flags);
550
1166
  }
551
1167
 
552
-
553
1168
  static size_t
554
1169
  HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
555
1170
  const void* src, size_t srcSize,
556
- const HUF_CElt* CTable, int bmi2)
1171
+ const HUF_CElt* CTable, int flags)
557
1172
  {
558
1173
  size_t const segmentSize = (srcSize+3)/4; /* first 3 segments */
559
1174
  const BYTE* ip = (const BYTE*) src;
@@ -567,27 +1182,24 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
567
1182
  op += 6; /* jumpTable */
568
1183
 
569
1184
  assert(op <= oend);
570
- { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
571
- if (cSize==0) return 0;
572
- assert(cSize <= 65535);
1185
+ { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) );
1186
+ if (cSize == 0 || cSize > 65535) return 0;
573
1187
  MEM_writeLE16(ostart, (U16)cSize);
574
1188
  op += cSize;
575
1189
  }
576
1190
 
577
1191
  ip += segmentSize;
578
1192
  assert(op <= oend);
579
- { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
580
- if (cSize==0) return 0;
581
- assert(cSize <= 65535);
1193
+ { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) );
1194
+ if (cSize == 0 || cSize > 65535) return 0;
582
1195
  MEM_writeLE16(ostart+2, (U16)cSize);
583
1196
  op += cSize;
584
1197
  }
585
1198
 
586
1199
  ip += segmentSize;
587
1200
  assert(op <= oend);
588
- { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
589
- if (cSize==0) return 0;
590
- assert(cSize <= 65535);
1201
+ { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, flags) );
1202
+ if (cSize == 0 || cSize > 65535) return 0;
591
1203
  MEM_writeLE16(ostart+4, (U16)cSize);
592
1204
  op += cSize;
593
1205
  }
@@ -595,17 +1207,17 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
595
1207
  ip += segmentSize;
596
1208
  assert(op <= oend);
597
1209
  assert(ip <= iend);
598
- { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, bmi2) );
599
- if (cSize==0) return 0;
1210
+ { CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, flags) );
1211
+ if (cSize == 0 || cSize > 65535) return 0;
600
1212
  op += cSize;
601
1213
  }
602
1214
 
603
1215
  return (size_t)(op-ostart);
604
1216
  }
605
1217
 
606
- size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
1218
+ size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int flags)
607
1219
  {
608
- return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
1220
+ return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, flags);
609
1221
  }
610
1222
 
611
1223
  typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;
@@ -613,11 +1225,11 @@ typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;
613
1225
  static size_t HUF_compressCTable_internal(
614
1226
  BYTE* const ostart, BYTE* op, BYTE* const oend,
615
1227
  const void* src, size_t srcSize,
616
- HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int bmi2)
1228
+ HUF_nbStreams_e nbStreams, const HUF_CElt* CTable, const int flags)
617
1229
  {
618
1230
  size_t const cSize = (nbStreams==HUF_singleStream) ?
619
- HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2) :
620
- HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, bmi2);
1231
+ HUF_compress1X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, flags) :
1232
+ HUF_compress4X_usingCTable_internal(op, (size_t)(oend - op), src, srcSize, CTable, flags);
621
1233
  if (HUF_isError(cSize)) { return cSize; }
622
1234
  if (cSize==0) { return 0; } /* uncompressible */
623
1235
  op += cSize;
@@ -629,31 +1241,113 @@ static size_t HUF_compressCTable_internal(
629
1241
 
630
1242
  typedef struct {
631
1243
  unsigned count[HUF_SYMBOLVALUE_MAX + 1];
632
- HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1];
633
- HUF_buildCTable_wksp_tables buildCTable_wksp;
1244
+ HUF_CElt CTable[HUF_CTABLE_SIZE_ST(HUF_SYMBOLVALUE_MAX)];
1245
+ union {
1246
+ HUF_buildCTable_wksp_tables buildCTable_wksp;
1247
+ HUF_WriteCTableWksp writeCTable_wksp;
1248
+ U32 hist_wksp[HIST_WKSP_SIZE_U32];
1249
+ } wksps;
634
1250
  } HUF_compress_tables_t;
635
1251
 
1252
+ #define SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE 4096
1253
+ #define SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO 10 /* Must be >= 2 */
1254
+
1255
+ unsigned HUF_cardinality(const unsigned* count, unsigned maxSymbolValue)
1256
+ {
1257
+ unsigned cardinality = 0;
1258
+ unsigned i;
1259
+
1260
+ for (i = 0; i < maxSymbolValue + 1; i++) {
1261
+ if (count[i] != 0) cardinality += 1;
1262
+ }
1263
+
1264
+ return cardinality;
1265
+ }
1266
+
1267
+ unsigned HUF_minTableLog(unsigned symbolCardinality)
1268
+ {
1269
+ U32 minBitsSymbols = ZSTD_highbit32(symbolCardinality) + 1;
1270
+ return minBitsSymbols;
1271
+ }
1272
+
1273
+ unsigned HUF_optimalTableLog(
1274
+ unsigned maxTableLog,
1275
+ size_t srcSize,
1276
+ unsigned maxSymbolValue,
1277
+ void* workSpace, size_t wkspSize,
1278
+ HUF_CElt* table,
1279
+ const unsigned* count,
1280
+ int flags)
1281
+ {
1282
+ assert(srcSize > 1); /* Not supported, RLE should be used instead */
1283
+ assert(wkspSize >= sizeof(HUF_buildCTable_wksp_tables));
1284
+
1285
+ if (!(flags & HUF_flags_optimalDepth)) {
1286
+ /* cheap evaluation, based on FSE */
1287
+ return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
1288
+ }
1289
+
1290
+ { BYTE* dst = (BYTE*)workSpace + sizeof(HUF_WriteCTableWksp);
1291
+ size_t dstSize = wkspSize - sizeof(HUF_WriteCTableWksp);
1292
+ size_t hSize, newSize;
1293
+ const unsigned symbolCardinality = HUF_cardinality(count, maxSymbolValue);
1294
+ const unsigned minTableLog = HUF_minTableLog(symbolCardinality);
1295
+ size_t optSize = ((size_t) ~0) - 1;
1296
+ unsigned optLog = maxTableLog, optLogGuess;
1297
+
1298
+ DEBUGLOG(6, "HUF_optimalTableLog: probing huf depth (srcSize=%zu)", srcSize);
1299
+
1300
+ /* Search until size increases */
1301
+ for (optLogGuess = minTableLog; optLogGuess <= maxTableLog; optLogGuess++) {
1302
+ DEBUGLOG(7, "checking for huffLog=%u", optLogGuess);
1303
+
1304
+ { size_t maxBits = HUF_buildCTable_wksp(table, count, maxSymbolValue, optLogGuess, workSpace, wkspSize);
1305
+ if (ERR_isError(maxBits)) continue;
1306
+
1307
+ if (maxBits < optLogGuess && optLogGuess > minTableLog) break;
1308
+
1309
+ hSize = HUF_writeCTable_wksp(dst, dstSize, table, maxSymbolValue, (U32)maxBits, workSpace, wkspSize);
1310
+ }
1311
+
1312
+ if (ERR_isError(hSize)) continue;
1313
+
1314
+ newSize = HUF_estimateCompressedSize(table, count, maxSymbolValue) + hSize;
1315
+
1316
+ if (newSize > optSize + 1) {
1317
+ break;
1318
+ }
1319
+
1320
+ if (newSize < optSize) {
1321
+ optSize = newSize;
1322
+ optLog = optLogGuess;
1323
+ }
1324
+ }
1325
+ assert(optLog <= HUF_TABLELOG_MAX);
1326
+ return optLog;
1327
+ }
1328
+ }
1329
+
636
1330
  /* HUF_compress_internal() :
637
- * `workSpace` must a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
1331
+ * `workSpace_align4` must be aligned on 4-bytes boundaries,
1332
+ * and occupies the same space as a table of HUF_WORKSPACE_SIZE_U64 unsigned */
638
1333
  static size_t
639
1334
  HUF_compress_internal (void* dst, size_t dstSize,
640
1335
  const void* src, size_t srcSize,
641
1336
  unsigned maxSymbolValue, unsigned huffLog,
642
1337
  HUF_nbStreams_e nbStreams,
643
1338
  void* workSpace, size_t wkspSize,
644
- HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat,
645
- const int bmi2)
1339
+ HUF_CElt* oldHufTable, HUF_repeat* repeat, int flags)
646
1340
  {
647
- HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace;
1341
+ HUF_compress_tables_t* const table = (HUF_compress_tables_t*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(size_t));
648
1342
  BYTE* const ostart = (BYTE*)dst;
649
1343
  BYTE* const oend = ostart + dstSize;
650
1344
  BYTE* op = ostart;
651
1345
 
652
- HUF_STATIC_ASSERT(sizeof(*table) <= HUF_WORKSPACE_SIZE);
1346
+ DEBUGLOG(5, "HUF_compress_internal (srcSize=%zu)", srcSize);
1347
+ HUF_STATIC_ASSERT(sizeof(*table) + HUF_WORKSPACE_MAX_ALIGNMENT <= HUF_WORKSPACE_SIZE);
653
1348
 
654
1349
  /* checks & inits */
655
- if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC); /* must be aligned on 4-bytes boundaries */
656
- if (wkspSize < HUF_WORKSPACE_SIZE) return ERROR(workSpace_tooSmall);
1350
+ if (wkspSize < sizeof(*table)) return ERROR(workSpace_tooSmall);
657
1351
  if (!srcSize) return 0; /* Uncompressed */
658
1352
  if (!dstSize) return 0; /* cannot fit anything within dst budget */
659
1353
  if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */
@@ -663,17 +1357,34 @@ HUF_compress_internal (void* dst, size_t dstSize,
663
1357
  if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;
664
1358
 
665
1359
  /* Heuristic : If old table is valid, use it for small inputs */
666
- if (preferRepeat && repeat && *repeat == HUF_repeat_valid) {
1360
+ if ((flags & HUF_flags_preferRepeat) && repeat && *repeat == HUF_repeat_valid) {
667
1361
  return HUF_compressCTable_internal(ostart, op, oend,
668
1362
  src, srcSize,
669
- nbStreams, oldHufTable, bmi2);
1363
+ nbStreams, oldHufTable, flags);
1364
+ }
1365
+
1366
+ /* If uncompressible data is suspected, do a smaller sampling first */
1367
+ DEBUG_STATIC_ASSERT(SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO >= 2);
1368
+ if ((flags & HUF_flags_suspectUncompressible) && srcSize >= (SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE * SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO)) {
1369
+ size_t largestTotal = 0;
1370
+ DEBUGLOG(5, "input suspected incompressible : sampling to check");
1371
+ { unsigned maxSymbolValueBegin = maxSymbolValue;
1372
+ CHECK_V_F(largestBegin, HIST_count_simple (table->count, &maxSymbolValueBegin, (const BYTE*)src, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) );
1373
+ largestTotal += largestBegin;
1374
+ }
1375
+ { unsigned maxSymbolValueEnd = maxSymbolValue;
1376
+ CHECK_V_F(largestEnd, HIST_count_simple (table->count, &maxSymbolValueEnd, (const BYTE*)src + srcSize - SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) );
1377
+ largestTotal += largestEnd;
1378
+ }
1379
+ if (largestTotal <= ((2 * SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) >> 7)+4) return 0; /* heuristic : probably not compressible enough */
670
1380
  }
671
1381
 
672
1382
  /* Scan input and build symbol stats */
673
- { CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, workSpace, wkspSize) );
1383
+ { CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, table->wksps.hist_wksp, sizeof(table->wksps.hist_wksp)) );
674
1384
  if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */
675
1385
  if (largest <= (srcSize >> 7)+4) return 0; /* heuristic : probably not compressible enough */
676
1386
  }
1387
+ DEBUGLOG(6, "histogram detail completed (%zu symbols)", showU32(table->count, maxSymbolValue+1));
677
1388
 
678
1389
  /* Check validity of previous table */
679
1390
  if ( repeat
@@ -682,26 +1393,25 @@ HUF_compress_internal (void* dst, size_t dstSize,
682
1393
  *repeat = HUF_repeat_none;
683
1394
  }
684
1395
  /* Heuristic : use existing table for small inputs */
685
- if (preferRepeat && repeat && *repeat != HUF_repeat_none) {
1396
+ if ((flags & HUF_flags_preferRepeat) && repeat && *repeat != HUF_repeat_none) {
686
1397
  return HUF_compressCTable_internal(ostart, op, oend,
687
1398
  src, srcSize,
688
- nbStreams, oldHufTable, bmi2);
1399
+ nbStreams, oldHufTable, flags);
689
1400
  }
690
1401
 
691
1402
  /* Build Huffman Tree */
692
- huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
1403
+ huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue, &table->wksps, sizeof(table->wksps), table->CTable, table->count, flags);
693
1404
  { size_t const maxBits = HUF_buildCTable_wksp(table->CTable, table->count,
694
1405
  maxSymbolValue, huffLog,
695
- &table->buildCTable_wksp, sizeof(table->buildCTable_wksp));
1406
+ &table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp));
696
1407
  CHECK_F(maxBits);
697
1408
  huffLog = (U32)maxBits;
698
- /* Zero unused symbols in CTable, so we can check it for validity */
699
- memset(table->CTable + (maxSymbolValue + 1), 0,
700
- sizeof(table->CTable) - ((maxSymbolValue + 1) * sizeof(HUF_CElt)));
1409
+ DEBUGLOG(6, "bit distribution completed (%zu symbols)", showCTableBits(table->CTable + 1, maxSymbolValue+1));
701
1410
  }
702
1411
 
703
1412
  /* Write table description header */
704
- { CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, table->CTable, maxSymbolValue, huffLog) );
1413
+ { CHECK_V_F(hSize, HUF_writeCTable_wksp(op, dstSize, table->CTable, maxSymbolValue, huffLog,
1414
+ &table->wksps.writeCTable_wksp, sizeof(table->wksps.writeCTable_wksp)) );
705
1415
  /* Check if using previous huffman table is beneficial */
706
1416
  if (repeat && *repeat != HUF_repeat_none) {
707
1417
  size_t const oldSize = HUF_estimateCompressedSize(oldHufTable, table->count, maxSymbolValue);
@@ -709,7 +1419,7 @@ HUF_compress_internal (void* dst, size_t dstSize,
709
1419
  if (oldSize <= hSize + newSize || hSize + 12 >= srcSize) {
710
1420
  return HUF_compressCTable_internal(ostart, op, oend,
711
1421
  src, srcSize,
712
- nbStreams, oldHufTable, bmi2);
1422
+ nbStreams, oldHufTable, flags);
713
1423
  } }
714
1424
 
715
1425
  /* Use the new huffman table */
@@ -717,85 +1427,41 @@ HUF_compress_internal (void* dst, size_t dstSize,
717
1427
  op += hSize;
718
1428
  if (repeat) { *repeat = HUF_repeat_none; }
719
1429
  if (oldHufTable)
720
- memcpy(oldHufTable, table->CTable, sizeof(table->CTable)); /* Save new table */
1430
+ ZSTD_memcpy(oldHufTable, table->CTable, sizeof(table->CTable)); /* Save new table */
721
1431
  }
722
1432
  return HUF_compressCTable_internal(ostart, op, oend,
723
1433
  src, srcSize,
724
- nbStreams, table->CTable, bmi2);
725
- }
726
-
727
-
728
- size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
729
- const void* src, size_t srcSize,
730
- unsigned maxSymbolValue, unsigned huffLog,
731
- void* workSpace, size_t wkspSize)
732
- {
733
- return HUF_compress_internal(dst, dstSize, src, srcSize,
734
- maxSymbolValue, huffLog, HUF_singleStream,
735
- workSpace, wkspSize,
736
- NULL, NULL, 0, 0 /*bmi2*/);
1434
+ nbStreams, table->CTable, flags);
737
1435
  }
738
1436
 
739
1437
  size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
740
1438
  const void* src, size_t srcSize,
741
1439
  unsigned maxSymbolValue, unsigned huffLog,
742
1440
  void* workSpace, size_t wkspSize,
743
- HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
1441
+ HUF_CElt* hufTable, HUF_repeat* repeat, int flags)
744
1442
  {
1443
+ DEBUGLOG(5, "HUF_compress1X_repeat (srcSize = %zu)", srcSize);
745
1444
  return HUF_compress_internal(dst, dstSize, src, srcSize,
746
1445
  maxSymbolValue, huffLog, HUF_singleStream,
747
1446
  workSpace, wkspSize, hufTable,
748
- repeat, preferRepeat, bmi2);
749
- }
750
-
751
- size_t HUF_compress1X (void* dst, size_t dstSize,
752
- const void* src, size_t srcSize,
753
- unsigned maxSymbolValue, unsigned huffLog)
754
- {
755
- unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
756
- return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
1447
+ repeat, flags);
757
1448
  }
758
1449
 
759
1450
  /* HUF_compress4X_repeat():
760
1451
  * compress input using 4 streams.
761
- * provide workspace to generate compression tables */
762
- size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
763
- const void* src, size_t srcSize,
764
- unsigned maxSymbolValue, unsigned huffLog,
765
- void* workSpace, size_t wkspSize)
766
- {
767
- return HUF_compress_internal(dst, dstSize, src, srcSize,
768
- maxSymbolValue, huffLog, HUF_fourStreams,
769
- workSpace, wkspSize,
770
- NULL, NULL, 0, 0 /*bmi2*/);
771
- }
772
-
773
- /* HUF_compress4X_repeat():
774
- * compress input using 4 streams.
775
- * re-use an existing huffman compression table */
1452
+ * consider skipping quickly
1453
+ * reuse an existing huffman compression table */
776
1454
  size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
777
1455
  const void* src, size_t srcSize,
778
1456
  unsigned maxSymbolValue, unsigned huffLog,
779
1457
  void* workSpace, size_t wkspSize,
780
- HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
1458
+ HUF_CElt* hufTable, HUF_repeat* repeat, int flags)
781
1459
  {
1460
+ DEBUGLOG(5, "HUF_compress4X_repeat (srcSize = %zu)", srcSize);
782
1461
  return HUF_compress_internal(dst, dstSize, src, srcSize,
783
1462
  maxSymbolValue, huffLog, HUF_fourStreams,
784
1463
  workSpace, wkspSize,
785
- hufTable, repeat, preferRepeat, bmi2);
786
- }
787
-
788
- size_t HUF_compress2 (void* dst, size_t dstSize,
789
- const void* src, size_t srcSize,
790
- unsigned maxSymbolValue, unsigned huffLog)
791
- {
792
- unsigned workSpace[HUF_WORKSPACE_SIZE_U32];
793
- return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace));
1464
+ hufTable, repeat, flags);
794
1465
  }
795
1466
 
796
- size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
797
- {
798
- return HUF_compress2(dst, maxDstSize, src, srcSize, 255, HUF_TABLELOG_DEFAULT);
799
- }
800
-
801
- }
1467
+ } // namespace duckdb_zstd