duckdb 1.1.2-dev6.0 → 1.1.4-dev11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (1747)
  1. package/.github/workflows/NodeJS.yml +5 -54
  2. package/binding.gyp +73 -52
  3. package/package.json +2 -2
  4. package/src/duckdb/{src → extension}/core_functions/aggregate/algebraic/avg.cpp +2 -2
  5. package/src/duckdb/{src → extension}/core_functions/aggregate/algebraic/corr.cpp +4 -4
  6. package/src/duckdb/{src → extension}/core_functions/aggregate/algebraic/covar.cpp +2 -2
  7. package/src/duckdb/{src → extension}/core_functions/aggregate/algebraic/stddev.cpp +2 -2
  8. package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/approx_count.cpp +1 -1
  9. package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/arg_min_max.cpp +66 -18
  10. package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/bitagg.cpp +1 -1
  11. package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/bitstring_agg.cpp +5 -7
  12. package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/bool.cpp +3 -1
  13. package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/kurtosis.cpp +1 -1
  14. package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/product.cpp +1 -1
  15. package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/skew.cpp +2 -2
  16. package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/string_agg.cpp +1 -1
  17. package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/sum.cpp +13 -2
  18. package/src/duckdb/{src → extension}/core_functions/aggregate/holistic/approx_top_k.cpp +3 -3
  19. package/src/duckdb/{src → extension}/core_functions/aggregate/holistic/approximate_quantile.cpp +51 -15
  20. package/src/duckdb/{src → extension}/core_functions/aggregate/holistic/mad.cpp +25 -10
  21. package/src/duckdb/{src → extension}/core_functions/aggregate/holistic/mode.cpp +215 -71
  22. package/src/duckdb/{src → extension}/core_functions/aggregate/holistic/quantile.cpp +58 -31
  23. package/src/duckdb/{src → extension}/core_functions/aggregate/holistic/reservoir_quantile.cpp +2 -2
  24. package/src/duckdb/{src → extension}/core_functions/aggregate/nested/binned_histogram.cpp +9 -4
  25. package/src/duckdb/{src → extension}/core_functions/aggregate/nested/histogram.cpp +4 -2
  26. package/src/duckdb/{src → extension}/core_functions/aggregate/nested/list.cpp +1 -1
  27. package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_avg.cpp +1 -1
  28. package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_count.cpp +2 -2
  29. package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_intercept.cpp +6 -2
  30. package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_r2.cpp +2 -2
  31. package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_slope.cpp +2 -2
  32. package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_sxx_syy.cpp +2 -2
  33. package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_sxy.cpp +3 -3
  34. package/src/duckdb/extension/core_functions/core_functions_extension.cpp +85 -0
  35. package/src/duckdb/{src → extension}/core_functions/function_list.cpp +30 -51
  36. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/algebraic/corr.hpp +3 -7
  37. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/algebraic_functions.hpp +1 -1
  38. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/distributive_functions.hpp +16 -21
  39. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/histogram_helpers.hpp +1 -1
  40. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/holistic_functions.hpp +1 -1
  41. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/nested_functions.hpp +1 -1
  42. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/quantile_helpers.hpp +2 -2
  43. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/quantile_sort_tree.hpp +140 -58
  44. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/quantile_state.hpp +50 -43
  45. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/regression/regr_count.hpp +2 -2
  46. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/regression/regr_slope.hpp +3 -7
  47. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/regression_functions.hpp +1 -1
  48. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/array_functions.hpp +1 -1
  49. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/bit_functions.hpp +1 -1
  50. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/blob_functions.hpp +1 -10
  51. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/date_functions.hpp +22 -55
  52. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/debug_functions.hpp +1 -1
  53. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/enum_functions.hpp +1 -1
  54. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/generic_functions.hpp +1 -10
  55. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/list_functions.hpp +4 -4
  56. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/map_functions.hpp +1 -10
  57. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/math_functions.hpp +1 -1
  58. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/operators_functions.hpp +1 -1
  59. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/random_functions.hpp +1 -1
  60. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/string_functions.hpp +10 -103
  61. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/struct_functions.hpp +1 -19
  62. package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/union_functions.hpp +1 -1
  63. package/src/duckdb/extension/core_functions/include/core_functions_extension.hpp +22 -0
  64. package/src/duckdb/{src → extension}/core_functions/lambda_functions.cpp +1 -1
  65. package/src/duckdb/{src → extension}/core_functions/scalar/array/array_functions.cpp +11 -4
  66. package/src/duckdb/{src → extension}/core_functions/scalar/array/array_value.cpp +2 -2
  67. package/src/duckdb/{src → extension}/core_functions/scalar/bit/bitstring.cpp +12 -5
  68. package/src/duckdb/{src → extension}/core_functions/scalar/blob/base64.cpp +4 -2
  69. package/src/duckdb/{src → extension}/core_functions/scalar/blob/encode.cpp +4 -2
  70. package/src/duckdb/{src → extension}/core_functions/scalar/date/age.cpp +9 -3
  71. package/src/duckdb/extension/core_functions/scalar/date/current.cpp +29 -0
  72. package/src/duckdb/{src → extension}/core_functions/scalar/date/date_diff.cpp +1 -1
  73. package/src/duckdb/{src → extension}/core_functions/scalar/date/date_part.cpp +42 -9
  74. package/src/duckdb/{src → extension}/core_functions/scalar/date/date_sub.cpp +1 -1
  75. package/src/duckdb/{src → extension}/core_functions/scalar/date/date_trunc.cpp +4 -1
  76. package/src/duckdb/{src → extension}/core_functions/scalar/date/epoch.cpp +19 -3
  77. package/src/duckdb/{src → extension}/core_functions/scalar/date/make_date.cpp +40 -5
  78. package/src/duckdb/{src → extension}/core_functions/scalar/date/time_bucket.cpp +4 -1
  79. package/src/duckdb/{src → extension}/core_functions/scalar/date/to_interval.cpp +54 -28
  80. package/src/duckdb/{src → extension}/core_functions/scalar/debug/vector_type.cpp +1 -1
  81. package/src/duckdb/{src → extension}/core_functions/scalar/enum/enum_functions.cpp +2 -7
  82. package/src/duckdb/{src → extension}/core_functions/scalar/generic/alias.cpp +2 -2
  83. package/src/duckdb/{src/function → extension/core_functions}/scalar/generic/binning.cpp +4 -3
  84. package/src/duckdb/{src → extension}/core_functions/scalar/generic/can_implicitly_cast.cpp +1 -1
  85. package/src/duckdb/{src → extension}/core_functions/scalar/generic/current_setting.cpp +1 -1
  86. package/src/duckdb/{src → extension}/core_functions/scalar/generic/hash.cpp +1 -1
  87. package/src/duckdb/{src → extension}/core_functions/scalar/generic/least.cpp +30 -10
  88. package/src/duckdb/{src → extension}/core_functions/scalar/generic/stats.cpp +1 -1
  89. package/src/duckdb/{src → extension}/core_functions/scalar/generic/system_functions.cpp +1 -1
  90. package/src/duckdb/{src → extension}/core_functions/scalar/generic/typeof.cpp +1 -1
  91. package/src/duckdb/{src → extension}/core_functions/scalar/list/array_slice.cpp +93 -88
  92. package/src/duckdb/{src → extension}/core_functions/scalar/list/flatten.cpp +1 -1
  93. package/src/duckdb/{src → extension}/core_functions/scalar/list/list_aggregates.cpp +7 -3
  94. package/src/duckdb/{src → extension}/core_functions/scalar/list/list_distance.cpp +8 -2
  95. package/src/duckdb/{src → extension}/core_functions/scalar/list/list_filter.cpp +3 -3
  96. package/src/duckdb/{src → extension}/core_functions/scalar/list/list_has_any_or_all.cpp +3 -3
  97. package/src/duckdb/{src → extension}/core_functions/scalar/list/list_reduce.cpp +5 -5
  98. package/src/duckdb/{src → extension}/core_functions/scalar/list/list_sort.cpp +1 -1
  99. package/src/duckdb/{src → extension}/core_functions/scalar/list/list_transform.cpp +3 -3
  100. package/src/duckdb/{src → extension}/core_functions/scalar/list/list_value.cpp +3 -3
  101. package/src/duckdb/{src → extension}/core_functions/scalar/list/range.cpp +7 -1
  102. package/src/duckdb/{src → extension}/core_functions/scalar/map/cardinality.cpp +1 -1
  103. package/src/duckdb/{src → extension}/core_functions/scalar/map/map.cpp +5 -4
  104. package/src/duckdb/{src → extension}/core_functions/scalar/map/map_concat.cpp +1 -1
  105. package/src/duckdb/{src → extension}/core_functions/scalar/map/map_entries.cpp +1 -1
  106. package/src/duckdb/{src → extension}/core_functions/scalar/map/map_extract.cpp +13 -25
  107. package/src/duckdb/{src → extension}/core_functions/scalar/map/map_from_entries.cpp +2 -1
  108. package/src/duckdb/{src → extension}/core_functions/scalar/map/map_keys_values.cpp +11 -9
  109. package/src/duckdb/{src → extension}/core_functions/scalar/math/numeric.cpp +83 -37
  110. package/src/duckdb/{src → extension}/core_functions/scalar/operators/bitwise.cpp +19 -1
  111. package/src/duckdb/{src → extension}/core_functions/scalar/random/random.cpp +4 -3
  112. package/src/duckdb/{src → extension}/core_functions/scalar/random/setseed.cpp +2 -1
  113. package/src/duckdb/{src → extension}/core_functions/scalar/string/ascii.cpp +1 -1
  114. package/src/duckdb/{src → extension}/core_functions/scalar/string/bar.cpp +6 -4
  115. package/src/duckdb/{src → extension}/core_functions/scalar/string/chr.cpp +1 -1
  116. package/src/duckdb/{src → extension}/core_functions/scalar/string/damerau_levenshtein.cpp +1 -1
  117. package/src/duckdb/{src → extension}/core_functions/scalar/string/format_bytes.cpp +1 -1
  118. package/src/duckdb/{src → extension}/core_functions/scalar/string/hamming.cpp +1 -1
  119. package/src/duckdb/{src → extension}/core_functions/scalar/string/hex.cpp +7 -3
  120. package/src/duckdb/{src → extension}/core_functions/scalar/string/instr.cpp +4 -4
  121. package/src/duckdb/{src → extension}/core_functions/scalar/string/jaccard.cpp +1 -1
  122. package/src/duckdb/extension/core_functions/scalar/string/jaro_winkler.cpp +112 -0
  123. package/src/duckdb/{src → extension}/core_functions/scalar/string/left_right.cpp +6 -6
  124. package/src/duckdb/{src → extension}/core_functions/scalar/string/levenshtein.cpp +1 -1
  125. package/src/duckdb/{src → extension}/core_functions/scalar/string/pad.cpp +9 -5
  126. package/src/duckdb/{src → extension}/core_functions/scalar/string/parse_path.cpp +4 -4
  127. package/src/duckdb/{src → extension}/core_functions/scalar/string/printf.cpp +3 -1
  128. package/src/duckdb/{src → extension}/core_functions/scalar/string/repeat.cpp +4 -1
  129. package/src/duckdb/{src → extension}/core_functions/scalar/string/replace.cpp +1 -1
  130. package/src/duckdb/{src → extension}/core_functions/scalar/string/reverse.cpp +1 -1
  131. package/src/duckdb/{src → extension}/core_functions/scalar/string/starts_with.cpp +5 -3
  132. package/src/duckdb/{src → extension}/core_functions/scalar/string/to_base.cpp +1 -1
  133. package/src/duckdb/{src → extension}/core_functions/scalar/string/translate.cpp +1 -1
  134. package/src/duckdb/{src → extension}/core_functions/scalar/string/trim.cpp +1 -1
  135. package/src/duckdb/{src → extension}/core_functions/scalar/string/unicode.cpp +1 -1
  136. package/src/duckdb/{src → extension}/core_functions/scalar/string/url_encode.cpp +1 -1
  137. package/src/duckdb/{src → extension}/core_functions/scalar/struct/struct_insert.cpp +25 -31
  138. package/src/duckdb/{src → extension}/core_functions/scalar/union/union_extract.cpp +1 -1
  139. package/src/duckdb/{src → extension}/core_functions/scalar/union/union_tag.cpp +1 -1
  140. package/src/duckdb/{src → extension}/core_functions/scalar/union/union_value.cpp +3 -3
  141. package/src/duckdb/extension/icu/icu-dateadd.cpp +16 -11
  142. package/src/duckdb/extension/icu/icu-datefunc.cpp +2 -2
  143. package/src/duckdb/extension/icu/icu-datepart.cpp +8 -5
  144. package/src/duckdb/extension/icu/icu-datesub.cpp +2 -2
  145. package/src/duckdb/extension/icu/icu-datetrunc.cpp +8 -1
  146. package/src/duckdb/extension/icu/icu-list-range.cpp +2 -2
  147. package/src/duckdb/extension/icu/icu-makedate.cpp +18 -7
  148. package/src/duckdb/extension/icu/icu-strptime.cpp +9 -3
  149. package/src/duckdb/extension/icu/icu-table-range.cpp +2 -2
  150. package/src/duckdb/extension/icu/icu-timebucket.cpp +4 -1
  151. package/src/duckdb/extension/icu/icu-timezone.cpp +67 -1
  152. package/src/duckdb/extension/icu/icu_extension.cpp +60 -5
  153. package/src/duckdb/extension/icu/include/icu-datefunc.hpp +2 -1
  154. package/src/duckdb/extension/icu/third_party/icu/common/bytestriebuilder.cpp +1 -1
  155. package/src/duckdb/extension/icu/third_party/icu/common/dtintrv.cpp +1 -1
  156. package/src/duckdb/extension/icu/third_party/icu/common/filteredbrk.cpp +1 -1
  157. package/src/duckdb/extension/icu/third_party/icu/common/locid.cpp +1 -1
  158. package/src/duckdb/extension/icu/third_party/icu/common/lsr.cpp +1 -1
  159. package/src/duckdb/extension/icu/third_party/icu/common/lsr.h +2 -2
  160. package/src/duckdb/extension/icu/third_party/icu/common/messagepattern.cpp +2 -2
  161. package/src/duckdb/extension/icu/third_party/icu/common/normlzr.cpp +1 -1
  162. package/src/duckdb/extension/icu/third_party/icu/common/rbbinode.h +1 -1
  163. package/src/duckdb/extension/icu/third_party/icu/common/schriter.cpp +1 -1
  164. package/src/duckdb/extension/icu/third_party/icu/common/stringtriebuilder.cpp +8 -8
  165. package/src/duckdb/extension/icu/third_party/icu/common/ucharstriebuilder.cpp +1 -1
  166. package/src/duckdb/extension/icu/third_party/icu/common/uchriter.cpp +1 -1
  167. package/src/duckdb/extension/icu/third_party/icu/common/unicode/brkiter.h +2 -2
  168. package/src/duckdb/extension/icu/third_party/icu/common/unicode/bytestriebuilder.h +1 -1
  169. package/src/duckdb/extension/icu/third_party/icu/common/unicode/chariter.h +3 -3
  170. package/src/duckdb/extension/icu/third_party/icu/common/unicode/dtintrv.h +3 -3
  171. package/src/duckdb/extension/icu/third_party/icu/common/unicode/locid.h +3 -3
  172. package/src/duckdb/extension/icu/third_party/icu/common/unicode/messagepattern.h +4 -4
  173. package/src/duckdb/extension/icu/third_party/icu/common/unicode/normlzr.h +3 -3
  174. package/src/duckdb/extension/icu/third_party/icu/common/unicode/parsepos.h +4 -4
  175. package/src/duckdb/extension/icu/third_party/icu/common/unicode/rbbi.h +2 -2
  176. package/src/duckdb/extension/icu/third_party/icu/common/unicode/schriter.h +1 -1
  177. package/src/duckdb/extension/icu/third_party/icu/common/unicode/strenum.h +2 -2
  178. package/src/duckdb/extension/icu/third_party/icu/common/unicode/stringpiece.h +1 -1
  179. package/src/duckdb/extension/icu/third_party/icu/common/unicode/stringtriebuilder.h +9 -9
  180. package/src/duckdb/extension/icu/third_party/icu/common/unicode/ucharstriebuilder.h +1 -1
  181. package/src/duckdb/extension/icu/third_party/icu/common/unicode/uchriter.h +1 -1
  182. package/src/duckdb/extension/icu/third_party/icu/common/unicode/uniset.h +3 -3
  183. package/src/duckdb/extension/icu/third_party/icu/common/unicode/unistr.h +12 -12
  184. package/src/duckdb/extension/icu/third_party/icu/common/unicode/uobject.h +2 -2
  185. package/src/duckdb/extension/icu/third_party/icu/common/unifiedcache.h +4 -4
  186. package/src/duckdb/extension/icu/third_party/icu/common/uniset.cpp +1 -1
  187. package/src/duckdb/extension/icu/third_party/icu/common/ustr_titlecase_brkiter.cpp +1 -1
  188. package/src/duckdb/extension/icu/third_party/icu/common/ustrenum.cpp +2 -2
  189. package/src/duckdb/extension/icu/third_party/icu/common/uvector.cpp +1 -1
  190. package/src/duckdb/extension/icu/third_party/icu/common/uvector.h +3 -3
  191. package/src/duckdb/extension/icu/third_party/icu/common/uvectr32.cpp +1 -1
  192. package/src/duckdb/extension/icu/third_party/icu/common/uvectr32.h +3 -3
  193. package/src/duckdb/extension/icu/third_party/icu/common/uvectr64.cpp +1 -1
  194. package/src/duckdb/extension/icu/third_party/icu/common/uvectr64.h +3 -3
  195. package/src/duckdb/extension/icu/third_party/icu/i18n/alphaindex.cpp +2 -2
  196. package/src/duckdb/extension/icu/third_party/icu/i18n/calendar.cpp +1 -1
  197. package/src/duckdb/extension/icu/third_party/icu/i18n/choicfmt.cpp +1 -1
  198. package/src/duckdb/extension/icu/third_party/icu/i18n/coleitr.cpp +2 -2
  199. package/src/duckdb/extension/icu/third_party/icu/i18n/coll.cpp +2 -2
  200. package/src/duckdb/extension/icu/third_party/icu/i18n/collationiterator.cpp +1 -1
  201. package/src/duckdb/extension/icu/third_party/icu/i18n/collationiterator.h +2 -2
  202. package/src/duckdb/extension/icu/third_party/icu/i18n/collationsettings.cpp +1 -1
  203. package/src/duckdb/extension/icu/third_party/icu/i18n/collationsettings.h +2 -2
  204. package/src/duckdb/extension/icu/third_party/icu/i18n/currpinf.cpp +1 -1
  205. package/src/duckdb/extension/icu/third_party/icu/i18n/datefmt.cpp +2 -2
  206. package/src/duckdb/extension/icu/third_party/icu/i18n/dcfmtsym.cpp +1 -1
  207. package/src/duckdb/extension/icu/third_party/icu/i18n/decimfmt.cpp +1 -1
  208. package/src/duckdb/extension/icu/third_party/icu/i18n/dtfmtsym.cpp +1 -1
  209. package/src/duckdb/extension/icu/third_party/icu/i18n/dtitvfmt.cpp +1 -1
  210. package/src/duckdb/extension/icu/third_party/icu/i18n/dtitvinf.cpp +1 -1
  211. package/src/duckdb/extension/icu/third_party/icu/i18n/dtptngen.cpp +2 -2
  212. package/src/duckdb/extension/icu/third_party/icu/i18n/dtptngen_impl.h +4 -4
  213. package/src/duckdb/extension/icu/third_party/icu/i18n/dtrule.cpp +2 -2
  214. package/src/duckdb/extension/icu/third_party/icu/i18n/fmtable.cpp +1 -1
  215. package/src/duckdb/extension/icu/third_party/icu/i18n/format.cpp +1 -1
  216. package/src/duckdb/extension/icu/third_party/icu/i18n/fpositer.cpp +1 -1
  217. package/src/duckdb/extension/icu/third_party/icu/i18n/measfmt.cpp +1 -1
  218. package/src/duckdb/extension/icu/third_party/icu/i18n/measunit.cpp +1 -1
  219. package/src/duckdb/extension/icu/third_party/icu/i18n/measure.cpp +1 -1
  220. package/src/duckdb/extension/icu/third_party/icu/i18n/msgfmt.cpp +2 -2
  221. package/src/duckdb/extension/icu/third_party/icu/i18n/nfrs.cpp +1 -1
  222. package/src/duckdb/extension/icu/third_party/icu/i18n/nfrs.h +2 -2
  223. package/src/duckdb/extension/icu/third_party/icu/i18n/nfrule.cpp +1 -1
  224. package/src/duckdb/extension/icu/third_party/icu/i18n/nfrule.h +2 -2
  225. package/src/duckdb/extension/icu/third_party/icu/i18n/nfsubs.cpp +9 -9
  226. package/src/duckdb/extension/icu/third_party/icu/i18n/nfsubs.h +2 -2
  227. package/src/duckdb/extension/icu/third_party/icu/i18n/number_asformat.cpp +1 -1
  228. package/src/duckdb/extension/icu/third_party/icu/i18n/number_asformat.h +1 -1
  229. package/src/duckdb/extension/icu/third_party/icu/i18n/numfmt.cpp +1 -1
  230. package/src/duckdb/extension/icu/third_party/icu/i18n/olsontz.cpp +1 -1
  231. package/src/duckdb/extension/icu/third_party/icu/i18n/olsontz.h +1 -1
  232. package/src/duckdb/extension/icu/third_party/icu/i18n/plurfmt.cpp +2 -2
  233. package/src/duckdb/extension/icu/third_party/icu/i18n/plurrule.cpp +1 -1
  234. package/src/duckdb/extension/icu/third_party/icu/i18n/rbnf.cpp +4 -4
  235. package/src/duckdb/extension/icu/third_party/icu/i18n/rbtz.cpp +2 -2
  236. package/src/duckdb/extension/icu/third_party/icu/i18n/region.cpp +2 -2
  237. package/src/duckdb/extension/icu/third_party/icu/i18n/reldtfmt.cpp +1 -1
  238. package/src/duckdb/extension/icu/third_party/icu/i18n/reldtfmt.h +1 -1
  239. package/src/duckdb/extension/icu/third_party/icu/i18n/rulebasedcollator.cpp +1 -1
  240. package/src/duckdb/extension/icu/third_party/icu/i18n/selfmt.cpp +2 -2
  241. package/src/duckdb/extension/icu/third_party/icu/i18n/simpletz.cpp +1 -1
  242. package/src/duckdb/extension/icu/third_party/icu/i18n/smpdtfmt.cpp +1 -1
  243. package/src/duckdb/extension/icu/third_party/icu/i18n/sortkey.cpp +1 -1
  244. package/src/duckdb/extension/icu/third_party/icu/i18n/timezone.cpp +1 -1
  245. package/src/duckdb/extension/icu/third_party/icu/i18n/tmutamt.cpp +1 -1
  246. package/src/duckdb/extension/icu/third_party/icu/i18n/tzfmt.cpp +1 -1
  247. package/src/duckdb/extension/icu/third_party/icu/i18n/tzgnames.cpp +1 -1
  248. package/src/duckdb/extension/icu/third_party/icu/i18n/tzgnames.h +2 -2
  249. package/src/duckdb/extension/icu/third_party/icu/i18n/tznames.cpp +3 -3
  250. package/src/duckdb/extension/icu/third_party/icu/i18n/tznames_impl.cpp +2 -2
  251. package/src/duckdb/extension/icu/third_party/icu/i18n/tznames_impl.h +2 -2
  252. package/src/duckdb/extension/icu/third_party/icu/i18n/tzrule.cpp +8 -8
  253. package/src/duckdb/extension/icu/third_party/icu/i18n/tztrans.cpp +2 -2
  254. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/alphaindex.h +2 -2
  255. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/calendar.h +2 -2
  256. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/choicfmt.h +1 -1
  257. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/coleitr.h +2 -2
  258. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/coll.h +2 -2
  259. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/currpinf.h +3 -3
  260. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/datefmt.h +1 -1
  261. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/dcfmtsym.h +2 -2
  262. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/decimfmt.h +1 -1
  263. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/dtfmtsym.h +2 -2
  264. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/dtitvfmt.h +3 -3
  265. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/dtitvinf.h +3 -3
  266. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/dtptngen.h +2 -2
  267. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/dtrule.h +2 -2
  268. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/fieldpos.h +4 -4
  269. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/fmtable.h +2 -2
  270. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/format.h +2 -2
  271. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/fpositer.h +2 -2
  272. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/measfmt.h +1 -1
  273. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/measunit.h +2 -2
  274. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/measure.h +1 -1
  275. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/msgfmt.h +2 -2
  276. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/numfmt.h +1 -1
  277. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/plurfmt.h +2 -2
  278. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/plurrule.h +2 -2
  279. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/rbnf.h +1 -1
  280. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/rbtz.h +2 -2
  281. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/region.h +2 -2
  282. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/search.h +2 -2
  283. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/selfmt.h +2 -2
  284. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/simpletz.h +1 -1
  285. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/smpdtfmt.h +1 -1
  286. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/sortkey.h +3 -3
  287. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/stsearch.h +1 -1
  288. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tblcoll.h +1 -1
  289. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/timezone.h +2 -2
  290. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tmutamt.h +3 -3
  291. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tmutfmt.h +2 -2
  292. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tzfmt.h +1 -1
  293. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tznames.h +2 -2
  294. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tzrule.h +8 -8
  295. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tztrans.h +2 -2
  296. package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/vtzone.h +2 -2
  297. package/src/duckdb/extension/icu/third_party/icu/i18n/utf16collationiterator.cpp +2 -2
  298. package/src/duckdb/extension/icu/third_party/icu/i18n/utf16collationiterator.h +2 -2
  299. package/src/duckdb/extension/icu/third_party/icu/i18n/vtzone.cpp +2 -2
  300. package/src/duckdb/extension/json/buffered_json_reader.cpp +6 -1
  301. package/src/duckdb/extension/json/include/buffered_json_reader.hpp +2 -0
  302. package/src/duckdb/extension/json/include/json_common.hpp +14 -10
  303. package/src/duckdb/extension/json/include/json_scan.hpp +48 -7
  304. package/src/duckdb/extension/json/include/json_structure.hpp +2 -1
  305. package/src/duckdb/extension/json/include/json_transform.hpp +5 -2
  306. package/src/duckdb/extension/json/json_functions/copy_json.cpp +1 -1
  307. package/src/duckdb/extension/json/json_functions/json_create.cpp +57 -20
  308. package/src/duckdb/extension/json/json_functions/json_serialize_plan.cpp +7 -6
  309. package/src/duckdb/extension/json/json_functions/json_serialize_sql.cpp +6 -5
  310. package/src/duckdb/extension/json/json_functions/json_structure.cpp +20 -17
  311. package/src/duckdb/extension/json/json_functions/json_transform.cpp +48 -17
  312. package/src/duckdb/extension/json/json_functions/read_json.cpp +83 -34
  313. package/src/duckdb/extension/json/json_functions/read_json_objects.cpp +3 -3
  314. package/src/duckdb/extension/json/json_functions.cpp +14 -16
  315. package/src/duckdb/extension/json/json_scan.cpp +36 -16
  316. package/src/duckdb/extension/json/json_serializer.cpp +1 -1
  317. package/src/duckdb/extension/json/serialize_json.cpp +2 -2
  318. package/src/duckdb/extension/parquet/column_reader.cpp +136 -116
  319. package/src/duckdb/extension/parquet/column_writer.cpp +870 -604
  320. package/src/duckdb/extension/parquet/geo_parquet.cpp +4 -5
  321. package/src/duckdb/extension/parquet/include/boolean_column_reader.hpp +0 -4
  322. package/src/duckdb/extension/parquet/include/column_reader.hpp +24 -19
  323. package/src/duckdb/extension/parquet/include/column_writer.hpp +7 -5
  324. package/src/duckdb/extension/parquet/include/decode_utils.hpp +138 -18
  325. package/src/duckdb/extension/parquet/include/geo_parquet.hpp +4 -3
  326. package/src/duckdb/extension/parquet/include/null_column_reader.hpp +1 -14
  327. package/src/duckdb/extension/parquet/include/parquet_bss_encoder.hpp +45 -0
  328. package/src/duckdb/extension/parquet/include/parquet_crypto.hpp +1 -1
  329. package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +101 -90
  330. package/src/duckdb/extension/parquet/include/parquet_dbp_encoder.hpp +179 -0
  331. package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +2 -3
  332. package/src/duckdb/extension/parquet/include/parquet_dlba_encoder.hpp +48 -0
  333. package/src/duckdb/extension/parquet/include/parquet_extension.hpp +8 -0
  334. package/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp +2 -2
  335. package/src/duckdb/extension/parquet/include/parquet_metadata.hpp +5 -0
  336. package/src/duckdb/extension/parquet/include/parquet_reader.hpp +22 -18
  337. package/src/duckdb/extension/parquet/include/parquet_rle_bp_decoder.hpp +1 -5
  338. package/src/duckdb/extension/parquet/include/parquet_statistics.hpp +87 -3
  339. package/src/duckdb/extension/parquet/include/parquet_writer.hpp +30 -16
  340. package/src/duckdb/extension/parquet/include/resizable_buffer.hpp +1 -0
  341. package/src/duckdb/extension/parquet/include/string_column_reader.hpp +0 -8
  342. package/src/duckdb/extension/parquet/include/struct_column_reader.hpp +1 -1
  343. package/src/duckdb/extension/parquet/include/templated_column_reader.hpp +1 -42
  344. package/src/duckdb/extension/parquet/include/thrift_tools.hpp +13 -1
  345. package/src/duckdb/extension/parquet/include/zstd_file_system.hpp +4 -0
  346. package/src/duckdb/extension/parquet/parquet_extension.cpp +240 -197
  347. package/src/duckdb/extension/parquet/parquet_metadata.cpp +138 -6
  348. package/src/duckdb/extension/parquet/parquet_reader.cpp +155 -79
  349. package/src/duckdb/extension/parquet/parquet_statistics.cpp +258 -38
  350. package/src/duckdb/extension/parquet/parquet_timestamp.cpp +17 -3
  351. package/src/duckdb/extension/parquet/parquet_writer.cpp +65 -34
  352. package/src/duckdb/extension/parquet/serialize_parquet.cpp +4 -0
  353. package/src/duckdb/extension/parquet/zstd_file_system.cpp +13 -0
  354. package/src/duckdb/src/catalog/catalog.cpp +272 -97
  355. package/src/duckdb/src/catalog/catalog_entry/duck_index_entry.cpp +9 -4
  356. package/src/duckdb/src/catalog/catalog_entry/duck_schema_entry.cpp +8 -0
  357. package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +145 -95
  358. package/src/duckdb/src/catalog/catalog_entry/scalar_function_catalog_entry.cpp +9 -3
  359. package/src/duckdb/src/catalog/catalog_entry/schema_catalog_entry.cpp +15 -0
  360. package/src/duckdb/src/catalog/catalog_entry/table_catalog_entry.cpp +40 -24
  361. package/src/duckdb/src/catalog/catalog_entry/table_function_catalog_entry.cpp +1 -1
  362. package/src/duckdb/src/catalog/catalog_entry/type_catalog_entry.cpp +2 -2
  363. package/src/duckdb/src/catalog/catalog_entry.cpp +3 -0
  364. package/src/duckdb/src/catalog/catalog_entry_retriever.cpp +60 -5
  365. package/src/duckdb/src/catalog/catalog_search_path.cpp +27 -14
  366. package/src/duckdb/src/catalog/catalog_set.cpp +75 -31
  367. package/src/duckdb/src/catalog/default/default_functions.cpp +13 -8
  368. package/src/duckdb/src/catalog/default/default_views.cpp +1 -0
  369. package/src/duckdb/src/catalog/dependency_manager.cpp +133 -5
  370. package/src/duckdb/src/catalog/duck_catalog.cpp +17 -9
  371. package/src/duckdb/src/common/adbc/adbc.cpp +18 -0
  372. package/src/duckdb/src/common/allocator.cpp +3 -1
  373. package/src/duckdb/src/common/arrow/arrow_appender.cpp +30 -9
  374. package/src/duckdb/src/common/arrow/arrow_converter.cpp +63 -82
  375. package/src/duckdb/src/common/arrow/arrow_merge_event.cpp +4 -3
  376. package/src/duckdb/src/common/arrow/arrow_type_extension.cpp +361 -0
  377. package/src/duckdb/src/common/arrow/arrow_util.cpp +10 -6
  378. package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +6 -2
  379. package/src/duckdb/src/common/arrow/physical_arrow_collector.cpp +2 -1
  380. package/src/duckdb/src/common/arrow/schema_metadata.cpp +27 -14
  381. package/src/duckdb/src/common/assert.cpp +1 -2
  382. package/src/duckdb/src/common/bind_helpers.cpp +1 -1
  383. package/src/duckdb/src/common/box_renderer.cpp +316 -26
  384. package/src/duckdb/src/common/cgroups.cpp +7 -1
  385. package/src/duckdb/src/common/compressed_file_system.cpp +1 -1
  386. package/src/duckdb/src/common/enum_util.cpp +2865 -6882
  387. package/src/duckdb/src/common/enums/compression_type.cpp +12 -0
  388. package/src/duckdb/src/common/enums/metric_type.cpp +24 -0
  389. package/src/duckdb/src/common/enums/optimizer_type.cpp +4 -0
  390. package/src/duckdb/src/common/enums/physical_operator_type.cpp +2 -0
  391. package/src/duckdb/src/common/error_data.cpp +23 -6
  392. package/src/duckdb/src/common/exception/binder_exception.cpp +1 -1
  393. package/src/duckdb/src/common/exception.cpp +20 -28
  394. package/src/duckdb/src/common/extra_type_info.cpp +85 -20
  395. package/src/duckdb/src/common/file_buffer.cpp +5 -2
  396. package/src/duckdb/src/common/file_system.cpp +8 -3
  397. package/src/duckdb/src/common/fsst.cpp +3 -3
  398. package/src/duckdb/src/common/hive_partitioning.cpp +1 -1
  399. package/src/duckdb/src/common/local_file_system.cpp +169 -60
  400. package/src/duckdb/src/common/multi_file_list.cpp +4 -1
  401. package/src/duckdb/src/common/multi_file_reader.cpp +240 -63
  402. package/src/duckdb/src/common/opener_file_system.cpp +37 -0
  403. package/src/duckdb/src/common/operator/cast_operators.cpp +77 -11
  404. package/src/duckdb/src/common/operator/string_cast.cpp +6 -2
  405. package/src/duckdb/src/common/pipe_file_system.cpp +4 -4
  406. package/src/duckdb/src/common/progress_bar/progress_bar.cpp +25 -14
  407. package/src/duckdb/src/common/radix_partitioning.cpp +17 -16
  408. package/src/duckdb/src/common/random_engine.cpp +39 -3
  409. package/src/duckdb/src/common/render_tree.cpp +3 -19
  410. package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
  411. package/src/duckdb/src/common/row_operations/row_gather.cpp +2 -58
  412. package/src/duckdb/src/common/row_operations/row_matcher.cpp +2 -2
  413. package/src/duckdb/src/common/row_operations/row_radix_scatter.cpp +2 -0
  414. package/src/duckdb/src/common/row_operations/row_scatter.cpp +20 -19
  415. package/src/duckdb/src/common/serializer/buffered_file_writer.cpp +1 -1
  416. package/src/duckdb/src/common/serializer/memory_stream.cpp +36 -0
  417. package/src/duckdb/src/common/sort/comparators.cpp +7 -7
  418. package/src/duckdb/src/common/sort/partition_state.cpp +2 -2
  419. package/src/duckdb/src/common/stacktrace.cpp +127 -0
  420. package/src/duckdb/src/common/string_util.cpp +157 -32
  421. package/src/duckdb/src/common/tree_renderer/text_tree_renderer.cpp +15 -3
  422. package/src/duckdb/src/common/types/column/column_data_allocator.cpp +4 -0
  423. package/src/duckdb/src/common/types/column/column_data_collection.cpp +71 -8
  424. package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +27 -6
  425. package/src/duckdb/src/common/types/conflict_manager.cpp +21 -7
  426. package/src/duckdb/src/common/types/date.cpp +39 -25
  427. package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +4 -11
  428. package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +21 -7
  429. package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +10 -1
  430. package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +17 -17
  431. package/src/duckdb/src/common/types/timestamp.cpp +70 -33
  432. package/src/duckdb/src/common/types/uuid.cpp +11 -0
  433. package/src/duckdb/src/common/types/validity_mask.cpp +16 -5
  434. package/src/duckdb/src/common/types/value.cpp +357 -199
  435. package/src/duckdb/src/common/types/varint.cpp +64 -18
  436. package/src/duckdb/src/common/types/vector.cpp +78 -38
  437. package/src/duckdb/src/common/types.cpp +199 -92
  438. package/src/duckdb/src/common/vector_operations/comparison_operators.cpp +2 -1
  439. package/src/duckdb/src/common/vector_operations/is_distinct_from.cpp +32 -5
  440. package/src/duckdb/src/common/vector_operations/vector_hash.cpp +3 -1
  441. package/src/duckdb/src/execution/adaptive_filter.cpp +6 -2
  442. package/src/duckdb/src/execution/aggregate_hashtable.cpp +410 -111
  443. package/src/duckdb/src/execution/column_binding_resolver.cpp +2 -2
  444. package/src/duckdb/src/execution/expression_executor/execute_between.cpp +6 -0
  445. package/src/duckdb/src/execution/expression_executor/execute_cast.cpp +4 -3
  446. package/src/duckdb/src/execution/expression_executor/execute_comparison.cpp +2 -2
  447. package/src/duckdb/src/execution/expression_executor/execute_conjunction.cpp +2 -2
  448. package/src/duckdb/src/execution/expression_executor/execute_function.cpp +1 -0
  449. package/src/duckdb/src/execution/expression_executor/execute_operator.cpp +5 -4
  450. package/src/duckdb/src/execution/expression_executor.cpp +5 -3
  451. package/src/duckdb/src/execution/index/art/art.cpp +208 -72
  452. package/src/duckdb/src/execution/index/art/base_leaf.cpp +1 -1
  453. package/src/duckdb/src/execution/index/art/leaf.cpp +12 -7
  454. package/src/duckdb/src/execution/index/art/node.cpp +2 -1
  455. package/src/duckdb/src/execution/index/art/node256_leaf.cpp +6 -6
  456. package/src/duckdb/src/execution/index/art/plan_art.cpp +50 -55
  457. package/src/duckdb/src/execution/index/art/prefix.cpp +7 -13
  458. package/src/duckdb/src/execution/index/bound_index.cpp +30 -5
  459. package/src/duckdb/src/execution/index/fixed_size_allocator.cpp +3 -5
  460. package/src/duckdb/src/execution/index/fixed_size_buffer.cpp +14 -9
  461. package/src/duckdb/src/execution/join_hashtable.cpp +254 -158
  462. package/src/duckdb/src/execution/operator/aggregate/grouped_aggregate_data.cpp +1 -1
  463. package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +7 -7
  464. package/src/duckdb/src/execution/operator/aggregate/physical_partitioned_aggregate.cpp +226 -0
  465. package/src/duckdb/src/execution/operator/aggregate/physical_perfecthash_aggregate.cpp +3 -3
  466. package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +3 -3
  467. package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +77 -70
  468. package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +114 -50
  469. package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp +2 -2
  470. package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer_manager.cpp +19 -10
  471. package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_file_handle.cpp +22 -15
  472. package/src/duckdb/src/execution/operator/csv_scanner/encode/csv_encoder.cpp +95 -0
  473. package/src/duckdb/src/execution/operator/csv_scanner/scanner/column_count_scanner.cpp +6 -1
  474. package/src/duckdb/src/execution/operator/csv_scanner/scanner/csv_schema.cpp +75 -2
  475. package/src/duckdb/src/execution/operator/csv_scanner/scanner/scanner_boundary.cpp +40 -12
  476. package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +395 -163
  477. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +20 -23
  478. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +115 -49
  479. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +66 -12
  480. package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +20 -23
  481. package/src/duckdb/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp +220 -46
  482. package/src/duckdb/src/execution/operator/csv_scanner/table_function/csv_file_scanner.cpp +43 -32
  483. package/src/duckdb/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp +54 -119
  484. package/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp +184 -20
  485. package/src/duckdb/src/execution/operator/csv_scanner/util/csv_reader_options.cpp +83 -21
  486. package/src/duckdb/src/execution/operator/csv_scanner/util/csv_validator.cpp +63 -0
  487. package/src/duckdb/src/execution/operator/helper/physical_reservoir_sample.cpp +7 -4
  488. package/src/duckdb/src/execution/operator/helper/physical_set.cpp +1 -1
  489. package/src/duckdb/src/execution/operator/helper/physical_vacuum.cpp +3 -2
  490. package/src/duckdb/src/execution/operator/helper/physical_verify_vector.cpp +9 -1
  491. package/src/duckdb/src/execution/operator/join/perfect_hash_join_executor.cpp +132 -15
  492. package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +64 -55
  493. package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +284 -154
  494. package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +40 -55
  495. package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +0 -1
  496. package/src/duckdb/src/execution/operator/order/physical_order.cpp +7 -3
  497. package/src/duckdb/src/execution/operator/order/physical_top_n.cpp +298 -227
  498. package/src/duckdb/src/execution/operator/persistent/csv_rejects_table.cpp +5 -2
  499. package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +3 -4
  500. package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +32 -19
  501. package/src/duckdb/src/execution/operator/persistent/physical_copy_database.cpp +1 -0
  502. package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +6 -0
  503. package/src/duckdb/src/execution/operator/persistent/physical_delete.cpp +58 -19
  504. package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +11 -27
  505. package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +308 -119
  506. package/src/duckdb/src/execution/operator/persistent/physical_update.cpp +105 -55
  507. package/src/duckdb/src/execution/operator/projection/physical_tableinout_function.cpp +6 -2
  508. package/src/duckdb/src/execution/operator/projection/physical_unnest.cpp +1 -1
  509. package/src/duckdb/src/execution/operator/scan/physical_positional_scan.cpp +15 -6
  510. package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +92 -50
  511. package/src/duckdb/src/execution/operator/schema/physical_alter.cpp +0 -1
  512. package/src/duckdb/src/execution/operator/schema/physical_attach.cpp +8 -4
  513. package/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp +54 -22
  514. package/src/duckdb/src/execution/operator/set/physical_union.cpp +5 -1
  515. package/src/duckdb/src/execution/physical_operator.cpp +15 -9
  516. package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +101 -12
  517. package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +11 -140
  518. package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp +11 -13
  519. package/src/duckdb/src/execution/physical_plan/plan_cte.cpp +1 -1
  520. package/src/duckdb/src/execution/physical_plan/plan_delete.cpp +1 -1
  521. package/src/duckdb/src/execution/physical_plan/plan_delim_join.cpp +1 -1
  522. package/src/duckdb/src/execution/physical_plan/plan_distinct.cpp +6 -5
  523. package/src/duckdb/src/execution/physical_plan/plan_export.cpp +0 -4
  524. package/src/duckdb/src/execution/physical_plan/plan_filter.cpp +1 -1
  525. package/src/duckdb/src/execution/physical_plan/plan_get.cpp +16 -13
  526. package/src/duckdb/src/execution/physical_plan/plan_insert.cpp +1 -1
  527. package/src/duckdb/src/execution/physical_plan/plan_order.cpp +7 -7
  528. package/src/duckdb/src/execution/physical_plan/plan_prepare.cpp +2 -2
  529. package/src/duckdb/src/execution/physical_plan/plan_projection.cpp +1 -1
  530. package/src/duckdb/src/execution/physical_plan/plan_sample.cpp +8 -3
  531. package/src/duckdb/src/execution/physical_plan/plan_set_operation.cpp +1 -2
  532. package/src/duckdb/src/execution/physical_plan/plan_simple.cpp +1 -2
  533. package/src/duckdb/src/execution/physical_plan/plan_top_n.cpp +3 -2
  534. package/src/duckdb/src/execution/physical_plan_generator.cpp +0 -22
  535. package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +136 -116
  536. package/src/duckdb/src/execution/sample/base_reservoir_sample.cpp +136 -0
  537. package/src/duckdb/src/execution/sample/reservoir_sample.cpp +930 -0
  538. package/src/duckdb/src/function/aggregate/distributive/count.cpp +6 -12
  539. package/src/duckdb/src/function/aggregate/distributive/{first.cpp → first_last_any.cpp} +37 -18
  540. package/src/duckdb/src/{core_functions → function}/aggregate/distributive/minmax.cpp +19 -12
  541. package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +72 -13
  542. package/src/duckdb/src/function/built_in_functions.cpp +85 -2
  543. package/src/duckdb/src/function/cast/decimal_cast.cpp +1 -1
  544. package/src/duckdb/src/function/cast/string_cast.cpp +1 -1
  545. package/src/duckdb/src/function/cast/struct_cast.cpp +81 -49
  546. package/src/duckdb/src/function/cast/union/from_struct.cpp +7 -5
  547. package/src/duckdb/src/function/compression_config.cpp +6 -0
  548. package/src/duckdb/src/function/encoding_function.cpp +134 -0
  549. package/src/duckdb/src/function/function.cpp +8 -13
  550. package/src/duckdb/src/function/function_binder.cpp +100 -21
  551. package/src/duckdb/src/function/function_list.cpp +178 -0
  552. package/src/duckdb/src/function/macro_function.cpp +4 -4
  553. package/src/duckdb/src/function/pragma/pragma_functions.cpp +0 -2
  554. package/src/duckdb/src/function/pragma/pragma_queries.cpp +0 -4
  555. package/src/duckdb/src/{core_functions/core_functions.cpp → function/register_function_list.cpp} +12 -8
  556. package/src/duckdb/src/function/scalar/compressed_materialization/compress_integral.cpp +62 -23
  557. package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +33 -16
  558. package/src/duckdb/src/function/scalar/compressed_materialization_utils.cpp +21 -0
  559. package/src/duckdb/src/{core_functions/scalar/blob → function/scalar}/create_sort_key.cpp +86 -23
  560. package/src/duckdb/src/{core_functions → function}/scalar/date/strftime.cpp +6 -4
  561. package/src/duckdb/src/function/scalar/generic/constant_or_null.cpp +5 -7
  562. package/src/duckdb/src/{core_functions → function}/scalar/generic/error.cpp +3 -1
  563. package/src/duckdb/src/function/scalar/generic/getvariable.cpp +2 -2
  564. package/src/duckdb/src/function/scalar/list/contains_or_position.cpp +1 -7
  565. package/src/duckdb/src/function/scalar/list/list_extract.cpp +27 -21
  566. package/src/duckdb/src/function/scalar/list/list_resize.cpp +8 -12
  567. package/src/duckdb/src/function/scalar/list/list_select.cpp +1 -4
  568. package/src/duckdb/src/function/scalar/list/list_zip.cpp +6 -6
  569. package/src/duckdb/src/{core_functions → function}/scalar/map/map_contains.cpp +2 -2
  570. package/src/duckdb/src/function/scalar/nested_functions.cpp +0 -11
  571. package/src/duckdb/src/function/scalar/{operators → operator}/add.cpp +2 -1
  572. package/src/duckdb/src/function/scalar/{operators → operator}/arithmetic.cpp +195 -127
  573. package/src/duckdb/src/function/scalar/sequence/nextval.cpp +30 -21
  574. package/src/duckdb/src/function/scalar/strftime_format.cpp +10 -0
  575. package/src/duckdb/src/function/scalar/string/caseconvert.cpp +11 -41
  576. package/src/duckdb/src/function/scalar/string/concat.cpp +22 -20
  577. package/src/duckdb/src/function/scalar/string/concat_ws.cpp +2 -2
  578. package/src/duckdb/src/function/scalar/string/contains.cpp +16 -19
  579. package/src/duckdb/src/function/scalar/string/length.cpp +38 -24
  580. package/src/duckdb/src/function/scalar/string/like.cpp +80 -47
  581. package/src/duckdb/src/{core_functions → function}/scalar/string/md5.cpp +2 -2
  582. package/src/duckdb/src/function/scalar/string/nfc_normalize.cpp +2 -6
  583. package/src/duckdb/src/function/scalar/string/prefix.cpp +0 -4
  584. package/src/duckdb/src/function/scalar/string/regexp/regexp_extract_all.cpp +2 -1
  585. package/src/duckdb/src/function/scalar/string/regexp.cpp +17 -7
  586. package/src/duckdb/src/{core_functions → function}/scalar/string/regexp_escape.cpp +2 -2
  587. package/src/duckdb/src/{core_functions → function}/scalar/string/sha1.cpp +1 -1
  588. package/src/duckdb/src/{core_functions → function}/scalar/string/sha256.cpp +1 -1
  589. package/src/duckdb/src/{core_functions → function}/scalar/string/string_split.cpp +4 -5
  590. package/src/duckdb/src/function/scalar/string/strip_accents.cpp +3 -6
  591. package/src/duckdb/src/function/scalar/string/substring.cpp +14 -13
  592. package/src/duckdb/src/function/scalar/string/suffix.cpp +0 -4
  593. package/src/duckdb/src/function/scalar/struct/struct_concat.cpp +115 -0
  594. package/src/duckdb/src/function/scalar/struct/struct_extract.cpp +35 -31
  595. package/src/duckdb/src/{core_functions → function}/scalar/struct/struct_pack.cpp +7 -7
  596. package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +5 -8
  597. package/src/duckdb/src/function/scalar/system/write_log.cpp +170 -0
  598. package/src/duckdb/src/function/scalar_function.cpp +5 -5
  599. package/src/duckdb/src/function/table/arrow/arrow_array_scan_state.cpp +3 -2
  600. package/src/duckdb/src/function/table/arrow/arrow_duck_schema.cpp +287 -1
  601. package/src/duckdb/src/function/table/arrow/arrow_type_info.cpp +6 -6
  602. package/src/duckdb/src/function/table/arrow.cpp +32 -352
  603. package/src/duckdb/src/function/table/arrow_conversion.cpp +43 -7
  604. package/src/duckdb/src/function/table/copy_csv.cpp +38 -23
  605. package/src/duckdb/src/function/table/glob.cpp +1 -1
  606. package/src/duckdb/src/function/table/query_function.cpp +12 -7
  607. package/src/duckdb/src/function/table/read_csv.cpp +114 -46
  608. package/src/duckdb/src/function/table/read_file.cpp +26 -6
  609. package/src/duckdb/src/function/table/sniff_csv.cpp +25 -5
  610. package/src/duckdb/src/function/table/system/duckdb_columns.cpp +1 -1
  611. package/src/duckdb/src/function/table/system/duckdb_constraints.cpp +1 -1
  612. package/src/duckdb/src/function/table/system/duckdb_dependencies.cpp +6 -7
  613. package/src/duckdb/src/function/table/system/duckdb_extensions.cpp +1 -1
  614. package/src/duckdb/src/function/table/system/duckdb_functions.cpp +141 -16
  615. package/src/duckdb/src/function/table/system/duckdb_log.cpp +64 -0
  616. package/src/duckdb/src/function/table/system/duckdb_log_contexts.cpp +65 -0
  617. package/src/duckdb/src/function/table/system/duckdb_memory.cpp +0 -1
  618. package/src/duckdb/src/function/table/system/duckdb_settings.cpp +1 -1
  619. package/src/duckdb/src/function/table/system/duckdb_tables.cpp +1 -13
  620. package/src/duckdb/src/function/table/system/duckdb_types.cpp +1 -1
  621. package/src/duckdb/src/function/table/system/pragma_storage_info.cpp +17 -0
  622. package/src/duckdb/src/function/table/system/pragma_table_info.cpp +6 -0
  623. package/src/duckdb/src/function/table/system/pragma_table_sample.cpp +95 -0
  624. package/src/duckdb/src/function/table/system/test_all_types.cpp +56 -46
  625. package/src/duckdb/src/function/table/system_functions.cpp +3 -0
  626. package/src/duckdb/src/function/table/table_scan.cpp +487 -289
  627. package/src/duckdb/src/function/table/version/pragma_version.cpp +3 -3
  628. package/src/duckdb/src/function/table_function.cpp +10 -6
  629. package/src/duckdb/src/function/window/window_aggregate_function.cpp +248 -0
  630. package/src/duckdb/src/function/window/window_aggregate_states.cpp +48 -0
  631. package/src/duckdb/src/function/window/window_aggregator.cpp +88 -0
  632. package/src/duckdb/src/function/window/window_boundaries_state.cpp +854 -0
  633. package/src/duckdb/src/function/window/window_collection.cpp +146 -0
  634. package/src/duckdb/src/function/window/window_constant_aggregator.cpp +357 -0
  635. package/src/duckdb/src/function/window/window_custom_aggregator.cpp +146 -0
  636. package/src/duckdb/src/function/window/window_distinct_aggregator.cpp +758 -0
  637. package/src/duckdb/src/function/window/window_executor.cpp +99 -0
  638. package/src/duckdb/src/function/window/window_index_tree.cpp +63 -0
  639. package/src/duckdb/src/function/window/window_merge_sort_tree.cpp +275 -0
  640. package/src/duckdb/src/function/window/window_naive_aggregator.cpp +361 -0
  641. package/src/duckdb/src/function/window/window_rank_function.cpp +288 -0
  642. package/src/duckdb/src/function/window/window_rownumber_function.cpp +191 -0
  643. package/src/duckdb/src/function/window/window_segment_tree.cpp +594 -0
  644. package/src/duckdb/src/function/window/window_shared_expressions.cpp +50 -0
  645. package/src/duckdb/src/function/window/window_token_tree.cpp +142 -0
  646. package/src/duckdb/src/function/window/window_value_function.cpp +566 -0
  647. package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +74 -17
  648. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_index_entry.hpp +1 -1
  649. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_schema_entry.hpp +2 -0
  650. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_table_entry.hpp +9 -0
  651. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/function_entry.hpp +4 -10
  652. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/pragma_function_catalog_entry.hpp +1 -1
  653. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/scalar_function_catalog_entry.hpp +2 -2
  654. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/schema_catalog_entry.hpp +2 -0
  655. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/table_catalog_entry.hpp +18 -3
  656. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/table_function_catalog_entry.hpp +1 -1
  657. package/src/duckdb/src/include/duckdb/catalog/catalog_entry/type_catalog_entry.hpp +2 -1
  658. package/src/duckdb/src/include/duckdb/catalog/catalog_entry.hpp +5 -2
  659. package/src/duckdb/src/include/duckdb/catalog/catalog_entry_retriever.hpp +21 -18
  660. package/src/duckdb/src/include/duckdb/catalog/catalog_search_path.hpp +3 -2
  661. package/src/duckdb/src/include/duckdb/catalog/catalog_set.hpp +10 -2
  662. package/src/duckdb/src/include/duckdb/catalog/dependency_manager.hpp +11 -0
  663. package/src/duckdb/src/include/duckdb/catalog/duck_catalog.hpp +9 -4
  664. package/src/duckdb/src/include/duckdb/common/allocator.hpp +3 -0
  665. package/src/duckdb/src/include/duckdb/common/array_ptr.hpp +8 -0
  666. package/src/duckdb/src/include/duckdb/common/arrow/appender/append_data.hpp +4 -1
  667. package/src/duckdb/src/include/duckdb/common/arrow/appender/list_data.hpp +3 -1
  668. package/src/duckdb/src/include/duckdb/common/arrow/appender/list_view_data.hpp +3 -1
  669. package/src/duckdb/src/include/duckdb/common/arrow/appender/varchar_data.hpp +2 -1
  670. package/src/duckdb/src/include/duckdb/common/arrow/arrow_appender.hpp +7 -3
  671. package/src/duckdb/src/include/duckdb/common/arrow/arrow_converter.hpp +26 -3
  672. package/src/duckdb/src/include/duckdb/common/arrow/arrow_query_result.hpp +1 -1
  673. package/src/duckdb/src/include/duckdb/common/arrow/arrow_type_extension.hpp +144 -0
  674. package/src/duckdb/src/include/duckdb/common/arrow/arrow_util.hpp +5 -2
  675. package/src/duckdb/src/include/duckdb/common/arrow/result_arrow_wrapper.hpp +2 -0
  676. package/src/duckdb/src/include/duckdb/common/arrow/schema_metadata.hpp +11 -4
  677. package/src/duckdb/src/include/duckdb/common/assert.hpp +12 -1
  678. package/src/duckdb/src/include/duckdb/common/atomic_ptr.hpp +102 -0
  679. package/src/duckdb/src/include/duckdb/common/box_renderer.hpp +65 -6
  680. package/src/duckdb/src/include/duckdb/common/chrono.hpp +1 -0
  681. package/src/duckdb/src/include/duckdb/common/column_index.hpp +72 -0
  682. package/src/duckdb/src/include/duckdb/common/enum_util.hpp +120 -0
  683. package/src/duckdb/src/include/duckdb/{core_functions/core_functions.hpp → common/enums/collation_type.hpp} +2 -7
  684. package/src/duckdb/src/include/duckdb/common/enums/compression_type.hpp +5 -2
  685. package/src/duckdb/src/include/duckdb/common/enums/function_errors.hpp +18 -0
  686. package/src/duckdb/src/include/duckdb/common/enums/memory_tag.hpp +3 -2
  687. package/src/duckdb/src/include/duckdb/common/enums/metric_type.hpp +7 -2
  688. package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +4 -0
  689. package/src/duckdb/src/include/duckdb/common/enums/order_preservation_type.hpp +1 -1
  690. package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +1 -0
  691. package/src/duckdb/src/include/duckdb/common/enums/profiler_format.hpp +1 -1
  692. package/src/duckdb/src/include/duckdb/{core_functions/aggregate → common/enums}/quantile_enum.hpp +3 -1
  693. package/src/duckdb/src/include/duckdb/common/enums/scan_vector_type.hpp +2 -0
  694. package/src/duckdb/src/include/duckdb/common/error_data.hpp +1 -0
  695. package/src/duckdb/src/include/duckdb/common/exception/parser_exception.hpp +4 -0
  696. package/src/duckdb/src/include/duckdb/common/exception.hpp +1 -1
  697. package/src/duckdb/src/include/duckdb/common/extension_type_info.hpp +37 -0
  698. package/src/duckdb/src/include/duckdb/common/extra_operator_info.hpp +7 -2
  699. package/src/duckdb/src/include/duckdb/common/extra_type_info.hpp +9 -3
  700. package/src/duckdb/src/include/duckdb/common/fast_mem.hpp +6 -6
  701. package/src/duckdb/src/include/duckdb/common/file_buffer.hpp +19 -10
  702. package/src/duckdb/src/include/duckdb/common/file_opener.hpp +2 -0
  703. package/src/duckdb/src/include/duckdb/common/file_system.hpp +6 -1
  704. package/src/duckdb/src/include/duckdb/common/fsst.hpp +2 -2
  705. package/src/duckdb/src/include/duckdb/common/helper.hpp +6 -0
  706. package/src/duckdb/src/include/duckdb/common/hugeint.hpp +10 -0
  707. package/src/duckdb/src/include/duckdb/common/insertion_order_preserving_map.hpp +12 -2
  708. package/src/duckdb/src/include/duckdb/common/local_file_system.hpp +3 -0
  709. package/src/duckdb/src/include/duckdb/common/multi_file_list.hpp +2 -1
  710. package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +147 -27
  711. package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +4 -0
  712. package/src/duckdb/src/include/duckdb/common/numeric_utils.hpp +2 -7
  713. package/src/duckdb/src/include/duckdb/common/opener_file_system.hpp +16 -5
  714. package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +16 -0
  715. package/src/duckdb/src/include/duckdb/common/optional_idx.hpp +4 -0
  716. package/src/duckdb/src/include/duckdb/common/platform.hpp +34 -3
  717. package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +10 -13
  718. package/src/duckdb/src/include/duckdb/common/random_engine.hpp +8 -3
  719. package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +0 -2
  720. package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_writer.hpp +1 -1
  721. package/src/duckdb/src/include/duckdb/common/serializer/memory_stream.hpp +7 -0
  722. package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +1 -0
  723. package/src/duckdb/src/include/duckdb/common/serializer/serializer.hpp +1 -0
  724. package/src/duckdb/src/include/duckdb/common/stacktrace.hpp +25 -0
  725. package/src/duckdb/src/include/duckdb/common/string_util.hpp +30 -2
  726. package/src/duckdb/src/include/duckdb/common/tree_renderer/graphviz_tree_renderer.hpp +1 -1
  727. package/src/duckdb/src/include/duckdb/common/tree_renderer/html_tree_renderer.hpp +1 -1
  728. package/src/duckdb/src/include/duckdb/common/tree_renderer/json_tree_renderer.hpp +1 -1
  729. package/src/duckdb/src/include/duckdb/common/tree_renderer/text_tree_renderer.hpp +3 -2
  730. package/src/duckdb/src/include/duckdb/common/tree_renderer.hpp +2 -0
  731. package/src/duckdb/src/include/duckdb/common/type_util.hpp +8 -0
  732. package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +8 -0
  733. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +13 -2
  734. package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp +2 -1
  735. package/src/duckdb/src/include/duckdb/common/types/conflict_manager.hpp +21 -4
  736. package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +4 -1
  737. package/src/duckdb/src/include/duckdb/common/types/date.hpp +9 -4
  738. package/src/duckdb/src/include/duckdb/common/types/date_lookup_cache.hpp +1 -1
  739. package/src/duckdb/src/include/duckdb/common/types/interval.hpp +58 -10
  740. package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +0 -4
  741. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_allocator.hpp +4 -0
  742. package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +4 -0
  743. package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +10 -0
  744. package/src/duckdb/src/include/duckdb/common/types/timestamp.hpp +43 -16
  745. package/src/duckdb/src/include/duckdb/common/types/uuid.hpp +3 -1
  746. package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +63 -21
  747. package/src/duckdb/src/include/duckdb/common/types/value.hpp +62 -16
  748. package/src/duckdb/src/include/duckdb/common/types/varint.hpp +13 -0
  749. package/src/duckdb/src/include/duckdb/common/types/vector.hpp +34 -7
  750. package/src/duckdb/src/include/duckdb/common/types/vector_buffer.hpp +15 -0
  751. package/src/duckdb/src/include/duckdb/common/types.hpp +12 -7
  752. package/src/duckdb/src/include/duckdb/common/uhugeint.hpp +10 -0
  753. package/src/duckdb/src/include/duckdb/common/vector_operations/aggregate_executor.hpp +12 -13
  754. package/src/duckdb/src/include/duckdb/common/vector_operations/binary_executor.hpp +27 -0
  755. package/src/duckdb/src/include/duckdb/common/vector_operations/unary_executor.hpp +111 -4
  756. package/src/duckdb/src/include/duckdb/common/vector_operations/vector_operations.hpp +0 -1
  757. package/src/duckdb/src/include/duckdb/execution/adaptive_filter.hpp +2 -0
  758. package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +48 -10
  759. package/src/duckdb/src/include/duckdb/execution/executor.hpp +2 -1
  760. package/src/duckdb/src/include/duckdb/execution/expression_executor.hpp +0 -1
  761. package/src/duckdb/src/include/duckdb/execution/ht_entry.hpp +25 -27
  762. package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +28 -18
  763. package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +1 -0
  764. package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +2 -2
  765. package/src/duckdb/src/include/duckdb/execution/index/bound_index.hpp +23 -16
  766. package/src/duckdb/src/include/duckdb/execution/index/fixed_size_allocator.hpp +4 -0
  767. package/src/duckdb/src/include/duckdb/execution/index/fixed_size_buffer.hpp +2 -2
  768. package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +25 -16
  769. package/src/duckdb/src/include/duckdb/execution/merge_sort_tree.hpp +15 -10
  770. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp +1 -1
  771. package/src/duckdb/src/include/duckdb/execution/operator/{persistent/physical_fixed_batch_copy.hpp → aggregate/physical_partitioned_aggregate.hpp} +25 -27
  772. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_ungrouped_aggregate.hpp +1 -2
  773. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_window.hpp +5 -4
  774. package/src/duckdb/src/include/duckdb/execution/operator/aggregate/ungrouped_aggregate_state.hpp +21 -1
  775. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/base_scanner.hpp +38 -9
  776. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_buffer.hpp +8 -9
  777. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_buffer_manager.hpp +7 -1
  778. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp +29 -23
  779. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_file_handle.hpp +15 -13
  780. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_file_scanner.hpp +13 -5
  781. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_option.hpp +2 -1
  782. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_reader_options.hpp +24 -10
  783. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_schema.hpp +36 -1
  784. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_state.hpp +21 -13
  785. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_state_machine.hpp +52 -22
  786. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_state_machine_cache.hpp +6 -6
  787. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_validator.hpp +58 -0
  788. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/encode/csv_encoder.hpp +62 -0
  789. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/global_csv_state.hpp +6 -3
  790. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner_boundary.hpp +16 -6
  791. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp +9 -4
  792. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/state_machine_options.hpp +8 -4
  793. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +55 -10
  794. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_batch_collector.hpp +2 -2
  795. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_buffered_batch_collector.hpp +2 -2
  796. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_limit.hpp +2 -2
  797. package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_reservoir_sample.hpp +1 -1
  798. package/src/duckdb/src/include/duckdb/execution/operator/join/join_filter_pushdown.hpp +28 -7
  799. package/src/duckdb/src/include/duckdb/execution/operator/join/perfect_hash_join_executor.hpp +6 -9
  800. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_hash_join.hpp +17 -16
  801. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_iejoin.hpp +1 -1
  802. package/src/duckdb/src/include/duckdb/execution/operator/order/physical_order.hpp +7 -3
  803. package/src/duckdb/src/include/duckdb/execution/operator/order/physical_top_n.hpp +5 -1
  804. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_copy_to_file.hpp +2 -2
  805. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_insert.hpp +2 -2
  806. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_insert.hpp +55 -4
  807. package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_update.hpp +2 -0
  808. package/src/duckdb/src/include/duckdb/execution/operator/projection/physical_tableinout_function.hpp +2 -2
  809. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_positional_scan.hpp +2 -1
  810. package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp +10 -9
  811. package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_art_index.hpp +16 -13
  812. package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +0 -4
  813. package/src/duckdb/src/include/duckdb/execution/partition_info.hpp +79 -0
  814. package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +20 -9
  815. package/src/duckdb/src/include/duckdb/execution/physical_operator_states.hpp +1 -11
  816. package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +0 -2
  817. package/src/duckdb/src/include/duckdb/execution/progress_data.hpp +58 -0
  818. package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +2 -1
  819. package/src/duckdb/src/include/duckdb/execution/reservoir_sample.hpp +160 -31
  820. package/src/duckdb/src/include/duckdb/function/aggregate/distributive_function_utils.hpp +31 -0
  821. package/src/duckdb/src/include/duckdb/function/aggregate/distributive_functions.hpp +61 -10
  822. package/src/duckdb/src/include/duckdb/{core_functions → function}/aggregate/minmax_n_helpers.hpp +1 -1
  823. package/src/duckdb/src/include/duckdb/{core_functions → function}/aggregate/sort_key_helpers.hpp +2 -2
  824. package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +47 -27
  825. package/src/duckdb/src/include/duckdb/function/aggregate_state.hpp +2 -0
  826. package/src/duckdb/src/include/duckdb/function/built_in_functions.hpp +3 -10
  827. package/src/duckdb/src/include/duckdb/function/cast/bound_cast_data.hpp +13 -6
  828. package/src/duckdb/src/include/duckdb/function/compression/compression.hpp +15 -0
  829. package/src/duckdb/src/include/duckdb/function/compression_function.hpp +29 -6
  830. package/src/duckdb/src/include/duckdb/{core_functions → function}/create_sort_key.hpp +4 -1
  831. package/src/duckdb/src/include/duckdb/function/encoding_function.hpp +78 -0
  832. package/src/duckdb/src/include/duckdb/function/function.hpp +22 -1
  833. package/src/duckdb/src/include/duckdb/function/function_binder.hpp +3 -0
  834. package/src/duckdb/src/include/duckdb/function/function_list.hpp +39 -0
  835. package/src/duckdb/src/include/duckdb/function/function_set.hpp +13 -7
  836. package/src/duckdb/src/include/duckdb/{core_functions → function}/lambda_functions.hpp +1 -1
  837. package/src/duckdb/src/include/duckdb/function/partition_stats.hpp +36 -0
  838. package/src/duckdb/src/include/duckdb/function/register_function_list_helper.hpp +69 -0
  839. package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp +154 -23
  840. package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_utils.hpp +45 -0
  841. package/src/duckdb/src/include/duckdb/function/scalar/date_functions.hpp +45 -0
  842. package/src/duckdb/src/include/duckdb/function/scalar/generic_common.hpp +36 -0
  843. package/src/duckdb/src/include/duckdb/function/scalar/generic_functions.hpp +32 -23
  844. package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +1 -1
  845. package/src/duckdb/src/include/duckdb/function/scalar/list_functions.hpp +156 -0
  846. package/src/duckdb/src/include/duckdb/function/scalar/map_functions.hpp +27 -0
  847. package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +4 -45
  848. package/src/duckdb/src/include/duckdb/function/scalar/operator_functions.hpp +102 -0
  849. package/src/duckdb/src/include/duckdb/function/scalar/operators.hpp +2 -16
  850. package/src/duckdb/src/include/duckdb/function/scalar/sequence_functions.hpp +16 -25
  851. package/src/duckdb/src/include/duckdb/function/scalar/sequence_utils.hpp +38 -0
  852. package/src/duckdb/src/include/duckdb/function/scalar/strftime_format.hpp +1 -0
  853. package/src/duckdb/src/include/duckdb/function/scalar/string_common.hpp +49 -0
  854. package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +401 -76
  855. package/src/duckdb/src/include/duckdb/function/scalar/struct_functions.hpp +63 -0
  856. package/src/duckdb/src/include/duckdb/function/scalar/struct_utils.hpp +33 -0
  857. package/src/duckdb/src/include/duckdb/function/scalar/system_functions.hpp +45 -0
  858. package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +17 -8
  859. package/src/duckdb/src/include/duckdb/function/table/arrow/arrow_duck_schema.hpp +59 -6
  860. package/src/duckdb/src/include/duckdb/function/table/arrow/arrow_type_info.hpp +12 -9
  861. package/src/duckdb/src/include/duckdb/function/table/arrow/enum/arrow_type_info_type.hpp +2 -0
  862. package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +18 -13
  863. package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +7 -4
  864. package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +14 -0
  865. package/src/duckdb/src/include/duckdb/function/table/table_scan.hpp +15 -10
  866. package/src/duckdb/src/include/duckdb/function/table_function.hpp +94 -18
  867. package/src/duckdb/src/include/duckdb/{core_functions → function}/to_interval.hpp +1 -1
  868. package/src/duckdb/src/include/duckdb/function/window/window_aggregate_function.hpp +44 -0
  869. package/src/duckdb/src/include/duckdb/function/window/window_aggregate_states.hpp +56 -0
  870. package/src/duckdb/src/include/duckdb/function/window/window_aggregator.hpp +194 -0
  871. package/src/duckdb/src/include/duckdb/function/window/window_boundaries_state.hpp +153 -0
  872. package/src/duckdb/src/include/duckdb/function/window/window_collection.hpp +146 -0
  873. package/src/duckdb/src/include/duckdb/function/window/window_constant_aggregator.hpp +38 -0
  874. package/src/duckdb/src/include/duckdb/function/window/window_custom_aggregator.hpp +32 -0
  875. package/src/duckdb/src/include/duckdb/function/window/window_distinct_aggregator.hpp +39 -0
  876. package/src/duckdb/src/include/duckdb/function/window/window_executor.hpp +122 -0
  877. package/src/duckdb/src/include/duckdb/function/window/window_index_tree.hpp +42 -0
  878. package/src/duckdb/src/include/duckdb/function/window/window_merge_sort_tree.hpp +108 -0
  879. package/src/duckdb/src/include/duckdb/function/window/window_naive_aggregator.hpp +33 -0
  880. package/src/duckdb/src/include/duckdb/function/window/window_rank_function.hpp +63 -0
  881. package/src/duckdb/src/include/duckdb/function/window/window_rownumber_function.hpp +43 -0
  882. package/src/duckdb/src/include/duckdb/function/window/window_segment_tree.hpp +31 -0
  883. package/src/duckdb/src/include/duckdb/function/window/window_shared_expressions.hpp +76 -0
  884. package/src/duckdb/src/include/duckdb/function/window/window_token_tree.hpp +46 -0
  885. package/src/duckdb/src/include/duckdb/function/window/window_value_function.hpp +79 -0
  886. package/src/duckdb/src/include/duckdb/logging/http_logger.hpp +2 -0
  887. package/src/duckdb/src/include/duckdb/logging/log_manager.hpp +81 -0
  888. package/src/duckdb/src/include/duckdb/logging/log_storage.hpp +127 -0
  889. package/src/duckdb/src/include/duckdb/logging/logger.hpp +287 -0
  890. package/src/duckdb/src/include/duckdb/logging/logging.hpp +83 -0
  891. package/src/duckdb/src/include/duckdb/main/appender.hpp +41 -18
  892. package/src/duckdb/src/include/duckdb/main/attached_database.hpp +6 -3
  893. package/src/duckdb/src/include/duckdb/main/capi/capi_internal.hpp +7 -2
  894. package/src/duckdb/src/include/duckdb/main/capi/extension_api.hpp +317 -231
  895. package/src/duckdb/src/include/duckdb/main/client_config.hpp +17 -1
  896. package/src/duckdb/src/include/duckdb/main/client_context.hpp +28 -6
  897. package/src/duckdb/src/include/duckdb/main/client_context_file_opener.hpp +1 -0
  898. package/src/duckdb/src/include/duckdb/main/client_context_wrapper.hpp +5 -0
  899. package/src/duckdb/src/include/duckdb/main/client_data.hpp +3 -2
  900. package/src/duckdb/src/include/duckdb/main/client_properties.hpp +8 -3
  901. package/src/duckdb/src/include/duckdb/main/config.hpp +52 -8
  902. package/src/duckdb/src/include/duckdb/main/connection.hpp +18 -3
  903. package/src/duckdb/src/include/duckdb/main/database.hpp +8 -7
  904. package/src/duckdb/src/include/duckdb/main/database_file_opener.hpp +5 -1
  905. package/src/duckdb/src/include/duckdb/main/database_manager.hpp +3 -0
  906. package/src/duckdb/src/include/duckdb/main/db_instance_cache.hpp +1 -0
  907. package/src/duckdb/src/include/duckdb/main/extension.hpp +8 -2
  908. package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +548 -9
  909. package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +18 -0
  910. package/src/duckdb/src/include/duckdb/main/extension_util.hpp +12 -7
  911. package/src/duckdb/src/include/duckdb/main/prepared_statement.hpp +3 -3
  912. package/src/duckdb/src/include/duckdb/main/profiling_info.hpp +2 -2
  913. package/src/duckdb/src/include/duckdb/main/query_profiler.hpp +8 -4
  914. package/src/duckdb/src/include/duckdb/main/relation/create_table_relation.hpp +3 -1
  915. package/src/duckdb/src/include/duckdb/main/relation/delete_relation.hpp +2 -2
  916. package/src/duckdb/src/include/duckdb/main/relation/subquery_relation.hpp +1 -4
  917. package/src/duckdb/src/include/duckdb/main/relation/table_function_relation.hpp +3 -1
  918. package/src/duckdb/src/include/duckdb/main/relation/table_relation.hpp +3 -0
  919. package/src/duckdb/src/include/duckdb/main/relation/update_relation.hpp +3 -2
  920. package/src/duckdb/src/include/duckdb/main/relation/value_relation.hpp +7 -0
  921. package/src/duckdb/src/include/duckdb/main/relation/view_relation.hpp +1 -0
  922. package/src/duckdb/src/include/duckdb/main/relation/write_parquet_relation.hpp +1 -1
  923. package/src/duckdb/src/include/duckdb/main/relation.hpp +45 -9
  924. package/src/duckdb/src/include/duckdb/main/secret/secret_storage.hpp +20 -22
  925. package/src/duckdb/src/include/duckdb/main/settings.hpp +613 -378
  926. package/src/duckdb/src/include/duckdb/main/table_description.hpp +14 -4
  927. package/src/duckdb/src/include/duckdb/optimizer/build_probe_side_optimizer.hpp +1 -3
  928. package/src/duckdb/src/include/duckdb/optimizer/column_lifetime_analyzer.hpp +14 -7
  929. package/src/duckdb/src/include/duckdb/optimizer/common_aggregate_optimizer.hpp +2 -2
  930. package/src/duckdb/src/include/duckdb/optimizer/empty_result_pullup.hpp +27 -0
  931. package/src/duckdb/src/include/duckdb/optimizer/expression_heuristics.hpp +1 -1
  932. package/src/duckdb/src/include/duckdb/optimizer/filter_combiner.hpp +6 -1
  933. package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +2 -0
  934. package/src/duckdb/src/include/duckdb/optimizer/in_clause_rewriter.hpp +3 -0
  935. package/src/duckdb/src/include/duckdb/optimizer/join_filter_pushdown_optimizer.hpp +5 -0
  936. package/src/duckdb/src/include/duckdb/optimizer/join_order/plan_enumerator.hpp +2 -0
  937. package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_statistics_helper.hpp +2 -2
  938. package/src/duckdb/src/include/duckdb/optimizer/late_materialization.hpp +45 -0
  939. package/src/duckdb/src/include/duckdb/optimizer/matcher/expression_matcher.hpp +23 -0
  940. package/src/duckdb/src/include/duckdb/optimizer/matcher/type_matcher.hpp +18 -0
  941. package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +9 -0
  942. package/src/duckdb/src/include/duckdb/optimizer/remove_unused_columns.hpp +33 -11
  943. package/src/duckdb/src/include/duckdb/optimizer/rule/distinct_aggregate_optimizer.hpp +34 -0
  944. package/src/duckdb/src/include/duckdb/optimizer/sampling_pushdown.hpp +25 -0
  945. package/src/duckdb/src/include/duckdb/optimizer/statistics_propagator.hpp +3 -1
  946. package/src/duckdb/src/include/duckdb/optimizer/sum_rewriter.hpp +37 -0
  947. package/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp +4 -0
  948. package/src/duckdb/src/include/duckdb/parallel/event.hpp +3 -0
  949. package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +1 -1
  950. package/src/duckdb/src/include/duckdb/parallel/pipeline_executor.hpp +26 -8
  951. package/src/duckdb/src/include/duckdb/parallel/thread_context.hpp +3 -0
  952. package/src/duckdb/src/include/duckdb/parser/base_expression.hpp +51 -3
  953. package/src/duckdb/src/include/duckdb/parser/constraints/unique_constraint.hpp +28 -44
  954. package/src/duckdb/src/include/duckdb/parser/expression/columnref_expression.hpp +3 -0
  955. package/src/duckdb/src/include/duckdb/parser/expression/comparison_expression.hpp +2 -2
  956. package/src/duckdb/src/include/duckdb/parser/expression/conjunction_expression.hpp +1 -1
  957. package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +2 -2
  958. package/src/duckdb/src/include/duckdb/parser/expression/operator_expression.hpp +6 -6
  959. package/src/duckdb/src/include/duckdb/parser/expression/star_expression.hpp +11 -1
  960. package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +12 -0
  961. package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_info.hpp +1 -0
  962. package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_scalar_function_info.hpp +3 -2
  963. package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_table_info.hpp +22 -1
  964. package/src/duckdb/src/include/duckdb/parser/parsed_data/attach_info.hpp +3 -4
  965. package/src/duckdb/src/include/duckdb/parser/parsed_data/comment_on_column_info.hpp +1 -1
  966. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_function_info.hpp +16 -12
  967. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_index_info.hpp +3 -3
  968. package/src/duckdb/src/include/duckdb/parser/parsed_data/create_type_info.hpp +5 -5
  969. package/src/duckdb/src/include/duckdb/parser/parsed_data/sample_options.hpp +12 -3
  970. package/src/duckdb/src/include/duckdb/parser/parser.hpp +3 -0
  971. package/src/duckdb/src/include/duckdb/parser/qualified_name.hpp +17 -57
  972. package/src/duckdb/src/include/duckdb/parser/qualified_name_set.hpp +19 -3
  973. package/src/duckdb/src/include/duckdb/parser/simplified_token.hpp +2 -1
  974. package/src/duckdb/src/include/duckdb/parser/tableref/basetableref.hpp +12 -9
  975. package/src/duckdb/src/include/duckdb/parser/tokens.hpp +1 -1
  976. package/src/duckdb/src/include/duckdb/parser/transformer.hpp +2 -2
  977. package/src/duckdb/src/include/duckdb/planner/bind_context.hpp +45 -28
  978. package/src/duckdb/src/include/duckdb/planner/binder.hpp +23 -11
  979. package/src/duckdb/src/include/duckdb/planner/binding_alias.hpp +44 -0
  980. package/src/duckdb/src/include/duckdb/planner/bound_result_modifier.hpp +1 -0
  981. package/src/duckdb/src/include/duckdb/planner/collation_binding.hpp +4 -3
  982. package/src/duckdb/src/include/duckdb/planner/constraints/bound_unique_constraint.hpp +11 -10
  983. package/src/duckdb/src/include/duckdb/planner/expression/bound_cast_expression.hpp +2 -0
  984. package/src/duckdb/src/include/duckdb/planner/expression/bound_function_expression.hpp +1 -0
  985. package/src/duckdb/src/include/duckdb/planner/expression/bound_subquery_expression.hpp +4 -4
  986. package/src/duckdb/src/include/duckdb/planner/expression/bound_window_expression.hpp +6 -0
  987. package/src/duckdb/src/include/duckdb/planner/expression.hpp +2 -0
  988. package/src/duckdb/src/include/duckdb/planner/expression_binder/having_binder.hpp +2 -0
  989. package/src/duckdb/src/include/duckdb/planner/expression_binder/index_binder.hpp +9 -4
  990. package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +8 -2
  991. package/src/duckdb/src/include/duckdb/planner/filter/conjunction_filter.hpp +1 -2
  992. package/src/duckdb/src/include/duckdb/planner/filter/dynamic_filter.hpp +48 -0
  993. package/src/duckdb/src/include/duckdb/planner/filter/in_filter.hpp +37 -0
  994. package/src/duckdb/src/include/duckdb/planner/filter/optional_filter.hpp +35 -0
  995. package/src/duckdb/src/include/duckdb/planner/logical_operator.hpp +4 -0
  996. package/src/duckdb/src/include/duckdb/planner/logical_operator_visitor.hpp +3 -0
  997. package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +2 -0
  998. package/src/duckdb/src/include/duckdb/planner/operator/logical_create_index.hpp +9 -9
  999. package/src/duckdb/src/include/duckdb/planner/operator/logical_filter.hpp +4 -0
  1000. package/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp +16 -7
  1001. package/src/duckdb/src/include/duckdb/planner/operator/logical_insert.hpp +2 -0
  1002. package/src/duckdb/src/include/duckdb/planner/operator/logical_join.hpp +4 -0
  1003. package/src/duckdb/src/include/duckdb/planner/operator/logical_order.hpp +5 -1
  1004. package/src/duckdb/src/include/duckdb/planner/operator/logical_top_n.hpp +5 -3
  1005. package/src/duckdb/src/include/duckdb/planner/table_binding.hpp +14 -6
  1006. package/src/duckdb/src/include/duckdb/planner/table_filter.hpp +12 -8
  1007. package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +1 -0
  1008. package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +3 -0
  1009. package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +82 -26
  1010. package/src/duckdb/src/include/duckdb/storage/buffer/buffer_handle.hpp +1 -1
  1011. package/src/duckdb/src/include/duckdb/storage/buffer/buffer_pool.hpp +10 -3
  1012. package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +2 -1
  1013. package/src/duckdb/src/include/duckdb/storage/checkpoint/string_checkpoint_state.hpp +4 -13
  1014. package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_compress.hpp +14 -15
  1015. package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_constants.hpp +1 -1
  1016. package/src/duckdb/src/include/duckdb/storage/compression/alprd/alprd_compress.hpp +13 -15
  1017. package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_compress.hpp +1 -1
  1018. package/src/duckdb/src/include/duckdb/storage/compression/dictionary/analyze.hpp +46 -0
  1019. package/src/duckdb/src/include/duckdb/storage/compression/dictionary/common.hpp +60 -0
  1020. package/src/duckdb/src/include/duckdb/storage/compression/dictionary/compression.hpp +61 -0
  1021. package/src/duckdb/src/include/duckdb/storage/compression/dictionary/decompression.hpp +50 -0
  1022. package/src/duckdb/src/include/duckdb/storage/compression/empty_validity.hpp +100 -0
  1023. package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_compress.hpp +1 -1
  1024. package/src/duckdb/src/include/duckdb/storage/compression/roaring/appender.hpp +150 -0
  1025. package/src/duckdb/src/include/duckdb/storage/compression/roaring/roaring.hpp +618 -0
  1026. package/src/duckdb/src/include/duckdb/storage/data_table.hpp +53 -31
  1027. package/src/duckdb/src/include/duckdb/storage/index.hpp +2 -3
  1028. package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +0 -1
  1029. package/src/duckdb/src/include/duckdb/storage/segment/uncompressed.hpp +4 -1
  1030. package/src/duckdb/src/include/duckdb/storage/standard_buffer_manager.hpp +3 -3
  1031. package/src/duckdb/src/include/duckdb/storage/statistics/column_statistics.hpp +1 -1
  1032. package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +5 -4
  1033. package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +16 -1
  1034. package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +2 -1
  1035. package/src/duckdb/src/include/duckdb/storage/storage_index.hpp +70 -0
  1036. package/src/duckdb/src/include/duckdb/storage/storage_info.hpp +5 -7
  1037. package/src/duckdb/src/include/duckdb/storage/storage_manager.hpp +4 -3
  1038. package/src/duckdb/src/include/duckdb/storage/storage_options.hpp +23 -0
  1039. package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +34 -6
  1040. package/src/duckdb/src/include/duckdb/storage/table/append_state.hpp +2 -0
  1041. package/src/duckdb/src/include/duckdb/storage/table/array_column_data.hpp +2 -2
  1042. package/src/duckdb/src/include/duckdb/storage/table/column_checkpoint_state.hpp +2 -1
  1043. package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +39 -10
  1044. package/src/duckdb/src/include/duckdb/storage/table/column_data_checkpointer.hpp +56 -14
  1045. package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +35 -29
  1046. package/src/duckdb/src/include/duckdb/storage/table/delete_state.hpp +1 -1
  1047. package/src/duckdb/src/include/duckdb/storage/table/list_column_data.hpp +1 -1
  1048. package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +7 -1
  1049. package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +19 -6
  1050. package/src/duckdb/src/include/duckdb/storage/table/row_version_manager.hpp +2 -1
  1051. package/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp +29 -6
  1052. package/src/duckdb/src/include/duckdb/storage/table/segment_tree.hpp +10 -10
  1053. package/src/duckdb/src/include/duckdb/storage/table/standard_column_data.hpp +5 -0
  1054. package/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp +26 -19
  1055. package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +8 -1
  1056. package/src/duckdb/src/include/duckdb/storage/table/update_segment.hpp +16 -14
  1057. package/src/duckdb/src/include/duckdb/storage/table/validity_column_data.hpp +2 -0
  1058. package/src/duckdb/src/include/duckdb/storage/table_io_manager.hpp +3 -0
  1059. package/src/duckdb/src/include/duckdb/storage/table_storage_info.hpp +1 -0
  1060. package/src/duckdb/src/include/duckdb/storage/temporary_file_manager.hpp +228 -61
  1061. package/src/duckdb/src/include/duckdb/storage/write_ahead_log.hpp +14 -10
  1062. package/src/duckdb/src/include/duckdb/transaction/commit_state.hpp +3 -1
  1063. package/src/duckdb/src/include/duckdb/transaction/duck_transaction.hpp +3 -2
  1064. package/src/duckdb/src/include/duckdb/transaction/duck_transaction_manager.hpp +1 -0
  1065. package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +19 -17
  1066. package/src/duckdb/src/include/duckdb/transaction/rollback_state.hpp +5 -2
  1067. package/src/duckdb/src/include/duckdb/transaction/transaction.hpp +1 -2
  1068. package/src/duckdb/src/include/duckdb/transaction/undo_buffer.hpp +13 -8
  1069. package/src/duckdb/src/include/duckdb/transaction/undo_buffer_allocator.hpp +79 -0
  1070. package/src/duckdb/src/include/duckdb/transaction/update_info.hpp +43 -13
  1071. package/src/duckdb/src/include/duckdb/transaction/wal_write_state.hpp +4 -1
  1072. package/src/duckdb/src/include/duckdb/verification/copied_statement_verifier.hpp +4 -2
  1073. package/src/duckdb/src/include/duckdb/verification/deserialized_statement_verifier.hpp +4 -2
  1074. package/src/duckdb/src/include/duckdb/verification/external_statement_verifier.hpp +4 -2
  1075. package/src/duckdb/src/include/duckdb/verification/fetch_row_verifier.hpp +4 -2
  1076. package/src/duckdb/src/include/duckdb/verification/no_operator_caching_verifier.hpp +4 -2
  1077. package/src/duckdb/src/include/duckdb/verification/parsed_statement_verifier.hpp +4 -2
  1078. package/src/duckdb/src/include/duckdb/verification/prepared_statement_verifier.hpp +7 -3
  1079. package/src/duckdb/src/include/duckdb/verification/statement_verifier.hpp +11 -5
  1080. package/src/duckdb/src/include/duckdb/verification/unoptimized_statement_verifier.hpp +4 -2
  1081. package/src/duckdb/src/include/duckdb.h +424 -41
  1082. package/src/duckdb/src/include/duckdb_extension.h +301 -195
  1083. package/src/duckdb/src/logging/log_manager.cpp +157 -0
  1084. package/src/duckdb/src/logging/log_storage.cpp +209 -0
  1085. package/src/duckdb/src/logging/logger.cpp +211 -0
  1086. package/src/duckdb/src/logging/logging.cpp +42 -0
  1087. package/src/duckdb/src/main/appender.cpp +187 -45
  1088. package/src/duckdb/src/main/attached_database.cpp +16 -8
  1089. package/src/duckdb/src/main/capi/appender-c.cpp +47 -4
  1090. package/src/duckdb/src/main/capi/arrow-c.cpp +9 -4
  1091. package/src/duckdb/src/main/capi/config-c.cpp +17 -4
  1092. package/src/duckdb/src/main/capi/datetime-c.cpp +15 -0
  1093. package/src/duckdb/src/main/capi/duckdb-c.cpp +54 -13
  1094. package/src/duckdb/src/main/capi/duckdb_value-c.cpp +212 -4
  1095. package/src/duckdb/src/main/capi/helper-c.cpp +3 -0
  1096. package/src/duckdb/src/main/capi/prepared-c.cpp +26 -7
  1097. package/src/duckdb/src/main/capi/replacement_scan-c.cpp +1 -1
  1098. package/src/duckdb/src/main/capi/result-c.cpp +3 -0
  1099. package/src/duckdb/src/main/capi/table_description-c.cpp +43 -10
  1100. package/src/duckdb/src/main/capi/threading-c.cpp +4 -4
  1101. package/src/duckdb/src/main/client_context.cpp +125 -51
  1102. package/src/duckdb/src/main/client_context_file_opener.cpp +4 -0
  1103. package/src/duckdb/src/main/client_context_wrapper.cpp +4 -0
  1104. package/src/duckdb/src/main/client_data.cpp +1 -1
  1105. package/src/duckdb/src/main/client_verify.cpp +39 -20
  1106. package/src/duckdb/src/main/config.cpp +266 -74
  1107. package/src/duckdb/src/main/connection.cpp +53 -13
  1108. package/src/duckdb/src/main/database.cpp +39 -18
  1109. package/src/duckdb/src/main/database_manager.cpp +12 -11
  1110. package/src/duckdb/src/main/db_instance_cache.cpp +14 -7
  1111. package/src/duckdb/src/main/extension/extension_helper.cpp +24 -23
  1112. package/src/duckdb/src/main/extension/extension_install.cpp +19 -7
  1113. package/src/duckdb/src/main/extension/extension_load.cpp +91 -41
  1114. package/src/duckdb/src/main/extension/extension_util.cpp +40 -19
  1115. package/src/duckdb/src/main/extension.cpp +20 -11
  1116. package/src/duckdb/src/main/profiling_info.cpp +19 -5
  1117. package/src/duckdb/src/main/query_profiler.cpp +135 -36
  1118. package/src/duckdb/src/main/query_result.cpp +2 -1
  1119. package/src/duckdb/src/main/relation/aggregate_relation.cpp +3 -3
  1120. package/src/duckdb/src/main/relation/create_table_relation.cpp +5 -4
  1121. package/src/duckdb/src/main/relation/create_view_relation.cpp +2 -2
  1122. package/src/duckdb/src/main/relation/cross_product_relation.cpp +2 -2
  1123. package/src/duckdb/src/main/relation/delete_relation.cpp +2 -2
  1124. package/src/duckdb/src/main/relation/delim_get_relation.cpp +1 -1
  1125. package/src/duckdb/src/main/relation/distinct_relation.cpp +1 -1
  1126. package/src/duckdb/src/main/relation/explain_relation.cpp +1 -1
  1127. package/src/duckdb/src/main/relation/filter_relation.cpp +1 -1
  1128. package/src/duckdb/src/main/relation/insert_relation.cpp +1 -1
  1129. package/src/duckdb/src/main/relation/join_relation.cpp +5 -5
  1130. package/src/duckdb/src/main/relation/order_relation.cpp +1 -1
  1131. package/src/duckdb/src/main/relation/projection_relation.cpp +3 -3
  1132. package/src/duckdb/src/main/relation/query_relation.cpp +1 -1
  1133. package/src/duckdb/src/main/relation/read_csv_relation.cpp +58 -20
  1134. package/src/duckdb/src/main/relation/setop_relation.cpp +2 -2
  1135. package/src/duckdb/src/main/relation/subquery_relation.cpp +3 -8
  1136. package/src/duckdb/src/main/relation/table_function_relation.cpp +10 -1
  1137. package/src/duckdb/src/main/relation/table_relation.cpp +19 -3
  1138. package/src/duckdb/src/main/relation/update_relation.cpp +2 -2
  1139. package/src/duckdb/src/main/relation/value_relation.cpp +42 -2
  1140. package/src/duckdb/src/main/relation/view_relation.cpp +8 -2
  1141. package/src/duckdb/src/main/relation/write_csv_relation.cpp +1 -1
  1142. package/src/duckdb/src/main/relation/write_parquet_relation.cpp +1 -1
  1143. package/src/duckdb/src/main/relation.cpp +49 -28
  1144. package/src/duckdb/src/main/secret/secret_manager.cpp +1 -1
  1145. package/src/duckdb/src/main/secret/secret_storage.cpp +6 -4
  1146. package/src/duckdb/src/main/settings/autogenerated_settings.cpp +1102 -0
  1147. package/src/duckdb/src/main/settings/custom_settings.cpp +1343 -0
  1148. package/src/duckdb/src/optimizer/build_probe_side_optimizer.cpp +60 -37
  1149. package/src/duckdb/src/optimizer/column_binding_replacer.cpp +1 -1
  1150. package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +126 -72
  1151. package/src/duckdb/src/optimizer/common_aggregate_optimizer.cpp +22 -6
  1152. package/src/duckdb/src/optimizer/compressed_materialization/compress_aggregate.cpp +3 -3
  1153. package/src/duckdb/src/optimizer/compressed_materialization/compress_distinct.cpp +2 -2
  1154. package/src/duckdb/src/optimizer/compressed_materialization.cpp +3 -3
  1155. package/src/duckdb/src/optimizer/cse_optimizer.cpp +7 -7
  1156. package/src/duckdb/src/optimizer/deliminator.cpp +6 -5
  1157. package/src/duckdb/src/optimizer/empty_result_pullup.cpp +96 -0
  1158. package/src/duckdb/src/optimizer/expression_heuristics.cpp +11 -3
  1159. package/src/duckdb/src/optimizer/expression_rewriter.cpp +9 -2
  1160. package/src/duckdb/src/optimizer/filter_combiner.cpp +190 -88
  1161. package/src/duckdb/src/optimizer/filter_pushdown.cpp +6 -5
  1162. package/src/duckdb/src/optimizer/in_clause_rewriter.cpp +25 -9
  1163. package/src/duckdb/src/optimizer/join_filter_pushdown_optimizer.cpp +170 -72
  1164. package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +5 -4
  1165. package/src/duckdb/src/optimizer/join_order/plan_enumerator.cpp +3 -1
  1166. package/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp +7 -7
  1167. package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +15 -6
  1168. package/src/duckdb/src/optimizer/join_order/relation_statistics_helper.cpp +37 -22
  1169. package/src/duckdb/src/optimizer/late_materialization.cpp +414 -0
  1170. package/src/duckdb/src/optimizer/limit_pushdown.cpp +1 -0
  1171. package/src/duckdb/src/optimizer/matcher/expression_matcher.cpp +30 -2
  1172. package/src/duckdb/src/optimizer/optimizer.cpp +67 -7
  1173. package/src/duckdb/src/optimizer/pullup/pullup_filter.cpp +3 -3
  1174. package/src/duckdb/src/optimizer/pullup/pullup_projection.cpp +2 -2
  1175. package/src/duckdb/src/optimizer/pullup/pullup_set_operation.cpp +1 -1
  1176. package/src/duckdb/src/optimizer/pushdown/pushdown_aggregate.cpp +2 -2
  1177. package/src/duckdb/src/optimizer/pushdown/pushdown_filter.cpp +1 -1
  1178. package/src/duckdb/src/optimizer/pushdown/pushdown_left_join.cpp +1 -1
  1179. package/src/duckdb/src/optimizer/pushdown/pushdown_mark_join.cpp +3 -3
  1180. package/src/duckdb/src/optimizer/pushdown/pushdown_projection.cpp +5 -3
  1181. package/src/duckdb/src/optimizer/pushdown/pushdown_set_operation.cpp +1 -1
  1182. package/src/duckdb/src/optimizer/pushdown/pushdown_unnest.cpp +52 -0
  1183. package/src/duckdb/src/optimizer/pushdown/pushdown_window.cpp +2 -2
  1184. package/src/duckdb/src/optimizer/regex_range_filter.cpp +1 -1
  1185. package/src/duckdb/src/optimizer/remove_duplicate_groups.cpp +1 -1
  1186. package/src/duckdb/src/optimizer/remove_unused_columns.cpp +168 -38
  1187. package/src/duckdb/src/optimizer/rule/arithmetic_simplification.cpp +2 -1
  1188. package/src/duckdb/src/optimizer/rule/comparison_simplification.cpp +8 -5
  1189. package/src/duckdb/src/optimizer/rule/conjunction_simplification.cpp +2 -2
  1190. package/src/duckdb/src/optimizer/rule/constant_folding.cpp +2 -2
  1191. package/src/duckdb/src/optimizer/rule/distinct_aggregate_optimizer.cpp +65 -0
  1192. package/src/duckdb/src/optimizer/rule/distributivity.cpp +2 -2
  1193. package/src/duckdb/src/optimizer/rule/enum_comparison.cpp +2 -1
  1194. package/src/duckdb/src/optimizer/rule/equal_or_null_simplification.cpp +4 -3
  1195. package/src/duckdb/src/optimizer/rule/in_clause_simplification_rule.cpp +3 -3
  1196. package/src/duckdb/src/optimizer/rule/like_optimizations.cpp +3 -1
  1197. package/src/duckdb/src/optimizer/rule/move_constants.cpp +9 -9
  1198. package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +4 -3
  1199. package/src/duckdb/src/optimizer/rule/timestamp_comparison.cpp +1 -1
  1200. package/src/duckdb/src/optimizer/sampling_pushdown.cpp +24 -0
  1201. package/src/duckdb/src/optimizer/statistics/expression/propagate_comparison.cpp +1 -1
  1202. package/src/duckdb/src/optimizer/statistics/expression/propagate_conjunction.cpp +1 -1
  1203. package/src/duckdb/src/optimizer/statistics/expression/propagate_operator.cpp +1 -1
  1204. package/src/duckdb/src/optimizer/statistics/operator/propagate_aggregate.cpp +74 -0
  1205. package/src/duckdb/src/optimizer/statistics/operator/propagate_filter.cpp +10 -7
  1206. package/src/duckdb/src/optimizer/statistics/operator/propagate_get.cpp +3 -3
  1207. package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +3 -3
  1208. package/src/duckdb/src/optimizer/statistics/operator/propagate_window.cpp +3 -0
  1209. package/src/duckdb/src/optimizer/sum_rewriter.cpp +174 -0
  1210. package/src/duckdb/src/optimizer/topn_optimizer.cpp +71 -0
  1211. package/src/duckdb/src/optimizer/unnest_rewriter.cpp +5 -5
  1212. package/src/duckdb/src/parallel/event.cpp +4 -0
  1213. package/src/duckdb/src/parallel/executor.cpp +11 -29
  1214. package/src/duckdb/src/parallel/executor_task.cpp +8 -3
  1215. package/src/duckdb/src/parallel/pipeline.cpp +15 -8
  1216. package/src/duckdb/src/parallel/pipeline_executor.cpp +67 -43
  1217. package/src/duckdb/src/parallel/thread_context.cpp +12 -1
  1218. package/src/duckdb/src/parser/column_definition.cpp +3 -3
  1219. package/src/duckdb/src/parser/constraints/unique_constraint.cpp +72 -9
  1220. package/src/duckdb/src/parser/expression/columnref_expression.cpp +15 -3
  1221. package/src/duckdb/src/parser/expression/conjunction_expression.cpp +1 -1
  1222. package/src/duckdb/src/parser/expression/function_expression.cpp +1 -1
  1223. package/src/duckdb/src/parser/expression/lambda_expression.cpp +3 -3
  1224. package/src/duckdb/src/parser/expression/lambdaref_expression.cpp +1 -1
  1225. package/src/duckdb/src/parser/expression/star_expression.cpp +46 -2
  1226. package/src/duckdb/src/parser/expression/window_expression.cpp +24 -1
  1227. package/src/duckdb/src/parser/parsed_data/alter_info.cpp +26 -2
  1228. package/src/duckdb/src/parser/parsed_data/alter_scalar_function_info.cpp +5 -3
  1229. package/src/duckdb/src/parser/parsed_data/alter_table_info.cpp +29 -1
  1230. package/src/duckdb/src/parser/parsed_data/attach_info.cpp +6 -6
  1231. package/src/duckdb/src/parser/parsed_data/create_aggregate_function_info.cpp +1 -1
  1232. package/src/duckdb/src/parser/parsed_data/create_function_info.cpp +17 -0
  1233. package/src/duckdb/src/parser/parsed_data/create_index_info.cpp +16 -15
  1234. package/src/duckdb/src/parser/parsed_data/create_macro_info.cpp +1 -1
  1235. package/src/duckdb/src/parser/parsed_data/create_pragma_function_info.cpp +1 -1
  1236. package/src/duckdb/src/parser/parsed_data/create_scalar_function_info.cpp +3 -2
  1237. package/src/duckdb/src/parser/parsed_data/create_schema_info.cpp +1 -1
  1238. package/src/duckdb/src/parser/parsed_data/create_table_function_info.cpp +1 -1
  1239. package/src/duckdb/src/parser/parsed_data/create_table_info.cpp +1 -0
  1240. package/src/duckdb/src/parser/parsed_data/create_type_info.cpp +4 -4
  1241. package/src/duckdb/src/parser/parsed_data/load_info.cpp +1 -0
  1242. package/src/duckdb/src/parser/parsed_data/sample_options.cpp +31 -1
  1243. package/src/duckdb/src/parser/parsed_expression.cpp +1 -1
  1244. package/src/duckdb/src/parser/parsed_expression_iterator.cpp +4 -1
  1245. package/src/duckdb/src/parser/parser.cpp +129 -0
  1246. package/src/duckdb/src/parser/qualified_name.cpp +99 -0
  1247. package/src/duckdb/src/parser/query_error_context.cpp +35 -6
  1248. package/src/duckdb/src/parser/query_node/select_node.cpp +4 -4
  1249. package/src/duckdb/src/parser/statement/delete_statement.cpp +6 -1
  1250. package/src/duckdb/src/parser/statement/insert_statement.cpp +4 -3
  1251. package/src/duckdb/src/parser/statement/update_statement.cpp +6 -1
  1252. package/src/duckdb/src/parser/tableref/pivotref.cpp +2 -2
  1253. package/src/duckdb/src/parser/tableref.cpp +2 -2
  1254. package/src/duckdb/src/parser/transform/constraint/transform_constraint.cpp +16 -24
  1255. package/src/duckdb/src/parser/transform/expression/transform_array_access.cpp +1 -1
  1256. package/src/duckdb/src/parser/transform/expression/transform_bool_expr.cpp +5 -5
  1257. package/src/duckdb/src/parser/transform/expression/transform_columnref.cpp +61 -13
  1258. package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +10 -4
  1259. package/src/duckdb/src/parser/transform/expression/transform_expression.cpp +2 -2
  1260. package/src/duckdb/src/parser/transform/expression/transform_function.cpp +30 -3
  1261. package/src/duckdb/src/parser/transform/expression/transform_operator.cpp +1 -1
  1262. package/src/duckdb/src/parser/transform/expression/transform_subquery.cpp +25 -6
  1263. package/src/duckdb/src/parser/transform/helpers/transform_groupby.cpp +1 -1
  1264. package/src/duckdb/src/parser/transform/helpers/transform_sample.cpp +10 -3
  1265. package/src/duckdb/src/parser/transform/helpers/transform_typename.cpp +4 -3
  1266. package/src/duckdb/src/parser/transform/statement/transform_alter_table.cpp +18 -3
  1267. package/src/duckdb/src/parser/transform/statement/transform_comment_on.cpp +1 -1
  1268. package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +0 -1
  1269. package/src/duckdb/src/parser/transform/statement/transform_create_function.cpp +5 -5
  1270. package/src/duckdb/src/parser/transform/statement/transform_create_table.cpp +26 -12
  1271. package/src/duckdb/src/parser/transform/statement/transform_create_table_as.cpp +11 -3
  1272. package/src/duckdb/src/parser/transform/statement/transform_create_view.cpp +1 -1
  1273. package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +2 -0
  1274. package/src/duckdb/src/parser/transform/statement/transform_pragma.cpp +3 -3
  1275. package/src/duckdb/src/parser/transform/statement/transform_prepare.cpp +4 -4
  1276. package/src/duckdb/src/parser/transform/statement/transform_set.cpp +2 -2
  1277. package/src/duckdb/src/parser/transform/statement/transform_show.cpp +21 -3
  1278. package/src/duckdb/src/parser/transform/tableref/transform_pivot.cpp +8 -6
  1279. package/src/duckdb/src/parser/transformer.cpp +2 -2
  1280. package/src/duckdb/src/planner/bind_context.cpp +308 -136
  1281. package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +30 -31
  1282. package/src/duckdb/src/planner/binder/expression/bind_between_expression.cpp +4 -2
  1283. package/src/duckdb/src/planner/binder/expression/bind_columnref_expression.cpp +102 -94
  1284. package/src/duckdb/src/planner/binder/expression/bind_comparison_expression.cpp +7 -5
  1285. package/src/duckdb/src/planner/binder/expression/bind_conjunction_expression.cpp +1 -1
  1286. package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +7 -7
  1287. package/src/duckdb/src/planner/binder/expression/bind_lambda.cpp +10 -10
  1288. package/src/duckdb/src/planner/binder/expression/bind_macro_expression.cpp +24 -6
  1289. package/src/duckdb/src/planner/binder/expression/bind_operator_expression.cpp +23 -15
  1290. package/src/duckdb/src/planner/binder/expression/bind_parameter_expression.cpp +1 -1
  1291. package/src/duckdb/src/planner/binder/expression/bind_star_expression.cpp +97 -19
  1292. package/src/duckdb/src/planner/binder/expression/bind_subquery_expression.cpp +74 -16
  1293. package/src/duckdb/src/planner/binder/expression/bind_unnest_expression.cpp +6 -6
  1294. package/src/duckdb/src/planner/binder/expression/bind_window_expression.cpp +49 -15
  1295. package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +32 -23
  1296. package/src/duckdb/src/planner/binder/query_node/bind_setop_node.cpp +20 -3
  1297. package/src/duckdb/src/planner/binder/query_node/bind_table_macro_node.cpp +2 -2
  1298. package/src/duckdb/src/planner/binder/query_node/plan_query_node.cpp +3 -0
  1299. package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +6 -5
  1300. package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +38 -19
  1301. package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +2 -12
  1302. package/src/duckdb/src/planner/binder/statement/bind_create.cpp +117 -412
  1303. package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +423 -144
  1304. package/src/duckdb/src/planner/binder/statement/bind_delete.cpp +1 -1
  1305. package/src/duckdb/src/planner/binder/statement/bind_drop.cpp +5 -0
  1306. package/src/duckdb/src/planner/binder/statement/bind_execute.cpp +1 -1
  1307. package/src/duckdb/src/planner/binder/statement/bind_export.cpp +0 -4
  1308. package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +31 -13
  1309. package/src/duckdb/src/planner/binder/statement/bind_pragma.cpp +1 -1
  1310. package/src/duckdb/src/planner/binder/statement/bind_simple.cpp +96 -27
  1311. package/src/duckdb/src/planner/binder/statement/bind_summarize.cpp +1 -1
  1312. package/src/duckdb/src/planner/binder/statement/bind_update.cpp +5 -3
  1313. package/src/duckdb/src/planner/binder/statement/bind_vacuum.cpp +7 -6
  1314. package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +36 -9
  1315. package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +34 -34
  1316. package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +72 -35
  1317. package/src/duckdb/src/planner/binder/tableref/bind_showref.cpp +99 -18
  1318. package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +23 -11
  1319. package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +22 -19
  1320. package/src/duckdb/src/planner/binder.cpp +23 -45
  1321. package/src/duckdb/src/planner/binding_alias.cpp +69 -0
  1322. package/src/duckdb/src/planner/bound_parameter_map.cpp +1 -1
  1323. package/src/duckdb/src/planner/bound_result_modifier.cpp +6 -2
  1324. package/src/duckdb/src/planner/collation_binding.cpp +38 -4
  1325. package/src/duckdb/src/planner/expression/bound_cast_expression.cpp +17 -5
  1326. package/src/duckdb/src/planner/expression/bound_expression.cpp +1 -1
  1327. package/src/duckdb/src/planner/expression/bound_function_expression.cpp +8 -1
  1328. package/src/duckdb/src/planner/expression/bound_parameter_expression.cpp +2 -2
  1329. package/src/duckdb/src/planner/expression/bound_window_expression.cpp +24 -4
  1330. package/src/duckdb/src/planner/expression.cpp +7 -1
  1331. package/src/duckdb/src/planner/expression_binder/aggregate_binder.cpp +1 -1
  1332. package/src/duckdb/src/planner/expression_binder/base_select_binder.cpp +2 -2
  1333. package/src/duckdb/src/planner/expression_binder/group_binder.cpp +2 -2
  1334. package/src/duckdb/src/planner/expression_binder/having_binder.cpp +16 -0
  1335. package/src/duckdb/src/planner/expression_binder/index_binder.cpp +53 -1
  1336. package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +3 -3
  1337. package/src/duckdb/src/planner/expression_binder/order_binder.cpp +8 -8
  1338. package/src/duckdb/src/planner/expression_binder/relation_binder.cpp +1 -1
  1339. package/src/duckdb/src/planner/expression_binder/select_bind_state.cpp +2 -2
  1340. package/src/duckdb/src/planner/expression_binder/table_function_binder.cpp +1 -1
  1341. package/src/duckdb/src/planner/expression_binder/update_binder.cpp +1 -1
  1342. package/src/duckdb/src/planner/expression_binder.cpp +7 -7
  1343. package/src/duckdb/src/planner/expression_iterator.cpp +6 -3
  1344. package/src/duckdb/src/planner/filter/constant_filter.cpp +17 -2
  1345. package/src/duckdb/src/planner/filter/dynamic_filter.cpp +68 -0
  1346. package/src/duckdb/src/planner/filter/in_filter.cpp +84 -0
  1347. package/src/duckdb/src/planner/filter/null_filter.cpp +1 -2
  1348. package/src/duckdb/src/planner/filter/optional_filter.cpp +29 -0
  1349. package/src/duckdb/src/planner/filter/struct_filter.cpp +11 -6
  1350. package/src/duckdb/src/planner/joinside.cpp +6 -5
  1351. package/src/duckdb/src/planner/logical_operator.cpp +4 -1
  1352. package/src/duckdb/src/planner/logical_operator_visitor.cpp +68 -2
  1353. package/src/duckdb/src/planner/operator/logical_comparison_join.cpp +23 -0
  1354. package/src/duckdb/src/planner/operator/logical_create_index.cpp +16 -12
  1355. package/src/duckdb/src/planner/operator/logical_filter.cpp +1 -1
  1356. package/src/duckdb/src/planner/operator/logical_get.cpp +48 -25
  1357. package/src/duckdb/src/planner/operator/logical_insert.cpp +1 -1
  1358. package/src/duckdb/src/planner/operator/logical_join.cpp +1 -1
  1359. package/src/duckdb/src/planner/operator/logical_order.cpp +4 -11
  1360. package/src/duckdb/src/planner/operator/logical_top_n.cpp +7 -0
  1361. package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +33 -5
  1362. package/src/duckdb/src/planner/subquery/rewrite_correlated_expressions.cpp +2 -2
  1363. package/src/duckdb/src/planner/table_binding.cpp +74 -36
  1364. package/src/duckdb/src/planner/table_filter.cpp +5 -8
  1365. package/src/duckdb/src/storage/arena_allocator.cpp +5 -4
  1366. package/src/duckdb/src/storage/buffer/block_handle.cpp +88 -17
  1367. package/src/duckdb/src/storage/buffer/block_manager.cpp +34 -26
  1368. package/src/duckdb/src/storage/buffer/buffer_handle.cpp +2 -2
  1369. package/src/duckdb/src/storage/buffer/buffer_pool.cpp +70 -49
  1370. package/src/duckdb/src/storage/buffer_manager.cpp +4 -0
  1371. package/src/duckdb/src/storage/checkpoint_manager.cpp +24 -5
  1372. package/src/duckdb/src/storage/compression/bitpacking.cpp +14 -16
  1373. package/src/duckdb/src/storage/compression/dictionary/analyze.cpp +54 -0
  1374. package/src/duckdb/src/storage/compression/dictionary/common.cpp +90 -0
  1375. package/src/duckdb/src/storage/compression/dictionary/compression.cpp +174 -0
  1376. package/src/duckdb/src/storage/compression/dictionary/decompression.cpp +115 -0
  1377. package/src/duckdb/src/storage/compression/dictionary_compression.cpp +53 -545
  1378. package/src/duckdb/src/storage/compression/empty_validity.cpp +15 -0
  1379. package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +25 -16
  1380. package/src/duckdb/src/storage/compression/fsst.cpp +101 -47
  1381. package/src/duckdb/src/storage/compression/numeric_constant.cpp +92 -2
  1382. package/src/duckdb/src/storage/compression/rle.cpp +216 -46
  1383. package/src/duckdb/src/storage/compression/roaring/analyze.cpp +179 -0
  1384. package/src/duckdb/src/storage/compression/roaring/common.cpp +282 -0
  1385. package/src/duckdb/src/storage/compression/roaring/compress.cpp +481 -0
  1386. package/src/duckdb/src/storage/compression/roaring/metadata.cpp +262 -0
  1387. package/src/duckdb/src/storage/compression/roaring/scan.cpp +364 -0
  1388. package/src/duckdb/src/storage/compression/string_uncompressed.cpp +47 -65
  1389. package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +102 -39
  1390. package/src/duckdb/src/storage/compression/zstd.cpp +1049 -0
  1391. package/src/duckdb/src/storage/data_table.cpp +312 -172
  1392. package/src/duckdb/src/storage/local_storage.cpp +104 -46
  1393. package/src/duckdb/src/storage/metadata/metadata_manager.cpp +1 -1
  1394. package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +7 -3
  1395. package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +138 -58
  1396. package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +14 -0
  1397. package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +19 -8
  1398. package/src/duckdb/src/storage/serialization/serialize_statement.cpp +2 -0
  1399. package/src/duckdb/src/storage/serialization/serialize_table_filter.cpp +43 -0
  1400. package/src/duckdb/src/storage/serialization/serialize_types.cpp +32 -5
  1401. package/src/duckdb/src/storage/single_file_block_manager.cpp +6 -8
  1402. package/src/duckdb/src/storage/standard_buffer_manager.cpp +82 -71
  1403. package/src/duckdb/src/storage/statistics/column_statistics.cpp +3 -3
  1404. package/src/duckdb/src/storage/statistics/distinct_statistics.cpp +18 -17
  1405. package/src/duckdb/src/storage/statistics/numeric_stats.cpp +34 -22
  1406. package/src/duckdb/src/storage/statistics/string_stats.cpp +14 -3
  1407. package/src/duckdb/src/storage/storage_info.cpp +72 -10
  1408. package/src/duckdb/src/storage/storage_manager.cpp +41 -47
  1409. package/src/duckdb/src/storage/table/array_column_data.cpp +7 -1
  1410. package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +10 -9
  1411. package/src/duckdb/src/storage/table/column_data.cpp +105 -43
  1412. package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +307 -132
  1413. package/src/duckdb/src/storage/table/column_segment.cpp +36 -13
  1414. package/src/duckdb/src/storage/table/list_column_data.cpp +4 -4
  1415. package/src/duckdb/src/storage/table/row_group.cpp +159 -66
  1416. package/src/duckdb/src/storage/table/row_group_collection.cpp +157 -68
  1417. package/src/duckdb/src/storage/table/row_version_manager.cpp +33 -10
  1418. package/src/duckdb/src/storage/table/scan_state.cpp +21 -7
  1419. package/src/duckdb/src/storage/table/standard_column_data.cpp +68 -5
  1420. package/src/duckdb/src/storage/table/struct_column_data.cpp +42 -4
  1421. package/src/duckdb/src/storage/table/table_statistics.cpp +91 -5
  1422. package/src/duckdb/src/storage/table/update_segment.cpp +287 -210
  1423. package/src/duckdb/src/storage/table_index_list.cpp +55 -58
  1424. package/src/duckdb/src/storage/temporary_file_manager.cpp +412 -149
  1425. package/src/duckdb/src/storage/wal_replay.cpp +132 -48
  1426. package/src/duckdb/src/storage/write_ahead_log.cpp +75 -48
  1427. package/src/duckdb/src/transaction/cleanup_state.cpp +0 -1
  1428. package/src/duckdb/src/transaction/commit_state.cpp +23 -14
  1429. package/src/duckdb/src/transaction/duck_transaction.cpp +29 -25
  1430. package/src/duckdb/src/transaction/duck_transaction_manager.cpp +18 -6
  1431. package/src/duckdb/src/transaction/meta_transaction.cpp +3 -2
  1432. package/src/duckdb/src/transaction/rollback_state.cpp +5 -2
  1433. package/src/duckdb/src/transaction/transaction_context.cpp +9 -1
  1434. package/src/duckdb/src/transaction/undo_buffer.cpp +35 -27
  1435. package/src/duckdb/src/transaction/undo_buffer_allocator.cpp +72 -0
  1436. package/src/duckdb/src/transaction/wal_write_state.cpp +12 -10
  1437. package/src/duckdb/src/verification/copied_statement_verifier.cpp +7 -4
  1438. package/src/duckdb/src/verification/deserialized_statement_verifier.cpp +7 -5
  1439. package/src/duckdb/src/verification/external_statement_verifier.cpp +7 -4
  1440. package/src/duckdb/src/verification/fetch_row_verifier.cpp +7 -4
  1441. package/src/duckdb/src/verification/no_operator_caching_verifier.cpp +8 -4
  1442. package/src/duckdb/src/verification/parsed_statement_verifier.cpp +7 -4
  1443. package/src/duckdb/src/verification/prepared_statement_verifier.cpp +16 -12
  1444. package/src/duckdb/src/verification/statement_verifier.cpp +20 -15
  1445. package/src/duckdb/src/verification/unoptimized_statement_verifier.cpp +7 -4
  1446. package/src/duckdb/third_party/fsst/libfsst.hpp +1 -0
  1447. package/src/duckdb/third_party/httplib/httplib.hpp +15 -22
  1448. package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +4 -2
  1449. package/src/duckdb/third_party/libpg_query/pg_functions.cpp +2 -4
  1450. package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +14278 -13832
  1451. package/src/duckdb/third_party/parquet/parquet_types.cpp +3410 -1686
  1452. package/src/duckdb/third_party/parquet/parquet_types.h +1585 -1204
  1453. package/src/duckdb/third_party/skiplist/SkipList.h +0 -1
  1454. package/src/duckdb/third_party/snappy/snappy-stubs-internal.h +13 -15
  1455. package/src/duckdb/third_party/zstd/common/debug.cpp +36 -0
  1456. package/src/duckdb/third_party/zstd/common/entropy_common.cpp +173 -49
  1457. package/src/duckdb/third_party/zstd/common/error_private.cpp +11 -3
  1458. package/src/duckdb/third_party/zstd/common/fse_decompress.cpp +126 -97
  1459. package/src/duckdb/third_party/zstd/common/pool.cpp +376 -0
  1460. package/src/duckdb/third_party/zstd/common/threading.cpp +193 -0
  1461. package/src/duckdb/third_party/zstd/common/xxhash.cpp +18 -14
  1462. package/src/duckdb/third_party/zstd/common/zstd_common.cpp +3 -38
  1463. package/src/duckdb/third_party/zstd/compress/fse_compress.cpp +93 -165
  1464. package/src/duckdb/third_party/zstd/compress/hist.cpp +28 -31
  1465. package/src/duckdb/third_party/zstd/compress/huf_compress.cpp +957 -291
  1466. package/src/duckdb/third_party/zstd/compress/zstd_compress.cpp +3988 -1124
  1467. package/src/duckdb/third_party/zstd/compress/zstd_compress_literals.cpp +120 -43
  1468. package/src/duckdb/third_party/zstd/compress/zstd_compress_sequences.cpp +47 -23
  1469. package/src/duckdb/third_party/zstd/compress/zstd_compress_superblock.cpp +274 -424
  1470. package/src/duckdb/third_party/zstd/compress/zstd_double_fast.cpp +403 -153
  1471. package/src/duckdb/third_party/zstd/compress/zstd_fast.cpp +741 -268
  1472. package/src/duckdb/third_party/zstd/compress/zstd_lazy.cpp +1339 -278
  1473. package/src/duckdb/third_party/zstd/compress/zstd_ldm.cpp +334 -222
  1474. package/src/duckdb/third_party/zstd/compress/zstd_opt.cpp +674 -298
  1475. package/src/duckdb/third_party/zstd/compress/zstdmt_compress.cpp +1885 -0
  1476. package/src/duckdb/third_party/zstd/decompress/huf_decompress.cpp +1247 -586
  1477. package/src/duckdb/third_party/zstd/decompress/zstd_ddict.cpp +18 -17
  1478. package/src/duckdb/third_party/zstd/decompress/zstd_decompress.cpp +724 -270
  1479. package/src/duckdb/third_party/zstd/decompress/zstd_decompress_block.cpp +1193 -393
  1480. package/src/duckdb/third_party/zstd/deprecated/zbuff_common.cpp +30 -0
  1481. package/src/duckdb/third_party/zstd/deprecated/zbuff_compress.cpp +171 -0
  1482. package/src/duckdb/third_party/zstd/deprecated/zbuff_decompress.cpp +80 -0
  1483. package/src/duckdb/third_party/zstd/dict/cover.cpp +1271 -0
  1484. package/src/duckdb/third_party/zstd/dict/divsufsort.cpp +1916 -0
  1485. package/src/duckdb/third_party/zstd/dict/fastcover.cpp +775 -0
  1486. package/src/duckdb/third_party/zstd/dict/zdict.cpp +1139 -0
  1487. package/src/duckdb/third_party/zstd/include/zdict.h +473 -0
  1488. package/src/duckdb/third_party/zstd/include/zstd/common/allocations.h +58 -0
  1489. package/src/duckdb/third_party/zstd/include/zstd/common/bits.h +204 -0
  1490. package/src/duckdb/third_party/zstd/include/zstd/common/bitstream.h +88 -85
  1491. package/src/duckdb/third_party/zstd/include/zstd/common/compiler.h +243 -47
  1492. package/src/duckdb/third_party/zstd/include/zstd/common/cpu.h +253 -0
  1493. package/src/duckdb/third_party/zstd/include/zstd/common/debug.h +31 -31
  1494. package/src/duckdb/third_party/zstd/include/zstd/common/error_private.h +94 -6
  1495. package/src/duckdb/third_party/zstd/include/zstd/common/fse.h +424 -64
  1496. package/src/duckdb/third_party/zstd/include/zstd/common/huf.h +255 -70
  1497. package/src/duckdb/third_party/zstd/include/zstd/common/mem.h +125 -85
  1498. package/src/duckdb/third_party/zstd/include/zstd/common/pool.h +84 -0
  1499. package/src/duckdb/third_party/zstd/include/zstd/common/portability_macros.h +158 -0
  1500. package/src/duckdb/third_party/zstd/include/zstd/common/threading.h +152 -0
  1501. package/src/duckdb/third_party/zstd/include/zstd/common/{xxhash.h → xxhash.hpp} +0 -1
  1502. package/src/duckdb/third_party/zstd/include/zstd/common/{xxhash_static.h → xxhash_static.hpp} +1 -1
  1503. package/src/duckdb/third_party/zstd/include/zstd/common/zstd_deps.h +122 -0
  1504. package/src/duckdb/third_party/zstd/include/zstd/common/zstd_internal.h +143 -174
  1505. package/src/duckdb/third_party/zstd/include/zstd/common/zstd_trace.h +159 -0
  1506. package/src/duckdb/third_party/zstd/include/zstd/compress/clevels.h +136 -0
  1507. package/src/duckdb/third_party/zstd/include/zstd/compress/hist.h +4 -4
  1508. package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_compress_internal.h +631 -220
  1509. package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_compress_literals.h +17 -7
  1510. package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_compress_sequences.h +2 -2
  1511. package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_compress_superblock.h +3 -2
  1512. package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_cwksp.h +256 -153
  1513. package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_double_fast.h +16 -3
  1514. package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_fast.h +4 -3
  1515. package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_lazy.h +145 -11
  1516. package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_ldm.h +14 -6
  1517. package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_ldm_geartab.h +110 -0
  1518. package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_opt.h +33 -9
  1519. package/src/duckdb/third_party/zstd/include/zstd/compress/zstdmt_compress.h +107 -0
  1520. package/src/duckdb/third_party/zstd/include/zstd/decompress/zstd_ddict.h +4 -3
  1521. package/src/duckdb/third_party/zstd/include/zstd/decompress/zstd_decompress_block.h +20 -6
  1522. package/src/duckdb/third_party/zstd/include/zstd/decompress/zstd_decompress_internal.h +88 -16
  1523. package/src/duckdb/third_party/zstd/include/zstd/deprecated/zbuff.h +214 -0
  1524. package/src/duckdb/third_party/zstd/include/zstd/dict/cover.h +156 -0
  1525. package/src/duckdb/third_party/zstd/include/zstd/dict/divsufsort.h +62 -0
  1526. package/src/duckdb/third_party/zstd/include/zstd.h +2171 -93
  1527. package/src/duckdb/third_party/zstd/include/{zstd/common/zstd_errors.h → zstd_errors.h} +32 -10
  1528. package/src/duckdb/ub_extension_core_functions_aggregate_algebraic.cpp +8 -0
  1529. package/src/duckdb/ub_extension_core_functions_aggregate_distributive.cpp +20 -0
  1530. package/src/duckdb/ub_extension_core_functions_aggregate_holistic.cpp +12 -0
  1531. package/src/duckdb/ub_extension_core_functions_aggregate_nested.cpp +6 -0
  1532. package/src/duckdb/ub_extension_core_functions_aggregate_regression.cpp +14 -0
  1533. package/src/duckdb/ub_extension_core_functions_scalar_array.cpp +4 -0
  1534. package/src/duckdb/ub_extension_core_functions_scalar_bit.cpp +2 -0
  1535. package/src/duckdb/ub_extension_core_functions_scalar_blob.cpp +4 -0
  1536. package/src/duckdb/ub_extension_core_functions_scalar_date.cpp +20 -0
  1537. package/src/duckdb/ub_extension_core_functions_scalar_debug.cpp +2 -0
  1538. package/src/duckdb/ub_extension_core_functions_scalar_enum.cpp +2 -0
  1539. package/src/duckdb/ub_extension_core_functions_scalar_generic.cpp +18 -0
  1540. package/src/duckdb/ub_extension_core_functions_scalar_list.cpp +22 -0
  1541. package/src/duckdb/ub_extension_core_functions_scalar_map.cpp +14 -0
  1542. package/src/duckdb/ub_extension_core_functions_scalar_math.cpp +2 -0
  1543. package/src/duckdb/ub_extension_core_functions_scalar_operators.cpp +2 -0
  1544. package/src/duckdb/ub_extension_core_functions_scalar_random.cpp +4 -0
  1545. package/src/duckdb/ub_extension_core_functions_scalar_string.cpp +48 -0
  1546. package/src/duckdb/ub_extension_core_functions_scalar_struct.cpp +2 -0
  1547. package/src/duckdb/ub_extension_core_functions_scalar_union.cpp +6 -0
  1548. package/src/duckdb/ub_src_common.cpp +4 -0
  1549. package/src/duckdb/ub_src_common_arrow.cpp +3 -1
  1550. package/src/duckdb/ub_src_execution.cpp +0 -6
  1551. package/src/duckdb/ub_src_execution_operator_aggregate.cpp +2 -0
  1552. package/src/duckdb/ub_src_execution_operator_csv_scanner_encode.cpp +2 -0
  1553. package/src/duckdb/ub_src_execution_operator_csv_scanner_util.cpp +2 -0
  1554. package/src/duckdb/ub_src_execution_sample.cpp +4 -0
  1555. package/src/duckdb/ub_src_function.cpp +6 -0
  1556. package/src/duckdb/ub_src_function_aggregate.cpp +0 -2
  1557. package/src/duckdb/ub_src_function_aggregate_distributive.cpp +3 -1
  1558. package/src/duckdb/ub_src_function_scalar.cpp +2 -8
  1559. package/src/duckdb/ub_src_function_scalar_date.cpp +2 -0
  1560. package/src/duckdb/ub_src_function_scalar_generic.cpp +2 -2
  1561. package/src/duckdb/ub_src_function_scalar_map.cpp +2 -0
  1562. package/src/duckdb/ub_src_function_scalar_operator.cpp +8 -0
  1563. package/src/duckdb/ub_src_function_scalar_string.cpp +10 -0
  1564. package/src/duckdb/ub_src_function_scalar_struct.cpp +4 -0
  1565. package/src/duckdb/ub_src_function_scalar_system.cpp +2 -0
  1566. package/src/duckdb/ub_src_function_table_system.cpp +6 -0
  1567. package/src/duckdb/ub_src_function_window.cpp +36 -0
  1568. package/src/duckdb/ub_src_logging.cpp +8 -0
  1569. package/src/duckdb/ub_src_main_settings.cpp +3 -1
  1570. package/src/duckdb/ub_src_optimizer.cpp +8 -0
  1571. package/src/duckdb/ub_src_optimizer_pushdown.cpp +2 -0
  1572. package/src/duckdb/ub_src_optimizer_rule.cpp +2 -0
  1573. package/src/duckdb/ub_src_parser.cpp +2 -0
  1574. package/src/duckdb/ub_src_parser_parsed_data.cpp +2 -0
  1575. package/src/duckdb/ub_src_planner.cpp +2 -0
  1576. package/src/duckdb/ub_src_planner_filter.cpp +6 -0
  1577. package/src/duckdb/ub_src_storage_compression.cpp +4 -0
  1578. package/src/duckdb/ub_src_storage_compression_dictionary.cpp +8 -0
  1579. package/src/duckdb/ub_src_storage_compression_roaring.cpp +10 -0
  1580. package/src/duckdb/ub_src_transaction.cpp +2 -0
  1581. package/vendor.py +1 -1
  1582. package/src/duckdb/extension/json/yyjson/include/yyjson.hpp +0 -6003
  1583. package/src/duckdb/extension/json/yyjson/yyjson.cpp +0 -8218
  1584. package/src/duckdb/src/common/arrow/appender/list_data.cpp +0 -78
  1585. package/src/duckdb/src/common/arrow/appender/map_data.cpp +0 -91
  1586. package/src/duckdb/src/common/cycle_counter.cpp +0 -76
  1587. package/src/duckdb/src/common/field_writer.cpp +0 -97
  1588. package/src/duckdb/src/common/http_state.cpp +0 -95
  1589. package/src/duckdb/src/common/preserved_error.cpp +0 -87
  1590. package/src/duckdb/src/common/row_operations/row_match.cpp +0 -359
  1591. package/src/duckdb/src/common/serializer/buffered_deserializer.cpp +0 -27
  1592. package/src/duckdb/src/common/serializer/buffered_serializer.cpp +0 -36
  1593. package/src/duckdb/src/common/serializer/format_serializer.cpp +0 -15
  1594. package/src/duckdb/src/common/serializer.cpp +0 -24
  1595. package/src/duckdb/src/common/types/chunk_collection.cpp +0 -190
  1596. package/src/duckdb/src/core_functions/aggregate/distributive/entropy.cpp +0 -183
  1597. package/src/duckdb/src/core_functions/scalar/date/current.cpp +0 -54
  1598. package/src/duckdb/src/core_functions/scalar/list/list_cosine_similarity.cpp +0 -78
  1599. package/src/duckdb/src/core_functions/scalar/list/list_inner_product.cpp +0 -70
  1600. package/src/duckdb/src/core_functions/scalar/list/list_lambdas.cpp +0 -412
  1601. package/src/duckdb/src/core_functions/scalar/secret/which_secret.cpp +0 -28
  1602. package/src/duckdb/src/core_functions/scalar/string/jaro_winkler.cpp +0 -71
  1603. package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +0 -238
  1604. package/src/duckdb/src/execution/index/art/node16.cpp +0 -196
  1605. package/src/duckdb/src/execution/index/art/node4.cpp +0 -189
  1606. package/src/duckdb/src/execution/index/unknown_index.cpp +0 -65
  1607. package/src/duckdb/src/execution/operator/csv_scanner/base_csv_reader.cpp +0 -595
  1608. package/src/duckdb/src/execution/operator/csv_scanner/buffered_csv_reader.cpp +0 -434
  1609. package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer.cpp +0 -89
  1610. package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer_manager.cpp +0 -90
  1611. package/src/duckdb/src/execution/operator/csv_scanner/csv_file_handle.cpp +0 -95
  1612. package/src/duckdb/src/execution/operator/csv_scanner/csv_reader_options.cpp +0 -494
  1613. package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine.cpp +0 -35
  1614. package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine_cache.cpp +0 -99
  1615. package/src/duckdb/src/execution/operator/csv_scanner/parallel_csv_reader.cpp +0 -689
  1616. package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +0 -242
  1617. package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +0 -695
  1618. package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +0 -1487
  1619. package/src/duckdb/src/execution/operator/persistent/csv_buffer.cpp +0 -72
  1620. package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +0 -158
  1621. package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +0 -280
  1622. package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +0 -666
  1623. package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +0 -499
  1624. package/src/duckdb/src/execution/operator/schema/physical_create_index.cpp +0 -207
  1625. package/src/duckdb/src/execution/partitionable_hashtable.cpp +0 -207
  1626. package/src/duckdb/src/execution/physical_plan/plan_limit_percent.cpp +0 -18
  1627. package/src/duckdb/src/execution/physical_plan/plan_show_select.cpp +0 -47
  1628. package/src/duckdb/src/execution/reservoir_sample.cpp +0 -324
  1629. package/src/duckdb/src/execution/window_executor.cpp +0 -1830
  1630. package/src/duckdb/src/execution/window_segment_tree.cpp +0 -2073
  1631. package/src/duckdb/src/extension_forward_decl/icu.cpp +0 -59
  1632. package/src/duckdb/src/function/aggregate/distributive_functions.cpp +0 -15
  1633. package/src/duckdb/src/function/scalar/compressed_materialization_functions.cpp +0 -29
  1634. package/src/duckdb/src/function/scalar/generic_functions.cpp +0 -11
  1635. package/src/duckdb/src/function/scalar/list/list_concat.cpp +0 -143
  1636. package/src/duckdb/src/function/scalar/operators.cpp +0 -14
  1637. package/src/duckdb/src/function/scalar/sequence_functions.cpp +0 -10
  1638. package/src/duckdb/src/function/scalar/string_functions.cpp +0 -22
  1639. package/src/duckdb/src/function/table/pragma_detailed_profiling_output.cpp +0 -173
  1640. package/src/duckdb/src/function/table/pragma_last_profiling_output.cpp +0 -101
  1641. package/src/duckdb/src/include/duckdb/catalog/mapping_value.hpp +0 -92
  1642. package/src/duckdb/src/include/duckdb/common/arrow/arrow_types_extension.hpp +0 -42
  1643. package/src/duckdb/src/include/duckdb/common/cycle_counter.hpp +0 -68
  1644. package/src/duckdb/src/include/duckdb/common/enums/index_type.hpp +0 -34
  1645. package/src/duckdb/src/include/duckdb/common/http_state.hpp +0 -113
  1646. package/src/duckdb/src/include/duckdb/common/platform.h +0 -58
  1647. package/src/duckdb/src/include/duckdb/common/preserved_error.hpp +0 -59
  1648. package/src/duckdb/src/include/duckdb/common/serializer/deserialization_data.hpp +0 -192
  1649. package/src/duckdb/src/include/duckdb/common/types/chunk_collection.hpp +0 -137
  1650. package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +0 -65
  1651. package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +0 -63
  1652. package/src/duckdb/src/include/duckdb/execution/index/unknown_index.hpp +0 -65
  1653. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/buffer_manager/csv_buffer.hpp +0 -103
  1654. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/buffer_manager/csv_buffer_manager.hpp +0 -74
  1655. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/buffer_manager/csv_file_handle.hpp +0 -60
  1656. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_sniffer.hpp +0 -253
  1657. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/options/csv_option.hpp +0 -155
  1658. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/options/csv_reader_options.hpp +0 -163
  1659. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/options/state_machine_options.hpp +0 -35
  1660. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner/base_scanner.hpp +0 -228
  1661. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner/column_count_scanner.hpp +0 -70
  1662. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner/scanner_boundary.hpp +0 -93
  1663. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner/skip_scanner.hpp +0 -60
  1664. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner/string_value_scanner.hpp +0 -197
  1665. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/quote_rules.hpp +0 -21
  1666. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/state_machine/csv_state.hpp +0 -30
  1667. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/state_machine/csv_state_machine.hpp +0 -99
  1668. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.hpp +0 -87
  1669. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/table_function/csv_file_scanner.hpp +0 -70
  1670. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/table_function/global_csv_state.hpp +0 -80
  1671. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/util/csv_casting.hpp +0 -137
  1672. package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/util/csv_error.hpp +0 -104
  1673. package/src/duckdb/src/include/duckdb/execution/operator/join/physical_index_join.hpp +0 -79
  1674. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/base_csv_reader.hpp +0 -119
  1675. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/buffered_csv_reader.hpp +0 -72
  1676. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer.hpp +0 -110
  1677. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer_manager.hpp +0 -103
  1678. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_file_handle.hpp +0 -59
  1679. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_line_info.hpp +0 -46
  1680. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_reader_options.hpp +0 -210
  1681. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_sniffer.hpp +0 -131
  1682. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state.hpp +0 -28
  1683. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine.hpp +0 -70
  1684. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine_cache.hpp +0 -65
  1685. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/parallel_csv_reader.hpp +0 -167
  1686. package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/quote_rules.hpp +0 -21
  1687. package/src/duckdb/src/include/duckdb/execution/window_executor.hpp +0 -343
  1688. package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +0 -165
  1689. package/src/duckdb/src/include/duckdb/optimizer/column_lifetime_optimizer.hpp +0 -45
  1690. package/src/duckdb/src/include/duckdb/optimizer/join_order/estimated_properties.hpp +0 -57
  1691. package/src/duckdb/src/include/duckdb/parser/parsed_data/comment_on_info.hpp +0 -45
  1692. package/src/duckdb/src/include/duckdb/parser/statement/show_statement.hpp +0 -32
  1693. package/src/duckdb/src/include/duckdb/planner/operator/logical_limit_percent.hpp +0 -49
  1694. package/src/duckdb/src/include/duckdb/planner/operator/logical_show.hpp +0 -42
  1695. package/src/duckdb/src/main/settings/settings.cpp +0 -2056
  1696. package/src/duckdb/src/optimizer/join_order/estimated_properties.cpp +0 -36
  1697. package/src/duckdb/src/parser/parsed_data/comment_on_info.cpp +0 -19
  1698. package/src/duckdb/src/parser/statement/show_statement.cpp +0 -15
  1699. package/src/duckdb/src/planner/binder/statement/bind_show.cpp +0 -30
  1700. package/src/duckdb/src/planner/operator/logical_limit_percent.cpp +0 -14
  1701. package/src/duckdb/src/storage/statistics/numeric_stats_union.cpp +0 -70
  1702. package/src/duckdb/third_party/fsst/fsst_avx512.cpp +0 -140
  1703. package/src/duckdb/third_party/fsst/fsst_avx512.inc +0 -57
  1704. package/src/duckdb/third_party/fsst/fsst_avx512_unroll1.inc +0 -57
  1705. package/src/duckdb/third_party/fsst/fsst_avx512_unroll2.inc +0 -114
  1706. package/src/duckdb/third_party/fsst/fsst_avx512_unroll3.inc +0 -171
  1707. package/src/duckdb/third_party/fsst/fsst_avx512_unroll4.inc +0 -228
  1708. package/src/duckdb/third_party/parquet/parquet_constants.cpp +0 -17
  1709. package/src/duckdb/third_party/parquet/parquet_constants.h +0 -24
  1710. package/src/duckdb/third_party/re2/util/pod_array.h +0 -55
  1711. package/src/duckdb/third_party/re2/util/sparse_array.h +0 -392
  1712. package/src/duckdb/third_party/re2/util/sparse_set.h +0 -264
  1713. package/src/duckdb/third_party/zstd/include/zstd/common/fse_static.h +0 -421
  1714. package/src/duckdb/third_party/zstd/include/zstd/common/huf_static.h +0 -238
  1715. package/src/duckdb/third_party/zstd/include/zstd_static.h +0 -1070
  1716. package/src/duckdb/ub_src_core_functions.cpp +0 -6
  1717. package/src/duckdb/ub_src_core_functions_aggregate_algebraic.cpp +0 -8
  1718. package/src/duckdb/ub_src_core_functions_aggregate_distributive.cpp +0 -24
  1719. package/src/duckdb/ub_src_core_functions_aggregate_holistic.cpp +0 -12
  1720. package/src/duckdb/ub_src_core_functions_aggregate_nested.cpp +0 -6
  1721. package/src/duckdb/ub_src_core_functions_aggregate_regression.cpp +0 -14
  1722. package/src/duckdb/ub_src_core_functions_scalar_array.cpp +0 -4
  1723. package/src/duckdb/ub_src_core_functions_scalar_bit.cpp +0 -2
  1724. package/src/duckdb/ub_src_core_functions_scalar_blob.cpp +0 -6
  1725. package/src/duckdb/ub_src_core_functions_scalar_date.cpp +0 -22
  1726. package/src/duckdb/ub_src_core_functions_scalar_debug.cpp +0 -2
  1727. package/src/duckdb/ub_src_core_functions_scalar_enum.cpp +0 -2
  1728. package/src/duckdb/ub_src_core_functions_scalar_generic.cpp +0 -18
  1729. package/src/duckdb/ub_src_core_functions_scalar_list.cpp +0 -22
  1730. package/src/duckdb/ub_src_core_functions_scalar_map.cpp +0 -16
  1731. package/src/duckdb/ub_src_core_functions_scalar_math.cpp +0 -2
  1732. package/src/duckdb/ub_src_core_functions_scalar_operators.cpp +0 -2
  1733. package/src/duckdb/ub_src_core_functions_scalar_random.cpp +0 -4
  1734. package/src/duckdb/ub_src_core_functions_scalar_secret.cpp +0 -2
  1735. package/src/duckdb/ub_src_core_functions_scalar_string.cpp +0 -58
  1736. package/src/duckdb/ub_src_core_functions_scalar_struct.cpp +0 -4
  1737. package/src/duckdb/ub_src_core_functions_scalar_union.cpp +0 -6
  1738. package/src/duckdb/ub_src_execution_operator_csv_scanner.cpp +0 -18
  1739. package/src/duckdb/ub_src_function_scalar_operators.cpp +0 -8
  1740. /package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/algebraic/covar.hpp +0 -0
  1741. /package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/algebraic/stddev.hpp +0 -0
  1742. /package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/sum_helpers.hpp +0 -0
  1743. /package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/array_kernels.hpp +0 -0
  1744. /package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/function_list.hpp +0 -0
  1745. /package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/secret_functions.hpp +0 -0
  1746. /package/src/duckdb/src/function/scalar/{operators → operator}/multiply.cpp +0 -0
  1747. /package/src/duckdb/src/function/scalar/{operators → operator}/subtract.cpp +0 -0
@@ -1,10 +1,14 @@
1
1
  #include "column_writer.hpp"
2
2
 
3
3
  #include "duckdb.hpp"
4
+ #include "geo_parquet.hpp"
5
+ #include "parquet_dbp_encoder.hpp"
6
+ #include "parquet_dlba_encoder.hpp"
4
7
  #include "parquet_rle_bp_decoder.hpp"
5
8
  #include "parquet_rle_bp_encoder.hpp"
9
+ #include "parquet_bss_encoder.hpp"
10
+ #include "parquet_statistics.hpp"
6
11
  #include "parquet_writer.hpp"
7
- #include "geo_parquet.hpp"
8
12
  #ifndef DUCKDB_AMALGAMATION
9
13
  #include "duckdb/common/exception.hpp"
10
14
  #include "duckdb/common/operator/comparison_operators.hpp"
@@ -19,49 +23,32 @@
19
23
  #include "duckdb/execution/expression_executor.hpp"
20
24
  #endif
21
25
 
26
+ #include "brotli/encode.h"
22
27
  #include "lz4.hpp"
23
28
  #include "miniz_wrapper.hpp"
24
29
  #include "snappy.h"
25
30
  #include "zstd.h"
26
- #include "brotli/encode.h"
31
+ #include "zstd/common/xxhash.hpp"
32
+
33
+ #include <cmath>
27
34
 
28
35
  namespace duckdb {
29
36
 
30
37
  using namespace duckdb_parquet; // NOLINT
31
38
  using namespace duckdb_miniz; // NOLINT
32
39
 
33
- using duckdb_parquet::format::CompressionCodec;
34
- using duckdb_parquet::format::ConvertedType;
35
- using duckdb_parquet::format::Encoding;
36
- using duckdb_parquet::format::FieldRepetitionType;
37
- using duckdb_parquet::format::FileMetaData;
38
- using duckdb_parquet::format::PageHeader;
39
- using duckdb_parquet::format::PageType;
40
- using ParquetRowGroup = duckdb_parquet::format::RowGroup;
41
- using duckdb_parquet::format::Type;
40
+ using duckdb_parquet::CompressionCodec;
41
+ using duckdb_parquet::ConvertedType;
42
+ using duckdb_parquet::Encoding;
43
+ using duckdb_parquet::FieldRepetitionType;
44
+ using duckdb_parquet::FileMetaData;
45
+ using duckdb_parquet::PageHeader;
46
+ using duckdb_parquet::PageType;
47
+ using ParquetRowGroup = duckdb_parquet::RowGroup;
48
+ using duckdb_parquet::Type;
42
49
 
43
50
  #define PARQUET_DEFINE_VALID 65535
44
51
 
45
- static void VarintEncode(uint32_t val, WriteStream &ser) {
46
- do {
47
- uint8_t byte = val & 127;
48
- val >>= 7;
49
- if (val != 0) {
50
- byte |= 128;
51
- }
52
- ser.Write<uint8_t>(byte);
53
- } while (val != 0);
54
- }
55
-
56
- static uint8_t GetVarintSize(uint32_t val) {
57
- uint8_t res = 0;
58
- do {
59
- val >>= 7;
60
- res++;
61
- } while (val != 0);
62
- return res;
63
- }
64
-
65
52
  //===--------------------------------------------------------------------===//
66
53
  // ColumnWriterStatistics
67
54
  //===--------------------------------------------------------------------===//
@@ -106,7 +93,7 @@ void RleBpEncoder::BeginPrepare(uint32_t first_value) {
106
93
  void RleBpEncoder::FinishRun() {
107
94
  // last value, or value has changed
108
95
  // write out the current run
109
- byte_count += GetVarintSize(current_run_count << 1) + byte_width;
96
+ byte_count += ParquetDecodeUtils::GetVarintSize(current_run_count << 1) + byte_width;
110
97
  current_run_count = 1;
111
98
  run_count++;
112
99
  }
@@ -137,7 +124,7 @@ void RleBpEncoder::BeginWrite(WriteStream &writer, uint32_t first_value) {
137
124
 
138
125
  void RleBpEncoder::WriteRun(WriteStream &writer) {
139
126
  // write the header of the run
140
- VarintEncode(current_run_count << 1, writer);
127
+ ParquetDecodeUtils::VarintEncode(current_run_count << 1, writer);
141
128
  // now write the value
142
129
  D_ASSERT(last_value >> (byte_width * 8) == 0);
143
130
  switch (byte_width) {
@@ -224,16 +211,11 @@ void ColumnWriter::CompressPage(MemoryStream &temp_writer, size_t &compressed_si
224
211
  break;
225
212
  }
226
213
  case CompressionCodec::ZSTD: {
227
- auto configured_compression = writer.CompressionLevel();
228
- int compress_level = ZSTD_CLEVEL_DEFAULT;
229
- if (configured_compression.IsValid()) {
230
- compress_level = static_cast<int>(configured_compression.GetIndex());
231
- }
232
214
  compressed_size = duckdb_zstd::ZSTD_compressBound(temp_writer.GetPosition());
233
215
  compressed_buf = unique_ptr<data_t[]>(new data_t[compressed_size]);
234
- compressed_size =
235
- duckdb_zstd::ZSTD_compress((void *)compressed_buf.get(), compressed_size,
236
- (const void *)temp_writer.GetData(), temp_writer.GetPosition(), compress_level);
216
+ compressed_size = duckdb_zstd::ZSTD_compress((void *)compressed_buf.get(), compressed_size,
217
+ (const void *)temp_writer.GetData(), temp_writer.GetPosition(),
218
+ UnsafeNumericCast<int32_t>(writer.CompressionLevel()));
237
219
  compressed_data = compressed_buf.get();
238
220
  break;
239
221
  }
@@ -344,18 +326,20 @@ struct PageWriteInformation {
344
326
 
345
327
  class BasicColumnWriterState : public ColumnWriterState {
346
328
  public:
347
- BasicColumnWriterState(duckdb_parquet::format::RowGroup &row_group, idx_t col_idx)
329
+ BasicColumnWriterState(duckdb_parquet::RowGroup &row_group, idx_t col_idx)
348
330
  : row_group(row_group), col_idx(col_idx) {
349
331
  page_info.emplace_back();
350
332
  }
351
333
  ~BasicColumnWriterState() override = default;
352
334
 
353
- duckdb_parquet::format::RowGroup &row_group;
335
+ duckdb_parquet::RowGroup &row_group;
354
336
  idx_t col_idx;
355
337
  vector<PageInformation> page_info;
356
338
  vector<PageWriteInformation> write_info;
357
339
  unique_ptr<ColumnWriterStatistics> stats_state;
358
340
  idx_t current_page = 0;
341
+
342
+ unique_ptr<ParquetBloomFilter> bloom_filter;
359
343
  };
360
344
 
361
345
  //===--------------------------------------------------------------------===//
@@ -377,17 +361,15 @@ public:
377
361
  //! Dictionary pages must be below 2GB. Unlike data pages, there's only one dictionary page.
378
362
  //! For this reason we go with a much higher, but still a conservative upper bound of 1GB;
379
363
  static constexpr const idx_t MAX_UNCOMPRESSED_DICT_PAGE_SIZE = 1e9;
380
- //! If the dictionary has this many entries, but the compression ratio is still below 1,
381
- //! we stop creating the dictionary
364
+ //! If the dictionary has this many entries, we stop creating the dictionary
382
365
  static constexpr const idx_t DICTIONARY_ANALYZE_THRESHOLD = 1e4;
383
-
384
366
  //! The maximum size a key entry in an RLE page takes
385
367
  static constexpr const idx_t MAX_DICTIONARY_KEY_SIZE = sizeof(uint32_t);
386
368
  //! The size of encoding the string length
387
369
  static constexpr const idx_t STRING_LENGTH_SIZE = sizeof(uint32_t);
388
370
 
389
371
  public:
390
- unique_ptr<ColumnWriterState> InitializeWriteState(duckdb_parquet::format::RowGroup &row_group) override;
372
+ unique_ptr<ColumnWriterState> InitializeWriteState(duckdb_parquet::RowGroup &row_group) override;
391
373
  void Prepare(ColumnWriterState &state, ColumnWriterState *parent, Vector &vector, idx_t count) override;
392
374
  void BeginWrite(ColumnWriterState &state) override;
393
375
  void Write(ColumnWriterState &state, Vector &vector, idx_t count) override;
@@ -397,7 +379,7 @@ protected:
397
379
  static void WriteLevels(WriteStream &temp_writer, const unsafe_vector<uint16_t> &levels, idx_t max_value,
398
380
  idx_t start_offset, idx_t count);
399
381
 
400
- virtual duckdb_parquet::format::Encoding::type GetEncoding(BasicColumnWriterState &state);
382
+ virtual duckdb_parquet::Encoding::type GetEncoding(BasicColumnWriterState &state);
401
383
 
402
384
  void NextPage(BasicColumnWriterState &state);
403
385
  void FlushPage(BasicColumnWriterState &state);
@@ -425,18 +407,18 @@ protected:
425
407
  void WriteDictionary(BasicColumnWriterState &state, unique_ptr<MemoryStream> temp_writer, idx_t row_count);
426
408
  virtual void FlushDictionary(BasicColumnWriterState &state, ColumnWriterStatistics *stats);
427
409
 
428
- void SetParquetStatistics(BasicColumnWriterState &state, duckdb_parquet::format::ColumnChunk &column);
429
- void RegisterToRowGroup(duckdb_parquet::format::RowGroup &row_group);
410
+ void SetParquetStatistics(BasicColumnWriterState &state, duckdb_parquet::ColumnChunk &column);
411
+ void RegisterToRowGroup(duckdb_parquet::RowGroup &row_group);
430
412
  };
431
413
 
432
- unique_ptr<ColumnWriterState> BasicColumnWriter::InitializeWriteState(duckdb_parquet::format::RowGroup &row_group) {
414
+ unique_ptr<ColumnWriterState> BasicColumnWriter::InitializeWriteState(duckdb_parquet::RowGroup &row_group) {
433
415
  auto result = make_uniq<BasicColumnWriterState>(row_group, row_group.columns.size());
434
416
  RegisterToRowGroup(row_group);
435
417
  return std::move(result);
436
418
  }
437
419
 
438
- void BasicColumnWriter::RegisterToRowGroup(duckdb_parquet::format::RowGroup &row_group) {
439
- format::ColumnChunk column_chunk;
420
+ void BasicColumnWriter::RegisterToRowGroup(duckdb_parquet::RowGroup &row_group) {
421
+ duckdb_parquet::ColumnChunk column_chunk;
440
422
  column_chunk.__isset.meta_data = true;
441
423
  column_chunk.meta_data.codec = writer.GetCodec();
442
424
  column_chunk.meta_data.path_in_schema = schema_path;
@@ -486,7 +468,7 @@ void BasicColumnWriter::Prepare(ColumnWriterState &state_p, ColumnWriterState *p
486
468
  }
487
469
  }
488
470
 
489
- duckdb_parquet::format::Encoding::type BasicColumnWriter::GetEncoding(BasicColumnWriterState &state) {
471
+ duckdb_parquet::Encoding::type BasicColumnWriter::GetEncoding(BasicColumnWriterState &state) {
490
472
  return Encoding::PLAIN;
491
473
  }
492
474
 
@@ -646,8 +628,10 @@ void BasicColumnWriter::Write(ColumnWriterState &state_p, Vector &vector, idx_t
646
628
  }
647
629
  }
648
630
 
649
- void BasicColumnWriter::SetParquetStatistics(BasicColumnWriterState &state,
650
- duckdb_parquet::format::ColumnChunk &column_chunk) {
631
+ void BasicColumnWriter::SetParquetStatistics(BasicColumnWriterState &state, duckdb_parquet::ColumnChunk &column_chunk) {
632
+ if (!state.stats_state) {
633
+ return;
634
+ }
651
635
  if (max_repeat == 0) {
652
636
  column_chunk.meta_data.statistics.null_count = NumericCast<int64_t>(state.null_count);
653
637
  column_chunk.meta_data.statistics.__isset.null_count = true;
@@ -682,6 +666,11 @@ void BasicColumnWriter::SetParquetStatistics(BasicColumnWriterState &state,
682
666
  column_chunk.meta_data.__isset.statistics = true;
683
667
  }
684
668
  for (const auto &write_info : state.write_info) {
669
+ // only care about data page encodings, data_page_header.encoding is meaningless for dict
670
+ if (write_info.page_header.type != PageType::DATA_PAGE &&
671
+ write_info.page_header.type != PageType::DATA_PAGE_V2) {
672
+ continue;
673
+ }
685
674
  column_chunk.meta_data.encodings.push_back(write_info.page_header.data_page_header.encoding);
686
675
  }
687
676
  }
@@ -728,6 +717,11 @@ void BasicColumnWriter::FinalizeWrite(ColumnWriterState &state_p) {
728
717
  column_chunk.meta_data.total_compressed_size =
729
718
  UnsafeNumericCast<int64_t>(column_writer.GetTotalWritten() - start_offset);
730
719
  column_chunk.meta_data.total_uncompressed_size = UnsafeNumericCast<int64_t>(total_uncompressed_size);
720
+
721
+ if (state.bloom_filter) {
722
+ writer.BufferBloomFilter(state.col_idx, std::move(state.bloom_filter));
723
+ }
724
+ // which row group is this?
731
725
  }
732
726
 
733
727
  void BasicColumnWriter::FlushDictionary(BasicColumnWriterState &state, ColumnWriterStatistics *stats) {
@@ -792,21 +786,47 @@ public:
792
786
  return NumericLimits<SRC>::IsSigned() ? GetMaxValue() : string();
793
787
  }
794
788
  string GetMinValue() override {
795
- return HasStats() ? string((char *)&min, sizeof(T)) : string();
789
+ return HasStats() ? string(char_ptr_cast(&min), sizeof(T)) : string();
796
790
  }
797
791
  string GetMaxValue() override {
798
- return HasStats() ? string((char *)&max, sizeof(T)) : string();
792
+ return HasStats() ? string(char_ptr_cast(&max), sizeof(T)) : string();
799
793
  }
800
794
  };
801
795
 
802
796
  struct BaseParquetOperator {
797
+
798
+ template <class SRC, class TGT>
799
+ static void WriteToStream(const TGT &input, WriteStream &ser) {
800
+ ser.WriteData(const_data_ptr_cast(&input), sizeof(TGT));
801
+ }
802
+
803
+ template <class SRC, class TGT>
804
+ static uint64_t XXHash64(const TGT &target_value) {
805
+ return duckdb_zstd::XXH64(&target_value, sizeof(target_value), 0);
806
+ }
807
+
808
+ template <class SRC, class TGT>
809
+ static unique_ptr<ColumnWriterStatistics> InitializeStats() {
810
+ return nullptr;
811
+ }
812
+
813
+ template <class SRC, class TGT>
814
+ static void HandleStats(ColumnWriterStatistics *stats, TGT target_value) {
815
+ }
816
+ };
817
+
818
+ struct ParquetCastOperator : public BaseParquetOperator {
819
+ template <class SRC, class TGT>
820
+ static TGT Operation(SRC input) {
821
+ return TGT(input);
822
+ }
803
823
  template <class SRC, class TGT>
804
824
  static unique_ptr<ColumnWriterStatistics> InitializeStats() {
805
825
  return make_uniq<NumericStatisticsState<SRC, TGT, BaseParquetOperator>>();
806
826
  }
807
827
 
808
828
  template <class SRC, class TGT>
809
- static void HandleStats(ColumnWriterStatistics *stats, SRC source_value, TGT target_value) {
829
+ static void HandleStats(ColumnWriterStatistics *stats, TGT target_value) {
810
830
  auto &numeric_stats = (NumericStatisticsState<SRC, TGT, BaseParquetOperator> &)*stats;
811
831
  if (LessThan::Operation(target_value, numeric_stats.min)) {
812
832
  numeric_stats.min = target_value;
@@ -817,24 +837,165 @@ struct BaseParquetOperator {
817
837
  }
818
838
  };
819
839
 
820
- struct ParquetCastOperator : public BaseParquetOperator {
840
+ struct ParquetTimestampNSOperator : public ParquetCastOperator {
821
841
  template <class SRC, class TGT>
822
842
  static TGT Operation(SRC input) {
823
843
  return TGT(input);
824
844
  }
825
845
  };
826
846
 
827
- struct ParquetTimestampNSOperator : public BaseParquetOperator {
847
+ struct ParquetTimestampSOperator : public ParquetCastOperator {
828
848
  template <class SRC, class TGT>
829
849
  static TGT Operation(SRC input) {
830
- return TGT(input);
850
+ return Timestamp::FromEpochSecondsPossiblyInfinite(input).value;
851
+ }
852
+ };
853
+
854
+ class StringStatisticsState : public ColumnWriterStatistics {
855
+ static constexpr const idx_t MAX_STRING_STATISTICS_SIZE = 10000;
856
+
857
+ public:
858
+ StringStatisticsState() : has_stats(false), values_too_big(false), min(), max() {
859
+ }
860
+
861
+ bool has_stats;
862
+ bool values_too_big;
863
+ string min;
864
+ string max;
865
+
866
+ public:
867
+ bool HasStats() override {
868
+ return has_stats;
869
+ }
870
+
871
+ void Update(const string_t &val) {
872
+ if (values_too_big) {
873
+ return;
874
+ }
875
+ auto str_len = val.GetSize();
876
+ if (str_len > MAX_STRING_STATISTICS_SIZE) {
877
+ // we avoid gathering stats when individual string values are too large
878
+ // this is because the statistics are copied into the Parquet file meta data in uncompressed format
879
+ // ideally we avoid placing several mega or giga-byte long strings there
880
+ // we put a threshold of 10KB, if we see strings that exceed this threshold we avoid gathering stats
881
+ values_too_big = true;
882
+ has_stats = false;
883
+ min = string();
884
+ max = string();
885
+ return;
886
+ }
887
+ if (!has_stats || LessThan::Operation(val, string_t(min))) {
888
+ min = val.GetString();
889
+ }
890
+ if (!has_stats || GreaterThan::Operation(val, string_t(max))) {
891
+ max = val.GetString();
892
+ }
893
+ has_stats = true;
894
+ }
895
+
896
+ string GetMin() override {
897
+ return GetMinValue();
898
+ }
899
+ string GetMax() override {
900
+ return GetMaxValue();
901
+ }
902
+ string GetMinValue() override {
903
+ return HasStats() ? min : string();
904
+ }
905
+ string GetMaxValue() override {
906
+ return HasStats() ? max : string();
831
907
  }
832
908
  };
833
909
 
834
- struct ParquetTimestampSOperator : public BaseParquetOperator {
910
+ struct ParquetStringOperator : public BaseParquetOperator {
835
911
  template <class SRC, class TGT>
836
912
  static TGT Operation(SRC input) {
837
- return Timestamp::FromEpochSecondsPossiblyInfinite(input).value;
913
+ return input;
914
+ }
915
+
916
+ template <class SRC, class TGT>
917
+ static unique_ptr<ColumnWriterStatistics> InitializeStats() {
918
+ return make_uniq<StringStatisticsState>();
919
+ }
920
+
921
+ template <class SRC, class TGT>
922
+ static void HandleStats(ColumnWriterStatistics *stats, TGT target_value) {
923
+ auto &string_stats = stats->Cast<StringStatisticsState>();
924
+ string_stats.Update(target_value);
925
+ }
926
+
927
+ template <class SRC, class TGT>
928
+ static void WriteToStream(const TGT &target_value, WriteStream &ser) {
929
+ ser.Write<uint32_t>(target_value.GetSize());
930
+ ser.WriteData(const_data_ptr_cast(target_value.GetData()), target_value.GetSize());
931
+ }
932
+
933
+ template <class SRC, class TGT>
934
+ static uint64_t XXHash64(const TGT &target_value) {
935
+ return duckdb_zstd::XXH64(target_value.GetData(), target_value.GetSize(), 0);
936
+ }
937
+ };
938
+
939
+ struct ParquetIntervalTargetType {
940
+ static constexpr const idx_t PARQUET_INTERVAL_SIZE = 12;
941
+ data_t bytes[PARQUET_INTERVAL_SIZE];
942
+ };
943
+
944
+ struct ParquetIntervalOperator : public BaseParquetOperator {
945
+ template <class SRC, class TGT>
946
+ static TGT Operation(SRC input) {
947
+
948
+ if (input.days < 0 || input.months < 0 || input.micros < 0) {
949
+ throw IOException("Parquet files do not support negative intervals");
950
+ }
951
+ TGT result;
952
+ Store<uint32_t>(input.months, result.bytes);
953
+ Store<uint32_t>(input.days, result.bytes + sizeof(uint32_t));
954
+ Store<uint32_t>(input.micros / 1000, result.bytes + sizeof(uint32_t) * 2);
955
+ return result;
956
+ }
957
+
958
+ template <class SRC, class TGT>
959
+ static void WriteToStream(const TGT &target_value, WriteStream &ser) {
960
+ ser.WriteData(target_value.bytes, ParquetIntervalTargetType::PARQUET_INTERVAL_SIZE);
961
+ }
962
+
963
+ template <class SRC, class TGT>
964
+ static uint64_t XXHash64(const TGT &target_value) {
965
+ return duckdb_zstd::XXH64(target_value.bytes, ParquetIntervalTargetType::PARQUET_INTERVAL_SIZE, 0);
966
+ }
967
+ };
968
+
969
+ struct ParquetUUIDTargetType {
970
+ static constexpr const idx_t PARQUET_UUID_SIZE = 16;
971
+ data_t bytes[PARQUET_UUID_SIZE];
972
+ };
973
+
974
+ struct ParquetUUIDOperator : public BaseParquetOperator {
975
+ template <class SRC, class TGT>
976
+ static TGT Operation(SRC input) {
977
+ TGT result;
978
+ uint64_t high_bytes = input.upper ^ (int64_t(1) << 63);
979
+ uint64_t low_bytes = input.lower;
980
+ for (idx_t i = 0; i < sizeof(uint64_t); i++) {
981
+ auto shift_count = (sizeof(uint64_t) - i - 1) * 8;
982
+ result.bytes[i] = (high_bytes >> shift_count) & 0xFF;
983
+ }
984
+ for (idx_t i = 0; i < sizeof(uint64_t); i++) {
985
+ auto shift_count = (sizeof(uint64_t) - i - 1) * 8;
986
+ result.bytes[sizeof(uint64_t) + i] = (low_bytes >> shift_count) & 0xFF;
987
+ }
988
+ return result;
989
+ }
990
+
991
+ template <class SRC, class TGT>
992
+ static void WriteToStream(const TGT &target_value, WriteStream &ser) {
993
+ ser.WriteData(target_value.bytes, ParquetUUIDTargetType::PARQUET_UUID_SIZE);
994
+ }
995
+
996
+ template <class SRC, class TGT>
997
+ static uint64_t XXHash64(const TGT &target_value) {
998
+ return duckdb_zstd::XXH64(target_value.bytes, ParquetUUIDTargetType::PARQUET_UUID_SIZE, 0);
838
999
  }
839
1000
  };
840
1001
 
@@ -845,7 +1006,7 @@ struct ParquetTimeTZOperator : public BaseParquetOperator {
845
1006
  }
846
1007
  };
847
1008
 
848
- struct ParquetHugeintOperator {
1009
+ struct ParquetHugeintOperator : public BaseParquetOperator {
849
1010
  template <class SRC, class TGT>
850
1011
  static TGT Operation(SRC input) {
851
1012
  return Hugeint::Cast<double>(input);
@@ -857,11 +1018,11 @@ struct ParquetHugeintOperator {
857
1018
  }
858
1019
 
859
1020
  template <class SRC, class TGT>
860
- static void HandleStats(ColumnWriterStatistics *stats, SRC source_value, TGT target_value) {
1021
+ static void HandleStats(ColumnWriterStatistics *stats, TGT target_value) {
861
1022
  }
862
1023
  };
863
1024
 
864
- struct ParquetUhugeintOperator {
1025
+ struct ParquetUhugeintOperator : public BaseParquetOperator {
865
1026
  template <class SRC, class TGT>
866
1027
  static TGT Operation(SRC input) {
867
1028
  return Uhugeint::Cast<double>(input);
@@ -873,16 +1034,13 @@ struct ParquetUhugeintOperator {
873
1034
  }
874
1035
 
875
1036
  template <class SRC, class TGT>
876
- static void HandleStats(ColumnWriterStatistics *stats, SRC source_value, TGT target_value) {
1037
+ static void HandleStats(ColumnWriterStatistics *stats, TGT target_value) {
877
1038
  }
878
1039
  };
879
1040
 
880
1041
  template <class SRC, class TGT, class OP = ParquetCastOperator>
881
1042
  static void TemplatedWritePlain(Vector &col, ColumnWriterStatistics *stats, const idx_t chunk_start,
882
- const idx_t chunk_end, ValidityMask &mask, WriteStream &ser) {
883
- static constexpr idx_t WRITE_COMBINER_CAPACITY = 8;
884
- TGT write_combiner[WRITE_COMBINER_CAPACITY];
885
- idx_t write_combiner_count = 0;
1043
+ const idx_t chunk_end, const ValidityMask &mask, WriteStream &ser) {
886
1044
 
887
1045
  const auto *ptr = FlatVector::GetData<SRC>(col);
888
1046
  for (idx_t r = chunk_start; r < chunk_end; r++) {
@@ -890,368 +1048,589 @@ static void TemplatedWritePlain(Vector &col, ColumnWriterStatistics *stats, cons
890
1048
  continue;
891
1049
  }
892
1050
  TGT target_value = OP::template Operation<SRC, TGT>(ptr[r]);
893
- OP::template HandleStats<SRC, TGT>(stats, ptr[r], target_value);
894
- write_combiner[write_combiner_count++] = target_value;
895
- if (write_combiner_count == WRITE_COMBINER_CAPACITY) {
896
- ser.WriteData(const_data_ptr_cast(write_combiner), WRITE_COMBINER_CAPACITY * sizeof(TGT));
897
- write_combiner_count = 0;
898
- }
1051
+ OP::template HandleStats<SRC, TGT>(stats, target_value);
1052
+ OP::template WriteToStream<SRC, TGT>(target_value, ser);
899
1053
  }
900
- ser.WriteData(const_data_ptr_cast(write_combiner), write_combiner_count * sizeof(TGT));
901
1054
  }
902
1055
 
903
- template <class SRC, class TGT, class OP = ParquetCastOperator>
904
- class StandardColumnWriter : public BasicColumnWriter {
905
- public:
906
- StandardColumnWriter(ParquetWriter &writer, idx_t schema_idx, vector<string> schema_path_p, // NOLINT
907
- idx_t max_repeat, idx_t max_define, bool can_have_nulls)
908
- : BasicColumnWriter(writer, schema_idx, std::move(schema_path_p), max_repeat, max_define, can_have_nulls) {
909
- }
910
- ~StandardColumnWriter() override = default;
911
-
1056
+ template <class T>
1057
+ class StandardColumnWriterState : public BasicColumnWriterState {
912
1058
  public:
913
- unique_ptr<ColumnWriterStatistics> InitializeStatsState() override {
914
- return OP::template InitializeStats<SRC, TGT>();
1059
+ StandardColumnWriterState(duckdb_parquet::RowGroup &row_group, idx_t col_idx)
1060
+ : BasicColumnWriterState(row_group, col_idx) {
915
1061
  }
1062
+ ~StandardColumnWriterState() override = default;
916
1063
 
917
- void WriteVector(WriteStream &temp_writer, ColumnWriterStatistics *stats, ColumnWriterPageState *page_state,
918
- Vector &input_column, idx_t chunk_start, idx_t chunk_end) override {
919
- auto &mask = FlatVector::Validity(input_column);
920
- TemplatedWritePlain<SRC, TGT, OP>(input_column, stats, chunk_start, chunk_end, mask, temp_writer);
921
- }
1064
+ // analysis state for integer values for DELTA_BINARY_PACKED/DELTA_LENGTH_BYTE_ARRAY
1065
+ idx_t total_value_count = 0;
1066
+ idx_t total_string_size = 0;
922
1067
 
923
- idx_t GetRowSize(const Vector &vector, const idx_t index, const BasicColumnWriterState &state) const override {
924
- return sizeof(TGT);
925
- }
1068
+ unordered_map<T, uint32_t> dictionary;
1069
+ duckdb_parquet::Encoding::type encoding;
926
1070
  };
927
1071
 
928
- //===--------------------------------------------------------------------===//
929
- // Boolean Column Writer
930
- //===--------------------------------------------------------------------===//
931
- class BooleanStatisticsState : public ColumnWriterStatistics {
1072
+ template <class SRC, class TGT>
1073
+ class StandardWriterPageState : public ColumnWriterPageState {
932
1074
  public:
933
- BooleanStatisticsState() : min(true), max(false) {
1075
+ explicit StandardWriterPageState(const idx_t total_value_count, const idx_t total_string_size,
1076
+ Encoding::type encoding_p, const unordered_map<SRC, uint32_t> &dictionary_p)
1077
+ : encoding(encoding_p), dbp_initialized(false), dbp_encoder(total_value_count), dlba_initialized(false),
1078
+ dlba_encoder(total_value_count, total_string_size), bss_encoder(total_value_count, sizeof(TGT)),
1079
+ dictionary(dictionary_p), dict_written_value(false),
1080
+ dict_bit_width(RleBpDecoder::ComputeBitWidth(dictionary.size())), dict_encoder(dict_bit_width) {
934
1081
  }
1082
+ duckdb_parquet::Encoding::type encoding;
935
1083
 
936
- bool min;
937
- bool max;
1084
+ bool dbp_initialized;
1085
+ DbpEncoder dbp_encoder;
938
1086
 
939
- public:
940
- bool HasStats() override {
941
- return !(min && !max);
942
- }
1087
+ bool dlba_initialized;
1088
+ DlbaEncoder dlba_encoder;
943
1089
 
944
- string GetMin() override {
945
- return GetMinValue();
946
- }
947
- string GetMax() override {
948
- return GetMaxValue();
949
- }
950
- string GetMinValue() override {
951
- return HasStats() ? string(const_char_ptr_cast(&min), sizeof(bool)) : string();
952
- }
953
- string GetMaxValue() override {
954
- return HasStats() ? string(const_char_ptr_cast(&max), sizeof(bool)) : string();
955
- }
956
- };
1090
+ BssEncoder bss_encoder;
957
1091
 
958
- class BooleanWriterPageState : public ColumnWriterPageState {
959
- public:
960
- uint8_t byte = 0;
961
- uint8_t byte_pos = 0;
1092
+ const unordered_map<SRC, uint32_t> &dictionary;
1093
+ bool dict_written_value;
1094
+ uint32_t dict_bit_width;
1095
+ RleBpEncoder dict_encoder;
962
1096
  };
963
1097
 
964
- class BooleanColumnWriter : public BasicColumnWriter {
965
- public:
966
- BooleanColumnWriter(ParquetWriter &writer, idx_t schema_idx, vector<string> schema_path_p, idx_t max_repeat,
967
- idx_t max_define, bool can_have_nulls)
968
- : BasicColumnWriter(writer, schema_idx, std::move(schema_path_p), max_repeat, max_define, can_have_nulls) {
969
- }
970
- ~BooleanColumnWriter() override = default;
1098
+ namespace dbp_encoder {
971
1099
 
972
- public:
973
- unique_ptr<ColumnWriterStatistics> InitializeStatsState() override {
974
- return make_uniq<BooleanStatisticsState>();
975
- }
1100
+ template <class T>
1101
+ void BeginWrite(DbpEncoder &encoder, WriteStream &writer, const T &first_value) {
1102
+ throw InternalException("Can't write type to DELTA_BINARY_PACKED column");
1103
+ }
976
1104
 
977
- void WriteVector(WriteStream &temp_writer, ColumnWriterStatistics *stats_p, ColumnWriterPageState *state_p,
978
- Vector &input_column, idx_t chunk_start, idx_t chunk_end) override {
979
- auto &stats = stats_p->Cast<BooleanStatisticsState>();
980
- auto &state = state_p->Cast<BooleanWriterPageState>();
981
- auto &mask = FlatVector::Validity(input_column);
1105
+ template <>
1106
+ void BeginWrite(DbpEncoder &encoder, WriteStream &writer, const int64_t &first_value) {
1107
+ encoder.BeginWrite(writer, first_value);
1108
+ }
982
1109
 
983
- auto *ptr = FlatVector::GetData<bool>(input_column);
984
- for (idx_t r = chunk_start; r < chunk_end; r++) {
985
- if (mask.RowIsValid(r)) {
986
- // only encode if non-null
987
- if (ptr[r]) {
988
- stats.max = true;
989
- state.byte |= 1 << state.byte_pos;
990
- } else {
991
- stats.min = false;
992
- }
993
- state.byte_pos++;
1110
+ template <>
1111
+ void BeginWrite(DbpEncoder &encoder, WriteStream &writer, const int32_t &first_value) {
1112
+ BeginWrite(encoder, writer, UnsafeNumericCast<int64_t>(first_value));
1113
+ }
994
1114
 
995
- if (state.byte_pos == 8) {
996
- temp_writer.Write<uint8_t>(state.byte);
997
- state.byte = 0;
998
- state.byte_pos = 0;
999
- }
1000
- }
1001
- }
1002
- }
1115
+ template <>
1116
+ void BeginWrite(DbpEncoder &encoder, WriteStream &writer, const uint64_t &first_value) {
1117
+ encoder.BeginWrite(writer, UnsafeNumericCast<int64_t>(first_value));
1118
+ }
1003
1119
 
1004
- unique_ptr<ColumnWriterPageState> InitializePageState(BasicColumnWriterState &state) override {
1005
- return make_uniq<BooleanWriterPageState>();
1006
- }
1120
+ template <>
1121
+ void BeginWrite(DbpEncoder &encoder, WriteStream &writer, const uint32_t &first_value) {
1122
+ BeginWrite(encoder, writer, UnsafeNumericCast<int64_t>(first_value));
1123
+ }
1007
1124
 
1008
- void FlushPageState(WriteStream &temp_writer, ColumnWriterPageState *state_p) override {
1009
- auto &state = state_p->Cast<BooleanWriterPageState>();
1010
- if (state.byte_pos > 0) {
1011
- temp_writer.Write<uint8_t>(state.byte);
1012
- state.byte = 0;
1013
- state.byte_pos = 0;
1014
- }
1015
- }
1125
+ template <class T>
1126
+ void WriteValue(DbpEncoder &encoder, WriteStream &writer, const T &value) {
1127
+ throw InternalException("Can't write type to DELTA_BINARY_PACKED column");
1128
+ }
1016
1129
 
1017
- idx_t GetRowSize(const Vector &vector, const idx_t index, const BasicColumnWriterState &state) const override {
1018
- return sizeof(bool);
1019
- }
1020
- };
1130
+ template <>
1131
+ void WriteValue(DbpEncoder &encoder, WriteStream &writer, const int64_t &value) {
1132
+ encoder.WriteValue(writer, value);
1133
+ }
1021
1134
 
1022
- //===--------------------------------------------------------------------===//
1023
- // Decimal Column Writer
1024
- //===--------------------------------------------------------------------===//
1025
- static void WriteParquetDecimal(hugeint_t input, data_ptr_t result) {
1026
- bool positive = input >= 0;
1027
- // numbers are stored as two's complement so some muckery is required
1028
- if (!positive) {
1029
- input = NumericLimits<hugeint_t>::Maximum() + input + 1;
1030
- }
1031
- uint64_t high_bytes = uint64_t(input.upper);
1032
- uint64_t low_bytes = input.lower;
1135
+ template <>
1136
+ void WriteValue(DbpEncoder &encoder, WriteStream &writer, const int32_t &value) {
1137
+ WriteValue(encoder, writer, UnsafeNumericCast<int64_t>(value));
1138
+ }
1033
1139
 
1034
- for (idx_t i = 0; i < sizeof(uint64_t); i++) {
1035
- auto shift_count = (sizeof(uint64_t) - i - 1) * 8;
1036
- result[i] = (high_bytes >> shift_count) & 0xFF;
1037
- }
1038
- for (idx_t i = 0; i < sizeof(uint64_t); i++) {
1039
- auto shift_count = (sizeof(uint64_t) - i - 1) * 8;
1040
- result[sizeof(uint64_t) + i] = (low_bytes >> shift_count) & 0xFF;
1041
- }
1042
- if (!positive) {
1043
- result[0] |= 0x80;
1044
- }
1140
+ template <>
1141
+ void WriteValue(DbpEncoder &encoder, WriteStream &writer, const uint64_t &value) {
1142
+ encoder.WriteValue(writer, UnsafeNumericCast<int64_t>(value));
1045
1143
  }
1046
1144
 
1047
- class FixedDecimalStatistics : public ColumnWriterStatistics {
1145
+ template <>
1146
+ void WriteValue(DbpEncoder &encoder, WriteStream &writer, const uint32_t &value) {
1147
+ WriteValue(encoder, writer, UnsafeNumericCast<int64_t>(value));
1148
+ }
1149
+
1150
+ } // namespace dbp_encoder
1151
+
1152
+ namespace dlba_encoder {
1153
+
1154
+ template <class T>
1155
+ void BeginWrite(DlbaEncoder &encoder, WriteStream &writer, const T &first_value) {
1156
+ throw InternalException("Can't write type to DELTA_LENGTH_BYTE_ARRAY column");
1157
+ }
1158
+
1159
+ template <>
1160
+ void BeginWrite(DlbaEncoder &encoder, WriteStream &writer, const string_t &first_value) {
1161
+ encoder.BeginWrite(writer, first_value);
1162
+ }
1163
+
1164
+ template <class T>
1165
+ void WriteValue(DlbaEncoder &encoder, WriteStream &writer, const T &value) {
1166
+ throw InternalException("Can't write type to DELTA_LENGTH_BYTE_ARRAY column");
1167
+ }
1168
+
1169
+ template <>
1170
+ void WriteValue(DlbaEncoder &encoder, WriteStream &writer, const string_t &value) {
1171
+ encoder.WriteValue(writer, value);
1172
+ }
1173
+
1174
+ // helpers to get size from strings
1175
+ template <class SRC>
1176
+ static constexpr idx_t GetDlbaStringSize(const SRC &src_value) {
1177
+ return 0;
1178
+ }
1179
+
1180
+ template <>
1181
+ idx_t GetDlbaStringSize(const string_t &src_value) {
1182
+ return src_value.GetSize();
1183
+ }
1184
+
1185
+ } // namespace dlba_encoder
1186
+
1187
+ namespace bss_encoder {
1188
+
1189
+ template <class T>
1190
+ void WriteValue(BssEncoder &encoder, const T &value) {
1191
+ throw InternalException("Can't write type to BYTE_STREAM_SPLIT column");
1192
+ }
1193
+
1194
+ template <>
1195
+ void WriteValue(BssEncoder &encoder, const float &value) {
1196
+ encoder.WriteValue(value);
1197
+ }
1198
+
1199
+ template <>
1200
+ void WriteValue(BssEncoder &encoder, const double &value) {
1201
+ encoder.WriteValue(value);
1202
+ }
1203
+
1204
+ } // namespace bss_encoder
1205
+
1206
+ template <class SRC, class TGT, class OP = ParquetCastOperator>
1207
+ class StandardColumnWriter : public BasicColumnWriter {
1048
1208
  public:
1049
- FixedDecimalStatistics() : min(NumericLimits<hugeint_t>::Maximum()), max(NumericLimits<hugeint_t>::Minimum()) {
1209
+ StandardColumnWriter(ParquetWriter &writer, idx_t schema_idx, vector<string> schema_path_p, // NOLINT
1210
+ idx_t max_repeat, idx_t max_define, bool can_have_nulls)
1211
+ : BasicColumnWriter(writer, schema_idx, std::move(schema_path_p), max_repeat, max_define, can_have_nulls) {
1050
1212
  }
1051
-
1052
- hugeint_t min;
1053
- hugeint_t max;
1213
+ ~StandardColumnWriter() override = default;
1054
1214
 
1055
1215
  public:
1056
- string GetStats(hugeint_t &input) {
1057
- data_t buffer[16];
1058
- WriteParquetDecimal(input, buffer);
1059
- return string(const_char_ptr_cast(buffer), 16);
1216
+ unique_ptr<ColumnWriterState> InitializeWriteState(duckdb_parquet::RowGroup &row_group) override {
1217
+ auto result = make_uniq<StandardColumnWriterState<SRC>>(row_group, row_group.columns.size());
1218
+ result->encoding = Encoding::RLE_DICTIONARY;
1219
+ RegisterToRowGroup(row_group);
1220
+ return std::move(result);
1060
1221
  }
1061
1222
 
1062
- bool HasStats() override {
1063
- return min <= max;
1223
+ unique_ptr<ColumnWriterPageState> InitializePageState(BasicColumnWriterState &state_p) override {
1224
+ auto &state = state_p.Cast<StandardColumnWriterState<SRC>>();
1225
+
1226
+ auto result = make_uniq<StandardWriterPageState<SRC, TGT>>(state.total_value_count, state.total_string_size,
1227
+ state.encoding, state.dictionary);
1228
+ return std::move(result);
1064
1229
  }
1065
1230
 
1066
- void Update(hugeint_t &val) {
1067
- if (LessThan::Operation(val, min)) {
1068
- min = val;
1069
- }
1070
- if (GreaterThan::Operation(val, max)) {
1071
- max = val;
1231
+ void FlushPageState(WriteStream &temp_writer, ColumnWriterPageState *state_p) override {
1232
+ auto &page_state = state_p->Cast<StandardWriterPageState<SRC, TGT>>();
1233
+ switch (page_state.encoding) {
1234
+ case Encoding::DELTA_BINARY_PACKED:
1235
+ if (!page_state.dbp_initialized) {
1236
+ dbp_encoder::BeginWrite<int64_t>(page_state.dbp_encoder, temp_writer, 0);
1237
+ }
1238
+ page_state.dbp_encoder.FinishWrite(temp_writer);
1239
+ break;
1240
+ case Encoding::RLE_DICTIONARY:
1241
+ D_ASSERT(page_state.dict_bit_width != 0);
1242
+ if (!page_state.dict_written_value) {
1243
+ // all values are null
1244
+ // just write the bit width
1245
+ temp_writer.Write<uint8_t>(page_state.dict_bit_width);
1246
+ return;
1247
+ }
1248
+ page_state.dict_encoder.FinishWrite(temp_writer);
1249
+ break;
1250
+ case Encoding::DELTA_LENGTH_BYTE_ARRAY:
1251
+ if (!page_state.dlba_initialized) {
1252
+ dlba_encoder::BeginWrite<string_t>(page_state.dlba_encoder, temp_writer, string_t(""));
1253
+ }
1254
+ page_state.dlba_encoder.FinishWrite(temp_writer);
1255
+ break;
1256
+ case Encoding::BYTE_STREAM_SPLIT:
1257
+ page_state.bss_encoder.FinishWrite(temp_writer);
1258
+ break;
1259
+ case Encoding::PLAIN:
1260
+ break;
1261
+ default:
1262
+ throw InternalException("Unknown encoding");
1072
1263
  }
1073
1264
  }
1074
1265
 
1075
- string GetMin() override {
1076
- return GetMinValue();
1266
+ Encoding::type GetEncoding(BasicColumnWriterState &state_p) override {
1267
+ auto &state = state_p.Cast<StandardColumnWriterState<SRC>>();
1268
+ return state.encoding;
1077
1269
  }
1078
- string GetMax() override {
1079
- return GetMaxValue();
1080
- }
1081
- string GetMinValue() override {
1082
- return HasStats() ? GetStats(min) : string();
1270
+
1271
+ bool HasAnalyze() override {
1272
+ return true;
1083
1273
  }
1084
- string GetMaxValue() override {
1085
- return HasStats() ? GetStats(max) : string();
1274
+
1275
+ void Analyze(ColumnWriterState &state_p, ColumnWriterState *parent, Vector &vector, idx_t count) override {
1276
+ auto &state = state_p.Cast<StandardColumnWriterState<SRC>>();
1277
+
1278
+ auto data_ptr = FlatVector::GetData<SRC>(vector);
1279
+ idx_t vector_index = 0;
1280
+ uint32_t new_value_index = state.dictionary.size();
1281
+
1282
+ const bool check_parent_empty = parent && !parent->is_empty.empty();
1283
+ const idx_t parent_index = state.definition_levels.size();
1284
+
1285
+ const idx_t vcount =
1286
+ check_parent_empty ? parent->definition_levels.size() - state.definition_levels.size() : count;
1287
+
1288
+ const auto &validity = FlatVector::Validity(vector);
1289
+
1290
+ for (idx_t i = 0; i < vcount; i++) {
1291
+ if (check_parent_empty && parent->is_empty[parent_index + i]) {
1292
+ continue;
1293
+ }
1294
+ if (validity.RowIsValid(vector_index)) {
1295
+ const auto &src_value = data_ptr[vector_index];
1296
+ if (state.dictionary.size() <= writer.DictionarySizeLimit()) {
1297
+ if (state.dictionary.find(src_value) == state.dictionary.end()) {
1298
+ state.dictionary[src_value] = new_value_index;
1299
+ new_value_index++;
1300
+ }
1301
+ }
1302
+ state.total_value_count++;
1303
+ state.total_string_size += dlba_encoder::GetDlbaStringSize(src_value);
1304
+ }
1305
+ vector_index++;
1306
+ }
1086
1307
  }
1087
- };
1088
1308
 
1089
- class FixedDecimalColumnWriter : public BasicColumnWriter {
1090
- public:
1091
- FixedDecimalColumnWriter(ParquetWriter &writer, idx_t schema_idx, vector<string> schema_path_p, idx_t max_repeat,
1092
- idx_t max_define, bool can_have_nulls)
1093
- : BasicColumnWriter(writer, schema_idx, std::move(schema_path_p), max_repeat, max_define, can_have_nulls) {
1309
+ void FinalizeAnalyze(ColumnWriterState &state_p) override {
1310
+ const auto type = writer.GetType(schema_idx);
1311
+
1312
+ auto &state = state_p.Cast<StandardColumnWriterState<SRC>>();
1313
+ if (state.dictionary.size() == 0 || state.dictionary.size() > writer.DictionarySizeLimit()) {
1314
+ // If we aren't doing dictionary encoding, the following encodings are virtually always better than PLAIN
1315
+ switch (type) {
1316
+ case Type::type::INT32:
1317
+ case Type::type::INT64:
1318
+ state.encoding = Encoding::DELTA_BINARY_PACKED;
1319
+ break;
1320
+ case Type::type::BYTE_ARRAY:
1321
+ state.encoding = Encoding::DELTA_LENGTH_BYTE_ARRAY;
1322
+ break;
1323
+ case Type::type::FLOAT:
1324
+ case Type::type::DOUBLE:
1325
+ state.encoding = Encoding::BYTE_STREAM_SPLIT;
1326
+ break;
1327
+ default:
1328
+ state.encoding = Encoding::PLAIN;
1329
+ }
1330
+ state.dictionary.clear();
1331
+ }
1094
1332
  }
1095
- ~FixedDecimalColumnWriter() override = default;
1096
1333
 
1097
- public:
1098
1334
  unique_ptr<ColumnWriterStatistics> InitializeStatsState() override {
1099
- return make_uniq<FixedDecimalStatistics>();
1335
+ return OP::template InitializeStats<SRC, TGT>();
1100
1336
  }
1101
1337
 
1102
- void WriteVector(WriteStream &temp_writer, ColumnWriterStatistics *stats_p, ColumnWriterPageState *page_state,
1338
+ bool HasDictionary(BasicColumnWriterState &state_p) override {
1339
+ auto &state = state_p.Cast<StandardColumnWriterState<SRC>>();
1340
+ return state.encoding == Encoding::RLE_DICTIONARY;
1341
+ }
1342
+
1343
+ idx_t DictionarySize(BasicColumnWriterState &state_p) override {
1344
+ auto &state = state_p.Cast<StandardColumnWriterState<SRC>>();
1345
+ return state.dictionary.size();
1346
+ }
1347
+
1348
+ void WriteVector(WriteStream &temp_writer, ColumnWriterStatistics *stats, ColumnWriterPageState *page_state_p,
1103
1349
  Vector &input_column, idx_t chunk_start, idx_t chunk_end) override {
1104
- auto &mask = FlatVector::Validity(input_column);
1105
- auto *ptr = FlatVector::GetData<hugeint_t>(input_column);
1106
- auto &stats = stats_p->Cast<FixedDecimalStatistics>();
1350
+ auto &page_state = page_state_p->Cast<StandardWriterPageState<SRC, TGT>>();
1107
1351
 
1108
- data_t temp_buffer[16];
1109
- for (idx_t r = chunk_start; r < chunk_end; r++) {
1110
- if (mask.RowIsValid(r)) {
1111
- stats.Update(ptr[r]);
1112
- WriteParquetDecimal(ptr[r], temp_buffer);
1113
- temp_writer.WriteData(temp_buffer, 16);
1352
+ const auto &mask = FlatVector::Validity(input_column);
1353
+ const auto *data_ptr = FlatVector::GetData<SRC>(input_column);
1354
+
1355
+ switch (page_state.encoding) {
1356
+ case Encoding::RLE_DICTIONARY: {
1357
+ for (idx_t r = chunk_start; r < chunk_end; r++) {
1358
+ if (!mask.RowIsValid(r)) {
1359
+ continue;
1360
+ }
1361
+ auto &src_val = data_ptr[r];
1362
+ auto value_index = page_state.dictionary.at(src_val);
1363
+ if (!page_state.dict_written_value) {
1364
+ // first value
1365
+ // write the bit-width as a one-byte entry
1366
+ temp_writer.Write<uint8_t>(page_state.dict_bit_width);
1367
+ // now begin writing the actual value
1368
+ page_state.dict_encoder.BeginWrite(temp_writer, value_index);
1369
+ page_state.dict_written_value = true;
1370
+ } else {
1371
+ page_state.dict_encoder.WriteValue(temp_writer, value_index);
1372
+ }
1373
+ }
1374
+ break;
1375
+ }
1376
+ case Encoding::DELTA_BINARY_PACKED: {
1377
+ idx_t r = chunk_start;
1378
+ if (!page_state.dbp_initialized) {
1379
+ // find first non-null value
1380
+ for (; r < chunk_end; r++) {
1381
+ if (!mask.RowIsValid(r)) {
1382
+ continue;
1383
+ }
1384
+ const TGT target_value = OP::template Operation<SRC, TGT>(data_ptr[r]);
1385
+ OP::template HandleStats<SRC, TGT>(stats, target_value);
1386
+ dbp_encoder::BeginWrite(page_state.dbp_encoder, temp_writer, target_value);
1387
+ page_state.dbp_initialized = true;
1388
+ r++; // skip over
1389
+ break;
1390
+ }
1391
+ }
1392
+
1393
+ for (; r < chunk_end; r++) {
1394
+ if (!mask.RowIsValid(r)) {
1395
+ continue;
1396
+ }
1397
+ const TGT target_value = OP::template Operation<SRC, TGT>(data_ptr[r]);
1398
+ OP::template HandleStats<SRC, TGT>(stats, target_value);
1399
+ dbp_encoder::WriteValue(page_state.dbp_encoder, temp_writer, target_value);
1400
+ }
1401
+ break;
1402
+ }
1403
+ case Encoding::DELTA_LENGTH_BYTE_ARRAY: {
1404
+ idx_t r = chunk_start;
1405
+ if (!page_state.dlba_initialized) {
1406
+ // find first non-null value
1407
+ for (; r < chunk_end; r++) {
1408
+ if (!mask.RowIsValid(r)) {
1409
+ continue;
1410
+ }
1411
+ const TGT target_value = OP::template Operation<SRC, TGT>(data_ptr[r]);
1412
+ OP::template HandleStats<SRC, TGT>(stats, target_value);
1413
+ dlba_encoder::BeginWrite(page_state.dlba_encoder, temp_writer, target_value);
1414
+ page_state.dlba_initialized = true;
1415
+ r++; // skip over
1416
+ break;
1417
+ }
1418
+ }
1419
+
1420
+ for (; r < chunk_end; r++) {
1421
+ if (!mask.RowIsValid(r)) {
1422
+ continue;
1423
+ }
1424
+ const TGT target_value = OP::template Operation<SRC, TGT>(data_ptr[r]);
1425
+ OP::template HandleStats<SRC, TGT>(stats, target_value);
1426
+ dlba_encoder::WriteValue(page_state.dlba_encoder, temp_writer, target_value);
1427
+ }
1428
+ break;
1429
+ }
1430
+ case Encoding::BYTE_STREAM_SPLIT: {
1431
+ for (idx_t r = chunk_start; r < chunk_end; r++) {
1432
+ if (!mask.RowIsValid(r)) {
1433
+ continue;
1434
+ }
1435
+ const TGT target_value = OP::template Operation<SRC, TGT>(data_ptr[r]);
1436
+ OP::template HandleStats<SRC, TGT>(stats, target_value);
1437
+ bss_encoder::WriteValue(page_state.bss_encoder, target_value);
1114
1438
  }
1439
+ break;
1440
+ }
1441
+ case Encoding::PLAIN: {
1442
+ D_ASSERT(page_state.encoding == Encoding::PLAIN);
1443
+ TemplatedWritePlain<SRC, TGT, OP>(input_column, stats, chunk_start, chunk_end, mask, temp_writer);
1444
+ break;
1445
+ }
1446
+ default:
1447
+ throw InternalException("Unknown encoding");
1115
1448
  }
1116
1449
  }
1117
1450
 
1118
- idx_t GetRowSize(const Vector &vector, const idx_t index, const BasicColumnWriterState &state) const override {
1119
- return sizeof(hugeint_t);
1451
+ void FlushDictionary(BasicColumnWriterState &state_p, ColumnWriterStatistics *stats) override {
1452
+ auto &state = state_p.Cast<StandardColumnWriterState<SRC>>();
1453
+
1454
+ D_ASSERT(state.encoding == Encoding::RLE_DICTIONARY);
1455
+
1456
+ // first we need to sort the values in index order
1457
+ auto values = vector<SRC>(state.dictionary.size());
1458
+ for (const auto &entry : state.dictionary) {
1459
+ values[entry.second] = entry.first;
1460
+ }
1461
+
1462
+ state.bloom_filter =
1463
+ make_uniq<ParquetBloomFilter>(state.dictionary.size(), writer.BloomFilterFalsePositiveRatio());
1464
+
1465
+ // first write the contents of the dictionary page to a temporary buffer
1466
+ auto temp_writer = make_uniq<MemoryStream>(MaxValue<idx_t>(
1467
+ NextPowerOfTwo(state.dictionary.size() * sizeof(TGT)), MemoryStream::DEFAULT_INITIAL_CAPACITY));
1468
+ for (idx_t r = 0; r < values.size(); r++) {
1469
+ const TGT target_value = OP::template Operation<SRC, TGT>(values[r]);
1470
+ // update the statistics
1471
+ OP::template HandleStats<SRC, TGT>(stats, target_value);
1472
+ // update the bloom filter
1473
+ auto hash = OP::template XXHash64<SRC, TGT>(target_value);
1474
+ state.bloom_filter->FilterInsert(hash);
1475
+ // actually write the dictionary value
1476
+ OP::template WriteToStream<SRC, TGT>(target_value, *temp_writer);
1477
+ }
1478
+ // flush the dictionary page and add it to the to-be-written pages
1479
+ WriteDictionary(state, std::move(temp_writer), values.size());
1480
+ // bloom filter will be queued for writing in ParquetWriter::BufferBloomFilter one level up
1481
+ }
1482
+
1483
+ // TODO this now vastly over-estimates the page size
1484
+ idx_t GetRowSize(const Vector &vector, const idx_t index, const BasicColumnWriterState &state_p) const override {
1485
+ return sizeof(TGT);
1120
1486
  }
1121
1487
  };
1122
1488
 
1123
1489
  //===--------------------------------------------------------------------===//
1124
- // UUID Column Writer
1490
+ // Boolean Column Writer
1125
1491
  //===--------------------------------------------------------------------===//
1126
- class UUIDColumnWriter : public BasicColumnWriter {
1127
- static constexpr const idx_t PARQUET_UUID_SIZE = 16;
1128
-
1492
+ class BooleanStatisticsState : public ColumnWriterStatistics {
1129
1493
  public:
1130
- UUIDColumnWriter(ParquetWriter &writer, idx_t schema_idx, vector<string> schema_path_p, idx_t max_repeat,
1131
- idx_t max_define, bool can_have_nulls)
1132
- : BasicColumnWriter(writer, schema_idx, std::move(schema_path_p), max_repeat, max_define, can_have_nulls) {
1494
+ BooleanStatisticsState() : min(true), max(false) {
1133
1495
  }
1134
- ~UUIDColumnWriter() override = default;
1135
1496
 
1136
- public:
1137
- static void WriteParquetUUID(hugeint_t input, data_ptr_t result) {
1138
- uint64_t high_bytes = input.upper ^ (int64_t(1) << 63);
1139
- uint64_t low_bytes = input.lower;
1497
+ bool min;
1498
+ bool max;
1140
1499
 
1141
- for (idx_t i = 0; i < sizeof(uint64_t); i++) {
1142
- auto shift_count = (sizeof(uint64_t) - i - 1) * 8;
1143
- result[i] = (high_bytes >> shift_count) & 0xFF;
1144
- }
1145
- for (idx_t i = 0; i < sizeof(uint64_t); i++) {
1146
- auto shift_count = (sizeof(uint64_t) - i - 1) * 8;
1147
- result[sizeof(uint64_t) + i] = (low_bytes >> shift_count) & 0xFF;
1148
- }
1500
+ public:
1501
+ bool HasStats() override {
1502
+ return !(min && !max);
1149
1503
  }
1150
1504
 
1151
- void WriteVector(WriteStream &temp_writer, ColumnWriterStatistics *stats_p, ColumnWriterPageState *page_state,
1152
- Vector &input_column, idx_t chunk_start, idx_t chunk_end) override {
1153
- auto &mask = FlatVector::Validity(input_column);
1154
- auto *ptr = FlatVector::GetData<hugeint_t>(input_column);
1155
-
1156
- data_t temp_buffer[PARQUET_UUID_SIZE];
1157
- for (idx_t r = chunk_start; r < chunk_end; r++) {
1158
- if (mask.RowIsValid(r)) {
1159
- WriteParquetUUID(ptr[r], temp_buffer);
1160
- temp_writer.WriteData(temp_buffer, PARQUET_UUID_SIZE);
1161
- }
1162
- }
1505
+ string GetMin() override {
1506
+ return GetMinValue();
1163
1507
  }
1164
-
1165
- idx_t GetRowSize(const Vector &vector, const idx_t index, const BasicColumnWriterState &state) const override {
1166
- return PARQUET_UUID_SIZE;
1508
+ string GetMax() override {
1509
+ return GetMaxValue();
1510
+ }
1511
+ string GetMinValue() override {
1512
+ return HasStats() ? string(const_char_ptr_cast(&min), sizeof(bool)) : string();
1513
+ }
1514
+ string GetMaxValue() override {
1515
+ return HasStats() ? string(const_char_ptr_cast(&max), sizeof(bool)) : string();
1167
1516
  }
1168
1517
  };
1169
1518
 
1170
- //===--------------------------------------------------------------------===//
1171
- // Interval Column Writer
1172
- //===--------------------------------------------------------------------===//
1173
- class IntervalColumnWriter : public BasicColumnWriter {
1174
- static constexpr const idx_t PARQUET_INTERVAL_SIZE = 12;
1519
+ class BooleanWriterPageState : public ColumnWriterPageState {
1520
+ public:
1521
+ uint8_t byte = 0;
1522
+ uint8_t byte_pos = 0;
1523
+ };
1175
1524
 
1525
+ class BooleanColumnWriter : public BasicColumnWriter {
1176
1526
  public:
1177
- IntervalColumnWriter(ParquetWriter &writer, idx_t schema_idx, vector<string> schema_path_p, idx_t max_repeat,
1178
- idx_t max_define, bool can_have_nulls)
1527
+ BooleanColumnWriter(ParquetWriter &writer, idx_t schema_idx, vector<string> schema_path_p, idx_t max_repeat,
1528
+ idx_t max_define, bool can_have_nulls)
1179
1529
  : BasicColumnWriter(writer, schema_idx, std::move(schema_path_p), max_repeat, max_define, can_have_nulls) {
1180
1530
  }
1181
- ~IntervalColumnWriter() override = default;
1531
+ ~BooleanColumnWriter() override = default;
1182
1532
 
1183
1533
  public:
1184
- static void WriteParquetInterval(interval_t input, data_ptr_t result) {
1185
- if (input.days < 0 || input.months < 0 || input.micros < 0) {
1186
- throw IOException("Parquet files do not support negative intervals");
1187
- }
1188
- Store<uint32_t>(input.months, result);
1189
- Store<uint32_t>(input.days, result + sizeof(uint32_t));
1190
- Store<uint32_t>(input.micros / 1000, result + sizeof(uint32_t) * 2);
1534
+ unique_ptr<ColumnWriterStatistics> InitializeStatsState() override {
1535
+ return make_uniq<BooleanStatisticsState>();
1191
1536
  }
1192
1537
 
1193
- void WriteVector(WriteStream &temp_writer, ColumnWriterStatistics *stats_p, ColumnWriterPageState *page_state,
1538
+ void WriteVector(WriteStream &temp_writer, ColumnWriterStatistics *stats_p, ColumnWriterPageState *state_p,
1194
1539
  Vector &input_column, idx_t chunk_start, idx_t chunk_end) override {
1540
+ auto &stats = stats_p->Cast<BooleanStatisticsState>();
1541
+ auto &state = state_p->Cast<BooleanWriterPageState>();
1195
1542
  auto &mask = FlatVector::Validity(input_column);
1196
- auto *ptr = FlatVector::GetData<interval_t>(input_column);
1197
1543
 
1198
- data_t temp_buffer[PARQUET_INTERVAL_SIZE];
1544
+ auto *ptr = FlatVector::GetData<bool>(input_column);
1199
1545
  for (idx_t r = chunk_start; r < chunk_end; r++) {
1200
1546
  if (mask.RowIsValid(r)) {
1201
- WriteParquetInterval(ptr[r], temp_buffer);
1202
- temp_writer.WriteData(temp_buffer, PARQUET_INTERVAL_SIZE);
1547
+ // only encode if non-null
1548
+ if (ptr[r]) {
1549
+ stats.max = true;
1550
+ state.byte |= 1 << state.byte_pos;
1551
+ } else {
1552
+ stats.min = false;
1553
+ }
1554
+ state.byte_pos++;
1555
+
1556
+ if (state.byte_pos == 8) {
1557
+ temp_writer.Write<uint8_t>(state.byte);
1558
+ state.byte = 0;
1559
+ state.byte_pos = 0;
1560
+ }
1203
1561
  }
1204
1562
  }
1205
1563
  }
1206
1564
 
1565
+ unique_ptr<ColumnWriterPageState> InitializePageState(BasicColumnWriterState &state) override {
1566
+ return make_uniq<BooleanWriterPageState>();
1567
+ }
1568
+
1569
+ void FlushPageState(WriteStream &temp_writer, ColumnWriterPageState *state_p) override {
1570
+ auto &state = state_p->Cast<BooleanWriterPageState>();
1571
+ if (state.byte_pos > 0) {
1572
+ temp_writer.Write<uint8_t>(state.byte);
1573
+ state.byte = 0;
1574
+ state.byte_pos = 0;
1575
+ }
1576
+ }
1577
+
1207
1578
  idx_t GetRowSize(const Vector &vector, const idx_t index, const BasicColumnWriterState &state) const override {
1208
- return PARQUET_INTERVAL_SIZE;
1579
+ return sizeof(bool);
1209
1580
  }
1210
1581
  };
1211
1582
 
1212
1583
  //===--------------------------------------------------------------------===//
1213
- // String Column Writer
1584
+ // Decimal Column Writer
1214
1585
  //===--------------------------------------------------------------------===//
1215
- class StringStatisticsState : public ColumnWriterStatistics {
1216
- static constexpr const idx_t MAX_STRING_STATISTICS_SIZE = 10000;
1586
+ static void WriteParquetDecimal(hugeint_t input, data_ptr_t result) {
1587
+ bool positive = input >= 0;
1588
+ // numbers are stored as two's complement so some muckery is required
1589
+ if (!positive) {
1590
+ input = NumericLimits<hugeint_t>::Maximum() + input + 1;
1591
+ }
1592
+ uint64_t high_bytes = uint64_t(input.upper);
1593
+ uint64_t low_bytes = input.lower;
1594
+
1595
+ for (idx_t i = 0; i < sizeof(uint64_t); i++) {
1596
+ auto shift_count = (sizeof(uint64_t) - i - 1) * 8;
1597
+ result[i] = (high_bytes >> shift_count) & 0xFF;
1598
+ }
1599
+ for (idx_t i = 0; i < sizeof(uint64_t); i++) {
1600
+ auto shift_count = (sizeof(uint64_t) - i - 1) * 8;
1601
+ result[sizeof(uint64_t) + i] = (low_bytes >> shift_count) & 0xFF;
1602
+ }
1603
+ if (!positive) {
1604
+ result[0] |= 0x80;
1605
+ }
1606
+ }
1217
1607
 
1608
+ class FixedDecimalStatistics : public ColumnWriterStatistics {
1218
1609
  public:
1219
- StringStatisticsState() : has_stats(false), values_too_big(false), min(), max() {
1610
+ FixedDecimalStatistics() : min(NumericLimits<hugeint_t>::Maximum()), max(NumericLimits<hugeint_t>::Minimum()) {
1220
1611
  }
1221
1612
 
1222
- bool has_stats;
1223
- bool values_too_big;
1224
- string min;
1225
- string max;
1613
+ hugeint_t min;
1614
+ hugeint_t max;
1226
1615
 
1227
1616
  public:
1617
+ string GetStats(hugeint_t &input) {
1618
+ data_t buffer[16];
1619
+ WriteParquetDecimal(input, buffer);
1620
+ return string(const_char_ptr_cast(buffer), 16);
1621
+ }
1622
+
1228
1623
  bool HasStats() override {
1229
- return has_stats;
1624
+ return min <= max;
1230
1625
  }
1231
1626
 
1232
- void Update(const string_t &val) {
1233
- if (values_too_big) {
1234
- return;
1235
- }
1236
- auto str_len = val.GetSize();
1237
- if (str_len > MAX_STRING_STATISTICS_SIZE) {
1238
- // we avoid gathering stats when individual string values are too large
1239
- // this is because the statistics are copied into the Parquet file meta data in uncompressed format
1240
- // ideally we avoid placing several mega or giga-byte long strings there
1241
- // we put a threshold of 10KB, if we see strings that exceed this threshold we avoid gathering stats
1242
- values_too_big = true;
1243
- has_stats = false;
1244
- min = string();
1245
- max = string();
1246
- return;
1247
- }
1248
- if (!has_stats || LessThan::Operation(val, string_t(min))) {
1249
- min = val.GetString();
1627
+ void Update(hugeint_t &val) {
1628
+ if (LessThan::Operation(val, min)) {
1629
+ min = val;
1250
1630
  }
1251
- if (!has_stats || GreaterThan::Operation(val, string_t(max))) {
1252
- max = val.GetString();
1631
+ if (GreaterThan::Operation(val, max)) {
1632
+ max = val;
1253
1633
  }
1254
- has_stats = true;
1255
1634
  }
1256
1635
 
1257
1636
  string GetMin() override {
@@ -1261,264 +1640,44 @@ public:
1261
1640
  return GetMaxValue();
1262
1641
  }
1263
1642
  string GetMinValue() override {
1264
- return HasStats() ? min : string();
1643
+ return HasStats() ? GetStats(min) : string();
1265
1644
  }
1266
1645
  string GetMaxValue() override {
1267
- return HasStats() ? max : string();
1268
- }
1269
- };
1270
-
1271
- class StringColumnWriterState : public BasicColumnWriterState {
1272
- public:
1273
- StringColumnWriterState(duckdb_parquet::format::RowGroup &row_group, idx_t col_idx)
1274
- : BasicColumnWriterState(row_group, col_idx) {
1275
- }
1276
- ~StringColumnWriterState() override = default;
1277
-
1278
- // analysis state
1279
- idx_t estimated_dict_page_size = 0;
1280
- idx_t estimated_rle_pages_size = 0;
1281
- idx_t estimated_plain_size = 0;
1282
-
1283
- // Dictionary and accompanying string heap
1284
- string_map_t<uint32_t> dictionary;
1285
- // key_bit_width== 0 signifies the chunk is written in plain encoding
1286
- uint32_t key_bit_width;
1287
-
1288
- bool IsDictionaryEncoded() const {
1289
- return key_bit_width != 0;
1290
- }
1291
- };
1292
-
1293
- class StringWriterPageState : public ColumnWriterPageState {
1294
- public:
1295
- explicit StringWriterPageState(uint32_t bit_width, const string_map_t<uint32_t> &values)
1296
- : bit_width(bit_width), dictionary(values), encoder(bit_width), written_value(false) {
1297
- D_ASSERT(IsDictionaryEncoded() || (bit_width == 0 && dictionary.empty()));
1298
- }
1299
-
1300
- bool IsDictionaryEncoded() {
1301
- return bit_width != 0;
1646
+ return HasStats() ? GetStats(max) : string();
1302
1647
  }
1303
- // if 0, we're writing a plain page
1304
- uint32_t bit_width;
1305
- const string_map_t<uint32_t> &dictionary;
1306
- RleBpEncoder encoder;
1307
- bool written_value;
1308
1648
  };
1309
1649
 
1310
- class StringColumnWriter : public BasicColumnWriter {
1650
+ class FixedDecimalColumnWriter : public BasicColumnWriter {
1311
1651
  public:
1312
- StringColumnWriter(ParquetWriter &writer, idx_t schema_idx, vector<string> schema_path_p, idx_t max_repeat,
1313
- idx_t max_define, bool can_have_nulls)
1652
+ FixedDecimalColumnWriter(ParquetWriter &writer, idx_t schema_idx, vector<string> schema_path_p, idx_t max_repeat,
1653
+ idx_t max_define, bool can_have_nulls)
1314
1654
  : BasicColumnWriter(writer, schema_idx, std::move(schema_path_p), max_repeat, max_define, can_have_nulls) {
1315
1655
  }
1316
- ~StringColumnWriter() override = default;
1656
+ ~FixedDecimalColumnWriter() override = default;
1317
1657
 
1318
1658
  public:
1319
1659
  unique_ptr<ColumnWriterStatistics> InitializeStatsState() override {
1320
- return make_uniq<StringStatisticsState>();
1321
- }
1322
-
1323
- unique_ptr<ColumnWriterState> InitializeWriteState(duckdb_parquet::format::RowGroup &row_group) override {
1324
- auto result = make_uniq<StringColumnWriterState>(row_group, row_group.columns.size());
1325
- RegisterToRowGroup(row_group);
1326
- return std::move(result);
1327
- }
1328
-
1329
- bool HasAnalyze() override {
1330
- return true;
1331
- }
1332
-
1333
- void Analyze(ColumnWriterState &state_p, ColumnWriterState *parent, Vector &vector, idx_t count) override {
1334
- auto &state = state_p.Cast<StringColumnWriterState>();
1335
- if (writer.DictionaryCompressionRatioThreshold() == NumericLimits<double>::Maximum() ||
1336
- (state.dictionary.size() > DICTIONARY_ANALYZE_THRESHOLD && WontUseDictionary(state))) {
1337
- // Early out: compression ratio is less than the specified parameter
1338
- // after seeing more entries than the threshold
1339
- return;
1340
- }
1341
-
1342
- idx_t vcount = parent ? parent->definition_levels.size() - state.definition_levels.size() : count;
1343
- idx_t parent_index = state.definition_levels.size();
1344
- auto &validity = FlatVector::Validity(vector);
1345
- idx_t vector_index = 0;
1346
- uint32_t new_value_index = state.dictionary.size();
1347
- uint32_t last_value_index = -1;
1348
- idx_t run_length = 0;
1349
- idx_t run_count = 0;
1350
- auto strings = FlatVector::GetData<string_t>(vector);
1351
- for (idx_t i = 0; i < vcount; i++) {
1352
- if (parent && !parent->is_empty.empty() && parent->is_empty[parent_index + i]) {
1353
- continue;
1354
- }
1355
-
1356
- if (validity.RowIsValid(vector_index)) {
1357
- run_length++;
1358
- const auto &value = strings[vector_index];
1359
- // Try to insert into the dictionary. If it's already there, we get back the value index
1360
- auto found = state.dictionary.insert(string_map_t<uint32_t>::value_type(value, new_value_index));
1361
- state.estimated_plain_size += value.GetSize() + STRING_LENGTH_SIZE;
1362
- if (found.second) {
1363
- // string didn't exist yet in the dictionary
1364
- new_value_index++;
1365
- state.estimated_dict_page_size += value.GetSize() + MAX_DICTIONARY_KEY_SIZE;
1366
- }
1367
-
1368
- // if the value changed, we will encode it in the page
1369
- if (last_value_index != found.first->second) {
1370
- // we will add the value index size later, when we know the total number of keys
1371
- state.estimated_rle_pages_size += GetVarintSize(run_length);
1372
- run_length = 0;
1373
- run_count++;
1374
- last_value_index = found.first->second;
1375
- }
1376
- }
1377
- vector_index++;
1378
- }
1379
- // Add the costs of keys sizes. We don't know yet how many bytes the keys need as we haven't
1380
- // seen all the values. therefore we use an over-estimation of
1381
- state.estimated_rle_pages_size += MAX_DICTIONARY_KEY_SIZE * run_count;
1382
- }
1383
-
1384
- void FinalizeAnalyze(ColumnWriterState &state_p) override {
1385
- auto &state = state_p.Cast<StringColumnWriterState>();
1386
-
1387
- // check if a dictionary will require more space than a plain write, or if the dictionary page is going to
1388
- // be too large
1389
- if (WontUseDictionary(state)) {
1390
- // clearing the dictionary signals a plain write
1391
- state.dictionary.clear();
1392
- state.key_bit_width = 0;
1393
- } else {
1394
- state.key_bit_width = RleBpDecoder::ComputeBitWidth(state.dictionary.size());
1395
- }
1660
+ return make_uniq<FixedDecimalStatistics>();
1396
1661
  }
1397
1662
 
1398
- void WriteVector(WriteStream &temp_writer, ColumnWriterStatistics *stats_p, ColumnWriterPageState *page_state_p,
1663
+ void WriteVector(WriteStream &temp_writer, ColumnWriterStatistics *stats_p, ColumnWriterPageState *page_state,
1399
1664
  Vector &input_column, idx_t chunk_start, idx_t chunk_end) override {
1400
- auto &page_state = page_state_p->Cast<StringWriterPageState>();
1401
1665
  auto &mask = FlatVector::Validity(input_column);
1402
- auto &stats = stats_p->Cast<StringStatisticsState>();
1666
+ auto *ptr = FlatVector::GetData<hugeint_t>(input_column);
1667
+ auto &stats = stats_p->Cast<FixedDecimalStatistics>();
1403
1668
 
1404
- auto *ptr = FlatVector::GetData<string_t>(input_column);
1405
- if (page_state.IsDictionaryEncoded()) {
1406
- // dictionary based page
1407
- for (idx_t r = chunk_start; r < chunk_end; r++) {
1408
- if (!mask.RowIsValid(r)) {
1409
- continue;
1410
- }
1411
- auto value_index = page_state.dictionary.at(ptr[r]);
1412
- if (!page_state.written_value) {
1413
- // first value
1414
- // write the bit-width as a one-byte entry
1415
- temp_writer.Write<uint8_t>(page_state.bit_width);
1416
- // now begin writing the actual value
1417
- page_state.encoder.BeginWrite(temp_writer, value_index);
1418
- page_state.written_value = true;
1419
- } else {
1420
- page_state.encoder.WriteValue(temp_writer, value_index);
1421
- }
1422
- }
1423
- } else {
1424
- // plain page
1425
- for (idx_t r = chunk_start; r < chunk_end; r++) {
1426
- if (!mask.RowIsValid(r)) {
1427
- continue;
1428
- }
1669
+ data_t temp_buffer[16];
1670
+ for (idx_t r = chunk_start; r < chunk_end; r++) {
1671
+ if (mask.RowIsValid(r)) {
1429
1672
  stats.Update(ptr[r]);
1430
- temp_writer.Write<uint32_t>(ptr[r].GetSize());
1431
- temp_writer.WriteData(const_data_ptr_cast(ptr[r].GetData()), ptr[r].GetSize());
1432
- }
1433
- }
1434
- }
1435
-
1436
- unique_ptr<ColumnWriterPageState> InitializePageState(BasicColumnWriterState &state_p) override {
1437
- auto &state = state_p.Cast<StringColumnWriterState>();
1438
- return make_uniq<StringWriterPageState>(state.key_bit_width, state.dictionary);
1439
- }
1440
-
1441
- void FlushPageState(WriteStream &temp_writer, ColumnWriterPageState *state_p) override {
1442
- auto &page_state = state_p->Cast<StringWriterPageState>();
1443
- if (page_state.bit_width != 0) {
1444
- if (!page_state.written_value) {
1445
- // all values are null
1446
- // just write the bit width
1447
- temp_writer.Write<uint8_t>(page_state.bit_width);
1448
- return;
1673
+ WriteParquetDecimal(ptr[r], temp_buffer);
1674
+ temp_writer.WriteData(temp_buffer, 16);
1449
1675
  }
1450
- page_state.encoder.FinishWrite(temp_writer);
1451
- }
1452
- }
1453
-
1454
- duckdb_parquet::format::Encoding::type GetEncoding(BasicColumnWriterState &state_p) override {
1455
- auto &state = state_p.Cast<StringColumnWriterState>();
1456
- return state.IsDictionaryEncoded() ? Encoding::RLE_DICTIONARY : Encoding::PLAIN;
1457
- }
1458
-
1459
- bool HasDictionary(BasicColumnWriterState &state_p) override {
1460
- auto &state = state_p.Cast<StringColumnWriterState>();
1461
- return state.IsDictionaryEncoded();
1462
- }
1463
-
1464
- idx_t DictionarySize(BasicColumnWriterState &state_p) override {
1465
- auto &state = state_p.Cast<StringColumnWriterState>();
1466
- D_ASSERT(state.IsDictionaryEncoded());
1467
- return state.dictionary.size();
1468
- }
1469
-
1470
- void FlushDictionary(BasicColumnWriterState &state_p, ColumnWriterStatistics *stats_p) override {
1471
- auto &stats = stats_p->Cast<StringStatisticsState>();
1472
- auto &state = state_p.Cast<StringColumnWriterState>();
1473
- if (!state.IsDictionaryEncoded()) {
1474
- return;
1475
- }
1476
- // first we need to sort the values in index order
1477
- auto values = vector<string_t>(state.dictionary.size());
1478
- for (const auto &entry : state.dictionary) {
1479
- D_ASSERT(values[entry.second].GetSize() == 0);
1480
- values[entry.second] = entry.first;
1481
- }
1482
- // first write the contents of the dictionary page to a temporary buffer
1483
- auto temp_writer = make_uniq<MemoryStream>(
1484
- MaxValue<idx_t>(NextPowerOfTwo(state.estimated_dict_page_size), MemoryStream::DEFAULT_INITIAL_CAPACITY));
1485
- for (idx_t r = 0; r < values.size(); r++) {
1486
- auto &value = values[r];
1487
- // update the statistics
1488
- stats.Update(value);
1489
- // write this string value to the dictionary
1490
- temp_writer->Write<uint32_t>(value.GetSize());
1491
- temp_writer->WriteData(const_data_ptr_cast((value.GetData())), value.GetSize());
1492
- }
1493
- // flush the dictionary page and add it to the to-be-written pages
1494
- WriteDictionary(state, std::move(temp_writer), values.size());
1495
- }
1496
-
1497
- idx_t GetRowSize(const Vector &vector, const idx_t index, const BasicColumnWriterState &state_p) const override {
1498
- auto &state = state_p.Cast<StringColumnWriterState>();
1499
- if (state.IsDictionaryEncoded()) {
1500
- return (state.key_bit_width + 7) / 8;
1501
- } else {
1502
- auto strings = FlatVector::GetData<string_t>(vector);
1503
- return strings[index].GetSize();
1504
1676
  }
1505
1677
  }
1506
1678
 
1507
- private:
1508
- bool WontUseDictionary(StringColumnWriterState &state) const {
1509
- return state.estimated_dict_page_size > MAX_UNCOMPRESSED_DICT_PAGE_SIZE ||
1510
- DictionaryCompressionRatio(state) < writer.DictionaryCompressionRatioThreshold();
1511
- }
1512
-
1513
- static double DictionaryCompressionRatio(StringColumnWriterState &state) {
1514
- // If any are 0, we just return a compression ratio of 1
1515
- if (state.estimated_plain_size == 0 || state.estimated_rle_pages_size == 0 ||
1516
- state.estimated_dict_page_size == 0) {
1517
- return 1;
1518
- }
1519
- // Otherwise, plain size divided by compressed size
1520
- return double(state.estimated_plain_size) /
1521
- double(state.estimated_rle_pages_size + state.estimated_dict_page_size);
1679
+ idx_t GetRowSize(const Vector &vector, const idx_t index, const BasicColumnWriterState &state) const override {
1680
+ return sizeof(hugeint_t);
1522
1681
  }
1523
1682
  };
1524
1683
 
@@ -1527,40 +1686,42 @@ private:
1527
1686
  //===--------------------------------------------------------------------===//
1528
1687
  // Used to store the metadata for a WKB-encoded geometry column when writing
1529
1688
  // GeoParquet files.
1530
- class WKBColumnWriterState final : public StringColumnWriterState {
1689
+ class WKBColumnWriterState final : public StandardColumnWriterState<string_t> {
1531
1690
  public:
1532
- WKBColumnWriterState(ClientContext &context, duckdb_parquet::format::RowGroup &row_group, idx_t col_idx)
1533
- : StringColumnWriterState(row_group, col_idx), geo_data(), geo_data_writer(context) {
1691
+ WKBColumnWriterState(ClientContext &context, duckdb_parquet::RowGroup &row_group, idx_t col_idx)
1692
+ : StandardColumnWriterState(row_group, col_idx), geo_data(), geo_data_writer(context) {
1534
1693
  }
1535
1694
 
1536
1695
  GeoParquetColumnMetadata geo_data;
1537
1696
  GeoParquetColumnMetadataWriter geo_data_writer;
1538
1697
  };
1539
1698
 
1540
- class WKBColumnWriter final : public StringColumnWriter {
1699
+ class WKBColumnWriter final : public StandardColumnWriter<string_t, string_t, ParquetStringOperator> {
1541
1700
  public:
1542
1701
  WKBColumnWriter(ClientContext &context_p, ParquetWriter &writer, idx_t schema_idx, vector<string> schema_path_p,
1543
1702
  idx_t max_repeat, idx_t max_define, bool can_have_nulls, string name)
1544
- : StringColumnWriter(writer, schema_idx, std::move(schema_path_p), max_repeat, max_define, can_have_nulls),
1703
+ : StandardColumnWriter(writer, schema_idx, std::move(schema_path_p), max_repeat, max_define, can_have_nulls),
1545
1704
  column_name(std::move(name)), context(context_p) {
1546
1705
 
1547
1706
  this->writer.GetGeoParquetData().RegisterGeometryColumn(column_name);
1548
1707
  }
1549
1708
 
1550
- unique_ptr<ColumnWriterState> InitializeWriteState(duckdb_parquet::format::RowGroup &row_group) override {
1709
+ unique_ptr<ColumnWriterState> InitializeWriteState(duckdb_parquet::RowGroup &row_group) override {
1551
1710
  auto result = make_uniq<WKBColumnWriterState>(context, row_group, row_group.columns.size());
1711
+ result->encoding = Encoding::RLE_DICTIONARY;
1552
1712
  RegisterToRowGroup(row_group);
1553
1713
  return std::move(result);
1554
1714
  }
1715
+
1555
1716
  void Write(ColumnWriterState &state, Vector &vector, idx_t count) override {
1556
- StringColumnWriter::Write(state, vector, count);
1717
+ StandardColumnWriter::Write(state, vector, count);
1557
1718
 
1558
1719
  auto &geo_state = state.Cast<WKBColumnWriterState>();
1559
1720
  geo_state.geo_data_writer.Update(geo_state.geo_data, vector, count);
1560
1721
  }
1561
1722
 
1562
1723
  void FinalizeWrite(ColumnWriterState &state) override {
1563
- StringColumnWriter::FinalizeWrite(state);
1724
+ StandardColumnWriter::FinalizeWrite(state);
1564
1725
 
1565
1726
  // Add the geodata object to the writer
1566
1727
  const auto &geo_state = state.Cast<WKBColumnWriterState>();
@@ -1658,7 +1819,7 @@ public:
1658
1819
  page_state.encoder.FinishWrite(temp_writer);
1659
1820
  }
1660
1821
 
1661
- duckdb_parquet::format::Encoding::type GetEncoding(BasicColumnWriterState &state) override {
1822
+ duckdb_parquet::Encoding::type GetEncoding(BasicColumnWriterState &state) override {
1662
1823
  return Encoding::RLE_DICTIONARY;
1663
1824
  }
1664
1825
 
@@ -1710,7 +1871,7 @@ public:
1710
1871
  vector<unique_ptr<ColumnWriter>> child_writers;
1711
1872
 
1712
1873
  public:
1713
- unique_ptr<ColumnWriterState> InitializeWriteState(duckdb_parquet::format::RowGroup &row_group) override;
1874
+ unique_ptr<ColumnWriterState> InitializeWriteState(duckdb_parquet::RowGroup &row_group) override;
1714
1875
  bool HasAnalyze() override;
1715
1876
  void Analyze(ColumnWriterState &state, ColumnWriterState *parent, Vector &vector, idx_t count) override;
1716
1877
  void FinalizeAnalyze(ColumnWriterState &state) override;
@@ -1723,17 +1884,17 @@ public:
1723
1884
 
1724
1885
  class StructColumnWriterState : public ColumnWriterState {
1725
1886
  public:
1726
- StructColumnWriterState(duckdb_parquet::format::RowGroup &row_group, idx_t col_idx)
1887
+ StructColumnWriterState(duckdb_parquet::RowGroup &row_group, idx_t col_idx)
1727
1888
  : row_group(row_group), col_idx(col_idx) {
1728
1889
  }
1729
1890
  ~StructColumnWriterState() override = default;
1730
1891
 
1731
- duckdb_parquet::format::RowGroup &row_group;
1892
+ duckdb_parquet::RowGroup &row_group;
1732
1893
  idx_t col_idx;
1733
1894
  vector<unique_ptr<ColumnWriterState>> child_states;
1734
1895
  };
1735
1896
 
1736
- unique_ptr<ColumnWriterState> StructColumnWriter::InitializeWriteState(duckdb_parquet::format::RowGroup &row_group) {
1897
+ unique_ptr<ColumnWriterState> StructColumnWriter::InitializeWriteState(duckdb_parquet::RowGroup &row_group) {
1737
1898
  auto result = make_uniq<StructColumnWriterState>(row_group, row_group.columns.size());
1738
1899
 
1739
1900
  result->child_states.reserve(child_writers.size());
@@ -1831,7 +1992,7 @@ public:
1831
1992
  unique_ptr<ColumnWriter> child_writer;
1832
1993
 
1833
1994
  public:
1834
- unique_ptr<ColumnWriterState> InitializeWriteState(duckdb_parquet::format::RowGroup &row_group) override;
1995
+ unique_ptr<ColumnWriterState> InitializeWriteState(duckdb_parquet::RowGroup &row_group) override;
1835
1996
  bool HasAnalyze() override;
1836
1997
  void Analyze(ColumnWriterState &state, ColumnWriterState *parent, Vector &vector, idx_t count) override;
1837
1998
  void FinalizeAnalyze(ColumnWriterState &state) override;
@@ -1844,18 +2005,17 @@ public:
1844
2005
 
1845
2006
  class ListColumnWriterState : public ColumnWriterState {
1846
2007
  public:
1847
- ListColumnWriterState(duckdb_parquet::format::RowGroup &row_group, idx_t col_idx)
1848
- : row_group(row_group), col_idx(col_idx) {
2008
+ ListColumnWriterState(duckdb_parquet::RowGroup &row_group, idx_t col_idx) : row_group(row_group), col_idx(col_idx) {
1849
2009
  }
1850
2010
  ~ListColumnWriterState() override = default;
1851
2011
 
1852
- duckdb_parquet::format::RowGroup &row_group;
2012
+ duckdb_parquet::RowGroup &row_group;
1853
2013
  idx_t col_idx;
1854
2014
  unique_ptr<ColumnWriterState> child_state;
1855
2015
  idx_t parent_index = 0;
1856
2016
  };
1857
2017
 
1858
- unique_ptr<ColumnWriterState> ListColumnWriter::InitializeWriteState(duckdb_parquet::format::RowGroup &row_group) {
2018
+ unique_ptr<ColumnWriterState> ListColumnWriter::InitializeWriteState(duckdb_parquet::RowGroup &row_group) {
1859
2019
  auto result = make_uniq<ListColumnWriterState>(row_group, row_group.columns.size());
1860
2020
  result->child_state = child_writer->InitializeWriteState(row_group);
1861
2021
  return std::move(result);
@@ -2078,12 +2238,50 @@ void ArrayColumnWriter::Write(ColumnWriterState &state_p, Vector &vector, idx_t
2078
2238
  child_writer->Write(*state.child_state, array_child, count * array_size);
2079
2239
  }
2080
2240
 
2241
+ // special double/float class to deal with dictionary encoding and NaN equality
2242
+ struct double_na_equal {
2243
+ double_na_equal() : val(0) {
2244
+ }
2245
+ explicit double_na_equal(const double val_p) : val(val_p) {
2246
+ }
2247
+ // NOLINTNEXTLINE: allow implicit conversion to double
2248
+ operator double() const {
2249
+ return val;
2250
+ }
2251
+
2252
+ bool operator==(const double &right) const {
2253
+ if (std::isnan(val) && std::isnan(right)) {
2254
+ return true;
2255
+ }
2256
+ return val == right;
2257
+ }
2258
+ double val;
2259
+ };
2260
+
2261
+ struct float_na_equal {
2262
+ float_na_equal() : val(0) {
2263
+ }
2264
+ explicit float_na_equal(const float val_p) : val(val_p) {
2265
+ }
2266
+ // NOLINTNEXTLINE: allow implicit conversion to float
2267
+ operator float() const {
2268
+ return val;
2269
+ }
2270
+ bool operator==(const float &right) const {
2271
+ if (std::isnan(val) && std::isnan(right)) {
2272
+ return true;
2273
+ }
2274
+ return val == right;
2275
+ }
2276
+ float val;
2277
+ };
2278
+
2081
2279
  //===--------------------------------------------------------------------===//
2082
2280
  // Create Column Writer
2083
2281
  //===--------------------------------------------------------------------===//
2084
2282
 
2085
2283
  unique_ptr<ColumnWriter> ColumnWriter::CreateWriterRecursive(ClientContext &context,
2086
- vector<duckdb_parquet::format::SchemaElement> &schemas,
2284
+ vector<duckdb_parquet::SchemaElement> &schemas,
2087
2285
  ParquetWriter &writer, const LogicalType &type,
2088
2286
  const string &name, vector<string> schema_path,
2089
2287
  optional_ptr<const ChildFieldIDs> field_ids,
@@ -2107,7 +2305,7 @@ unique_ptr<ColumnWriter> ColumnWriter::CreateWriterRecursive(ClientContext &cont
2107
2305
  if (type.id() == LogicalTypeId::STRUCT || type.id() == LogicalTypeId::UNION) {
2108
2306
  auto &child_types = StructType::GetChildTypes(type);
2109
2307
  // set up the schema element for this struct
2110
- duckdb_parquet::format::SchemaElement schema_element;
2308
+ duckdb_parquet::SchemaElement schema_element;
2111
2309
  schema_element.repetition_type = null_type;
2112
2310
  schema_element.num_children = UnsafeNumericCast<int32_t>(child_types.size());
2113
2311
  schema_element.__isset.num_children = true;
@@ -2137,7 +2335,7 @@ unique_ptr<ColumnWriter> ColumnWriter::CreateWriterRecursive(ClientContext &cont
2137
2335
  // set up the two schema elements for the list
2138
2336
  // for some reason we only set the converted type in the OPTIONAL element
2139
2337
  // first an OPTIONAL element
2140
- duckdb_parquet::format::SchemaElement optional_element;
2338
+ duckdb_parquet::SchemaElement optional_element;
2141
2339
  optional_element.repetition_type = null_type;
2142
2340
  optional_element.num_children = 1;
2143
2341
  optional_element.converted_type = ConvertedType::LIST;
@@ -2154,7 +2352,7 @@ unique_ptr<ColumnWriter> ColumnWriter::CreateWriterRecursive(ClientContext &cont
2154
2352
  schema_path.push_back(name);
2155
2353
 
2156
2354
  // then a REPEATED element
2157
- duckdb_parquet::format::SchemaElement repeated_element;
2355
+ duckdb_parquet::SchemaElement repeated_element;
2158
2356
  repeated_element.repetition_type = FieldRepetitionType::REPEATED;
2159
2357
  repeated_element.num_children = 1;
2160
2358
  repeated_element.__isset.num_children = true;
@@ -2184,7 +2382,7 @@ unique_ptr<ColumnWriter> ColumnWriter::CreateWriterRecursive(ClientContext &cont
2184
2382
  // }
2185
2383
  // }
2186
2384
  // top map element
2187
- duckdb_parquet::format::SchemaElement top_element;
2385
+ duckdb_parquet::SchemaElement top_element;
2188
2386
  top_element.repetition_type = null_type;
2189
2387
  top_element.num_children = 1;
2190
2388
  top_element.converted_type = ConvertedType::MAP;
@@ -2201,7 +2399,7 @@ unique_ptr<ColumnWriter> ColumnWriter::CreateWriterRecursive(ClientContext &cont
2201
2399
  schema_path.push_back(name);
2202
2400
 
2203
2401
  // key_value element
2204
- duckdb_parquet::format::SchemaElement kv_element;
2402
+ duckdb_parquet::SchemaElement kv_element;
2205
2403
  kv_element.repetition_type = FieldRepetitionType::REPEATED;
2206
2404
  kv_element.num_children = 2;
2207
2405
  kv_element.__isset.repetition_type = true;
@@ -2229,7 +2427,7 @@ unique_ptr<ColumnWriter> ColumnWriter::CreateWriterRecursive(ClientContext &cont
2229
2427
  return make_uniq<ListColumnWriter>(writer, schema_idx, schema_path, max_repeat, max_define,
2230
2428
  std::move(struct_writer), can_have_nulls);
2231
2429
  }
2232
- duckdb_parquet::format::SchemaElement schema_element;
2430
+ duckdb_parquet::SchemaElement schema_element;
2233
2431
  schema_element.type = ParquetWriter::DuckDBTypeToParquetType(type);
2234
2432
  schema_element.repetition_type = null_type;
2235
2433
  schema_element.__isset.num_children = false;
@@ -2243,7 +2441,6 @@ unique_ptr<ColumnWriter> ColumnWriter::CreateWriterRecursive(ClientContext &cont
2243
2441
  ParquetWriter::SetSchemaProperties(type, schema_element);
2244
2442
  schemas.push_back(std::move(schema_element));
2245
2443
  schema_path.push_back(name);
2246
-
2247
2444
  if (type.id() == LogicalTypeId::BLOB && type.GetAlias() == "WKB_BLOB" &&
2248
2445
  GeoParquetFileMetadata::IsGeoParquetConversionEnabled(context)) {
2249
2446
  return make_uniq<WKBColumnWriter>(context, writer, schema_idx, std::move(schema_path), max_repeat, max_define,
@@ -2299,11 +2496,11 @@ unique_ptr<ColumnWriter> ColumnWriter::CreateWriterRecursive(ClientContext &cont
2299
2496
  return make_uniq<StandardColumnWriter<uint64_t, uint64_t>>(writer, schema_idx, std::move(schema_path),
2300
2497
  max_repeat, max_define, can_have_nulls);
2301
2498
  case LogicalTypeId::FLOAT:
2302
- return make_uniq<StandardColumnWriter<float, float>>(writer, schema_idx, std::move(schema_path), max_repeat,
2303
- max_define, can_have_nulls);
2499
+ return make_uniq<StandardColumnWriter<float_na_equal, float>>(writer, schema_idx, std::move(schema_path),
2500
+ max_repeat, max_define, can_have_nulls);
2304
2501
  case LogicalTypeId::DOUBLE:
2305
- return make_uniq<StandardColumnWriter<double, double>>(writer, schema_idx, std::move(schema_path), max_repeat,
2306
- max_define, can_have_nulls);
2502
+ return make_uniq<StandardColumnWriter<double_na_equal, double>>(writer, schema_idx, std::move(schema_path),
2503
+ max_repeat, max_define, can_have_nulls);
2307
2504
  case LogicalTypeId::DECIMAL:
2308
2505
  switch (type.InternalType()) {
2309
2506
  case PhysicalType::INT16:
@@ -2321,14 +2518,14 @@ unique_ptr<ColumnWriter> ColumnWriter::CreateWriterRecursive(ClientContext &cont
2321
2518
  }
2322
2519
  case LogicalTypeId::BLOB:
2323
2520
  case LogicalTypeId::VARCHAR:
2324
- return make_uniq<StringColumnWriter>(writer, schema_idx, std::move(schema_path), max_repeat, max_define,
2325
- can_have_nulls);
2521
+ return make_uniq<StandardColumnWriter<string_t, string_t, ParquetStringOperator>>(
2522
+ writer, schema_idx, std::move(schema_path), max_repeat, max_define, can_have_nulls);
2326
2523
  case LogicalTypeId::UUID:
2327
- return make_uniq<UUIDColumnWriter>(writer, schema_idx, std::move(schema_path), max_repeat, max_define,
2328
- can_have_nulls);
2524
+ return make_uniq<StandardColumnWriter<hugeint_t, ParquetUUIDTargetType, ParquetUUIDOperator>>(
2525
+ writer, schema_idx, std::move(schema_path), max_repeat, max_define, can_have_nulls);
2329
2526
  case LogicalTypeId::INTERVAL:
2330
- return make_uniq<IntervalColumnWriter>(writer, schema_idx, std::move(schema_path), max_repeat, max_define,
2331
- can_have_nulls);
2527
+ return make_uniq<StandardColumnWriter<interval_t, ParquetIntervalTargetType, ParquetIntervalOperator>>(
2528
+ writer, schema_idx, std::move(schema_path), max_repeat, max_define, can_have_nulls);
2332
2529
  case LogicalTypeId::ENUM:
2333
2530
  return make_uniq<EnumColumnWriter>(writer, type, schema_idx, std::move(schema_path), max_repeat, max_define,
2334
2531
  can_have_nulls);
@@ -2337,4 +2534,73 @@ unique_ptr<ColumnWriter> ColumnWriter::CreateWriterRecursive(ClientContext &cont
2337
2534
  }
2338
2535
  }
2339
2536
 
2537
+ template <>
2538
+ struct NumericLimits<float_na_equal> {
2539
+ static constexpr float Minimum() {
2540
+ return std::numeric_limits<float>::lowest();
2541
+ };
2542
+ static constexpr float Maximum() {
2543
+ return std::numeric_limits<float>::max();
2544
+ };
2545
+ static constexpr bool IsSigned() {
2546
+ return std::is_signed<float>::value;
2547
+ }
2548
+ static constexpr bool IsIntegral() {
2549
+ return std::is_integral<float>::value;
2550
+ }
2551
+ };
2552
+
2553
+ template <>
2554
+ struct NumericLimits<double_na_equal> {
2555
+ static constexpr double Minimum() {
2556
+ return std::numeric_limits<double>::lowest();
2557
+ };
2558
+ static constexpr double Maximum() {
2559
+ return std::numeric_limits<double>::max();
2560
+ };
2561
+ static constexpr bool IsSigned() {
2562
+ return std::is_signed<double>::value;
2563
+ }
2564
+ static constexpr bool IsIntegral() {
2565
+ return std::is_integral<double>::value;
2566
+ }
2567
+ };
2568
+
2340
2569
  } // namespace duckdb
2570
+
2571
+ namespace std {
2572
+ template <>
2573
+ struct hash<duckdb::ParquetIntervalTargetType> {
2574
+ size_t operator()(const duckdb::ParquetIntervalTargetType &val) const {
2575
+ return duckdb::Hash(duckdb::const_char_ptr_cast(val.bytes),
2576
+ duckdb::ParquetIntervalTargetType::PARQUET_INTERVAL_SIZE);
2577
+ }
2578
+ };
2579
+
2580
+ template <>
2581
+ struct hash<duckdb::ParquetUUIDTargetType> {
2582
+ size_t operator()(const duckdb::ParquetUUIDTargetType &val) const {
2583
+ return duckdb::Hash(duckdb::const_char_ptr_cast(val.bytes), duckdb::ParquetUUIDTargetType::PARQUET_UUID_SIZE);
2584
+ }
2585
+ };
2586
+
2587
+ template <>
2588
+ struct hash<duckdb::float_na_equal> {
2589
+ size_t operator()(const duckdb::float_na_equal &val) const {
2590
+ if (std::isnan(val.val)) {
2591
+ return duckdb::Hash<float>(std::numeric_limits<float>::quiet_NaN());
2592
+ }
2593
+ return duckdb::Hash<float>(val.val);
2594
+ }
2595
+ };
2596
+
2597
+ template <>
2598
+ struct hash<duckdb::double_na_equal> {
2599
+ inline size_t operator()(const duckdb::double_na_equal &val) const {
2600
+ if (std::isnan(val.val)) {
2601
+ return duckdb::Hash<double>(std::numeric_limits<double>::quiet_NaN());
2602
+ }
2603
+ return duckdb::Hash<double>(val.val);
2604
+ }
2605
+ };
2606
+ } // namespace std