duckdb 1.1.3 → 1.1.4-dev9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/NodeJS.yml +5 -54
- package/binding.gyp +73 -52
- package/package.json +1 -1
- package/src/duckdb/{src → extension}/core_functions/aggregate/algebraic/avg.cpp +2 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/algebraic/corr.cpp +4 -4
- package/src/duckdb/{src → extension}/core_functions/aggregate/algebraic/covar.cpp +2 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/algebraic/stddev.cpp +2 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/approx_count.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/arg_min_max.cpp +66 -18
- package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/bitagg.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/bitstring_agg.cpp +5 -7
- package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/bool.cpp +3 -1
- package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/kurtosis.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/product.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/skew.cpp +2 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/string_agg.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/aggregate/distributive/sum.cpp +13 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/holistic/approx_top_k.cpp +3 -3
- package/src/duckdb/{src → extension}/core_functions/aggregate/holistic/approximate_quantile.cpp +51 -15
- package/src/duckdb/{src → extension}/core_functions/aggregate/holistic/mad.cpp +25 -10
- package/src/duckdb/{src → extension}/core_functions/aggregate/holistic/mode.cpp +215 -71
- package/src/duckdb/{src → extension}/core_functions/aggregate/holistic/quantile.cpp +58 -31
- package/src/duckdb/{src → extension}/core_functions/aggregate/holistic/reservoir_quantile.cpp +2 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/nested/binned_histogram.cpp +9 -4
- package/src/duckdb/{src → extension}/core_functions/aggregate/nested/histogram.cpp +4 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/nested/list.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_avg.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_count.cpp +2 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_intercept.cpp +6 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_r2.cpp +2 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_slope.cpp +2 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_sxx_syy.cpp +2 -2
- package/src/duckdb/{src → extension}/core_functions/aggregate/regression/regr_sxy.cpp +3 -3
- package/src/duckdb/extension/core_functions/core_functions_extension.cpp +85 -0
- package/src/duckdb/{src → extension}/core_functions/function_list.cpp +30 -51
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/algebraic/corr.hpp +3 -7
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/algebraic_functions.hpp +1 -1
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/distributive_functions.hpp +16 -21
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/histogram_helpers.hpp +1 -1
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/holistic_functions.hpp +1 -1
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/nested_functions.hpp +1 -1
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/quantile_helpers.hpp +2 -2
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/quantile_sort_tree.hpp +140 -58
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/quantile_state.hpp +50 -43
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/regression/regr_count.hpp +2 -2
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/regression/regr_slope.hpp +3 -7
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/regression_functions.hpp +1 -1
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/array_functions.hpp +1 -1
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/bit_functions.hpp +1 -1
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/blob_functions.hpp +1 -10
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/date_functions.hpp +22 -55
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/debug_functions.hpp +1 -1
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/enum_functions.hpp +1 -1
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/generic_functions.hpp +1 -10
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/list_functions.hpp +4 -4
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/map_functions.hpp +1 -10
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/math_functions.hpp +1 -1
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/operators_functions.hpp +1 -1
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/random_functions.hpp +1 -1
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/string_functions.hpp +10 -103
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/struct_functions.hpp +1 -19
- package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/union_functions.hpp +1 -1
- package/src/duckdb/extension/core_functions/include/core_functions_extension.hpp +22 -0
- package/src/duckdb/{src → extension}/core_functions/lambda_functions.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/array/array_functions.cpp +11 -4
- package/src/duckdb/{src → extension}/core_functions/scalar/array/array_value.cpp +2 -2
- package/src/duckdb/{src → extension}/core_functions/scalar/bit/bitstring.cpp +12 -5
- package/src/duckdb/{src → extension}/core_functions/scalar/blob/base64.cpp +4 -2
- package/src/duckdb/{src → extension}/core_functions/scalar/blob/encode.cpp +4 -2
- package/src/duckdb/{src → extension}/core_functions/scalar/date/age.cpp +9 -3
- package/src/duckdb/extension/core_functions/scalar/date/current.cpp +29 -0
- package/src/duckdb/{src → extension}/core_functions/scalar/date/date_diff.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/date/date_part.cpp +42 -9
- package/src/duckdb/{src → extension}/core_functions/scalar/date/date_sub.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/date/date_trunc.cpp +4 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/date/epoch.cpp +19 -3
- package/src/duckdb/{src → extension}/core_functions/scalar/date/make_date.cpp +40 -5
- package/src/duckdb/{src → extension}/core_functions/scalar/date/time_bucket.cpp +4 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/date/to_interval.cpp +54 -28
- package/src/duckdb/{src → extension}/core_functions/scalar/debug/vector_type.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/enum/enum_functions.cpp +2 -7
- package/src/duckdb/{src → extension}/core_functions/scalar/generic/alias.cpp +2 -2
- package/src/duckdb/{src/function → extension/core_functions}/scalar/generic/binning.cpp +4 -3
- package/src/duckdb/{src → extension}/core_functions/scalar/generic/can_implicitly_cast.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/generic/current_setting.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/generic/hash.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/generic/least.cpp +30 -10
- package/src/duckdb/{src → extension}/core_functions/scalar/generic/stats.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/generic/system_functions.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/generic/typeof.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/list/array_slice.cpp +93 -88
- package/src/duckdb/{src → extension}/core_functions/scalar/list/flatten.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/list/list_aggregates.cpp +7 -3
- package/src/duckdb/{src → extension}/core_functions/scalar/list/list_distance.cpp +8 -2
- package/src/duckdb/{src → extension}/core_functions/scalar/list/list_filter.cpp +3 -3
- package/src/duckdb/{src → extension}/core_functions/scalar/list/list_has_any_or_all.cpp +3 -3
- package/src/duckdb/{src → extension}/core_functions/scalar/list/list_reduce.cpp +5 -5
- package/src/duckdb/{src → extension}/core_functions/scalar/list/list_sort.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/list/list_transform.cpp +3 -3
- package/src/duckdb/{src → extension}/core_functions/scalar/list/list_value.cpp +3 -3
- package/src/duckdb/{src → extension}/core_functions/scalar/list/range.cpp +7 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/map/cardinality.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/map/map.cpp +5 -4
- package/src/duckdb/{src → extension}/core_functions/scalar/map/map_concat.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/map/map_entries.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/map/map_extract.cpp +13 -25
- package/src/duckdb/{src → extension}/core_functions/scalar/map/map_from_entries.cpp +2 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/map/map_keys_values.cpp +11 -9
- package/src/duckdb/{src → extension}/core_functions/scalar/math/numeric.cpp +83 -37
- package/src/duckdb/{src → extension}/core_functions/scalar/operators/bitwise.cpp +19 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/random/random.cpp +4 -3
- package/src/duckdb/{src → extension}/core_functions/scalar/random/setseed.cpp +2 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/ascii.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/bar.cpp +6 -4
- package/src/duckdb/{src → extension}/core_functions/scalar/string/chr.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/damerau_levenshtein.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/format_bytes.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/hamming.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/hex.cpp +7 -3
- package/src/duckdb/{src → extension}/core_functions/scalar/string/instr.cpp +4 -4
- package/src/duckdb/{src → extension}/core_functions/scalar/string/jaccard.cpp +1 -1
- package/src/duckdb/extension/core_functions/scalar/string/jaro_winkler.cpp +112 -0
- package/src/duckdb/{src → extension}/core_functions/scalar/string/left_right.cpp +6 -6
- package/src/duckdb/{src → extension}/core_functions/scalar/string/levenshtein.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/pad.cpp +9 -5
- package/src/duckdb/{src → extension}/core_functions/scalar/string/parse_path.cpp +4 -4
- package/src/duckdb/{src → extension}/core_functions/scalar/string/printf.cpp +3 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/repeat.cpp +4 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/replace.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/reverse.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/starts_with.cpp +5 -3
- package/src/duckdb/{src → extension}/core_functions/scalar/string/to_base.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/translate.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/trim.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/unicode.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/string/url_encode.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/struct/struct_insert.cpp +25 -31
- package/src/duckdb/{src → extension}/core_functions/scalar/union/union_extract.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/union/union_tag.cpp +1 -1
- package/src/duckdb/{src → extension}/core_functions/scalar/union/union_value.cpp +3 -3
- package/src/duckdb/extension/icu/icu-dateadd.cpp +16 -11
- package/src/duckdb/extension/icu/icu-datefunc.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datepart.cpp +8 -5
- package/src/duckdb/extension/icu/icu-datesub.cpp +2 -2
- package/src/duckdb/extension/icu/icu-datetrunc.cpp +8 -1
- package/src/duckdb/extension/icu/icu-list-range.cpp +2 -2
- package/src/duckdb/extension/icu/icu-makedate.cpp +18 -7
- package/src/duckdb/extension/icu/icu-strptime.cpp +9 -3
- package/src/duckdb/extension/icu/icu-table-range.cpp +2 -2
- package/src/duckdb/extension/icu/icu-timebucket.cpp +4 -1
- package/src/duckdb/extension/icu/icu-timezone.cpp +67 -1
- package/src/duckdb/extension/icu/icu_extension.cpp +60 -5
- package/src/duckdb/extension/icu/include/icu-datefunc.hpp +2 -1
- package/src/duckdb/extension/icu/third_party/icu/common/bytestriebuilder.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/dtintrv.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/filteredbrk.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/locid.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/lsr.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/lsr.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/common/messagepattern.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/common/normlzr.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/rbbinode.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/schriter.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/stringtriebuilder.cpp +8 -8
- package/src/duckdb/extension/icu/third_party/icu/common/ucharstriebuilder.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/uchriter.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/brkiter.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/bytestriebuilder.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/chariter.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/dtintrv.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/locid.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/messagepattern.h +4 -4
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/normlzr.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/parsepos.h +4 -4
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/rbbi.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/schriter.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/strenum.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/stringpiece.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/stringtriebuilder.h +9 -9
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/ucharstriebuilder.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/uchriter.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/uniset.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/unistr.h +12 -12
- package/src/duckdb/extension/icu/third_party/icu/common/unicode/uobject.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/common/unifiedcache.h +4 -4
- package/src/duckdb/extension/icu/third_party/icu/common/uniset.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/ustr_titlecase_brkiter.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/ustrenum.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/common/uvector.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/uvector.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/common/uvectr32.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/uvectr32.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/common/uvectr64.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/common/uvectr64.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/i18n/alphaindex.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/calendar.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/choicfmt.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/coleitr.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/coll.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/collationiterator.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/collationiterator.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/collationsettings.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/collationsettings.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/currpinf.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/datefmt.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/dcfmtsym.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/decimfmt.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/dtfmtsym.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/dtitvfmt.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/dtitvinf.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/dtptngen.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/dtptngen_impl.h +4 -4
- package/src/duckdb/extension/icu/third_party/icu/i18n/dtrule.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/fmtable.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/format.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/fpositer.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/measfmt.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/measunit.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/measure.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/msgfmt.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/nfrs.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/nfrs.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/nfrule.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/nfrule.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/nfsubs.cpp +9 -9
- package/src/duckdb/extension/icu/third_party/icu/i18n/nfsubs.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/number_asformat.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/number_asformat.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/numfmt.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/olsontz.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/olsontz.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/plurfmt.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/plurrule.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/rbnf.cpp +4 -4
- package/src/duckdb/extension/icu/third_party/icu/i18n/rbtz.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/region.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/reldtfmt.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/reldtfmt.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/rulebasedcollator.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/selfmt.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/simpletz.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/smpdtfmt.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/sortkey.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/timezone.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/tmutamt.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/tzfmt.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/tzgnames.cpp +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/tzgnames.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/tznames.cpp +3 -3
- package/src/duckdb/extension/icu/third_party/icu/i18n/tznames_impl.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/tznames_impl.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/tzrule.cpp +8 -8
- package/src/duckdb/extension/icu/third_party/icu/i18n/tztrans.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/alphaindex.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/calendar.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/choicfmt.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/coleitr.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/coll.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/currpinf.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/datefmt.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/dcfmtsym.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/decimfmt.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/dtfmtsym.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/dtitvfmt.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/dtitvinf.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/dtptngen.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/dtrule.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/fieldpos.h +4 -4
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/fmtable.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/format.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/fpositer.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/measfmt.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/measunit.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/measure.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/msgfmt.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/numfmt.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/plurfmt.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/plurrule.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/rbnf.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/rbtz.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/region.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/search.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/selfmt.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/simpletz.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/smpdtfmt.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/sortkey.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/stsearch.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tblcoll.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/timezone.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tmutamt.h +3 -3
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tmutfmt.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tzfmt.h +1 -1
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tznames.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tzrule.h +8 -8
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/tztrans.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/unicode/vtzone.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/utf16collationiterator.cpp +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/utf16collationiterator.h +2 -2
- package/src/duckdb/extension/icu/third_party/icu/i18n/vtzone.cpp +2 -2
- package/src/duckdb/extension/json/buffered_json_reader.cpp +6 -1
- package/src/duckdb/extension/json/include/buffered_json_reader.hpp +2 -0
- package/src/duckdb/extension/json/include/json_common.hpp +14 -10
- package/src/duckdb/extension/json/include/json_scan.hpp +48 -7
- package/src/duckdb/extension/json/include/json_structure.hpp +2 -1
- package/src/duckdb/extension/json/include/json_transform.hpp +5 -2
- package/src/duckdb/extension/json/json_functions/copy_json.cpp +1 -1
- package/src/duckdb/extension/json/json_functions/json_create.cpp +57 -20
- package/src/duckdb/extension/json/json_functions/json_serialize_plan.cpp +7 -6
- package/src/duckdb/extension/json/json_functions/json_serialize_sql.cpp +6 -5
- package/src/duckdb/extension/json/json_functions/json_structure.cpp +20 -17
- package/src/duckdb/extension/json/json_functions/json_transform.cpp +48 -17
- package/src/duckdb/extension/json/json_functions/read_json.cpp +83 -34
- package/src/duckdb/extension/json/json_functions/read_json_objects.cpp +3 -3
- package/src/duckdb/extension/json/json_functions.cpp +14 -16
- package/src/duckdb/extension/json/json_scan.cpp +36 -16
- package/src/duckdb/extension/json/json_serializer.cpp +1 -1
- package/src/duckdb/extension/json/serialize_json.cpp +2 -2
- package/src/duckdb/extension/parquet/column_reader.cpp +136 -116
- package/src/duckdb/extension/parquet/column_writer.cpp +870 -604
- package/src/duckdb/extension/parquet/geo_parquet.cpp +4 -5
- package/src/duckdb/extension/parquet/include/boolean_column_reader.hpp +0 -4
- package/src/duckdb/extension/parquet/include/column_reader.hpp +24 -19
- package/src/duckdb/extension/parquet/include/column_writer.hpp +7 -5
- package/src/duckdb/extension/parquet/include/decode_utils.hpp +138 -18
- package/src/duckdb/extension/parquet/include/geo_parquet.hpp +4 -3
- package/src/duckdb/extension/parquet/include/null_column_reader.hpp +1 -14
- package/src/duckdb/extension/parquet/include/parquet_bss_encoder.hpp +45 -0
- package/src/duckdb/extension/parquet/include/parquet_crypto.hpp +1 -1
- package/src/duckdb/extension/parquet/include/parquet_dbp_decoder.hpp +101 -90
- package/src/duckdb/extension/parquet/include/parquet_dbp_encoder.hpp +179 -0
- package/src/duckdb/extension/parquet/include/parquet_decimal_utils.hpp +2 -3
- package/src/duckdb/extension/parquet/include/parquet_dlba_encoder.hpp +48 -0
- package/src/duckdb/extension/parquet/include/parquet_extension.hpp +8 -0
- package/src/duckdb/extension/parquet/include/parquet_file_metadata_cache.hpp +2 -2
- package/src/duckdb/extension/parquet/include/parquet_metadata.hpp +5 -0
- package/src/duckdb/extension/parquet/include/parquet_reader.hpp +22 -18
- package/src/duckdb/extension/parquet/include/parquet_rle_bp_decoder.hpp +1 -5
- package/src/duckdb/extension/parquet/include/parquet_statistics.hpp +87 -3
- package/src/duckdb/extension/parquet/include/parquet_writer.hpp +30 -16
- package/src/duckdb/extension/parquet/include/resizable_buffer.hpp +1 -0
- package/src/duckdb/extension/parquet/include/string_column_reader.hpp +0 -8
- package/src/duckdb/extension/parquet/include/struct_column_reader.hpp +1 -1
- package/src/duckdb/extension/parquet/include/templated_column_reader.hpp +1 -42
- package/src/duckdb/extension/parquet/include/thrift_tools.hpp +13 -1
- package/src/duckdb/extension/parquet/include/zstd_file_system.hpp +4 -0
- package/src/duckdb/extension/parquet/parquet_extension.cpp +240 -197
- package/src/duckdb/extension/parquet/parquet_metadata.cpp +138 -6
- package/src/duckdb/extension/parquet/parquet_reader.cpp +155 -79
- package/src/duckdb/extension/parquet/parquet_statistics.cpp +258 -38
- package/src/duckdb/extension/parquet/parquet_timestamp.cpp +17 -3
- package/src/duckdb/extension/parquet/parquet_writer.cpp +65 -34
- package/src/duckdb/extension/parquet/serialize_parquet.cpp +4 -0
- package/src/duckdb/extension/parquet/zstd_file_system.cpp +13 -0
- package/src/duckdb/src/catalog/catalog.cpp +272 -97
- package/src/duckdb/src/catalog/catalog_entry/duck_index_entry.cpp +9 -4
- package/src/duckdb/src/catalog/catalog_entry/duck_schema_entry.cpp +8 -0
- package/src/duckdb/src/catalog/catalog_entry/duck_table_entry.cpp +145 -95
- package/src/duckdb/src/catalog/catalog_entry/scalar_function_catalog_entry.cpp +9 -3
- package/src/duckdb/src/catalog/catalog_entry/schema_catalog_entry.cpp +15 -0
- package/src/duckdb/src/catalog/catalog_entry/table_catalog_entry.cpp +40 -24
- package/src/duckdb/src/catalog/catalog_entry/table_function_catalog_entry.cpp +1 -1
- package/src/duckdb/src/catalog/catalog_entry/type_catalog_entry.cpp +2 -2
- package/src/duckdb/src/catalog/catalog_entry.cpp +3 -0
- package/src/duckdb/src/catalog/catalog_entry_retriever.cpp +60 -5
- package/src/duckdb/src/catalog/catalog_search_path.cpp +27 -14
- package/src/duckdb/src/catalog/catalog_set.cpp +75 -31
- package/src/duckdb/src/catalog/default/default_functions.cpp +13 -8
- package/src/duckdb/src/catalog/default/default_views.cpp +1 -0
- package/src/duckdb/src/catalog/dependency_manager.cpp +133 -5
- package/src/duckdb/src/catalog/duck_catalog.cpp +17 -9
- package/src/duckdb/src/common/adbc/adbc.cpp +18 -0
- package/src/duckdb/src/common/allocator.cpp +3 -1
- package/src/duckdb/src/common/arrow/arrow_appender.cpp +30 -9
- package/src/duckdb/src/common/arrow/arrow_converter.cpp +63 -82
- package/src/duckdb/src/common/arrow/arrow_merge_event.cpp +4 -3
- package/src/duckdb/src/common/arrow/arrow_type_extension.cpp +361 -0
- package/src/duckdb/src/common/arrow/arrow_util.cpp +10 -6
- package/src/duckdb/src/common/arrow/arrow_wrapper.cpp +6 -2
- package/src/duckdb/src/common/arrow/physical_arrow_collector.cpp +2 -1
- package/src/duckdb/src/common/arrow/schema_metadata.cpp +27 -14
- package/src/duckdb/src/common/assert.cpp +1 -2
- package/src/duckdb/src/common/bind_helpers.cpp +1 -1
- package/src/duckdb/src/common/box_renderer.cpp +316 -26
- package/src/duckdb/src/common/cgroups.cpp +7 -1
- package/src/duckdb/src/common/compressed_file_system.cpp +1 -1
- package/src/duckdb/src/common/enum_util.cpp +2865 -6882
- package/src/duckdb/src/common/enums/compression_type.cpp +12 -0
- package/src/duckdb/src/common/enums/metric_type.cpp +24 -0
- package/src/duckdb/src/common/enums/optimizer_type.cpp +4 -0
- package/src/duckdb/src/common/enums/physical_operator_type.cpp +2 -0
- package/src/duckdb/src/common/error_data.cpp +23 -6
- package/src/duckdb/src/common/exception/binder_exception.cpp +1 -1
- package/src/duckdb/src/common/exception.cpp +20 -28
- package/src/duckdb/src/common/extra_type_info.cpp +85 -20
- package/src/duckdb/src/common/file_buffer.cpp +5 -2
- package/src/duckdb/src/common/file_system.cpp +8 -3
- package/src/duckdb/src/common/fsst.cpp +3 -3
- package/src/duckdb/src/common/hive_partitioning.cpp +1 -1
- package/src/duckdb/src/common/local_file_system.cpp +169 -60
- package/src/duckdb/src/common/multi_file_list.cpp +4 -1
- package/src/duckdb/src/common/multi_file_reader.cpp +240 -63
- package/src/duckdb/src/common/opener_file_system.cpp +37 -0
- package/src/duckdb/src/common/operator/cast_operators.cpp +77 -11
- package/src/duckdb/src/common/operator/string_cast.cpp +6 -2
- package/src/duckdb/src/common/pipe_file_system.cpp +4 -4
- package/src/duckdb/src/common/progress_bar/progress_bar.cpp +25 -14
- package/src/duckdb/src/common/radix_partitioning.cpp +17 -16
- package/src/duckdb/src/common/random_engine.cpp +39 -3
- package/src/duckdb/src/common/render_tree.cpp +3 -19
- package/src/duckdb/src/common/row_operations/row_external.cpp +1 -1
- package/src/duckdb/src/common/row_operations/row_gather.cpp +2 -58
- package/src/duckdb/src/common/row_operations/row_matcher.cpp +2 -2
- package/src/duckdb/src/common/row_operations/row_radix_scatter.cpp +2 -0
- package/src/duckdb/src/common/row_operations/row_scatter.cpp +20 -19
- package/src/duckdb/src/common/serializer/buffered_file_writer.cpp +1 -1
- package/src/duckdb/src/common/serializer/memory_stream.cpp +36 -0
- package/src/duckdb/src/common/sort/comparators.cpp +7 -7
- package/src/duckdb/src/common/sort/partition_state.cpp +2 -2
- package/src/duckdb/src/common/stacktrace.cpp +127 -0
- package/src/duckdb/src/common/string_util.cpp +157 -32
- package/src/duckdb/src/common/tree_renderer/text_tree_renderer.cpp +15 -3
- package/src/duckdb/src/common/types/column/column_data_allocator.cpp +4 -0
- package/src/duckdb/src/common/types/column/column_data_collection.cpp +71 -8
- package/src/duckdb/src/common/types/column/column_data_collection_segment.cpp +27 -6
- package/src/duckdb/src/common/types/conflict_manager.cpp +21 -7
- package/src/duckdb/src/common/types/date.cpp +39 -25
- package/src/duckdb/src/common/types/row/partitioned_tuple_data.cpp +4 -11
- package/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +21 -7
- package/src/duckdb/src/common/types/row/tuple_data_collection.cpp +10 -1
- package/src/duckdb/src/common/types/row/tuple_data_scatter_gather.cpp +17 -17
- package/src/duckdb/src/common/types/timestamp.cpp +70 -33
- package/src/duckdb/src/common/types/uuid.cpp +11 -0
- package/src/duckdb/src/common/types/validity_mask.cpp +16 -5
- package/src/duckdb/src/common/types/value.cpp +357 -199
- package/src/duckdb/src/common/types/varint.cpp +64 -18
- package/src/duckdb/src/common/types/vector.cpp +78 -38
- package/src/duckdb/src/common/types.cpp +199 -92
- package/src/duckdb/src/common/vector_operations/comparison_operators.cpp +2 -1
- package/src/duckdb/src/common/vector_operations/is_distinct_from.cpp +32 -5
- package/src/duckdb/src/common/vector_operations/vector_hash.cpp +3 -1
- package/src/duckdb/src/execution/adaptive_filter.cpp +6 -2
- package/src/duckdb/src/execution/aggregate_hashtable.cpp +410 -111
- package/src/duckdb/src/execution/column_binding_resolver.cpp +2 -2
- package/src/duckdb/src/execution/expression_executor/execute_between.cpp +6 -0
- package/src/duckdb/src/execution/expression_executor/execute_cast.cpp +4 -3
- package/src/duckdb/src/execution/expression_executor/execute_comparison.cpp +2 -2
- package/src/duckdb/src/execution/expression_executor/execute_conjunction.cpp +2 -2
- package/src/duckdb/src/execution/expression_executor/execute_function.cpp +1 -0
- package/src/duckdb/src/execution/expression_executor/execute_operator.cpp +5 -4
- package/src/duckdb/src/execution/expression_executor.cpp +5 -3
- package/src/duckdb/src/execution/index/art/art.cpp +208 -72
- package/src/duckdb/src/execution/index/art/base_leaf.cpp +1 -1
- package/src/duckdb/src/execution/index/art/leaf.cpp +12 -7
- package/src/duckdb/src/execution/index/art/node.cpp +2 -1
- package/src/duckdb/src/execution/index/art/node256_leaf.cpp +6 -6
- package/src/duckdb/src/execution/index/art/plan_art.cpp +50 -55
- package/src/duckdb/src/execution/index/art/prefix.cpp +7 -13
- package/src/duckdb/src/execution/index/bound_index.cpp +30 -5
- package/src/duckdb/src/execution/index/fixed_size_allocator.cpp +3 -5
- package/src/duckdb/src/execution/index/fixed_size_buffer.cpp +14 -9
- package/src/duckdb/src/execution/join_hashtable.cpp +254 -158
- package/src/duckdb/src/execution/operator/aggregate/grouped_aggregate_data.cpp +1 -1
- package/src/duckdb/src/execution/operator/aggregate/physical_hash_aggregate.cpp +7 -7
- package/src/duckdb/src/execution/operator/aggregate/physical_partitioned_aggregate.cpp +226 -0
- package/src/duckdb/src/execution/operator/aggregate/physical_perfecthash_aggregate.cpp +3 -3
- package/src/duckdb/src/execution/operator/aggregate/physical_streaming_window.cpp +3 -3
- package/src/duckdb/src/execution/operator/aggregate/physical_ungrouped_aggregate.cpp +77 -70
- package/src/duckdb/src/execution/operator/aggregate/physical_window.cpp +114 -50
- package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp +2 -2
- package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer_manager.cpp +19 -10
- package/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_file_handle.cpp +22 -15
- package/src/duckdb/src/execution/operator/csv_scanner/encode/csv_encoder.cpp +95 -0
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/column_count_scanner.cpp +6 -1
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/csv_schema.cpp +75 -2
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/scanner_boundary.cpp +40 -12
- package/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +395 -163
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/csv_sniffer.cpp +20 -23
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/dialect_detection.cpp +115 -49
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/header_detection.cpp +66 -12
- package/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +20 -23
- package/src/duckdb/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp +220 -46
- package/src/duckdb/src/execution/operator/csv_scanner/table_function/csv_file_scanner.cpp +43 -32
- package/src/duckdb/src/execution/operator/csv_scanner/table_function/global_csv_state.cpp +54 -119
- package/src/duckdb/src/execution/operator/csv_scanner/util/csv_error.cpp +184 -20
- package/src/duckdb/src/execution/operator/csv_scanner/util/csv_reader_options.cpp +83 -21
- package/src/duckdb/src/execution/operator/csv_scanner/util/csv_validator.cpp +63 -0
- package/src/duckdb/src/execution/operator/helper/physical_reservoir_sample.cpp +7 -4
- package/src/duckdb/src/execution/operator/helper/physical_set.cpp +1 -1
- package/src/duckdb/src/execution/operator/helper/physical_vacuum.cpp +3 -2
- package/src/duckdb/src/execution/operator/helper/physical_verify_vector.cpp +9 -1
- package/src/duckdb/src/execution/operator/join/perfect_hash_join_executor.cpp +132 -15
- package/src/duckdb/src/execution/operator/join/physical_blockwise_nl_join.cpp +64 -55
- package/src/duckdb/src/execution/operator/join/physical_hash_join.cpp +284 -154
- package/src/duckdb/src/execution/operator/join/physical_iejoin.cpp +40 -55
- package/src/duckdb/src/execution/operator/join/physical_range_join.cpp +0 -1
- package/src/duckdb/src/execution/operator/order/physical_order.cpp +7 -3
- package/src/duckdb/src/execution/operator/order/physical_top_n.cpp +298 -227
- package/src/duckdb/src/execution/operator/persistent/csv_rejects_table.cpp +5 -2
- package/src/duckdb/src/execution/operator/persistent/physical_batch_copy_to_file.cpp +3 -4
- package/src/duckdb/src/execution/operator/persistent/physical_batch_insert.cpp +32 -19
- package/src/duckdb/src/execution/operator/persistent/physical_copy_database.cpp +1 -0
- package/src/duckdb/src/execution/operator/persistent/physical_copy_to_file.cpp +6 -0
- package/src/duckdb/src/execution/operator/persistent/physical_delete.cpp +58 -19
- package/src/duckdb/src/execution/operator/persistent/physical_export.cpp +11 -27
- package/src/duckdb/src/execution/operator/persistent/physical_insert.cpp +308 -119
- package/src/duckdb/src/execution/operator/persistent/physical_update.cpp +105 -55
- package/src/duckdb/src/execution/operator/projection/physical_tableinout_function.cpp +6 -2
- package/src/duckdb/src/execution/operator/projection/physical_unnest.cpp +1 -1
- package/src/duckdb/src/execution/operator/scan/physical_positional_scan.cpp +15 -6
- package/src/duckdb/src/execution/operator/scan/physical_table_scan.cpp +92 -50
- package/src/duckdb/src/execution/operator/schema/physical_alter.cpp +0 -1
- package/src/duckdb/src/execution/operator/schema/physical_attach.cpp +8 -4
- package/src/duckdb/src/execution/operator/schema/physical_create_art_index.cpp +54 -22
- package/src/duckdb/src/execution/operator/set/physical_union.cpp +5 -1
- package/src/duckdb/src/execution/physical_operator.cpp +15 -9
- package/src/duckdb/src/execution/physical_plan/plan_aggregate.cpp +101 -12
- package/src/duckdb/src/execution/physical_plan/plan_comparison_join.cpp +11 -140
- package/src/duckdb/src/execution/physical_plan/plan_create_index.cpp +11 -13
- package/src/duckdb/src/execution/physical_plan/plan_cte.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_delete.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_delim_join.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_distinct.cpp +6 -5
- package/src/duckdb/src/execution/physical_plan/plan_export.cpp +0 -4
- package/src/duckdb/src/execution/physical_plan/plan_filter.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_get.cpp +16 -13
- package/src/duckdb/src/execution/physical_plan/plan_insert.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_order.cpp +7 -7
- package/src/duckdb/src/execution/physical_plan/plan_prepare.cpp +2 -2
- package/src/duckdb/src/execution/physical_plan/plan_projection.cpp +1 -1
- package/src/duckdb/src/execution/physical_plan/plan_sample.cpp +8 -3
- package/src/duckdb/src/execution/physical_plan/plan_set_operation.cpp +1 -2
- package/src/duckdb/src/execution/physical_plan/plan_simple.cpp +1 -2
- package/src/duckdb/src/execution/physical_plan/plan_top_n.cpp +3 -2
- package/src/duckdb/src/execution/physical_plan_generator.cpp +0 -22
- package/src/duckdb/src/execution/radix_partitioned_hashtable.cpp +136 -116
- package/src/duckdb/src/execution/sample/base_reservoir_sample.cpp +136 -0
- package/src/duckdb/src/execution/sample/reservoir_sample.cpp +930 -0
- package/src/duckdb/src/function/aggregate/distributive/count.cpp +6 -12
- package/src/duckdb/src/function/aggregate/distributive/{first.cpp → first_last_any.cpp} +37 -18
- package/src/duckdb/src/{core_functions → function}/aggregate/distributive/minmax.cpp +19 -12
- package/src/duckdb/src/function/aggregate/sorted_aggregate_function.cpp +72 -13
- package/src/duckdb/src/function/built_in_functions.cpp +85 -2
- package/src/duckdb/src/function/cast/decimal_cast.cpp +1 -1
- package/src/duckdb/src/function/cast/string_cast.cpp +1 -1
- package/src/duckdb/src/function/cast/struct_cast.cpp +81 -49
- package/src/duckdb/src/function/cast/union/from_struct.cpp +7 -5
- package/src/duckdb/src/function/compression_config.cpp +6 -0
- package/src/duckdb/src/function/encoding_function.cpp +134 -0
- package/src/duckdb/src/function/function.cpp +8 -13
- package/src/duckdb/src/function/function_binder.cpp +100 -21
- package/src/duckdb/src/function/function_list.cpp +178 -0
- package/src/duckdb/src/function/macro_function.cpp +4 -4
- package/src/duckdb/src/function/pragma/pragma_functions.cpp +0 -2
- package/src/duckdb/src/function/pragma/pragma_queries.cpp +0 -4
- package/src/duckdb/src/{core_functions/core_functions.cpp → function/register_function_list.cpp} +12 -8
- package/src/duckdb/src/function/scalar/compressed_materialization/compress_integral.cpp +62 -23
- package/src/duckdb/src/function/scalar/compressed_materialization/compress_string.cpp +33 -16
- package/src/duckdb/src/function/scalar/compressed_materialization_utils.cpp +21 -0
- package/src/duckdb/src/{core_functions/scalar/blob → function/scalar}/create_sort_key.cpp +86 -23
- package/src/duckdb/src/{core_functions → function}/scalar/date/strftime.cpp +6 -4
- package/src/duckdb/src/function/scalar/generic/constant_or_null.cpp +5 -7
- package/src/duckdb/src/{core_functions → function}/scalar/generic/error.cpp +3 -1
- package/src/duckdb/src/function/scalar/generic/getvariable.cpp +2 -2
- package/src/duckdb/src/function/scalar/list/contains_or_position.cpp +1 -7
- package/src/duckdb/src/function/scalar/list/list_extract.cpp +27 -21
- package/src/duckdb/src/function/scalar/list/list_resize.cpp +8 -12
- package/src/duckdb/src/function/scalar/list/list_select.cpp +1 -4
- package/src/duckdb/src/function/scalar/list/list_zip.cpp +6 -6
- package/src/duckdb/src/{core_functions → function}/scalar/map/map_contains.cpp +2 -2
- package/src/duckdb/src/function/scalar/nested_functions.cpp +0 -11
- package/src/duckdb/src/function/scalar/{operators → operator}/add.cpp +2 -1
- package/src/duckdb/src/function/scalar/{operators → operator}/arithmetic.cpp +195 -127
- package/src/duckdb/src/function/scalar/sequence/nextval.cpp +30 -21
- package/src/duckdb/src/function/scalar/strftime_format.cpp +10 -0
- package/src/duckdb/src/function/scalar/string/caseconvert.cpp +11 -41
- package/src/duckdb/src/function/scalar/string/concat.cpp +22 -20
- package/src/duckdb/src/function/scalar/string/concat_ws.cpp +2 -2
- package/src/duckdb/src/function/scalar/string/contains.cpp +16 -19
- package/src/duckdb/src/function/scalar/string/length.cpp +38 -24
- package/src/duckdb/src/function/scalar/string/like.cpp +80 -47
- package/src/duckdb/src/{core_functions → function}/scalar/string/md5.cpp +2 -2
- package/src/duckdb/src/function/scalar/string/nfc_normalize.cpp +2 -6
- package/src/duckdb/src/function/scalar/string/prefix.cpp +0 -4
- package/src/duckdb/src/function/scalar/string/regexp/regexp_extract_all.cpp +2 -1
- package/src/duckdb/src/function/scalar/string/regexp.cpp +17 -7
- package/src/duckdb/src/{core_functions → function}/scalar/string/regexp_escape.cpp +2 -2
- package/src/duckdb/src/{core_functions → function}/scalar/string/sha1.cpp +1 -1
- package/src/duckdb/src/{core_functions → function}/scalar/string/sha256.cpp +1 -1
- package/src/duckdb/src/{core_functions → function}/scalar/string/string_split.cpp +4 -5
- package/src/duckdb/src/function/scalar/string/strip_accents.cpp +3 -6
- package/src/duckdb/src/function/scalar/string/substring.cpp +14 -13
- package/src/duckdb/src/function/scalar/string/suffix.cpp +0 -4
- package/src/duckdb/src/function/scalar/struct/struct_concat.cpp +115 -0
- package/src/duckdb/src/function/scalar/struct/struct_extract.cpp +35 -31
- package/src/duckdb/src/{core_functions → function}/scalar/struct/struct_pack.cpp +7 -7
- package/src/duckdb/src/function/scalar/system/aggregate_export.cpp +5 -8
- package/src/duckdb/src/function/scalar/system/write_log.cpp +170 -0
- package/src/duckdb/src/function/scalar_function.cpp +5 -5
- package/src/duckdb/src/function/table/arrow/arrow_array_scan_state.cpp +3 -2
- package/src/duckdb/src/function/table/arrow/arrow_duck_schema.cpp +287 -1
- package/src/duckdb/src/function/table/arrow/arrow_type_info.cpp +6 -6
- package/src/duckdb/src/function/table/arrow.cpp +32 -352
- package/src/duckdb/src/function/table/arrow_conversion.cpp +43 -7
- package/src/duckdb/src/function/table/copy_csv.cpp +38 -23
- package/src/duckdb/src/function/table/glob.cpp +1 -1
- package/src/duckdb/src/function/table/query_function.cpp +12 -7
- package/src/duckdb/src/function/table/read_csv.cpp +114 -46
- package/src/duckdb/src/function/table/read_file.cpp +26 -6
- package/src/duckdb/src/function/table/sniff_csv.cpp +25 -5
- package/src/duckdb/src/function/table/system/duckdb_columns.cpp +1 -1
- package/src/duckdb/src/function/table/system/duckdb_constraints.cpp +1 -1
- package/src/duckdb/src/function/table/system/duckdb_dependencies.cpp +6 -7
- package/src/duckdb/src/function/table/system/duckdb_extensions.cpp +1 -1
- package/src/duckdb/src/function/table/system/duckdb_functions.cpp +141 -16
- package/src/duckdb/src/function/table/system/duckdb_log.cpp +64 -0
- package/src/duckdb/src/function/table/system/duckdb_log_contexts.cpp +65 -0
- package/src/duckdb/src/function/table/system/duckdb_memory.cpp +0 -1
- package/src/duckdb/src/function/table/system/duckdb_settings.cpp +1 -1
- package/src/duckdb/src/function/table/system/duckdb_tables.cpp +1 -13
- package/src/duckdb/src/function/table/system/duckdb_types.cpp +1 -1
- package/src/duckdb/src/function/table/system/pragma_storage_info.cpp +17 -0
- package/src/duckdb/src/function/table/system/pragma_table_info.cpp +6 -0
- package/src/duckdb/src/function/table/system/pragma_table_sample.cpp +95 -0
- package/src/duckdb/src/function/table/system/test_all_types.cpp +56 -46
- package/src/duckdb/src/function/table/system_functions.cpp +3 -0
- package/src/duckdb/src/function/table/table_scan.cpp +487 -289
- package/src/duckdb/src/function/table/version/pragma_version.cpp +3 -3
- package/src/duckdb/src/function/table_function.cpp +10 -6
- package/src/duckdb/src/function/window/window_aggregate_function.cpp +248 -0
- package/src/duckdb/src/function/window/window_aggregate_states.cpp +48 -0
- package/src/duckdb/src/function/window/window_aggregator.cpp +88 -0
- package/src/duckdb/src/function/window/window_boundaries_state.cpp +854 -0
- package/src/duckdb/src/function/window/window_collection.cpp +146 -0
- package/src/duckdb/src/function/window/window_constant_aggregator.cpp +357 -0
- package/src/duckdb/src/function/window/window_custom_aggregator.cpp +146 -0
- package/src/duckdb/src/function/window/window_distinct_aggregator.cpp +758 -0
- package/src/duckdb/src/function/window/window_executor.cpp +99 -0
- package/src/duckdb/src/function/window/window_index_tree.cpp +63 -0
- package/src/duckdb/src/function/window/window_merge_sort_tree.cpp +275 -0
- package/src/duckdb/src/function/window/window_naive_aggregator.cpp +361 -0
- package/src/duckdb/src/function/window/window_rank_function.cpp +288 -0
- package/src/duckdb/src/function/window/window_rownumber_function.cpp +191 -0
- package/src/duckdb/src/function/window/window_segment_tree.cpp +594 -0
- package/src/duckdb/src/function/window/window_shared_expressions.cpp +50 -0
- package/src/duckdb/src/function/window/window_token_tree.cpp +142 -0
- package/src/duckdb/src/function/window/window_value_function.cpp +566 -0
- package/src/duckdb/src/include/duckdb/catalog/catalog.hpp +74 -17
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_index_entry.hpp +1 -1
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_schema_entry.hpp +2 -0
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/duck_table_entry.hpp +9 -0
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/function_entry.hpp +4 -10
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/pragma_function_catalog_entry.hpp +1 -1
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/scalar_function_catalog_entry.hpp +2 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/schema_catalog_entry.hpp +2 -0
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/table_catalog_entry.hpp +18 -3
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/table_function_catalog_entry.hpp +1 -1
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry/type_catalog_entry.hpp +2 -1
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry.hpp +5 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog_entry_retriever.hpp +21 -18
- package/src/duckdb/src/include/duckdb/catalog/catalog_search_path.hpp +3 -2
- package/src/duckdb/src/include/duckdb/catalog/catalog_set.hpp +10 -2
- package/src/duckdb/src/include/duckdb/catalog/dependency_manager.hpp +11 -0
- package/src/duckdb/src/include/duckdb/catalog/duck_catalog.hpp +9 -4
- package/src/duckdb/src/include/duckdb/common/allocator.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/array_ptr.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/arrow/appender/append_data.hpp +4 -1
- package/src/duckdb/src/include/duckdb/common/arrow/appender/list_data.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/arrow/appender/list_view_data.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/arrow/appender/varchar_data.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_appender.hpp +7 -3
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_converter.hpp +26 -3
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_query_result.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_type_extension.hpp +144 -0
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_util.hpp +5 -2
- package/src/duckdb/src/include/duckdb/common/arrow/result_arrow_wrapper.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/arrow/schema_metadata.hpp +11 -4
- package/src/duckdb/src/include/duckdb/common/assert.hpp +12 -1
- package/src/duckdb/src/include/duckdb/common/atomic_ptr.hpp +102 -0
- package/src/duckdb/src/include/duckdb/common/box_renderer.hpp +65 -6
- package/src/duckdb/src/include/duckdb/common/chrono.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/column_index.hpp +72 -0
- package/src/duckdb/src/include/duckdb/common/enum_util.hpp +120 -0
- package/src/duckdb/src/include/duckdb/{core_functions/core_functions.hpp → common/enums/collation_type.hpp} +2 -7
- package/src/duckdb/src/include/duckdb/common/enums/compression_type.hpp +5 -2
- package/src/duckdb/src/include/duckdb/common/enums/function_errors.hpp +18 -0
- package/src/duckdb/src/include/duckdb/common/enums/memory_tag.hpp +3 -2
- package/src/duckdb/src/include/duckdb/common/enums/metric_type.hpp +7 -2
- package/src/duckdb/src/include/duckdb/common/enums/optimizer_type.hpp +4 -0
- package/src/duckdb/src/include/duckdb/common/enums/order_preservation_type.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/enums/physical_operator_type.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/enums/profiler_format.hpp +1 -1
- package/src/duckdb/src/include/duckdb/{core_functions/aggregate → common/enums}/quantile_enum.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/enums/scan_vector_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/error_data.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/exception/parser_exception.hpp +4 -0
- package/src/duckdb/src/include/duckdb/common/exception.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/extension_type_info.hpp +37 -0
- package/src/duckdb/src/include/duckdb/common/extra_operator_info.hpp +7 -2
- package/src/duckdb/src/include/duckdb/common/extra_type_info.hpp +9 -3
- package/src/duckdb/src/include/duckdb/common/fast_mem.hpp +6 -6
- package/src/duckdb/src/include/duckdb/common/file_buffer.hpp +19 -10
- package/src/duckdb/src/include/duckdb/common/file_opener.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/file_system.hpp +6 -1
- package/src/duckdb/src/include/duckdb/common/fsst.hpp +2 -2
- package/src/duckdb/src/include/duckdb/common/helper.hpp +6 -0
- package/src/duckdb/src/include/duckdb/common/hugeint.hpp +10 -0
- package/src/duckdb/src/include/duckdb/common/insertion_order_preserving_map.hpp +12 -2
- package/src/duckdb/src/include/duckdb/common/local_file_system.hpp +3 -0
- package/src/duckdb/src/include/duckdb/common/multi_file_list.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/multi_file_reader.hpp +147 -27
- package/src/duckdb/src/include/duckdb/common/multi_file_reader_options.hpp +4 -0
- package/src/duckdb/src/include/duckdb/common/numeric_utils.hpp +2 -7
- package/src/duckdb/src/include/duckdb/common/opener_file_system.hpp +16 -5
- package/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +16 -0
- package/src/duckdb/src/include/duckdb/common/optional_idx.hpp +4 -0
- package/src/duckdb/src/include/duckdb/common/platform.hpp +34 -3
- package/src/duckdb/src/include/duckdb/common/radix_partitioning.hpp +10 -13
- package/src/duckdb/src/include/duckdb/common/random_engine.hpp +8 -3
- package/src/duckdb/src/include/duckdb/common/row_operations/row_operations.hpp +0 -2
- package/src/duckdb/src/include/duckdb/common/serializer/buffered_file_writer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/serializer/memory_stream.hpp +7 -0
- package/src/duckdb/src/include/duckdb/common/serializer/serialization_traits.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/serializer/serializer.hpp +1 -0
- package/src/duckdb/src/include/duckdb/common/stacktrace.hpp +25 -0
- package/src/duckdb/src/include/duckdb/common/string_util.hpp +30 -2
- package/src/duckdb/src/include/duckdb/common/tree_renderer/graphviz_tree_renderer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/tree_renderer/html_tree_renderer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/tree_renderer/json_tree_renderer.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/tree_renderer/text_tree_renderer.hpp +3 -2
- package/src/duckdb/src/include/duckdb/common/tree_renderer.hpp +2 -0
- package/src/duckdb/src/include/duckdb/common/type_util.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_allocator.hpp +8 -0
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection.hpp +13 -2
- package/src/duckdb/src/include/duckdb/common/types/column/column_data_collection_segment.hpp +2 -1
- package/src/duckdb/src/include/duckdb/common/types/conflict_manager.hpp +21 -4
- package/src/duckdb/src/include/duckdb/common/types/data_chunk.hpp +4 -1
- package/src/duckdb/src/include/duckdb/common/types/date.hpp +9 -4
- package/src/duckdb/src/include/duckdb/common/types/date_lookup_cache.hpp +1 -1
- package/src/duckdb/src/include/duckdb/common/types/interval.hpp +58 -10
- package/src/duckdb/src/include/duckdb/common/types/row/partitioned_tuple_data.hpp +0 -4
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_allocator.hpp +4 -0
- package/src/duckdb/src/include/duckdb/common/types/row/tuple_data_collection.hpp +4 -0
- package/src/duckdb/src/include/duckdb/common/types/string_type.hpp +10 -0
- package/src/duckdb/src/include/duckdb/common/types/timestamp.hpp +43 -16
- package/src/duckdb/src/include/duckdb/common/types/uuid.hpp +3 -1
- package/src/duckdb/src/include/duckdb/common/types/validity_mask.hpp +63 -21
- package/src/duckdb/src/include/duckdb/common/types/value.hpp +62 -16
- package/src/duckdb/src/include/duckdb/common/types/varint.hpp +13 -0
- package/src/duckdb/src/include/duckdb/common/types/vector.hpp +34 -7
- package/src/duckdb/src/include/duckdb/common/types/vector_buffer.hpp +15 -0
- package/src/duckdb/src/include/duckdb/common/types.hpp +12 -7
- package/src/duckdb/src/include/duckdb/common/uhugeint.hpp +10 -0
- package/src/duckdb/src/include/duckdb/common/vector_operations/aggregate_executor.hpp +12 -13
- package/src/duckdb/src/include/duckdb/common/vector_operations/binary_executor.hpp +27 -0
- package/src/duckdb/src/include/duckdb/common/vector_operations/unary_executor.hpp +111 -4
- package/src/duckdb/src/include/duckdb/common/vector_operations/vector_operations.hpp +0 -1
- package/src/duckdb/src/include/duckdb/execution/adaptive_filter.hpp +2 -0
- package/src/duckdb/src/include/duckdb/execution/aggregate_hashtable.hpp +48 -10
- package/src/duckdb/src/include/duckdb/execution/executor.hpp +2 -1
- package/src/duckdb/src/include/duckdb/execution/expression_executor.hpp +0 -1
- package/src/duckdb/src/include/duckdb/execution/ht_entry.hpp +25 -27
- package/src/duckdb/src/include/duckdb/execution/index/art/art.hpp +28 -18
- package/src/duckdb/src/include/duckdb/execution/index/art/node.hpp +1 -0
- package/src/duckdb/src/include/duckdb/execution/index/art/prefix.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/index/bound_index.hpp +23 -16
- package/src/duckdb/src/include/duckdb/execution/index/fixed_size_allocator.hpp +4 -0
- package/src/duckdb/src/include/duckdb/execution/index/fixed_size_buffer.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/join_hashtable.hpp +25 -16
- package/src/duckdb/src/include/duckdb/execution/merge_sort_tree.hpp +15 -10
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_hash_aggregate.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/{persistent/physical_fixed_batch_copy.hpp → aggregate/physical_partitioned_aggregate.hpp} +25 -27
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_ungrouped_aggregate.hpp +1 -2
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/physical_window.hpp +5 -4
- package/src/duckdb/src/include/duckdb/execution/operator/aggregate/ungrouped_aggregate_state.hpp +21 -1
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/base_scanner.hpp +38 -9
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_buffer.hpp +8 -9
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_buffer_manager.hpp +7 -1
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_error.hpp +29 -23
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_file_handle.hpp +15 -13
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_file_scanner.hpp +13 -5
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_option.hpp +2 -1
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_reader_options.hpp +24 -10
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_schema.hpp +36 -1
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_state.hpp +21 -13
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_state_machine.hpp +52 -22
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_state_machine_cache.hpp +6 -6
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_validator.hpp +58 -0
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/encode/csv_encoder.hpp +62 -0
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/global_csv_state.hpp +6 -3
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner_boundary.hpp +16 -6
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/csv_sniffer.hpp +9 -4
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/state_machine_options.hpp +8 -4
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/string_value_scanner.hpp +55 -10
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_batch_collector.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_buffered_batch_collector.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_limit.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/helper/physical_reservoir_sample.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/join/join_filter_pushdown.hpp +28 -7
- package/src/duckdb/src/include/duckdb/execution/operator/join/perfect_hash_join_executor.hpp +6 -9
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_hash_join.hpp +17 -16
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_iejoin.hpp +1 -1
- package/src/duckdb/src/include/duckdb/execution/operator/order/physical_order.hpp +7 -3
- package/src/duckdb/src/include/duckdb/execution/operator/order/physical_top_n.hpp +5 -1
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_copy_to_file.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_batch_insert.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_insert.hpp +55 -4
- package/src/duckdb/src/include/duckdb/execution/operator/persistent/physical_update.hpp +2 -0
- package/src/duckdb/src/include/duckdb/execution/operator/projection/physical_tableinout_function.hpp +2 -2
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_positional_scan.hpp +2 -1
- package/src/duckdb/src/include/duckdb/execution/operator/scan/physical_table_scan.hpp +10 -9
- package/src/duckdb/src/include/duckdb/execution/operator/schema/physical_create_art_index.hpp +16 -13
- package/src/duckdb/src/include/duckdb/execution/operator/set/physical_cte.hpp +0 -4
- package/src/duckdb/src/include/duckdb/execution/partition_info.hpp +79 -0
- package/src/duckdb/src/include/duckdb/execution/physical_operator.hpp +20 -9
- package/src/duckdb/src/include/duckdb/execution/physical_operator_states.hpp +1 -11
- package/src/duckdb/src/include/duckdb/execution/physical_plan_generator.hpp +0 -2
- package/src/duckdb/src/include/duckdb/execution/progress_data.hpp +58 -0
- package/src/duckdb/src/include/duckdb/execution/radix_partitioned_hashtable.hpp +2 -1
- package/src/duckdb/src/include/duckdb/execution/reservoir_sample.hpp +160 -31
- package/src/duckdb/src/include/duckdb/function/aggregate/distributive_function_utils.hpp +31 -0
- package/src/duckdb/src/include/duckdb/function/aggregate/distributive_functions.hpp +61 -10
- package/src/duckdb/src/include/duckdb/{core_functions → function}/aggregate/minmax_n_helpers.hpp +1 -1
- package/src/duckdb/src/include/duckdb/{core_functions → function}/aggregate/sort_key_helpers.hpp +2 -2
- package/src/duckdb/src/include/duckdb/function/aggregate_function.hpp +47 -27
- package/src/duckdb/src/include/duckdb/function/aggregate_state.hpp +2 -0
- package/src/duckdb/src/include/duckdb/function/built_in_functions.hpp +3 -10
- package/src/duckdb/src/include/duckdb/function/cast/bound_cast_data.hpp +13 -6
- package/src/duckdb/src/include/duckdb/function/compression/compression.hpp +15 -0
- package/src/duckdb/src/include/duckdb/function/compression_function.hpp +29 -6
- package/src/duckdb/src/include/duckdb/{core_functions → function}/create_sort_key.hpp +4 -1
- package/src/duckdb/src/include/duckdb/function/encoding_function.hpp +78 -0
- package/src/duckdb/src/include/duckdb/function/function.hpp +22 -1
- package/src/duckdb/src/include/duckdb/function/function_binder.hpp +3 -0
- package/src/duckdb/src/include/duckdb/function/function_list.hpp +39 -0
- package/src/duckdb/src/include/duckdb/function/function_set.hpp +13 -7
- package/src/duckdb/src/include/duckdb/{core_functions → function}/lambda_functions.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/partition_stats.hpp +36 -0
- package/src/duckdb/src/include/duckdb/function/register_function_list_helper.hpp +69 -0
- package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_functions.hpp +154 -23
- package/src/duckdb/src/include/duckdb/function/scalar/compressed_materialization_utils.hpp +45 -0
- package/src/duckdb/src/include/duckdb/function/scalar/date_functions.hpp +45 -0
- package/src/duckdb/src/include/duckdb/function/scalar/generic_common.hpp +36 -0
- package/src/duckdb/src/include/duckdb/function/scalar/generic_functions.hpp +32 -23
- package/src/duckdb/src/include/duckdb/function/scalar/list/contains_or_position.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/scalar/list_functions.hpp +156 -0
- package/src/duckdb/src/include/duckdb/function/scalar/map_functions.hpp +27 -0
- package/src/duckdb/src/include/duckdb/function/scalar/nested_functions.hpp +4 -45
- package/src/duckdb/src/include/duckdb/function/scalar/operator_functions.hpp +102 -0
- package/src/duckdb/src/include/duckdb/function/scalar/operators.hpp +2 -16
- package/src/duckdb/src/include/duckdb/function/scalar/sequence_functions.hpp +16 -25
- package/src/duckdb/src/include/duckdb/function/scalar/sequence_utils.hpp +38 -0
- package/src/duckdb/src/include/duckdb/function/scalar/strftime_format.hpp +1 -0
- package/src/duckdb/src/include/duckdb/function/scalar/string_common.hpp +49 -0
- package/src/duckdb/src/include/duckdb/function/scalar/string_functions.hpp +401 -76
- package/src/duckdb/src/include/duckdb/function/scalar/struct_functions.hpp +63 -0
- package/src/duckdb/src/include/duckdb/function/scalar/struct_utils.hpp +33 -0
- package/src/duckdb/src/include/duckdb/function/scalar/system_functions.hpp +45 -0
- package/src/duckdb/src/include/duckdb/function/scalar_function.hpp +17 -8
- package/src/duckdb/src/include/duckdb/function/table/arrow/arrow_duck_schema.hpp +59 -6
- package/src/duckdb/src/include/duckdb/function/table/arrow/arrow_type_info.hpp +12 -9
- package/src/duckdb/src/include/duckdb/function/table/arrow/enum/arrow_type_info_type.hpp +2 -0
- package/src/duckdb/src/include/duckdb/function/table/arrow.hpp +18 -13
- package/src/duckdb/src/include/duckdb/function/table/read_csv.hpp +7 -4
- package/src/duckdb/src/include/duckdb/function/table/system_functions.hpp +14 -0
- package/src/duckdb/src/include/duckdb/function/table/table_scan.hpp +15 -10
- package/src/duckdb/src/include/duckdb/function/table_function.hpp +94 -18
- package/src/duckdb/src/include/duckdb/{core_functions → function}/to_interval.hpp +1 -1
- package/src/duckdb/src/include/duckdb/function/window/window_aggregate_function.hpp +44 -0
- package/src/duckdb/src/include/duckdb/function/window/window_aggregate_states.hpp +56 -0
- package/src/duckdb/src/include/duckdb/function/window/window_aggregator.hpp +194 -0
- package/src/duckdb/src/include/duckdb/function/window/window_boundaries_state.hpp +153 -0
- package/src/duckdb/src/include/duckdb/function/window/window_collection.hpp +146 -0
- package/src/duckdb/src/include/duckdb/function/window/window_constant_aggregator.hpp +38 -0
- package/src/duckdb/src/include/duckdb/function/window/window_custom_aggregator.hpp +32 -0
- package/src/duckdb/src/include/duckdb/function/window/window_distinct_aggregator.hpp +39 -0
- package/src/duckdb/src/include/duckdb/function/window/window_executor.hpp +122 -0
- package/src/duckdb/src/include/duckdb/function/window/window_index_tree.hpp +42 -0
- package/src/duckdb/src/include/duckdb/function/window/window_merge_sort_tree.hpp +108 -0
- package/src/duckdb/src/include/duckdb/function/window/window_naive_aggregator.hpp +33 -0
- package/src/duckdb/src/include/duckdb/function/window/window_rank_function.hpp +63 -0
- package/src/duckdb/src/include/duckdb/function/window/window_rownumber_function.hpp +43 -0
- package/src/duckdb/src/include/duckdb/function/window/window_segment_tree.hpp +31 -0
- package/src/duckdb/src/include/duckdb/function/window/window_shared_expressions.hpp +76 -0
- package/src/duckdb/src/include/duckdb/function/window/window_token_tree.hpp +46 -0
- package/src/duckdb/src/include/duckdb/function/window/window_value_function.hpp +79 -0
- package/src/duckdb/src/include/duckdb/logging/http_logger.hpp +2 -0
- package/src/duckdb/src/include/duckdb/logging/log_manager.hpp +81 -0
- package/src/duckdb/src/include/duckdb/logging/log_storage.hpp +127 -0
- package/src/duckdb/src/include/duckdb/logging/logger.hpp +287 -0
- package/src/duckdb/src/include/duckdb/logging/logging.hpp +83 -0
- package/src/duckdb/src/include/duckdb/main/appender.hpp +41 -18
- package/src/duckdb/src/include/duckdb/main/attached_database.hpp +6 -3
- package/src/duckdb/src/include/duckdb/main/capi/capi_internal.hpp +7 -2
- package/src/duckdb/src/include/duckdb/main/capi/extension_api.hpp +317 -231
- package/src/duckdb/src/include/duckdb/main/client_config.hpp +17 -1
- package/src/duckdb/src/include/duckdb/main/client_context.hpp +28 -6
- package/src/duckdb/src/include/duckdb/main/client_context_file_opener.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/client_context_wrapper.hpp +5 -0
- package/src/duckdb/src/include/duckdb/main/client_data.hpp +3 -2
- package/src/duckdb/src/include/duckdb/main/client_properties.hpp +8 -3
- package/src/duckdb/src/include/duckdb/main/config.hpp +52 -8
- package/src/duckdb/src/include/duckdb/main/connection.hpp +18 -3
- package/src/duckdb/src/include/duckdb/main/database.hpp +8 -7
- package/src/duckdb/src/include/duckdb/main/database_file_opener.hpp +5 -1
- package/src/duckdb/src/include/duckdb/main/database_manager.hpp +3 -0
- package/src/duckdb/src/include/duckdb/main/db_instance_cache.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/extension.hpp +8 -2
- package/src/duckdb/src/include/duckdb/main/extension_entries.hpp +548 -9
- package/src/duckdb/src/include/duckdb/main/extension_helper.hpp +18 -0
- package/src/duckdb/src/include/duckdb/main/extension_util.hpp +12 -7
- package/src/duckdb/src/include/duckdb/main/prepared_statement.hpp +3 -3
- package/src/duckdb/src/include/duckdb/main/profiling_info.hpp +2 -2
- package/src/duckdb/src/include/duckdb/main/query_profiler.hpp +8 -4
- package/src/duckdb/src/include/duckdb/main/relation/create_table_relation.hpp +3 -1
- package/src/duckdb/src/include/duckdb/main/relation/delete_relation.hpp +2 -2
- package/src/duckdb/src/include/duckdb/main/relation/subquery_relation.hpp +1 -4
- package/src/duckdb/src/include/duckdb/main/relation/table_function_relation.hpp +3 -1
- package/src/duckdb/src/include/duckdb/main/relation/table_relation.hpp +3 -0
- package/src/duckdb/src/include/duckdb/main/relation/update_relation.hpp +3 -2
- package/src/duckdb/src/include/duckdb/main/relation/value_relation.hpp +7 -0
- package/src/duckdb/src/include/duckdb/main/relation/view_relation.hpp +1 -0
- package/src/duckdb/src/include/duckdb/main/relation/write_parquet_relation.hpp +1 -1
- package/src/duckdb/src/include/duckdb/main/relation.hpp +45 -9
- package/src/duckdb/src/include/duckdb/main/secret/secret_storage.hpp +20 -22
- package/src/duckdb/src/include/duckdb/main/settings.hpp +613 -378
- package/src/duckdb/src/include/duckdb/main/table_description.hpp +14 -4
- package/src/duckdb/src/include/duckdb/optimizer/build_probe_side_optimizer.hpp +1 -3
- package/src/duckdb/src/include/duckdb/optimizer/column_lifetime_analyzer.hpp +14 -7
- package/src/duckdb/src/include/duckdb/optimizer/common_aggregate_optimizer.hpp +2 -2
- package/src/duckdb/src/include/duckdb/optimizer/empty_result_pullup.hpp +27 -0
- package/src/duckdb/src/include/duckdb/optimizer/expression_heuristics.hpp +1 -1
- package/src/duckdb/src/include/duckdb/optimizer/filter_combiner.hpp +6 -1
- package/src/duckdb/src/include/duckdb/optimizer/filter_pushdown.hpp +2 -0
- package/src/duckdb/src/include/duckdb/optimizer/in_clause_rewriter.hpp +3 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_filter_pushdown_optimizer.hpp +5 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/plan_enumerator.hpp +2 -0
- package/src/duckdb/src/include/duckdb/optimizer/join_order/relation_statistics_helper.hpp +2 -2
- package/src/duckdb/src/include/duckdb/optimizer/late_materialization.hpp +45 -0
- package/src/duckdb/src/include/duckdb/optimizer/matcher/expression_matcher.hpp +23 -0
- package/src/duckdb/src/include/duckdb/optimizer/matcher/type_matcher.hpp +18 -0
- package/src/duckdb/src/include/duckdb/optimizer/optimizer.hpp +9 -0
- package/src/duckdb/src/include/duckdb/optimizer/remove_unused_columns.hpp +33 -11
- package/src/duckdb/src/include/duckdb/optimizer/rule/distinct_aggregate_optimizer.hpp +34 -0
- package/src/duckdb/src/include/duckdb/optimizer/sampling_pushdown.hpp +25 -0
- package/src/duckdb/src/include/duckdb/optimizer/statistics_propagator.hpp +3 -1
- package/src/duckdb/src/include/duckdb/optimizer/sum_rewriter.hpp +37 -0
- package/src/duckdb/src/include/duckdb/optimizer/topn_optimizer.hpp +4 -0
- package/src/duckdb/src/include/duckdb/parallel/event.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parallel/pipeline.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parallel/pipeline_executor.hpp +26 -8
- package/src/duckdb/src/include/duckdb/parallel/thread_context.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/base_expression.hpp +51 -3
- package/src/duckdb/src/include/duckdb/parser/constraints/unique_constraint.hpp +28 -44
- package/src/duckdb/src/include/duckdb/parser/expression/columnref_expression.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/expression/comparison_expression.hpp +2 -2
- package/src/duckdb/src/include/duckdb/parser/expression/conjunction_expression.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/expression/function_expression.hpp +2 -2
- package/src/duckdb/src/include/duckdb/parser/expression/operator_expression.hpp +6 -6
- package/src/duckdb/src/include/duckdb/parser/expression/star_expression.hpp +11 -1
- package/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +12 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_info.hpp +1 -0
- package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_scalar_function_info.hpp +3 -2
- package/src/duckdb/src/include/duckdb/parser/parsed_data/alter_table_info.hpp +22 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/attach_info.hpp +3 -4
- package/src/duckdb/src/include/duckdb/parser/parsed_data/comment_on_column_info.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_function_info.hpp +16 -12
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_index_info.hpp +3 -3
- package/src/duckdb/src/include/duckdb/parser/parsed_data/create_type_info.hpp +5 -5
- package/src/duckdb/src/include/duckdb/parser/parsed_data/sample_options.hpp +12 -3
- package/src/duckdb/src/include/duckdb/parser/parser.hpp +3 -0
- package/src/duckdb/src/include/duckdb/parser/qualified_name.hpp +17 -57
- package/src/duckdb/src/include/duckdb/parser/qualified_name_set.hpp +19 -3
- package/src/duckdb/src/include/duckdb/parser/simplified_token.hpp +2 -1
- package/src/duckdb/src/include/duckdb/parser/tableref/basetableref.hpp +12 -9
- package/src/duckdb/src/include/duckdb/parser/tokens.hpp +1 -1
- package/src/duckdb/src/include/duckdb/parser/transformer.hpp +2 -2
- package/src/duckdb/src/include/duckdb/planner/bind_context.hpp +45 -28
- package/src/duckdb/src/include/duckdb/planner/binder.hpp +23 -11
- package/src/duckdb/src/include/duckdb/planner/binding_alias.hpp +44 -0
- package/src/duckdb/src/include/duckdb/planner/bound_result_modifier.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/collation_binding.hpp +4 -3
- package/src/duckdb/src/include/duckdb/planner/constraints/bound_unique_constraint.hpp +11 -10
- package/src/duckdb/src/include/duckdb/planner/expression/bound_cast_expression.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/expression/bound_function_expression.hpp +1 -0
- package/src/duckdb/src/include/duckdb/planner/expression/bound_subquery_expression.hpp +4 -4
- package/src/duckdb/src/include/duckdb/planner/expression/bound_window_expression.hpp +6 -0
- package/src/duckdb/src/include/duckdb/planner/expression.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/expression_binder/having_binder.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/expression_binder/index_binder.hpp +9 -4
- package/src/duckdb/src/include/duckdb/planner/expression_binder.hpp +8 -2
- package/src/duckdb/src/include/duckdb/planner/filter/conjunction_filter.hpp +1 -2
- package/src/duckdb/src/include/duckdb/planner/filter/dynamic_filter.hpp +48 -0
- package/src/duckdb/src/include/duckdb/planner/filter/in_filter.hpp +37 -0
- package/src/duckdb/src/include/duckdb/planner/filter/optional_filter.hpp +35 -0
- package/src/duckdb/src/include/duckdb/planner/logical_operator.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/logical_operator_visitor.hpp +3 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_comparison_join.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_create_index.hpp +9 -9
- package/src/duckdb/src/include/duckdb/planner/operator/logical_filter.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_get.hpp +16 -7
- package/src/duckdb/src/include/duckdb/planner/operator/logical_insert.hpp +2 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_join.hpp +4 -0
- package/src/duckdb/src/include/duckdb/planner/operator/logical_order.hpp +5 -1
- package/src/duckdb/src/include/duckdb/planner/operator/logical_top_n.hpp +5 -3
- package/src/duckdb/src/include/duckdb/planner/table_binding.hpp +14 -6
- package/src/duckdb/src/include/duckdb/planner/table_filter.hpp +12 -8
- package/src/duckdb/src/include/duckdb/storage/arena_allocator.hpp +1 -0
- package/src/duckdb/src/include/duckdb/storage/block_manager.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +82 -26
- package/src/duckdb/src/include/duckdb/storage/buffer/buffer_handle.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/buffer/buffer_pool.hpp +10 -3
- package/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +2 -1
- package/src/duckdb/src/include/duckdb/storage/checkpoint/string_checkpoint_state.hpp +4 -13
- package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_compress.hpp +14 -15
- package/src/duckdb/src/include/duckdb/storage/compression/alp/alp_constants.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/compression/alprd/alprd_compress.hpp +13 -15
- package/src/duckdb/src/include/duckdb/storage/compression/chimp/chimp_compress.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/compression/dictionary/analyze.hpp +46 -0
- package/src/duckdb/src/include/duckdb/storage/compression/dictionary/common.hpp +60 -0
- package/src/duckdb/src/include/duckdb/storage/compression/dictionary/compression.hpp +61 -0
- package/src/duckdb/src/include/duckdb/storage/compression/dictionary/decompression.hpp +50 -0
- package/src/duckdb/src/include/duckdb/storage/compression/empty_validity.hpp +100 -0
- package/src/duckdb/src/include/duckdb/storage/compression/patas/patas_compress.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/compression/roaring/appender.hpp +150 -0
- package/src/duckdb/src/include/duckdb/storage/compression/roaring/roaring.hpp +618 -0
- package/src/duckdb/src/include/duckdb/storage/data_table.hpp +53 -31
- package/src/duckdb/src/include/duckdb/storage/index.hpp +2 -3
- package/src/duckdb/src/include/duckdb/storage/object_cache.hpp +0 -1
- package/src/duckdb/src/include/duckdb/storage/segment/uncompressed.hpp +4 -1
- package/src/duckdb/src/include/duckdb/storage/standard_buffer_manager.hpp +3 -3
- package/src/duckdb/src/include/duckdb/storage/statistics/column_statistics.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/statistics/distinct_statistics.hpp +5 -4
- package/src/duckdb/src/include/duckdb/storage/statistics/numeric_stats.hpp +16 -1
- package/src/duckdb/src/include/duckdb/storage/statistics/string_stats.hpp +2 -1
- package/src/duckdb/src/include/duckdb/storage/storage_index.hpp +70 -0
- package/src/duckdb/src/include/duckdb/storage/storage_info.hpp +5 -7
- package/src/duckdb/src/include/duckdb/storage/storage_manager.hpp +4 -3
- package/src/duckdb/src/include/duckdb/storage/storage_options.hpp +23 -0
- package/src/duckdb/src/include/duckdb/storage/string_uncompressed.hpp +34 -6
- package/src/duckdb/src/include/duckdb/storage/table/append_state.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/table/array_column_data.hpp +2 -2
- package/src/duckdb/src/include/duckdb/storage/table/column_checkpoint_state.hpp +2 -1
- package/src/duckdb/src/include/duckdb/storage/table/column_data.hpp +39 -10
- package/src/duckdb/src/include/duckdb/storage/table/column_data_checkpointer.hpp +56 -14
- package/src/duckdb/src/include/duckdb/storage/table/column_segment.hpp +35 -29
- package/src/duckdb/src/include/duckdb/storage/table/delete_state.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/table/list_column_data.hpp +1 -1
- package/src/duckdb/src/include/duckdb/storage/table/row_group.hpp +7 -1
- package/src/duckdb/src/include/duckdb/storage/table/row_group_collection.hpp +19 -6
- package/src/duckdb/src/include/duckdb/storage/table/row_version_manager.hpp +2 -1
- package/src/duckdb/src/include/duckdb/storage/table/scan_state.hpp +29 -6
- package/src/duckdb/src/include/duckdb/storage/table/segment_tree.hpp +10 -10
- package/src/duckdb/src/include/duckdb/storage/table/standard_column_data.hpp +5 -0
- package/src/duckdb/src/include/duckdb/storage/table/table_index_list.hpp +26 -19
- package/src/duckdb/src/include/duckdb/storage/table/table_statistics.hpp +8 -1
- package/src/duckdb/src/include/duckdb/storage/table/update_segment.hpp +16 -14
- package/src/duckdb/src/include/duckdb/storage/table/validity_column_data.hpp +2 -0
- package/src/duckdb/src/include/duckdb/storage/table_io_manager.hpp +3 -0
- package/src/duckdb/src/include/duckdb/storage/table_storage_info.hpp +1 -0
- package/src/duckdb/src/include/duckdb/storage/temporary_file_manager.hpp +228 -61
- package/src/duckdb/src/include/duckdb/storage/write_ahead_log.hpp +14 -10
- package/src/duckdb/src/include/duckdb/transaction/commit_state.hpp +3 -1
- package/src/duckdb/src/include/duckdb/transaction/duck_transaction.hpp +3 -2
- package/src/duckdb/src/include/duckdb/transaction/duck_transaction_manager.hpp +1 -0
- package/src/duckdb/src/include/duckdb/transaction/local_storage.hpp +19 -17
- package/src/duckdb/src/include/duckdb/transaction/rollback_state.hpp +5 -2
- package/src/duckdb/src/include/duckdb/transaction/transaction.hpp +1 -2
- package/src/duckdb/src/include/duckdb/transaction/undo_buffer.hpp +13 -8
- package/src/duckdb/src/include/duckdb/transaction/undo_buffer_allocator.hpp +79 -0
- package/src/duckdb/src/include/duckdb/transaction/update_info.hpp +43 -13
- package/src/duckdb/src/include/duckdb/transaction/wal_write_state.hpp +4 -1
- package/src/duckdb/src/include/duckdb/verification/copied_statement_verifier.hpp +4 -2
- package/src/duckdb/src/include/duckdb/verification/deserialized_statement_verifier.hpp +4 -2
- package/src/duckdb/src/include/duckdb/verification/external_statement_verifier.hpp +4 -2
- package/src/duckdb/src/include/duckdb/verification/fetch_row_verifier.hpp +4 -2
- package/src/duckdb/src/include/duckdb/verification/no_operator_caching_verifier.hpp +4 -2
- package/src/duckdb/src/include/duckdb/verification/parsed_statement_verifier.hpp +4 -2
- package/src/duckdb/src/include/duckdb/verification/prepared_statement_verifier.hpp +7 -3
- package/src/duckdb/src/include/duckdb/verification/statement_verifier.hpp +11 -5
- package/src/duckdb/src/include/duckdb/verification/unoptimized_statement_verifier.hpp +4 -2
- package/src/duckdb/src/include/duckdb.h +424 -41
- package/src/duckdb/src/include/duckdb_extension.h +301 -195
- package/src/duckdb/src/logging/log_manager.cpp +157 -0
- package/src/duckdb/src/logging/log_storage.cpp +209 -0
- package/src/duckdb/src/logging/logger.cpp +211 -0
- package/src/duckdb/src/logging/logging.cpp +42 -0
- package/src/duckdb/src/main/appender.cpp +187 -45
- package/src/duckdb/src/main/attached_database.cpp +16 -8
- package/src/duckdb/src/main/capi/appender-c.cpp +47 -4
- package/src/duckdb/src/main/capi/arrow-c.cpp +9 -4
- package/src/duckdb/src/main/capi/config-c.cpp +17 -4
- package/src/duckdb/src/main/capi/datetime-c.cpp +15 -0
- package/src/duckdb/src/main/capi/duckdb-c.cpp +54 -13
- package/src/duckdb/src/main/capi/duckdb_value-c.cpp +212 -4
- package/src/duckdb/src/main/capi/helper-c.cpp +3 -0
- package/src/duckdb/src/main/capi/prepared-c.cpp +26 -7
- package/src/duckdb/src/main/capi/replacement_scan-c.cpp +1 -1
- package/src/duckdb/src/main/capi/result-c.cpp +3 -0
- package/src/duckdb/src/main/capi/table_description-c.cpp +43 -10
- package/src/duckdb/src/main/capi/threading-c.cpp +4 -4
- package/src/duckdb/src/main/client_context.cpp +125 -51
- package/src/duckdb/src/main/client_context_file_opener.cpp +4 -0
- package/src/duckdb/src/main/client_context_wrapper.cpp +4 -0
- package/src/duckdb/src/main/client_data.cpp +1 -1
- package/src/duckdb/src/main/client_verify.cpp +39 -20
- package/src/duckdb/src/main/config.cpp +266 -74
- package/src/duckdb/src/main/connection.cpp +53 -13
- package/src/duckdb/src/main/database.cpp +39 -18
- package/src/duckdb/src/main/database_manager.cpp +12 -11
- package/src/duckdb/src/main/db_instance_cache.cpp +14 -7
- package/src/duckdb/src/main/extension/extension_helper.cpp +24 -23
- package/src/duckdb/src/main/extension/extension_install.cpp +19 -7
- package/src/duckdb/src/main/extension/extension_load.cpp +91 -41
- package/src/duckdb/src/main/extension/extension_util.cpp +40 -19
- package/src/duckdb/src/main/extension.cpp +20 -11
- package/src/duckdb/src/main/profiling_info.cpp +19 -5
- package/src/duckdb/src/main/query_profiler.cpp +135 -36
- package/src/duckdb/src/main/query_result.cpp +2 -1
- package/src/duckdb/src/main/relation/aggregate_relation.cpp +3 -3
- package/src/duckdb/src/main/relation/create_table_relation.cpp +5 -4
- package/src/duckdb/src/main/relation/create_view_relation.cpp +2 -2
- package/src/duckdb/src/main/relation/cross_product_relation.cpp +2 -2
- package/src/duckdb/src/main/relation/delete_relation.cpp +2 -2
- package/src/duckdb/src/main/relation/delim_get_relation.cpp +1 -1
- package/src/duckdb/src/main/relation/distinct_relation.cpp +1 -1
- package/src/duckdb/src/main/relation/explain_relation.cpp +1 -1
- package/src/duckdb/src/main/relation/filter_relation.cpp +1 -1
- package/src/duckdb/src/main/relation/insert_relation.cpp +1 -1
- package/src/duckdb/src/main/relation/join_relation.cpp +5 -5
- package/src/duckdb/src/main/relation/order_relation.cpp +1 -1
- package/src/duckdb/src/main/relation/projection_relation.cpp +3 -3
- package/src/duckdb/src/main/relation/query_relation.cpp +1 -1
- package/src/duckdb/src/main/relation/read_csv_relation.cpp +58 -20
- package/src/duckdb/src/main/relation/setop_relation.cpp +2 -2
- package/src/duckdb/src/main/relation/subquery_relation.cpp +3 -8
- package/src/duckdb/src/main/relation/table_function_relation.cpp +10 -1
- package/src/duckdb/src/main/relation/table_relation.cpp +19 -3
- package/src/duckdb/src/main/relation/update_relation.cpp +2 -2
- package/src/duckdb/src/main/relation/value_relation.cpp +42 -2
- package/src/duckdb/src/main/relation/view_relation.cpp +8 -2
- package/src/duckdb/src/main/relation/write_csv_relation.cpp +1 -1
- package/src/duckdb/src/main/relation/write_parquet_relation.cpp +1 -1
- package/src/duckdb/src/main/relation.cpp +49 -28
- package/src/duckdb/src/main/secret/secret_manager.cpp +1 -1
- package/src/duckdb/src/main/secret/secret_storage.cpp +6 -4
- package/src/duckdb/src/main/settings/autogenerated_settings.cpp +1102 -0
- package/src/duckdb/src/main/settings/custom_settings.cpp +1343 -0
- package/src/duckdb/src/optimizer/build_probe_side_optimizer.cpp +60 -37
- package/src/duckdb/src/optimizer/column_binding_replacer.cpp +1 -1
- package/src/duckdb/src/optimizer/column_lifetime_analyzer.cpp +126 -72
- package/src/duckdb/src/optimizer/common_aggregate_optimizer.cpp +22 -6
- package/src/duckdb/src/optimizer/compressed_materialization/compress_aggregate.cpp +3 -3
- package/src/duckdb/src/optimizer/compressed_materialization/compress_distinct.cpp +2 -2
- package/src/duckdb/src/optimizer/compressed_materialization.cpp +3 -3
- package/src/duckdb/src/optimizer/cse_optimizer.cpp +7 -7
- package/src/duckdb/src/optimizer/deliminator.cpp +6 -5
- package/src/duckdb/src/optimizer/empty_result_pullup.cpp +96 -0
- package/src/duckdb/src/optimizer/expression_heuristics.cpp +11 -3
- package/src/duckdb/src/optimizer/expression_rewriter.cpp +9 -2
- package/src/duckdb/src/optimizer/filter_combiner.cpp +190 -88
- package/src/duckdb/src/optimizer/filter_pushdown.cpp +6 -5
- package/src/duckdb/src/optimizer/in_clause_rewriter.cpp +25 -9
- package/src/duckdb/src/optimizer/join_filter_pushdown_optimizer.cpp +170 -72
- package/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +5 -4
- package/src/duckdb/src/optimizer/join_order/plan_enumerator.cpp +3 -1
- package/src/duckdb/src/optimizer/join_order/query_graph_manager.cpp +7 -7
- package/src/duckdb/src/optimizer/join_order/relation_manager.cpp +15 -6
- package/src/duckdb/src/optimizer/join_order/relation_statistics_helper.cpp +37 -22
- package/src/duckdb/src/optimizer/late_materialization.cpp +414 -0
- package/src/duckdb/src/optimizer/limit_pushdown.cpp +1 -0
- package/src/duckdb/src/optimizer/matcher/expression_matcher.cpp +30 -2
- package/src/duckdb/src/optimizer/optimizer.cpp +67 -7
- package/src/duckdb/src/optimizer/pullup/pullup_filter.cpp +3 -3
- package/src/duckdb/src/optimizer/pullup/pullup_projection.cpp +2 -2
- package/src/duckdb/src/optimizer/pullup/pullup_set_operation.cpp +1 -1
- package/src/duckdb/src/optimizer/pushdown/pushdown_aggregate.cpp +2 -2
- package/src/duckdb/src/optimizer/pushdown/pushdown_filter.cpp +1 -1
- package/src/duckdb/src/optimizer/pushdown/pushdown_left_join.cpp +1 -1
- package/src/duckdb/src/optimizer/pushdown/pushdown_mark_join.cpp +3 -3
- package/src/duckdb/src/optimizer/pushdown/pushdown_projection.cpp +5 -3
- package/src/duckdb/src/optimizer/pushdown/pushdown_set_operation.cpp +1 -1
- package/src/duckdb/src/optimizer/pushdown/pushdown_unnest.cpp +52 -0
- package/src/duckdb/src/optimizer/pushdown/pushdown_window.cpp +2 -2
- package/src/duckdb/src/optimizer/regex_range_filter.cpp +1 -1
- package/src/duckdb/src/optimizer/remove_duplicate_groups.cpp +1 -1
- package/src/duckdb/src/optimizer/remove_unused_columns.cpp +168 -38
- package/src/duckdb/src/optimizer/rule/arithmetic_simplification.cpp +2 -1
- package/src/duckdb/src/optimizer/rule/comparison_simplification.cpp +8 -5
- package/src/duckdb/src/optimizer/rule/conjunction_simplification.cpp +2 -2
- package/src/duckdb/src/optimizer/rule/constant_folding.cpp +2 -2
- package/src/duckdb/src/optimizer/rule/distinct_aggregate_optimizer.cpp +65 -0
- package/src/duckdb/src/optimizer/rule/distributivity.cpp +2 -2
- package/src/duckdb/src/optimizer/rule/enum_comparison.cpp +2 -1
- package/src/duckdb/src/optimizer/rule/equal_or_null_simplification.cpp +4 -3
- package/src/duckdb/src/optimizer/rule/in_clause_simplification_rule.cpp +3 -3
- package/src/duckdb/src/optimizer/rule/like_optimizations.cpp +3 -1
- package/src/duckdb/src/optimizer/rule/move_constants.cpp +9 -9
- package/src/duckdb/src/optimizer/rule/regex_optimizations.cpp +4 -3
- package/src/duckdb/src/optimizer/rule/timestamp_comparison.cpp +1 -1
- package/src/duckdb/src/optimizer/sampling_pushdown.cpp +24 -0
- package/src/duckdb/src/optimizer/statistics/expression/propagate_comparison.cpp +1 -1
- package/src/duckdb/src/optimizer/statistics/expression/propagate_conjunction.cpp +1 -1
- package/src/duckdb/src/optimizer/statistics/expression/propagate_operator.cpp +1 -1
- package/src/duckdb/src/optimizer/statistics/operator/propagate_aggregate.cpp +74 -0
- package/src/duckdb/src/optimizer/statistics/operator/propagate_filter.cpp +10 -7
- package/src/duckdb/src/optimizer/statistics/operator/propagate_get.cpp +3 -3
- package/src/duckdb/src/optimizer/statistics/operator/propagate_join.cpp +3 -3
- package/src/duckdb/src/optimizer/statistics/operator/propagate_window.cpp +3 -0
- package/src/duckdb/src/optimizer/sum_rewriter.cpp +174 -0
- package/src/duckdb/src/optimizer/topn_optimizer.cpp +71 -0
- package/src/duckdb/src/optimizer/unnest_rewriter.cpp +5 -5
- package/src/duckdb/src/parallel/event.cpp +4 -0
- package/src/duckdb/src/parallel/executor.cpp +11 -29
- package/src/duckdb/src/parallel/executor_task.cpp +8 -3
- package/src/duckdb/src/parallel/pipeline.cpp +15 -8
- package/src/duckdb/src/parallel/pipeline_executor.cpp +67 -43
- package/src/duckdb/src/parallel/thread_context.cpp +12 -1
- package/src/duckdb/src/parser/column_definition.cpp +3 -3
- package/src/duckdb/src/parser/constraints/unique_constraint.cpp +72 -9
- package/src/duckdb/src/parser/expression/columnref_expression.cpp +15 -3
- package/src/duckdb/src/parser/expression/conjunction_expression.cpp +1 -1
- package/src/duckdb/src/parser/expression/function_expression.cpp +1 -1
- package/src/duckdb/src/parser/expression/lambda_expression.cpp +3 -3
- package/src/duckdb/src/parser/expression/lambdaref_expression.cpp +1 -1
- package/src/duckdb/src/parser/expression/star_expression.cpp +46 -2
- package/src/duckdb/src/parser/expression/window_expression.cpp +24 -1
- package/src/duckdb/src/parser/parsed_data/alter_info.cpp +26 -2
- package/src/duckdb/src/parser/parsed_data/alter_scalar_function_info.cpp +5 -3
- package/src/duckdb/src/parser/parsed_data/alter_table_info.cpp +29 -1
- package/src/duckdb/src/parser/parsed_data/attach_info.cpp +6 -6
- package/src/duckdb/src/parser/parsed_data/create_aggregate_function_info.cpp +1 -1
- package/src/duckdb/src/parser/parsed_data/create_function_info.cpp +17 -0
- package/src/duckdb/src/parser/parsed_data/create_index_info.cpp +16 -15
- package/src/duckdb/src/parser/parsed_data/create_macro_info.cpp +1 -1
- package/src/duckdb/src/parser/parsed_data/create_pragma_function_info.cpp +1 -1
- package/src/duckdb/src/parser/parsed_data/create_scalar_function_info.cpp +3 -2
- package/src/duckdb/src/parser/parsed_data/create_schema_info.cpp +1 -1
- package/src/duckdb/src/parser/parsed_data/create_table_function_info.cpp +1 -1
- package/src/duckdb/src/parser/parsed_data/create_table_info.cpp +1 -0
- package/src/duckdb/src/parser/parsed_data/create_type_info.cpp +4 -4
- package/src/duckdb/src/parser/parsed_data/load_info.cpp +1 -0
- package/src/duckdb/src/parser/parsed_data/sample_options.cpp +31 -1
- package/src/duckdb/src/parser/parsed_expression.cpp +1 -1
- package/src/duckdb/src/parser/parsed_expression_iterator.cpp +4 -1
- package/src/duckdb/src/parser/parser.cpp +129 -0
- package/src/duckdb/src/parser/qualified_name.cpp +99 -0
- package/src/duckdb/src/parser/query_error_context.cpp +35 -6
- package/src/duckdb/src/parser/query_node/select_node.cpp +4 -4
- package/src/duckdb/src/parser/statement/delete_statement.cpp +6 -1
- package/src/duckdb/src/parser/statement/insert_statement.cpp +4 -3
- package/src/duckdb/src/parser/statement/update_statement.cpp +6 -1
- package/src/duckdb/src/parser/tableref/pivotref.cpp +2 -2
- package/src/duckdb/src/parser/tableref.cpp +2 -2
- package/src/duckdb/src/parser/transform/constraint/transform_constraint.cpp +16 -24
- package/src/duckdb/src/parser/transform/expression/transform_array_access.cpp +1 -1
- package/src/duckdb/src/parser/transform/expression/transform_bool_expr.cpp +5 -5
- package/src/duckdb/src/parser/transform/expression/transform_columnref.cpp +61 -13
- package/src/duckdb/src/parser/transform/expression/transform_constant.cpp +10 -4
- package/src/duckdb/src/parser/transform/expression/transform_expression.cpp +2 -2
- package/src/duckdb/src/parser/transform/expression/transform_function.cpp +30 -3
- package/src/duckdb/src/parser/transform/expression/transform_operator.cpp +1 -1
- package/src/duckdb/src/parser/transform/expression/transform_subquery.cpp +25 -6
- package/src/duckdb/src/parser/transform/helpers/transform_groupby.cpp +1 -1
- package/src/duckdb/src/parser/transform/helpers/transform_sample.cpp +10 -3
- package/src/duckdb/src/parser/transform/helpers/transform_typename.cpp +4 -3
- package/src/duckdb/src/parser/transform/statement/transform_alter_table.cpp +18 -3
- package/src/duckdb/src/parser/transform/statement/transform_comment_on.cpp +1 -1
- package/src/duckdb/src/parser/transform/statement/transform_copy.cpp +0 -1
- package/src/duckdb/src/parser/transform/statement/transform_create_function.cpp +5 -5
- package/src/duckdb/src/parser/transform/statement/transform_create_table.cpp +26 -12
- package/src/duckdb/src/parser/transform/statement/transform_create_table_as.cpp +11 -3
- package/src/duckdb/src/parser/transform/statement/transform_create_view.cpp +1 -1
- package/src/duckdb/src/parser/transform/statement/transform_pivot_stmt.cpp +2 -0
- package/src/duckdb/src/parser/transform/statement/transform_pragma.cpp +3 -3
- package/src/duckdb/src/parser/transform/statement/transform_prepare.cpp +4 -4
- package/src/duckdb/src/parser/transform/statement/transform_set.cpp +2 -2
- package/src/duckdb/src/parser/transform/statement/transform_show.cpp +21 -3
- package/src/duckdb/src/parser/transform/tableref/transform_pivot.cpp +8 -6
- package/src/duckdb/src/parser/transformer.cpp +2 -2
- package/src/duckdb/src/planner/bind_context.cpp +308 -136
- package/src/duckdb/src/planner/binder/expression/bind_aggregate_expression.cpp +30 -31
- package/src/duckdb/src/planner/binder/expression/bind_between_expression.cpp +4 -2
- package/src/duckdb/src/planner/binder/expression/bind_columnref_expression.cpp +102 -94
- package/src/duckdb/src/planner/binder/expression/bind_comparison_expression.cpp +7 -5
- package/src/duckdb/src/planner/binder/expression/bind_conjunction_expression.cpp +1 -1
- package/src/duckdb/src/planner/binder/expression/bind_function_expression.cpp +7 -7
- package/src/duckdb/src/planner/binder/expression/bind_lambda.cpp +10 -10
- package/src/duckdb/src/planner/binder/expression/bind_macro_expression.cpp +24 -6
- package/src/duckdb/src/planner/binder/expression/bind_operator_expression.cpp +23 -15
- package/src/duckdb/src/planner/binder/expression/bind_parameter_expression.cpp +1 -1
- package/src/duckdb/src/planner/binder/expression/bind_star_expression.cpp +97 -19
- package/src/duckdb/src/planner/binder/expression/bind_subquery_expression.cpp +74 -16
- package/src/duckdb/src/planner/binder/expression/bind_unnest_expression.cpp +6 -6
- package/src/duckdb/src/planner/binder/expression/bind_window_expression.cpp +49 -15
- package/src/duckdb/src/planner/binder/query_node/bind_select_node.cpp +32 -23
- package/src/duckdb/src/planner/binder/query_node/bind_setop_node.cpp +20 -3
- package/src/duckdb/src/planner/binder/query_node/bind_table_macro_node.cpp +2 -2
- package/src/duckdb/src/planner/binder/query_node/plan_query_node.cpp +3 -0
- package/src/duckdb/src/planner/binder/query_node/plan_setop.cpp +6 -5
- package/src/duckdb/src/planner/binder/query_node/plan_subquery.cpp +38 -19
- package/src/duckdb/src/planner/binder/statement/bind_copy.cpp +2 -12
- package/src/duckdb/src/planner/binder/statement/bind_create.cpp +117 -412
- package/src/duckdb/src/planner/binder/statement/bind_create_table.cpp +423 -144
- package/src/duckdb/src/planner/binder/statement/bind_delete.cpp +1 -1
- package/src/duckdb/src/planner/binder/statement/bind_drop.cpp +5 -0
- package/src/duckdb/src/planner/binder/statement/bind_execute.cpp +1 -1
- package/src/duckdb/src/planner/binder/statement/bind_export.cpp +0 -4
- package/src/duckdb/src/planner/binder/statement/bind_insert.cpp +31 -13
- package/src/duckdb/src/planner/binder/statement/bind_pragma.cpp +1 -1
- package/src/duckdb/src/planner/binder/statement/bind_simple.cpp +96 -27
- package/src/duckdb/src/planner/binder/statement/bind_summarize.cpp +1 -1
- package/src/duckdb/src/planner/binder/statement/bind_update.cpp +5 -3
- package/src/duckdb/src/planner/binder/statement/bind_vacuum.cpp +7 -6
- package/src/duckdb/src/planner/binder/tableref/bind_basetableref.cpp +36 -9
- package/src/duckdb/src/planner/binder/tableref/bind_joinref.cpp +34 -34
- package/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +72 -35
- package/src/duckdb/src/planner/binder/tableref/bind_showref.cpp +99 -18
- package/src/duckdb/src/planner/binder/tableref/bind_table_function.cpp +23 -11
- package/src/duckdb/src/planner/binder/tableref/plan_joinref.cpp +22 -19
- package/src/duckdb/src/planner/binder.cpp +23 -45
- package/src/duckdb/src/planner/binding_alias.cpp +69 -0
- package/src/duckdb/src/planner/bound_parameter_map.cpp +1 -1
- package/src/duckdb/src/planner/bound_result_modifier.cpp +6 -2
- package/src/duckdb/src/planner/collation_binding.cpp +38 -4
- package/src/duckdb/src/planner/expression/bound_cast_expression.cpp +17 -5
- package/src/duckdb/src/planner/expression/bound_expression.cpp +1 -1
- package/src/duckdb/src/planner/expression/bound_function_expression.cpp +8 -1
- package/src/duckdb/src/planner/expression/bound_parameter_expression.cpp +2 -2
- package/src/duckdb/src/planner/expression/bound_window_expression.cpp +24 -4
- package/src/duckdb/src/planner/expression.cpp +7 -1
- package/src/duckdb/src/planner/expression_binder/aggregate_binder.cpp +1 -1
- package/src/duckdb/src/planner/expression_binder/base_select_binder.cpp +2 -2
- package/src/duckdb/src/planner/expression_binder/group_binder.cpp +2 -2
- package/src/duckdb/src/planner/expression_binder/having_binder.cpp +16 -0
- package/src/duckdb/src/planner/expression_binder/index_binder.cpp +53 -1
- package/src/duckdb/src/planner/expression_binder/lateral_binder.cpp +3 -3
- package/src/duckdb/src/planner/expression_binder/order_binder.cpp +8 -8
- package/src/duckdb/src/planner/expression_binder/relation_binder.cpp +1 -1
- package/src/duckdb/src/planner/expression_binder/select_bind_state.cpp +2 -2
- package/src/duckdb/src/planner/expression_binder/table_function_binder.cpp +1 -1
- package/src/duckdb/src/planner/expression_binder/update_binder.cpp +1 -1
- package/src/duckdb/src/planner/expression_binder.cpp +7 -7
- package/src/duckdb/src/planner/expression_iterator.cpp +6 -3
- package/src/duckdb/src/planner/filter/constant_filter.cpp +17 -2
- package/src/duckdb/src/planner/filter/dynamic_filter.cpp +68 -0
- package/src/duckdb/src/planner/filter/in_filter.cpp +84 -0
- package/src/duckdb/src/planner/filter/null_filter.cpp +1 -2
- package/src/duckdb/src/planner/filter/optional_filter.cpp +29 -0
- package/src/duckdb/src/planner/filter/struct_filter.cpp +11 -6
- package/src/duckdb/src/planner/joinside.cpp +6 -5
- package/src/duckdb/src/planner/logical_operator.cpp +4 -1
- package/src/duckdb/src/planner/logical_operator_visitor.cpp +68 -2
- package/src/duckdb/src/planner/operator/logical_comparison_join.cpp +23 -0
- package/src/duckdb/src/planner/operator/logical_create_index.cpp +16 -12
- package/src/duckdb/src/planner/operator/logical_filter.cpp +1 -1
- package/src/duckdb/src/planner/operator/logical_get.cpp +48 -25
- package/src/duckdb/src/planner/operator/logical_insert.cpp +1 -1
- package/src/duckdb/src/planner/operator/logical_join.cpp +1 -1
- package/src/duckdb/src/planner/operator/logical_order.cpp +4 -11
- package/src/duckdb/src/planner/operator/logical_top_n.cpp +7 -0
- package/src/duckdb/src/planner/subquery/flatten_dependent_join.cpp +33 -5
- package/src/duckdb/src/planner/subquery/rewrite_correlated_expressions.cpp +2 -2
- package/src/duckdb/src/planner/table_binding.cpp +74 -36
- package/src/duckdb/src/planner/table_filter.cpp +5 -8
- package/src/duckdb/src/storage/arena_allocator.cpp +5 -4
- package/src/duckdb/src/storage/buffer/block_handle.cpp +88 -17
- package/src/duckdb/src/storage/buffer/block_manager.cpp +34 -26
- package/src/duckdb/src/storage/buffer/buffer_handle.cpp +2 -2
- package/src/duckdb/src/storage/buffer/buffer_pool.cpp +70 -49
- package/src/duckdb/src/storage/buffer_manager.cpp +4 -0
- package/src/duckdb/src/storage/checkpoint_manager.cpp +24 -5
- package/src/duckdb/src/storage/compression/bitpacking.cpp +14 -16
- package/src/duckdb/src/storage/compression/dictionary/analyze.cpp +54 -0
- package/src/duckdb/src/storage/compression/dictionary/common.cpp +90 -0
- package/src/duckdb/src/storage/compression/dictionary/compression.cpp +174 -0
- package/src/duckdb/src/storage/compression/dictionary/decompression.cpp +115 -0
- package/src/duckdb/src/storage/compression/dictionary_compression.cpp +53 -545
- package/src/duckdb/src/storage/compression/empty_validity.cpp +15 -0
- package/src/duckdb/src/storage/compression/fixed_size_uncompressed.cpp +25 -16
- package/src/duckdb/src/storage/compression/fsst.cpp +101 -47
- package/src/duckdb/src/storage/compression/numeric_constant.cpp +92 -2
- package/src/duckdb/src/storage/compression/rle.cpp +216 -46
- package/src/duckdb/src/storage/compression/roaring/analyze.cpp +179 -0
- package/src/duckdb/src/storage/compression/roaring/common.cpp +282 -0
- package/src/duckdb/src/storage/compression/roaring/compress.cpp +481 -0
- package/src/duckdb/src/storage/compression/roaring/metadata.cpp +262 -0
- package/src/duckdb/src/storage/compression/roaring/scan.cpp +364 -0
- package/src/duckdb/src/storage/compression/string_uncompressed.cpp +47 -65
- package/src/duckdb/src/storage/compression/validity_uncompressed.cpp +102 -39
- package/src/duckdb/src/storage/compression/zstd.cpp +1049 -0
- package/src/duckdb/src/storage/data_table.cpp +312 -172
- package/src/duckdb/src/storage/local_storage.cpp +104 -46
- package/src/duckdb/src/storage/metadata/metadata_manager.cpp +1 -1
- package/src/duckdb/src/storage/serialization/serialize_logical_operator.cpp +7 -3
- package/src/duckdb/src/storage/serialization/serialize_nodes.cpp +138 -58
- package/src/duckdb/src/storage/serialization/serialize_parse_info.cpp +14 -0
- package/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +19 -8
- package/src/duckdb/src/storage/serialization/serialize_statement.cpp +2 -0
- package/src/duckdb/src/storage/serialization/serialize_table_filter.cpp +43 -0
- package/src/duckdb/src/storage/serialization/serialize_types.cpp +32 -5
- package/src/duckdb/src/storage/single_file_block_manager.cpp +6 -8
- package/src/duckdb/src/storage/standard_buffer_manager.cpp +82 -71
- package/src/duckdb/src/storage/statistics/column_statistics.cpp +3 -3
- package/src/duckdb/src/storage/statistics/distinct_statistics.cpp +18 -17
- package/src/duckdb/src/storage/statistics/numeric_stats.cpp +34 -22
- package/src/duckdb/src/storage/statistics/string_stats.cpp +14 -3
- package/src/duckdb/src/storage/storage_info.cpp +72 -10
- package/src/duckdb/src/storage/storage_manager.cpp +41 -47
- package/src/duckdb/src/storage/table/array_column_data.cpp +7 -1
- package/src/duckdb/src/storage/table/column_checkpoint_state.cpp +10 -9
- package/src/duckdb/src/storage/table/column_data.cpp +105 -43
- package/src/duckdb/src/storage/table/column_data_checkpointer.cpp +307 -132
- package/src/duckdb/src/storage/table/column_segment.cpp +36 -13
- package/src/duckdb/src/storage/table/list_column_data.cpp +4 -4
- package/src/duckdb/src/storage/table/row_group.cpp +159 -66
- package/src/duckdb/src/storage/table/row_group_collection.cpp +157 -68
- package/src/duckdb/src/storage/table/row_version_manager.cpp +33 -10
- package/src/duckdb/src/storage/table/scan_state.cpp +21 -7
- package/src/duckdb/src/storage/table/standard_column_data.cpp +68 -5
- package/src/duckdb/src/storage/table/struct_column_data.cpp +42 -4
- package/src/duckdb/src/storage/table/table_statistics.cpp +91 -5
- package/src/duckdb/src/storage/table/update_segment.cpp +287 -210
- package/src/duckdb/src/storage/table_index_list.cpp +55 -58
- package/src/duckdb/src/storage/temporary_file_manager.cpp +412 -149
- package/src/duckdb/src/storage/wal_replay.cpp +132 -48
- package/src/duckdb/src/storage/write_ahead_log.cpp +75 -48
- package/src/duckdb/src/transaction/cleanup_state.cpp +0 -1
- package/src/duckdb/src/transaction/commit_state.cpp +23 -14
- package/src/duckdb/src/transaction/duck_transaction.cpp +29 -25
- package/src/duckdb/src/transaction/duck_transaction_manager.cpp +18 -6
- package/src/duckdb/src/transaction/meta_transaction.cpp +3 -2
- package/src/duckdb/src/transaction/rollback_state.cpp +5 -2
- package/src/duckdb/src/transaction/transaction_context.cpp +9 -1
- package/src/duckdb/src/transaction/undo_buffer.cpp +35 -27
- package/src/duckdb/src/transaction/undo_buffer_allocator.cpp +72 -0
- package/src/duckdb/src/transaction/wal_write_state.cpp +12 -10
- package/src/duckdb/src/verification/copied_statement_verifier.cpp +7 -4
- package/src/duckdb/src/verification/deserialized_statement_verifier.cpp +7 -5
- package/src/duckdb/src/verification/external_statement_verifier.cpp +7 -4
- package/src/duckdb/src/verification/fetch_row_verifier.cpp +7 -4
- package/src/duckdb/src/verification/no_operator_caching_verifier.cpp +8 -4
- package/src/duckdb/src/verification/parsed_statement_verifier.cpp +7 -4
- package/src/duckdb/src/verification/prepared_statement_verifier.cpp +16 -12
- package/src/duckdb/src/verification/statement_verifier.cpp +20 -15
- package/src/duckdb/src/verification/unoptimized_statement_verifier.cpp +7 -4
- package/src/duckdb/third_party/fsst/libfsst.hpp +1 -0
- package/src/duckdb/third_party/httplib/httplib.hpp +15 -22
- package/src/duckdb/third_party/libpg_query/include/nodes/parsenodes.hpp +4 -2
- package/src/duckdb/third_party/libpg_query/pg_functions.cpp +2 -4
- package/src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp +14278 -13832
- package/src/duckdb/third_party/parquet/parquet_types.cpp +3410 -1686
- package/src/duckdb/third_party/parquet/parquet_types.h +1585 -1204
- package/src/duckdb/third_party/skiplist/SkipList.h +0 -1
- package/src/duckdb/third_party/snappy/snappy-stubs-internal.h +13 -15
- package/src/duckdb/third_party/zstd/common/debug.cpp +36 -0
- package/src/duckdb/third_party/zstd/common/entropy_common.cpp +173 -49
- package/src/duckdb/third_party/zstd/common/error_private.cpp +11 -3
- package/src/duckdb/third_party/zstd/common/fse_decompress.cpp +126 -97
- package/src/duckdb/third_party/zstd/common/pool.cpp +376 -0
- package/src/duckdb/third_party/zstd/common/threading.cpp +193 -0
- package/src/duckdb/third_party/zstd/common/xxhash.cpp +18 -14
- package/src/duckdb/third_party/zstd/common/zstd_common.cpp +3 -38
- package/src/duckdb/third_party/zstd/compress/fse_compress.cpp +93 -165
- package/src/duckdb/third_party/zstd/compress/hist.cpp +28 -31
- package/src/duckdb/third_party/zstd/compress/huf_compress.cpp +957 -291
- package/src/duckdb/third_party/zstd/compress/zstd_compress.cpp +3988 -1124
- package/src/duckdb/third_party/zstd/compress/zstd_compress_literals.cpp +120 -43
- package/src/duckdb/third_party/zstd/compress/zstd_compress_sequences.cpp +47 -23
- package/src/duckdb/third_party/zstd/compress/zstd_compress_superblock.cpp +274 -424
- package/src/duckdb/third_party/zstd/compress/zstd_double_fast.cpp +403 -153
- package/src/duckdb/third_party/zstd/compress/zstd_fast.cpp +741 -268
- package/src/duckdb/third_party/zstd/compress/zstd_lazy.cpp +1339 -278
- package/src/duckdb/third_party/zstd/compress/zstd_ldm.cpp +334 -222
- package/src/duckdb/third_party/zstd/compress/zstd_opt.cpp +674 -298
- package/src/duckdb/third_party/zstd/compress/zstdmt_compress.cpp +1885 -0
- package/src/duckdb/third_party/zstd/decompress/huf_decompress.cpp +1247 -586
- package/src/duckdb/third_party/zstd/decompress/zstd_ddict.cpp +18 -17
- package/src/duckdb/third_party/zstd/decompress/zstd_decompress.cpp +724 -270
- package/src/duckdb/third_party/zstd/decompress/zstd_decompress_block.cpp +1193 -393
- package/src/duckdb/third_party/zstd/deprecated/zbuff_common.cpp +30 -0
- package/src/duckdb/third_party/zstd/deprecated/zbuff_compress.cpp +171 -0
- package/src/duckdb/third_party/zstd/deprecated/zbuff_decompress.cpp +80 -0
- package/src/duckdb/third_party/zstd/dict/cover.cpp +1271 -0
- package/src/duckdb/third_party/zstd/dict/divsufsort.cpp +1916 -0
- package/src/duckdb/third_party/zstd/dict/fastcover.cpp +775 -0
- package/src/duckdb/third_party/zstd/dict/zdict.cpp +1139 -0
- package/src/duckdb/third_party/zstd/include/zdict.h +473 -0
- package/src/duckdb/third_party/zstd/include/zstd/common/allocations.h +58 -0
- package/src/duckdb/third_party/zstd/include/zstd/common/bits.h +204 -0
- package/src/duckdb/third_party/zstd/include/zstd/common/bitstream.h +88 -85
- package/src/duckdb/third_party/zstd/include/zstd/common/compiler.h +243 -47
- package/src/duckdb/third_party/zstd/include/zstd/common/cpu.h +253 -0
- package/src/duckdb/third_party/zstd/include/zstd/common/debug.h +31 -31
- package/src/duckdb/third_party/zstd/include/zstd/common/error_private.h +94 -6
- package/src/duckdb/third_party/zstd/include/zstd/common/fse.h +424 -64
- package/src/duckdb/third_party/zstd/include/zstd/common/huf.h +255 -70
- package/src/duckdb/third_party/zstd/include/zstd/common/mem.h +125 -85
- package/src/duckdb/third_party/zstd/include/zstd/common/pool.h +84 -0
- package/src/duckdb/third_party/zstd/include/zstd/common/portability_macros.h +158 -0
- package/src/duckdb/third_party/zstd/include/zstd/common/threading.h +152 -0
- package/src/duckdb/third_party/zstd/include/zstd/common/{xxhash.h → xxhash.hpp} +0 -1
- package/src/duckdb/third_party/zstd/include/zstd/common/{xxhash_static.h → xxhash_static.hpp} +1 -1
- package/src/duckdb/third_party/zstd/include/zstd/common/zstd_deps.h +122 -0
- package/src/duckdb/third_party/zstd/include/zstd/common/zstd_internal.h +143 -174
- package/src/duckdb/third_party/zstd/include/zstd/common/zstd_trace.h +159 -0
- package/src/duckdb/third_party/zstd/include/zstd/compress/clevels.h +136 -0
- package/src/duckdb/third_party/zstd/include/zstd/compress/hist.h +4 -4
- package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_compress_internal.h +631 -220
- package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_compress_literals.h +17 -7
- package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_compress_sequences.h +2 -2
- package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_compress_superblock.h +3 -2
- package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_cwksp.h +256 -153
- package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_double_fast.h +16 -3
- package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_fast.h +4 -3
- package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_lazy.h +145 -11
- package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_ldm.h +14 -6
- package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_ldm_geartab.h +110 -0
- package/src/duckdb/third_party/zstd/include/zstd/compress/zstd_opt.h +33 -9
- package/src/duckdb/third_party/zstd/include/zstd/compress/zstdmt_compress.h +107 -0
- package/src/duckdb/third_party/zstd/include/zstd/decompress/zstd_ddict.h +4 -3
- package/src/duckdb/third_party/zstd/include/zstd/decompress/zstd_decompress_block.h +20 -6
- package/src/duckdb/third_party/zstd/include/zstd/decompress/zstd_decompress_internal.h +88 -16
- package/src/duckdb/third_party/zstd/include/zstd/deprecated/zbuff.h +214 -0
- package/src/duckdb/third_party/zstd/include/zstd/dict/cover.h +156 -0
- package/src/duckdb/third_party/zstd/include/zstd/dict/divsufsort.h +62 -0
- package/src/duckdb/third_party/zstd/include/zstd.h +2171 -93
- package/src/duckdb/third_party/zstd/include/{zstd/common/zstd_errors.h → zstd_errors.h} +32 -10
- package/src/duckdb/ub_extension_core_functions_aggregate_algebraic.cpp +8 -0
- package/src/duckdb/ub_extension_core_functions_aggregate_distributive.cpp +20 -0
- package/src/duckdb/ub_extension_core_functions_aggregate_holistic.cpp +12 -0
- package/src/duckdb/ub_extension_core_functions_aggregate_nested.cpp +6 -0
- package/src/duckdb/ub_extension_core_functions_aggregate_regression.cpp +14 -0
- package/src/duckdb/ub_extension_core_functions_scalar_array.cpp +4 -0
- package/src/duckdb/ub_extension_core_functions_scalar_bit.cpp +2 -0
- package/src/duckdb/ub_extension_core_functions_scalar_blob.cpp +4 -0
- package/src/duckdb/ub_extension_core_functions_scalar_date.cpp +20 -0
- package/src/duckdb/ub_extension_core_functions_scalar_debug.cpp +2 -0
- package/src/duckdb/ub_extension_core_functions_scalar_enum.cpp +2 -0
- package/src/duckdb/ub_extension_core_functions_scalar_generic.cpp +18 -0
- package/src/duckdb/ub_extension_core_functions_scalar_list.cpp +22 -0
- package/src/duckdb/ub_extension_core_functions_scalar_map.cpp +14 -0
- package/src/duckdb/ub_extension_core_functions_scalar_math.cpp +2 -0
- package/src/duckdb/ub_extension_core_functions_scalar_operators.cpp +2 -0
- package/src/duckdb/ub_extension_core_functions_scalar_random.cpp +4 -0
- package/src/duckdb/ub_extension_core_functions_scalar_string.cpp +48 -0
- package/src/duckdb/ub_extension_core_functions_scalar_struct.cpp +2 -0
- package/src/duckdb/ub_extension_core_functions_scalar_union.cpp +6 -0
- package/src/duckdb/ub_src_common.cpp +4 -0
- package/src/duckdb/ub_src_common_arrow.cpp +3 -1
- package/src/duckdb/ub_src_execution.cpp +0 -6
- package/src/duckdb/ub_src_execution_operator_aggregate.cpp +2 -0
- package/src/duckdb/ub_src_execution_operator_csv_scanner_encode.cpp +2 -0
- package/src/duckdb/ub_src_execution_operator_csv_scanner_util.cpp +2 -0
- package/src/duckdb/ub_src_execution_sample.cpp +4 -0
- package/src/duckdb/ub_src_function.cpp +6 -0
- package/src/duckdb/ub_src_function_aggregate.cpp +0 -2
- package/src/duckdb/ub_src_function_aggregate_distributive.cpp +3 -1
- package/src/duckdb/ub_src_function_scalar.cpp +2 -8
- package/src/duckdb/ub_src_function_scalar_date.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar_generic.cpp +2 -2
- package/src/duckdb/ub_src_function_scalar_map.cpp +2 -0
- package/src/duckdb/ub_src_function_scalar_operator.cpp +8 -0
- package/src/duckdb/ub_src_function_scalar_string.cpp +10 -0
- package/src/duckdb/ub_src_function_scalar_struct.cpp +4 -0
- package/src/duckdb/ub_src_function_scalar_system.cpp +2 -0
- package/src/duckdb/ub_src_function_table_system.cpp +6 -0
- package/src/duckdb/ub_src_function_window.cpp +36 -0
- package/src/duckdb/ub_src_logging.cpp +8 -0
- package/src/duckdb/ub_src_main_settings.cpp +3 -1
- package/src/duckdb/ub_src_optimizer.cpp +8 -0
- package/src/duckdb/ub_src_optimizer_pushdown.cpp +2 -0
- package/src/duckdb/ub_src_optimizer_rule.cpp +2 -0
- package/src/duckdb/ub_src_parser.cpp +2 -0
- package/src/duckdb/ub_src_parser_parsed_data.cpp +2 -0
- package/src/duckdb/ub_src_planner.cpp +2 -0
- package/src/duckdb/ub_src_planner_filter.cpp +6 -0
- package/src/duckdb/ub_src_storage_compression.cpp +4 -0
- package/src/duckdb/ub_src_storage_compression_dictionary.cpp +8 -0
- package/src/duckdb/ub_src_storage_compression_roaring.cpp +10 -0
- package/src/duckdb/ub_src_transaction.cpp +2 -0
- package/vendor.py +1 -1
- package/src/duckdb/extension/json/yyjson/include/yyjson.hpp +0 -6003
- package/src/duckdb/extension/json/yyjson/yyjson.cpp +0 -8218
- package/src/duckdb/src/common/arrow/appender/list_data.cpp +0 -78
- package/src/duckdb/src/common/arrow/appender/map_data.cpp +0 -91
- package/src/duckdb/src/common/cycle_counter.cpp +0 -76
- package/src/duckdb/src/common/field_writer.cpp +0 -97
- package/src/duckdb/src/common/http_state.cpp +0 -95
- package/src/duckdb/src/common/preserved_error.cpp +0 -87
- package/src/duckdb/src/common/row_operations/row_match.cpp +0 -359
- package/src/duckdb/src/common/serializer/buffered_deserializer.cpp +0 -27
- package/src/duckdb/src/common/serializer/buffered_serializer.cpp +0 -36
- package/src/duckdb/src/common/serializer/format_serializer.cpp +0 -15
- package/src/duckdb/src/common/serializer.cpp +0 -24
- package/src/duckdb/src/common/types/chunk_collection.cpp +0 -190
- package/src/duckdb/src/core_functions/aggregate/distributive/entropy.cpp +0 -183
- package/src/duckdb/src/core_functions/scalar/date/current.cpp +0 -54
- package/src/duckdb/src/core_functions/scalar/list/list_cosine_similarity.cpp +0 -78
- package/src/duckdb/src/core_functions/scalar/list/list_inner_product.cpp +0 -70
- package/src/duckdb/src/core_functions/scalar/list/list_lambdas.cpp +0 -412
- package/src/duckdb/src/core_functions/scalar/secret/which_secret.cpp +0 -28
- package/src/duckdb/src/core_functions/scalar/string/jaro_winkler.cpp +0 -71
- package/src/duckdb/src/execution/index/art/fixed_size_allocator.cpp +0 -238
- package/src/duckdb/src/execution/index/art/node16.cpp +0 -196
- package/src/duckdb/src/execution/index/art/node4.cpp +0 -189
- package/src/duckdb/src/execution/index/unknown_index.cpp +0 -65
- package/src/duckdb/src/execution/operator/csv_scanner/base_csv_reader.cpp +0 -595
- package/src/duckdb/src/execution/operator/csv_scanner/buffered_csv_reader.cpp +0 -434
- package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer.cpp +0 -89
- package/src/duckdb/src/execution/operator/csv_scanner/csv_buffer_manager.cpp +0 -90
- package/src/duckdb/src/execution/operator/csv_scanner/csv_file_handle.cpp +0 -95
- package/src/duckdb/src/execution/operator/csv_scanner/csv_reader_options.cpp +0 -494
- package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine.cpp +0 -35
- package/src/duckdb/src/execution/operator/csv_scanner/csv_state_machine_cache.cpp +0 -99
- package/src/duckdb/src/execution/operator/csv_scanner/parallel_csv_reader.cpp +0 -689
- package/src/duckdb/src/execution/operator/join/physical_index_join.cpp +0 -242
- package/src/duckdb/src/execution/operator/persistent/base_csv_reader.cpp +0 -695
- package/src/duckdb/src/execution/operator/persistent/buffered_csv_reader.cpp +0 -1487
- package/src/duckdb/src/execution/operator/persistent/csv_buffer.cpp +0 -72
- package/src/duckdb/src/execution/operator/persistent/csv_file_handle.cpp +0 -158
- package/src/duckdb/src/execution/operator/persistent/csv_reader_options.cpp +0 -280
- package/src/duckdb/src/execution/operator/persistent/parallel_csv_reader.cpp +0 -666
- package/src/duckdb/src/execution/operator/persistent/physical_fixed_batch_copy.cpp +0 -499
- package/src/duckdb/src/execution/operator/schema/physical_create_index.cpp +0 -207
- package/src/duckdb/src/execution/partitionable_hashtable.cpp +0 -207
- package/src/duckdb/src/execution/physical_plan/plan_limit_percent.cpp +0 -18
- package/src/duckdb/src/execution/physical_plan/plan_show_select.cpp +0 -47
- package/src/duckdb/src/execution/reservoir_sample.cpp +0 -324
- package/src/duckdb/src/execution/window_executor.cpp +0 -1830
- package/src/duckdb/src/execution/window_segment_tree.cpp +0 -2073
- package/src/duckdb/src/extension_forward_decl/icu.cpp +0 -59
- package/src/duckdb/src/function/aggregate/distributive_functions.cpp +0 -15
- package/src/duckdb/src/function/scalar/compressed_materialization_functions.cpp +0 -29
- package/src/duckdb/src/function/scalar/generic_functions.cpp +0 -11
- package/src/duckdb/src/function/scalar/list/list_concat.cpp +0 -143
- package/src/duckdb/src/function/scalar/operators.cpp +0 -14
- package/src/duckdb/src/function/scalar/sequence_functions.cpp +0 -10
- package/src/duckdb/src/function/scalar/string_functions.cpp +0 -22
- package/src/duckdb/src/function/table/pragma_detailed_profiling_output.cpp +0 -173
- package/src/duckdb/src/function/table/pragma_last_profiling_output.cpp +0 -101
- package/src/duckdb/src/include/duckdb/catalog/mapping_value.hpp +0 -92
- package/src/duckdb/src/include/duckdb/common/arrow/arrow_types_extension.hpp +0 -42
- package/src/duckdb/src/include/duckdb/common/cycle_counter.hpp +0 -68
- package/src/duckdb/src/include/duckdb/common/enums/index_type.hpp +0 -34
- package/src/duckdb/src/include/duckdb/common/http_state.hpp +0 -113
- package/src/duckdb/src/include/duckdb/common/platform.h +0 -58
- package/src/duckdb/src/include/duckdb/common/preserved_error.hpp +0 -59
- package/src/duckdb/src/include/duckdb/common/serializer/deserialization_data.hpp +0 -192
- package/src/duckdb/src/include/duckdb/common/types/chunk_collection.hpp +0 -137
- package/src/duckdb/src/include/duckdb/execution/index/art/node16.hpp +0 -65
- package/src/duckdb/src/include/duckdb/execution/index/art/node4.hpp +0 -63
- package/src/duckdb/src/include/duckdb/execution/index/unknown_index.hpp +0 -65
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/buffer_manager/csv_buffer.hpp +0 -103
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/buffer_manager/csv_buffer_manager.hpp +0 -74
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/buffer_manager/csv_file_handle.hpp +0 -60
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_sniffer.hpp +0 -253
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/options/csv_option.hpp +0 -155
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/options/csv_reader_options.hpp +0 -163
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/options/state_machine_options.hpp +0 -35
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner/base_scanner.hpp +0 -228
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner/column_count_scanner.hpp +0 -70
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner/scanner_boundary.hpp +0 -93
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner/skip_scanner.hpp +0 -60
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/scanner/string_value_scanner.hpp +0 -197
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/sniffer/quote_rules.hpp +0 -21
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/state_machine/csv_state.hpp +0 -30
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/state_machine/csv_state_machine.hpp +0 -99
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.hpp +0 -87
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/table_function/csv_file_scanner.hpp +0 -70
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/table_function/global_csv_state.hpp +0 -80
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/util/csv_casting.hpp +0 -137
- package/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/util/csv_error.hpp +0 -104
- package/src/duckdb/src/include/duckdb/execution/operator/join/physical_index_join.hpp +0 -79
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/base_csv_reader.hpp +0 -119
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/buffered_csv_reader.hpp +0 -72
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer.hpp +0 -110
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_buffer_manager.hpp +0 -103
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_file_handle.hpp +0 -59
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_line_info.hpp +0 -46
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_reader_options.hpp +0 -210
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_sniffer.hpp +0 -131
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state.hpp +0 -28
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine.hpp +0 -70
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/csv_state_machine_cache.hpp +0 -65
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/parallel_csv_reader.hpp +0 -167
- package/src/duckdb/src/include/duckdb/execution/operator/scan/csv/quote_rules.hpp +0 -21
- package/src/duckdb/src/include/duckdb/execution/window_executor.hpp +0 -343
- package/src/duckdb/src/include/duckdb/execution/window_segment_tree.hpp +0 -165
- package/src/duckdb/src/include/duckdb/optimizer/column_lifetime_optimizer.hpp +0 -45
- package/src/duckdb/src/include/duckdb/optimizer/join_order/estimated_properties.hpp +0 -57
- package/src/duckdb/src/include/duckdb/parser/parsed_data/comment_on_info.hpp +0 -45
- package/src/duckdb/src/include/duckdb/parser/statement/show_statement.hpp +0 -32
- package/src/duckdb/src/include/duckdb/planner/operator/logical_limit_percent.hpp +0 -49
- package/src/duckdb/src/include/duckdb/planner/operator/logical_show.hpp +0 -42
- package/src/duckdb/src/main/settings/settings.cpp +0 -2056
- package/src/duckdb/src/optimizer/join_order/estimated_properties.cpp +0 -36
- package/src/duckdb/src/parser/parsed_data/comment_on_info.cpp +0 -19
- package/src/duckdb/src/parser/statement/show_statement.cpp +0 -15
- package/src/duckdb/src/planner/binder/statement/bind_show.cpp +0 -30
- package/src/duckdb/src/planner/operator/logical_limit_percent.cpp +0 -14
- package/src/duckdb/src/storage/statistics/numeric_stats_union.cpp +0 -70
- package/src/duckdb/third_party/fsst/fsst_avx512.cpp +0 -140
- package/src/duckdb/third_party/fsst/fsst_avx512.inc +0 -57
- package/src/duckdb/third_party/fsst/fsst_avx512_unroll1.inc +0 -57
- package/src/duckdb/third_party/fsst/fsst_avx512_unroll2.inc +0 -114
- package/src/duckdb/third_party/fsst/fsst_avx512_unroll3.inc +0 -171
- package/src/duckdb/third_party/fsst/fsst_avx512_unroll4.inc +0 -228
- package/src/duckdb/third_party/parquet/parquet_constants.cpp +0 -17
- package/src/duckdb/third_party/parquet/parquet_constants.h +0 -24
- package/src/duckdb/third_party/re2/util/pod_array.h +0 -55
- package/src/duckdb/third_party/re2/util/sparse_array.h +0 -392
- package/src/duckdb/third_party/re2/util/sparse_set.h +0 -264
- package/src/duckdb/third_party/zstd/include/zstd/common/fse_static.h +0 -421
- package/src/duckdb/third_party/zstd/include/zstd/common/huf_static.h +0 -238
- package/src/duckdb/third_party/zstd/include/zstd_static.h +0 -1070
- package/src/duckdb/ub_src_core_functions.cpp +0 -6
- package/src/duckdb/ub_src_core_functions_aggregate_algebraic.cpp +0 -8
- package/src/duckdb/ub_src_core_functions_aggregate_distributive.cpp +0 -24
- package/src/duckdb/ub_src_core_functions_aggregate_holistic.cpp +0 -12
- package/src/duckdb/ub_src_core_functions_aggregate_nested.cpp +0 -6
- package/src/duckdb/ub_src_core_functions_aggregate_regression.cpp +0 -14
- package/src/duckdb/ub_src_core_functions_scalar_array.cpp +0 -4
- package/src/duckdb/ub_src_core_functions_scalar_bit.cpp +0 -2
- package/src/duckdb/ub_src_core_functions_scalar_blob.cpp +0 -6
- package/src/duckdb/ub_src_core_functions_scalar_date.cpp +0 -22
- package/src/duckdb/ub_src_core_functions_scalar_debug.cpp +0 -2
- package/src/duckdb/ub_src_core_functions_scalar_enum.cpp +0 -2
- package/src/duckdb/ub_src_core_functions_scalar_generic.cpp +0 -18
- package/src/duckdb/ub_src_core_functions_scalar_list.cpp +0 -22
- package/src/duckdb/ub_src_core_functions_scalar_map.cpp +0 -16
- package/src/duckdb/ub_src_core_functions_scalar_math.cpp +0 -2
- package/src/duckdb/ub_src_core_functions_scalar_operators.cpp +0 -2
- package/src/duckdb/ub_src_core_functions_scalar_random.cpp +0 -4
- package/src/duckdb/ub_src_core_functions_scalar_secret.cpp +0 -2
- package/src/duckdb/ub_src_core_functions_scalar_string.cpp +0 -58
- package/src/duckdb/ub_src_core_functions_scalar_struct.cpp +0 -4
- package/src/duckdb/ub_src_core_functions_scalar_union.cpp +0 -6
- package/src/duckdb/ub_src_execution_operator_csv_scanner.cpp +0 -18
- package/src/duckdb/ub_src_function_scalar_operators.cpp +0 -8
- /package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/algebraic/covar.hpp +0 -0
- /package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/algebraic/stddev.hpp +0 -0
- /package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/aggregate/sum_helpers.hpp +0 -0
- /package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/array_kernels.hpp +0 -0
- /package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/function_list.hpp +0 -0
- /package/src/duckdb/{src/include/duckdb → extension/core_functions/include}/core_functions/scalar/secret_functions.hpp +0 -0
- /package/src/duckdb/src/function/scalar/{operators → operator}/multiply.cpp +0 -0
- /package/src/duckdb/src/function/scalar/{operators → operator}/subtract.cpp +0 -0
@@ -1,10 +1,14 @@
|
|
1
1
|
#include "column_writer.hpp"
|
2
2
|
|
3
3
|
#include "duckdb.hpp"
|
4
|
+
#include "geo_parquet.hpp"
|
5
|
+
#include "parquet_dbp_encoder.hpp"
|
6
|
+
#include "parquet_dlba_encoder.hpp"
|
4
7
|
#include "parquet_rle_bp_decoder.hpp"
|
5
8
|
#include "parquet_rle_bp_encoder.hpp"
|
9
|
+
#include "parquet_bss_encoder.hpp"
|
10
|
+
#include "parquet_statistics.hpp"
|
6
11
|
#include "parquet_writer.hpp"
|
7
|
-
#include "geo_parquet.hpp"
|
8
12
|
#ifndef DUCKDB_AMALGAMATION
|
9
13
|
#include "duckdb/common/exception.hpp"
|
10
14
|
#include "duckdb/common/operator/comparison_operators.hpp"
|
@@ -19,49 +23,32 @@
|
|
19
23
|
#include "duckdb/execution/expression_executor.hpp"
|
20
24
|
#endif
|
21
25
|
|
26
|
+
#include "brotli/encode.h"
|
22
27
|
#include "lz4.hpp"
|
23
28
|
#include "miniz_wrapper.hpp"
|
24
29
|
#include "snappy.h"
|
25
30
|
#include "zstd.h"
|
26
|
-
#include "
|
31
|
+
#include "zstd/common/xxhash.hpp"
|
32
|
+
|
33
|
+
#include <cmath>
|
27
34
|
|
28
35
|
namespace duckdb {
|
29
36
|
|
30
37
|
using namespace duckdb_parquet; // NOLINT
|
31
38
|
using namespace duckdb_miniz; // NOLINT
|
32
39
|
|
33
|
-
using duckdb_parquet::
|
34
|
-
using duckdb_parquet::
|
35
|
-
using duckdb_parquet::
|
36
|
-
using duckdb_parquet::
|
37
|
-
using duckdb_parquet::
|
38
|
-
using duckdb_parquet::
|
39
|
-
using duckdb_parquet::
|
40
|
-
using ParquetRowGroup = duckdb_parquet::
|
41
|
-
using duckdb_parquet::
|
40
|
+
using duckdb_parquet::CompressionCodec;
|
41
|
+
using duckdb_parquet::ConvertedType;
|
42
|
+
using duckdb_parquet::Encoding;
|
43
|
+
using duckdb_parquet::FieldRepetitionType;
|
44
|
+
using duckdb_parquet::FileMetaData;
|
45
|
+
using duckdb_parquet::PageHeader;
|
46
|
+
using duckdb_parquet::PageType;
|
47
|
+
using ParquetRowGroup = duckdb_parquet::RowGroup;
|
48
|
+
using duckdb_parquet::Type;
|
42
49
|
|
43
50
|
#define PARQUET_DEFINE_VALID 65535
|
44
51
|
|
45
|
-
static void VarintEncode(uint32_t val, WriteStream &ser) {
|
46
|
-
do {
|
47
|
-
uint8_t byte = val & 127;
|
48
|
-
val >>= 7;
|
49
|
-
if (val != 0) {
|
50
|
-
byte |= 128;
|
51
|
-
}
|
52
|
-
ser.Write<uint8_t>(byte);
|
53
|
-
} while (val != 0);
|
54
|
-
}
|
55
|
-
|
56
|
-
static uint8_t GetVarintSize(uint32_t val) {
|
57
|
-
uint8_t res = 0;
|
58
|
-
do {
|
59
|
-
val >>= 7;
|
60
|
-
res++;
|
61
|
-
} while (val != 0);
|
62
|
-
return res;
|
63
|
-
}
|
64
|
-
|
65
52
|
//===--------------------------------------------------------------------===//
|
66
53
|
// ColumnWriterStatistics
|
67
54
|
//===--------------------------------------------------------------------===//
|
@@ -106,7 +93,7 @@ void RleBpEncoder::BeginPrepare(uint32_t first_value) {
|
|
106
93
|
void RleBpEncoder::FinishRun() {
|
107
94
|
// last value, or value has changed
|
108
95
|
// write out the current run
|
109
|
-
byte_count += GetVarintSize(current_run_count << 1) + byte_width;
|
96
|
+
byte_count += ParquetDecodeUtils::GetVarintSize(current_run_count << 1) + byte_width;
|
110
97
|
current_run_count = 1;
|
111
98
|
run_count++;
|
112
99
|
}
|
@@ -137,7 +124,7 @@ void RleBpEncoder::BeginWrite(WriteStream &writer, uint32_t first_value) {
|
|
137
124
|
|
138
125
|
void RleBpEncoder::WriteRun(WriteStream &writer) {
|
139
126
|
// write the header of the run
|
140
|
-
VarintEncode(current_run_count << 1, writer);
|
127
|
+
ParquetDecodeUtils::VarintEncode(current_run_count << 1, writer);
|
141
128
|
// now write the value
|
142
129
|
D_ASSERT(last_value >> (byte_width * 8) == 0);
|
143
130
|
switch (byte_width) {
|
@@ -224,16 +211,11 @@ void ColumnWriter::CompressPage(MemoryStream &temp_writer, size_t &compressed_si
|
|
224
211
|
break;
|
225
212
|
}
|
226
213
|
case CompressionCodec::ZSTD: {
|
227
|
-
auto configured_compression = writer.CompressionLevel();
|
228
|
-
int compress_level = ZSTD_CLEVEL_DEFAULT;
|
229
|
-
if (configured_compression.IsValid()) {
|
230
|
-
compress_level = static_cast<int>(configured_compression.GetIndex());
|
231
|
-
}
|
232
214
|
compressed_size = duckdb_zstd::ZSTD_compressBound(temp_writer.GetPosition());
|
233
215
|
compressed_buf = unique_ptr<data_t[]>(new data_t[compressed_size]);
|
234
|
-
compressed_size =
|
235
|
-
|
236
|
-
|
216
|
+
compressed_size = duckdb_zstd::ZSTD_compress((void *)compressed_buf.get(), compressed_size,
|
217
|
+
(const void *)temp_writer.GetData(), temp_writer.GetPosition(),
|
218
|
+
UnsafeNumericCast<int32_t>(writer.CompressionLevel()));
|
237
219
|
compressed_data = compressed_buf.get();
|
238
220
|
break;
|
239
221
|
}
|
@@ -344,18 +326,20 @@ struct PageWriteInformation {
|
|
344
326
|
|
345
327
|
class BasicColumnWriterState : public ColumnWriterState {
|
346
328
|
public:
|
347
|
-
BasicColumnWriterState(duckdb_parquet::
|
329
|
+
BasicColumnWriterState(duckdb_parquet::RowGroup &row_group, idx_t col_idx)
|
348
330
|
: row_group(row_group), col_idx(col_idx) {
|
349
331
|
page_info.emplace_back();
|
350
332
|
}
|
351
333
|
~BasicColumnWriterState() override = default;
|
352
334
|
|
353
|
-
duckdb_parquet::
|
335
|
+
duckdb_parquet::RowGroup &row_group;
|
354
336
|
idx_t col_idx;
|
355
337
|
vector<PageInformation> page_info;
|
356
338
|
vector<PageWriteInformation> write_info;
|
357
339
|
unique_ptr<ColumnWriterStatistics> stats_state;
|
358
340
|
idx_t current_page = 0;
|
341
|
+
|
342
|
+
unique_ptr<ParquetBloomFilter> bloom_filter;
|
359
343
|
};
|
360
344
|
|
361
345
|
//===--------------------------------------------------------------------===//
|
@@ -377,17 +361,15 @@ public:
|
|
377
361
|
//! Dictionary pages must be below 2GB. Unlike data pages, there's only one dictionary page.
|
378
362
|
//! For this reason we go with a much higher, but still a conservative upper bound of 1GB;
|
379
363
|
static constexpr const idx_t MAX_UNCOMPRESSED_DICT_PAGE_SIZE = 1e9;
|
380
|
-
//! If the dictionary has this many entries,
|
381
|
-
//! we stop creating the dictionary
|
364
|
+
//! If the dictionary has this many entries, we stop creating the dictionary
|
382
365
|
static constexpr const idx_t DICTIONARY_ANALYZE_THRESHOLD = 1e4;
|
383
|
-
|
384
366
|
//! The maximum size a key entry in an RLE page takes
|
385
367
|
static constexpr const idx_t MAX_DICTIONARY_KEY_SIZE = sizeof(uint32_t);
|
386
368
|
//! The size of encoding the string length
|
387
369
|
static constexpr const idx_t STRING_LENGTH_SIZE = sizeof(uint32_t);
|
388
370
|
|
389
371
|
public:
|
390
|
-
unique_ptr<ColumnWriterState> InitializeWriteState(duckdb_parquet::
|
372
|
+
unique_ptr<ColumnWriterState> InitializeWriteState(duckdb_parquet::RowGroup &row_group) override;
|
391
373
|
void Prepare(ColumnWriterState &state, ColumnWriterState *parent, Vector &vector, idx_t count) override;
|
392
374
|
void BeginWrite(ColumnWriterState &state) override;
|
393
375
|
void Write(ColumnWriterState &state, Vector &vector, idx_t count) override;
|
@@ -397,7 +379,7 @@ protected:
|
|
397
379
|
static void WriteLevels(WriteStream &temp_writer, const unsafe_vector<uint16_t> &levels, idx_t max_value,
|
398
380
|
idx_t start_offset, idx_t count);
|
399
381
|
|
400
|
-
virtual duckdb_parquet::
|
382
|
+
virtual duckdb_parquet::Encoding::type GetEncoding(BasicColumnWriterState &state);
|
401
383
|
|
402
384
|
void NextPage(BasicColumnWriterState &state);
|
403
385
|
void FlushPage(BasicColumnWriterState &state);
|
@@ -425,18 +407,18 @@ protected:
|
|
425
407
|
void WriteDictionary(BasicColumnWriterState &state, unique_ptr<MemoryStream> temp_writer, idx_t row_count);
|
426
408
|
virtual void FlushDictionary(BasicColumnWriterState &state, ColumnWriterStatistics *stats);
|
427
409
|
|
428
|
-
void SetParquetStatistics(BasicColumnWriterState &state, duckdb_parquet::
|
429
|
-
void RegisterToRowGroup(duckdb_parquet::
|
410
|
+
void SetParquetStatistics(BasicColumnWriterState &state, duckdb_parquet::ColumnChunk &column);
|
411
|
+
void RegisterToRowGroup(duckdb_parquet::RowGroup &row_group);
|
430
412
|
};
|
431
413
|
|
432
|
-
unique_ptr<ColumnWriterState> BasicColumnWriter::InitializeWriteState(duckdb_parquet::
|
414
|
+
unique_ptr<ColumnWriterState> BasicColumnWriter::InitializeWriteState(duckdb_parquet::RowGroup &row_group) {
|
433
415
|
auto result = make_uniq<BasicColumnWriterState>(row_group, row_group.columns.size());
|
434
416
|
RegisterToRowGroup(row_group);
|
435
417
|
return std::move(result);
|
436
418
|
}
|
437
419
|
|
438
|
-
void BasicColumnWriter::RegisterToRowGroup(duckdb_parquet::
|
439
|
-
|
420
|
+
void BasicColumnWriter::RegisterToRowGroup(duckdb_parquet::RowGroup &row_group) {
|
421
|
+
duckdb_parquet::ColumnChunk column_chunk;
|
440
422
|
column_chunk.__isset.meta_data = true;
|
441
423
|
column_chunk.meta_data.codec = writer.GetCodec();
|
442
424
|
column_chunk.meta_data.path_in_schema = schema_path;
|
@@ -486,7 +468,7 @@ void BasicColumnWriter::Prepare(ColumnWriterState &state_p, ColumnWriterState *p
|
|
486
468
|
}
|
487
469
|
}
|
488
470
|
|
489
|
-
duckdb_parquet::
|
471
|
+
duckdb_parquet::Encoding::type BasicColumnWriter::GetEncoding(BasicColumnWriterState &state) {
|
490
472
|
return Encoding::PLAIN;
|
491
473
|
}
|
492
474
|
|
@@ -646,8 +628,10 @@ void BasicColumnWriter::Write(ColumnWriterState &state_p, Vector &vector, idx_t
|
|
646
628
|
}
|
647
629
|
}
|
648
630
|
|
649
|
-
void BasicColumnWriter::SetParquetStatistics(BasicColumnWriterState &state,
|
650
|
-
|
631
|
+
void BasicColumnWriter::SetParquetStatistics(BasicColumnWriterState &state, duckdb_parquet::ColumnChunk &column_chunk) {
|
632
|
+
if (!state.stats_state) {
|
633
|
+
return;
|
634
|
+
}
|
651
635
|
if (max_repeat == 0) {
|
652
636
|
column_chunk.meta_data.statistics.null_count = NumericCast<int64_t>(state.null_count);
|
653
637
|
column_chunk.meta_data.statistics.__isset.null_count = true;
|
@@ -682,6 +666,11 @@ void BasicColumnWriter::SetParquetStatistics(BasicColumnWriterState &state,
|
|
682
666
|
column_chunk.meta_data.__isset.statistics = true;
|
683
667
|
}
|
684
668
|
for (const auto &write_info : state.write_info) {
|
669
|
+
// only care about data page encodings, data_page_header.encoding is meaningless for dict
|
670
|
+
if (write_info.page_header.type != PageType::DATA_PAGE &&
|
671
|
+
write_info.page_header.type != PageType::DATA_PAGE_V2) {
|
672
|
+
continue;
|
673
|
+
}
|
685
674
|
column_chunk.meta_data.encodings.push_back(write_info.page_header.data_page_header.encoding);
|
686
675
|
}
|
687
676
|
}
|
@@ -728,6 +717,11 @@ void BasicColumnWriter::FinalizeWrite(ColumnWriterState &state_p) {
|
|
728
717
|
column_chunk.meta_data.total_compressed_size =
|
729
718
|
UnsafeNumericCast<int64_t>(column_writer.GetTotalWritten() - start_offset);
|
730
719
|
column_chunk.meta_data.total_uncompressed_size = UnsafeNumericCast<int64_t>(total_uncompressed_size);
|
720
|
+
|
721
|
+
if (state.bloom_filter) {
|
722
|
+
writer.BufferBloomFilter(state.col_idx, std::move(state.bloom_filter));
|
723
|
+
}
|
724
|
+
// which row group is this?
|
731
725
|
}
|
732
726
|
|
733
727
|
void BasicColumnWriter::FlushDictionary(BasicColumnWriterState &state, ColumnWriterStatistics *stats) {
|
@@ -792,21 +786,47 @@ public:
|
|
792
786
|
return NumericLimits<SRC>::IsSigned() ? GetMaxValue() : string();
|
793
787
|
}
|
794
788
|
string GetMinValue() override {
|
795
|
-
return HasStats() ? string((
|
789
|
+
return HasStats() ? string(char_ptr_cast(&min), sizeof(T)) : string();
|
796
790
|
}
|
797
791
|
string GetMaxValue() override {
|
798
|
-
return HasStats() ? string((
|
792
|
+
return HasStats() ? string(char_ptr_cast(&max), sizeof(T)) : string();
|
799
793
|
}
|
800
794
|
};
|
801
795
|
|
802
796
|
struct BaseParquetOperator {
|
797
|
+
|
798
|
+
template <class SRC, class TGT>
|
799
|
+
static void WriteToStream(const TGT &input, WriteStream &ser) {
|
800
|
+
ser.WriteData(const_data_ptr_cast(&input), sizeof(TGT));
|
801
|
+
}
|
802
|
+
|
803
|
+
template <class SRC, class TGT>
|
804
|
+
static uint64_t XXHash64(const TGT &target_value) {
|
805
|
+
return duckdb_zstd::XXH64(&target_value, sizeof(target_value), 0);
|
806
|
+
}
|
807
|
+
|
808
|
+
template <class SRC, class TGT>
|
809
|
+
static unique_ptr<ColumnWriterStatistics> InitializeStats() {
|
810
|
+
return nullptr;
|
811
|
+
}
|
812
|
+
|
813
|
+
template <class SRC, class TGT>
|
814
|
+
static void HandleStats(ColumnWriterStatistics *stats, TGT target_value) {
|
815
|
+
}
|
816
|
+
};
|
817
|
+
|
818
|
+
struct ParquetCastOperator : public BaseParquetOperator {
|
819
|
+
template <class SRC, class TGT>
|
820
|
+
static TGT Operation(SRC input) {
|
821
|
+
return TGT(input);
|
822
|
+
}
|
803
823
|
template <class SRC, class TGT>
|
804
824
|
static unique_ptr<ColumnWriterStatistics> InitializeStats() {
|
805
825
|
return make_uniq<NumericStatisticsState<SRC, TGT, BaseParquetOperator>>();
|
806
826
|
}
|
807
827
|
|
808
828
|
template <class SRC, class TGT>
|
809
|
-
static void HandleStats(ColumnWriterStatistics *stats,
|
829
|
+
static void HandleStats(ColumnWriterStatistics *stats, TGT target_value) {
|
810
830
|
auto &numeric_stats = (NumericStatisticsState<SRC, TGT, BaseParquetOperator> &)*stats;
|
811
831
|
if (LessThan::Operation(target_value, numeric_stats.min)) {
|
812
832
|
numeric_stats.min = target_value;
|
@@ -817,24 +837,165 @@ struct BaseParquetOperator {
|
|
817
837
|
}
|
818
838
|
};
|
819
839
|
|
820
|
-
struct
|
840
|
+
struct ParquetTimestampNSOperator : public ParquetCastOperator {
|
821
841
|
template <class SRC, class TGT>
|
822
842
|
static TGT Operation(SRC input) {
|
823
843
|
return TGT(input);
|
824
844
|
}
|
825
845
|
};
|
826
846
|
|
827
|
-
struct
|
847
|
+
struct ParquetTimestampSOperator : public ParquetCastOperator {
|
828
848
|
template <class SRC, class TGT>
|
829
849
|
static TGT Operation(SRC input) {
|
830
|
-
return
|
850
|
+
return Timestamp::FromEpochSecondsPossiblyInfinite(input).value;
|
851
|
+
}
|
852
|
+
};
|
853
|
+
|
854
|
+
class StringStatisticsState : public ColumnWriterStatistics {
|
855
|
+
static constexpr const idx_t MAX_STRING_STATISTICS_SIZE = 10000;
|
856
|
+
|
857
|
+
public:
|
858
|
+
StringStatisticsState() : has_stats(false), values_too_big(false), min(), max() {
|
859
|
+
}
|
860
|
+
|
861
|
+
bool has_stats;
|
862
|
+
bool values_too_big;
|
863
|
+
string min;
|
864
|
+
string max;
|
865
|
+
|
866
|
+
public:
|
867
|
+
bool HasStats() override {
|
868
|
+
return has_stats;
|
869
|
+
}
|
870
|
+
|
871
|
+
void Update(const string_t &val) {
|
872
|
+
if (values_too_big) {
|
873
|
+
return;
|
874
|
+
}
|
875
|
+
auto str_len = val.GetSize();
|
876
|
+
if (str_len > MAX_STRING_STATISTICS_SIZE) {
|
877
|
+
// we avoid gathering stats when individual string values are too large
|
878
|
+
// this is because the statistics are copied into the Parquet file meta data in uncompressed format
|
879
|
+
// ideally we avoid placing several mega or giga-byte long strings there
|
880
|
+
// we put a threshold of 10KB, if we see strings that exceed this threshold we avoid gathering stats
|
881
|
+
values_too_big = true;
|
882
|
+
has_stats = false;
|
883
|
+
min = string();
|
884
|
+
max = string();
|
885
|
+
return;
|
886
|
+
}
|
887
|
+
if (!has_stats || LessThan::Operation(val, string_t(min))) {
|
888
|
+
min = val.GetString();
|
889
|
+
}
|
890
|
+
if (!has_stats || GreaterThan::Operation(val, string_t(max))) {
|
891
|
+
max = val.GetString();
|
892
|
+
}
|
893
|
+
has_stats = true;
|
894
|
+
}
|
895
|
+
|
896
|
+
string GetMin() override {
|
897
|
+
return GetMinValue();
|
898
|
+
}
|
899
|
+
string GetMax() override {
|
900
|
+
return GetMaxValue();
|
901
|
+
}
|
902
|
+
string GetMinValue() override {
|
903
|
+
return HasStats() ? min : string();
|
904
|
+
}
|
905
|
+
string GetMaxValue() override {
|
906
|
+
return HasStats() ? max : string();
|
831
907
|
}
|
832
908
|
};
|
833
909
|
|
834
|
-
struct
|
910
|
+
struct ParquetStringOperator : public BaseParquetOperator {
|
835
911
|
template <class SRC, class TGT>
|
836
912
|
static TGT Operation(SRC input) {
|
837
|
-
return
|
913
|
+
return input;
|
914
|
+
}
|
915
|
+
|
916
|
+
template <class SRC, class TGT>
|
917
|
+
static unique_ptr<ColumnWriterStatistics> InitializeStats() {
|
918
|
+
return make_uniq<StringStatisticsState>();
|
919
|
+
}
|
920
|
+
|
921
|
+
template <class SRC, class TGT>
|
922
|
+
static void HandleStats(ColumnWriterStatistics *stats, TGT target_value) {
|
923
|
+
auto &string_stats = stats->Cast<StringStatisticsState>();
|
924
|
+
string_stats.Update(target_value);
|
925
|
+
}
|
926
|
+
|
927
|
+
template <class SRC, class TGT>
|
928
|
+
static void WriteToStream(const TGT &target_value, WriteStream &ser) {
|
929
|
+
ser.Write<uint32_t>(target_value.GetSize());
|
930
|
+
ser.WriteData(const_data_ptr_cast(target_value.GetData()), target_value.GetSize());
|
931
|
+
}
|
932
|
+
|
933
|
+
template <class SRC, class TGT>
|
934
|
+
static uint64_t XXHash64(const TGT &target_value) {
|
935
|
+
return duckdb_zstd::XXH64(target_value.GetData(), target_value.GetSize(), 0);
|
936
|
+
}
|
937
|
+
};
|
938
|
+
|
939
|
+
struct ParquetIntervalTargetType {
|
940
|
+
static constexpr const idx_t PARQUET_INTERVAL_SIZE = 12;
|
941
|
+
data_t bytes[PARQUET_INTERVAL_SIZE];
|
942
|
+
};
|
943
|
+
|
944
|
+
struct ParquetIntervalOperator : public BaseParquetOperator {
|
945
|
+
template <class SRC, class TGT>
|
946
|
+
static TGT Operation(SRC input) {
|
947
|
+
|
948
|
+
if (input.days < 0 || input.months < 0 || input.micros < 0) {
|
949
|
+
throw IOException("Parquet files do not support negative intervals");
|
950
|
+
}
|
951
|
+
TGT result;
|
952
|
+
Store<uint32_t>(input.months, result.bytes);
|
953
|
+
Store<uint32_t>(input.days, result.bytes + sizeof(uint32_t));
|
954
|
+
Store<uint32_t>(input.micros / 1000, result.bytes + sizeof(uint32_t) * 2);
|
955
|
+
return result;
|
956
|
+
}
|
957
|
+
|
958
|
+
template <class SRC, class TGT>
|
959
|
+
static void WriteToStream(const TGT &target_value, WriteStream &ser) {
|
960
|
+
ser.WriteData(target_value.bytes, ParquetIntervalTargetType::PARQUET_INTERVAL_SIZE);
|
961
|
+
}
|
962
|
+
|
963
|
+
template <class SRC, class TGT>
|
964
|
+
static uint64_t XXHash64(const TGT &target_value) {
|
965
|
+
return duckdb_zstd::XXH64(target_value.bytes, ParquetIntervalTargetType::PARQUET_INTERVAL_SIZE, 0);
|
966
|
+
}
|
967
|
+
};
|
968
|
+
|
969
|
+
struct ParquetUUIDTargetType {
|
970
|
+
static constexpr const idx_t PARQUET_UUID_SIZE = 16;
|
971
|
+
data_t bytes[PARQUET_UUID_SIZE];
|
972
|
+
};
|
973
|
+
|
974
|
+
struct ParquetUUIDOperator : public BaseParquetOperator {
|
975
|
+
template <class SRC, class TGT>
|
976
|
+
static TGT Operation(SRC input) {
|
977
|
+
TGT result;
|
978
|
+
uint64_t high_bytes = input.upper ^ (int64_t(1) << 63);
|
979
|
+
uint64_t low_bytes = input.lower;
|
980
|
+
for (idx_t i = 0; i < sizeof(uint64_t); i++) {
|
981
|
+
auto shift_count = (sizeof(uint64_t) - i - 1) * 8;
|
982
|
+
result.bytes[i] = (high_bytes >> shift_count) & 0xFF;
|
983
|
+
}
|
984
|
+
for (idx_t i = 0; i < sizeof(uint64_t); i++) {
|
985
|
+
auto shift_count = (sizeof(uint64_t) - i - 1) * 8;
|
986
|
+
result.bytes[sizeof(uint64_t) + i] = (low_bytes >> shift_count) & 0xFF;
|
987
|
+
}
|
988
|
+
return result;
|
989
|
+
}
|
990
|
+
|
991
|
+
template <class SRC, class TGT>
|
992
|
+
static void WriteToStream(const TGT &target_value, WriteStream &ser) {
|
993
|
+
ser.WriteData(target_value.bytes, ParquetUUIDTargetType::PARQUET_UUID_SIZE);
|
994
|
+
}
|
995
|
+
|
996
|
+
template <class SRC, class TGT>
|
997
|
+
static uint64_t XXHash64(const TGT &target_value) {
|
998
|
+
return duckdb_zstd::XXH64(target_value.bytes, ParquetUUIDTargetType::PARQUET_UUID_SIZE, 0);
|
838
999
|
}
|
839
1000
|
};
|
840
1001
|
|
@@ -845,7 +1006,7 @@ struct ParquetTimeTZOperator : public BaseParquetOperator {
|
|
845
1006
|
}
|
846
1007
|
};
|
847
1008
|
|
848
|
-
struct ParquetHugeintOperator {
|
1009
|
+
struct ParquetHugeintOperator : public BaseParquetOperator {
|
849
1010
|
template <class SRC, class TGT>
|
850
1011
|
static TGT Operation(SRC input) {
|
851
1012
|
return Hugeint::Cast<double>(input);
|
@@ -857,11 +1018,11 @@ struct ParquetHugeintOperator {
|
|
857
1018
|
}
|
858
1019
|
|
859
1020
|
template <class SRC, class TGT>
|
860
|
-
static void HandleStats(ColumnWriterStatistics *stats,
|
1021
|
+
static void HandleStats(ColumnWriterStatistics *stats, TGT target_value) {
|
861
1022
|
}
|
862
1023
|
};
|
863
1024
|
|
864
|
-
struct ParquetUhugeintOperator {
|
1025
|
+
struct ParquetUhugeintOperator : public BaseParquetOperator {
|
865
1026
|
template <class SRC, class TGT>
|
866
1027
|
static TGT Operation(SRC input) {
|
867
1028
|
return Uhugeint::Cast<double>(input);
|
@@ -873,16 +1034,13 @@ struct ParquetUhugeintOperator {
|
|
873
1034
|
}
|
874
1035
|
|
875
1036
|
template <class SRC, class TGT>
|
876
|
-
static void HandleStats(ColumnWriterStatistics *stats,
|
1037
|
+
static void HandleStats(ColumnWriterStatistics *stats, TGT target_value) {
|
877
1038
|
}
|
878
1039
|
};
|
879
1040
|
|
880
1041
|
template <class SRC, class TGT, class OP = ParquetCastOperator>
|
881
1042
|
static void TemplatedWritePlain(Vector &col, ColumnWriterStatistics *stats, const idx_t chunk_start,
|
882
|
-
const idx_t chunk_end, ValidityMask &mask, WriteStream &ser) {
|
883
|
-
static constexpr idx_t WRITE_COMBINER_CAPACITY = 8;
|
884
|
-
TGT write_combiner[WRITE_COMBINER_CAPACITY];
|
885
|
-
idx_t write_combiner_count = 0;
|
1043
|
+
const idx_t chunk_end, const ValidityMask &mask, WriteStream &ser) {
|
886
1044
|
|
887
1045
|
const auto *ptr = FlatVector::GetData<SRC>(col);
|
888
1046
|
for (idx_t r = chunk_start; r < chunk_end; r++) {
|
@@ -890,368 +1048,589 @@ static void TemplatedWritePlain(Vector &col, ColumnWriterStatistics *stats, cons
|
|
890
1048
|
continue;
|
891
1049
|
}
|
892
1050
|
TGT target_value = OP::template Operation<SRC, TGT>(ptr[r]);
|
893
|
-
OP::template HandleStats<SRC, TGT>(stats,
|
894
|
-
|
895
|
-
if (write_combiner_count == WRITE_COMBINER_CAPACITY) {
|
896
|
-
ser.WriteData(const_data_ptr_cast(write_combiner), WRITE_COMBINER_CAPACITY * sizeof(TGT));
|
897
|
-
write_combiner_count = 0;
|
898
|
-
}
|
1051
|
+
OP::template HandleStats<SRC, TGT>(stats, target_value);
|
1052
|
+
OP::template WriteToStream<SRC, TGT>(target_value, ser);
|
899
1053
|
}
|
900
|
-
ser.WriteData(const_data_ptr_cast(write_combiner), write_combiner_count * sizeof(TGT));
|
901
1054
|
}
|
902
1055
|
|
903
|
-
template <class
|
904
|
-
class
|
905
|
-
public:
|
906
|
-
StandardColumnWriter(ParquetWriter &writer, idx_t schema_idx, vector<string> schema_path_p, // NOLINT
|
907
|
-
idx_t max_repeat, idx_t max_define, bool can_have_nulls)
|
908
|
-
: BasicColumnWriter(writer, schema_idx, std::move(schema_path_p), max_repeat, max_define, can_have_nulls) {
|
909
|
-
}
|
910
|
-
~StandardColumnWriter() override = default;
|
911
|
-
|
1056
|
+
template <class T>
|
1057
|
+
class StandardColumnWriterState : public BasicColumnWriterState {
|
912
1058
|
public:
|
913
|
-
|
914
|
-
|
1059
|
+
StandardColumnWriterState(duckdb_parquet::RowGroup &row_group, idx_t col_idx)
|
1060
|
+
: BasicColumnWriterState(row_group, col_idx) {
|
915
1061
|
}
|
1062
|
+
~StandardColumnWriterState() override = default;
|
916
1063
|
|
917
|
-
|
918
|
-
|
919
|
-
|
920
|
-
TemplatedWritePlain<SRC, TGT, OP>(input_column, stats, chunk_start, chunk_end, mask, temp_writer);
|
921
|
-
}
|
1064
|
+
// analysis state for integer values for DELTA_BINARY_PACKED/DELTA_LENGTH_BYTE_ARRAY
|
1065
|
+
idx_t total_value_count = 0;
|
1066
|
+
idx_t total_string_size = 0;
|
922
1067
|
|
923
|
-
|
924
|
-
|
925
|
-
}
|
1068
|
+
unordered_map<T, uint32_t> dictionary;
|
1069
|
+
duckdb_parquet::Encoding::type encoding;
|
926
1070
|
};
|
927
1071
|
|
928
|
-
|
929
|
-
|
930
|
-
//===--------------------------------------------------------------------===//
|
931
|
-
class BooleanStatisticsState : public ColumnWriterStatistics {
|
1072
|
+
template <class SRC, class TGT>
|
1073
|
+
class StandardWriterPageState : public ColumnWriterPageState {
|
932
1074
|
public:
|
933
|
-
|
1075
|
+
explicit StandardWriterPageState(const idx_t total_value_count, const idx_t total_string_size,
|
1076
|
+
Encoding::type encoding_p, const unordered_map<SRC, uint32_t> &dictionary_p)
|
1077
|
+
: encoding(encoding_p), dbp_initialized(false), dbp_encoder(total_value_count), dlba_initialized(false),
|
1078
|
+
dlba_encoder(total_value_count, total_string_size), bss_encoder(total_value_count, sizeof(TGT)),
|
1079
|
+
dictionary(dictionary_p), dict_written_value(false),
|
1080
|
+
dict_bit_width(RleBpDecoder::ComputeBitWidth(dictionary.size())), dict_encoder(dict_bit_width) {
|
934
1081
|
}
|
1082
|
+
duckdb_parquet::Encoding::type encoding;
|
935
1083
|
|
936
|
-
bool
|
937
|
-
|
1084
|
+
bool dbp_initialized;
|
1085
|
+
DbpEncoder dbp_encoder;
|
938
1086
|
|
939
|
-
|
940
|
-
|
941
|
-
return !(min && !max);
|
942
|
-
}
|
1087
|
+
bool dlba_initialized;
|
1088
|
+
DlbaEncoder dlba_encoder;
|
943
1089
|
|
944
|
-
|
945
|
-
return GetMinValue();
|
946
|
-
}
|
947
|
-
string GetMax() override {
|
948
|
-
return GetMaxValue();
|
949
|
-
}
|
950
|
-
string GetMinValue() override {
|
951
|
-
return HasStats() ? string(const_char_ptr_cast(&min), sizeof(bool)) : string();
|
952
|
-
}
|
953
|
-
string GetMaxValue() override {
|
954
|
-
return HasStats() ? string(const_char_ptr_cast(&max), sizeof(bool)) : string();
|
955
|
-
}
|
956
|
-
};
|
1090
|
+
BssEncoder bss_encoder;
|
957
1091
|
|
958
|
-
|
959
|
-
|
960
|
-
|
961
|
-
|
1092
|
+
const unordered_map<SRC, uint32_t> &dictionary;
|
1093
|
+
bool dict_written_value;
|
1094
|
+
uint32_t dict_bit_width;
|
1095
|
+
RleBpEncoder dict_encoder;
|
962
1096
|
};
|
963
1097
|
|
964
|
-
|
965
|
-
public:
|
966
|
-
BooleanColumnWriter(ParquetWriter &writer, idx_t schema_idx, vector<string> schema_path_p, idx_t max_repeat,
|
967
|
-
idx_t max_define, bool can_have_nulls)
|
968
|
-
: BasicColumnWriter(writer, schema_idx, std::move(schema_path_p), max_repeat, max_define, can_have_nulls) {
|
969
|
-
}
|
970
|
-
~BooleanColumnWriter() override = default;
|
1098
|
+
namespace dbp_encoder {
|
971
1099
|
|
972
|
-
|
973
|
-
|
974
|
-
|
975
|
-
|
1100
|
+
template <class T>
|
1101
|
+
void BeginWrite(DbpEncoder &encoder, WriteStream &writer, const T &first_value) {
|
1102
|
+
throw InternalException("Can't write type to DELTA_BINARY_PACKED column");
|
1103
|
+
}
|
976
1104
|
|
977
|
-
|
978
|
-
|
979
|
-
|
980
|
-
|
981
|
-
auto &mask = FlatVector::Validity(input_column);
|
1105
|
+
template <>
|
1106
|
+
void BeginWrite(DbpEncoder &encoder, WriteStream &writer, const int64_t &first_value) {
|
1107
|
+
encoder.BeginWrite(writer, first_value);
|
1108
|
+
}
|
982
1109
|
|
983
|
-
|
984
|
-
|
985
|
-
|
986
|
-
|
987
|
-
if (ptr[r]) {
|
988
|
-
stats.max = true;
|
989
|
-
state.byte |= 1 << state.byte_pos;
|
990
|
-
} else {
|
991
|
-
stats.min = false;
|
992
|
-
}
|
993
|
-
state.byte_pos++;
|
1110
|
+
template <>
|
1111
|
+
void BeginWrite(DbpEncoder &encoder, WriteStream &writer, const int32_t &first_value) {
|
1112
|
+
BeginWrite(encoder, writer, UnsafeNumericCast<int64_t>(first_value));
|
1113
|
+
}
|
994
1114
|
|
995
|
-
|
996
|
-
|
997
|
-
|
998
|
-
|
999
|
-
}
|
1000
|
-
}
|
1001
|
-
}
|
1002
|
-
}
|
1115
|
+
template <>
|
1116
|
+
void BeginWrite(DbpEncoder &encoder, WriteStream &writer, const uint64_t &first_value) {
|
1117
|
+
encoder.BeginWrite(writer, UnsafeNumericCast<int64_t>(first_value));
|
1118
|
+
}
|
1003
1119
|
|
1004
|
-
|
1005
|
-
|
1006
|
-
|
1120
|
+
template <>
|
1121
|
+
void BeginWrite(DbpEncoder &encoder, WriteStream &writer, const uint32_t &first_value) {
|
1122
|
+
BeginWrite(encoder, writer, UnsafeNumericCast<int64_t>(first_value));
|
1123
|
+
}
|
1007
1124
|
|
1008
|
-
|
1009
|
-
|
1010
|
-
|
1011
|
-
|
1012
|
-
state.byte = 0;
|
1013
|
-
state.byte_pos = 0;
|
1014
|
-
}
|
1015
|
-
}
|
1125
|
+
template <class T>
|
1126
|
+
void WriteValue(DbpEncoder &encoder, WriteStream &writer, const T &value) {
|
1127
|
+
throw InternalException("Can't write type to DELTA_BINARY_PACKED column");
|
1128
|
+
}
|
1016
1129
|
|
1017
|
-
|
1018
|
-
|
1019
|
-
|
1020
|
-
}
|
1130
|
+
template <>
|
1131
|
+
void WriteValue(DbpEncoder &encoder, WriteStream &writer, const int64_t &value) {
|
1132
|
+
encoder.WriteValue(writer, value);
|
1133
|
+
}
|
1021
1134
|
|
1022
|
-
|
1023
|
-
|
1024
|
-
|
1025
|
-
|
1026
|
-
bool positive = input >= 0;
|
1027
|
-
// numbers are stored as two's complement so some muckery is required
|
1028
|
-
if (!positive) {
|
1029
|
-
input = NumericLimits<hugeint_t>::Maximum() + input + 1;
|
1030
|
-
}
|
1031
|
-
uint64_t high_bytes = uint64_t(input.upper);
|
1032
|
-
uint64_t low_bytes = input.lower;
|
1135
|
+
template <>
|
1136
|
+
void WriteValue(DbpEncoder &encoder, WriteStream &writer, const int32_t &value) {
|
1137
|
+
WriteValue(encoder, writer, UnsafeNumericCast<int64_t>(value));
|
1138
|
+
}
|
1033
1139
|
|
1034
|
-
|
1035
|
-
|
1036
|
-
|
1037
|
-
}
|
1038
|
-
for (idx_t i = 0; i < sizeof(uint64_t); i++) {
|
1039
|
-
auto shift_count = (sizeof(uint64_t) - i - 1) * 8;
|
1040
|
-
result[sizeof(uint64_t) + i] = (low_bytes >> shift_count) & 0xFF;
|
1041
|
-
}
|
1042
|
-
if (!positive) {
|
1043
|
-
result[0] |= 0x80;
|
1044
|
-
}
|
1140
|
+
template <>
|
1141
|
+
void WriteValue(DbpEncoder &encoder, WriteStream &writer, const uint64_t &value) {
|
1142
|
+
encoder.WriteValue(writer, UnsafeNumericCast<int64_t>(value));
|
1045
1143
|
}
|
1046
1144
|
|
1047
|
-
|
1145
|
+
template <>
|
1146
|
+
void WriteValue(DbpEncoder &encoder, WriteStream &writer, const uint32_t &value) {
|
1147
|
+
WriteValue(encoder, writer, UnsafeNumericCast<int64_t>(value));
|
1148
|
+
}
|
1149
|
+
|
1150
|
+
} // namespace dbp_encoder
|
1151
|
+
|
1152
|
+
namespace dlba_encoder {
|
1153
|
+
|
1154
|
+
template <class T>
|
1155
|
+
void BeginWrite(DlbaEncoder &encoder, WriteStream &writer, const T &first_value) {
|
1156
|
+
throw InternalException("Can't write type to DELTA_LENGTH_BYTE_ARRAY column");
|
1157
|
+
}
|
1158
|
+
|
1159
|
+
template <>
|
1160
|
+
void BeginWrite(DlbaEncoder &encoder, WriteStream &writer, const string_t &first_value) {
|
1161
|
+
encoder.BeginWrite(writer, first_value);
|
1162
|
+
}
|
1163
|
+
|
1164
|
+
template <class T>
|
1165
|
+
void WriteValue(DlbaEncoder &encoder, WriteStream &writer, const T &value) {
|
1166
|
+
throw InternalException("Can't write type to DELTA_LENGTH_BYTE_ARRAY column");
|
1167
|
+
}
|
1168
|
+
|
1169
|
+
template <>
|
1170
|
+
void WriteValue(DlbaEncoder &encoder, WriteStream &writer, const string_t &value) {
|
1171
|
+
encoder.WriteValue(writer, value);
|
1172
|
+
}
|
1173
|
+
|
1174
|
+
// helpers to get size from strings
|
1175
|
+
template <class SRC>
|
1176
|
+
static constexpr idx_t GetDlbaStringSize(const SRC &src_value) {
|
1177
|
+
return 0;
|
1178
|
+
}
|
1179
|
+
|
1180
|
+
template <>
|
1181
|
+
idx_t GetDlbaStringSize(const string_t &src_value) {
|
1182
|
+
return src_value.GetSize();
|
1183
|
+
}
|
1184
|
+
|
1185
|
+
} // namespace dlba_encoder
|
1186
|
+
|
1187
|
+
namespace bss_encoder {
|
1188
|
+
|
1189
|
+
template <class T>
|
1190
|
+
void WriteValue(BssEncoder &encoder, const T &value) {
|
1191
|
+
throw InternalException("Can't write type to BYTE_STREAM_SPLIT column");
|
1192
|
+
}
|
1193
|
+
|
1194
|
+
template <>
|
1195
|
+
void WriteValue(BssEncoder &encoder, const float &value) {
|
1196
|
+
encoder.WriteValue(value);
|
1197
|
+
}
|
1198
|
+
|
1199
|
+
template <>
|
1200
|
+
void WriteValue(BssEncoder &encoder, const double &value) {
|
1201
|
+
encoder.WriteValue(value);
|
1202
|
+
}
|
1203
|
+
|
1204
|
+
} // namespace bss_encoder
|
1205
|
+
|
1206
|
+
template <class SRC, class TGT, class OP = ParquetCastOperator>
|
1207
|
+
class StandardColumnWriter : public BasicColumnWriter {
|
1048
1208
|
public:
|
1049
|
-
|
1209
|
+
StandardColumnWriter(ParquetWriter &writer, idx_t schema_idx, vector<string> schema_path_p, // NOLINT
|
1210
|
+
idx_t max_repeat, idx_t max_define, bool can_have_nulls)
|
1211
|
+
: BasicColumnWriter(writer, schema_idx, std::move(schema_path_p), max_repeat, max_define, can_have_nulls) {
|
1050
1212
|
}
|
1051
|
-
|
1052
|
-
hugeint_t min;
|
1053
|
-
hugeint_t max;
|
1213
|
+
~StandardColumnWriter() override = default;
|
1054
1214
|
|
1055
1215
|
public:
|
1056
|
-
|
1057
|
-
|
1058
|
-
|
1059
|
-
|
1216
|
+
unique_ptr<ColumnWriterState> InitializeWriteState(duckdb_parquet::RowGroup &row_group) override {
|
1217
|
+
auto result = make_uniq<StandardColumnWriterState<SRC>>(row_group, row_group.columns.size());
|
1218
|
+
result->encoding = Encoding::RLE_DICTIONARY;
|
1219
|
+
RegisterToRowGroup(row_group);
|
1220
|
+
return std::move(result);
|
1060
1221
|
}
|
1061
1222
|
|
1062
|
-
|
1063
|
-
|
1223
|
+
unique_ptr<ColumnWriterPageState> InitializePageState(BasicColumnWriterState &state_p) override {
|
1224
|
+
auto &state = state_p.Cast<StandardColumnWriterState<SRC>>();
|
1225
|
+
|
1226
|
+
auto result = make_uniq<StandardWriterPageState<SRC, TGT>>(state.total_value_count, state.total_string_size,
|
1227
|
+
state.encoding, state.dictionary);
|
1228
|
+
return std::move(result);
|
1064
1229
|
}
|
1065
1230
|
|
1066
|
-
void
|
1067
|
-
|
1068
|
-
|
1069
|
-
|
1070
|
-
|
1071
|
-
|
1231
|
+
void FlushPageState(WriteStream &temp_writer, ColumnWriterPageState *state_p) override {
|
1232
|
+
auto &page_state = state_p->Cast<StandardWriterPageState<SRC, TGT>>();
|
1233
|
+
switch (page_state.encoding) {
|
1234
|
+
case Encoding::DELTA_BINARY_PACKED:
|
1235
|
+
if (!page_state.dbp_initialized) {
|
1236
|
+
dbp_encoder::BeginWrite<int64_t>(page_state.dbp_encoder, temp_writer, 0);
|
1237
|
+
}
|
1238
|
+
page_state.dbp_encoder.FinishWrite(temp_writer);
|
1239
|
+
break;
|
1240
|
+
case Encoding::RLE_DICTIONARY:
|
1241
|
+
D_ASSERT(page_state.dict_bit_width != 0);
|
1242
|
+
if (!page_state.dict_written_value) {
|
1243
|
+
// all values are null
|
1244
|
+
// just write the bit width
|
1245
|
+
temp_writer.Write<uint8_t>(page_state.dict_bit_width);
|
1246
|
+
return;
|
1247
|
+
}
|
1248
|
+
page_state.dict_encoder.FinishWrite(temp_writer);
|
1249
|
+
break;
|
1250
|
+
case Encoding::DELTA_LENGTH_BYTE_ARRAY:
|
1251
|
+
if (!page_state.dlba_initialized) {
|
1252
|
+
dlba_encoder::BeginWrite<string_t>(page_state.dlba_encoder, temp_writer, string_t(""));
|
1253
|
+
}
|
1254
|
+
page_state.dlba_encoder.FinishWrite(temp_writer);
|
1255
|
+
break;
|
1256
|
+
case Encoding::BYTE_STREAM_SPLIT:
|
1257
|
+
page_state.bss_encoder.FinishWrite(temp_writer);
|
1258
|
+
break;
|
1259
|
+
case Encoding::PLAIN:
|
1260
|
+
break;
|
1261
|
+
default:
|
1262
|
+
throw InternalException("Unknown encoding");
|
1072
1263
|
}
|
1073
1264
|
}
|
1074
1265
|
|
1075
|
-
|
1076
|
-
|
1266
|
+
Encoding::type GetEncoding(BasicColumnWriterState &state_p) override {
|
1267
|
+
auto &state = state_p.Cast<StandardColumnWriterState<SRC>>();
|
1268
|
+
return state.encoding;
|
1077
1269
|
}
|
1078
|
-
|
1079
|
-
|
1080
|
-
|
1081
|
-
string GetMinValue() override {
|
1082
|
-
return HasStats() ? GetStats(min) : string();
|
1270
|
+
|
1271
|
+
bool HasAnalyze() override {
|
1272
|
+
return true;
|
1083
1273
|
}
|
1084
|
-
|
1085
|
-
|
1274
|
+
|
1275
|
+
void Analyze(ColumnWriterState &state_p, ColumnWriterState *parent, Vector &vector, idx_t count) override {
|
1276
|
+
auto &state = state_p.Cast<StandardColumnWriterState<SRC>>();
|
1277
|
+
|
1278
|
+
auto data_ptr = FlatVector::GetData<SRC>(vector);
|
1279
|
+
idx_t vector_index = 0;
|
1280
|
+
uint32_t new_value_index = state.dictionary.size();
|
1281
|
+
|
1282
|
+
const bool check_parent_empty = parent && !parent->is_empty.empty();
|
1283
|
+
const idx_t parent_index = state.definition_levels.size();
|
1284
|
+
|
1285
|
+
const idx_t vcount =
|
1286
|
+
check_parent_empty ? parent->definition_levels.size() - state.definition_levels.size() : count;
|
1287
|
+
|
1288
|
+
const auto &validity = FlatVector::Validity(vector);
|
1289
|
+
|
1290
|
+
for (idx_t i = 0; i < vcount; i++) {
|
1291
|
+
if (check_parent_empty && parent->is_empty[parent_index + i]) {
|
1292
|
+
continue;
|
1293
|
+
}
|
1294
|
+
if (validity.RowIsValid(vector_index)) {
|
1295
|
+
const auto &src_value = data_ptr[vector_index];
|
1296
|
+
if (state.dictionary.size() <= writer.DictionarySizeLimit()) {
|
1297
|
+
if (state.dictionary.find(src_value) == state.dictionary.end()) {
|
1298
|
+
state.dictionary[src_value] = new_value_index;
|
1299
|
+
new_value_index++;
|
1300
|
+
}
|
1301
|
+
}
|
1302
|
+
state.total_value_count++;
|
1303
|
+
state.total_string_size += dlba_encoder::GetDlbaStringSize(src_value);
|
1304
|
+
}
|
1305
|
+
vector_index++;
|
1306
|
+
}
|
1086
1307
|
}
|
1087
|
-
};
|
1088
1308
|
|
1089
|
-
|
1090
|
-
|
1091
|
-
|
1092
|
-
|
1093
|
-
|
1309
|
+
void FinalizeAnalyze(ColumnWriterState &state_p) override {
|
1310
|
+
const auto type = writer.GetType(schema_idx);
|
1311
|
+
|
1312
|
+
auto &state = state_p.Cast<StandardColumnWriterState<SRC>>();
|
1313
|
+
if (state.dictionary.size() == 0 || state.dictionary.size() > writer.DictionarySizeLimit()) {
|
1314
|
+
// If we aren't doing dictionary encoding, the following encodings are virtually always better than PLAIN
|
1315
|
+
switch (type) {
|
1316
|
+
case Type::type::INT32:
|
1317
|
+
case Type::type::INT64:
|
1318
|
+
state.encoding = Encoding::DELTA_BINARY_PACKED;
|
1319
|
+
break;
|
1320
|
+
case Type::type::BYTE_ARRAY:
|
1321
|
+
state.encoding = Encoding::DELTA_LENGTH_BYTE_ARRAY;
|
1322
|
+
break;
|
1323
|
+
case Type::type::FLOAT:
|
1324
|
+
case Type::type::DOUBLE:
|
1325
|
+
state.encoding = Encoding::BYTE_STREAM_SPLIT;
|
1326
|
+
break;
|
1327
|
+
default:
|
1328
|
+
state.encoding = Encoding::PLAIN;
|
1329
|
+
}
|
1330
|
+
state.dictionary.clear();
|
1331
|
+
}
|
1094
1332
|
}
|
1095
|
-
~FixedDecimalColumnWriter() override = default;
|
1096
1333
|
|
1097
|
-
public:
|
1098
1334
|
unique_ptr<ColumnWriterStatistics> InitializeStatsState() override {
|
1099
|
-
return
|
1335
|
+
return OP::template InitializeStats<SRC, TGT>();
|
1100
1336
|
}
|
1101
1337
|
|
1102
|
-
|
1338
|
+
bool HasDictionary(BasicColumnWriterState &state_p) override {
|
1339
|
+
auto &state = state_p.Cast<StandardColumnWriterState<SRC>>();
|
1340
|
+
return state.encoding == Encoding::RLE_DICTIONARY;
|
1341
|
+
}
|
1342
|
+
|
1343
|
+
idx_t DictionarySize(BasicColumnWriterState &state_p) override {
|
1344
|
+
auto &state = state_p.Cast<StandardColumnWriterState<SRC>>();
|
1345
|
+
return state.dictionary.size();
|
1346
|
+
}
|
1347
|
+
|
1348
|
+
void WriteVector(WriteStream &temp_writer, ColumnWriterStatistics *stats, ColumnWriterPageState *page_state_p,
|
1103
1349
|
Vector &input_column, idx_t chunk_start, idx_t chunk_end) override {
|
1104
|
-
auto &
|
1105
|
-
auto *ptr = FlatVector::GetData<hugeint_t>(input_column);
|
1106
|
-
auto &stats = stats_p->Cast<FixedDecimalStatistics>();
|
1350
|
+
auto &page_state = page_state_p->Cast<StandardWriterPageState<SRC, TGT>>();
|
1107
1351
|
|
1108
|
-
|
1109
|
-
|
1110
|
-
|
1111
|
-
|
1112
|
-
|
1113
|
-
|
1352
|
+
const auto &mask = FlatVector::Validity(input_column);
|
1353
|
+
const auto *data_ptr = FlatVector::GetData<SRC>(input_column);
|
1354
|
+
|
1355
|
+
switch (page_state.encoding) {
|
1356
|
+
case Encoding::RLE_DICTIONARY: {
|
1357
|
+
for (idx_t r = chunk_start; r < chunk_end; r++) {
|
1358
|
+
if (!mask.RowIsValid(r)) {
|
1359
|
+
continue;
|
1360
|
+
}
|
1361
|
+
auto &src_val = data_ptr[r];
|
1362
|
+
auto value_index = page_state.dictionary.at(src_val);
|
1363
|
+
if (!page_state.dict_written_value) {
|
1364
|
+
// first value
|
1365
|
+
// write the bit-width as a one-byte entry
|
1366
|
+
temp_writer.Write<uint8_t>(page_state.dict_bit_width);
|
1367
|
+
// now begin writing the actual value
|
1368
|
+
page_state.dict_encoder.BeginWrite(temp_writer, value_index);
|
1369
|
+
page_state.dict_written_value = true;
|
1370
|
+
} else {
|
1371
|
+
page_state.dict_encoder.WriteValue(temp_writer, value_index);
|
1372
|
+
}
|
1373
|
+
}
|
1374
|
+
break;
|
1375
|
+
}
|
1376
|
+
case Encoding::DELTA_BINARY_PACKED: {
|
1377
|
+
idx_t r = chunk_start;
|
1378
|
+
if (!page_state.dbp_initialized) {
|
1379
|
+
// find first non-null value
|
1380
|
+
for (; r < chunk_end; r++) {
|
1381
|
+
if (!mask.RowIsValid(r)) {
|
1382
|
+
continue;
|
1383
|
+
}
|
1384
|
+
const TGT target_value = OP::template Operation<SRC, TGT>(data_ptr[r]);
|
1385
|
+
OP::template HandleStats<SRC, TGT>(stats, target_value);
|
1386
|
+
dbp_encoder::BeginWrite(page_state.dbp_encoder, temp_writer, target_value);
|
1387
|
+
page_state.dbp_initialized = true;
|
1388
|
+
r++; // skip over
|
1389
|
+
break;
|
1390
|
+
}
|
1391
|
+
}
|
1392
|
+
|
1393
|
+
for (; r < chunk_end; r++) {
|
1394
|
+
if (!mask.RowIsValid(r)) {
|
1395
|
+
continue;
|
1396
|
+
}
|
1397
|
+
const TGT target_value = OP::template Operation<SRC, TGT>(data_ptr[r]);
|
1398
|
+
OP::template HandleStats<SRC, TGT>(stats, target_value);
|
1399
|
+
dbp_encoder::WriteValue(page_state.dbp_encoder, temp_writer, target_value);
|
1400
|
+
}
|
1401
|
+
break;
|
1402
|
+
}
|
1403
|
+
case Encoding::DELTA_LENGTH_BYTE_ARRAY: {
|
1404
|
+
idx_t r = chunk_start;
|
1405
|
+
if (!page_state.dlba_initialized) {
|
1406
|
+
// find first non-null value
|
1407
|
+
for (; r < chunk_end; r++) {
|
1408
|
+
if (!mask.RowIsValid(r)) {
|
1409
|
+
continue;
|
1410
|
+
}
|
1411
|
+
const TGT target_value = OP::template Operation<SRC, TGT>(data_ptr[r]);
|
1412
|
+
OP::template HandleStats<SRC, TGT>(stats, target_value);
|
1413
|
+
dlba_encoder::BeginWrite(page_state.dlba_encoder, temp_writer, target_value);
|
1414
|
+
page_state.dlba_initialized = true;
|
1415
|
+
r++; // skip over
|
1416
|
+
break;
|
1417
|
+
}
|
1418
|
+
}
|
1419
|
+
|
1420
|
+
for (; r < chunk_end; r++) {
|
1421
|
+
if (!mask.RowIsValid(r)) {
|
1422
|
+
continue;
|
1423
|
+
}
|
1424
|
+
const TGT target_value = OP::template Operation<SRC, TGT>(data_ptr[r]);
|
1425
|
+
OP::template HandleStats<SRC, TGT>(stats, target_value);
|
1426
|
+
dlba_encoder::WriteValue(page_state.dlba_encoder, temp_writer, target_value);
|
1427
|
+
}
|
1428
|
+
break;
|
1429
|
+
}
|
1430
|
+
case Encoding::BYTE_STREAM_SPLIT: {
|
1431
|
+
for (idx_t r = chunk_start; r < chunk_end; r++) {
|
1432
|
+
if (!mask.RowIsValid(r)) {
|
1433
|
+
continue;
|
1434
|
+
}
|
1435
|
+
const TGT target_value = OP::template Operation<SRC, TGT>(data_ptr[r]);
|
1436
|
+
OP::template HandleStats<SRC, TGT>(stats, target_value);
|
1437
|
+
bss_encoder::WriteValue(page_state.bss_encoder, target_value);
|
1114
1438
|
}
|
1439
|
+
break;
|
1440
|
+
}
|
1441
|
+
case Encoding::PLAIN: {
|
1442
|
+
D_ASSERT(page_state.encoding == Encoding::PLAIN);
|
1443
|
+
TemplatedWritePlain<SRC, TGT, OP>(input_column, stats, chunk_start, chunk_end, mask, temp_writer);
|
1444
|
+
break;
|
1445
|
+
}
|
1446
|
+
default:
|
1447
|
+
throw InternalException("Unknown encoding");
|
1115
1448
|
}
|
1116
1449
|
}
|
1117
1450
|
|
1118
|
-
|
1119
|
-
|
1451
|
+
void FlushDictionary(BasicColumnWriterState &state_p, ColumnWriterStatistics *stats) override {
|
1452
|
+
auto &state = state_p.Cast<StandardColumnWriterState<SRC>>();
|
1453
|
+
|
1454
|
+
D_ASSERT(state.encoding == Encoding::RLE_DICTIONARY);
|
1455
|
+
|
1456
|
+
// first we need to sort the values in index order
|
1457
|
+
auto values = vector<SRC>(state.dictionary.size());
|
1458
|
+
for (const auto &entry : state.dictionary) {
|
1459
|
+
values[entry.second] = entry.first;
|
1460
|
+
}
|
1461
|
+
|
1462
|
+
state.bloom_filter =
|
1463
|
+
make_uniq<ParquetBloomFilter>(state.dictionary.size(), writer.BloomFilterFalsePositiveRatio());
|
1464
|
+
|
1465
|
+
// first write the contents of the dictionary page to a temporary buffer
|
1466
|
+
auto temp_writer = make_uniq<MemoryStream>(MaxValue<idx_t>(
|
1467
|
+
NextPowerOfTwo(state.dictionary.size() * sizeof(TGT)), MemoryStream::DEFAULT_INITIAL_CAPACITY));
|
1468
|
+
for (idx_t r = 0; r < values.size(); r++) {
|
1469
|
+
const TGT target_value = OP::template Operation<SRC, TGT>(values[r]);
|
1470
|
+
// update the statistics
|
1471
|
+
OP::template HandleStats<SRC, TGT>(stats, target_value);
|
1472
|
+
// update the bloom filter
|
1473
|
+
auto hash = OP::template XXHash64<SRC, TGT>(target_value);
|
1474
|
+
state.bloom_filter->FilterInsert(hash);
|
1475
|
+
// actually write the dictionary value
|
1476
|
+
OP::template WriteToStream<SRC, TGT>(target_value, *temp_writer);
|
1477
|
+
}
|
1478
|
+
// flush the dictionary page and add it to the to-be-written pages
|
1479
|
+
WriteDictionary(state, std::move(temp_writer), values.size());
|
1480
|
+
// bloom filter will be queued for writing in ParquetWriter::BufferBloomFilter one level up
|
1481
|
+
}
|
1482
|
+
|
1483
|
+
// TODO this now vastly over-estimates the page size
|
1484
|
+
idx_t GetRowSize(const Vector &vector, const idx_t index, const BasicColumnWriterState &state_p) const override {
|
1485
|
+
return sizeof(TGT);
|
1120
1486
|
}
|
1121
1487
|
};
|
1122
1488
|
|
1123
1489
|
//===--------------------------------------------------------------------===//
|
1124
|
-
//
|
1490
|
+
// Boolean Column Writer
|
1125
1491
|
//===--------------------------------------------------------------------===//
|
1126
|
-
class
|
1127
|
-
static constexpr const idx_t PARQUET_UUID_SIZE = 16;
|
1128
|
-
|
1492
|
+
class BooleanStatisticsState : public ColumnWriterStatistics {
|
1129
1493
|
public:
|
1130
|
-
|
1131
|
-
idx_t max_define, bool can_have_nulls)
|
1132
|
-
: BasicColumnWriter(writer, schema_idx, std::move(schema_path_p), max_repeat, max_define, can_have_nulls) {
|
1494
|
+
BooleanStatisticsState() : min(true), max(false) {
|
1133
1495
|
}
|
1134
|
-
~UUIDColumnWriter() override = default;
|
1135
1496
|
|
1136
|
-
|
1137
|
-
|
1138
|
-
uint64_t high_bytes = input.upper ^ (int64_t(1) << 63);
|
1139
|
-
uint64_t low_bytes = input.lower;
|
1497
|
+
bool min;
|
1498
|
+
bool max;
|
1140
1499
|
|
1141
|
-
|
1142
|
-
|
1143
|
-
|
1144
|
-
}
|
1145
|
-
for (idx_t i = 0; i < sizeof(uint64_t); i++) {
|
1146
|
-
auto shift_count = (sizeof(uint64_t) - i - 1) * 8;
|
1147
|
-
result[sizeof(uint64_t) + i] = (low_bytes >> shift_count) & 0xFF;
|
1148
|
-
}
|
1500
|
+
public:
|
1501
|
+
bool HasStats() override {
|
1502
|
+
return !(min && !max);
|
1149
1503
|
}
|
1150
1504
|
|
1151
|
-
|
1152
|
-
|
1153
|
-
auto &mask = FlatVector::Validity(input_column);
|
1154
|
-
auto *ptr = FlatVector::GetData<hugeint_t>(input_column);
|
1155
|
-
|
1156
|
-
data_t temp_buffer[PARQUET_UUID_SIZE];
|
1157
|
-
for (idx_t r = chunk_start; r < chunk_end; r++) {
|
1158
|
-
if (mask.RowIsValid(r)) {
|
1159
|
-
WriteParquetUUID(ptr[r], temp_buffer);
|
1160
|
-
temp_writer.WriteData(temp_buffer, PARQUET_UUID_SIZE);
|
1161
|
-
}
|
1162
|
-
}
|
1505
|
+
string GetMin() override {
|
1506
|
+
return GetMinValue();
|
1163
1507
|
}
|
1164
|
-
|
1165
|
-
|
1166
|
-
|
1508
|
+
string GetMax() override {
|
1509
|
+
return GetMaxValue();
|
1510
|
+
}
|
1511
|
+
string GetMinValue() override {
|
1512
|
+
return HasStats() ? string(const_char_ptr_cast(&min), sizeof(bool)) : string();
|
1513
|
+
}
|
1514
|
+
string GetMaxValue() override {
|
1515
|
+
return HasStats() ? string(const_char_ptr_cast(&max), sizeof(bool)) : string();
|
1167
1516
|
}
|
1168
1517
|
};
|
1169
1518
|
|
1170
|
-
|
1171
|
-
|
1172
|
-
|
1173
|
-
|
1174
|
-
|
1519
|
+
class BooleanWriterPageState : public ColumnWriterPageState {
|
1520
|
+
public:
|
1521
|
+
uint8_t byte = 0;
|
1522
|
+
uint8_t byte_pos = 0;
|
1523
|
+
};
|
1175
1524
|
|
1525
|
+
class BooleanColumnWriter : public BasicColumnWriter {
|
1176
1526
|
public:
|
1177
|
-
|
1178
|
-
|
1527
|
+
BooleanColumnWriter(ParquetWriter &writer, idx_t schema_idx, vector<string> schema_path_p, idx_t max_repeat,
|
1528
|
+
idx_t max_define, bool can_have_nulls)
|
1179
1529
|
: BasicColumnWriter(writer, schema_idx, std::move(schema_path_p), max_repeat, max_define, can_have_nulls) {
|
1180
1530
|
}
|
1181
|
-
~
|
1531
|
+
~BooleanColumnWriter() override = default;
|
1182
1532
|
|
1183
1533
|
public:
|
1184
|
-
|
1185
|
-
|
1186
|
-
throw IOException("Parquet files do not support negative intervals");
|
1187
|
-
}
|
1188
|
-
Store<uint32_t>(input.months, result);
|
1189
|
-
Store<uint32_t>(input.days, result + sizeof(uint32_t));
|
1190
|
-
Store<uint32_t>(input.micros / 1000, result + sizeof(uint32_t) * 2);
|
1534
|
+
unique_ptr<ColumnWriterStatistics> InitializeStatsState() override {
|
1535
|
+
return make_uniq<BooleanStatisticsState>();
|
1191
1536
|
}
|
1192
1537
|
|
1193
|
-
void WriteVector(WriteStream &temp_writer, ColumnWriterStatistics *stats_p, ColumnWriterPageState *
|
1538
|
+
void WriteVector(WriteStream &temp_writer, ColumnWriterStatistics *stats_p, ColumnWriterPageState *state_p,
|
1194
1539
|
Vector &input_column, idx_t chunk_start, idx_t chunk_end) override {
|
1540
|
+
auto &stats = stats_p->Cast<BooleanStatisticsState>();
|
1541
|
+
auto &state = state_p->Cast<BooleanWriterPageState>();
|
1195
1542
|
auto &mask = FlatVector::Validity(input_column);
|
1196
|
-
auto *ptr = FlatVector::GetData<interval_t>(input_column);
|
1197
1543
|
|
1198
|
-
|
1544
|
+
auto *ptr = FlatVector::GetData<bool>(input_column);
|
1199
1545
|
for (idx_t r = chunk_start; r < chunk_end; r++) {
|
1200
1546
|
if (mask.RowIsValid(r)) {
|
1201
|
-
|
1202
|
-
|
1547
|
+
// only encode if non-null
|
1548
|
+
if (ptr[r]) {
|
1549
|
+
stats.max = true;
|
1550
|
+
state.byte |= 1 << state.byte_pos;
|
1551
|
+
} else {
|
1552
|
+
stats.min = false;
|
1553
|
+
}
|
1554
|
+
state.byte_pos++;
|
1555
|
+
|
1556
|
+
if (state.byte_pos == 8) {
|
1557
|
+
temp_writer.Write<uint8_t>(state.byte);
|
1558
|
+
state.byte = 0;
|
1559
|
+
state.byte_pos = 0;
|
1560
|
+
}
|
1203
1561
|
}
|
1204
1562
|
}
|
1205
1563
|
}
|
1206
1564
|
|
1565
|
+
unique_ptr<ColumnWriterPageState> InitializePageState(BasicColumnWriterState &state) override {
|
1566
|
+
return make_uniq<BooleanWriterPageState>();
|
1567
|
+
}
|
1568
|
+
|
1569
|
+
void FlushPageState(WriteStream &temp_writer, ColumnWriterPageState *state_p) override {
|
1570
|
+
auto &state = state_p->Cast<BooleanWriterPageState>();
|
1571
|
+
if (state.byte_pos > 0) {
|
1572
|
+
temp_writer.Write<uint8_t>(state.byte);
|
1573
|
+
state.byte = 0;
|
1574
|
+
state.byte_pos = 0;
|
1575
|
+
}
|
1576
|
+
}
|
1577
|
+
|
1207
1578
|
idx_t GetRowSize(const Vector &vector, const idx_t index, const BasicColumnWriterState &state) const override {
|
1208
|
-
return
|
1579
|
+
return sizeof(bool);
|
1209
1580
|
}
|
1210
1581
|
};
|
1211
1582
|
|
1212
1583
|
//===--------------------------------------------------------------------===//
|
1213
|
-
//
|
1584
|
+
// Decimal Column Writer
|
1214
1585
|
//===--------------------------------------------------------------------===//
|
1215
|
-
|
1216
|
-
|
1586
|
+
static void WriteParquetDecimal(hugeint_t input, data_ptr_t result) {
|
1587
|
+
bool positive = input >= 0;
|
1588
|
+
// numbers are stored as two's complement so some muckery is required
|
1589
|
+
if (!positive) {
|
1590
|
+
input = NumericLimits<hugeint_t>::Maximum() + input + 1;
|
1591
|
+
}
|
1592
|
+
uint64_t high_bytes = uint64_t(input.upper);
|
1593
|
+
uint64_t low_bytes = input.lower;
|
1594
|
+
|
1595
|
+
for (idx_t i = 0; i < sizeof(uint64_t); i++) {
|
1596
|
+
auto shift_count = (sizeof(uint64_t) - i - 1) * 8;
|
1597
|
+
result[i] = (high_bytes >> shift_count) & 0xFF;
|
1598
|
+
}
|
1599
|
+
for (idx_t i = 0; i < sizeof(uint64_t); i++) {
|
1600
|
+
auto shift_count = (sizeof(uint64_t) - i - 1) * 8;
|
1601
|
+
result[sizeof(uint64_t) + i] = (low_bytes >> shift_count) & 0xFF;
|
1602
|
+
}
|
1603
|
+
if (!positive) {
|
1604
|
+
result[0] |= 0x80;
|
1605
|
+
}
|
1606
|
+
}
|
1217
1607
|
|
1608
|
+
class FixedDecimalStatistics : public ColumnWriterStatistics {
|
1218
1609
|
public:
|
1219
|
-
|
1610
|
+
FixedDecimalStatistics() : min(NumericLimits<hugeint_t>::Maximum()), max(NumericLimits<hugeint_t>::Minimum()) {
|
1220
1611
|
}
|
1221
1612
|
|
1222
|
-
|
1223
|
-
|
1224
|
-
string min;
|
1225
|
-
string max;
|
1613
|
+
hugeint_t min;
|
1614
|
+
hugeint_t max;
|
1226
1615
|
|
1227
1616
|
public:
|
1617
|
+
string GetStats(hugeint_t &input) {
|
1618
|
+
data_t buffer[16];
|
1619
|
+
WriteParquetDecimal(input, buffer);
|
1620
|
+
return string(const_char_ptr_cast(buffer), 16);
|
1621
|
+
}
|
1622
|
+
|
1228
1623
|
bool HasStats() override {
|
1229
|
-
return
|
1624
|
+
return min <= max;
|
1230
1625
|
}
|
1231
1626
|
|
1232
|
-
void Update(
|
1233
|
-
if (
|
1234
|
-
|
1235
|
-
}
|
1236
|
-
auto str_len = val.GetSize();
|
1237
|
-
if (str_len > MAX_STRING_STATISTICS_SIZE) {
|
1238
|
-
// we avoid gathering stats when individual string values are too large
|
1239
|
-
// this is because the statistics are copied into the Parquet file meta data in uncompressed format
|
1240
|
-
// ideally we avoid placing several mega or giga-byte long strings there
|
1241
|
-
// we put a threshold of 10KB, if we see strings that exceed this threshold we avoid gathering stats
|
1242
|
-
values_too_big = true;
|
1243
|
-
has_stats = false;
|
1244
|
-
min = string();
|
1245
|
-
max = string();
|
1246
|
-
return;
|
1247
|
-
}
|
1248
|
-
if (!has_stats || LessThan::Operation(val, string_t(min))) {
|
1249
|
-
min = val.GetString();
|
1627
|
+
void Update(hugeint_t &val) {
|
1628
|
+
if (LessThan::Operation(val, min)) {
|
1629
|
+
min = val;
|
1250
1630
|
}
|
1251
|
-
if (
|
1252
|
-
max = val
|
1631
|
+
if (GreaterThan::Operation(val, max)) {
|
1632
|
+
max = val;
|
1253
1633
|
}
|
1254
|
-
has_stats = true;
|
1255
1634
|
}
|
1256
1635
|
|
1257
1636
|
string GetMin() override {
|
@@ -1261,264 +1640,44 @@ public:
|
|
1261
1640
|
return GetMaxValue();
|
1262
1641
|
}
|
1263
1642
|
string GetMinValue() override {
|
1264
|
-
return HasStats() ? min : string();
|
1643
|
+
return HasStats() ? GetStats(min) : string();
|
1265
1644
|
}
|
1266
1645
|
string GetMaxValue() override {
|
1267
|
-
return HasStats() ? max : string();
|
1268
|
-
}
|
1269
|
-
};
|
1270
|
-
|
1271
|
-
class StringColumnWriterState : public BasicColumnWriterState {
|
1272
|
-
public:
|
1273
|
-
StringColumnWriterState(duckdb_parquet::format::RowGroup &row_group, idx_t col_idx)
|
1274
|
-
: BasicColumnWriterState(row_group, col_idx) {
|
1275
|
-
}
|
1276
|
-
~StringColumnWriterState() override = default;
|
1277
|
-
|
1278
|
-
// analysis state
|
1279
|
-
idx_t estimated_dict_page_size = 0;
|
1280
|
-
idx_t estimated_rle_pages_size = 0;
|
1281
|
-
idx_t estimated_plain_size = 0;
|
1282
|
-
|
1283
|
-
// Dictionary and accompanying string heap
|
1284
|
-
string_map_t<uint32_t> dictionary;
|
1285
|
-
// key_bit_width== 0 signifies the chunk is written in plain encoding
|
1286
|
-
uint32_t key_bit_width;
|
1287
|
-
|
1288
|
-
bool IsDictionaryEncoded() const {
|
1289
|
-
return key_bit_width != 0;
|
1290
|
-
}
|
1291
|
-
};
|
1292
|
-
|
1293
|
-
class StringWriterPageState : public ColumnWriterPageState {
|
1294
|
-
public:
|
1295
|
-
explicit StringWriterPageState(uint32_t bit_width, const string_map_t<uint32_t> &values)
|
1296
|
-
: bit_width(bit_width), dictionary(values), encoder(bit_width), written_value(false) {
|
1297
|
-
D_ASSERT(IsDictionaryEncoded() || (bit_width == 0 && dictionary.empty()));
|
1298
|
-
}
|
1299
|
-
|
1300
|
-
bool IsDictionaryEncoded() {
|
1301
|
-
return bit_width != 0;
|
1646
|
+
return HasStats() ? GetStats(max) : string();
|
1302
1647
|
}
|
1303
|
-
// if 0, we're writing a plain page
|
1304
|
-
uint32_t bit_width;
|
1305
|
-
const string_map_t<uint32_t> &dictionary;
|
1306
|
-
RleBpEncoder encoder;
|
1307
|
-
bool written_value;
|
1308
1648
|
};
|
1309
1649
|
|
1310
|
-
class
|
1650
|
+
class FixedDecimalColumnWriter : public BasicColumnWriter {
|
1311
1651
|
public:
|
1312
|
-
|
1313
|
-
|
1652
|
+
FixedDecimalColumnWriter(ParquetWriter &writer, idx_t schema_idx, vector<string> schema_path_p, idx_t max_repeat,
|
1653
|
+
idx_t max_define, bool can_have_nulls)
|
1314
1654
|
: BasicColumnWriter(writer, schema_idx, std::move(schema_path_p), max_repeat, max_define, can_have_nulls) {
|
1315
1655
|
}
|
1316
|
-
~
|
1656
|
+
~FixedDecimalColumnWriter() override = default;
|
1317
1657
|
|
1318
1658
|
public:
|
1319
1659
|
unique_ptr<ColumnWriterStatistics> InitializeStatsState() override {
|
1320
|
-
return make_uniq<
|
1321
|
-
}
|
1322
|
-
|
1323
|
-
unique_ptr<ColumnWriterState> InitializeWriteState(duckdb_parquet::format::RowGroup &row_group) override {
|
1324
|
-
auto result = make_uniq<StringColumnWriterState>(row_group, row_group.columns.size());
|
1325
|
-
RegisterToRowGroup(row_group);
|
1326
|
-
return std::move(result);
|
1327
|
-
}
|
1328
|
-
|
1329
|
-
bool HasAnalyze() override {
|
1330
|
-
return true;
|
1331
|
-
}
|
1332
|
-
|
1333
|
-
void Analyze(ColumnWriterState &state_p, ColumnWriterState *parent, Vector &vector, idx_t count) override {
|
1334
|
-
auto &state = state_p.Cast<StringColumnWriterState>();
|
1335
|
-
if (writer.DictionaryCompressionRatioThreshold() == NumericLimits<double>::Maximum() ||
|
1336
|
-
(state.dictionary.size() > DICTIONARY_ANALYZE_THRESHOLD && WontUseDictionary(state))) {
|
1337
|
-
// Early out: compression ratio is less than the specified parameter
|
1338
|
-
// after seeing more entries than the threshold
|
1339
|
-
return;
|
1340
|
-
}
|
1341
|
-
|
1342
|
-
idx_t vcount = parent ? parent->definition_levels.size() - state.definition_levels.size() : count;
|
1343
|
-
idx_t parent_index = state.definition_levels.size();
|
1344
|
-
auto &validity = FlatVector::Validity(vector);
|
1345
|
-
idx_t vector_index = 0;
|
1346
|
-
uint32_t new_value_index = state.dictionary.size();
|
1347
|
-
uint32_t last_value_index = -1;
|
1348
|
-
idx_t run_length = 0;
|
1349
|
-
idx_t run_count = 0;
|
1350
|
-
auto strings = FlatVector::GetData<string_t>(vector);
|
1351
|
-
for (idx_t i = 0; i < vcount; i++) {
|
1352
|
-
if (parent && !parent->is_empty.empty() && parent->is_empty[parent_index + i]) {
|
1353
|
-
continue;
|
1354
|
-
}
|
1355
|
-
|
1356
|
-
if (validity.RowIsValid(vector_index)) {
|
1357
|
-
run_length++;
|
1358
|
-
const auto &value = strings[vector_index];
|
1359
|
-
// Try to insert into the dictionary. If it's already there, we get back the value index
|
1360
|
-
auto found = state.dictionary.insert(string_map_t<uint32_t>::value_type(value, new_value_index));
|
1361
|
-
state.estimated_plain_size += value.GetSize() + STRING_LENGTH_SIZE;
|
1362
|
-
if (found.second) {
|
1363
|
-
// string didn't exist yet in the dictionary
|
1364
|
-
new_value_index++;
|
1365
|
-
state.estimated_dict_page_size += value.GetSize() + MAX_DICTIONARY_KEY_SIZE;
|
1366
|
-
}
|
1367
|
-
|
1368
|
-
// if the value changed, we will encode it in the page
|
1369
|
-
if (last_value_index != found.first->second) {
|
1370
|
-
// we will add the value index size later, when we know the total number of keys
|
1371
|
-
state.estimated_rle_pages_size += GetVarintSize(run_length);
|
1372
|
-
run_length = 0;
|
1373
|
-
run_count++;
|
1374
|
-
last_value_index = found.first->second;
|
1375
|
-
}
|
1376
|
-
}
|
1377
|
-
vector_index++;
|
1378
|
-
}
|
1379
|
-
// Add the costs of keys sizes. We don't know yet how many bytes the keys need as we haven't
|
1380
|
-
// seen all the values. therefore we use an over-estimation of
|
1381
|
-
state.estimated_rle_pages_size += MAX_DICTIONARY_KEY_SIZE * run_count;
|
1382
|
-
}
|
1383
|
-
|
1384
|
-
void FinalizeAnalyze(ColumnWriterState &state_p) override {
|
1385
|
-
auto &state = state_p.Cast<StringColumnWriterState>();
|
1386
|
-
|
1387
|
-
// check if a dictionary will require more space than a plain write, or if the dictionary page is going to
|
1388
|
-
// be too large
|
1389
|
-
if (WontUseDictionary(state)) {
|
1390
|
-
// clearing the dictionary signals a plain write
|
1391
|
-
state.dictionary.clear();
|
1392
|
-
state.key_bit_width = 0;
|
1393
|
-
} else {
|
1394
|
-
state.key_bit_width = RleBpDecoder::ComputeBitWidth(state.dictionary.size());
|
1395
|
-
}
|
1660
|
+
return make_uniq<FixedDecimalStatistics>();
|
1396
1661
|
}
|
1397
1662
|
|
1398
|
-
void WriteVector(WriteStream &temp_writer, ColumnWriterStatistics *stats_p, ColumnWriterPageState *
|
1663
|
+
void WriteVector(WriteStream &temp_writer, ColumnWriterStatistics *stats_p, ColumnWriterPageState *page_state,
|
1399
1664
|
Vector &input_column, idx_t chunk_start, idx_t chunk_end) override {
|
1400
|
-
auto &page_state = page_state_p->Cast<StringWriterPageState>();
|
1401
1665
|
auto &mask = FlatVector::Validity(input_column);
|
1402
|
-
auto
|
1666
|
+
auto *ptr = FlatVector::GetData<hugeint_t>(input_column);
|
1667
|
+
auto &stats = stats_p->Cast<FixedDecimalStatistics>();
|
1403
1668
|
|
1404
|
-
|
1405
|
-
|
1406
|
-
|
1407
|
-
for (idx_t r = chunk_start; r < chunk_end; r++) {
|
1408
|
-
if (!mask.RowIsValid(r)) {
|
1409
|
-
continue;
|
1410
|
-
}
|
1411
|
-
auto value_index = page_state.dictionary.at(ptr[r]);
|
1412
|
-
if (!page_state.written_value) {
|
1413
|
-
// first value
|
1414
|
-
// write the bit-width as a one-byte entry
|
1415
|
-
temp_writer.Write<uint8_t>(page_state.bit_width);
|
1416
|
-
// now begin writing the actual value
|
1417
|
-
page_state.encoder.BeginWrite(temp_writer, value_index);
|
1418
|
-
page_state.written_value = true;
|
1419
|
-
} else {
|
1420
|
-
page_state.encoder.WriteValue(temp_writer, value_index);
|
1421
|
-
}
|
1422
|
-
}
|
1423
|
-
} else {
|
1424
|
-
// plain page
|
1425
|
-
for (idx_t r = chunk_start; r < chunk_end; r++) {
|
1426
|
-
if (!mask.RowIsValid(r)) {
|
1427
|
-
continue;
|
1428
|
-
}
|
1669
|
+
data_t temp_buffer[16];
|
1670
|
+
for (idx_t r = chunk_start; r < chunk_end; r++) {
|
1671
|
+
if (mask.RowIsValid(r)) {
|
1429
1672
|
stats.Update(ptr[r]);
|
1430
|
-
|
1431
|
-
temp_writer.WriteData(
|
1432
|
-
}
|
1433
|
-
}
|
1434
|
-
}
|
1435
|
-
|
1436
|
-
unique_ptr<ColumnWriterPageState> InitializePageState(BasicColumnWriterState &state_p) override {
|
1437
|
-
auto &state = state_p.Cast<StringColumnWriterState>();
|
1438
|
-
return make_uniq<StringWriterPageState>(state.key_bit_width, state.dictionary);
|
1439
|
-
}
|
1440
|
-
|
1441
|
-
void FlushPageState(WriteStream &temp_writer, ColumnWriterPageState *state_p) override {
|
1442
|
-
auto &page_state = state_p->Cast<StringWriterPageState>();
|
1443
|
-
if (page_state.bit_width != 0) {
|
1444
|
-
if (!page_state.written_value) {
|
1445
|
-
// all values are null
|
1446
|
-
// just write the bit width
|
1447
|
-
temp_writer.Write<uint8_t>(page_state.bit_width);
|
1448
|
-
return;
|
1673
|
+
WriteParquetDecimal(ptr[r], temp_buffer);
|
1674
|
+
temp_writer.WriteData(temp_buffer, 16);
|
1449
1675
|
}
|
1450
|
-
page_state.encoder.FinishWrite(temp_writer);
|
1451
|
-
}
|
1452
|
-
}
|
1453
|
-
|
1454
|
-
duckdb_parquet::format::Encoding::type GetEncoding(BasicColumnWriterState &state_p) override {
|
1455
|
-
auto &state = state_p.Cast<StringColumnWriterState>();
|
1456
|
-
return state.IsDictionaryEncoded() ? Encoding::RLE_DICTIONARY : Encoding::PLAIN;
|
1457
|
-
}
|
1458
|
-
|
1459
|
-
bool HasDictionary(BasicColumnWriterState &state_p) override {
|
1460
|
-
auto &state = state_p.Cast<StringColumnWriterState>();
|
1461
|
-
return state.IsDictionaryEncoded();
|
1462
|
-
}
|
1463
|
-
|
1464
|
-
idx_t DictionarySize(BasicColumnWriterState &state_p) override {
|
1465
|
-
auto &state = state_p.Cast<StringColumnWriterState>();
|
1466
|
-
D_ASSERT(state.IsDictionaryEncoded());
|
1467
|
-
return state.dictionary.size();
|
1468
|
-
}
|
1469
|
-
|
1470
|
-
void FlushDictionary(BasicColumnWriterState &state_p, ColumnWriterStatistics *stats_p) override {
|
1471
|
-
auto &stats = stats_p->Cast<StringStatisticsState>();
|
1472
|
-
auto &state = state_p.Cast<StringColumnWriterState>();
|
1473
|
-
if (!state.IsDictionaryEncoded()) {
|
1474
|
-
return;
|
1475
|
-
}
|
1476
|
-
// first we need to sort the values in index order
|
1477
|
-
auto values = vector<string_t>(state.dictionary.size());
|
1478
|
-
for (const auto &entry : state.dictionary) {
|
1479
|
-
D_ASSERT(values[entry.second].GetSize() == 0);
|
1480
|
-
values[entry.second] = entry.first;
|
1481
|
-
}
|
1482
|
-
// first write the contents of the dictionary page to a temporary buffer
|
1483
|
-
auto temp_writer = make_uniq<MemoryStream>(
|
1484
|
-
MaxValue<idx_t>(NextPowerOfTwo(state.estimated_dict_page_size), MemoryStream::DEFAULT_INITIAL_CAPACITY));
|
1485
|
-
for (idx_t r = 0; r < values.size(); r++) {
|
1486
|
-
auto &value = values[r];
|
1487
|
-
// update the statistics
|
1488
|
-
stats.Update(value);
|
1489
|
-
// write this string value to the dictionary
|
1490
|
-
temp_writer->Write<uint32_t>(value.GetSize());
|
1491
|
-
temp_writer->WriteData(const_data_ptr_cast((value.GetData())), value.GetSize());
|
1492
|
-
}
|
1493
|
-
// flush the dictionary page and add it to the to-be-written pages
|
1494
|
-
WriteDictionary(state, std::move(temp_writer), values.size());
|
1495
|
-
}
|
1496
|
-
|
1497
|
-
idx_t GetRowSize(const Vector &vector, const idx_t index, const BasicColumnWriterState &state_p) const override {
|
1498
|
-
auto &state = state_p.Cast<StringColumnWriterState>();
|
1499
|
-
if (state.IsDictionaryEncoded()) {
|
1500
|
-
return (state.key_bit_width + 7) / 8;
|
1501
|
-
} else {
|
1502
|
-
auto strings = FlatVector::GetData<string_t>(vector);
|
1503
|
-
return strings[index].GetSize();
|
1504
1676
|
}
|
1505
1677
|
}
|
1506
1678
|
|
1507
|
-
|
1508
|
-
|
1509
|
-
return state.estimated_dict_page_size > MAX_UNCOMPRESSED_DICT_PAGE_SIZE ||
|
1510
|
-
DictionaryCompressionRatio(state) < writer.DictionaryCompressionRatioThreshold();
|
1511
|
-
}
|
1512
|
-
|
1513
|
-
static double DictionaryCompressionRatio(StringColumnWriterState &state) {
|
1514
|
-
// If any are 0, we just return a compression ratio of 1
|
1515
|
-
if (state.estimated_plain_size == 0 || state.estimated_rle_pages_size == 0 ||
|
1516
|
-
state.estimated_dict_page_size == 0) {
|
1517
|
-
return 1;
|
1518
|
-
}
|
1519
|
-
// Otherwise, plain size divided by compressed size
|
1520
|
-
return double(state.estimated_plain_size) /
|
1521
|
-
double(state.estimated_rle_pages_size + state.estimated_dict_page_size);
|
1679
|
+
idx_t GetRowSize(const Vector &vector, const idx_t index, const BasicColumnWriterState &state) const override {
|
1680
|
+
return sizeof(hugeint_t);
|
1522
1681
|
}
|
1523
1682
|
};
|
1524
1683
|
|
@@ -1527,40 +1686,42 @@ private:
|
|
1527
1686
|
//===--------------------------------------------------------------------===//
|
1528
1687
|
// Used to store the metadata for a WKB-encoded geometry column when writing
|
1529
1688
|
// GeoParquet files.
|
1530
|
-
class WKBColumnWriterState final : public
|
1689
|
+
class WKBColumnWriterState final : public StandardColumnWriterState<string_t> {
|
1531
1690
|
public:
|
1532
|
-
WKBColumnWriterState(ClientContext &context, duckdb_parquet::
|
1533
|
-
:
|
1691
|
+
WKBColumnWriterState(ClientContext &context, duckdb_parquet::RowGroup &row_group, idx_t col_idx)
|
1692
|
+
: StandardColumnWriterState(row_group, col_idx), geo_data(), geo_data_writer(context) {
|
1534
1693
|
}
|
1535
1694
|
|
1536
1695
|
GeoParquetColumnMetadata geo_data;
|
1537
1696
|
GeoParquetColumnMetadataWriter geo_data_writer;
|
1538
1697
|
};
|
1539
1698
|
|
1540
|
-
class WKBColumnWriter final : public
|
1699
|
+
class WKBColumnWriter final : public StandardColumnWriter<string_t, string_t, ParquetStringOperator> {
|
1541
1700
|
public:
|
1542
1701
|
WKBColumnWriter(ClientContext &context_p, ParquetWriter &writer, idx_t schema_idx, vector<string> schema_path_p,
|
1543
1702
|
idx_t max_repeat, idx_t max_define, bool can_have_nulls, string name)
|
1544
|
-
:
|
1703
|
+
: StandardColumnWriter(writer, schema_idx, std::move(schema_path_p), max_repeat, max_define, can_have_nulls),
|
1545
1704
|
column_name(std::move(name)), context(context_p) {
|
1546
1705
|
|
1547
1706
|
this->writer.GetGeoParquetData().RegisterGeometryColumn(column_name);
|
1548
1707
|
}
|
1549
1708
|
|
1550
|
-
unique_ptr<ColumnWriterState> InitializeWriteState(duckdb_parquet::
|
1709
|
+
unique_ptr<ColumnWriterState> InitializeWriteState(duckdb_parquet::RowGroup &row_group) override {
|
1551
1710
|
auto result = make_uniq<WKBColumnWriterState>(context, row_group, row_group.columns.size());
|
1711
|
+
result->encoding = Encoding::RLE_DICTIONARY;
|
1552
1712
|
RegisterToRowGroup(row_group);
|
1553
1713
|
return std::move(result);
|
1554
1714
|
}
|
1715
|
+
|
1555
1716
|
void Write(ColumnWriterState &state, Vector &vector, idx_t count) override {
|
1556
|
-
|
1717
|
+
StandardColumnWriter::Write(state, vector, count);
|
1557
1718
|
|
1558
1719
|
auto &geo_state = state.Cast<WKBColumnWriterState>();
|
1559
1720
|
geo_state.geo_data_writer.Update(geo_state.geo_data, vector, count);
|
1560
1721
|
}
|
1561
1722
|
|
1562
1723
|
void FinalizeWrite(ColumnWriterState &state) override {
|
1563
|
-
|
1724
|
+
StandardColumnWriter::FinalizeWrite(state);
|
1564
1725
|
|
1565
1726
|
// Add the geodata object to the writer
|
1566
1727
|
const auto &geo_state = state.Cast<WKBColumnWriterState>();
|
@@ -1658,7 +1819,7 @@ public:
|
|
1658
1819
|
page_state.encoder.FinishWrite(temp_writer);
|
1659
1820
|
}
|
1660
1821
|
|
1661
|
-
duckdb_parquet::
|
1822
|
+
duckdb_parquet::Encoding::type GetEncoding(BasicColumnWriterState &state) override {
|
1662
1823
|
return Encoding::RLE_DICTIONARY;
|
1663
1824
|
}
|
1664
1825
|
|
@@ -1710,7 +1871,7 @@ public:
|
|
1710
1871
|
vector<unique_ptr<ColumnWriter>> child_writers;
|
1711
1872
|
|
1712
1873
|
public:
|
1713
|
-
unique_ptr<ColumnWriterState> InitializeWriteState(duckdb_parquet::
|
1874
|
+
unique_ptr<ColumnWriterState> InitializeWriteState(duckdb_parquet::RowGroup &row_group) override;
|
1714
1875
|
bool HasAnalyze() override;
|
1715
1876
|
void Analyze(ColumnWriterState &state, ColumnWriterState *parent, Vector &vector, idx_t count) override;
|
1716
1877
|
void FinalizeAnalyze(ColumnWriterState &state) override;
|
@@ -1723,17 +1884,17 @@ public:
|
|
1723
1884
|
|
1724
1885
|
class StructColumnWriterState : public ColumnWriterState {
|
1725
1886
|
public:
|
1726
|
-
StructColumnWriterState(duckdb_parquet::
|
1887
|
+
StructColumnWriterState(duckdb_parquet::RowGroup &row_group, idx_t col_idx)
|
1727
1888
|
: row_group(row_group), col_idx(col_idx) {
|
1728
1889
|
}
|
1729
1890
|
~StructColumnWriterState() override = default;
|
1730
1891
|
|
1731
|
-
duckdb_parquet::
|
1892
|
+
duckdb_parquet::RowGroup &row_group;
|
1732
1893
|
idx_t col_idx;
|
1733
1894
|
vector<unique_ptr<ColumnWriterState>> child_states;
|
1734
1895
|
};
|
1735
1896
|
|
1736
|
-
unique_ptr<ColumnWriterState> StructColumnWriter::InitializeWriteState(duckdb_parquet::
|
1897
|
+
unique_ptr<ColumnWriterState> StructColumnWriter::InitializeWriteState(duckdb_parquet::RowGroup &row_group) {
|
1737
1898
|
auto result = make_uniq<StructColumnWriterState>(row_group, row_group.columns.size());
|
1738
1899
|
|
1739
1900
|
result->child_states.reserve(child_writers.size());
|
@@ -1831,7 +1992,7 @@ public:
|
|
1831
1992
|
unique_ptr<ColumnWriter> child_writer;
|
1832
1993
|
|
1833
1994
|
public:
|
1834
|
-
unique_ptr<ColumnWriterState> InitializeWriteState(duckdb_parquet::
|
1995
|
+
unique_ptr<ColumnWriterState> InitializeWriteState(duckdb_parquet::RowGroup &row_group) override;
|
1835
1996
|
bool HasAnalyze() override;
|
1836
1997
|
void Analyze(ColumnWriterState &state, ColumnWriterState *parent, Vector &vector, idx_t count) override;
|
1837
1998
|
void FinalizeAnalyze(ColumnWriterState &state) override;
|
@@ -1844,18 +2005,17 @@ public:
|
|
1844
2005
|
|
1845
2006
|
class ListColumnWriterState : public ColumnWriterState {
|
1846
2007
|
public:
|
1847
|
-
ListColumnWriterState(duckdb_parquet::
|
1848
|
-
: row_group(row_group), col_idx(col_idx) {
|
2008
|
+
ListColumnWriterState(duckdb_parquet::RowGroup &row_group, idx_t col_idx) : row_group(row_group), col_idx(col_idx) {
|
1849
2009
|
}
|
1850
2010
|
~ListColumnWriterState() override = default;
|
1851
2011
|
|
1852
|
-
duckdb_parquet::
|
2012
|
+
duckdb_parquet::RowGroup &row_group;
|
1853
2013
|
idx_t col_idx;
|
1854
2014
|
unique_ptr<ColumnWriterState> child_state;
|
1855
2015
|
idx_t parent_index = 0;
|
1856
2016
|
};
|
1857
2017
|
|
1858
|
-
unique_ptr<ColumnWriterState> ListColumnWriter::InitializeWriteState(duckdb_parquet::
|
2018
|
+
unique_ptr<ColumnWriterState> ListColumnWriter::InitializeWriteState(duckdb_parquet::RowGroup &row_group) {
|
1859
2019
|
auto result = make_uniq<ListColumnWriterState>(row_group, row_group.columns.size());
|
1860
2020
|
result->child_state = child_writer->InitializeWriteState(row_group);
|
1861
2021
|
return std::move(result);
|
@@ -2078,12 +2238,50 @@ void ArrayColumnWriter::Write(ColumnWriterState &state_p, Vector &vector, idx_t
|
|
2078
2238
|
child_writer->Write(*state.child_state, array_child, count * array_size);
|
2079
2239
|
}
|
2080
2240
|
|
2241
|
+
// special double/float class to deal with dictionary encoding and NaN equality
|
2242
|
+
struct double_na_equal {
|
2243
|
+
double_na_equal() : val(0) {
|
2244
|
+
}
|
2245
|
+
explicit double_na_equal(const double val_p) : val(val_p) {
|
2246
|
+
}
|
2247
|
+
// NOLINTNEXTLINE: allow implicit conversion to double
|
2248
|
+
operator double() const {
|
2249
|
+
return val;
|
2250
|
+
}
|
2251
|
+
|
2252
|
+
bool operator==(const double &right) const {
|
2253
|
+
if (std::isnan(val) && std::isnan(right)) {
|
2254
|
+
return true;
|
2255
|
+
}
|
2256
|
+
return val == right;
|
2257
|
+
}
|
2258
|
+
double val;
|
2259
|
+
};
|
2260
|
+
|
2261
|
+
struct float_na_equal {
|
2262
|
+
float_na_equal() : val(0) {
|
2263
|
+
}
|
2264
|
+
explicit float_na_equal(const float val_p) : val(val_p) {
|
2265
|
+
}
|
2266
|
+
// NOLINTNEXTLINE: allow implicit conversion to float
|
2267
|
+
operator float() const {
|
2268
|
+
return val;
|
2269
|
+
}
|
2270
|
+
bool operator==(const float &right) const {
|
2271
|
+
if (std::isnan(val) && std::isnan(right)) {
|
2272
|
+
return true;
|
2273
|
+
}
|
2274
|
+
return val == right;
|
2275
|
+
}
|
2276
|
+
float val;
|
2277
|
+
};
|
2278
|
+
|
2081
2279
|
//===--------------------------------------------------------------------===//
|
2082
2280
|
// Create Column Writer
|
2083
2281
|
//===--------------------------------------------------------------------===//
|
2084
2282
|
|
2085
2283
|
unique_ptr<ColumnWriter> ColumnWriter::CreateWriterRecursive(ClientContext &context,
|
2086
|
-
vector<duckdb_parquet::
|
2284
|
+
vector<duckdb_parquet::SchemaElement> &schemas,
|
2087
2285
|
ParquetWriter &writer, const LogicalType &type,
|
2088
2286
|
const string &name, vector<string> schema_path,
|
2089
2287
|
optional_ptr<const ChildFieldIDs> field_ids,
|
@@ -2107,7 +2305,7 @@ unique_ptr<ColumnWriter> ColumnWriter::CreateWriterRecursive(ClientContext &cont
|
|
2107
2305
|
if (type.id() == LogicalTypeId::STRUCT || type.id() == LogicalTypeId::UNION) {
|
2108
2306
|
auto &child_types = StructType::GetChildTypes(type);
|
2109
2307
|
// set up the schema element for this struct
|
2110
|
-
duckdb_parquet::
|
2308
|
+
duckdb_parquet::SchemaElement schema_element;
|
2111
2309
|
schema_element.repetition_type = null_type;
|
2112
2310
|
schema_element.num_children = UnsafeNumericCast<int32_t>(child_types.size());
|
2113
2311
|
schema_element.__isset.num_children = true;
|
@@ -2137,7 +2335,7 @@ unique_ptr<ColumnWriter> ColumnWriter::CreateWriterRecursive(ClientContext &cont
|
|
2137
2335
|
// set up the two schema elements for the list
|
2138
2336
|
// for some reason we only set the converted type in the OPTIONAL element
|
2139
2337
|
// first an OPTIONAL element
|
2140
|
-
duckdb_parquet::
|
2338
|
+
duckdb_parquet::SchemaElement optional_element;
|
2141
2339
|
optional_element.repetition_type = null_type;
|
2142
2340
|
optional_element.num_children = 1;
|
2143
2341
|
optional_element.converted_type = ConvertedType::LIST;
|
@@ -2154,7 +2352,7 @@ unique_ptr<ColumnWriter> ColumnWriter::CreateWriterRecursive(ClientContext &cont
|
|
2154
2352
|
schema_path.push_back(name);
|
2155
2353
|
|
2156
2354
|
// then a REPEATED element
|
2157
|
-
duckdb_parquet::
|
2355
|
+
duckdb_parquet::SchemaElement repeated_element;
|
2158
2356
|
repeated_element.repetition_type = FieldRepetitionType::REPEATED;
|
2159
2357
|
repeated_element.num_children = 1;
|
2160
2358
|
repeated_element.__isset.num_children = true;
|
@@ -2184,7 +2382,7 @@ unique_ptr<ColumnWriter> ColumnWriter::CreateWriterRecursive(ClientContext &cont
|
|
2184
2382
|
// }
|
2185
2383
|
// }
|
2186
2384
|
// top map element
|
2187
|
-
duckdb_parquet::
|
2385
|
+
duckdb_parquet::SchemaElement top_element;
|
2188
2386
|
top_element.repetition_type = null_type;
|
2189
2387
|
top_element.num_children = 1;
|
2190
2388
|
top_element.converted_type = ConvertedType::MAP;
|
@@ -2201,7 +2399,7 @@ unique_ptr<ColumnWriter> ColumnWriter::CreateWriterRecursive(ClientContext &cont
|
|
2201
2399
|
schema_path.push_back(name);
|
2202
2400
|
|
2203
2401
|
// key_value element
|
2204
|
-
duckdb_parquet::
|
2402
|
+
duckdb_parquet::SchemaElement kv_element;
|
2205
2403
|
kv_element.repetition_type = FieldRepetitionType::REPEATED;
|
2206
2404
|
kv_element.num_children = 2;
|
2207
2405
|
kv_element.__isset.repetition_type = true;
|
@@ -2229,7 +2427,7 @@ unique_ptr<ColumnWriter> ColumnWriter::CreateWriterRecursive(ClientContext &cont
|
|
2229
2427
|
return make_uniq<ListColumnWriter>(writer, schema_idx, schema_path, max_repeat, max_define,
|
2230
2428
|
std::move(struct_writer), can_have_nulls);
|
2231
2429
|
}
|
2232
|
-
duckdb_parquet::
|
2430
|
+
duckdb_parquet::SchemaElement schema_element;
|
2233
2431
|
schema_element.type = ParquetWriter::DuckDBTypeToParquetType(type);
|
2234
2432
|
schema_element.repetition_type = null_type;
|
2235
2433
|
schema_element.__isset.num_children = false;
|
@@ -2243,7 +2441,6 @@ unique_ptr<ColumnWriter> ColumnWriter::CreateWriterRecursive(ClientContext &cont
|
|
2243
2441
|
ParquetWriter::SetSchemaProperties(type, schema_element);
|
2244
2442
|
schemas.push_back(std::move(schema_element));
|
2245
2443
|
schema_path.push_back(name);
|
2246
|
-
|
2247
2444
|
if (type.id() == LogicalTypeId::BLOB && type.GetAlias() == "WKB_BLOB" &&
|
2248
2445
|
GeoParquetFileMetadata::IsGeoParquetConversionEnabled(context)) {
|
2249
2446
|
return make_uniq<WKBColumnWriter>(context, writer, schema_idx, std::move(schema_path), max_repeat, max_define,
|
@@ -2299,11 +2496,11 @@ unique_ptr<ColumnWriter> ColumnWriter::CreateWriterRecursive(ClientContext &cont
|
|
2299
2496
|
return make_uniq<StandardColumnWriter<uint64_t, uint64_t>>(writer, schema_idx, std::move(schema_path),
|
2300
2497
|
max_repeat, max_define, can_have_nulls);
|
2301
2498
|
case LogicalTypeId::FLOAT:
|
2302
|
-
return make_uniq<StandardColumnWriter<
|
2303
|
-
|
2499
|
+
return make_uniq<StandardColumnWriter<float_na_equal, float>>(writer, schema_idx, std::move(schema_path),
|
2500
|
+
max_repeat, max_define, can_have_nulls);
|
2304
2501
|
case LogicalTypeId::DOUBLE:
|
2305
|
-
return make_uniq<StandardColumnWriter<
|
2306
|
-
|
2502
|
+
return make_uniq<StandardColumnWriter<double_na_equal, double>>(writer, schema_idx, std::move(schema_path),
|
2503
|
+
max_repeat, max_define, can_have_nulls);
|
2307
2504
|
case LogicalTypeId::DECIMAL:
|
2308
2505
|
switch (type.InternalType()) {
|
2309
2506
|
case PhysicalType::INT16:
|
@@ -2321,14 +2518,14 @@ unique_ptr<ColumnWriter> ColumnWriter::CreateWriterRecursive(ClientContext &cont
|
|
2321
2518
|
}
|
2322
2519
|
case LogicalTypeId::BLOB:
|
2323
2520
|
case LogicalTypeId::VARCHAR:
|
2324
|
-
return make_uniq<
|
2325
|
-
|
2521
|
+
return make_uniq<StandardColumnWriter<string_t, string_t, ParquetStringOperator>>(
|
2522
|
+
writer, schema_idx, std::move(schema_path), max_repeat, max_define, can_have_nulls);
|
2326
2523
|
case LogicalTypeId::UUID:
|
2327
|
-
return make_uniq<
|
2328
|
-
|
2524
|
+
return make_uniq<StandardColumnWriter<hugeint_t, ParquetUUIDTargetType, ParquetUUIDOperator>>(
|
2525
|
+
writer, schema_idx, std::move(schema_path), max_repeat, max_define, can_have_nulls);
|
2329
2526
|
case LogicalTypeId::INTERVAL:
|
2330
|
-
return make_uniq<
|
2331
|
-
|
2527
|
+
return make_uniq<StandardColumnWriter<interval_t, ParquetIntervalTargetType, ParquetIntervalOperator>>(
|
2528
|
+
writer, schema_idx, std::move(schema_path), max_repeat, max_define, can_have_nulls);
|
2332
2529
|
case LogicalTypeId::ENUM:
|
2333
2530
|
return make_uniq<EnumColumnWriter>(writer, type, schema_idx, std::move(schema_path), max_repeat, max_define,
|
2334
2531
|
can_have_nulls);
|
@@ -2337,4 +2534,73 @@ unique_ptr<ColumnWriter> ColumnWriter::CreateWriterRecursive(ClientContext &cont
|
|
2337
2534
|
}
|
2338
2535
|
}
|
2339
2536
|
|
2537
|
+
template <>
|
2538
|
+
struct NumericLimits<float_na_equal> {
|
2539
|
+
static constexpr float Minimum() {
|
2540
|
+
return std::numeric_limits<float>::lowest();
|
2541
|
+
};
|
2542
|
+
static constexpr float Maximum() {
|
2543
|
+
return std::numeric_limits<float>::max();
|
2544
|
+
};
|
2545
|
+
static constexpr bool IsSigned() {
|
2546
|
+
return std::is_signed<float>::value;
|
2547
|
+
}
|
2548
|
+
static constexpr bool IsIntegral() {
|
2549
|
+
return std::is_integral<float>::value;
|
2550
|
+
}
|
2551
|
+
};
|
2552
|
+
|
2553
|
+
template <>
|
2554
|
+
struct NumericLimits<double_na_equal> {
|
2555
|
+
static constexpr double Minimum() {
|
2556
|
+
return std::numeric_limits<double>::lowest();
|
2557
|
+
};
|
2558
|
+
static constexpr double Maximum() {
|
2559
|
+
return std::numeric_limits<double>::max();
|
2560
|
+
};
|
2561
|
+
static constexpr bool IsSigned() {
|
2562
|
+
return std::is_signed<double>::value;
|
2563
|
+
}
|
2564
|
+
static constexpr bool IsIntegral() {
|
2565
|
+
return std::is_integral<double>::value;
|
2566
|
+
}
|
2567
|
+
};
|
2568
|
+
|
2340
2569
|
} // namespace duckdb
|
2570
|
+
|
2571
|
+
namespace std {
|
2572
|
+
template <>
|
2573
|
+
struct hash<duckdb::ParquetIntervalTargetType> {
|
2574
|
+
size_t operator()(const duckdb::ParquetIntervalTargetType &val) const {
|
2575
|
+
return duckdb::Hash(duckdb::const_char_ptr_cast(val.bytes),
|
2576
|
+
duckdb::ParquetIntervalTargetType::PARQUET_INTERVAL_SIZE);
|
2577
|
+
}
|
2578
|
+
};
|
2579
|
+
|
2580
|
+
template <>
|
2581
|
+
struct hash<duckdb::ParquetUUIDTargetType> {
|
2582
|
+
size_t operator()(const duckdb::ParquetUUIDTargetType &val) const {
|
2583
|
+
return duckdb::Hash(duckdb::const_char_ptr_cast(val.bytes), duckdb::ParquetUUIDTargetType::PARQUET_UUID_SIZE);
|
2584
|
+
}
|
2585
|
+
};
|
2586
|
+
|
2587
|
+
template <>
|
2588
|
+
struct hash<duckdb::float_na_equal> {
|
2589
|
+
size_t operator()(const duckdb::float_na_equal &val) const {
|
2590
|
+
if (std::isnan(val.val)) {
|
2591
|
+
return duckdb::Hash<float>(std::numeric_limits<float>::quiet_NaN());
|
2592
|
+
}
|
2593
|
+
return duckdb::Hash<float>(val.val);
|
2594
|
+
}
|
2595
|
+
};
|
2596
|
+
|
2597
|
+
template <>
|
2598
|
+
struct hash<duckdb::double_na_equal> {
|
2599
|
+
inline size_t operator()(const duckdb::double_na_equal &val) const {
|
2600
|
+
if (std::isnan(val.val)) {
|
2601
|
+
return duckdb::Hash<double>(std::numeric_limits<double>::quiet_NaN());
|
2602
|
+
}
|
2603
|
+
return duckdb::Hash<double>(val.val);
|
2604
|
+
}
|
2605
|
+
};
|
2606
|
+
} // namespace std
|